pax_global_header00006660000000000000000000000064146426251130014516gustar00rootroot0000000000000052 comment=7f944cb16a7b9b08397d2f3fdfe981f6876788b2 scikit-bio-0.6.2/000077500000000000000000000000001464262511300135605ustar00rootroot00000000000000scikit-bio-0.6.2/.codecov.yml000066400000000000000000000001621464262511300160020ustar00rootroot00000000000000coverage: status: patch: default: target: 80% project: default: target: 80% scikit-bio-0.6.2/.coveragerc000066400000000000000000000005221464262511300157000ustar00rootroot00000000000000# this file is based on the examples provided on scikit-learn's .coveragerc [run] omit = */tests* */__init__.py source = skbio branch = True include = */skbio/* relative_files = True [report] exclude_lines = pragma: no cover raise NotImplementedError if __name__ == .__main__.: omit = */tests* */__init__.py scikit-bio-0.6.2/.dockerignore000066400000000000000000000001371464262511300162350ustar00rootroot00000000000000.pytest_cache build/ dist/ *.swp env*/ __pycache__/ */__pycache__/ **/__pycache__/ *.egg-info/ scikit-bio-0.6.2/.editorconfig000066400000000000000000000003251464262511300162350ustar00rootroot00000000000000root = true [*] charset = utf-8 insert_final_newline = false [*.{py,c,h}] indent_style = space indent_size = 4 [*.{yml,yaml}] indent_style = space indent_size = 2 [Makefile] indent_style = tab indent_size = 4 scikit-bio-0.6.2/.github/000077500000000000000000000000001464262511300151205ustar00rootroot00000000000000scikit-bio-0.6.2/.github/PULL_REQUEST_TEMPLATE.md000066400000000000000000000024511464262511300207230ustar00rootroot00000000000000Please complete the following checklist: * [ ] I have read the [contribution guidelines](https://scikit.bio/contribute.html). * [ ] I have documented all public-facing changes in the [changelog](https://github.com/scikit-bio/scikit-bio/blob/main/CHANGELOG.md). 
* [ ] **This pull request includes code, documentation, or other content derived from external source(s).** If this is the case, ensure the external source's license is compatible with scikit-bio's [license](https://github.com/scikit-bio/scikit-bio/blob/main/LICENSE.txt). Include the license in the `licenses` directory and add a comment in the code giving proper attribution. Ensure any other requirements set forth by the license and/or author are satisfied. - **It is your responsibility to disclose** code, documentation, or other content derived from external source(s). If you have questions about whether something can be included in the project or how to give proper attribution, include those questions in your pull request and a reviewer will assist you. * [ ] This pull request does not include code, documentation, or other content derived from external source(s). Note: [This document](https://scikit.bio/devdoc/review.html) may also be helpful to see some of the things code reviewers will be verifying when reviewing your pull request. 
scikit-bio-0.6.2/.github/workflows/000077500000000000000000000000001464262511300171555ustar00rootroot00000000000000scikit-bio-0.6.2/.github/workflows/ci.yml000066400000000000000000000175711464262511300203060ustar00rootroot00000000000000name: CI on: push: branches: [ main ] paths-ignore: ["web/**", "**.md", "README.rst"] pull_request: branches: [ main ] paths-ignore: ["web/**", "**.md", "README.rst"] env: latest_python: "3.12" supported_pythons: '["3.8", "3.9", "3.10", "3.11", "3.12"]' miniforge_version: "23.11.0-0" miniforge_variant: "Mambaforge" jobs: conf: # This job is needed to route the global environment variables into # a context that's available for matrix (and name, but that's unimportant) name: Prepare Test Plan runs-on: ubuntu-latest outputs: latest_python: ${{ steps.set-vars.outputs.latest_python }} supported_pythons: ${{ steps.set-vars.outputs.supported_pythons }} steps: - name: Report Plan id: set-vars run: | echo "latest_python=$latest_python" >> $GITHUB_OUTPUT echo "supported_pythons=$supported_pythons" >> $GITHUB_OUTPUT lint: name: Lint code (${{ needs.conf.outputs.latest_python }}, ubuntu-latest) needs: conf runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ env.latest_python }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies run: | pip install -r ci/requirements.lint.txt conda list - name: Run Ruff run: | ruff check --output-format=github . 
doc: name: Build Documentation (${{ needs.conf.outputs.latest_python }}, ubuntu-latest) needs: ["conf", "lint"] runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ env.latest_python }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies run: | pip install -r ci/requirements.doc.txt pip install . conda list - name: Make documentation run: make doc # save built documentation (HTML) for deployment - name: Substitute URLs if: github.event_name == 'push' run: python doc/suburl.py - name: Save documentation if: github.event_name == 'push' uses: actions/upload-artifact@v4 with: name: docpack path: doc/build/html/ # full-scale test with latest Python test-latest: name: Test (${{ needs.conf.outputs.latest_python }}, ${{ matrix.os }}) needs: conf runs-on: ${{ matrix.os }} defaults: run: shell: bash -l {0} strategy: fail-fast: true # macos-14 and on uses Apple silicon chips (ARM-based). # macos-13 is the last version that uses Intel chips. See: # https://docs.github.com/en/actions/using-github-hosted-runners/ # about-github-hosted-runners/about-github-hosted-runners#standard- # github-hosted-runners-for-public-repositories matrix: os: ["ubuntu-latest", "macos-13", "macos-14", "windows-latest"] steps: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ env.latest_python }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies run: | pip install . 
conda list - name: Run unit tests env: WITH_COVERAGE: "TRUE" run: make test # upload coverage reports to Codecov (only under Linux) - name: Generate coverage reports if: runner.os == 'Linux' run: | cd ci && coverage lcov --rcfile ../.coveragerc - name: Upload coverage reports to Codecov if: runner.os == 'Linux' uses: codecov/codecov-action@v4.2.0 with: token: ${{ secrets.CODECOV_TOKEN }} slug: scikit-bio/scikit-bio # test under AArch64 (ARM64) architecture test-aarch64: name: Test (${{ needs.conf.outputs.latest_python }}, qemu::aarch64-centos) needs: conf runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 # setup-buildx-action uses the git context directly # but checklist wants the .git directory - name: Set up QEMU id: qemu uses: docker/setup-qemu-action@v3 - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build and test for linux-aarch64 id: docker_build uses: docker/build-push-action@v6 with: context: . # ^ to use the local checkout, not the git context file: aarch64.Dockerfile cache-from: type=gha cache-to: type=gha,mode=max build-args: | PYTHON_VERSION=${{ env.latest_python }} # test under all combinations of environments (Python versions, operating systems, PyPI vs Conda) test-all: name: Test (${{ matrix.python_version }}, ${{ matrix.os }}, ${{ fromJSON('["pypi", "conda"]')[matrix.use_conda] }}) runs-on: ${{ matrix.os }} needs: ["conf", "test-latest", "lint"] defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: os: ["ubuntu-latest", "macos-latest", "windows-latest"] python_version: ${{ fromJSON(needs.conf.outputs.supported_pythons) }} use_conda: [true, false] steps: - uses: actions/checkout@v4 - uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ matrix.python_version }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies (conda) if: ${{ matrix.use_conda }} run: | 
conda install -q --yes -c conda-forge --file ci/conda_requirements.txt pip install . --no-deps conda list - name: Install dependencies if: ${{ !matrix.use_conda }} run: | pip install . conda list - name: Run unit tests env: WITH_COVERAGE: "TRUE" run: make test # deploy the current development documentation to the website # only when event is push and all tests have passed deploy-doc: name: Deploy documentation if: github.event_name == 'push' needs: ["doc", "test-all", "test-aarch64"] runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: # load documentation built by job "doc" - name: Load documentation uses: actions/download-artifact@v4 with: name: docpack path: docpack # checkout website repo - name: Check out website uses: actions/checkout@v4 with: repository: scikit-bio/scikit-bio.github.io path: website ssh-key: ${{ secrets.SSH_DEPLOY_KEY }} # synchronize documentation to website's docs/dev directory - name: Update documentation run: rsync -av --delete docpack/ website/docs/dev # push website back to repo - name: Push website run: | cd website git config user.name "${{ github.actor }}" git config user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com" git add -A git commit -m "Update from ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}" git push scikit-bio-0.6.2/.github/workflows/release.yml000066400000000000000000000047051464262511300213260ustar00rootroot00000000000000name: Release on: push: tags: - '*' env: earliest_python: "3.8" latest_python: "3.12" miniforge_version: "23.11.0-0" miniforge_variant: "Mambaforge" jobs: pypi: name: Publish to PyPI runs-on: ubuntu-latest steps: - name: Check out repo uses: actions/checkout@v4 with: submodules: true - name: Set up Python uses: actions/setup-python@v5 with: python-version: ${{ env.earliest_python }} - name: Build distribution run: | export RELEASE_VERSION=${{ github.ref_name }} pip install numpy cython python setup.py sdist - name: Publish distribution 
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} doc: name: Publish documentation runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - name: Check out repo uses: actions/checkout@v4 - name: Set up Python uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true python-version: ${{ env.latest_python }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies run: | pip install -r ci/requirements.doc.txt pip install . conda list - name: Build documentation run: make doc - name: Substitute URLs run: python doc/suburl.py - name: Check out website uses: actions/checkout@v4 with: repository: scikit-bio/scikit-bio.github.io path: website ssh-key: ${{ secrets.SSH_DEPLOY_KEY }} - name: Update documentation run: | rsync -av --delete doc/build/html/ website/docs/${{ github.ref_name }} rsync -av --delete doc/build/html/ website/docs/latest - name: Publish website run: | cd website git config user.name "${{ github.actor }}" git config user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com" git add -A git commit -m "Update from ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}" git push scikit-bio-0.6.2/.github/workflows/website.yml000066400000000000000000000045011464262511300213420ustar00rootroot00000000000000name: Website on: push: branches: [ main ] paths: [ "web/**", "!**.md" ] pull_request: branches: [ main ] paths: [ "web/**", "!**.md" ] env: latest_python: "3.12" miniforge_version: "23.11.0-0" miniforge_variant: "Mambaforge" jobs: build: name: Build website runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - name: Check out repo uses: actions/checkout@v4 - name: Set up conda uses: conda-incubator/setup-miniconda@v3 with: auto-update-conda: true 
python-version: ${{ env.latest_python }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} - name: Install dependencies run: pip install -r ci/requirements.doc.txt - name: Make website run: make web - name: Substitute URLs if: github.event_name == 'push' run: python web/suburl.py - name: Add version list if: github.event_name == 'push' run: cp web/versions.json web/_build/html/ - name: Save website package if: github.event_name == 'push' uses: actions/upload-artifact@v4 with: name: webpack path: web/_build/html/ deploy: name: Deploy website if: github.event_name == 'push' needs: build runs-on: ubuntu-latest defaults: run: shell: bash -l {0} steps: - name: Check out repo uses: actions/checkout@v4 - name: Load website package uses: actions/download-artifact@v4 with: name: webpack path: webpack - name: Check out website repo uses: actions/checkout@v4 with: repository: scikit-bio/scikit-bio.github.io path: website ssh-key: ${{ secrets.SSH_DEPLOY_KEY }} - name: Update website run: rsync -av --delete --exclude-from web/.exclude webpack/ website - name: Push website run: | cd website git config user.name "${{ github.actor }}" git config user.email "${{ github.actor_id }}+${{ github.actor }}@users.noreply.github.com" git add -A git commit -m "Update from ${{ github.server_url }}/${{ github.repository }}/commit/${{ github.sha }}" git push scikit-bio-0.6.2/.gitignore000066400000000000000000000007631464262511300155560ustar00rootroot00000000000000# Temporary files *~ \#*# .pytest_cache/* *.py[cod] # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 __pycache__ # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox nosetests.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject # vi .*.swp # Sphinx builds doc/source/generated doc/build web/_build # OSX files .DS_Store # VS Code .vscode # Cython files *.c !ssw.c 
scikit-bio-0.6.2/.pre-commit-config.yaml000066400000000000000000000004251464262511300200420ustar00rootroot00000000000000# This file gives the option to use Ruff as a pre-commit hook. repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. rev: v0.5.0 hooks: # Run the linter with the fix option enabled. - id: ruff # Run the formatter. - id: ruff-format scikit-bio-0.6.2/CHANGELOG.md000066400000000000000000003036231464262511300154000ustar00rootroot00000000000000# scikit-bio changelog ## Version 0.6.2 ### Features * Added support for Microsoft Windows operating system. ([#2071](https://github.com/scikit-bio/scikit-bio/pull/2071), [#2068](https://github.com/scikit-bio/scikit-bio/pull/2068), [#2067](https://github.com/scikit-bio/scikit-bio/pull/2067), [#2061](https://github.com/scikit-bio/scikit-bio/pull/2061), [#2046](https://github.com/scikit-bio/scikit-bio/pull/2046), [#2040](https://github.com/scikit-bio/scikit-bio/pull/2040), [#2036](https://github.com/scikit-bio/scikit-bio/pull/2036), [#2034](https://github.com/scikit-bio/scikit-bio/pull/2034), [#2032](https://github.com/scikit-bio/scikit-bio/pull/2032), [#2005](https://github.com/scikit-bio/scikit-bio/pull/2005)) * Added alpha diversity metrics: Hill number (`hill`), Renyi entropy (`renyi`) and Tsallis entropy (`tsallis`) ([#2074](https://github.com/scikit-bio/scikit-bio/pull/2074)). * Added `rename` method for `OrdinationResults` and `DissimilarityMatrix` classes ([#2027](https://github.com/scikit-bio/scikit-bio/pull/2027), [#2085](https://github.com/scikit-bio/scikit-bio/pull/2085)). * Added `nni` function for phylogenetic tree rearrangement using nearest neighbor interchange (NNI) ([#2050](https://github.com/scikit-bio/scikit-bio/pull/2050)). * Added method `TreeNode.unrooted_move`, which resembles `TreeNode.unrooted_copy` but rearranges the tree in place, thus avoid making copies of the nodes ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). 
* Added method `TreeNode.root_by_outgroup`, which reroots a tree according to a given outgroup ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Added method `TreeNode.unroot`, which converts a rooted tree into unrooted by trifucating its root ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Added method `TreeNode.insert`, which inserts a node into the branch connecting self and its parent ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). ### Performance enhancements * The time and memory efficiency of `TreeNode` has been significantly improved by making its caching mechanism lazy ([#2082](https://github.com/scikit-bio/scikit-bio/pull/2082)). * `Treenode.copy` and `TreeNode.unrooted_copy` can now perform shallow copy of a tree in addition to deep copy. * `TreeNode.unrooted_copy` can now copy all attributes of the nodes, in addition to name and length ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Paremter `above` was added to `TreeNode.root_at`, such that the user can root the tree within the branch connecting the given node and its parent, thereby creating a rooted tree ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Parameter `branch_attrs` was added to the `unrooted_copy`, `root_at`, and `root_at_midpoint` methods of `TreeNode`, such that the user can customize which node attributes should be considered as branch attributes and treated accordingly during the rerooting operation. The default behavior is preserved but is subject ot change in version 0.7.0 ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Parameter `root_name` was added to the `unrooted_copy`, `root_at`, and `root_at_midpoint` methods of `TreeNode`, such that the user can customize (or omit) the name to be given to the root node. The default behavior is preserved but is subject ot change in version 0.7.0 ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). 
### Bug fixes * Cleared the internal node references after performing midpoint rooting (`TreeNode.root_at_midpoint`), such that a deep copy of the resulting tree will not result in infinite recursion ([#2073](https://github.com/scikit-bio/scikit-bio/pull/2073)). * Fixed the Zenodo link in the README to always point to the most recent version ([#2078](https://github.com/scikit-bio/scikit-bio/pull/2078)). ### Miscellaneous * Added statsmodels as a dependency of scikit-bio. It replaces some of the from-scratch statistical analyses in scikit-bio, including Welch's t-test (with confidence intervals), Benjamini-Hochberg FDR correction, and Holm-Bonferroni FDR correction ([#2049](https://github.com/scikit-bio/scikit-bio/pull/2049), ([#2063](https://github.com/scikit-bio/scikit-bio/pull/2063))). ### Deprecated functionality * Methods `deepcopy` and `unrooted_deepcopy` of `Treenode` are deprecated. Use `copy` and `unrooted_copy` instead. ## Version 0.6.1 ### Features * NumPy 2.0 is now supported ([#2051](https://github.com/scikit-bio/scikit-bio/pull/2051])). We thank @rgommers 's advice on this ([#1964](https://github.com/scikit-bio/scikit-bio/issues/1964)). * Added module `skbio.embedding` to provide support for storing and manipulating embeddings for biological objects, such as protein embeddings outputted from protein language models ([#2008](https://github.com/scikit-bio/scikit-bio/pull/2008])). * Added an efficient sequence alignment path data structure `AlignPath` and its derivative `PairAlignPath` to provide a uniform interface for various multiple and pariwise alignment formats ([#2011](https://github.com/scikit-bio/scikit-bio/pull/2011)). * Added `simpson_d` as an alias for `dominance` (Simpson's dominance index, a.k.a. Simpson's D) ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). * Added `inv_simpson` (inverse Simpson index), which is equivalent to `enspie` ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). 
* Added parameter `exp` to `shannon` to calculate the exponential of Shannon index (i.e., perplexity, or effective number of species) ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). * Added parameter `finite` to Simpson's _D_ (`dominance`) and derived metrics (`simpson`, `simpson_e` and `inv_simpson`) to correct for finite samples ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). * Added support for dictionary and pandas DataFrame as input for `TreeNode.from_taxonomy` ([#2042](https://github.com/scikit-bio/scikit-bio/pull/2042)). ### Performance enhancements * `subsample_counts` now uses an optimized method from `biom-format` ([#2016](https://github.com/scikit-bio/scikit-bio/pull/2016)). * Improved efficiency of counts matrix and vector validation prior to calculating community diversity metrics ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). ### Miscellaneous * Default logarithm base of Shannon index (`shannon`) was changed from 2 to e. This is to ensure consistency with other Shannon-based metrics (`pielou_e`), and with literature and implementations in the field. Meanwhile, parameter `base` was added to `pielou_e` such that the user can control this behavior ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). See discussions in [1884](https://github.com/scikit-bio/scikit-bio/issues/1884) and [2014](https://github.com/scikit-bio/scikit-bio/issues/2014). * Improved treatment of empty communities (i.e., all taxa have zero counts, or there is no taxon) when calculating alpha diversity metrics. Most metrics will return `np.nan` and do not raise a warning due to zero division. Exceptions are metrics that describe observed counts, includng `sobs`, `singles`, `doubles` and `osd`, which return zero ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). See discussions in [#2014](https://github.com/scikit-bio/scikit-bio/issues/2014). 
* Return values of `pielou_e` and `heip_e` were set to 1.0 for one-taxon communities, such that NaN is avoided, while honoring the definition (evenness of taxon abundance(s)) and the rationale (ratio between observed and maximum) ([#2024](https://github.com/scikit-bio/scikit-bio/pull/2024)). * Removed hdmedians as a dependency by porting its `geomedian` function (geometric median) into scikit-bio ([#2003](https://github.com/scikit-bio/scikit-bio/pull/2003)). * Removed 98% warnings issued during the test process ([#2045](https://github.com/scikit-bio/scikit-bio/pull/2045) and [#2037](https://github.com/scikit-bio/scikit-bio/pull/2037)). ## Version 0.6.0 ### Performance enhancements * Launched the new scikit-bio website: https://scikit.bio. The previous domain names _scikit-bio.org_ and _skbio.org_ continue to work and redirect to the new website. * Migrated the scikit-bio website repo from the `gh-pages` branch of the `scikit-bio` repo to a standalone repo: [`scikit-bio.github.io`](https://github.com/scikit-bio/scikit-bio.github.io). * Replaced the [Bootstrap theme](https://sphinx-bootstrap-theme.readthedocs.io/en/latest/) with the [PyData theme](https://pydata-sphinx-theme.readthedocs.io/en/stable/) for building documentation using Sphinx. Extended this theme to the website. Customized design elements ([#1934](https://github.com/scikit-bio/scikit-bio/pull/1934)). * Improved the calculation of Fisher's alpha diversity index (`fisher_alpha`). It is now compatible with optimizers in SciPy 1.11+. Edge cases such as all singletons can be handled correctly. Handling of errors and warnings was improved. Documentation was enriched ([#1890](https://github.com/scikit-bio/scikit-bio/pull/1890)). * Allowed `delimiter=None` which represents whitespace of arbitrary length in reading lsmat format matrices ([#1912](https://github.com/scikit-bio/scikit-bio/pull/1912)). 
### Features * Added biom-format Table import and updated corresponding requirement files ([#1907](https://github.com/scikit-bio/scikit-bio/pull/1907)). * Added biom-format 2.1.0 IO support ([#1984](https://github.com/scikit-bio/scikit-bio/pull/1984)). * Added `Table` support to `alpha_diversity` and `beta_diversity` drivers ([#1984](https://github.com/scikit-bio/scikit-bio/pull/1984)). * Implemented a mechanism to automatically build documentation and/or homepage and deploy them to the website ([#1934](https://github.com/scikit-bio/scikit-bio/pull/1934)). * Added the Benjamini-Hochberg method as an option for FDR correction (in addition to the existing Holm-Bonferroni method) for `ancom` and `dirmult_ttest` ([#1988](https://github.com/scikit-bio/scikit-bio/pull/1988)). * Added function `dirmult_ttest`, which performs differential abundance test using a Dirichilet multinomial distribution. This function mirrors the method provided by ALDEx2 ([#1956](https://github.com/scikit-bio/scikit-bio/pull/1956)). * Added method `Sequence.to_indices` to convert a sequence into a vector of indices of characters in an alphabet (can be from a substitution matrix) or unique characters observed in the sequence. Supports gap masking and wildcard substitution ([#1917](https://github.com/scikit-bio/scikit-bio/pull/1917)). * Added class `SubstitutionMatrix` to support subsitution matrices for nucleotides, amino acids are more general cases ([#1913](https://github.com/scikit-bio/scikit-bio/pull/1913)). * Added alpha diversity metric `sobs`, which is the observed species richness (S_{obs}) of a sample. `sobs` will replace `observed_otus`, which uses the historical term "OTU". Also added metric `observed_features` to be compatible with the QIIME 2 terminology. All three metrics are equivalent ([#1902](https://github.com/scikit-bio/scikit-bio/pull/1902)). * `beta_diversity` now supports use of Pandas a `DataFrame` index, issue [#1808](https://github.com/scikit-bio/scikit-bio/issues/1808). 
* Added alpha diversity metric `phydiv`, which is a generalized phylogenetic diversity (PD) framework permitting unrooted or rooted tree, unweighted or weighted by abundance, and an exponent parameter of the weight term ([#1893](https://github.com/scikit-bio/scikit-bio/pull/1893)). * Adopted NumPy's new random generator `np.random.Generator` (see [NEP 19](https://numpy.org/neps/nep-0019-rng-policy.html)) ([#1889](https://github.com/scikit-bio/scikit-bio/pull/1889)). * SciPy 1.11+ is now supported ([#1887](https://github.com/scikit-bio/scikit-bio/pull/1887)). * Removed IPython as a dependency. Scikit-bio continues to support displaying plots in IPython, but it no longer requires importing IPython functionality ([#1901](https://github.com/scikit-bio/scikit-bio/pull/1901)). * Made Matplotlib an optional dependency. Scikit-bio no longer requires Matplotlib except for plotting, during which it attempts to import Matplotlib if it is present in the system, and raises an error if not ([#1901](https://github.com/scikit-bio/scikit-bio/pull/1901)). * Ported the QIIME 2 metadata object into skbio. ([#1929](https://github.com/scikit-bio/scikit-bio/pull/1929)) * Python 3.12+ is now supported, thank you @actapia ([#1930](https://github.com/scikit-bio/scikit-bio/pull/1930)) * Introduced native character conversion ([#1971])(https://github.com/scikit-bio/scikit-bio/pull/1971) ### Backward-incompatible changes [experimental] * Beta diversity metric `kulsinski` was removed. This was motivated by that SciPy replaced this distance metric with `kulczynski1` in version 1.11 (see SciPy issue [#2009](https://github.com/scipy/scipy/issues/2009)), and that both metrics do not return 0 on two identical vectors ([#1887](https://github.com/scikit-bio/scikit-bio/pull/1887)). ### Bug fixes * Fixed documentation interface of `vlr` and relevant functions ([#1934](https://github.com/scikit-bio/scikit-bio/pull/1934)). * Fixed broken link in documentation of Simpson's evenness index. 
See issue [#1923](https://github.com/scikit-bio/scikit-bio/issues/1923). * Safely handle `Sequence.iter_kmers` where `k` is greater than the sequence length ([#1723](https://github.com/scikit-bio/scikit-bio/issues/1723)) * Re-enabled OpenMP support, which has been mistakenly disabled in 0.5.8 ([#1874](https://github.com/scikit-bio/scikit-bio/pull/1874)) * `permanova` and `permdist` operate on a `DistanceMatrix` and a grouping object. Element IDs must be synchronized to compare correct sets of pairwise distances. This failed in case the grouping was provided as a `pandas.Series`, because it was interpreted as an ordered `list` and indices were ignored (see issue [#1877](https://github.com/scikit-bio/scikit-bio/issues/1877) for an example). Note: `pandas.DataFrame` was handled correctly. This behavior has been fixed with PR [#1879](https://github.com/scikit-bio/scikit-bio/pull/1879) * Fixed slicing for `TabularMSALoc` on Python 3.12. See issue [#1926](https://github.com/scikit-bio/scikit-bio/issues/1926). ### Miscellaneous * Replaced the historical term "OTU" with the more generic term "taxon" (plural: "taxa"). As a consequence, the parameter "otu_ids" in phylogenetic alpha and beta diversity metrics was replaced by "taxa". Meanwhile, the old parameter "otu_ids" is still kept as an alias of "taxa" for backward compatibility. However it will be removed in a future release. * Revised contributor's guidelines. * Renamed function `multiplicative_replacement` as `multi_replace` for conciseness ([#1988](https://github.com/scikit-bio/scikit-bio/pull/1988)). * Renamed parameter `multiple_comparisons_correction` as `p_adjust` of function `ancom` for conciseness ([#1988](https://github.com/scikit-bio/scikit-bio/pull/1988)). * Enabled code coverage reporting via Codecov. See [#1954](https://github.com/scikit-bio/scikit-bio/pull/1954). * Renamed the default branch from "master" to "main". See [#1953](https://github.com/scikit-bio/scikit-bio/pull/1953). 
* Enabled subclassing of DNA, RNA and Protein classes to allow secondary development. * Dropped support for NumPy < 1.17.0 in order to utilize the new random generator. * Use CYTHON by default during build ([#1874](https://github.com/scikit-bio/scikit-bio/pull/1874)) * Implemented augmented assignments proposed in issue [#1789](https://github.com/scikit-bio/scikit-bio/issues/1789) * Incorporated Ruff's formatting and linting via pre-commit hooks and GitHub Actions. See PR [#1924](https://github.com/scikit-bio/scikit-bio/pull/1924). * Improved docstrings for functions accross the entire codebase. See [#1933](https://github.com/scikit-bio/scikit-bio/pull/1933) and [#1940](https://github.com/scikit-bio/scikit-bio/pull/1940) * Removed API lifecycle decorators in favor of deprecation warnings. See [#1916](https://github.com/scikit-bio/scikit-bio/issues/1916) ## Version 0.5.9 ### Features * Adding Variance log ratio estimators in `skbio.stats.composition.vlr` and `skbio.stats.composition.pairwise_vlr` ([#1803](https://github.com/scikit-bio/scikit-bio/pull/1803)) * Added `skbio.stats.composition.tree_basis` to construct ILR bases from `TreeNode` objects. ([#1862](https://github.com/scikit-bio/scikit-bio/pull/1862)) * `IntervalMetadata.query` now defaults to obtaining all results, see [#1817](https://github.com/scikit-bio/scikit-bio/issues/1817). ### Backward-incompatible changes [experimental] * With the introduction of the `tree_basis` object, the ILR bases are now represented in log-odds coordinates rather than in probabilities to minimize issues with numerical stability. Furthermore, the `ilr` and `ilr_inv` functions now takes the `basis` input parameter in terms of log-odds coordinates. This affects the `skbio.stats.composition.sbp_basis` as well. 
([#1862](https://github.com/scikit-bio/scikit-bio/pull/1862)) ### Important * Complex multiple axis indexing operations with `TabularMSA` have been removed from testing due to incompatibilities with modern versions of Pandas. ([#1851](https://github.com/scikit-bio/scikit-bio/pull/1851)) * Pinning `scipy <= 1.10.1` ([#1851](https://github.com/scikit-bio/scikit-bio/pull/1867)) ### Bug fixes * Fixed a bug that caused build failure on the ARM64 microarchitecture due to floating-point number handling. ([#1859](https://github.com/scikit-bio/scikit-bio/pull/1859)) * Never let the Gini index go below 0.0, see [#1844](https://github.com/scikit-bio/scikit-bio/issue/1844). * Fixed bug [#1847](https://github.com/scikit-bio/scikit-bio/issues/1847) in which the edge from the root was inadvertantly included in the calculation for `descending_branch_length` ### Miscellaneous * Replaced dependencies `CacheControl` and `lockfile` with `requests` to avoid a dependency inconsistency issue of the former. (See [#1863](https://github.com/scikit-bio/scikit-bio/pull/1863), merged in [#1859](https://github.com/scikit-bio/scikit-bio/pull/1859)) * Updated installation instructions for developers in `CONTRIBUTING.md` ([#1860](https://github.com/scikit-bio/scikit-bio/pull/1860)) ## Version 0.5.8 ### Features * Added NCBI taxonomy database dump format (`taxdump`) ([#1810](https://github.com/scikit-bio/scikit-bio/pull/1810)). * Added `TreeNode.from_taxdump` for converting taxdump into a tree ([#1810](https://github.com/scikit-bio/scikit-bio/pull/1810)). * scikit-learn has been removed as a dependency. This was a fairly heavy-weight dependency that was providing minor functionality to scikit-bio. The critical components have been implemented in scikit-bio directly, and the non-criticial components are listed under "Backward-incompatible changes [experimental]". * Python 3.11 is now supported. 
### Backward-incompatible changes [experimental] * With the removal of the scikit-learn dependency, three beta diversity metric names can no longer be specified. These are `wminkowski`, `nan_euclidean`, and `haversine`. On testing, `wminkowski` and `haversine` did not work through `skbio.diversity.beta_diversity` (or `sklearn.metrics.pairwise_distances`). The former was deprecated in favor of calling `minkowski` with a vector of weights provided as kwarg `w` (example below), and the latter does not work with data of this shape. `nan_euclidean` can still be accessed from scikit-learn directly if needed, if a user installs scikit-learn in their environment (example below). ``` counts = [[23, 64, 14, 0, 0, 3, 1], [0, 3, 35, 42, 0, 12, 1], [0, 5, 5, 0, 40, 40, 0], [44, 35, 9, 0, 1, 0, 0], [0, 2, 8, 0, 35, 45, 1], [0, 0, 25, 35, 0, 19, 0], [88, 31, 0, 5, 5, 5, 5], [44, 39, 0, 0, 0, 0, 0]] # new mechanism of accessing wminkowski from skbio.diversity import beta_diversity beta_diversity("minkowski", counts, w=[1,1,1,1,1,1,2]) # accessing nan_euclidean through scikit-learn directly import skbio from sklearn.metrics import pairwise_distances sklearn_dm = pairwise_distances(counts, metric="nan_euclidean") skbio_dm = skbio.DistanceMatrix(sklearn_dm) ``` ### Deprecated functionality [experimental] * `skbio.alignment.local_pairwise_align_ssw` has been deprecated ([#1814](https://github.com/scikit-bio/scikit-bio/issues/1814)) and will be removed or replaced in scikit-bio 0.6.0. ### Bug fixes * Use `oldest-supported-numpy` as build dependency. This fixes problems with environments that use an older version of numpy than the one used to build scikit-bio ([#1813](https://github.com/scikit-bio/scikit-bio/pull/1813)). ## Version 0.5.7 ### Features * Introduce support for Python 3.10 ([#1801](https://github.com/scikit-bio/scikit-bio/pull/1801)). * Tentative support for Apple M1 ([#1709](https://github.com/scikit-bio/scikit-bio/pull/1709)). 
* Added support for reading and writing a binary distance matrix object format. ([#1716](https://github.com/scikit-bio/scikit-bio/pull/1716)) * Added support for `np.float32` with `DissimilarityMatrix` objects. * Added support for method and number_of_dimensions to permdisp reducing the runtime by 100x at 4000 samples, [issue #1769](https://github.com/scikit-bio/scikit-bio/pull/1769). * OrdinationResults object is now accepted as input for permdisp. ### Performance enhancements * Avoid an implicit data copy on construction of `DissimilarityMatrix` objects. * Avoid validation on copy of `DissimilarityMatrix` and `DistanceMatrix` objects, see [PR #1747](https://github.com/scikit-bio/scikit-bio/pull/1747) * Use an optimized version of symmetry check in DistanceMatrix, see [PR #1747](https://github.com/scikit-bio/scikit-bio/pull/1747) * Avoid performing filtering when ids are identical, see [PR #1752](https://github.com/scikit-bio/scikit-bio/pull/1752) * center_distance_matrix has been re-implemented in cython for both speed and memory use. Indirectly speeds up pcoa [PR #1749](https://github.com/scikit-bio/scikit-bio/pull/1749) * Use a memory-optimized version of permute in DistanceMatrix, see [PR #1756](https://github.com/scikit-bio/scikit-bio/pull/1756). * Refactor pearson and spearman skbio.stats.distance.mantel implementations to drastically improve memory locality. Also cache intermediate results that are invariant across permutations, see [PR #1756](https://github.com/scikit-bio/scikit-bio/pull/1756). * Refactor permanova to remove intermediate buffers and cythonize the internals, see [PR #1768](https://github.com/scikit-bio/scikit-bio/pull/1768). ### Bug fixes * Fix windows and 32bit incompatibility in `unweighted_unifrac`. ### Miscellaneous * Python 3.6 has been removed from our testing matrix. * Specify build dependencies in pyproject.toml. This allows the package to be installed without having to first manually install numpy. 
* Update hdmedians package to a version which doesn't require an initial manual numpy install. * Now buildable on non-x86 platforms due to use of the [SIMD Everywhere](https://github.com/simd-everywhere/simde) library. * Regenerate Cython wrapper by default to avoid incompatibilities with installed CPython. * Update documentation for the `skbio.stats.composition.ancom` function. ([#1741](https://github.com/scikit-bio/scikit-bio/pull/1741)) ## Version 0.5.6 ### Features * Added option to return a capture group compiled regex pattern to any class inheriting ``GrammaredSequence`` through the ``to_regex`` method. ([#1431](https://github.com/scikit-bio/scikit-bio/issues/1431)) * Added `Dissimilarity.within` and `.between` to obtain the respective distances and express them as a `DataFrame`. ([#1662](https://github.com/scikit-bio/scikit-bio/pull/1662)) * Added Kendall Tau as possible correlation method in the `skbio.stats.distance.mantel` function ([#1675](https://github.com/scikit-bio/scikit-bio/issues/1675)). * Added support for IUPAC amino acid codes U (selenocysteine), O (pyrrolysine), and J (leucine or isoleucine). ([#1576](https://github.com/scikit-bio/scikit-bio/issues/1576)) ### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] * Changed `skbio.tree.TreeNode.support` from a method to a property. * Added `assign_supports` method to `skbio.tree.TreeNode` to extract branch support values from node labels. * Modified the way a node's label is printed: `support:name` if both exist, or `support` or `name` if either exists. ### Performance enhancements ### Bug fixes * Require `Sphinx <= 3.0`. Newer Sphinx versions caused build errors. [#1719](https://github.com/scikit-bio/scikit-bio/pull/1719) * `skbio.stats.ordination` tests have been relaxed. ([#1713](https://github.com/scikit-bio/scikit-bio/issues/1713)) * Fixes build errors for newer versions of NumPy, Pandas, and SciPy. 
* Corrected a critical bug in `skbio.alignment.StripedSmithWaterman`/`skbio.alignment.local_pairwise_align_ssw` which would cause the formatting of the aligned sequences to misplace gap characters by the number of gap characters present in the opposing aligned sequence up to that point. This was caused by a faulty implementation of CIGAR string parsing, see [#1679](https://github.com/scikit-bio/scikit-bio/pull/1679) for full details. * Fixes build errors for newer versions of NumPy, Pandas, and SciPy. * Corrected a critical bug in `skbio.alignment.StripedSmithWaterman`/`skbio.alignment.local_pairwise_align_ssw` which would cause the formatting of the aligned sequences to misplace gap characters by the number of gap characters present in the opposing aligned sequence up to that point. This was caused by a faulty implementation of CIGAR string parsing, see [#1679](https://github.com/scikit-bio/scikit-bio/pull/1679) for full details. ### Deprecated functionality [stable] ### Deprecated functionality [experimental] ### Miscellaneous * `skbio.diversity.beta_diversity` now accepts a pandas DataFrame as input. * Avoid pandas 1.0.0 import warning ([#1688](https://github.com/scikit-bio/scikit-bio/issues/1688)) * Added support for Python 3.8 and dropped support for Python 3.5. * This version now depends on `scipy >= 1.3` and `pandas >= 1.0`. ## Version 0.5.5 (2018-12-10) ### Features * `skbio.stats.composition` now has methods to compute additive log-ratio transformation and inverse additive log-ratio transformation (`alr`, `alr_inv`) as well as a method to build a basis from a sequential binary partition (`sbp_basis`). 
### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] ### Performance enhancements ### Bug fixes ### Deprecated functionality [stable] ### Deprecated functionality [experimental] ### Miscellaneous * Python 3.6 and 3.7 compatibility is now supported * A pytest runner is shipped with every installation ([#1633](https://github.com/scikit-bio/scikit-bio/pull/1633)) * The nosetest framework has been replaced in favor of pytest ([#1624](https://github.com/scikit-bio/scikit-bio/pull/1624)) * The numpy docs are deprecated in favor of [Napoleon](http://www.sphinx-doc.org/en/master/usage/extensions/napoleon.html) ([#1629](https://github.com/scikit-bio/scikit-bio/pull/1629)) * This version is now compatible with numpy >= 1.17.0 and Pandas >= 0.23. ([#1627](https://github.com/scikit-bio/scikit-bio/pull/1627)) ## Version 0.5.4 (2018-08-23) ### Features * Added `FSVD`, an alternative fast heuristic method to perform Principal Coordinates Analysis, to `skbio.stats.ordination.pcoa`. ### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] ### Performance enhancements * Added optimized utility methods `f_matrix_inplace` and `e_matrix_inplace` which perform `f_matrix` and `e_matrix` computations in-place and are used by the new `center_distance_matrix` method in `skbio.stats.ordination`. ### Bug fixes ### Deprecated functionality [stable] ### Deprecated functionality [experimental] ### Miscellaneous ## Version 0.5.3 (2018-08-07) ### Features * Added `unpack` and `unpack_by_func` methods to `skbio.tree.TreeNode` to unpack one or multiple internal nodes. The `unpack` operation removes an internal node and regrafts its children to its parent while retaining the overall length. ([#1572](https://github.com/scikit-bio/scikit-bio/pull/1572)) * Added `support` to `skbio.tree.TreeNode` to return the support value of a node. * Added `permdisp` to `skbio.stats.distance` to test for the homogeneity of groups. 
([#1228](https://github.com/scikit-bio/scikit-bio/issues/1228)). * Added `pcoa_biplot` to `skbio.stats.ordination` to project descriptors into a PCoA plot. * Fixed pandas to 0.22.0 due to this: https://github.com/pandas-dev/pandas/issues/20527 ### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] ### Performance enhancements ### Bug fixes * Relaxing type checking in diversity calculations. ([#1583](https://github.com/scikit-bio/scikit-bio/issues/1583)). ### Deprecated functionality [stable] ### Deprecated functionality [experimental] ### Miscellaneous ## Version 0.5.2 (2018-04-18) ### Features * Added ``skbio.io.format.embl`` for reading and writing EMBL files for ``DNA``, ``RNA`` and ``Sequence`` classes. * Removing ValueError check in `skbio.stats._subsample.subsample_counts` when `replace=True` and `n` is greater than the number of items in counts. [#1527](https://github.com/scikit-bio/scikit-bio/pull/1527) * Added ``skbio.io.format.gff3`` for reading and writing GFF3 files for ``DNA``, ``Sequence``, and ``IntervalMetadata`` classes. ([#1450](https://github.com/scikit-bio/scikit-bio/pull/1450)) * `skbio.metadata.IntervalMetadata` constructor has a new keyword argument, `copy_from`, for creating an `IntervalMetadata` object from an existing `IntervalMetadata` object with specified `upper_bound`. * `skbio.metadata.IntervalMetadata` constructor allows `None` as a valid value for `upper_bound`. An `upper_bound` of `None` means that the `IntervalMetadata` object has no upper bound. * `skbio.metadata.IntervalMetadata.drop` has a new boolean parameter `negate` to indicate whether to drop or keep the specified `Interval` objects. ### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] ### Performance enhancements * `skbio.tree.nj` wall-clock runtime was decreased by 99% for a 500x500 distance matrix and 93% for a 100x100 distance matrix. 
([#1512](https://github.com/scikit-bio/scikit-bio/pull/1512), [#1513](https://github.com/scikit-bio/scikit-bio/pull/1513)) ### Bug fixes * The `include_self` parameter was not being honored in `skbio.TreeNode.tips`. The scope of this bug was that if `TreeNode.tips` was called on a tip, it would always result in an empty `list` when unrolled. * In `skbio.stats.ordination.ca`, `proportion_explained` was missing in the returned `OrdinationResults` object. ([#1345](https://github.com/scikit-bio/scikit-bio/issues/1345)) * `skbio.diversity.beta_diversity` now handles qualitative metrics as expected such that `beta_diversity('jaccard', mat) == beta_diversity('jaccard', mat > 0)`. Please see [#1549](https://github.com/scikit-bio/scikit-bio/issues/1549) for further detail. * `skbio.stats.ordination.rda` The occasional column mismatch in output `biplot_scores` is fixed ([#1519](https://github.com/scikit-bio/scikit-bio/issues/1519)). ### Deprecated functionality [stable] ### Deprecated functionality [experimental] ### Miscellaneous * scikit-bio now depends on pandas >= 0.19.2, and is compatible with newer pandas versions (e.g. 0.20.3) that were previously incompatible. * scikit-bio now depends on `numpy >= 1.9.2, < 1.14.0` for compatibility with Python 3.4, 3.5, and 3.6 and the available numpy conda packages in `defaults` and `conda-forge` channels. * added support for running tests from `setup.py`. Both `python setup.py nosetests` and `python setup.py test` are now supported, however `python setup.py test` will only run a subset of the full test suite. ([#1341](https://github.com/scikit-bio/scikit-bio/issues/1341)) ## Version 0.5.1 (2016-11-12) ### Features * Added `IntervalMetadata` and `Interval` classes in `skbio.metadata` to store, query, and manipulate information of a sub-region of a sequence. 
([#1414](https://github.com/scikit-bio/scikit-bio/issues/1414)) * `Sequence` and its child classes (including `GrammaredSequence`, `RNA`, `DNA`, `Protein`) now accept `IntervalMetadata` in their constructor API. Some of their relevant methods are also updated accordingly. ([#1430](https://github.com/scikit-bio/scikit-bio/pull/1430)) * GenBank parser now reads and writes `Sequence` or its subclass objects with `IntervalMetadata`. ([#1440](https://github.com/scikit-bio/scikit-bio/pull/1440)) * `DissimilarityMatrix` now has a new constructor method called `from_iterable`. ([#1343](https://github.com/scikit-bio/scikit-bio/issues/1343)). * `DissimilarityMatrix` now allows non-hollow matrices. ([#1343](https://github.com/scikit-bio/scikit-bio/issues/1343)). * `DistanceMatrix.from_iterable` now accepts a `validate=True` parameter. ([#1343](https://github.com/scikit-bio/scikit-bio/issues/1343)). * ``DistanceMatrix`` now has a new method called ``to_series`` to create a ``pandas.Series`` from a ``DistanceMatrix`` ([#1397](https://github.com/scikit-bio/scikit-bio/issues/1397)). * Added parallel beta diversity calculation support via `skbio.diversity.block_beta_diversity`. The issue and idea is discussed in ([#1181](https://github.com/scikit-bio/scikit-bio/issues/1181), while the actual code changes are in [#1352](https://github.com/scikit-bio/scikit-bio/pull/1352)). ### Backward-incompatible changes [stable] * The constructor API for `Sequence` and its child classes (including `GrammaredSequence`, `RNA`, `DNA`, `Protein`) are changed from `(sequence, metadata=None, positional_metadata=None, lowercase=False)` to `(sequence, metadata=None, positional_metadata=None, interval_metadata=None, lowercase=False)` The changes are made to allow these classes to adopt `IntervalMetadata` object for interval features on the sequence. 
The `interval_metadata` parameter is added immediately after `positional_metadata` instead of appended to the end, because it is more natural and logical and, more importantly, because it is unlikely in practice to break user code. A user's code would break only if they had supplied `metadata`, `positional_metadata`, and `lowercase` parameters positionally. In the unlikely event that this happens, users will get an error telling them a bool isn't a valid `IntervalMetadata` type, so it won't silently produce buggy behavior. ### Backward-incompatible changes [experimental] * Modifying basis handling in `skbio.stats.composition.ilr_inv` prior to checking for orthogonality. Now the basis is strictly assumed to be in the Aitchison simplex. * `DistanceMatrix.from_iterable` default behavior is now to validate matrix by computing all pairwise distances. Pass `validate=False` to get the previous behavior (no validation, but faster execution).([#1343](https://github.com/scikit-bio/scikit-bio/issues/1343)). * GenBank I/O now parses sequence features into the attribute of `interval_metadata` instead of `positional_metadata`. And the key of `FEATURES` is removed from `metadata` attribute. ### Performance enhancements * `TreeNode.shear` was rewritten for approximately a 25% performance increase. ([#1399](https://github.com/scikit-bio/scikit-bio/pull/1399)) * The `IntervalMetadata` allows dramatic decrease in memory usage in reading GenBank files of feature rich sequences. ([#1159](https://github.com/scikit-bio/scikit-bio/issues/1159)) ### Bug fixes * `skbio.tree.TreeNode.prune` and implicitly `skbio.tree.TreeNode.shear` were not handling a situation in which a parent was validly removed during pruning operations as may happen if the resulting subtree does not include the root. Previously, an `AttributeError` would raise as `parent` would be `None` in this situation. * numpy linking was fixed for installation under El Capitan. 
* A bug was introduced in #1398 into `TreeNode.prune` and fixed in #1416 in which, under the special case of a single descendent existing from the root, the resulting children parent references were not updated. The cause of the bug was a call made to `self.children.extend` as opposed to `self.extend` where the former is a `list.extend` without knowledge of the tree, while the latter is `TreeNode.extend` which is able to adjust references to `self.parent`. ### Miscellaneous * Removed deprecated functions from `skbio.util`: `is_casava_v180_or_later`, `remove_files`, and `create_dir`. * Removed deprecated `skbio.Sequence.copy` method. ## Version 0.5.0 (2016-06-14) **IMPORTANT**: scikit-bio is no longer compatible with Python 2. scikit-bio is compatible with Python 3.4 and later. ### Features * Added more descriptive error message to `skbio.io.registry` when attempting to read without specifying `into` and when there is no generator reader. ([#1326](https://github.com/scikit-bio/scikit-bio/issues/1326)) * Added support for reference tags to `skbio.io.format.stockholm` reader and writer. ([#1348](https://github.com/scikit-bio/scikit-bio/issues/1348)) * Expanded error message in `skbio.io.format.stockholm` reader when `constructor` is not passed, in order to provide better explanation to user. ([#1327](https://github.com/scikit-bio/scikit-bio/issues/1327)) * Added `skbio.sequence.distance.kmer_distance` for computing the kmer distance between two sequences. ([#913](https://github.com/scikit-bio/scikit-bio/issues/913)) * Added `skbio.sequence.Sequence.replace` for assigning a character to positions in a `Sequence`. ([#1222](https://github.com/scikit-bio/scikit-bio/issues/1222)) * Added support for `pandas.RangeIndex`, lowering the memory footprint of default integer index objects. `Sequence.positional_metadata` and `TabularMSA.positional_metadata` now use `pd.RangeIndex` as the positional metadata index. `TabularMSA` now uses `pd.RangeIndex` as the default index. 
Usage of `pd.RangeIndex` over the previous `pd.Int64Index` [should be transparent](http://pandas.pydata.org/pandas-docs/version/0.18.0/whatsnew.html#range-index), so these changes should be non-breaking to users. scikit-bio now depends on pandas >= 0.18.0 ([#1308](https://github.com/scikit-bio/scikit-bio/issues/1308)) * Added `reset_index=False` parameter to `TabularMSA.append` and `TabularMSA.extend` for resetting the MSA's index to the default index after appending/extending. * Added support for partial pairwise calculations via `skbio.diversity.partial_beta_diversity`. ([#1221](https://github.com/scikit-bio/scikit-bio/issues/1221), [#1337](https://github.com/scikit-bio/scikit-bio/pull/1337)). This function is immediately deprecated as its return type will change in the future and should be used with caution in its present form (see the function's documentation for details). * `TemporaryFile` and `NamedTemporaryFile` are now supported IO sources for `skbio.io` and related functionality. ([#1291](https://github.com/scikit-bio/scikit-bio/issues/1291)) * Added `tree_node_class=TreeNode` parameter to `skbio.tree.majority_rule` to support returning consensus trees of type `TreeNode` (the default) or a type that has the same interface as `TreeNode` (e.g. `TreeNode` subclasses) ([#1193](https://github.com/scikit-bio/scikit-bio/pull/1193)) * `TreeNode.from_linkage_matrix` and `TreeNode.from_taxonomy` now support constructing `TreeNode` subclasses. `TreeNode.bifurcate` now supports `TreeNode` subclasses ([#1193](https://github.com/scikit-bio/scikit-bio/pull/1193)) * The `ignore_metadata` keyword has been added to `TabularMSA.iter_positions` to improve performance when metadata is not necessary. * Pairwise aligners in `skbio.alignment` now propagate per-sequence `metadata` objects (this does not include `positional_metadata`). 
### Backward-incompatible changes [stable] ### Backward-incompatible changes [experimental] * `TabularMSA.append` and `TabularMSA.extend` now require one of `minter`, `index`, or `reset_index` to be provided when incorporating new sequences into an MSA. Previous behavior was to auto-increment the index labels if `minter` and `index` weren't provided and the MSA had a default integer index, otherwise error. Use `reset_index=True` to obtain the previous behavior in a more explicit way. * `skbio.stats.composition.ancom` now returns two `pd.DataFrame` objects, where it previously returned one. The first contains the ANCOM test results, as before, and the second contains percentile abundances of each feature in each group. The specific percentiles that are computed and returned is controlled by the new `percentiles` parameter to `skbio.stats.composition.ancom`. In the future, this second `pd.DataFrame` will not be returned by this function, but will be available through the [contingency table API](https://github.com/scikit-bio/scikit-bio/issues/848). ([#1293](https://github.com/scikit-bio/scikit-bio/issues/1293)) * `skbio.stats.composition.ancom` now performs multiple comparisons correction by default. The previous behavior of not performing multiple comparisons correction can be achieved by passing ``multiple_comparisons_correction=None``. * The ``reject`` column in the first ``pd.DataFrame`` returned from `skbio.stats.composition.ancom` has been renamed ``Reject null hypothesis`` for clarity. ([#1375](https://github.com/scikit-bio/scikit-bio/issues/1375)) ### Bug fixes * Fixed row and column names to `biplot_scores` in the `OrdinationResults` object from `skbio.stats.ordination`. This fix affect the `cca` and `rda` methods. ([#1322](https://github.com/scikit-bio/scikit-bio/issues/1322)) * Fixed bug when using `skbio.io.format.stockholm` reader on file with multi-line tree with no id. 
Previously this raised an `AttributeError`, now it correctly handles this type of tree. ([#1334](https://github.com/scikit-bio/scikit-bio/issues/1334)) * Fixed bug when reading Stockholm files with GF or GS features split over multiple lines. Previously, the feature text was simply concatenated because it was assumed to have trailing whitespace. There are examples of Stockholm files with and without trailing whitespace for multi-line features, so the `skbio.io.format.stockholm` reader now adds a single space when concatenating feature text without trailing whitespace to avoid joining words together. Multi-line trees stored as GF metadata are concatenated as they appear in the file; a space is not added when concatenating. ([#1328](https://github.com/scikit-bio/scikit-bio/issues/1328)) * Fixed bug when using `Sequence.iter_kmers` on empty `Sequence` object. Previously this raised a `ValueError`, now it returns an empty generator. * Fixed minor bug where adding sequences to an empty `TabularMSA` with MSA-wide `positional_metadata` would result in a `TabularMSA` object in an inconsistent state. This could happen using `TabularMSA.append` or `TabularMSA.extend`. This bug only affects a `TabularMSA` object *without* sequences that has MSA-wide `positional_metadata` (for example, `TabularMSA([], positional_metadata={'column': []})`). * `TreeNode.distance` now handles the situation in which `self` or `other` are ancestors. Previously, a node further up the tree was used resulting in inflated distances. ([#807](https://github.com/scikit-bio/scikit-bio/issues/807)) * `TreeNode.prune` can now handle a root with a single descendent. Previously, the root was ignored from possibly having a single descendent. ([#1247](https://github.com/scikit-bio/scikit-bio/issues/1247)) * Providing the `format` keyword to `skbio.io.read` when creating a generator with an empty file will now return an empty generator instead of raising `StopIteration`. 
([#1313](https://github.com/scikit-bio/scikit-bio/issues/1313)) * `OrdinationResults` is now importable from `skbio` and `skbio.stats.ordination` and correctly linked from the documentation ([#1205](https://github.com/scikit-bio/scikit-bio/issues/1205)) * Fixed performance bug in pairwise aligners resulting in 100x worse performance than in 0.2.4. ### Deprecated functionality [stable] * Deprecated use of the term "non-degenerate", in favor of "definite". `GrammaredSequence.nondegenerate_chars`, `GrammaredSequence.nondegenerates`, and `GrammaredSequence.has_nondegenerates` have been renamed to `GrammaredSequence.definite_chars`, `GrammaredSequence.definites`, and `GrammaredSequence.has_definites`, respectively. The old names will be removed in scikit-bio 0.5.2. Relevant affected public classes include `GrammaredSequence`, `DNA`, `RNA`, and `Protein`. ### Deprecated functionality [experimental] * Deprecated function `skbio.util.create_dir`. This function will be removed in scikit-bio 0.5.1. Please use the Python standard library functionality described [here](https://docs.python.org/2/library/os.html#os.makedirs). ([#833](https://github.com/scikit-bio/scikit-bio/issues/833)) * Deprecated function `skbio.util.remove_files`. This function will be removed in scikit-bio 0.5.1. Please use the Python standard library functionality described [here](https://docs.python.org/2/library/os.html#os.remove). ([#833](https://github.com/scikit-bio/scikit-bio/issues/833)) * Deprecated function `skbio.util.is_casava_v180_or_later`. This function will be removed in 0.5.1. Functionality moved to FASTQ sniffer. ([#833](https://github.com/scikit-bio/scikit-bio/issues/833)) ### Miscellaneous * When installing scikit-bio via `pip`, numpy must now be installed first ([#1296](https://github.com/scikit-bio/scikit-bio/issues/1296)) ## Version 0.4.2 (2016-02-17) Minor maintenance release. **This is the last Python 2.7 compatible release. 
Future scikit-bio releases will only support Python 3.** ### Features * Added `skbio.tree.TreeNode.bifurcate` for converting multifurcating trees into bifurcating trees. ([#896](https://github.com/scikit-bio/scikit-bio/issues/896)) * Added `skbio.io.format.stockholm` for reading Stockholm files into a `TabularMSA` and writing from a `TabularMSA`. ([#967](https://github.com/scikit-bio/scikit-bio/issues/967)) * scikit-bio `Sequence` objects have better compatibility with numpy. For example, calling `np.asarray(sequence)` now converts the sequence to a numpy array of characters (the same as calling `sequence.values`). * Added `skbio.sequence.distance` subpackage for computing distances between scikit-bio `Sequence` objects ([#913](https://github.com/scikit-bio/scikit-bio/issues/913)) * Added ``skbio.sequence.GrammaredSequence``, which can be inherited from to create grammared sequences with custom alphabets (e.g., for use with TabularMSA) ([#1175](https://github.com/scikit-bio/scikit-bio/issues/1175)) * Added ``skbio.util.classproperty`` decorator ### Backward-incompatible changes [stable] * When sniffing or reading a file (`skbio.io.sniff`, `skbio.io.read`, or the object-oriented `.read()` interface), passing `newline` as a keyword argument to `skbio.io.open` now raises a `TypeError`. This backward-incompatible change to a stable API is necessary because it fixes a bug (more details in bug fix section below). * When reading a FASTQ or QSEQ file and passing `variant='solexa'`, `ValueError` is now raised instead of `NotImplementedError`. This backward-incompatible change to a stable API is necessary to avoid creating a spin-locked process due to [a bug in Python](https://bugs.python.org/issue25786). See [#1256](https://github.com/scikit-bio/scikit-bio/issues/1256) for details. This change is temporary and will be reverted to `NotImplementedError` when the bug is fixed in Python. 
### Backward-incompatible changes [experimental] * `skbio.io.format.genbank`: When reading GenBank files, the date field of the LOCUS line is no longer parsed into a `datetime.datetime` object and is left as a string. When writing GenBank files, the locus date metadata is expected to be a string instead of a `datetime.datetime` object ([#1153](https://github.com/scikit-bio/scikit-bio/issues/1153)) * `Sequence.distance` now converts the input sequence (`other`) to its type before passing both sequences to `metric`. Previous behavior was to always convert to `Sequence`. ### Bug fixes * Fixed bug when using `Sequence.distance` or `DistanceMatrix.from_iterable` to compute distances between `Sequence` objects with differing `metadata`/`positional_metadata` and passing `metric=scipy.spatial.distance.hamming` ([#1254](https://github.com/scikit-bio/scikit-bio/issues/1254)) * Fixed performance bug when computing Hamming distances between `Sequence` objects in `DistanceMatrix.from_iterable` ([#1250](https://github.com/scikit-bio/scikit-bio/issues/1250)) * Changed `skbio.stats.composition.multiplicative_replacement` to raise an error whenever a large value of `delta` is chosen ([#1241](https://github.com/scikit-bio/scikit-bio/issues/1241)) * When sniffing or reading a file (`skbio.io.sniff`, `skbio.io.read`, or the object-oriented `.read()` interface), passing `newline` as a keyword argument to `skbio.io.open` now raises a `TypeError`. The file format's `newline` character will be used when opening the file. Previous behavior allowed overriding the format's `newline` character but this could cause issues with readers that assume newline characters are those defined by the file format (which is an entirely reasonable assumption). This bug is very unlikely to have surfaced in practice as the default `newline` behavior is *universal newlines mode*. * DNA, RNA, and Protein are no longer inheritable because they assume an IUPAC alphabet. 
* `DistanceMatrix` constructor provides more informative error message when data contains NaNs ([#1276](https://github.com/scikit-bio/scikit-bio/issues/1276)) ### Miscellaneous * Warnings raised by scikit-bio now share a common subclass ``skbio.util.SkbioWarning``. ## Version 0.4.1 (2015-12-09) ### Features * The ``TabularMSA`` object was added to represent and operate on tabular multiple sequence alignments. This satisfies [RFC 1](https://github.com/scikit-bio/scikit-bio-rfcs/blob/master/active/001-tabular-msa.md). See the ``TabularMSA`` docs for full details. * Added phylogenetic diversity metrics, including weighted UniFrac, unweighted UniFrac, and Faith's Phylogenetic Diversity. These are accessible as ``skbio.diversity.beta.unweighted_unifrac``, ``skbio.diversity.beta.weighted_unifrac``, and ``skbio.diversity.alpha.faith_pd``, respectively. * Addition of the function ``skbio.diversity.alpha_diversity`` to support applying an alpha diversity metric to multiple samples in one call. * Addition of the functions ``skbio.diversity.get_alpha_diversity_metrics`` and ``skbio.diversity.get_beta_diversity_metrics`` to support discovery of the alpha and beta diversity metrics implemented in scikit-bio. * Added `skbio.stats.composition.ancom` function, a test for OTU differential abundance across sample categories. ([#1054](https://github.com/scikit-bio/scikit-bio/issues/1054)) * Added `skbio.io.format.blast7` for reading BLAST+ output format 7 or BLAST output format 9 files into a `pd.DataFrame`. ([#1110](https://github.com/scikit-bio/scikit-bio/issues/1110)) * Added `skbio.DissimilarityMatrix.to_data_frame` method for creating a ``pandas.DataFrame`` from a `DissimilarityMatrix` or `DistanceMatrix`. ([#757](https://github.com/scikit-bio/scikit-bio/issues/757)) * Added support for one-dimensional vector of dissimilarities in `skbio.stats.distance.DissimilarityMatrix` constructor. 
([#624](https://github.com/scikit-bio/scikit-bio/issues/624)) * Added `skbio.io.format.blast6` for reading BLAST+ output format 6 or BLAST output format 8 files into a `pd.DataFrame`. ([#1110](https://github.com/scikit-bio/scikit-bio/issues/1110)) * Added `inner`, `ilr`, `ilr_inv` and `clr_inv` to ``skbio.stats.composition``, which enables linear transformations on compositions ([#892](https://github.com/scikit-bio/scikit-bio/issues/892)) * Added ``skbio.diversity.alpha.pielou_e`` function as an evenness metric of alpha diversity. ([#1068](https://github.com/scikit-bio/scikit-bio/issues/1068)) * Added `to_regex` method to `skbio.sequence._iupac_sequence` ABC - it returns a regex object that matches all non-degenerate versions of the sequence. * Added ``skbio.util.assert_ordination_results_equal`` function for comparing ``OrdinationResults`` objects in unit tests. * Added ``skbio.io.format.genbank`` for reading and writing GenBank/GenPept for ``DNA``, ``RNA``, ``Protein`` and ``Sequence`` classes. * Added ``skbio.util.RepresentationWarning`` for warning about substitutions, assumptions, or particular alterations that were made for the successful completion of a process. * ``TreeNode.tip_tip_distances`` now supports nodes without an associated length. In this case, a length of 0.0 is assumed and an ``skbio.util.RepresentationWarning`` is raised. Previous behavior was to raise a ``NoLengthError``. ([#791](https://github.com/scikit-bio/scikit-bio/issues/791)) * ``DistanceMatrix`` now has a new constructor method called `from_iterable`. * ``Sequence`` now accepts ``lowercase`` keyword like ``DNA`` and others. Updated ``fasta``, ``fastq``, and ``qseq`` readers/writers for ``Sequence`` to reflect this. * The ``lowercase`` method has been moved up to ``Sequence`` meaning all sequence objects now have a ``lowercase`` method. * Added ``reverse_transcribe`` class method to ``RNA``. 
* Added `Sequence.observed_chars` property for obtaining the set of observed characters in a sequence. ([#1075](https://github.com/scikit-bio/scikit-bio/issues/1075)) * Added `Sequence.frequencies` method for computing character frequencies in a sequence. ([#1074](https://github.com/scikit-bio/scikit-bio/issues/1074)) * Added experimental class-method ``Sequence.concat`` which will produce a new sequence from an iterable of existing sequences. Parameters control how positional metadata is propagated during a concatenation. * ``TreeNode.to_array`` now supports replacing ``nan`` branch lengths in the resulting branch length vector with the value provided as ``nan_length_value``. * ``skbio.io.format.phylip`` now supports sniffing and reading strict, sequential PHYLIP-formatted files into ``skbio.Alignment`` objects. ([#1006](https://github.com/scikit-bio/scikit-bio/issues/1006)) * Added `default_gap_char` class property to ``DNA``, ``RNA``, and ``Protein`` for representing gap characters in a new sequence. ### Backward-incompatible changes [stable] * `Sequence.kmer_frequencies` now returns a `dict`. Previous behavior was to return a `collections.Counter` if `relative=False` was passed, and a `collections.defaultdict` if `relative=True` was passed. In the case of a missing key, the `Counter` would return 0 and the `defaultdict` would return 0.0. Because the return type is now always a `dict`, attempting to access a missing key will raise a `KeyError`. This change *may* break backwards-compatibility depending on how the `Counter`/`defaultdict` is being used. We hope that in most cases this change will not break backwards-compatibility because both `Counter` and `defaultdict` are `dict` subclasses. 
If the previous behavior is desired, convert the `dict` into a `Counter`/`defaultdict`: ```python import collections from skbio import Sequence seq = Sequence('ACCGAGTTTAACCGAATA') # Counter freqs_dict = seq.kmer_frequencies(k=8) freqs_counter = collections.Counter(freqs_dict) # defaultdict freqs_dict = seq.kmer_frequencies(k=8, relative=True) freqs_default_dict = collections.defaultdict(float, freqs_dict) ``` **Rationale:** We believe it is safer to return `dict` instead of `Counter`/`defaultdict` as this may prevent error-prone usage of the return value. Previous behavior allowed accessing missing kmers, returning 0 or 0.0 depending on the `relative` parameter. This is convenient in many cases but also potentially misleading. For example, consider the following code: ```python from skbio import Sequence seq = Sequence('ACCGAGTTTAACCGAATA') freqs = seq.kmer_frequencies(k=8) freqs['ACCGA'] ``` Previous behavior would return 0 because the kmer `'ACCGA'` is not present in the `Counter`. In one respect this is the correct answer because we asked for kmers of length 8; `'ACCGA'` is a different length so it is not included in the results. However, we believe it is safer to avoid this implicit behavior in case the user assumes there are no `'ACCGA'` kmers in the sequence (which there are!). A `KeyError` in this case is more explicit and forces the user to consider their query. Returning a `dict` will also be consistent with `Sequence.frequencies`. ### Backward-incompatible changes [experimental] * Replaced ``PCoA``, ``CCA``, ``CA`` and ``RDA`` in ``skbio.stats.ordination`` with equivalent functions ``pcoa``, ``cca``, ``ca`` and ``rda``. These functions now take ``pd.DataFrame`` objects. * Change ``OrdinationResults`` to have its attributes based on ``pd.DataFrame`` and ``pd.Series`` objects, instead of pairs of identifiers and values. The changes are as follows: - ``species`` and ``species_ids`` have been replaced by a ``pd.DataFrame`` named ``features``. 
- ``site`` and ``site_ids`` have been replaced by a ``pd.DataFrame`` named ``samples``. - ``eigvals`` is now a ``pd.Series`` object. - ``proportion_explained`` is now a ``pd.Series`` object. - ``biplot`` is now a ``pd.DataFrame`` object named ``biplot_scores``. - ``site_constraints`` is now a ``pd.DataFrame`` object named ``sample_constraints``. * ``short_method_name`` and ``long_method_name`` are now required arguments of the ``OrdinationResults`` object. * Removed `skbio.diversity.alpha.equitability`. Please use `skbio.diversity.alpha.pielou_e`, which is more accurately named and better documented. Note that `equitability` by default used logarithm base 2 while `pielou_e` uses logarithm base `e` as described in Heip 1974. * ``skbio.diversity.beta.pw_distances`` is now called ``skbio.diversity.beta_diversity``. This function no longer defines a default metric, and ``metric`` is now the first argument to this function. This function can also now take a pairwise distances function as ``pairwise_func``. * Deprecated function ``skbio.diversity.beta.pw_distances_from_table`` has been removed from scikit-bio as scheduled. Code that used this should be adapted to use ``skbio.diversity.beta_diversity``. * ``TreeNode.index_tree`` now returns a 2-D numpy array as its second return value (the child node index) instead of a 1-D numpy array. * Deprecated functions `skbio.draw.boxplots` and `skbio.draw.grouped_distributions` have been removed from scikit-bio as scheduled. These functions generated plots that were not specific to bioinformatics. These types of plots can be generated with seaborn or another general-purpose plotting package. * Deprecated function `skbio.stats.power.bootstrap_power_curve` has been removed from scikit-bio as scheduled. Use `skbio.stats.power.subsample_power` or `skbio.stats.power.subsample_paired_power` followed by `skbio.stats.power.confidence_bound`. 
* Deprecated function `skbio.stats.spatial.procrustes` has been removed from scikit-bio as scheduled in favor of `scipy.spatial.procrustes`. * Deprecated class `skbio.tree.CompressedTrie` and function `skbio.tree.fasta_to_pairlist` have been removed from scikit-bio as scheduled in favor of existing general-purpose Python trie packages. * Deprecated function `skbio.util.flatten` has been removed from scikit-bio as scheduled in favor of solutions available in the Python standard library (see [here](http://stackoverflow.com/a/952952/3639023) and [here](http://stackoverflow.com/a/406199/3639023) for examples). * Pairwise alignment functions in `skbio.alignment` now return a tuple containing the `TabularMSA` alignment, alignment score, and start/end positions. The returned `TabularMSA`'s `index` is always the default integer index; sequence IDs are no longer propagated to the MSA. Additionally, the pairwise alignment functions now accept the following input types to align: - `local_pairwise_align_nucleotide`: `DNA` or `RNA` - `local_pairwise_align_protein`: `Protein` - `local_pairwise_align`: `IUPACSequence` - `global_pairwise_align_nucleotide`: `DNA`, `RNA`, or `TabularMSA[DNA|RNA]` - `global_pairwise_align_protein`: `Protein` or `TabularMSA[Protein]` - `global_pairwise_align`: `IUPACSequence` or `TabularMSA` - `local_pairwise_align_ssw`: `DNA`, `RNA`, or `Protein`. Additionally, this function now overrides the `protein` kwarg based on input type. `constructor` parameter was removed because the function now determines the return type based on input type. * Removed `skbio.alignment.SequenceCollection` in favor of using a list or other standard library containers to store scikit-bio sequence objects (most `SequenceCollection` operations were simple list comprehensions). Use `DistanceMatrix.from_iterable` instead of `SequenceCollection.distances` (pass `key="id"` to exactly match original behavior). 
* Removed `skbio.alignment.Alignment` in favor of `skbio.alignment.TabularMSA`. * Removed `skbio.alignment.SequenceCollectionError` and `skbio.alignment.AlignmentError` exceptions as their corresponding classes no longer exist. ### Bug Fixes * ``Sequence`` objects now handle slicing of empty positional metadata correctly. Any metadata that is empty will no longer be propagated by the internal ``_to`` constructor. ([#1133](https://github.com/scikit-bio/scikit-bio/issues/1133)) * ``DissimilarityMatrix.plot()`` no longer leaves a white border around the heatmap it plots (PR #1070). * ``TreeNode.root_at_midpoint`` no longer fails when a node with two equal length child branches exists in the tree. ([#1077](https://github.com/scikit-bio/scikit-bio/issues/1077)) * ``TreeNode._set_max_distance``, as called through ``TreeNode.get_max_distance`` or ``TreeNode.root_at_midpoint`` would store distance information as ``list``s in the attribute ``MaxDistTips`` on each node in the tree, however, these distances were only valid for the node in which the call to ``_set_max_distance`` was made. The values contained in ``MaxDistTips`` are now correct across the tree following a call to ``get_max_distance``. The scope of impact of this bug is limited to users that were interacting directly with ``MaxDistTips`` on descendant nodes; this bug does not impact any known method within scikit-bio. ([#1223](https://github.com/scikit-bio/scikit-bio/issues/1223)) * Added missing `nose` dependency to setup.py's `install_requires`. ([#1214](https://github.com/scikit-bio/scikit-bio/issues/1214)) * Fixed issue that resulted in legends of ``OrdinationResult`` plots sometimes being truncated. ([#1210](https://github.com/scikit-bio/scikit-bio/issues/1210)) ### Deprecated functionality [stable] * `skbio.Sequence.copy` has been deprecated in favor of `copy.copy(seq)` and `copy.deepcopy(seq)`. ### Miscellaneous * Doctests are now written in Python 3. 
* ``make test`` now validates MANIFEST.in using [check-manifest](https://github.com/mgedmin/check-manifest). ([#461](https://github.com/scikit-bio/scikit-bio/issues/461)) * Many new alpha diversity equations added to ``skbio.diversity.alpha`` documentation. ([#321](https://github.com/scikit-bio/scikit-bio/issues/321)) * Order of ``lowercase`` and ``validate`` keywords swapped in ``DNA``, ``RNA``, and ``Protein``. ## Version 0.4.0 (2015-07-08) Initial beta release. In addition to the changes detailed below, the following subpackages have been mostly or entirely rewritten and most of their APIs are substantially different (and improved!): * `skbio.sequence` * `skbio.io` The APIs of these subpackages are now stable, and all others are experimental. See the [API stability docs](https://github.com/scikit-bio/scikit-bio/tree/0.4.0/doc/source/user/api_stability.rst) for more details, including what we mean by *stable* and *experimental* in this context. We recognize that this is a lot of backward-incompatible changes. To avoid these types of changes being a surprise to our users, our public APIs are now decorated to make it clear to developers when an API can be relied upon (stable) and when it may be subject to change (experimental). ### Features * Added `skbio.stats.composition` for analyzing data made up of proportions * Added new ``skbio.stats.evolve`` subpackage for evolutionary statistics. Currently contains a single function, ``hommola_cospeciation``, which implements a permutation-based test of correlation between two distance matrices. * Added support for ``skbio.io.util.open_file`` and ``skbio.io.util.open_files`` to pull files from HTTP and HTTPS URLs. This behavior propagates to the I/O registry. * FASTA/QUAL (``skbio.io.format.fasta``) and FASTQ (``skbio.io.format.fastq``) readers now allow blank or whitespace-only lines at the beginning of the file, between records, or at the end of the file. 
A blank or whitespace-only line in any other location will continue to raise an error [#781](https://github.com/scikit-bio/scikit-bio/issues/781). * scikit-bio now ignores leading and trailing whitespace characters on each line while reading FASTA/QUAL and FASTQ files. * Added `ratio` parameter to `skbio.stats.power.subsample_power`. This allows the user to calculate power on groups for uneven size (For example, draw twice as many samples from Group B than Group A). If `ratio` is not set, group sizes will remain equal across all groups. * Power calculations (`skbio.stats.power.subsample_power` and `skbio.stats.power.subsample_paired_power`) can use test functions that return multiple p values, like some multivariate linear regression models. Previously, the power calculations required the test to return a single p value. * Added ``skbio.util.assert_data_frame_almost_equal`` function for comparing ``pd.DataFrame`` objects in unit tests. ### Performance enhancements * The speed of quality score decoding has been significantly improved (~2x) when reading `fastq` files. * The speed of `NucleotideSequence.reverse_complement` has been improved (~6x). ### Bug fixes * Changed `Sequence.distance` to raise an error any time two sequences are passed of different lengths regardless of the `distance_fn` being passed. [(#514)](https://github.com/scikit-bio/scikit-bio/issues/514) * Fixed issue with ``TreeNode.extend`` where if given the children of another ``TreeNode`` object (``tree.children``), both trees would be left in an incorrect and unpredictable state. ([#889](https://github.com/scikit-bio/scikit-bio/issues/889)) * Changed the way power was calculated in `subsample_paired_power` to move the subsample selection before the test is performed. This increases the number of Monte Carlo simulations performed during power estimation, and improves the accuracy of the returned estimate. Previous power estimates from `subsample_paired_power` should be disregarded and re-calculated. 
([#910](https://github.com/scikit-bio/scikit-bio/issues/910)) * Fixed issue where `randdm` was attempting to create asymmetric distance matrices. This was causing an error to be raised by the `DistanceMatrix` constructor inside of the `randdm` function, so that `randdm` would fail when attempting to create large distance matrices. ([#943](https://github.com/scikit-bio/scikit-bio/issues/943)) ### Deprecated functionality * Deprecated `skbio.util.flatten`. This function will be removed in scikit-bio 0.3.1. Please use standard Python library functionality described here [Making a flat list out of lists of lists](http://stackoverflow.com/a/952952/3639023), [Flattening a shallow list](http://stackoverflow.com/a/406199/3639023) ([#833](https://github.com/scikit-bio/scikit-bio/issues/833)) * Deprecated `skbio.stats.power.bootstrap_power_curve` will be removed in scikit-bio 0.4.1. It is deprecated in favor of using ``subsample_power`` or ``sample_paired_power`` to calculate a power matrix, and then the use of ``confidence_bounds`` to calculate the average and confidence intervals. ### Backward-incompatible changes * Removed the following deprecated functionality: - `skbio.parse` subpackage, including `SequenceIterator`, `FastaIterator`, `FastqIterator`, `load`, `parse_fasta`, `parse_fastq`, `parse_qual`, `write_clustal`, `parse_clustal`, and `FastqParseError`; please use `skbio.io` instead. - `skbio.format` subpackage, including `fasta_from_sequence`, `fasta_from_alignment`, and `format_fastq_record`; please use `skbio.io` instead. - `skbio.alignment.SequenceCollection.int_map`; please use `SequenceCollection.update_ids` instead. - `skbio.alignment.SequenceCollection` methods `to_fasta` and `toFasta`; please use `SequenceCollection.write` instead. - `constructor` parameter in `skbio.alignment.Alignment.majority_consensus`; please convert returned biological sequence object manually as desired (e.g., `str(seq)`). 
- `skbio.alignment.Alignment.to_phylip`; please use `Alignment.write` instead. - `skbio.sequence.BiologicalSequence.to_fasta`; please use `BiologicalSequence.write` instead. - `skbio.tree.TreeNode` methods `from_newick`, `from_file`, and `to_newick`; please use `TreeNode.read` and `TreeNode.write` instead. - `skbio.stats.distance.DissimilarityMatrix` methods `from_file` and `to_file`; please use `DissimilarityMatrix.read` and `DissimilarityMatrix.write` instead. - `skbio.stats.ordination.OrdinationResults` methods `from_file` and `to_file`; please use `OrdinationResults.read` and `OrdinationResults.write` instead. - `skbio.stats.p_value_to_str`; there is no replacement. - `skbio.stats.subsample`; please use `skbio.stats.subsample_counts` instead. - `skbio.stats.distance.ANOSIM`; please use `skbio.stats.distance.anosim` instead. - `skbio.stats.distance.PERMANOVA`; please use `skbio.stats.distance.permanova` instead. - `skbio.stats.distance.CategoricalStatsResults`; there is no replacement, please use `skbio.stats.distance.anosim` or `skbio.stats.distance.permanova`, which will return a `pandas.Series` object. * `skbio.alignment.Alignment.majority_consensus` now returns `BiologicalSequence('')` if the alignment is empty. Previously, `''` was returned. * `min_observations` was removed from `skbio.stats.power.subsample_power` and `skbio.stats.power.subsample_paired_power`. The minimum number of samples for subsampling depends on the data set and statistical tests. Having a default parameter set unnecessary limitations on the technique. ### Miscellaneous * Changed testing procedures - Developers should now use `make test` - Users can use `python -m skbio.test` - Added `skbio.util._testing.TestRunner` (available through `skbio.util.TestRunner`). Used to provide a `test` method for each module init file. This class represents a unified testing path which wraps all `skbio` testing functionality. - Autodetect Python version and disable doctests for Python 3. 
* `numpy` is no longer required to be installed before installing scikit-bio! * Upgraded checklist.py to check source files non-conforming to [new header style](http://scikit-bio.org/docs/latest/development/new_module.html). ([#855](https://github.com/scikit-bio/scikit-bio/issues/855)) * Updated to use `natsort` >= 4.0.0. * The method of subsampling was changed for ``skbio.stats.power.subsample_paired_power``. Rather than drawing a paired sample for the run and then subsampling for each count, the subsample is now drawn for each sample and each run. In test data, this did not significantly alter the power results. * checklist.py now enforces `__future__` imports in .py files. ## Version 0.2.3 (2015-02-13) ### Features * Modified ``skbio.stats.distance.pwmantel`` to accept a list of filepaths. This is useful as it allows for a smaller amount of memory consumption as it only loads two matrices at a time as opposed to requiring that all distance matrices are loaded into memory. * Added ``skbio.util.find_duplicates`` for finding duplicate elements in an iterable. ### Bug fixes * Fixed floating point precision bugs in ``Alignment.position_frequencies``, ``Alignment.position_entropies``, ``Alignment.omit_gap_positions``, ``Alignment.omit_gap_sequences``, ``BiologicalSequence.k_word_frequencies``, and ``SequenceCollection.k_word_frequencies`` ([#801](https://github.com/scikit-bio/scikit-bio/issues/801)). ### Backward-incompatible changes * Removed ``feature_types`` attribute from ``BiologicalSequence`` and all subclasses ([#797](https://github.com/scikit-bio/scikit-bio/pull/797)). * Removed ``find_features`` method from ``BiologicalSequence`` and ``ProteinSequence`` ([#797](https://github.com/scikit-bio/scikit-bio/pull/797)). * ``BiologicalSequence.k_word_frequencies`` now returns a ``collections.defaultdict`` of type ``float`` instead of type ``int``. This only affects the "default" case, when a key isn't present in the dictionary. 
Previous behavior would return ``0`` as an ``int``, while the new behavior is to return ``0.0`` as a ``float``. This change also affects the ``defaultdict``s that are returned by ``SequenceCollection.k_word_frequencies``. ### Miscellaneous * ``DissimilarityMatrix`` and ``DistanceMatrix`` now report duplicate IDs in the ``DissimilarityMatrixError`` message that can be raised during validation. ## Version 0.2.2 (2014-12-04) ### Features * Added ``plot`` method to ``skbio.stats.distance.DissimilarityMatrix`` for creating basic heatmaps of a dissimilarity/distance matrix (see [#684](https://github.com/scikit-bio/scikit-bio/issues/684)). Also added ``_repr_png_`` and ``_repr_svg_`` methods for automatic display in the IPython Notebook, with ``png`` and ``svg`` properties for direct access. * Added `__str__` method to `skbio.stats.ordination.OrdinationResults`. * Added ``skbio.stats.distance.anosim`` and ``skbio.stats.distance.permanova`` functions, which replace the ``skbio.stats.distance.ANOSIM`` and ``skbio.stats.distance.PERMANOVA`` classes. These new functions provide simpler procedural interfaces to running these statistical methods. They also provide more convenient access to results by returning a ``pandas.Series`` instead of a ``CategoricalStatsResults`` object. These functions have more extensive documentation than their previous versions. If significance tests are suppressed, p-values are returned as ``np.nan`` instead of ``None`` for consistency with other statistical methods in scikit-bio. [#754](https://github.com/scikit-bio/scikit-bio/issues/754) * Added `skbio.stats.power` for performing empirical power analysis. The module uses existing datasets and iteratively draws samples to estimate the number of samples needed to see a significant difference for a given critical value. * Added `skbio.stats.isubsample` for subsampling from an unknown number of values. 
This method supports subsampling from multiple partitions and does not require that all items be stored in memory, requiring approximately ``O(N*M)`` space where `N` is the number of partitions and `M` is the maximum subsample size. * Added ``skbio.stats.subsample_counts``, which replaces ``skbio.stats.subsample``. See deprecation section below for more details ([#770](https://github.com/scikit-bio/scikit-bio/issues/770)). ### Bug fixes * Fixed issue where SSW wouldn't compile on i686 architectures ([#409](https://github.com/scikit-bio/scikit-bio/issues/409)). ### Deprecated functionality * Deprecated ``skbio.stats.p_value_to_str``. This function will be removed in scikit-bio 0.3.0. Permutation-based p-values in scikit-bio are calculated as ``(num_extreme + 1) / (num_permutations + 1)``, so it is impossible to obtain a p-value of zero. This function historically existed for correcting the number of digits displayed when obtaining a p-value of zero. Since this is no longer possible, this functionality will be removed. * Deprecated ``skbio.stats.distance.ANOSIM`` and ``skbio.stats.distance.PERMANOVA`` in favor of ``skbio.stats.distance.anosim`` and ``skbio.stats.distance.permanova``, respectively. * Deprecated ``skbio.stats.distance.CategoricalStatsResults`` in favor of using ``pandas.Series`` to store statistical method results. ``anosim`` and ``permanova`` return ``pandas.Series`` instead of ``CategoricalStatsResults``. * Deprecated ``skbio.stats.subsample`` in favor of ``skbio.stats.subsample_counts``, which provides an identical interface; only the function name has changed. ``skbio.stats.subsample`` will be removed in scikit-bio 0.3.0. ### Backward-incompatible changes * Deprecation warnings are now raised using ``DeprecationWarning`` instead of ``UserWarning`` ([#774](https://github.com/scikit-bio/scikit-bio/issues/774)). 
### Miscellaneous * The ``pandas.DataFrame`` returned by ``skbio.stats.distance.pwmantel`` now stores p-values as floats and does not convert them to strings with a specific number of digits. p-values that were previously stored as "N/A" are now stored as ``np.nan`` for consistency with other statistical methods in scikit-bio. See note in "Deprecated functionality" above regarding ``p_value_to_str`` for details. * scikit-bio now supports versions of IPython < 2.0.0 ([#767](https://github.com/scikit-bio/scikit-bio/issues/767)). ## Version 0.2.1 (2014-10-27) This is an alpha release of scikit-bio. At this stage, major backwards-incompatible API changes can and will happen. Unified I/O with the scikit-bio I/O registry was the focus of this release. ### Features * Added ``strict`` and ``lookup`` optional parameters to ``skbio.stats.distance.mantel`` for handling reordering and matching of IDs when provided ``DistanceMatrix`` instances as input (these parameters were previously only available in ``skbio.stats.distance.pwmantel``). * ``skbio.stats.distance.pwmantel`` now accepts an iterable of ``array_like`` objects. Previously, only ``DistanceMatrix`` instances were allowed. * Added ``plot`` method to ``skbio.stats.ordination.OrdinationResults`` for creating basic 3-D matplotlib scatterplots of ordination results, optionally colored by metadata in a ``pandas.DataFrame`` (see [#518](https://github.com/scikit-bio/scikit-bio/issues/518)). Also added ``_repr_png_`` and ``_repr_svg_`` methods for automatic display in the IPython Notebook, with ``png`` and ``svg`` properties for direct access. * Added ``skbio.stats.ordination.assert_ordination_results_equal`` for comparing ``OrdinationResults`` objects for equality in unit tests. * ``BiologicalSequence`` (and its subclasses) now optionally store Phred quality scores. A biological sequence's quality scores are stored as a 1-D ``numpy.ndarray`` of nonnegative integers that is the same length as the biological sequence. 
Quality scores can be provided upon object instantiation via the keyword argument ``quality``, and can be retrieved via the ``BiologicalSequence.quality`` property. ``BiologicalSequence.has_quality`` is also provided for determining whether a biological sequence has quality scores or not. See [#616](https://github.com/scikit-bio/scikit-bio/issues/616) for more details. * Added ``BiologicalSequence.sequence`` property for retrieving the underlying string representing the sequence characters. This was previously (and still is) accessible via ``BiologicalSequence.__str__``. It is provided via a property for convenience and explicitness. * Added ``BiologicalSequence.equals`` for full control over equality testing of biological sequences. By default, biological sequences must have the same type, underlying sequence of characters, identifier, description, and quality scores to compare equal. These properties can be ignored via the keyword argument ``ignore``. The behavior of ``BiologicalSequence.__eq__``/``__ne__`` remains unchanged (only type and underlying sequence of characters are compared). * Added ``BiologicalSequence.copy`` for creating a copy of a biological sequence, optionally with one or more attributes updated. * ``BiologicalSequence.__getitem__`` now supports specifying a sequence of indices to take from the biological sequence. * Methods to read and write taxonomies are now available under ``skbio.tree.TreeNode.from_taxonomy`` and ``skbio.tree.TreeNode.to_taxonomy`` respectively. * Added ``SequenceCollection.update_ids``, which provides a flexible way of updating sequence IDs on a ``SequenceCollection`` or ``Alignment`` (note that a new object is returned, since instances of these classes are immutable). Deprecated ``SequenceCollection.int_map`` in favor of this new method; it will be removed in scikit-bio 0.3.0. * Added ``skbio.util.cardinal_to_ordinal`` for converting a cardinal number to ordinal string (e.g., useful for error messages). 
* New I/O Registry: supports multiple file formats, automatic file format detection when reading, unified procedural ``skbio.io.read`` and ``skbio.io.write`` in addition to OOP interfaces (``read/write`` methods) on the below objects. See ``skbio.io`` for more details. - Added "clustal" format support: * Has sniffer * Readers: ``Alignment`` * Writers: ``Alignment`` - Added "lsmat" format support: * Has sniffer * Readers: ``DissimilarityMatrix``, ``DistanceMatrix`` * Writers: ``DissimilarityMatrix``, ``DistanceMatrix`` - Added "ordination" format support: * Has sniffer * Readers: ``OrdinationResults`` * Writers: ``OrdinationResults`` - Added "newick" format support: * Has sniffer * Readers: ``TreeNode`` * Writers: ``TreeNode`` - Added "phylip" format support: * No sniffer * Readers: None * Writers: ``Alignment`` - Added "qseq" format support: * Has sniffer * Readers: generator of ``BiologicalSequence`` or its subclasses, ``SequenceCollection``, ``BiologicalSequence``, ``NucleotideSequence``, ``DNASequence``, ``RNASequence``, ``ProteinSequence`` * Writers: None - Added "fasta"/QUAL format support: * Has sniffer * Readers: generator of ``BiologicalSequence`` or its subclasses, ``SequenceCollection``, ``Alignment``, ``BiologicalSequence``, ``NucleotideSequence``, ``DNASequence``, ``RNASequence``, ``ProteinSequence`` * Writers: same as readers - Added "fastq" format support: * Has sniffer * Readers: generator of ``BiologicalSequence`` or its subclasses, ``SequenceCollection``, ``Alignment``, ``BiologicalSequence``, ``NucleotideSequence``, ``DNASequence``, ``RNASequence``, ``ProteinSequence`` * Writers: same as readers ### Bug fixes * Removed ``constructor`` parameter from ``Alignment.k_word_frequencies``, ``BiologicalSequence.k_words``, ``BiologicalSequence.k_word_counts``, and ``BiologicalSequence.k_word_frequencies`` as it had no effect (it was never hooked up in the underlying code). 
``BiologicalSequence.k_words`` now returns a generator of ``BiologicalSequence`` objects instead of strings. * Modified the ``Alignment`` constructor to verify that all sequences have the same length, if not, raise an ``AlignmentError`` exception. Updated the method ``Alignment.subalignment`` to calculate the indices only once now that identical sequence length is guaranteed. ### Deprecated functionality * Deprecated ``constructor`` parameter in ``Alignment.majority_consensus`` in favor of having users call ``str`` on the returned ``BiologicalSequence``. This parameter will be removed in scikit-bio 0.3.0. * Existing I/O functionality deprecated in favor of I/O registry, old functionality will be removed in scikit-bio 0.3.0. All functionality can be found at ``skbio.io.read``, ``skbio.io.write``, and the methods listed below: * Deprecated the following "clustal" readers/writers: - ``write_clustal`` -> ``Alignment.write`` - ``parse_clustal`` -> ``Alignment.read`` * Deprecated the following distance matrix format ("lsmat") readers/writers: - ``DissimilarityMatrix.from_file`` -> ``DissimilarityMatrix.read`` - ``DissimilarityMatrix.to_file`` -> ``DissimilarityMatrix.write`` - ``DistanceMatrix.from_file`` -> ``DistanceMatrix.read`` - ``DistanceMatrix.to_file`` -> ``DistanceMatrix.write`` * Deprecated the following ordination format ("ordination") readers/writers: - ``OrdinationResults.from_file`` -> ``OrdinationResults.read`` - ``OrdinationResults.to_file`` -> ``OrdinationResults.write`` * Deprecated the following "newick" readers/writers: - ``TreeNode.from_file`` -> ``TreeNode.read`` - ``TreeNode.from_newick`` -> ``TreeNode.read`` - ``TreeNode.to_newick`` -> ``TreeNode.write`` * Deprecated the following "phylip" writers: - ``Alignment.to_phylip`` -> ``Alignment.write`` * Deprecated the following "fasta"/QUAL readers/writers: - ``SequenceCollection.from_fasta_records`` -> ``SequenceCollection.read`` - ``SequenceCollection.to_fasta`` -> ``SequenceCollection.write`` - 
``fasta_from_sequences`` -> ``skbio.io.write(obj, into=, format='fasta')`` - ``fasta_from_alignment`` -> ``Alignment.write`` - ``parse_fasta`` -> ``skbio.io.read(, format='fasta')`` - ``parse_qual`` -> ``skbio.io.read(, format='fasta', qual=)`` - ``BiologicalSequence.to_fasta`` -> ``BiologicalSequence.write`` * Deprecated the following "fastq" readers/writers: - ``parse_fastq`` -> ``skbio.io.read(, format='fastq')`` - ``format_fastq_record`` -> ``skbio.io.write(, format='fastq')`` ### Backward-incompatible changes * ``skbio.stats.distance.mantel`` now returns a 3-element tuple containing correlation coefficient, p-value, and the number of matching rows/cols in the distance matrices (``n``). The return value was previously a 2-element tuple containing only the correlation coefficient and p-value. * ``skbio.stats.distance.mantel`` reorders input ``DistanceMatrix`` instances based on matching IDs (see optional parameters ``strict`` and ``lookup`` for controlling this behavior). In the past, ``DistanceMatrix`` instances were treated the same as ``array_like`` input and no reordering took place, regardless of ID (mis)matches. ``array_like`` input behavior remains the same. * If mismatched types are provided to ``skbio.stats.distance.mantel`` (e.g., a ``DistanceMatrix`` and ``array_like``), a ``TypeError`` will be raised. ### Miscellaneous * Added git timestamp checking to checklist.py, ensuring that when changes are made to Cython (.pyx) files, their corresponding generated C files are also updated. * Fixed performance bug when instantiating ``BiologicalSequence`` objects. The previous runtime scaled linearly with sequence length; it is now constant time when the sequence is already a string. See [#623](https://github.com/scikit-bio/scikit-bio/issues/623) for details. * IPython and six are now required dependencies. ## Version 0.2.0 (2014-08-07) This is an initial alpha release of scikit-bio. At this stage, major backwards-incompatible API changes can and will happen. 
Many backwards-incompatible API changes were made since the previous release. ### Features * Added ability to compute distances between sequences in a ``SequenceCollection`` object ([#509](https://github.com/scikit-bio/scikit-bio/issues/509)), and expanded ``Alignment.distance`` to allow the user to pass a function for computing distances (the default distance metric is still ``scipy.spatial.distance.hamming``) ([#194](https://github.com/scikit-bio/scikit-bio/issues/194)). * Added functionality to not penalize terminal gaps in global alignment. This functionality results in more biologically relevant global alignments (see [#537](https://github.com/scikit-bio/scikit-bio/issues/537) for discussion of the issue) and is now the default behavior for global alignment. * The python global aligners (``global_pairwise_align``, ``global_pairwise_align_nucleotide``, and ``global_pairwise_align_protein``) now support aligning pairs of sequences, pairs of alignments, and a sequence and an alignment (see [#550](https://github.com/scikit-bio/scikit-bio/issues/550)). This functionality supports progressive multiple sequence alignment, among other things such as adding a sequence to an existing alignment. * Added ``StockholmAlignment.to_file`` for writing Stockholm-formatted files. * Added ``strict=True`` optional parameter to ``DissimilarityMatrix.filter``. * Added ``TreeNode.find_all`` for finding all tree nodes that match a given name. ### Bug fixes * Fixed bug that resulted in a ``ValueError`` from ``local_align_pairwise_nucleotide`` (see [#504](https://github.com/scikit-bio/scikit-bio/issues/504)) under many circumstances. This would not generate incorrect results, but would cause the code to fail. ### Backward-incompatible changes * Removed ``skbio.math``, leaving ``stats`` and ``diversity`` to become top level packages. For example, instead of ``from skbio.math.stats.ordination import PCoA`` you would now import ``from skbio.stats.ordination import PCoA``. 
* The module ``skbio.math.gradient`` as well as the contents of ``skbio.math.subsample`` and ``skbio.math.stats.misc`` are now found in ``skbio.stats``. As an example, to import subsample: ``from skbio.stats import subsample``; to import everything from gradient: ``from skbio.stats.gradient import *``. * The contents of ``skbio.math.stats.ordination.utils`` are now in ``skbio.stats.ordination``. * Removed ``skbio.app`` subpackage (i.e., the *application controller framework*) as this code has been ported to the standalone [burrito](https://github.com/biocore/burrito) Python package. This code was not specific to bioinformatics and is useful for wrapping command-line applications in general. * Removed ``skbio.core``, leaving ``alignment``, ``genetic_code``, ``sequence``, ``tree``, and ``workflow`` to become top level packages. For example, instead of ``from skbio.core.sequence import DNA`` you would now import ``from skbio.sequence import DNA``. * Removed ``skbio.util.exception`` and ``skbio.util.warning`` (see [#577](https://github.com/scikit-bio/scikit-bio/issues/577) for the reasoning behind this change). 
The exceptions/warnings were moved to the following locations: - ``FileFormatError``, ``RecordError``, ``FieldError``, and ``EfficiencyWarning`` have been moved to ``skbio.util`` - ``BiologicalSequenceError`` has been moved to ``skbio.sequence`` - ``SequenceCollectionError`` and ``StockholmParseError`` have been moved to ``skbio.alignment`` - ``DissimilarityMatrixError``, ``DistanceMatrixError``, ``DissimilarityMatrixFormatError``, and ``MissingIDError`` have been moved to ``skbio.stats.distance`` - ``TreeError``, ``NoLengthError``, ``DuplicateNodeError``, ``MissingNodeError``, and ``NoParentError`` have been moved to ``skbio.tree`` - ``FastqParseError`` has been moved to ``skbio.parse.sequences`` - ``GeneticCodeError``, ``GeneticCodeInitError``, and ``InvalidCodonError`` have been moved to ``skbio.genetic_code`` * The contents of ``skbio.genetic_code`` formerly ``skbio.core.genetic_code`` are now in ``skbio.sequence``. The ``GeneticCodes`` dictionary is now a function ``genetic_code``. The functionality is the same, except that because this is now a function rather than a dict, retrieving a genetic code is done using a function call rather than a lookup (so, for example, ``GeneticCodes[2]`` becomes ``genetic_code(2)``. * Many submodules have been made private with the intention of simplifying imports for users. See [#562](https://github.com/scikit-bio/scikit-bio/issues/562) for discussion of this change. The following list contains the previous module name and where imports from that module should now come from. 
- ``skbio.alignment.ssw`` to ``skbio.alignment`` - ``skbio.alignment.alignment`` to ``skbio.alignment`` - ``skbio.alignment.pairwise`` to ``skbio.alignment`` - ``skbio.diversity.alpha.base`` to ``skbio.diversity.alpha`` - ``skbio.diversity.alpha.gini`` to ``skbio.diversity.alpha`` - ``skbio.diversity.alpha.lladser`` to ``skbio.diversity.alpha`` - ``skbio.diversity.beta.base`` to ``skbio.diversity.beta`` - ``skbio.draw.distributions`` to ``skbio.draw`` - ``skbio.stats.distance.anosim`` to ``skbio.stats.distance`` - ``skbio.stats.distance.base`` to ``skbio.stats.distance`` - ``skbio.stats.distance.permanova`` to ``skbio.stats.distance`` - ``skbio.distance`` to ``skbio.stats.distance`` - ``skbio.stats.ordination.base`` to ``skbio.stats.ordination`` - ``skbio.stats.ordination.canonical_correspondence_analysis`` to ``skbio.stats.ordination`` - ``skbio.stats.ordination.correspondence_analysis`` to ``skbio.stats.ordination`` - ``skbio.stats.ordination.principal_coordinate_analysis`` to ``skbio.stats.ordination`` - ``skbio.stats.ordination.redundancy_analysis`` to ``skbio.stats.ordination`` - ``skbio.tree.tree`` to ``skbio.tree`` - ``skbio.tree.trie`` to ``skbio.tree`` - ``skbio.util.misc`` to ``skbio.util`` - ``skbio.util.testing`` to ``skbio.util`` - ``skbio.util.exception`` to ``skbio.util`` - ``skbio.util.warning`` to ``skbio.util`` * Moved ``skbio.distance`` contents into ``skbio.stats.distance``. ### Miscellaneous * Relaxed requirement in ``BiologicalSequence.distance`` that sequences being compared are of equal length. This is relevant for Hamming distance, so the check is still performed in that case, but other distance metrics may not have that requirement. See [#504](https://github.com/scikit-bio/scikit-bio/issues/507)). * Renamed ``powertrip.py`` repo-checking script to ``checklist.py`` for clarity. * ``checklist.py`` now ensures that all unit tests import from a minimally deep API. 
For example, it will produce an error if ``skbio.core.distance.DistanceMatrix`` is used over ``skbio.DistanceMatrix``. * Extra dimension is no longer calculated in ``skbio.stats.spatial.procrustes``. * Expanded documentation in various subpackages. * Added new scikit-bio logo. Thanks [Alina Prassas](http://cargocollective.com/alinaprassas)! ## Version 0.1.4 (2014-06-25) This is a pre-alpha release. At this stage, major backwards-incompatible API changes can and will happen. ### Features * Added Python implementations of Smith-Waterman and Needleman-Wunsch alignment as ``skbio.core.alignment.pairwise.local_pairwise_align`` and ``skbio.core.alignment.pairwise.global_pairwise_align``. These are much slower than native C implementations (e.g., ``skbio.core.alignment.local_pairwise_align_ssw``) and as a result raise an ``EfficencyWarning`` when called, but are included as they serve as useful educational examples as they’re simple to experiment with. * Added ``skbio.core.diversity.beta.pw_distances`` and ``skbio.core.diversity.beta.pw_distances_from_table``. These provide convenient access to the ``scipy.spatial.distance.pdist`` *beta diversity* metrics from within scikit-bio. The ``skbio.core.diversity.beta.pw_distances_from_table`` function will only be available temporarily, until the ``biom.table.Table`` object is merged into scikit-bio (see [#489](https://github.com/scikit-bio/scikit-bio/issues/489)), at which point ``skbio.core.diversity.beta.pw_distances`` will be updated to use that. * Added ``skbio.core.alignment.StockholmAlignment``, which provides support for parsing [Stockholm-formatted alignment files](http://sonnhammer.sbc.su.se/Stockholm.html) and working with those alignments in the context RNA secondary structural information. * Added ``skbio.core.tree.majority_rule`` function for computing consensus trees from a list of trees. 
### Backward-incompatible changes * Function ``skbio.core.alignment.align_striped_smith_waterman`` renamed to ``local_pairwise_align_ssw`` and now returns an ``Alignment`` object instead of an ``AlignmentStructure`` * The following keyword-arguments for ``StripedSmithWaterman`` and ``local_pairwise_align_ssw`` have been renamed: * ``gap_open`` -> ``gap_open_penalty`` * ``gap_extend`` -> ``gap_extend_penalty`` * ``match`` -> ``match_score`` * ``mismatch`` -> ``mismatch_score`` * Removed ``skbio.util.sort`` module in favor of [natsort](https://pypi.python.org/pypi/natsort) package. ### Miscellaneous * Added powertrip.py script to perform basic sanity-checking of the repo based on recurring issues that weren't being caught until release time; added to Travis build. * Added RELEASE.md with release instructions. * Added intersphinx mappings to docs so that "See Also" references to numpy, scipy, matplotlib, and pandas are hyperlinks. * The following classes are no longer ``namedtuple`` subclasses (see [#359](https://github.com/scikit-bio/scikit-bio/issues/359) for the rationale): * ``skbio.math.stats.ordination.OrdinationResults`` * ``skbio.math.gradient.GroupResults`` * ``skbio.math.gradient.CategoryResults`` * ``skbio.math.gradient.GradientANOVAResults`` * Added coding guidelines draft. * Added new alpha diversity formulas to the ``skbio.math.diversity.alpha`` documentation. ## Version 0.1.3 (2014-06-12) This is a pre-alpha release. At this stage, major backwards-incompatible API changes can and will happen. ### Features * Added ``enforce_qual_range`` parameter to ``parse_fastq`` (on by default, maintaining backward compatibility). This allows disabling of the quality score range-checking. * Added ``skbio.core.tree.nj``, which applies neighbor-joining for phylogenetic reconstruction. * Added ``bioenv``, ``mantel``, and ``pwmantel`` distance-based statistics to ``skbio.math.stats.distance`` subpackage. 
* Added ``skbio.math.stats.misc`` module for miscellaneous stats utility functions. * IDs are now optional when constructing a ``DissimilarityMatrix`` or ``DistanceMatrix`` (monotonically-increasing integers cast as strings are automatically used). * Added ``DistanceMatrix.permute`` method for randomly permuting rows and columns of a distance matrix. * Added the following methods to ``DissimilarityMatrix``: ``filter``, ``index``, and ``__contains__`` for ID-based filtering, index lookup, and membership testing, respectively. * Added ``ignore_comment`` parameter to ``parse_fasta`` (off by default, maintaining backward compatibility). This handles stripping the comment field from the header line (i.e., all characters beginning with the first space) before returning the label. * Added imports of ``BiologicalSequence``, ``NucleotideSequence``, ``DNA``, ``DNASequence``, ``RNA``, ``RNASequence``, ``Protein``, ``ProteinSequence``, ``DistanceMatrix``, ``align_striped_smith_waterman``, `` SequenceCollection``, ``Alignment``, ``TreeNode``, ``nj``, ``parse_fasta``, ``parse_fastq``, ``parse_qual``, ``FastaIterator``, ``FastqIterator``, ``SequenceIterator`` in ``skbio/__init__.py`` for convenient importing. For example, it's now possible to ``from skbio import Alignment``, rather than ``from skbio.core.alignment import Alignment``. ### Bug fixes * Fixed a couple of unit tests that could fail stochastically. * Added missing ``__init__.py`` files to a couple of test directories so that these tests won't be skipped. * ``parse_fastq`` now raises an error on dangling records. * Fixed several warnings that were raised while running the test suite with Python 3.4. ### Backward-incompatible changes * Functionality imported from ``skbio.core.ssw`` must now be imported from ``skbio.core.alignment`` instead. ### Miscellaneous * Code is now flake8-compliant; added flake8 checking to Travis build. 
* Various additions and improvements to documentation (API, installation instructions, developer instructions, etc.). * ``__future__`` imports are now standardized across the codebase. * New website front page and styling changes throughout. Moved docs site to its own versioned subdirectories. * Reorganized alignment data structures and algorithms (e.g., SSW code, ``Alignment`` class, etc.) into an ``skbio.core.alignment`` subpackage. ## Version 0.1.1 (2014-05-16) Fixes to setup.py. This is a pre-alpha release. At this stage, major backwards-incompatible API changes can and will happen. ## Version 0.1.0 (2014-05-15) Initial pre-alpha release. At this stage, major backwards-incompatible API changes can and will happen. scikit-bio-0.6.2/LICENSE.txt000066400000000000000000000027531464262511300154120ustar00rootroot00000000000000Copyright (c) 2013--, scikit-bio development team. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the names scikit-bio, skbio, or biocore nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/MANIFEST.in000066400000000000000000000005371464262511300153230ustar00rootroot00000000000000include CHANGELOG.md include LICENSE.txt include Makefile include pyproject.toml include README.rst include simde-sse2.h graft ci graft doc graft licenses graft logos graft skbio prune doc/build prune doc/source/generated prune web/_build global-exclude *.pyc global-exclude *.pyo global-exclude *.so global-exclude .*.swp global-exclude .coverage scikit-bio-0.6.2/Makefile000066400000000000000000000024531464262511300152240ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- ifeq ($(WITH_COVERAGE), TRUE) TEST_COMMAND = coverage run --rcfile ../.coveragerc -m skbio.test && coverage report --rcfile ../.coveragerc else TEST_COMMAND = python -m skbio.test endif .PHONY: doc web lint test dev install doc: $(MAKE) -C doc clean html web: $(MAKE) -C web clean html clean: $(MAKE) -C doc clean $(MAKE) -C web clean rm -rf build dist scikit_bio.egg-info lint: ruff check skbio setup.py checklist.py ./checklist.py check-manifest # cd into a directory that is different from scikit-bio root directory to # simulate a user's install and testing of scikit-bio. Running from the root # directory will find the `skbio` subpackage (not necessarily the installed # one!) because cwd is considered in Python's search path. It is important to # simulate a user's install/test process this way to find package data that did # not install correctly (for example). test: cd ci && $(TEST_COMMAND) install: pip install . dev: pip install -e . scikit-bio-0.6.2/README.rst000066400000000000000000000206511464262511300152530ustar00rootroot00000000000000|license| |build| |coverage| |bench| |release| |pypi| |conda| |gitter| .. image:: logos/logo.svg :width: 600 px :target: https://scikit.bio :alt: scikit-bio logo *scikit-bio is an open-source, BSD-licensed Python 3 package providing data structures, algorithms and educational resources for bioinformatics.* 🌟 Announcing the revitalization of scikit-bio! 🌟 -------------------------------------------------- We are excited to announce the resurgence of **scikit-bio** (``_)! With a re-assembled developer team and funding from the DOE, we're back with renewed vigor in 2024! Our vision is to expand scikit-bio into a more robust and versatile library, catering to the ever-growing demands of multi-omic data analysis. 
This resurgence marks a new chapter in which we will focus on: - Streamlining the analysis of diverse, massive omic data, emphasizing efficiency and versatility. - Integrating advanced techniques for multi-omic integration to unravel the complex interplay between biological systems and environments. - Implementing methods for modeling and annotating biological features utilizing community ecology and phylogenetics. We invite the scientific community to join us in shaping the future of scikit-bio. Your expertise, feedback, and contributions will be the driving force behind this exciting phase. Stay `tuned for updates `_, and let's innovate together for a deeper understanding of bio-complexities! Get involved with scikit-bio ---------------------------- Your questions, ideas, and contributions matter! Join our community on `GitHub Discussions `_: This is your go-to place for asking questions, sharing insights, and participating in discussions about scikit-bio. Engage with both the developers and fellow users here. Report issues and bugs: If you encounter specific problems when using scikit-bio, let us know directly through the `GitHub Issues `_ page. Your reports are vital for the continuous improvement of scikit-bio. Wanna contribute? We enthusiastically welcome community contributors! Whether it's adding new features, improving code, or enhancing documentation, your contributions drive scikit-bio and open-source bioinformatics forward. Start your journey by reading the `Contributor's guidelines `_. ---- Visit the new scikit-bio website: https://scikit.bio to learn more about this project. Releases -------- Latest release: `0.6.2 `_ (`documentation `_, `changelog `_). Compatible with Python 3.8 and above. 
Installation ------------ Install the latest release of scikit-bio using ``conda``:: conda install -c conda-forge scikit-bio Or using ``pip``:: pip install scikit-bio Verify the installation:: python -m skbio.test See further `instructions on installing `_ scikit-bio on various platforms. Adoption -------- Some of the projects that we know of that are using scikit-bio are: - `QIIME 2 `_, `Qiita `_, `Emperor `_, `tax2tree `_, `ghost-tree `_, `Platypus-Conquistador `_, `An Introduction to Applied Bioinformatics `_. License ------- scikit-bio is available under the new BSD license. See `LICENSE.txt `_ for scikit-bio's license, and the `licenses directory `_ for the licenses of third-party software that is (either partially or entirely) distributed with scikit-bio. Team ---- Our core development team consists of three lead developers: **Dr. Qiyun Zhu** at Arizona State University (ASU) (@qiyunzhu), **Dr. James Morton** at Gutz Analytics (@mortonjt), and **Dr. Daniel McDonald** at the University of California San Diego (UCSD) (@wasade), one software engineer: **Matthew Aton** (@mataton) and one bioinformatician: **Dr. Lars Hunger** (@LarsHunger). **Dr. Rob Knight** at UCSD (@rob-knight) provides guidance on the development and research. **Dr. Greg Caporaso** (@gregcaporaso) at Northern Arizona University (NAU), the former leader of the scikit-bio project, serves as an advisor on the current project. Credits ------- We thank the many contributors to scikit-bio. A complete `list of contributors `_ to the scikit-bio codebase is available at GitHub. This however may miss the larger community who contributed by testing the software and providing valuable comments, who we hold equal appreciation to. Wanna contribute? We enthusiastically welcome community contributors! Whether it's adding new features, improving code, or enhancing documentation, your contributions drive scikit-bio and open-source bioinformatics forward. 
Start your journey by reading the `Contributor's guidelines `_. Funding ------- The development of scikit-bio is currently supported by the U.S. Department of Energy, Office of Science under award number `DE-SC0024320 `_, awarded to Dr. Qiyun Zhu at ASU (lead PI), Dr. James Morton at Gutz Analytics, and Dr. Rob Knight at UCSD. Citation -------- If you use scikit-bio for any published research, please see our `Zenodo page `_ for how to cite. Collaboration ------------- For collaboration inquiries and other formal communications, please reach out to **Dr. Qiyun Zhu** at `qiyun.zhu@asu.edu`. We welcome academic and industrial partnerships to advance our mission. Branding -------- The logo of scikit-bio was created by `Alina Prassas `_. Vector and bitmap image files are available at the `logos `_ directory. Pre-history ----------- scikit-bio began from code derived from `PyCogent `_ and `QIIME `_, and the contributors and/or copyright holders have agreed to make the code they wrote for PyCogent and/or QIIME available under the BSD license. The contributors to PyCogent and/or QIIME modules that have been ported to scikit-bio are listed below: - Rob Knight (@rob-knight), Gavin Huttley (@gavinhuttley), Daniel McDonald (@wasade), Micah Hamady, Antonio Gonzalez (@antgonza), Sandra Smit, Greg Caporaso (@gregcaporaso), Jai Ram Rideout (@jairideout), Cathy Lozupone (@clozupone), Mike Robeson (@mikerobeson), Marcin Cieslik, Peter Maxwell, Jeremy Widmann, Zongzhi Liu, Michael Dwan, Logan Knecht (@loganknecht), Andrew Cochran, Jose Carlos Clemente (@cleme), Damien Coy, Levi McCracken, Andrew Butterfield, Will Van Treuren (@wdwvt1), Justin Kuczynski (@justin212k), Jose Antonio Navas Molina (@josenavas), Matthew Wakefield (@genomematt) and Jens Reeder (@jensreeder). .. |license| image:: https://img.shields.io/badge/License-BSD%203--Clause-blue.svg :alt: License :target: https://opensource.org/licenses/BSD-3-Clause .. 
|build| image:: https://github.com/scikit-bio/scikit-bio/actions/workflows/ci.yml/badge.svg :alt: Build Status :target: https://github.com/scikit-bio/scikit-bio/actions/workflows/ci.yml .. |coverage| image:: https://codecov.io/gh/scikit-bio/scikit-bio/graph/badge.svg?token=1qbzC6d2F5 :alt: Coverage Status :target: https://codecov.io/gh/scikit-bio/scikit-bio .. |bench| image:: https://img.shields.io/badge/benchmarked%20by-asv-green.svg :alt: ASV Benchmarks :target: https://s3-us-west-2.amazonaws.com/scikit-bio.org/benchmarks/main/index.html .. |release| image:: https://img.shields.io/github/v/release/scikit-bio/scikit-bio.svg :alt: Release :target: https://github.com/scikit-bio/scikit-bio/releases .. |pypi| image:: https://img.shields.io/pypi/dm/scikit-bio.svg?label=PyPI%20downloads :alt: PyPI Downloads :target: https://pypi.org/project/scikit-bio/ .. |conda| image:: https://img.shields.io/conda/dn/conda-forge/scikit-bio.svg?label=Conda%20downloads :alt: Conda Downloads :target: https://anaconda.org/conda-forge/scikit-bio .. |gitter| image:: https://badges.gitter.im/Join%20Chat.svg :alt: Gitter :target: https://gitter.im/biocore/scikit-bio scikit-bio-0.6.2/aarch64.Dockerfile000066400000000000000000000020201464262511300167730ustar00rootroot00000000000000FROM --platform=linux/arm64 condaforge/linux-anvil-aarch64 RUN sudo yum update -y && \ sudo yum install -y make git && \ sudo yum clean all ENV MPLBACKEND=Agg ARG PYTHON_VERSION RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate base && conda create -n testing -c conda-forge --yes python=$PYTHON_VERSION gxx_linux-aarch64" COPY . /work WORKDIR /work RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate testing && conda env update -q -f ci/conda_host_env.yml" RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate testing && conda install -q --yes --file ci/aarch64.conda_requirements.txt" RUN bash -c ". 
/opt/conda/etc/profile.d/conda.sh && conda activate testing && pip install -r ci/aarch64.requirements.txt" RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate testing && pip install . --no-deps" RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate testing && conda list" RUN bash -c ". /opt/conda/etc/profile.d/conda.sh && conda activate testing && make test" scikit-bio-0.6.2/checklist.py000077500000000000000000000420611464262511300161110ustar00rootroot00000000000000#!/usr/bin/env python """Validate the content and structure of a scikit-bio repository. ---------------------------------------------------------------------------- Copyright (c) 2013--, scikit-bio development team. Distributed under the terms of the Modified BSD License. The full license is in the file LICENSE.txt, distributed with this software. ---------------------------------------------------------------------------- """ import collections import os import os.path import subprocess import sys import ast import tokenize import warnings import dateutil.parser if sys.version_info.major != 3: sys.exit( "scikit-bio can only be used with Python 3. You are currently " "running Python %d." % sys.version_info.major ) class ChecklistWarning(Warning): """General warning class for warnings raised by checklist.py.""" pass def main(): """Go on a power trip by nitpicking the scikit-bio repo. Attempts to find things that are wrong with the repo -- these are usually annoying details introduced by human error. The code goes out of its way to nitpick as much as possible in order to maximize the effectiveness of the power trip. Returns ------- int Return code: 0 if there were no validation errors, 1 otherwise. Useful as an exit code (e.g. for use with ``sys.exit``). 
""" root = "skbio" validators = [ InitValidator(), CopyrightHeadersValidator(), ExecPermissionValidator(), GeneratedCythonValidator(), APIRegressionValidator(), ] return_code = 0 for validator in validators: success, msg = validator.validate(root) if not success: return_code = 1 sys.stderr.write("\n".join(msg)) sys.stderr.write("\n\n") return return_code class RepoValidator: """Abstract base class representing a repository validator. Subclasses must override and implement ``_validate`` (see its docstring for more details). Subclasses should also provide a ``reason``: this is a string describing the reason for a particular type of validation failure (see subclasses for examples). ``reason`` is included in the validation error message/report created by ``validate``. """ reason = "" def validate(self, root): """Validate a directory tree recursively. Parameters ---------- root : str Root directory to validate recursively. Returns ------- tuple of (bool, list of str) First element is a ``bool`` indicating success status: ``True`` if `root` passed validation, ``False`` if there were any errors. Second element is a list of strings containing the validation error message. """ invalids = [] for root, dirs, files in os.walk(root): result = self._validate(root, dirs, files) invalids.extend(result) success = True msg = [] if invalids: success = False msg.append(self.reason) for invalid in invalids: msg.append(" %s" % invalid) return success, msg def _validate(self, root, dirs, files): """Validate a single directory. Subclasses must override and implement this method. The method is supplied with the three values yielded by ``os.walk``. Parameters ---------- root : str Path to the current directory to be validated. dirs : list of str Directory names within `root`. files : list of str Filenames within `root`. Returns ------- list of str List of filepaths or dirpaths to be considered invalid (i.e., that did not pass the validation checks). 
See Also -------- os.walk """ raise NotImplementedError("Subclasses must implement _validate.") def _system_call(self, cmd): """Issue a system call, returning stdout, stderr, and return value. This code was taken from verman's ``verman.Version.verman_system_call``. See licenses/verman.txt and https://github.com/biocore/verman for more details. """ proc = subprocess.Popen( cmd, shell=True, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, ) # communicate pulls all stdout/stderr from the PIPEs to # avoid blocking -- don't remove this line! stdout, stderr = proc.communicate() return_value = proc.returncode return stdout, stderr, return_value class CopyrightHeadersValidator(RepoValidator): """Flag library files with non-standard copyright headers. See the current standard for scikit-bio's copyright headers at ``https://scikit.bio/devdoc/new_module.html`` Individual files are ignored if the first line in the file is exactly: # checklist.py:CopyrightHeadersValidator IGNORE If a file is ignored, a ``ChecklistWarning`` is raised. Parameters ---------- skip_dirs : iterable of str, optional Directory names to skip during validation. Defaults to skipping any directories named ``'data'`` or ``'__pycache__'`` (and anything contained within them). """ reason = ( "Files non-conforming to standard headers as described in\n" "https://scikit.bio/devdoc/new_module.html:" ) COPYRIGHT_HEADER = """\ # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- """ def __init__(self, skip_dirs=None): """Initialize with directories to skip.""" if skip_dirs is None: skip_dirs = {"data", "__pycache__"} self.skip_dirs = set(skip_dirs) def _validate(self, root, dirs, files): for skip_dir in self.skip_dirs: if skip_dir in dirs: dirs.remove(skip_dir) invalid_files = [] for _file in files: if not _file.endswith(".py"): continue pos = 0 filepath = os.path.join(root, _file) f = open(filepath) first_line = f.readline().rstrip("\n") if first_line == "# checklist.py:CopyrightHeadersValidator IGNORE": warnings.warn( "File %s has IGNORE directive. Ignoring scikit-bio " "copyright header validation." % filepath, ChecklistWarning, ) continue f.seek(0) tokens = list(tokenize.generate_tokens(f.readline)) # A module docstring is fully described using just two tokens: the # main string and a terminating newline. By convention, however, it # is always followed by a newline, and thus we advance by three # positions to get to the next logical line. if tokens[pos][0] == tokenize.STRING: pos += 3 # copyright header consists of 7 lines, and by discussion in # preceding comment, spans through 14 tokens. cheader = "".join(map(lambda x: x[1], tokens[pos : pos + 14])) # Ensure that there is no blank line at the end of the file if cheader != self.COPYRIGHT_HEADER or ( tokens[pos + 14][0] != tokenize.NL and tokens[pos + 14][0] != tokenize.ENDMARKER ): invalid_files.append(f.name) f.close() return invalid_files class InitValidator(RepoValidator): """Flag library code directories that are missing init files. This type of validation is important mainly because it is very easy to forget to add an __init__.py file to a new test directory. If this happens, nose will skip those tests unless it is run from the root of the source repository. Thus, the tests will be skipped if the package is pip-installed, e.g., as an end-user might install a release. 
Parameters ---------- skip_dirs : iterable of str, optional Directory names to skip during validation. Defaults to skipping any directories named ``'data'`` or ``'__pycache__'`` (and anything contained within them). """ reason = "Directories missing init files:" def __init__(self, skip_dirs=None): """Initialize with directories to skip.""" if skip_dirs is None: skip_dirs = {"data", "__pycache__"} self.skip_dirs = set(skip_dirs) def _validate(self, root, dirs, files): # If any of the directories yet to be visited should be skipped, remove # them from ``dirs`` so that we don't visit them in a future iteration. # This guarantees that ``root`` is a valid directory that should not be # skipped (since we're doing a top-down walk). for skip_dir in self.skip_dirs: if skip_dir in dirs: dirs.remove(skip_dir) invalid_dirs = [] if "__init__.py" not in files: invalid_dirs.append(root) return invalid_dirs class ExecPermissionValidator(RepoValidator): """Flag code files that have execute permissions. Parameters ---------- extensions : iterable of str, optional File extensions of files to validate. Defaults to Python, Cython, and C files (header and source files). """ reason = "Library code with execute permissions:" def __init__(self, extensions=None): """Initialize with specific file extensions.""" if extensions is None: extensions = {".py", ".pyx", ".h", ".c"} self.extensions = set(extensions) def _validate(self, root, dirs, files): invalid_fps = [] for f in files: _, ext = os.path.splitext(f) if ext in self.extensions: fp = os.path.join(root, f) if os.access(fp, os.X_OK): invalid_fps.append(fp) return invalid_fps class GeneratedCythonValidator(RepoValidator): """Flag Cython files that have missing or outdated generated C files. Flags Cython files that aren't paired with an up-to-date generated C file. The generated C file must be in the same directory as the Cython file, and its name (besides the file extension) must match. 
The validator also ensures that the generated C file is not empty and that it was generated at the same time or later than the Cython file's timestamp. Parameters ---------- cython_ext : str, optional File extension for Cython files. c_ext : str, optional File extension for generated C files. """ reason = "Cython code with missing or outdated generated C code:" def __init__(self, cython_ext=".pyx", c_ext=".c"): """Initialize with specific file extensions.""" self.cython_ext = cython_ext self.c_ext = c_ext def _validate(self, root, dirs, files): invalid_fps = [] ext_to_base = collections.defaultdict(list) # Map from file extension to a list of basenames (without the # extension). for f in files: base, ext = os.path.splitext(f) ext_to_base[ext].append(base) # For each Cython file, try to find a matching C file. If we have a # match, make sure the C file isn't empty and that it was generated at # the same time or later than the Cython file. for cython_base in ext_to_base[self.cython_ext]: cython_fp = os.path.join(root, cython_base + self.cython_ext) c_fp = os.path.join(root, cython_base + self.c_ext) if cython_base not in ext_to_base[self.c_ext]: invalid_fps.append(cython_fp) elif os.path.getsize(c_fp) <= 0: invalid_fps.append(cython_fp) else: cython_ts = self._get_timestamp(cython_fp) c_ts = self._get_timestamp(c_fp) if c_ts < cython_ts: invalid_fps.append(cython_fp) return invalid_fps def _get_timestamp(self, fp): cmd = 'git log -1 --format="%%ad" -- %s' % fp stdout, stderr, retval = self._system_call(cmd) if retval != 0: raise RuntimeError( "Could not execute 'git log' command to " "determine file timestamp." ) return dateutil.parser.parse(stdout.strip()) class APIRegressionValidator(RepoValidator): """Flag tests that import from a non-minimized subpackage hierarchy. Flags tests that aren't imported from a minimally deep API target. (e.g. skbio.TabularMSA vs skbio.alignment.TabularMSA). 
This should prevent accidental regression in our API because tests will fail if any alias is removed, and this checklist will fail if any test doesn't import from the least deep API target. """ reason = "The following tests import `A` but should import `B`" " (file: A => B):" def __init__(self): """Initialize object for tests importing from non-minimized hierarchy.""" self._imports = {} def _validate(self, root, dirs, files): errors = [] test_imports = [] for file in files: current_fp = os.path.join(root, file) package, ext = os.path.splitext(current_fp) if ext == ".py": imports = self._parse_file(current_fp, root) if os.path.split(root)[1] == "tests": test_imports.append((current_fp, imports)) temp = package.split(os.sep) # Remove the __init__ if it is a directory import if temp[-1] == "__init__": temp = temp[:-1] package = ".".join(temp) self._add_imports(imports, package) for fp, imports in test_imports: for import_ in imports: substitute = self._minimal_import(import_) if substitute is not None: errors.append("%s: %s => %s" % (fp, import_, substitute)) return errors def _add_imports(self, imports, package): """Add the minimum depth import to our collection.""" for import_ in imports: value = import_ # The actual object imported will be the key. 
key = import_.split(".")[-1] # If package importing the behavior is shorter than its import: if len(package.split(".")) + 1 < len(import_.split(".")): value = ".".join([package, key]) if key in self._imports: sub = self._imports[key] if len(sub.split(".")) > len(value.split(".")): self._imports[key] = value else: self._imports[key] = value def _minimal_import(self, import_): """Given an normalized import, return a shorter substitute or None.""" key = import_.split(".")[-1] if key not in self._imports: return None substitute = self._imports[key] if len(substitute.split(".")) == len(import_.split(".")): return None else: return substitute def _parse_file(self, fp, root): """Parse a file and return all normalized skbio imports.""" imports = [] with open(fp) as f: # Read the file and run it through AST source = ast.parse(f.read()) # Get each top-level element, this is where API imports should be. for node in ast.iter_child_nodes(source): if isinstance(node, ast.Import): # Standard imports are easy, just get the names from the # ast.Alias list `node.names` imports += [x.name for x in node.names] elif isinstance(node, ast.ImportFrom): prefix = "" # Relative import handling. if node.level > 0: prefix = root extra = node.level - 1 while extra > 0: # Keep dropping... prefix = os.path.split(prefix)[0] extra -= 1 # We need this in '.' form not '/' prefix = prefix.replace(os.sep, ".") + "." 
# Prefix should be empty unless node.level > 0 if node.module is None: node.module = "" imports += [ ".".join([prefix + node.module, x.name]) for x in node.names ] skbio_imports = [] for import_ in imports: # Filter by skbio if import_.split(".")[0] == "skbio": skbio_imports.append(import_) return skbio_imports if __name__ == "__main__": sys.exit(main()) scikit-bio-0.6.2/ci/000077500000000000000000000000001464262511300141535ustar00rootroot00000000000000scikit-bio-0.6.2/ci/aarch64.conda_requirements.txt000066400000000000000000000002111464262511300220240ustar00rootroot00000000000000requests >= 2.20.0 decorator >= 3.4.2 natsort >= 4.0.3 numpy >= 1.17.0 pandas >= 1.5.0 scipy >= 1.9.0 h5py >= 3.6.0 statsmodels >= 0.14.0scikit-bio-0.6.2/ci/aarch64.requirements.txt000066400000000000000000000000261464262511300206640ustar00rootroot00000000000000biom-format >= 2.1.16 scikit-bio-0.6.2/ci/conda_host_env.yml000066400000000000000000000002131464262511300176630ustar00rootroot00000000000000name: testing channels: - conda-forge - defaults dependencies: - hdf5 - pip - cython - pip: - "-r requirements.test.txt" scikit-bio-0.6.2/ci/conda_requirements.txt000066400000000000000000000002371464262511300206050ustar00rootroot00000000000000requests >= 2.20.0 decorator >= 3.4.2 natsort >= 4.0.3 numpy >= 1.17.0 pandas >= 1.5.0 scipy >= 1.9.0 h5py >= 3.6.0 biom-format >= 2.1.16 statsmodels >= 0.14.0scikit-bio-0.6.2/ci/requirements.doc.txt000066400000000000000000000001601464262511300202000ustar00rootroot00000000000000sphinx sphinx-design sphinx-copybutton pydata-sphinx-theme sphinxcontrib-youtube numpydoc matplotlib statsmodelsscikit-bio-0.6.2/ci/requirements.lint.txt000066400000000000000000000000651464262511300204050ustar00rootroot00000000000000check-manifest python-dateutil numpy ruff pre-commit scikit-bio-0.6.2/ci/requirements.test.txt000066400000000000000000000000451464262511300204140ustar00rootroot00000000000000matplotlib pytest responses coverage 
scikit-bio-0.6.2/doc/000077500000000000000000000000001464262511300143255ustar00rootroot00000000000000scikit-bio-0.6.2/doc/Makefile000066400000000000000000000157011464262511300157710ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. # Turn warnings into errors. SPHINXOPTS = -W SPHINXBUILD = sphinx-build PAPER = BUILDDIR = build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " latexpdfja to make LaTeX files and run them 
through platex/dvipdfmx" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " xml to make Docutils-native XML files" @echo " pseudoxml to make pseudoxml-XML files for display purposes" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" # In addition to removing everything from the build directory, we must also # remove the stubs for classes, functions, etc. that autosummary creates during # the build process. This differs from the original 'make clean' target that is # created by sphinx-quickstart. clean: rm -rf $(BUILDDIR)/* source/generated html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/scikit-bio.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/scikit-bio.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/scikit-bio" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/scikit-bio" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." latexpdfja: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through platex and dvipdfmx..." $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
@echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." xml: $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml @echo @echo "Build finished. The XML files are in $(BUILDDIR)/xml." pseudoxml: $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml @echo @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
scikit-bio-0.6.2/doc/README.md000066400000000000000000000004701464262511300156050ustar00rootroot00000000000000scikit-bio documentation ======================== Documentation of the latest release: https://scikit.bio/docs/latest/index.html Documentation of the current development version: https://scikit.bio/docs/dev/index.html Guidelines for contributing to the documentation: https://scikit.bio/devdoc/doc_guide.html scikit-bio-0.6.2/doc/source/000077500000000000000000000000001464262511300156255ustar00rootroot00000000000000scikit-bio-0.6.2/doc/source/_static/000077500000000000000000000000001464262511300172535ustar00rootroot00000000000000scikit-bio-0.6.2/doc/source/_static/css/000077500000000000000000000000001464262511300200435ustar00rootroot00000000000000scikit-bio-0.6.2/doc/source/_static/css/style.css000066400000000000000000000042771464262511300217270ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2013--, scikit-bio development team. Distributed under the terms of the Modified BSD License. The full license is in the file LICENSE.txt, distributed with this software. ---------------------------------------------------------------------------- */ /* Custom stylesheet for the Sphinx documentation with the PyData theme. 
Instructions: https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/styling.html */ /* hide external link icon */ html { --pst-icon-external-link: unset; --pst-font-size-h1: 2rem; --pst-font-size-h2: 1.5rem; --pst-font-size-h3: 1.25rem; --pst-font-size-h4: 1.1rem; --pst-font-size-h5: 1.0rem; --pst-font-size-h6: 1.0rem; } /* remove inline code borders and background */ code { border: none !important; background-color: transparent !important; } /* brand color (monochromatic: shamrock green) */ /* primary color (for links, default: teal) secondary color (for links on hover, default: violet) target color (for highlights, default: sunset) inline code color (default: violet) Color variables are not well documented; but some of them can be found here: https://github.com/pydata/pydata-sphinx-theme/blob/50fa47727af8d23757ddecd375c7a31cd282d248/pydata_sphinx_theme/static/css/theme.css#L43-L48 */ html[data-theme="light"] { --pst-color-primary: #239552; --pst-color-target: #F5FDC6; --pst-color-secondary: var(--pst-color-primary); --pst-color-inline-code: var(--pst-color-accent); --pst-color-inline-code-links: var(--pst-color-primary); --pst-color-table-row-hover-bg: var(--pst-color-target); } html[data-theme="dark"] { --pst-color-primary: #72C093; --pst-color-target: #2C1E7F; --pst-color-secondary: var(--pst-color-primary); --pst-color-inline-code: var(--pst-color-accent); --pst-color-inline-code-links: var(--pst-color-primary); --pst-color-table-row-hover-bg: var(--pst-color-target); } /* banner background color (default: violet) */ html[data-theme="light"] div.bd-header-announcement { background-color: var(--pst-color-target); } html[data-theme="dark"] div.bd-header-announcement { background-color: var(--pst-color-target); } scikit-bio-0.6.2/doc/source/_static/favicon.ico000066400000000000000000000013761464262511300214030ustar00rootroot00000000000000 ( @ (/?Rf}ۓۜ!!PN! !n0"q !!0\QV!޾1an&R1!!!! ! !`!> !!!!!!!!!! 
!!!scikit-bio-0.6.2/doc/source/_static/logo.svg000066400000000000000000000144751464262511300207470ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/doc/source/_static/logo_inv.svg000066400000000000000000000144751464262511300216230ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/doc/source/_templates/000077500000000000000000000000001464262511300177625ustar00rootroot00000000000000scikit-bio-0.6.2/doc/source/_templates/autosummary/000077500000000000000000000000001464262511300223505ustar00rootroot00000000000000scikit-bio-0.6.2/doc/source/_templates/autosummary/attribute.rst000066400000000000000000000001551464262511300251060ustar00rootroot00000000000000:orphan: {{ fullname }} {{ underline }} .. currentmodule:: {{ module }} .. autoattribute:: {{ objname }} scikit-bio-0.6.2/doc/source/_templates/autosummary/class.rst000066400000000000000000000030561464262511300242130ustar00rootroot00000000000000{# This template was modified from autosummaries default format #} {{ fullname | escape | underline}} {# We need a list of the built-ins that we implemented, not the default ones #} {% set built_in_methods = [] %} {% for item in all_methods %} {% if (item not in ['__class__', '__delattr__', '__getattribute__', '__init__', '__dir__', '__format__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__subclasshook__', '__init_subclass__', '__class_getitem__'] and item.startswith('__')) %} {{ built_in_methods.append(item) or '' }} {% endif %} {% endfor %} .. currentmodule:: {{ module }} .. autoclass:: {{ objname }} {% if attributes %} .. rubric:: Attributes .. autosummary:: {% for item in attributes %} ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% if built_in_methods %} .. rubric:: Built-ins .. autosummary:: :toctree: {% for item in built_in_methods %} ~{{ name }}.{{ item }} {%- endfor %} {% endif %} {% if methods %} .. rubric:: Methods .. 
autosummary:: :toctree: {% for item in methods %} {% if item != '__init__' %} ~{{ name }}.{{ item }} {% endif %} {%- endfor %} {% endif %} scikit-bio-0.6.2/doc/source/_templates/autosummary/method.rst000066400000000000000000000001521464262511300243600ustar00rootroot00000000000000:orphan: {{ fullname }} {{ underline }} .. currentmodule:: {{ module }} .. automethod:: {{ objname }} scikit-bio-0.6.2/doc/source/_templates/autosummary/module.rst000066400000000000000000000006741464262511300243760ustar00rootroot00000000000000.. The extra blocks seem to be required for autosummary to populate our docstrings correctly. In the original template, these would recusively call autosummary again, but we already do that in the narratively convenient places. .. automodule:: {{ fullname }} {% block attributes %} {% endblock %} {% block functions %} {% endblock %} {% block classes %} {% endblock %} {% block exceptions %} {% endblock %} scikit-bio-0.6.2/doc/source/alignment.rst000066400000000000000000000000401464262511300203270ustar00rootroot00000000000000.. automodule:: skbio.alignment scikit-bio-0.6.2/doc/source/conf.py000066400000000000000000000152501464262511300171270ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # Configuration file for the Sphinx documentation builder. 
# Instructions: # https://www.sphinx-doc.org/en/master/usage/configuration.html import os import sys from datetime import datetime import skbio # -- Project information ----------------------------------------------------- project = 'scikit-bio' author = f'{project} development team' copyright = f'2014-{datetime.now().year}, {author}' version = skbio.__version__ release = skbio.__version__ # -- General configuration --------------------------------------------------- extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.mathjax', 'sphinx.ext.linkcode', 'sphinx.ext.coverage', 'sphinx.ext.doctest', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', 'sphinx.ext.extlinks', 'numpydoc', 'sphinx_design', 'sphinx_copybutton', 'matplotlib.sphinxext.plot_directive' ] root_doc = 'index' templates_path = ['_templates'] exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # -- Options for manual page output ------------------------------------------ # Elements: startdocname, name, description, authors, section man_pages = [ (root_doc, project, f'{project} Documentation', author, 1), ] # -- Options for Texinfo output ---------------------------------------------- # Elements: startdocname, targetname, title, author, dir_entry, description, # category, [toctree_only] texinfo_documents = [ (root_doc, project, f'{project} Documentation', author, project, 'Data structures, algorithms, and educational resources for working with ' 'biological data in Python.', 'Science'), ] # -- Options for HTML output ------------------------------------------------- html_title = f'{project} {version} documentation' html_short_title = project html_baseurl = 'https://scikit.bio' html_logo = '_static/logo.svg' html_favicon = '_static/favicon.ico' htmlhelp_basename = 'skbio-doc' # static files html_static_path = ['_static'] html_css_files = ['css/style.css'] # do not show source links html_show_sourcelink = False # -- External links ---------------------------------------------------------- github_url = 
f'https://github.com/{project}/{project}' twitter_url = 'https://twitter.com/scikitbio' extlinks = { 'home': (f'{html_baseurl}/%s', None), 'repo': (f'{github_url}/%s', None), } # -- numpydoc configuration -------------------------------------------------- # References: # https://numpydoc.readthedocs.io/en/latest/install.html#configuration numpydoc_class_members_toctree = False numpydoc_show_class_members = False numpydoc_show_inherited_class_members = False # -- PyData Theme configuration ---------------------------------------------- # References: # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/layout.html# # references html_theme = 'pydata_sphinx_theme' html_theme_options = { # logo image for light/dark modes # image files must be placed under _static/ 'logo': { 'link': html_baseurl, 'alt_text': html_title, 'image_light': '_static/logo.svg', 'image_dark': '_static/logo_inv.svg', }, # announcement banner on top of the screen # 'announcement': ( # f"{project} is back in active development! Check out our announcement of revitalization." 
# ), # social media links displayed as icons 'github_url': github_url, 'twitter_url': twitter_url, # show warning if not latest stable version # 'show_version_warning_banner': True, # version switcher 'switcher': { 'json_url': f'{html_baseurl}/versions.json', 'version_match': 'dev' if version.endswith('-dev') else version, }, # simplify section navigation 'navigation_depth': 2, 'collapse_navigation': True, # display all header links 'header_links_before_dropdown': 7, # header layout 'navbar_start': ['navbar-logo', 'version-switcher'], # footer layout 'footer_start': ['copyright'], 'footer_center': ['sphinx-version'], 'footer_end': ['theme-version'], # google analytics 'analytics': { 'google_analytics_id': 'UA-6636235-9', } } # -- Intersphinx configuration ----------------------------------------------- # References: # https://www.sphinx-doc.org/en/master/usage/extensions/intersphinx.html intersphinx_mapping = { 'python': ('https://docs.python.org/3/', None), 'numpy': ('https://numpy.org/doc/stable/', None), 'scipy': ('https://docs.scipy.org/doc/scipy/', None), 'matplotlib': ('https://matplotlib.org/stable/', None), 'pandas': ('https://pandas.pydata.org/pandas-docs/stable/', None), 'biom-format': ('https://biom-format.org/', None) } # -- matplotlib.sphinxext.plot_directive ------------------------------------- # References: # https://matplotlib.org/stable/api/sphinxext_plot_directive_api.html plot_include_source = True plot_html_show_source_link = False plot_formats = ['png'] plot_html_show_formats = False # -- Source code links -------------------------------------------------------- import inspect from os.path import relpath, dirname def linkcode_resolve(domain, info): """Determine the URL corresponding to a Python object. 
""" if domain != 'py': return None modname = info['module'] fullname = info['fullname'] submod = sys.modules.get(modname) if submod is None: return None obj = submod for part in fullname.split('.'): try: obj = getattr(obj, part) except: return None try: fn = inspect.getsourcefile(obj) except: fn = None if not fn: try: fn = inspect.getsourcefile(sys.modules[obj.__module__]) except: fn = None if not fn: return None try: _, lineno = inspect.findsource(obj) except: lineno = None branch = 'main' if 'dev' in version else version fn = relpath(fn, start=dirname(skbio.__file__)) linespec = f'#L{lineno + 1}' if lineno else '' return f'{github_url}/blob/{branch}/skbio/{fn}{linespec}' # You might see the following exception when building the documentation: # TypeError: 'abstractproperty' object is not iterable from skbio.util._decorator import classproperty def _closure(): def __get__(self, cls, owner): return self classproperty.__get__ = __get__ _closure() scikit-bio-0.6.2/doc/source/diversity.rst000066400000000000000000000000401464262511300203730ustar00rootroot00000000000000.. automodule:: skbio.diversity scikit-bio-0.6.2/doc/source/embedding.rst000066400000000000000000000000401464262511300202670ustar00rootroot00000000000000.. automodule:: skbio.embedding scikit-bio-0.6.2/doc/source/index.rst000066400000000000000000000021261464262511300174670ustar00rootroot00000000000000:html_theme.sidebar_secondary.remove: .. page style and classes .. raw:: html .. hidden page title .. title:: Home .. toctree hidden from document but provides header links .. toctree:: :hidden: :maxdepth: 1 Install Learn Documentation Contribute Community Releases About scikit-bio |version| documentation ================================== scikit-bio (canonically pronounced *sigh-kit-buy-oh*) is a library for working with biological data in Python 3. scikit-bio is open source, BSD-licensed software that is currently under active development. .. 
toctree:: :maxdepth: 1 io sequence alignment tree diversity stats embedding table metadata workflow util scikit-bio-0.6.2/doc/source/io.rst000066400000000000000000000000311464262511300167600ustar00rootroot00000000000000.. automodule:: skbio.io scikit-bio-0.6.2/doc/source/metadata.rst000066400000000000000000000000371464262511300201370ustar00rootroot00000000000000.. automodule:: skbio.metadata scikit-bio-0.6.2/doc/source/sequence.rst000066400000000000000000000000371464262511300201670ustar00rootroot00000000000000.. automodule:: skbio.sequence scikit-bio-0.6.2/doc/source/stats.rst000066400000000000000000000000341464262511300175120ustar00rootroot00000000000000.. automodule:: skbio.stats scikit-bio-0.6.2/doc/source/table.rst000066400000000000000000000000341464262511300174430ustar00rootroot00000000000000.. automodule:: skbio.table scikit-bio-0.6.2/doc/source/tree.rst000066400000000000000000000000331464262511300173120ustar00rootroot00000000000000.. automodule:: skbio.tree scikit-bio-0.6.2/doc/source/util.rst000066400000000000000000000000331464262511300173300ustar00rootroot00000000000000.. automodule:: skbio.util scikit-bio-0.6.2/doc/source/workflow.rst000066400000000000000000000000371464262511300202310ustar00rootroot00000000000000.. automodule:: skbio.workflow scikit-bio-0.6.2/doc/suburl.py000066400000000000000000000031121464262511300162100ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # Script to substitute domain names with relative paths in hyperlinks in the # built html files, such as to avoid unnecessary domain resolutions when the # visitor navigates the website. 
It may be executed only when "doc" and "web" # are hosted at the same web server. import os import re import glob from functools import partial # -- Configuration ----------------------------------------------------------- rootdir = "build/html" source = "https://scikit.bio" target = "../.." # -- Workflow ---------------------------------------------------------------- pattern = re.compile(f'href="{re.escape(source)}/([^"]+)"') def substitute(match, prefix): return f'href="{prefix}{target}/{match.group(1)}"' cwd = os.getcwd() os.chdir(os.path.join(os.path.dirname(__file__), rootdir)) for file in glob.glob("**/*.html", recursive=True): depth = len(os.path.normpath(file).split(os.sep)) prefix = "../" * (depth - 1) with open(file, "r") as fh: content = fh.read() content = content.replace( f'href="{source}"', f'href="{prefix}{target}/index.html"' ) repl = partial(substitute, prefix=prefix) content = pattern.sub(repl, content) with open(file, "w") as fh: fh.write(content) os.chdir(cwd) scikit-bio-0.6.2/licenses/000077500000000000000000000000001464262511300153655ustar00rootroot00000000000000scikit-bio-0.6.2/licenses/bx_python.txt000066400000000000000000000021571464262511300201450ustar00rootroot00000000000000Copyright (c) 2005-2015 The Pennsylvania State University Copyright (c) 2013-2015 The Johns Hopkins University Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. scikit-bio-0.6.2/licenses/fastq-example-files-readme.txt000066400000000000000000000115271464262511300232360ustar00rootroot00000000000000This README file describes the FASTQ example files provided as supplementary information to the open-access publication: P.J.A. Cock, C.J. Fields, N. Goto, M.L. Heuer and P.M. Rice (2009). The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. These files are provided freely and we encourage anyone writing a FASTQ parser to use them as part of your test suite. Permission is granted to freely distribute and modify the files. We request (but do not insist) that this README file is included, or at least a reference to the above paper. Please cite the above paper if appropriate. We also request (but do not insist) that the example files are not modified, in order that they may serve as a common reference. 
Invalid FASTQ files =================== The archive contains the following sample FASTQ files with names of the form error_NAME.fastq, which all contain errors and should be rejected (if parsed as any of the three FASTQ variants): error_diff_ids.fastq error_double_qual.fastq error_double_seq.fastq error_long_qual.fastq error_no_qual.fastq error_qual_del.fastq error_qual_escape.fastq error_qual_null.fastq error_qual_space.fastq error_qual_tab.fastq error_qual_unit_sep.fastq error_qual_vtab.fastq error_short_qual.fastq error_spaces.fastq error_tabs.fastq error_trunc_at_seq.fastq error_trunc_at_plus.fastq error_trunc_at_qual.fastq error_trunc_in_title.fastq error_trunc_in_seq.fastq error_trunc_in_plus.fastq error_trunc_in_qual.fastq Of these, those with names error_qual_XXX.fastq would be valid except for the inclusion of spaces or non-printing ASCII characters outside the range allowed in the quality string. The files named error_trunc_XXX.fastq would be valid but for being truncated (e.g. simulating a partial copy over the network). The special cases of FASTQ files which would be valid as one variant, but not another, are covered below. Valid FASTQ =========== The archive contains the following valid sample FASTQ input files for testing: longreads_original_sanger.fastq wrapping_original_sanger.fastq illumina_full_range_original_illumina.fastq sanger_full_range_original_sanger.fastq solexa_full_range_original_solexa.fastq misc_dna_original_sanger.fastq misc_rna_original_sanger.fastq These all have the form NAME_original_FORMAT.fastq, where NAME is a prefix for that example, and FORMAT is one of sanger, solexa or illumina indicating which FASTQ variant that example is using. There are three matching files called NAME_as_FORMAT.fastq showing how the original file should be converted into each of the three FASTQ variants. 
These converted files are standardised not to use line wrapping (so each record has exactly four lines), and omit the optional repetition of the read titles on the plus line. The file longreads_original_sanger.fastq is based on real Roche 454 reads from the Sanger Institute for the the potato cyst nematodes Globodera pallida. Ten of the reads have been presented as FASTQ records, wrapping the sequence and the quality lines at 80 characters. This means some of the quality lines start with "@" or "+" characters, which may cause problems with naive parsers. Also note that the sequence is mixed case (with upper case denoting the trimmed region), and furthermore the free format title lines are over 100 characters and encode assorted read information (and are repeated on the "+" lines). The wrapping_original_sanger.fastq is based on three real reads from the NCBI Short Read Archive, but has been carefully edited to use line wrapping for the quality lines (but not the sequence lines) such that the due to the occurrence of "@" and "+" on alternating lines, the file may be misinterpreted by a simplistic parser. While this is therefore a very artificial example, it remains a valid FASTQ file, and is useful for testing purposes. The sanger_full_range_original_sanger.fastq file uses PHRED scores from 0 to 93 inclusive, covering ASCII characters from 33 (!) to 126 (~). This means it cannot be treated as a Solexa or Illumina 1.3+ FASTQ file, and attempting to parse it as such should raise an error. The solexa_full_range_original_solexa.fastq file uses Solexa scores from -5 to 62 inclusive, covering ASCII characters from 59 (;) to 126 (~). This means it cannot be treated as a Illumina 1.3+ FASTQ file, and attempting to parse it as such should raise an error. On the basis of the quality characters, the file would also qualify as a valid Sanger FASTQ file. 
The illumina_full_range_original_illumina.fastq file uses PHRED scores from 0 to 62 inclusive, covering ASCII characters from 64 (@) to 126 (~). On the basis of the quality characters, the file would also qualify as a valid Sanger or Solexa FASTQ file. The misc_dna_original_sanger.fastq and misc_rna_original_sanger.fastq files are artificial reads using the full range of IUPAC DNA or RNA letters, including ambiguous character codes, and both cases. scikit-bio-0.6.2/licenses/flask.txt000066400000000000000000000030561464262511300172320ustar00rootroot00000000000000Copyright (c) 2015 by Armin Ronacher and contributors. See AUTHORS for more details. Some rights reserved. Redistribution and use in source and binary forms of the software as well as documentation, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * The names of the contributors may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE AND DOCUMENTATION IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE AND DOCUMENTATION, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/hdmedians.txt000066400000000000000000000261401464262511300200650ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
"Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. 
Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. 
Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. 
Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. scikit-bio-0.6.2/licenses/ipython.txt000066400000000000000000000065271464262511300176320ustar00rootroot00000000000000============================= The IPython licensing terms ============================= IPython is licensed under the terms of the Modified BSD License (also known as New or Revised or 3-Clause BSD), as follows: - Copyright (c) 2008-2014, IPython Development Team - Copyright (c) 2001-2007, Fernando Perez - Copyright (c) 2001, Janko Hauser - Copyright (c) 2001, Nathaniel Gray All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the IPython Development Team nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. About the IPython Development Team ---------------------------------- Fernando Perez began IPython in 2001 based on code from Janko Hauser and Nathaniel Gray . Fernando is still the project lead. The IPython Development Team is the set of all contributors to the IPython project. This includes all of the IPython subprojects. A full list with details is kept in the documentation directory, in the file ``about/credits.txt``. The core team that coordinates development on GitHub can be found here: https://github.com/ipython/. Our Copyright Policy -------------------- IPython uses a shared copyright model. Each contributor maintains copyright over their contributions to IPython. But, it is important to note that these contributions are typically only changes to the repositories. Thus, the IPython source code, in its entirety is not the copyright of any single person or institution. Instead, it is the collective copyright of the entire IPython Development Team. If individual contributors want to maintain a record of what changes/contributions they have specific copyright on, they should indicate their copyright in the commit message of the change, when they commit the change to one of the IPython repositories. With this in mind, the following banner should be used in any source code file to indicate the copyright and license terms: :: # Copyright (c) IPython Development Team. # Distributed under the terms of the Modified BSD License. 
scikit-bio-0.6.2/licenses/nb-slideshow-template.txt000066400000000000000000000030461464262511300223400ustar00rootroot00000000000000Licensed under the terms of the Simplified BSD license: http://opensource.org/licenses/BSD-3-Clause Copyright (c) 2013, Fernando Perez. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/pydata_sphinx_theme.txt000066400000000000000000000027421464262511300221700ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2018, pandas All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/qiita.txt000066400000000000000000000027321464262511300172410ustar00rootroot00000000000000Copyright (c) 2013, Qiita development team All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/scikit-learn.txt000066400000000000000000000032601464262511300205140ustar00rootroot00000000000000------------------------------------------------------------------------------ The file doc/source/_static/copybutton.js has the following license: New BSD License Copyright (c) 2007–2014 The scikit-learn developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. c. 
Neither the name of the Scikit-learn Developers nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/scipy.txt000066400000000000000000000030541464262511300172570ustar00rootroot00000000000000Copyright (c) 2001, 2002 Enthought, Inc. All rights reserved. Copyright (c) 2003-2012 SciPy Developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: a. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. b. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. c. Neither the name of Enthought nor the names of the SciPy Developers may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/licenses/simde.txt000066400000000000000000000020651464262511300172320ustar00rootroot00000000000000Copyright (c) 2017 Evan Nemerson Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
scikit-bio-0.6.2/licenses/ssw.txt000066400000000000000000000030251464262511300167420ustar00rootroot00000000000000/* The MIT License Copyright (c) 2012-1015 Boston College. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Mengyao Zhao */ /* * ssw.c * * Created by Mengyao Zhao on 6/22/10. * Copyright 2010 Boston College. All rights reserved. * Version 0.1.4 * Last revision by Mengyao Zhao on 12/07/12. * */ /* * ssw.h * * Created by Mengyao Zhao on 6/22/10. * Copyright 2010 Boston College. All rights reserved. * Version 0.1.4 * Last revision by Mengyao Zhao on 01/30/13. * */ scikit-bio-0.6.2/licenses/verman.txt000066400000000000000000000027231464262511300174220ustar00rootroot00000000000000Copyright (c) 2014, Daniel McDonald All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the {organization} nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scikit-bio-0.6.2/logos/000077500000000000000000000000001464262511300147035ustar00rootroot00000000000000scikit-bio-0.6.2/logos/favicon.ico000066400000000000000000000013761464262511300170330ustar00rootroot00000000000000 ( @ (/?Rf}ۓۜ!!PN! !n0"q !!0\QV!޾1an&R1!!!! ! !`!> !!!!!!!!!! 
!!!scikit-bio-0.6.2/logos/horizontal_powered_by.png000066400000000000000000000352331464262511300220270ustar00rootroot00000000000000PNG  IHDR-sBIT|d pHYs B(xtEXtSoftwarewww.inkscape.org< IDATxy|T?yLK6E*.ub(򵊝D?lkU*;nZ/d2 juCuZկKm !I>G$s'Iys$ar=B !B!p*B!#!B!M!B!I"B!p4IZB!&IB!$iB!B8+!̕~uVTkp^˫9rBB!Dn]jdz*@I[^?aB\E!ȡyzi>f&չ.=B'9-B!DB!i0_!B8$-B!D1J[&e;!p*IZB1M`b!!I"B.˫x6h$-B>K!"GXkն 2K!"W2#!D%I‘*O89-7f6cB'EH۷Ⱥ *-7BB8$-BG"2u Bd3նX$iB8EzTgg3!p2IZ #sƊXm[RiBY!Y*-#֖+-%Ä}$-BG"$-c^ia(&$iB8TZDGFRiBY!@^-:J)!D%I‘TUUU8&beB%!D%I‘(0(qMF2K!5M$sZD_,5SRiB]!,hTZD kITZ}$-B!j")!.WBd̪q?IEz֓iLօB!U[@gR *fCoŜw#!"p nFRi}[3VK9+O_hH/4fLnK~! I‘+, HE~&-eg}yc:hl"$iB8N&RY2ޥCUII`_ٌE!I"pnY5L洈ޏVJӋX˳BēEkq!D !"sZDc$-Ri|I)e5ߵtF?وC!#I‰"zꢃ-'-J+Vg-6bo;!HG!$+rY j%_if8nBuBBsBg &oWRJ$Ù}o%xLyy[vڵA7##al\)M_alqghUeuL+((V8۞貯jDzsM缰`|cӠ>X6卆JbK.relgm 5(?KDuj m{uIZC@?nw,&-EEE  hA͵!FS xڽy{hĈ^ %E_3"KE0bCIQ#Ӽfx@)e`k.0K׌m\q;-4դZ;0p:1O}aLsp4z̒%)Amu 4` inbZгv_kP 6LK>gm 6LF&AP L@e0qjY>!GyͷDor׍J ~m pCUR\dx>"C < v"`83M\\{0%6#mV,/*Ԙ3ݮk E f&nXaB\U72@?`Xxȅ?ci7,\gϞ'"pmuvE~*8Vl.ߒbݏ&,XRR2\b;bwժUn՘G^SܠZo4`^ FcHiKL!Fr󋆈 ݒ810x;$6glg6!b*-lyI^v\ߴ~[SkFD&)1;s\[00Z0lyXzwo_Fkة_x^S~h݉FwIZNrC9]_UTtG%%E#MBv]C)stwZ|R&- G"l`JF'[j|kR*s]9RT]PW{BOgϞ!YBJ72OIsJⱇ8[y{ Y% 1Vǭ,!IZ JZJZpt4tЂ;wZڤ# mGLYCu vbJ&=3ט{qP5j[!^y4ԧϢI hJ0KN880Kl-*ISF" x迢"h:CuL;+^uw~Coc`XY#H.J"` 8i BJ`e9 +o'=wg7,*$ggs,a08waM0w&`{g;E)Jb&N}jm[BC q껯4h;7Qìm]xf&rܹgs;dڌ2;=;ad>h%zz2IZBNp>]: 1Y^x㭴%Яo9OVak?IZr)ֱDNU`{wР?{ub[Yϖ7fYk+qXٮミwK@"+$iB8JjpfLAI }]w?SSŊAU 㚵bɷ k ,j{!rg\N$ƉijKdOVQb6_xUEEEƀyJ|e5*54`aJ=c\.]WXXɤI2"vBvF+Uvd#M6nj`'i5 ԸqFǔr:Ǐ:&0bJ%3>C굀\\)#L ϪjcfKa bS}n%8-o \:c ֮,DL|oↅv2Pp6 [IK8HπRD"Ghtf0 ` WDx^:4CO>YOq*,>dd+*w'@iZzu~^2QC@iÖؖsk.: Ӡ@w2uk#sS)G0(dir]"/۠_y<KC)ꤖXQL8qGdBUu +*_"ƃ55#ӧOoHge`LƑL8 bD&`q`@W\pAXj|KpE I] x<9}D cM1Lm;6ɠɘf©S/ڑiLȽ ղ"5 ttS:iWҿ$76mځ]_|Y&-k֬frPti'2+_/NDL tA5}}0k‘Ȭ@iiEX::~o"Dd2GۍL 9e: <(=pMy$2_Z$mV^?`LM` Lxm$rjiVQ8LK %|ǁg~TAϒw!Kk~ IPccg?&g Zk䜤ْO`at7{K `DXjѺ^-,[ /ջ}Y {\glG_=z:0ᖇ p.c+\Qq53M d@MAD7nݶh4x<:#D*!O8 ht4bz-\ \VqI O]v!eeS: SfYHD \pZhԈA@g8$.iy(?M`T5At=ՀP(F9.HnB! 
` 06P8Ry 6 /:O:gDc\E;̊Wnጤ_.h4+k#]xv+x G*4.Mn6 ?/^RZf&/.TZvL%%!bX+M;]ku +8JK۽ZnZx!.vΚ_m"vܷ-rLt1op_%a=lc$,Lxm{L;N cLccɧ78 ڍcm$2YA=6ڠ[RT0(y'|ޑPfL4T= 1?=U3c{Jن; [|'k|?s0 DW yO Ac^i/<:KC~"`=]{ #u-Db² D3̆{vv-Pp?ۋm0bHX^SF6yO9b 68~~lKXb|yG|_24~m1iܘSt{ҡ1[ *'}2al2X9w< \ǑAK9DƔRx2'VZf\#b-ES7#6Oذó7.N3D`F&zx<|{' y Gri5Z|>I~2wбqmn ek+*?4}0)OG/ Ƿ p*Om#׀vntԳ*yJ0ԗx< J/%IHX\ʎcJдl .y'|ޛС zinnD.eƌO :"P{ԩ>Sb: 0\^"?60cևwF [p_L{rJK&6#a2,3O$ #=@?A\ew~3V&0G:FͯvHC} 8eEEEN&:WZZisow7?]vݼ:=J7,:ׁo0E |?i7DXFkq+Zn:n(y@U4qZ&poyy~ZCium S"FR&}RFp^ܡX{mR"&JxbƠlڴiio~3k#ˉç?Rt7KW/f͚_m?0)\Yy 46.7fܸK4q I֓7FY<tmR׎k֬/3>p>دLJnOۙ*8h5(c] he\'|>Ñʷ7[čjڧHBC|H].b;O^zEڔ7Ir.+4++-c"+XKZfYI_)ėeX})\} +0JױvϥA[; !Vϱ~e+MNhG8]𽭧~?'|@9U &eoRJ@&aɘYoq͒›)UҢ׺";b,xOg}yPj&,?) MCwA^2EVyޓ+=Dè2wi+ǘ/~`PuHK5++ FT6÷:@!bht0g5| q`K>)6;;UaqVuu81'L&%HD(Ml)*O<Lc2ӵ8#@„zV*ՐCԝIG.X~ _1Mk0ą9W%-CJ .)|ypICJ ()x"%/JXàՓ0n}(m8;&v3;aKEi n‡} I7o' TZ?oB!ckAmh7D,J ʖy!n#ȑſn7=GчTTEl|B ˛t:6>T;IhI? 1cJd{zslL$Ə W~B뛫NR4Xʉriqc+/ѱR͕J ;l"1w_ϣuN4@0PRT0]fz\ہmÉD;2{M7uj?,!zb|f t^R7nM/? vwf< n:`ܸq ٶ@|r1N\5^?SjB̭(73&y8E|l!6f}WwJ$2x|b\ -ǎ;Ie_bz;ޟAgNcJR)%̸',Hٯ_$ZUVV31f sb$*~ߨ x"|/ׁdɗ6TjmWER92ql/8аf4Puuu8Iq2~0H1mb"]!bW?^ σ{k֬ ,?j;4,=L짏IZ2иםCLk IDAT-jmLgڧRctZ̝6>nxﯻ"]yy1E t%'91V" Ǖ]ZTT4:ރoe٧qo*DG뇗j]Fm7 K0'LzkœՔ2a KM5m?>YU]Sh;D,gZI&խ|UnȠovIQ($f@dy#b OXM\|ҷc4)cǜCh^&;qHZP~Wnx'\b4.y>fWI&Յ+"9Uı'WǘZДhŗħf WTV#n3M"Z/,K>o#Ժ7ϳkUɠ !'ܟysFt,I?ًeh݈}S@cUHk,sК8.cs̱=8,kVOA'Ę1"a5M}tjH=}nnz֚U Qل@G$JxFիXLSz%&i!bDT 4G9-A ck @5cCO>YFk/Ϡ6{|;|z{ sO%VJ̬3NZTwxVU3Kݮѥ0!V-2įaOx6y e`N|\qnpU#BhAIF+-wE=w@ ;NЯ+:Y=ĩ hfueD4y;@F|oڴ9Iw=o{=\^AM }]Ou(uN<6\})M~=POMCGJɘ7O[j;eiFGVx0>`@Κ5kN:5_Hj230je[w+8-t p@cy*a&a;ihݤ~v%e}x6 ?&~tSLQ;a WzhX3~i񠂢eW~CL &hµ3~:įQ~.w{\))Ll#Zq1,rQ.7e3r+MѨK:#OMwnn8ReG5 I-e>߆82MF܈0&suu%E:+[nV[ӳޙt~N.|QhH!47,Me ,ɧ7ҷqYLn]ZkQ߳H}L/pSpP-sFpݖܖ8wFh4C+ՂG H_ǭڵFl118 ~Ӷ8OpףinyRG?a[m9fÕ$\QyI8Hؖ@YioV1(/|[飼9Yr@Ο3s}Iq6K` ..i5ֶȗTWo.j ujd VV!NC4UW,|4j""{ۈf\G{mzՍ;Ƨn7 KSP< S ?h0ys8=XBZ[Qy2oH-Ԡ*BM Ih @fhXe{x> %-DfdǏA#(U mhW[>z":<Ⱥ+*`;;MEhn',WD`㥚^ #4qM>5bhGrNK1Dh !vT@*kGP,TyZl3Wж,ygc'o/D_@=n}d'&Kc ;u%.k0Hex 
UF1xMjأ=6;頽!:pEQfz ?ܮ1cuV1_{C;pf m$|1fAOi3>m׻2&<29(T3={Dt䁅Wp>cη:|/SeoTTD=@1ui"w5﹀&J 쀦S> +Ba=Z'\^y9)zoidcXx˸u@Uc|  31BDctB>Є3 oC.t.1ngB`G+CyGĹ s!:DK {̠ ZgO\Ǒ G1+=C_`hB[LCo\Z2L'쪤M+Wθg "&M7O78!aaЫA'|wɓk3iS 0=MXMLsڵwIE6KJ8~\³IΌ`~@ADLL;1N-x޵z< 6O `OAmPo4LmK7i=>O} ̋5)(~uq{3M2]<_8W3aM,WOX,_@ J~ >%'s;"\YQΨTU;<煻k~eg_U؅$,< aZd} x܍:8> rGg0a.?8'iѴlpJ\,%|m3zAk)h׉ҏedy 6VX٪ى67%vΌpÝf^ݜ6GL0P33 kX|2 #&өv 4NXiɊDbŏ uL<4c_ً~DS~0_ً~kGo :~&=4DRt"\L)4>1H?+ґi. h4^룈d AKKKk"GRNd(Vkx;KjOD"MFà1`h0`|W^/.O>|v^tZ'ρamH4zC6o&(t3:6w☉;wxȐ!X2Rz 1b`$#*DPǠoe`+?w1ܳoWU9 2V6]S:#0 # smZ lQD[(#_t==5n[FD>HcN5e)ekܸٔ1q7soe_%&QL!u\hxvblhVs^CFЍDgc8fv:/„PS`-L(/lCӉ1o~Bu;8=\œ.@J>`u}©n+tKVy$ N\_tB~BgFC/+m\œ)"zC`lo~x~&gYxuhU(+3!$-B21}#>f$Z'$)AsZȶ3g3A ЉvMZ!rG!zM6֖GQ}.E,Np\ 7lN٧EnTk<}?0OQ"KguKe lݧE}VDBuLF>ǷaFd5b*l}}ZFи1qfٵOCB4kROY3\!:IZ|3=j3]̩K1Y=LnA T&8=X 7{Œ^FT_gAy` p &DEɛka e~o|A% Kİ6_DɲrD7/T}!d\ .vl'h&~+xAKb] K3NXItU$!z4 KIK5s]f];B#2?W!>-BM?--[W.! ѭf/_ koDӥ-xJs-+~J;Χ~ڹt\! B8 3'JX^p p3͘ՄΞ]CƞƽP/YN+JQ^oQT?V?h\$Dw{ҡf>H׸Xo<َ9+Mg_pgxjXvGBt$-BGzcj٧g?Xdc.c7iŢ3Qe~6[6sޭ%i0!ܹ3a=3D|!lt2M|KfX75L!'IQ6nC32Eq\O?;f#6!HE!TW ÐJYpM7ߣ/xYg ywߚ~L!$-BǡEk&DAl9Lt ڮX)֯ͺ{l&$iB8S0&D6cZ Oi:%g#.!h&Iy&\Z*-Btf} ex`(~~%&EL-Օ|mϜ OgKJ`B>OiB877|ݺw8$!hE4y}0Զ&賤"pek"D7cX0E/4P(? 
!&IZCaÆ먩"{Μd>LsBc_-}$-B=2_Z6sA>@u~O^2tx6B-!)i D]p lL:xm݋Ȅ}$-B͕Ekw\7PX+oAVB !G"!dٵO}:ԁ`Ĝ~$-B-ä",=*y.exL|YKI"pdEKE ^0\&gX`(re#6!D$ItcBRi©^vkA4 remčw90 !zIZԴj˜!nٌy3T-ON  RhB^D!5UXHV'mƼD>LϞbWF\BC!㘤o|"Dt43|щ#ks^D& $iB8Än1S~?SҚ^iY LI"pY,!z;1z9+MF\BM!hwcB9-BDg`<3=0g[ǒE'_BىLH"p$j' !z3?I `{fgl@)D&I©jPJ3i8Z<\;&p4IZNUs%!z;nQLv „n~n MP̹A!ژr"Xޤ赮_BD1%~@"/-nf=@!I™L׭+:!DB`%IB"Ä_:!D Z!D{$iB8I"B![sB!A!Ci!$-B"2$iB!o(GХIENDB`scikit-bio-0.6.2/logos/horizontal_powered_by.svg000066400000000000000000000340641464262511300220430ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/logos/logo.png000066400000000000000000000301551464262511300163550ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs B(xtEXtSoftwarewww.inkscape.org< IDATxy|eӔIP击@ܕcqe\Q&iӞ94iI) .2 \EDmѶ4M<)X 'yszs]is4( @"@"@"@"@"@ʧ=LDXl0Rr2 I3dR+w3ƾ?=+@5b1{zNI/41^Wf?ԋ/??ˆ5B5K/<4k/6cxar_sܟnUj K.,9udM&70x7 ֱe@u7d$&"4^4@p@yⓂ۫Rz@ ")h Omb`@Eʇ,79O E in/FdY@8}x@0@ LaaPPQBt(!6z !6 z+!4OB i !@d[g@BQB q!!t!A2.\ tg(-QP->9\-G m0 BptXZ@ E 8@*@ #d4uӹ;2I(_>M[cO~d`˫4"0!̋B?CB9-%;>̗6t"A }]!2]zNZY"/y)f\Pc2/N, au&Kc@m"ȼ\@h6+h**yŹJ:"Du@m"ȼzr@讚 {ZsP\pZdǒvg@Ҿ#$kȚʐ@m!5`ɝkLX*R+-XA[`@!%dhR*mɺUϕ%$j@M0)h L4I׹+[n>i4Vf6}zkb~IjK{I:#Gy䴴C):i$3zk&N3jކkѝs7Vҏt'=S 9M9WcFkc+b֬Y[o1BZ8ޑo|dڳ I@IKz)Krڱn I{F5^,_,ŷϹař4?>)׶J􅬯F~T=Gָ8}e;BEF[M:hQqEoS4όiRG듧]otϊ܇kM"]nf\y&ޑY-=`J®2VI 506KPxUC4W_{d)RIr_,=W)ՙ[[3$n=C 8ri) B~ =Y Y]}k6 <1ezmqa{g`07TdɂGx#Z[&KҞ5u^C@ż/S5uϞ;gc7T? ꝛybfyVy:3ӞV%l̀Ȣ[FS {H:)dM0V?Mr"*o\Q!uAa{{ų"=yiP;,d$ >QŠEfiP$" VI.a.de݇x"gқɒH:X;ÏRu$5q-wߎJ Ƕc{@>;xW\|8P1EQIS,aTܼC{ww==e1 S!=%?hD*q+oQ@h^ Sz@hf._?eΚ?u-޷Y~P06.m1@ctgw^KY!k6m%) ryφ$6J̄zcJoҞ,'] I3{}M$+Ɵfa\gbOⷬ?rq>7ϔ0pL{RVN3edʉd:qfʬ'NB4a+7ɍJ+֙Oh̉H/$׸b4Dae&aOS@hRsYO6B24KmHY&})b Ta}&Ԇ_{sq_%j 1֏NV|}}i&~A~JS rʨ,Vr)!Y6xؓFt9}bYY\ޔ%$xJw'Z-aTĹn#2Ҙ.CN0OOծ5:^=GHr$=!dM7[PCȚP'#J/d2$>RyD;LKisG$s6#rMJ{HfɖVafU{rmk{ʻ Ք/&%Qoxjh80X]n5G;Iڃ7q|,t]3.h,‰t&M-[8|>9}b (¯B~oADO  NĶmW@Jْ nX6cТm9Ej̶7;9۶ #9`jJ.ݿUPb [z2GB֔$wҿ tRs<̩̌0%s;d䟑LW^KAkӝm@ ġ)g%淤=r\. 
mcKi!kJMKPmDkCԚ0[F]5(]2fwEnwY~]߿޴,P쁓FV׺VwՇ u>gwQBNDIf$wy_7]w z9*\=\=qihܗx;}j¥W >FhB[|(Ԣ0дS\mhx~3`nd'&k]!#S+- \lNnjS-;1B(%{Zr>0K , fކcϕ ړDQZ$WTo%gǭc]P<Ƚ7 `W۰"ȁ$̣vgJigEOSݗxJwaj'H L0/'ڥ3Zׅ]]KKI;*k'Rdw&r@*̣fyÍ \Nl`_Ԗ ԽtzCI=nx5'С2e~Z׻L3BR\3Z_P¶mڨ@8Qq8*`<`cg \0\]Ͽܷhm$){s'}i-*жmߗDdy[^1sWoݺAm&k%wo/kN!e_h'C% _<+P?IoL{ +mtO9oq+f2w/= P ΁!I?ȟwqq+?/XRfA.`h焮 ԋ?ô@dt'v5Zv| asvW7hE[VJJ{lV7(z +?hJro>g5RЀfx񆹽?ru!PG8':d;j{F@Ȣkl\s7.ө-> 8`Po>fi;?!a%+_j/*MI2Yǡ@s]%"۰KvqFsYI+#|\J{n^!LGzbU'DoH:,dOIҾE?.&?s7xiY<n%z6H w{t;̂ρFQ-!aw1q\ <m xTGKq+"*`pxۆˍG ՗T%Ϥ=`>v_9sfJg&?k%f֜$>-Lo'$7 hi<3-K<%2m N{G7R8'J wv}ߙWbHU<>֭[%ZzyJ4|s" VKV=+;#?W8CEX{R>)I;6^yUs՜kq DW_{ΉOFUs+r.F ҈2V($Sek,ls,7׹,IE Ĭ5 ͘򖶶gLoVKܮWe>瞼aՂT64^IMwqE)H@M!V0l][vhkŌ>}a{{˩mmmiχV`aK6Y}KҴ5(5V T`Ƀ3 56:%/DNt;L&Sm-6w]nmppO " VDwY $o5ݒ%"<@ V[k%=NKf~rph|cӞQ>z/=Գs@5-Wd`r^zWM7'!, =# ]1{bi`( M (k.\ڃB REۧoINm;ΩD?ʃ;%oZso=XL8>9'#{A*\rD{ِ c%+~ c5@>}2}z%o$);ԻHɐgců;՜@w 4IzaY<mP?׮:FKO3@MóI)K6oIF%,kҝ2->u]AH xdI{j+{>@]}mC3eE3jGCyߞbOONN/UWI"{īx[NU..ʢ=Q(-7aV+N{X}*s޴f2NM;qQJ0m$LS %{dU odlPYy+K&}>92u`vVP94IDAT‡zc?e@iixD[]Ȉ+OIʉn[=J hYiq(Uj={K,+;YW,3y4*sNʽlO)IiQǖnS(: mv&7~c2 6K$Ҟ%F-=˲.l=wOkǝ+O{$R> s w=H0x'6Yyx^8@(I۷%ۆl"͖gɐ$^<88|sڃѹ'ӞaN'= иI0WxJ!(֭wܗHڛ,u`f n}uhǕkn4ɖ-. UmtJ=[sÌMGyݽT;%L)}kZ  Ҟb̦ӢɹPִzį/^?tcI;oƹxIץ%&msk8wO+=Z$m޹ssy2VϒIm!{eNIwJqO{rmڴief($IK:"*^n~k\O5%Iӓ :/xǚE+nL{#ɥMor;7^Sɦ h5,(~_siwn9/;wƩ`s/Iҫ-:.΍FI[lܬ> swh0Iœ%W+ٷR:ngnNk~c#} < *ȡ2\۳-^ )iGڏ;J$ԸXI{P|$Ysq\Aܿ5k׶T'hpB5<4FǣrwuJwuk[@@M|BkuE6dGI&sX,sT o4Դ®*ɇ,\)Ӓ$X[||zC P&,2:t Yݫt qNҖ}lEUR?@jڈAW-Ǽ[soq|I Q̔'R? 
jZ ~С2 [vNJrGI9eũU@MMk B-zQŒUR3k]*u@Ma1ϕEogAgyd'~U@mOA1MyݧSRrGpYs )sg I$YZIIO^d`#ٗ%bdݹ?찫BZ%Dà$]{I)~I 1cr?sjizJ?P7A+ PGvqa~T'?}qz:A P&>6e8BP 6ϏW/Z% ]V @B5 nz[s\g9K ?Cu+瞻Rs*KdضYgTIB5s`剒,$k-ҳX,%WeϤJq /lF?P{Bڵ-ʏ^/6M<'&X ޞ2$Mt/qN_`+ !`%K[v%{˾Y^p s"G'**׺V~ @ !@JVuxIB>ֵgUzQȀ7I*¶i @9ɿ(iZ) g+ d2InMt/~g{+ d@^/'KQ{-֯:ҽ@6 c..l2t/u=T@uYn*~3;oPBȸn9IS*M[}a @ x[nsOt\Y>J2 ubD]'y1sfg5R^rPG_T宖73O5Kx/PB3ռP>f͒_ 3PgyWgۤ-K/{fzX!:sWՐ"{s} @ Bs]VnU*gn[+a(Թ/]?zΫp+ ^Ibߒtx{|aBT?!4jU(MV(V!4jUFaaҽ@Ѐ|WoW-( @8TЃwJ^vOsݲdݪV(+xWN-쾮 B oQ 3­]f׺p0FB,LUwOw)TTͻ %}zpBPB/pBCTB6̿wUx8!o,8+]#*E=wA R*Gz՚sS^`頻 ?]vUFBxL+$/V^oᲯU @62I JdZzA@#Ƭ ^$RRs{tڅ.**@(KW5t/wBa{Јu+]+ӫ?۫ B ˢ&}]T${uϢeTk'ҿ08e{ image/svg+xmlscikit-bio-0.6.2/logos/logo_and_powered.pdf000066400000000000000000001743041464262511300207160ustar00rootroot00000000000000%PDF-1.4 % 8 0 obj <> endobj xref 8 6 0000000016 00000 n 0000000570 00000 n 0000000630 00000 n 0000000896 00000 n 0000001009 00000 n 0000000416 00000 n trailer <<2C780593B9ACC84D8EBE5A09F6477FED>]/Prev 63395>> startxref 0 %%EOF 13 0 obj <>stream hb```b``b ,  r1> endobj 10 0 obj <>>>/Rotate 0/TrimBox[0.0 0.0 720.0 200.0]/Type/Page>> endobj 11 0 obj <> endobj 12 0 obj <>stream Hl]7 }Q$y6 :@R&oSӒb#^C?8=~x#h9Hf!C|CVx=f`׸:29r2BU{jUnL0ƒ㥇 2c[? "kE* t,3Yn>k гZ4w4z/dЩ~-r -@tJm\iJI "ذ)X -yYکtʑ~^Zua 8nۡM4Y fe^Ϝoc Lt|/-#pv Q樌2u7J,`)܇e/oDݵ?f*Jz^izqo(XTgEP Ę#)۱Rljl{F,bE^\G YOb:-]x½-H\Y$/p<ڔh!ĐRw-z-Pv|BVqa;'sK^cQt \Fq9i!}ᵈU2wWC.j,L_+Y෕l$_p@kbMpuo" )QxiqwqtQTc$Ai("&ZYQ0 XĞ]<^UH7 Y}]LjC&<)ċ&3. 
ewڜݚr~fzFM uѥ3B?OMT4Y.I0mvIguL4/H(>x8%Io4BU,gVdHijXC2r]IBrP)KJWM#=x/|@=o)P&ߡf\LCVEaQ>Ev +z~3'g$7ub|&| $'5k{DG*:W8Q*%eWYw}0IFP|AY}ZpV'AaU9JUiDc7Nk.?-OZrIv(8{7('ٵER$AV2#RlCŇ|W4fDOF<4\&jŧb(f2h,+9PuOk@hY0Ia&&(*L4#sW)LW#hQs7?=дgx(&mRф@15*ƶ5&%iYސJ!i W5`٫Oqzr];"!Fվ37vU|Y_I&ǮU}UsQ3'EϿFn }jHcNVU6V[okhч_1K;_ 0;!R endstream endobj 1 0 obj <>>>/Rotate 0/TrimBox[0.0 0.0 390.0 175.0]/Type/Page>> endobj 2 0 obj <>stream Ht[Dk@|?Vo^@cm`s"Hvm"g8z?8g/g|{9C?|ˌ:qv*g#A<~oE`3oj>O?ǁ\f}ސNMg#CQaLQԒ6VEgʟ/"JA߳_X^3d'U6Uuz(O:"ilqTZc-gʁXV,Y춬t֢ul)jw`ztS,Ȇ,0ns Ab/0o} {$SI׊}}l?[[X~ݶӲ.e%Ma}=#O?Nf[M_NYImhrO r_ogLU eTJ5H8no($+ %+ZZ@[ɻ0R{.j,ػx]nDBfֺN 4'8{)YJ~Yw^ U wGd=@fs'vLwQ*( >!ʧUY/-5m3|&˂IhUTzz@g͗u6)aIM@FшRI wH;.O+)i\JO ɖ5*r\+GӴSǓ2zxdbMQ<||jS+}#ed)SL'iCԤSRzZ+;.*< C3?EfKz|I+*%io~p rH̥A&zJ_YJ(,pY}'-n纋TDuk§an2[רx3C|w;IY*@r%8^AD,!gE.h)b@QmT4,)Yؗ:?O8{\FRNb<}׽~|ϵo z5˗Q(VZk mw뉴ƼuahYzNj,q_pNѵN˛T %hQ(P}2n̚\QD@zS.CyP-h"NNc*5|n d!"& Vu0YxXDo+Rf8֬A/vzLKvkc`A+5%3qt92Ƣme +VbRhбbT4QտjCKBgS0~D:A`+%Yf*Ve5X:cxh g)D;VIž,S"g1Ť̡d̸*5_Cıfr8$]NX(s[t"9s5 . 1⃪UcV*pCQaVeQsfhQIip3}KiTD(% Q6mPA!h>IGdmL'n *:c{LBYZc=I=ۃEtm m5jN^-f[-xW&u 'k h)YJ#XiŦB*5oZQ5Y-AAh1&9*ɂw| 6ud6x]5 []_6(k1 #@$a-c]H.4}ަ3,ݿT2oyJxfN7Tꀮ",\&0$%؊+p΢OޔZE hdb(dj^h=l=C n0`aMp8ϟQiü,,&elFKQ~i1v X岜qQ+ݔ#SE¿Gs7)V܀sQ58̚uРF0}xRA!Pڟ=5;H™U#7q4 UI"!ߦ$_v5qKt nFX'ϸtC3Q-7"@ݎ];/ui8HGyt1TEэa#SV_Җ'c&Unkzi sijC >Нm{6r$xLXUa4dSr᜞˅l~ni&y&u80['ܦL5<޴*hnŰF>Y)LR>ܒɷLy[<1qu~{_m`Xp( IblX3OXRG+v/j^reC!E@4|J})\RBcn; 2&N]X# }e6·h H|%W$ebPc 9rI#fB֤O!FtBN j=>䢶քiÈl׉"n`/G϶^R8:Fi1Xx(S/5ǦwƙR*qpznT$yI(XE53j¬{a>%ow0y *' +. endstream endobj 3 0 obj <>>>/Rotate 0/TrimBox[0.0 0.0 650.0 110.0]/Type/Page>> endobj 4 0 obj <>stream HtW[$ SJk;m`JY='(2 ȗYɇ#ɇ߹og]*$e|dzXj\<>GZF,6-z~,ir=gjy^-үѮ+O˩]=ǒjlh̙ƨ>LxЧc)}sKK |/4xyAA,qy "U66+R/ CfYKױc0Skξ>z2|/&~\xK%oҭHAHHGdw#}fyWUPR,9/@7Ddde !380@H[Ca_EnEH"aExgk3='ARuuxEqJ\Y~Y~˙6f|GTվE[9-T*$ڈ(F ~swV5gj^Jsd[ng1" =I ;t>O|Q #W< i?Tݷ &Hq%#@WL_W/^%u,yރwUԼzD|:yViV1'/NTÁ3 hHT0ˇb^&SOPm1Pn?nBgQ܀$/1XXK{(?(PZc] Sh-·Ka1PoQ3+ P"ñ==2߱-R8Vk<%m8NdxZy77EM9Ǯ>vd*? 
;vkHPO`H}Qcˁm!]5ij}()R $_ ؎;<{l Zϋl7zoWA_ (76\ WNjFA6u~$i7Zcw2F{juRk`\es`vv'q'۝me"YxaHt3/B )'ޘ78Z笤n^ZP57uv؄`" * e@8'a שy(G65J4(B 4"84 PnpP0*xY٥v VxMB/)ԓ!.-GH 4\= |v_[ 6w#?ËH^08¸b71}8.}/ ]5&+%vVvyN$@עP:eG12)H=Y`~5`(y8:B|ZϦd7S{rqq%b{'E?!QpJ:fHUn0`6t,/YQ XU}ч,T!"P vZrC\ByP)m hQ5^)d/nbzRK48UP3r8 [S_s(T_a<3V!V!}nl)7gg!B[MS2 5;Ҁ`#Q%!o〈Fo6PNsn]&D|G*ԞC͌(n/;Ѩ9s /h) BŦgFrvj$_""2:Z!;u!u]hDQZ4ɀ`+\,SavILIxZT若0"ɽ?cdQJք};eу:cڜA4q ҤOOԲj-%):l?KP-ΘK\"{Ő$Q&z0Dʹ -eiq(KêĂu?hgޤi@n ԩ?]֔/`a6w:Ѿ9촀 g_@,#&T?1H~U`Uib[Uj"fZkdbF>с mId#zm[ؖ$ƠRH=5A?٥ [!*=Mc覵&ցa*_&749y: " 8I2RC7`Q fyml?VA2MIߔMIJB/tecԗ+XG}GX<Fn%M/g=BSZa60<mTVmYBUe)G/'+=Ik5ڒ&=Gܜ=Np9Z1ґW3#om=ZS2}il执! Q˖ f2'ۙf\gJ l5FI]Y媫>rs5p!cU5 (T1FRr"8e2ݸ |m'X,Tsҏ@bEEO}f5WTs`[C gՑ5V8S,7$PNA,U)v2༌Q'V…):厺 Ep\\=x{)iOz셆\KϨou qPazbn`4M uéY1㿏X՘szj{}+r:%ಱDk/oVy}{,Xe"%$0kl,WQr@?wMHGBMf~K"8)H":Gv- )qB>7_%b|{ N_L#kac2tR %#jZ,u)+=#q0=f>%n=z֑ZyǖXX|KDkw#wm{dOY-ׯ?z endstream endobj 5 0 obj <> endobj 6 0 obj <>stream application/pdf scikit-bio final 2014-07-16T14:51:10-06:00 2014-07-16T14:51:10-06:00 2014-07-16T14:51:10-06:00 Adobe Illustrator CS5.1 256 68 JPEG /9j/4AAQSkZJRgABAgEBLAEsAAD/7QAsUGhvdG9zaG9wIDMuMAA4QklNA+0AAAAAABABLAAAAAEA AQEsAAAAAQAB/+4ADkFkb2JlAGTAAAAAAf/bAIQABgQEBAUEBgUFBgkGBQYJCwgGBggLDAoKCwoK DBAMDAwMDAwQDA4PEA8ODBMTFBQTExwbGxscHx8fHx8fHx8fHwEHBwcNDA0YEBAYGhURFRofHx8f Hx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8fHx8f/8AAEQgARAEAAwER AAIRAQMRAf/EAaIAAAAHAQEBAQEAAAAAAAAAAAQFAwIGAQAHCAkKCwEAAgIDAQEBAQEAAAAAAAAA AQACAwQFBgcICQoLEAACAQMDAgQCBgcDBAIGAnMBAgMRBAAFIRIxQVEGE2EicYEUMpGhBxWxQiPB UtHhMxZi8CRygvElQzRTkqKyY3PCNUQnk6OzNhdUZHTD0uIIJoMJChgZhJRFRqS0VtNVKBry4/PE 1OT0ZXWFlaW1xdXl9WZ2hpamtsbW5vY3R1dnd4eXp7fH1+f3OEhYaHiImKi4yNjo+Ck5SVlpeYmZ qbnJ2en5KjpKWmp6ipqqusra6voRAAICAQIDBQUEBQYECAMDbQEAAhEDBCESMUEFURNhIgZxgZEy obHwFMHR4SNCFVJicvEzJDRDghaSUyWiY7LCB3PSNeJEgxdUkwgJChgZJjZFGidkdFU38qOzwygp 0+PzhJSktMTU5PRldYWVpbXF1eX1RlZmdoaWprbG1ub2R1dnd4eXp7fH1+f3OEhYaHiImKi4yNjo 
+DlJWWl5iZmpucnZ6fkqOkpaanqKmqq6ytrq+v/aAAwDAQACEQMRAD8A9I3WoR2UsFrplqshllll kiiVUSQmRjOqSkpF65dnlKkktxetDviqPhvYpDJGssTzxMvqRh1BVJGPplgC9OSbrX7Xt2VV45Vk BKhhSn2lZeoDftAeP8OuKrsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVdirsVWx yxyoJI3DxturqQQfkRiq7FXYq7FXYq7FXYq7FWM6hp99aauupGyTVI5GSICLjFJApmMgpHwPqjkE blJJ8L1YcFJ4qo/VdP1DmkulygXHGUPBcs7W0iuG+2aOy8XkDUSnIDidgvFVV0jy/p2lc/qYkT1X klmUyyurSzENLIRI71Zio3JNN6Uq1VUyxV2KuxV2KuZgoLE0AFScVQ2n30d9AZ41ZYuRVC1KnjsT t4NUfRiqJxV2KuxV2KuxV2KuxV2KuxV2KuxV2KqdzN6NvLNQH00Z6E8R8Ir1PTFXi2g/85A6pqWi apfy6dYwS2NvZXCAXXOIfXLqO3YyyReqqKgk5bsG2+zTcKWW+WvzLutU1bTtOdbC6F7eX1o17ps7 zwAWVtFcBkZkXkWMtDTYePbFCzQ/M1/YeTL+/giWWy8uadcu6SH4prmBWlCAgkqiooFad/bAxiVe z85ec7jzJLpqaZZTW6aZHq8aRTyC4kjnMqRQr6iJEJC8VCWYLv1wskRq3n++01o47rTFtLp7VLl7 Oe4iMis0kiemDGzK5/d1+Dl1wEut1faHgzMaH03z589vs6WmF/5se2lkj4QQql+1j9YuZfShULai 45u/E0JJ4gYssmt4SRsPXw2TQ+ni/YmWgas2qWLXLCP4ZpIg8L+pE4jbjzR6CqnEOTps3iR4tuZ5 bhMsLe7FXYqgr6e7ju7NbeOSQSMyyD4VhC7Fmlfg7gha8AKBm2JFa4qrpLcEJziVCZHVgX34Ly4s tBuWAU8dqVO+26rrRL1YlF3LHNMB8bxRtEpap3Cs8pApTbl/YqrYq7FXYq7FXYqh7OwtrNXS3X04 5G58B9kEgA8R2xVEYq7FUBdajLDqdtaqkPoyAtdSzSPEyAkJF6K+k0crNKQrKZFK1B3qBiqPxV2K uxV2KuxV2KuxV2KuxVwINfbbcUxV2KuxVJL3y3Gk6X2jFNO1CPYlEpDMhNSk0a05ddm6g4opOxWm /Xvil2KuxV2KuxV2KuxVA3umXFzcCRdSuraICL/RofQCcopll58miaSrhfTcc+JQnYH4sVU4LC+t bVY4pIpWgZ/QUoIecXpkRws0YonFuPxKnRfs1xVMsVdirsVdirsVdirsVdiqDv8ATmvHhP1qaCOI uXhi9PhLzjKASh0ckLy5AAgV+1UbYqhIbf1NZSWa6llKpMI7erRIDHMd3iL/AB8FlUIwjp+0xNY6 KpvirsVdirsVdirsVdirsVdiqE1D6yyCOBZQSQTJF6dQAdx+8ZevyxVKGOsJcQwSXVx6koLInp24 5cKFtxLtsRiqtBDrkbIXluJArKWBS3HJQd1/vu/jiqJ0xNShPG6NxcFti8ggVVNSa0R2bpQd/wAc VTPFUHc6xp9tMYZnYSAAlVjkfZthuqnFVIeYNMYVV5GAUMf3Ug+EkCu6jxxVJ/M/5haX5etVvJ7a 6vLeSWCCNbRFaUvcRySqeMjxfDxj7b1PTFUDb/mjYX0bta2F3EIGVpmmFuy+nzhUryimkCufrC/C 9GXfkBTFB2TG188rc6pLYJoupUhZ0lvOEBgDRy+i1P33qkct6iOnHfFJV185WTWa3Qs7sK37DJGr Cis3eQL+x0r1oOuDicMawcPFwy+z9af4XMdirsVdirsVdiqldwfWLWa39R4fWRo/WiPGROQI5I1D 
RhWoOKoDyvokuh6DaaVNqN1q0lqrK2o38hluZeTs9ZHPWnLiPYYqmmKuxVbJHHLG0cih43BV0YAq ykUIIPUHFVOW0iluILhmkElvy9NUlkRDzHE841YJJ7cwadRTFVbFXYq7FXYq7FXYq7FXYq7FWLed Py60XzdcWU+pXF3C9hHcxW4tZEj2u0CSklkdq8VFN8VULD8t9M0qEnTJSZlmFzHHOkQg5qKKnpW8 cCInsiihAOKCu0L8s/LWlW0n1eGSG4uraC3uXWQciIbdYPiZFjEpIXkS6n4qsADgpE4iUSD1TOLy lp0T2zpLNW1dXjDMjbr6YA3U0/uV+zQ9R0OPC440kRW52/Z+pO8LlOxV2KrFgjWZ5gKPIqq58Qla f8SxVcyq6lWAZWFGU7gg9jiqnaWlvaW629uvCFK8EqSFBNaCpNAK7DoOgxVVxV2KuxV2KuxV2Kux V2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxV2KuxVj/m3WZNJtGulXnxaFApdo1Hqs4JJVl/lHU4 C05svAL5b9Uj8p+bb3XNRW3mgNvErVDpNMeY4OafEfEA420YM2SUzGQAHRkJvYpotWSITRy6fzX1 PVkKlvT5gip6juPlhcxjnmPzle6Xe6rFDbrJDpj2ylpJ5gz/AFmMvtRuoIpTw37YC6fVdpyxymBE VDh6/wA4Jo2tSRWkF24cx3DRBlDyMU9WGEqFHNeVZJafTjbsDqKgJEc6+39qtdLq07TWtvdGzuFk ZIp/jlXir255FDJuSsjD7XfFvJW+Xbi/uLC0vZrtpxcANQqyUJlRSN3YMo5Mtab9cWIJvyZNhbHY q7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FUo13QY9WHo3KrLZMF9 SEs0bc42JRgyg9ORxas2GOQVL8UgdO8n2+lSxS6VCkDo4Ll5pJQUowZQGXavLrgpjh08cf0/ffPd M7vTnn9aRbeJLmWF4DMJGBKuP2gEHKh6V6YW5LLjybZXd9NqF3AHvbleFy6TyojqECUMfErTiBgp xJ6DFKZmR6pc9zvtXLkjI9FlVZISkZtGVY44C3IBBGkZDc435V9MYXJ8McPDWy210KWzkX6sFWOr NJzlZ3LERBSGKdvQXrXBSiAAoIiDSjFM0iIqGVlaSj/DVWViQgRFqeHXCkRTPFk7FXYq7FXYq7FX Yq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq0zKv2iB88VaEsRbiHUtStARWmKpYn mry1K9xFBqlpcT2qs9zBBNHLKio/puWRGLAK/wAJqNjiqYPd2qRJM80axSUEcjMArcvs0JNDXtig yA5twXNvcKzQSpKqsUYowYBh1U074ojIHkVTFk7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FXYq7FX Yq7FXYq7FXYq7FXYq7FXYq7FXYqgdW/Q3pR/pX0fS5fu/X48eVD05bdMVQlr/hP65H9W+qfW/h9P 0+HPoONKe1KYqxnSf+VJfpu9/Rn6H/TFbn656Xp+rXlJ9Z/2VfVrTenLtXFSv03/AJUz+mJvq/6F /wAR82+ucvqv6T5eqtfrFP8ASK8+FfU3rTlvgacvh0OOqvr39GYaZ+ivSl/Rvp+n6p9b0qU9SgrW nfjT6MQnFwUeGuf2ozC2uxV2KuxV2Kv/2Q== uuid:c5eea29e-5a2f-4c92-a441-b709a51f3731 xmp.did:C73F93762A0DE411B42FD2453D6540A0 uuid:5D20892493BFDB11914A8590D31508C8 proof:pdf uuid:d9618e09-364d-4545-a3c8-10152cc09e2c xmp.did:2A3809C99907E41184C4D7B68ACFE949 uuid:5D20892493BFDB11914A8590D31508C8 proof:pdf saved 
xmp.iid:7C371A9505FEE311916CC1FAFB62AB15 2014-06-27T08:16:08-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:7D371A9505FEE311916CC1FAFB62AB15 2014-06-27T08:50:57-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:7E371A9505FEE311916CC1FAFB62AB15 2014-06-27T09:13:05-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:D713ACBD9904E4119064929701B7940E 2014-07-07T10:20:46-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:DB13ACBD9904E4119064929701B7940E 2014-07-07T14:47:34-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:DC13ACBD9904E4119064929701B7940E 2014-07-07T14:52:36-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:F24894488907E41184C4D7B68ACFE949 2014-07-09T10:51:33-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:F34894488907E41184C4D7B68ACFE949 2014-07-09T12:13:24-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:FA4894488907E41184C4D7B68ACFE949 2014-07-09T12:48:26-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:263809C99907E41184C4D7B68ACFE949 2014-07-09T12:49:41-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:2A3809C99907E41184C4D7B68ACFE949 2014-07-09T13:15:02-06:00 Adobe Illustrator CS5.1 / saved xmp.iid:C73F93762A0DE411B42FD2453D6540A0 2014-07-16T14:50:48-06:00 Adobe Illustrator CS5.1 / Print False False 1 10.000000 2.777778 Inches ArialMT Arial Regular Open Type Version 5.10 False arial.ttf BankGothicBT-Light BankGothic Lt BT Light TrueType mfgpctt-v1.67 Monday, August 30, 1993 3:58:29 pm (EST) False bgothl.ttf BankGothicBT-Medium BankGothic Md BT Medium TrueType mfgpctt-v1.52 Monday, January 25, 1993 2:11:37 pm (EST) False bgothm.ttf CenturyGothic-Bold Century Gothic Bold Open Type Version 2.35 False GOTHICB.TTF Cyan Magenta Yellow Black Default Swatch Group 0 White CMYK PROCESS 0.000000 0.000000 0.000000 0.000000 Black CMYK PROCESS 69.531298 67.187500 63.671899 73.828101 CMYK Red CMYK PROCESS 0.390601 98.828101 96.875000 0.000000 CMYK Yellow CMYK PROCESS 3.906298 0.000000 92.968798 0.000000 CMYK Green CMYK PROCESS 82.421899 6.640601 95.312500 0.390601 CMYK Cyan CMYK PROCESS 69.140601 
14.062500 0.000000 0.000000 CMYK Blue CMYK PROCESS 98.828101 96.093798 3.515601 0.390601 CMYK Magenta CMYK PROCESS 0.781298 98.437500 0.390601 0.000000 C=15 M=100 Y=90 K=10 CMYK PROCESS 17.578101 100.000000 90.625000 7.812500 C=0 M=90 Y=85 K=0 CMYK PROCESS 0.000000 89.453101 84.375000 0.000000 C=0 M=80 Y=95 K=0 CMYK PROCESS 0.000000 79.687500 93.750000 0.000000 C=0 M=50 Y=100 K=0 CMYK PROCESS 0.000000 49.218798 98.046899 0.000000 C=0 M=35 Y=85 K=0 CMYK PROCESS 0.000000 34.765601 84.375000 0.000000 C=5 M=0 Y=90 K=0 CMYK PROCESS 5.468798 0.000000 89.453101 0.000000 C=20 M=0 Y=100 K=0 CMYK PROCESS 19.531298 0.000000 98.046899 0.000000 C=50 M=0 Y=100 K=0 CMYK PROCESS 49.609399 0.390601 98.437500 0.000000 C=75 M=0 Y=100 K=0 CMYK PROCESS 74.218798 0.000000 99.218798 0.000000 C=85 M=10 Y=100 K=10 CMYK PROCESS 85.937500 16.406298 100.000000 3.125000 C=90 M=30 Y=95 K=30 CMYK PROCESS 89.843798 33.203101 97.265601 25.781298 C=75 M=0 Y=75 K=0 CMYK PROCESS 74.609399 0.000000 74.609399 0.000000 C=80 M=10 Y=45 K=0 CMYK PROCESS 78.515601 10.546899 44.921899 0.000000 C=70 M=15 Y=0 K=0 CMYK PROCESS 69.921899 15.234399 0.000000 0.000000 C=85 M=50 Y=0 K=0 CMYK PROCESS 84.375000 50.000000 0.000000 0.000000 C=100 M=95 Y=5 K=0 CMYK PROCESS 98.828101 92.578101 5.859399 0.390601 C=100 M=100 Y=25 K=25 CMYK PROCESS 100.000000 100.000000 30.078101 21.484399 C=75 M=100 Y=0 K=0 CMYK PROCESS 74.218798 98.046899 0.781298 0.390601 C=50 M=100 Y=0 K=0 CMYK PROCESS 50.000000 98.828101 1.562500 0.000000 C=35 M=100 Y=35 K=10 CMYK PROCESS 35.937500 100.000000 35.156298 8.984399 C=10 M=100 Y=50 K=0 CMYK PROCESS 9.375000 99.218798 48.437500 0.390601 C=0 M=95 Y=20 K=0 CMYK PROCESS 0.000000 94.140601 20.312500 0.000000 C=25 M=25 Y=40 K=0 CMYK PROCESS 25.000000 24.609399 39.843798 0.000000 C=40 M=45 Y=50 K=5 CMYK PROCESS 39.062500 44.140601 48.828101 6.250000 C=50 M=50 Y=60 K=25 CMYK PROCESS 51.171899 50.781298 61.328101 22.656298 C=55 M=60 Y=65 K=40 CMYK PROCESS 54.687500 59.765601 64.453101 39.843798 C=25 
M=40 Y=65 K=0 CMYK PROCESS 23.437500 39.062500 63.671899 1.562500 C=30 M=50 Y=75 K=10 CMYK PROCESS 30.468798 49.609399 74.609399 9.375000 C=35 M=60 Y=80 K=25 CMYK PROCESS 35.937500 59.765601 80.078101 23.437500 C=40 M=65 Y=90 K=35 CMYK PROCESS 39.062500 64.062500 88.281298 35.156298 C=40 M=70 Y=100 K=50 CMYK PROCESS 41.796899 69.140601 97.265601 48.828101 C=50 M=70 Y=80 K=70 CMYK PROCESS 51.562500 69.921899 78.906298 68.750000 R=43 G=182 B=133 CMYK PROCESS 73.828101 0.390601 64.062500 0.000000 R=48 G=161 B=112 CMYK PROCESS 77.734399 12.500000 72.656298 0.781298 R=38 G=148 B=115 CMYK PROCESS 80.468798 19.531298 67.187500 3.515601 R=125 G=143 B=135 CMYK PROCESS 53.906298 33.984399 44.921899 4.296899 Grays 1 C=0 M=0 Y=0 K=100 CMYK PROCESS 69.531298 67.187500 63.671899 73.828101 C=0 M=0 Y=0 K=90 CMYK PROCESS 67.578101 61.718798 57.812500 45.312500 C=0 M=0 Y=0 K=80 CMYK PROCESS 63.671899 55.468798 52.343798 27.343798 C=0 M=0 Y=0 K=70 CMYK PROCESS 58.203101 49.218798 46.093798 14.843798 C=0 M=0 Y=0 K=60 CMYK PROCESS 51.953101 42.578101 40.625000 6.250000 C=0 M=0 Y=0 K=50 CMYK PROCESS 44.921899 35.937500 34.765601 1.171899 C=0 M=0 Y=0 K=40 CMYK PROCESS 35.937500 28.125000 26.953101 0.000000 C=0 M=0 Y=0 K=30 CMYK PROCESS 26.171899 19.921899 19.531298 0.000000 C=0 M=0 Y=0 K=20 CMYK PROCESS 16.796899 12.109399 12.500000 0.000000 C=0 M=0 Y=0 K=10 CMYK PROCESS 8.203101 5.859399 5.859399 0.000000 C=0 M=0 Y=0 K=5 CMYK PROCESS 3.906298 2.343798 2.734399 0.000000 Brights 1 C=0 M=100 Y=100 K=0 CMYK PROCESS 0.390601 98.828101 96.875000 0.000000 C=0 M=75 Y=100 K=0 CMYK PROCESS 0.000000 74.218798 98.437500 0.000000 C=0 M=10 Y=95 K=0 CMYK PROCESS 1.562500 8.593798 96.484399 0.000000 C=85 M=10 Y=100 K=0 CMYK PROCESS 83.593798 9.375000 99.218798 0.781298 C=100 M=90 Y=0 K=0 CMYK PROCESS 99.218798 87.109399 3.125000 0.000000 C=60 M=90 Y=0 K=0 CMYK PROCESS 59.375000 89.453101 0.000000 0.000000 Adobe PDF library 9.90 endstream endobj 7 0 obj <> endobj xref 0 8 0000000000 65535 f 0000002856 
00000 n 0000003119 00000 n 0000007055 00000 n 0000007315 00000 n 0000011197 00000 n 0000011261 00000 n 0000063214 00000 n trailer <<2C780593B9ACC84D8EBE5A09F6477FED>]>> startxref 116 %%EOF scikit-bio-0.6.2/logos/logo_inverted.png000066400000000000000000000261661464262511300202640ustar00rootroot00000000000000PNG  IHDRk/ή pHYsodtEXtSoftwarewww.inkscape.org< IDATxy|e$]-(n2 ::#Xh]iQgQ\~Ц@MM`@g2.nAQVAǕRަy RJ4s>+/l͹${sw@*^w󖦓ܓˢpIϔ4AzIOͭO^=G;[~HI׵(qjtz 5:$@ Bce @&]5&@u  qbdq{50=/`5{50mX k2bce =l­9+kd%ʚa @&`-B"Ȅm X k2!Ā  +be =L1`9{50iCrg._dÃiY]u Wִ7@]#Ȅ[ؕs"$k50V}C(lfKѤ 6ž_zECX RM Z`k2E=!FX$Za @6Æ5a 5LX85Pk1xe kdJW֜mde kd|FyPL~`ZEDXJja @BnܿR3 -^Ȏiujq[MyjÕ@I+X s/<ItХ$msMwy{stdѐ,tȚyhxQTze]5uw]/i'i@ 3& R.NreqwwHnI{𰩒oGߒS䄰 ;WZeyeJ5-nնO,:)@k2^YKTa˛7oJQ}vʕ`B Ȍ) ̒koxhOk?3 NT =#d,Xw#[HdU a @f"aWk6<PyxUZk4]Y&Z{.!`U@X6MJ2^zCtr<&a @f[ˏI)$?,XA'Gz0k2H*+KՂ%6W[Тnu֖5@!dpr7ڰG>*u=( k7DXk[ZI X2#( k^df~Q]0kޝ!Ȗyаf5p"%$KrzkQF "d۲e|iТf]+sla @̒+kQA6uuhf@Ad*N,hX[a܋054\.oYa @5YaYIB3 (1>4qHjPx\FI$'I[s(;$*i?U &: KZ'i>P{ÒJaLE6{)`bnlzNX2Q"?T?XlͼgӮ-vrl)žQjڰyuz+fi,NRlczmg/zNҽRy:F^I)̒->mYp+kJeM.-Y n Z3]7{c4J,OxR6*kTX4kGiuMwI~t5=瞊?8>GR))>Xznz8yܖ'?jU.IR&I.&忔^mn\^Z{^<ًY0RIג>%Ӓ4YRY&&kF)IK ںk}.}|U+n;3O鷫k֚TMJ j;sUWl^,q1L=IWh+XH]JݽY_з乒 Y\K̞`ȚP^nn=4?=L#3M\8W*/"I/{S4ULYsȝӮ]; kۖ:\ҏ%)A;T 'Ҷw% Vc0׭ z=h q~OU* W vP2Pe2Of?m;@mjR~S5UO4%MI!ޠhĢ$+dM7?`>vqq9!M)N.tZsԙ&7]z }/v֞/#jƾ^ UT*uDI{T%ξqEj^5.-8񺅯! 
_'YyQKO$8͖4>ۙjz"y[ k5b0Kk-rk_(Ȁ%KzE,kNjgJO"|0vfO"wo$vQב.>5M?ΐoaNjsC_RgQUŻo=ּd8嶲fRI]\%gwqޥGo8Ǝk:޳T=(L#@ī-,7 YӤʲʡF<ƎԼ'h Z^Ɂ/es%>@y7_&龔fzxTGS%kk􇾌8RP wk|ⲞeY3.\UJ*»I(U8um8xI4W=@Hk/e$ha-NI,Y9㌭kIInCHz^<&Q1fG 0@yWKє9ouTW5E6i=+9=-'䋳z^S5<9Zk- wׁJ\.[-:Y..hM ǭw=Ǟ:ʋ$5rI3WVZVA#i<^ͤSh^}3qlUEۇnkTS`5˙K&=ls@v2QIӿK YjuSW*7_6N;U]IG`Q&+Ο 7 JƮ?`UtG>~*SBtHzk_vI\v&$WuVIk*E魬ͻ`?ik[./=hBnvA&ß,ʝK?m\&PCKz_ClI{a @.8ZX+ѷ_de6&PK,68P8`dkZ@07]WsC qǶawPVIII< $\)`I]x>|y{髒99 ld&i7NE6 M{%ݳR=$am~߆{%>Dml09Wp\zC+fٶs;%I gWS|BUkn amc|NoBz^{56 %iBNscIZDka$zfw~;lQ oQ5yyuERzd=dw|LaRɥ4.E%?vN}۸J:b~߼񯇘 ($AuY@<#!ךFfStwJpӌp}u;-vť;+_^a\$f_/ے.t[su{-\Z\Yk[tY, #uVۧ-t]ܴGKg=sI_s7]YF^uDdJ=XSIj y/9$?z\5Kۿ8E@_IzCzseDq)e>F}(Kv`!ۇzfU@[b;=KN\*ͽnj~iq> I0K$qE !m}RȒ{*JK2idrIyw낕cp_K5?O\@Yvw}"k%}4!z9RSd&>' {? k@#rI?MfڢfZe!A dG/j~!钬!s%5=PV̘K ytӖZv{]z=jhP 7%>Yu _P@Vvt#FI8WѽT {а]Yѷdd_Ԟs}uu{_쐋Hhn/|xb }pcA &=t,9ςd4H5ˤK8ӟ͒6d@h;R4PGX߾$}_U=FII'I/^4=`D~{vR n'_ uV߻$m|:x=pp՞ɧvޓR}hxWc=t*xI 23VA6LIiI?ϹNxjI[c7GλjP59Ѽ؅]vjPܒx5z AQCE'ο-A{l߼BmXk3iu3lϤE^eY+fId%AnyEXR)AjP#2G-xՖZnΙ?*7 97zrJ$iIyOR= yPaDAn}s_S0P\澾-4uxặu+YDXM~_JPJqo <5T6{(_7VF(Fluon v,+yӬ]5݌RkCg|<ƺ&IA (%..{q .vѵMOҫBTk0{{*sڨ)shsɥK̞inC XmԒ Mhtx I/Ng\f;,i 2JwcH =H&c~'oݔӆ^^&OuM ]@%~w3ԫ>!I=K#0_5Ic{G#O漇'zf $ImSx?{<]scz;MXP7o]?yQnY n5A i%ݓ5V\E`_.:a"i7ݷ7=L8qb;dh>rʹEF}ْz< IJ%Av㋒)/9Q$KzGӝF(kI#[%}T:ir|M-C$bUA2xKÚ$K:JEHX咎TQuf#U=sw1k6=(4uU%= 47v7oaL[􁫧PS.rʹE%K>JGl/}<fK+$'%^*Ex\-[ ]z{sh=.|U{T}^<1.ұJnUO{R՜D[\zZ;0P'֘tKϛ7\qR~5J̕=p8Ihs?J$MyĢ~%LwE_7.5$隑'H:H/IJjy٠vu X#O(I:Xo~1YsA7Sj4)ȵæՒZ0 IDATqG\KvY@׻Y|T+%_.l|j K5 kÚ$-;&l)ߵmٲI5®%{ޒw.^/!؅Ӭyy>:~BIFG\o&WHZQ#(oɻ3ja @!ĥR Yڞi'&ʶ+WPxk y865[,j,gw.ďh[[*eBp w?Xty4#sIo[uM@GC!šKVyiZ5=uuG&>FXP[ ٝ4IҨ򎾮 z"(I$i^dҧ%5j?=Ҕ1b$@o)J"i]ʭmyG_)cHE_}OoO_;<{PL5('M\.oN МTKgz 5mM"@A}%&]w9> a $;m zΒқ˴{#@ h%OikzL7a jD@i)ژN_6gSv@蝽vGSn512}̔] @ 3RnUm]=);6HAm*Zs釒6fnnr.ىۗvސja L@;uҿys@+S@C>kPgzg/N&VI u{%?VSnUrplc$ԱaJ:2vs3 zPkPWhV&~7ބ}(oL gd8r9d EXͳ 7]}K5h &Y@bwϠݟ9wd CX1IM)Z/3{R@~h@ݳ;?6j_yr9b`XYַf}I_$^4/qٕ&=7fT.7 GXmYШtK/ˠIv`kIRGO7|Kǧ7 Z?^&ξq|Ei[岷ο-^)2y:{;5KRҿKٽL5ة%3KƧ݋x*`^K:(v>kmMfa [JWH:"vDM-= 
zPX5{AQ]^A{&4ů/U^A/ )ubyUq23}7v/>A c{}Lɍ_@bƌuYr;GRZ'72~7S @M5^[n7 jtS폅 @-be ׺fd>MR-tĵT^Bґ{!}%=z6ja ĹnJܾeW> 5$k_^3;]tֻ͒&O,U4v ;IENDB`scikit-bio-0.6.2/logos/logo_inverted.svg000066400000000000000000000144751464262511300202770ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/logos/vertical_powered_by.png000066400000000000000000000367711464262511300214570ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs B(xtEXtSoftwarewww.inkscape.org< IDATxy|\e?9gtoZ -uWT 4E{*?Ж- U m֑EPVu(Ph M'sIrfLf|߯W_$<9 |YPDRJbJRJ)iVJ)ʐ&hR iVJ)ʐ&hR iVJ)ʐ&hR iVJ)ʐ&hR iVJ)ʐ&hR iVJ)ʐ&hR @eK$yվt}f8qOg~1**.&[['{<Al$NVs=Lcc.l---դ g>B+_=3cKkG+ 3D}kOk{u;;w37Tkǧ{be>&>,F _>7Uc|{ D7Dvmmm틅)88sEuuU=q@>sB mtVF`ּXk{)=d)/[c~?KXZ;<.q)a{AhiIFn*nXH΀A1?SxϡZa[[-w )7 r xK{OoK$j3<((*,IΩۢD 8QHc\5 q#MK{OĬR4 8>,"MƗZ:7: }Fn͘MЮˇcChmM'5T֭;lޔyLO\^_'L]x;gY-Hn,nf%cĬx<%L\Y:g35/Qp7E ^D$mx}}Pl}HF7NOoinn^D'E!q֮_68{z*<3Xf[}]^H$&zq"r}Kk'Fī+u%pn3]_sjw'eHۏ?|cliKGY'򭷜i~>u+Buu7_^xs~uuC%L{5_ũ^3W%~%YÎUg3nvϾs]HCmt$2 p-2ʸ[bOmu:V:s= vg#&d'r=05xj1-"YϤÀcFxH2ο" 85'˪(b2qt:Ο7o kjmO,[-:?9kgb?5p0iʴ UpU}g2)iAM͟DS1_t|/=2;#CuoŬǧڶHp3 3[Okk~>qX<!t7ᠿjS m1ؚ0{Ls PxO "[Z:B M6}-N4>KL&$1&_䢀4{.l u MzA3R7ỹ'ʝH{-lgix޳ǓXAǭTpi@SG<ߏy8C?&@U퐡oiKƚ-3i Z?hno?zn:xF8_ u Z԰ O c?BƘ~z6wXYƶU0 Y;v.>[ޕO} ȷNͰ06l|(գ&>ß`!Ы6/ DCb3A'6^ֹƺ`vms-/L2Ck}e4`ݺ;w+/dPb`0D%L?Ǘr癷%掤/7a@ OAѩZY[sOgve,8ssK[#xΩ ${0.ĪEؿ@+l5%r&d?P0K#~6cꔋo_5" !j^KA {R|JL' ?O pAM͟Z[;q>9˸ɸL@\}ADz1\-8 pb!2F"r]C}M֊;}ݙN,5| )+/a :8>ްKK}~,oC9q!Si9A DHQ&7D/mk9%'Jia7 uujihpmƩ)|WԌ cw 8 D-@TwZ+uEE`BmI>;ekEgs2@ƪE=(fb7GA~S-u`+s(#𞍯m~kxˀ.Tk̶ǧ`!Z}1|?5P/GڈcKkp|$i`2sjkkg{սҖx(cO~E@+Zx q8]#F'fϞzOV(EJ1Ƶ#3Kle=¬dfؿo.J0(O↖zcE<ɻv {)ӦKD4Agl`#"-]L\D>4̔LVĂx-m ؽA>R]__?'`}[qfR|e7 0`H$fhƵcue k+uSkXV"|'y'+RY1#x؝t,k_lkk{i_^Y{L![wy[Z8/սFɋ!]YkļѾZiƧ~zsDXQkǦ9$ C^5rݎ~ C'#cfb‚ܙ=8z}}eУZcMmm!Tnր|חv|n~^ NlHؿr!}V577nO (kێFGu-\< 6-nǢ1c푴$23A >H$*Flk4I^ {ux ĘH$J^BCMMz֖@m9qtCC|tf6` C=[:.-Dn`3B2N_ACC 6kH֚Jx3v'{ON||$["qYP !x/0(k\Zi!Mܒ}_' ĀE]]ݦ\3^_AY6-8QSSaLox@:=R&ºx@&"6@v7!ڌK"pAIsZ[o߻G: 0$ITx>q*=w0CHskkpk!vؑCZ@N̘A>?l$&"IOp9 ¬9]D"1 K󚚚[Ybޮ="cqspuygt/$ 
Z{{]F}M{@KBknKԏtLcP[#LLԵmA(@j.8˺xadvsH}ޱͭ{ɻt8hD"13ɵ{;xPU;~.a:֯c|5bZZn? 5}NF2"MEV_;)X@Ïu&Et+u\ Sw?l;ZUlֺGLOwyc"P1ԷXY\ OGJvfKTCioo;w?R { "jkkW"ҒS{\׌e7KAt9Yqb-tL:HniGk2m.K83p8G8?e z-"6hv9^<棓Mqc-Z[ogk[ǭE<kǶ1QS0O`A ZH(7g><Җ/<xB׆_mpSPv]ޓeɻ0p Q8V_?g-md=BCWO4[JZY{+;;'9bsf.(g` qOτʆkG~KѨN?<SxC4A?6Nl_8M@N0u  {}9B#@s G2Aovp=z:u i Cݯ'${gillB8=ܑH$/;-А7*G`Pw>& 93kjyjX:]뽋JQ= k7a9Q0hu%9iO @goGuXݧޑo18t1Yrn}=5KF[%^7rWtt| V09cA+;K թaS!?o\3=P_}:)tK[t/]l ff,3_gv:ub3!EGz&V%kF}9'Ѳϟ H$_M&q4)r/.fJ@nT?c#f=ScyD[W?m\x (C\[<ѝjnVOkmu쐳=<'Lk{ݽy7$F3{ڞde各iR&?tm* eRJ)FA+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+ReHRJ)U4A+Re-u*MMMfÆ f֬YfҤI70$J{wTT1뚞׸c1ZKZ֥yu]Zk)b8}7PDh1ƈXZǡc-}cc-E8O#Kkkt}BC8bR!-I#i1LIי^RR_ )Fe@#{#b{ }e︁{=(d2@R@`XXI_"}e,ޯ)2Ӧ2ҦʐھX"uVc߽m}~OӦ^ԵV811b∱ߧF$)}8bkZ#4bƘyq>gu]O:ƑIIc:IG18I 8yyw81i{zz3gδv[lmlly]Q;q'p;k֬XOO݉Wﹶw+l̵w11Zk]qױq\q눸2&F\#F\+BWkп] \!b)FRFRCi(0pI!ƁaB:&0H\81eQ^GO)5>=a!ˈК$]]+@- rX_}%h":)XV>D>I!U>BwZh%IB%RL}~_H]j' L}uN0ugoP#!YcL A}-_:)YuNBDuQtK HTnͲZ+V[#B +f"b)&k׊c,>E,L5lXW+/GNꘓ=x$=/[YYUWW{r'c% b$h5>5\çz[laNܹpwnLN͞&L^&I@29'&y=PU=VVzJ@{YQWXk 1׷6uZl뾲81hcq]?}שIcxixg8&)Oq8j*qq\qnNwnq]WvI,[ Ί !JKdҤIi&p[d*THU+o Wvgr/iGm7{[ks W-꩑G)5i*;iA/Z}\T$_x5y}T1(>MЪ$d.3):c4ŎG)5>hVeg…{ZM̊d u_L1QJUY T+-S8&T,䷋Rj|C&htST+B~6ombQJ?:[)pZ7Ufac㕽ŊE)5h Zq}+ WÕ{W]|ORjCvqo5b!&G}-BǝFڊ^f 7W\(ﯔR@c$?&8( +"!7n۶a Hdaf̘.{I9 [- p@^ьi@>HlD·+QܷS!&^Q7Ȍ)7A̬sBeg?R_NDW\Z~KI^u6K:`_Y~EYؓ^b?CӑxkPd H1ޗ@b_׬kBC'\l$ $O|f)M+}0>ӦܾUDbܯz{iF",JVHRS0Km(P|F$AC/zSJbYvDu,OS:鏝AdE'!_. $!@^>nC5c"Os/' c @)6~B'=k!Vz_?\?Pe}SCpduFEϘ6y~㈒z 5Kkpn[qU*Jؿ/uJ@y}(O{aP &VOr w6 S>uԙ۷o3Ґ7h.ku0X OxtBU6 WV` jA_~Yo Y|1fQ!+"i2RA(xShjxwM#5G:j{ a]]]} *t3?yl53KFÒqMc׷o6 z{(wA,yP?Ҹse{L,a jA[BH&/|S!+۶,zfLKv~QVHJkpGc(cIƚ7Q& ߌb%hx$D鐐N_~ C+%`-EkMA bwzvH'G-h7QlsƳl%W*ʌxS-"PT J"HlRv=`h "®`jŖY~& B~[ n VOkkEccݔغg*| ^- orwrŞ~6tM\T!n'"*[Y]]q+!S ~̊&;&"H^S=u_/u0*ve[g\Swmw wGY7J^pjB<0w٘^d,)z붝?X~ (KZ]܋V/[#V/l>:F M5 NQxQp (|>}JT2r+-yDs)5ZcX;>tՉH /xc.5kr? 
-9ABAN0 3)8UEF+C< 3FokO z5#p+( Lr hCsj}ĦD1E-6Xea̜6ee`ALM;-6ohH}h qܴ+.TDG]z^1jcK+zc50mb"cKbk/6>$[ zڴi3fNv'絽dBvqNNμ_odJG[~Eݿ7vو27ƘJo1?O3O} ?=D~o,ϔ:ƨr51+C3H5wpJ1!n%1 gUt SRp HXx+9o"ݥ,JbMk)KFŸrѕL2g®wO GP8*c[z~K?"WR3-BMs[-ӧ݈|@ 8vm JoAC=YTJy'S 0< EQlJvWD16+.y#ڔ*s3ibywq >`FtD@$K6xQSl/8s0; D IVMj}Gc(&8k;A P݄^Q{=RK*zo?.ULJ ǵR9(!_C|I {H{~1FZԼH\uWGR0}/EY#E38@5G|)D'=xE%'Đ! RSȻ<Hm^2z;#W BP|e# |a&WW-^ R#W/\1*;%.kat ǣa}]ء?d䟠 O덒H-`C,PAslA`sO}*kՒn~tKQI_:|%ct3D1C`= !IXE+tZL(QU FXO L,%kKSSƻWu,uzMn('_&W?JgG/۶=Y(;K?3բ#bTjx"'ɣ{McKDPˆb ݧVK!x:BT,)a_-eW@RQTb<u )-^ʈZJ[kp'-kɍp)Rǒ${ Wy:8rvGoڹ3.HauvôojLz$OCX߹mǑތ1g9,]z_g+UW~끹K E1y+7^xM\P23""npA4qΝcŁ3 Nda Y 6Zk6N]{KvnECkV]rEj%q2~Yci!!tBS`M;7o8#MŸwRz_ӌ ce) %^D("v W}:\vP=ŊQH,^@m[cy#x+oZdUAO_tU )WYzµM•R*bhv ^] `soܯUJhhV?뚆;_G]1 V]qZ*T4AQNξ . S kJ Z=Y<7kccccʅWK @kǔR#IpTc.-bW.r=kᓋL5w]z V)r Z=HpI@>+ӎZʍ&hU| @?.1Q^*鮼IV|{罌9JbӅJԨxzvKyox<=皚̞f9PeN⬕_9jwQJ7Mj[vyE@U&^bĥߴ[{]|E{1Bn]vUE L)5i ZE7H׹ ıy\_jll"L)5.i Z.k8aq-y\=Sܟ4}iERjxڂV*˖X?ޡ/q)MJ be!/ ؗRJ֊wę+/rCRJ:_xLs, 9^:]VJiVj+.XF?ǎX/ZlIQSJiŭT]B'\:|W=SJ?Ѣ5KB&%? 5Co4A+EkAA 9^kǘy/|S1RJk~ 03+_[lQSJ *o.;?/ W}NJRjYJ຋m΀^~V1RJ~*66xiP:^~aQSJj@WwN#Іo.Z,JpJQIǠؒ˖\}3sW{RjtT,YK~uƚRjTwjCJ? O[v/#. 
Z"ZzH~9]HliNE8)ʝNS'8BN t}tk)4TTdsG@&KūSReMR#`EoP/s46ݲRGǠAMMM->^8ꖕJ*%]+U^G~]R|hVD]z+nZňK)U4A+UB^6&v%Yt׮{QSJNSʟ#'ؒەH]JCR%⢫~E.9WJ"*+6u⿣^w?*v\J1hȥ701AqjE766z% P)5b4A+UܸBkme&eMJ)T1hR iVJ)ʐ&hR iVJ)ʐ&hR iVJ)ʐ&hR iVJ)AmWIENDB`scikit-bio-0.6.2/logos/vertical_powered_by.svg000066400000000000000000000331541464262511300214620ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/pyproject.toml000066400000000000000000000016351464262511300165010ustar00rootroot00000000000000[build-system] requires = ["setuptools", "wheel", "numpy", "cython"] [tool.pytest.ini_options] filterwarnings = [ "ignore::skbio.util.SkbioWarning", ] [tool.check-manifest] ignore = [ ".coveragerc", ".dockerignore", ".editorconfig", "aarch64.Dockerfile", "checklist.py", # created by conda-incubator/setup-miniconda@v3 action "ci/setup-miniconda-patched-conda_host_env.yml", ] # Basic configurations for Ruff. # See https://docs.astral.sh/ruff/configuration/ [tool.ruff] target-version = "py38" exclude = [ "skbio/**/tests/*", # ignoring to be able to implement pydocstyle "doc/**", # documentation "web/**", # website ] [tool.ruff.lint] select = ["E", "W"] # pycodestyle (E, W) ignore = [ "D203", # puts a space before class docstrings "D213", # puts summary on second line "D400", # redundant with D415 in place "D301", # forces raw string literals ] scikit-bio-0.6.2/setup.py000066400000000000000000000167101464262511300152770ustar00rootroot00000000000000#!/usr/bin/env python """Setup script for scikit-bio installation. ---------------------------------------------------------------------------- Copyright (c) 2013--, scikit-bio development team. Distributed under the terms of the Modified BSD License. The full license is in the file LICENSE.txt, distributed with this software. 
---------------------------------------------------------------------------- """ import os import platform import re import ast import sys import sysconfig import subprocess from setuptools import find_packages, setup from setuptools.extension import Extension import numpy as np from Cython.Build import cythonize if sys.version_info.major != 3: sys.exit( "scikit-bio can only be used with Python 3. You are currently " "running Python %d." % sys.version_info.major ) def check_bin(ccbin, source, allow_dash): """Check if a given compiler matches the specified name.""" # remove any parameters (e.g. gcc -I /a/b/c -> gcc) source0 = source.split()[0] # remove any path bsource = os.path.basename(source0) # now let's go search for ccbin if allow_dash: found = False # allow for the ccbin to be between - (e.g. gcc-1.2) for el in bsource.split("-"): if el == ccbin: found = True break else: found = bsource == ccbin return found # Note: We are looking for Apple/MacOS clang, which does not support omp # Will treat "real clang" (e.g. llvm based) same as gcc clang = False # icc uses slightly different omp cmdline arguments icc = False # Are we using the default gcc as the compiler? gcc = True try: if os.environ["CC"] == "gcc": gcc = True elif os.environ["CC"] != "": gcc = False except KeyError: pass if not gcc: try: if check_bin("clang", os.environ["CC"], False): # note, the conda provideed clang is not detected here # and this is on purpose, as MacOS clang is very different # than conda-provised one (which is llvm based) # so do not look for substrings # (e.g. 
do not match x86_64-apple-darwin13.4.0-clang) clang = True elif check_bin("icc", os.environ["CC"], True): icc = True except KeyError: pass else: try: if check_bin("clang", sysconfig.get_config_vars()["CC"], False): # as above clang = True gcc = False elif check_bin("icc", sysconfig.get_config_vars()["CC"], True): icc = True gcc = False except KeyError: pass if gcc: # check if the default gcc is just a wrapper around clang try: if ( subprocess.check_output(["gcc", "--version"], universal_newlines=True).find( "clang" ) != -1 ): clang = True except (subprocess.CalledProcessError, FileNotFoundError): pass # version parsing from __init__ pulled from Flask's setup.py # https://github.com/mitsuhiko/flask/blob/master/setup.py _version_re = re.compile(r"__version__\s+=\s+(.*)") with open("skbio/__init__.py", "rb") as f: hit = _version_re.search(f.read().decode("utf-8")).group(1) version = str(ast.literal_eval(hit)) classes = """ Development Status :: 4 - Beta License :: OSI Approved :: BSD License Topic :: Software Development :: Libraries Topic :: Scientific/Engineering Topic :: Scientific/Engineering :: Bio-Informatics Programming Language :: Python :: 3 Programming Language :: Python :: 3 :: Only Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 Programming Language :: Python :: 3.12 Operating System :: Unix Operating System :: POSIX Operating System :: MacOS :: MacOS X Operating System :: Microsoft :: Windows """ classifiers = [s.strip() for s in classes.split("\n") if s] description = ( "Data structures, algorithms and educational " "resources for bioinformatics." 
) with open("README.rst") as f: long_description = f.read() # Compile SSW module ssw_extra_compile_args = ["-I."] if platform.system() != "Windows": if icc: ssw_extra_compile_args.extend(["-qopenmp-simd", "-DSIMDE_ENABLE_OPENMP"]) elif not clang: ssw_extra_compile_args.extend(["-fopenmp-simd", "-DSIMDE_ENABLE_OPENMP"]) elif platform.system() == "Windows": ssw_extra_compile_args.extend(["-openmp:experimental"]) stats_extra_compile_args = [] + ssw_extra_compile_args stats_extra_link_args = [] if platform.system() != "Windows": if icc: stats_extra_compile_args.extend(["-qopenmp"]) stats_extra_link_args.extend(["-qopenmp"]) elif not clang: stats_extra_compile_args.extend(["-fopenmp"]) stats_extra_link_args.extend(["-fopenmp"]) # Users with i686 architectures have reported that adding this flag allows # SSW to be compiled. See https://github.com/scikit-bio/scikit-bio/issues/409 # and http://stackoverflow.com/q/26211814/3776794 for details. if platform.machine() == "i686": ssw_extra_compile_args.append("-msse2") # Cython modules (*.pyx). They will be compiled into C code (*.c) during build. 
ext = ".pyx" extensions = [ Extension("skbio.metadata._intersection", ["skbio/metadata/_intersection" + ext]), Extension( "skbio.alignment._ssw_wrapper", ["skbio/alignment/_ssw_wrapper" + ext, "skbio/alignment/_lib/ssw.c"], extra_compile_args=ssw_extra_compile_args, include_dirs=[np.get_include()], ), Extension( "skbio.diversity._phylogenetic", ["skbio/diversity/_phylogenetic" + ext], include_dirs=[np.get_include()], ), Extension( "skbio.stats.ordination._cutils", ["skbio/stats/ordination/_cutils" + ext], extra_compile_args=stats_extra_compile_args, extra_link_args=stats_extra_link_args, ), Extension( "skbio.stats.distance._cutils", ["skbio/stats/distance/_cutils" + ext], extra_compile_args=stats_extra_compile_args, extra_link_args=stats_extra_link_args, ), ] extensions = cythonize(extensions, force=True) setup( name="scikit-bio", version=version, license="BSD-3-Clause", description=description, long_description=long_description, author="scikit-bio development team", author_email="qiyunzhu@gmail.com", maintainer="scikit-bio development team", maintainer_email="qiyunzhu@gmail.com", url="https://scikit.bio", packages=find_packages(), ext_modules=extensions, include_dirs=[np.get_include()], tests_require=["pytest", "coverage"], install_requires=[ "requests >= 2.20.0", "decorator >= 3.4.2", "natsort >= 4.0.3", "numpy >= 1.17.0", "pandas >= 1.5.0", "scipy >= 1.9.0", "h5py >= 3.6.0", "biom-format >= 2.1.16", "statsmodels >= 0.14.0", ], classifiers=classifiers, package_data={ "skbio.diversity.alpha.tests": ["data/qiime-191-tt/*"], "skbio.diversity.beta.tests": ["data/qiime-191-tt/*"], "skbio.io.tests": ["data/*"], "skbio.io.format.tests": ["data/*"], "skbio.stats.tests": ["data/*"], "skbio.stats.distance.tests": ["data/*"], "skbio.stats.ordination.tests": ["data/*"], "skbio.metadata.tests": ["data/invalid/*", "data/valid/*"], "skbio.embedding.tests": ["data/*"], }, ) 
scikit-bio-0.6.2/simde-sse2.h000066400000000000000000030635751464262511300157270ustar00rootroot00000000000000/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin x86/sse2.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson * 2015-2017 John W. 
Ratcliff * 2015 Brandon Rowlett * 2015 Ken Fast * 2017 Hasindu Gamaarachchi * 2018 Jeff Daily */ #if !defined(SIMDE_X86_SSE2_H) #define SIMDE_X86_SSE2_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin x86/sse.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson * 2015-2017 John W. 
Ratcliff * 2015 Brandon Rowlett * 2015 Ken Fast */ #if !defined(SIMDE_X86_SSE_H) #define SIMDE_X86_SSE_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin x86/mmx.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
* * Copyright: * 2017-2020 Evan Nemerson */ #if !defined(SIMDE_X86_MMX_H) #define SIMDE_X86_MMX_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-common.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ #if !defined(SIMDE_COMMON_H) #define SIMDE_COMMON_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin hedley.h :: */ /* Hedley - https://nemequ.github.io/hedley * Created by Evan Nemerson * * To the extent possible under law, the author(s) have dedicated all * copyright and related and neighboring rights to this software to * the public domain worldwide. This software is distributed without * any warranty. * * For details, see . 
* SPDX-License-Identifier: CC0-1.0 */ #if !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < 16) #if defined(HEDLEY_VERSION) # undef HEDLEY_VERSION #endif #define HEDLEY_VERSION 16 #if defined(HEDLEY_STRINGIFY_EX) # undef HEDLEY_STRINGIFY_EX #endif #define HEDLEY_STRINGIFY_EX(x) #x #if defined(HEDLEY_STRINGIFY) # undef HEDLEY_STRINGIFY #endif #define HEDLEY_STRINGIFY(x) HEDLEY_STRINGIFY_EX(x) #if defined(HEDLEY_CONCAT_EX) # undef HEDLEY_CONCAT_EX #endif #define HEDLEY_CONCAT_EX(a,b) a##b #if defined(HEDLEY_CONCAT) # undef HEDLEY_CONCAT #endif #define HEDLEY_CONCAT(a,b) HEDLEY_CONCAT_EX(a,b) #if defined(HEDLEY_CONCAT3_EX) # undef HEDLEY_CONCAT3_EX #endif #define HEDLEY_CONCAT3_EX(a,b,c) a##b##c #if defined(HEDLEY_CONCAT3) # undef HEDLEY_CONCAT3 #endif #define HEDLEY_CONCAT3(a,b,c) HEDLEY_CONCAT3_EX(a,b,c) #if defined(HEDLEY_VERSION_ENCODE) # undef HEDLEY_VERSION_ENCODE #endif #define HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) #if defined(HEDLEY_VERSION_DECODE_MAJOR) # undef HEDLEY_VERSION_DECODE_MAJOR #endif #define HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) #if defined(HEDLEY_VERSION_DECODE_MINOR) # undef HEDLEY_VERSION_DECODE_MINOR #endif #define HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) #if defined(HEDLEY_VERSION_DECODE_REVISION) # undef HEDLEY_VERSION_DECODE_REVISION #endif #define HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) #if defined(HEDLEY_GNUC_VERSION) # undef HEDLEY_GNUC_VERSION #endif #if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) # define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) #elif defined(__GNUC__) # define HEDLEY_GNUC_VERSION HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) #endif #if defined(HEDLEY_GNUC_VERSION_CHECK) # undef HEDLEY_GNUC_VERSION_CHECK #endif #if defined(HEDLEY_GNUC_VERSION) # define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (HEDLEY_GNUC_VERSION >= 
HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_MSVC_VERSION) # undef HEDLEY_MSVC_VERSION #endif #if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) #elif defined(_MSC_FULL_VER) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) #elif defined(_MSC_VER) && !defined(__ICL) # define HEDLEY_MSVC_VERSION HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) #endif #if defined(HEDLEY_MSVC_VERSION_CHECK) # undef HEDLEY_MSVC_VERSION_CHECK #endif #if !defined(HEDLEY_MSVC_VERSION) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) #elif defined(_MSC_VER) && (_MSC_VER >= 1400) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) #elif defined(_MSC_VER) && (_MSC_VER >= 1200) # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) #else # define HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) #endif #if defined(HEDLEY_INTEL_VERSION) # undef HEDLEY_INTEL_VERSION #endif #if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) # define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) #elif defined(__INTEL_COMPILER) && !defined(__ICL) # define HEDLEY_INTEL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) #endif #if defined(HEDLEY_INTEL_VERSION_CHECK) # undef HEDLEY_INTEL_VERSION_CHECK #endif #if defined(HEDLEY_INTEL_VERSION) # define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_VERSION >= 
HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_INTEL_CL_VERSION) # undef HEDLEY_INTEL_CL_VERSION #endif #if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) # define HEDLEY_INTEL_CL_VERSION HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) #endif #if defined(HEDLEY_INTEL_CL_VERSION_CHECK) # undef HEDLEY_INTEL_CL_VERSION_CHECK #endif #if defined(HEDLEY_INTEL_CL_VERSION) # define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (HEDLEY_INTEL_CL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_PGI_VERSION) # undef HEDLEY_PGI_VERSION #endif #if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) # define HEDLEY_PGI_VERSION HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) #endif #if defined(HEDLEY_PGI_VERSION_CHECK) # undef HEDLEY_PGI_VERSION_CHECK #endif #if defined(HEDLEY_PGI_VERSION) # define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (HEDLEY_PGI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_SUNPRO_VERSION) # undef HEDLEY_SUNPRO_VERSION #endif #if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) #elif defined(__SUNPRO_C) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) #elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + ((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), 
(__SUNPRO_CC & 0xf) * 10) #elif defined(__SUNPRO_CC) # define HEDLEY_SUNPRO_VERSION HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) #endif #if defined(HEDLEY_SUNPRO_VERSION_CHECK) # undef HEDLEY_SUNPRO_VERSION_CHECK #endif #if defined(HEDLEY_SUNPRO_VERSION) # define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (HEDLEY_SUNPRO_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION) # undef HEDLEY_EMSCRIPTEN_VERSION #endif #if defined(__EMSCRIPTEN__) # define HEDLEY_EMSCRIPTEN_VERSION HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION_CHECK) # undef HEDLEY_EMSCRIPTEN_VERSION_CHECK #endif #if defined(HEDLEY_EMSCRIPTEN_VERSION) # define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (HEDLEY_EMSCRIPTEN_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_ARM_VERSION) # undef HEDLEY_ARM_VERSION #endif #if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) # define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) #elif defined(__CC_ARM) && defined(__ARMCC_VERSION) # define HEDLEY_ARM_VERSION HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) #endif #if defined(HEDLEY_ARM_VERSION_CHECK) # undef HEDLEY_ARM_VERSION_CHECK #endif #if defined(HEDLEY_ARM_VERSION) # define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (HEDLEY_ARM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_IBM_VERSION) # undef HEDLEY_IBM_VERSION #endif #if defined(__ibmxl__) # define HEDLEY_IBM_VERSION 
HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) #elif defined(__xlC__) && defined(__xlC_ver__) # define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) #elif defined(__xlC__) # define HEDLEY_IBM_VERSION HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) #endif #if defined(HEDLEY_IBM_VERSION_CHECK) # undef HEDLEY_IBM_VERSION_CHECK #endif #if defined(HEDLEY_IBM_VERSION) # define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (HEDLEY_IBM_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_VERSION) # undef HEDLEY_TI_VERSION #endif #if \ defined(__TI_COMPILER_VERSION__) && \ ( \ defined(__TMS470__) || defined(__TI_ARM__) || \ defined(__MSP430__) || \ defined(__TMS320C2000__) \ ) # if (__TI_COMPILER_VERSION__ >= 16000000) # define HEDLEY_TI_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) # endif #endif #if defined(HEDLEY_TI_VERSION_CHECK) # undef HEDLEY_TI_VERSION_CHECK #endif #if defined(HEDLEY_TI_VERSION) # define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL2000_VERSION) # undef HEDLEY_TI_CL2000_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) # define HEDLEY_TI_CL2000_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL2000_VERSION_CHECK) # undef HEDLEY_TI_CL2000_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL2000_VERSION) # define HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL2000_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define 
HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL430_VERSION) # undef HEDLEY_TI_CL430_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) # define HEDLEY_TI_CL430_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL430_VERSION_CHECK) # undef HEDLEY_TI_CL430_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL430_VERSION) # define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL430_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_ARMCL_VERSION) # undef HEDLEY_TI_ARMCL_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) # define HEDLEY_TI_ARMCL_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_ARMCL_VERSION_CHECK) # undef HEDLEY_TI_ARMCL_VERSION_CHECK #endif #if defined(HEDLEY_TI_ARMCL_VERSION) # define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_ARMCL_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CL6X_VERSION) # undef HEDLEY_TI_CL6X_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) # define HEDLEY_TI_CL6X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL6X_VERSION_CHECK) # undef HEDLEY_TI_CL6X_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL6X_VERSION) # define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL6X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) 
#endif #if defined(HEDLEY_TI_CL7X_VERSION) # undef HEDLEY_TI_CL7X_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) # define HEDLEY_TI_CL7X_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CL7X_VERSION_CHECK) # undef HEDLEY_TI_CL7X_VERSION_CHECK #endif #if defined(HEDLEY_TI_CL7X_VERSION) # define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CL7X_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TI_CLPRU_VERSION) # undef HEDLEY_TI_CLPRU_VERSION #endif #if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) # define HEDLEY_TI_CLPRU_VERSION HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) #endif #if defined(HEDLEY_TI_CLPRU_VERSION_CHECK) # undef HEDLEY_TI_CLPRU_VERSION_CHECK #endif #if defined(HEDLEY_TI_CLPRU_VERSION) # define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (HEDLEY_TI_CLPRU_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_CRAY_VERSION) # undef HEDLEY_CRAY_VERSION #endif #if defined(_CRAYC) # if defined(_RELEASE_PATCHLEVEL) # define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) # else # define HEDLEY_CRAY_VERSION HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) # endif #endif #if defined(HEDLEY_CRAY_VERSION_CHECK) # undef HEDLEY_CRAY_VERSION_CHECK #endif #if defined(HEDLEY_CRAY_VERSION) # define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (HEDLEY_CRAY_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_IAR_VERSION) # undef HEDLEY_IAR_VERSION #endif #if 
defined(__IAR_SYSTEMS_ICC__) # if __VER__ > 1000 # define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) # else # define HEDLEY_IAR_VERSION HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) # endif #endif #if defined(HEDLEY_IAR_VERSION_CHECK) # undef HEDLEY_IAR_VERSION_CHECK #endif #if defined(HEDLEY_IAR_VERSION) # define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (HEDLEY_IAR_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_TINYC_VERSION) # undef HEDLEY_TINYC_VERSION #endif #if defined(__TINYC__) # define HEDLEY_TINYC_VERSION HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) #endif #if defined(HEDLEY_TINYC_VERSION_CHECK) # undef HEDLEY_TINYC_VERSION_CHECK #endif #if defined(HEDLEY_TINYC_VERSION) # define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (HEDLEY_TINYC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_DMC_VERSION) # undef HEDLEY_DMC_VERSION #endif #if defined(__DMC__) # define HEDLEY_DMC_VERSION HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) #endif #if defined(HEDLEY_DMC_VERSION_CHECK) # undef HEDLEY_DMC_VERSION_CHECK #endif #if defined(HEDLEY_DMC_VERSION) # define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (HEDLEY_DMC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_COMPCERT_VERSION) # undef HEDLEY_COMPCERT_VERSION #endif #if defined(__COMPCERT_VERSION__) # define HEDLEY_COMPCERT_VERSION HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) #endif #if defined(HEDLEY_COMPCERT_VERSION_CHECK) # undef HEDLEY_COMPCERT_VERSION_CHECK #endif #if defined(HEDLEY_COMPCERT_VERSION) # define 
HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (HEDLEY_COMPCERT_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_PELLES_VERSION) # undef HEDLEY_PELLES_VERSION #endif #if defined(__POCC__) # define HEDLEY_PELLES_VERSION HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) #endif #if defined(HEDLEY_PELLES_VERSION_CHECK) # undef HEDLEY_PELLES_VERSION_CHECK #endif #if defined(HEDLEY_PELLES_VERSION) # define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (HEDLEY_PELLES_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_MCST_LCC_VERSION) # undef HEDLEY_MCST_LCC_VERSION #endif #if defined(__LCC__) && defined(__LCC_MINOR__) # define HEDLEY_MCST_LCC_VERSION HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) #endif #if defined(HEDLEY_MCST_LCC_VERSION_CHECK) # undef HEDLEY_MCST_LCC_VERSION_CHECK #endif #if defined(HEDLEY_MCST_LCC_VERSION) # define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (HEDLEY_MCST_LCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_GCC_VERSION) # undef HEDLEY_GCC_VERSION #endif #if \ defined(HEDLEY_GNUC_VERSION) && \ !defined(__clang__) && \ !defined(HEDLEY_INTEL_VERSION) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_ARM_VERSION) && \ !defined(HEDLEY_CRAY_VERSION) && \ !defined(HEDLEY_TI_VERSION) && \ !defined(HEDLEY_TI_ARMCL_VERSION) && \ !defined(HEDLEY_TI_CL430_VERSION) && \ !defined(HEDLEY_TI_CL2000_VERSION) && \ !defined(HEDLEY_TI_CL6X_VERSION) && \ !defined(HEDLEY_TI_CL7X_VERSION) && \ !defined(HEDLEY_TI_CLPRU_VERSION) && \ !defined(__COMPCERT__) && \ !defined(HEDLEY_MCST_LCC_VERSION) # define HEDLEY_GCC_VERSION HEDLEY_GNUC_VERSION #endif #if defined(HEDLEY_GCC_VERSION_CHECK) # undef HEDLEY_GCC_VERSION_CHECK #endif #if 
defined(HEDLEY_GCC_VERSION) # define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (HEDLEY_GCC_VERSION >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else # define HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) #endif #if defined(HEDLEY_HAS_ATTRIBUTE) # undef HEDLEY_HAS_ATTRIBUTE #endif #if \ defined(__has_attribute) && \ ( \ (!defined(HEDLEY_IAR_VERSION) || HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ ) # define HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) #else # define HEDLEY_HAS_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_ATTRIBUTE #endif #if defined(__has_attribute) # define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) #else # define HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_ATTRIBUTE) # undef HEDLEY_GCC_HAS_ATTRIBUTE #endif #if defined(__has_attribute) # define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_HAS_ATTRIBUTE(attribute) #else # define HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_CPP_ATTRIBUTE) # undef HEDLEY_HAS_CPP_ATTRIBUTE #endif #if \ defined(__has_cpp_attribute) && \ defined(__cplusplus) && \ (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) # define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) #else # define HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_HAS_CPP_ATTRIBUTE_NS) # undef HEDLEY_HAS_CPP_ATTRIBUTE_NS #endif #if !defined(__cplusplus) || !defined(__has_cpp_attribute) # define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) #elif \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_IAR_VERSION) && \ (!defined(HEDLEY_SUNPRO_VERSION) || HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) # define 
HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) #else # define HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_CPP_ATTRIBUTE #endif #if defined(__has_cpp_attribute) && defined(__cplusplus) # define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) #else # define HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_CPP_ATTRIBUTE) # undef HEDLEY_GCC_HAS_CPP_ATTRIBUTE #endif #if defined(__has_cpp_attribute) && defined(__cplusplus) # define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) #else # define HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_BUILTIN) # undef HEDLEY_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) #else # define HEDLEY_HAS_BUILTIN(builtin) (0) #endif #if defined(HEDLEY_GNUC_HAS_BUILTIN) # undef HEDLEY_GNUC_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) #else # define HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_BUILTIN) # undef HEDLEY_GCC_HAS_BUILTIN #endif #if defined(__has_builtin) # define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) #else # define HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_FEATURE) # undef HEDLEY_HAS_FEATURE #endif #if defined(__has_feature) # define HEDLEY_HAS_FEATURE(feature) __has_feature(feature) #else # define HEDLEY_HAS_FEATURE(feature) (0) #endif #if defined(HEDLEY_GNUC_HAS_FEATURE) # undef HEDLEY_GNUC_HAS_FEATURE #endif #if 
defined(__has_feature) # define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) #else # define HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_FEATURE) # undef HEDLEY_GCC_HAS_FEATURE #endif #if defined(__has_feature) # define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) #else # define HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_EXTENSION) # undef HEDLEY_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) #else # define HEDLEY_HAS_EXTENSION(extension) (0) #endif #if defined(HEDLEY_GNUC_HAS_EXTENSION) # undef HEDLEY_GNUC_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) #else # define HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_EXTENSION) # undef HEDLEY_GCC_HAS_EXTENSION #endif #if defined(__has_extension) # define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) #else # define HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) #else # define HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) #endif #if defined(HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) #else # define HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) 
HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) # undef HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE #endif #if defined(__has_declspec_attribute) # define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) #else # define HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_HAS_WARNING) # undef HEDLEY_HAS_WARNING #endif #if defined(__has_warning) # define HEDLEY_HAS_WARNING(warning) __has_warning(warning) #else # define HEDLEY_HAS_WARNING(warning) (0) #endif #if defined(HEDLEY_GNUC_HAS_WARNING) # undef HEDLEY_GNUC_HAS_WARNING #endif #if defined(__has_warning) # define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) #else # define HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_GCC_HAS_WARNING) # undef HEDLEY_GCC_HAS_WARNING #endif #if defined(__has_warning) # define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) #else # define HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ defined(__clang__) || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ (HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) # 
define HEDLEY_PRAGMA(value) _Pragma(#value) #elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_PRAGMA(value) __pragma(value) #else # define HEDLEY_PRAGMA(value) #endif #if defined(HEDLEY_DIAGNOSTIC_PUSH) # undef HEDLEY_DIAGNOSTIC_PUSH #endif #if defined(HEDLEY_DIAGNOSTIC_POP) # undef HEDLEY_DIAGNOSTIC_POP #endif #if defined(__clang__) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") # define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") #elif HEDLEY_GCC_VERSION_CHECK(4,6,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") #elif \ HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) # define HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) #elif HEDLEY_ARM_VERSION_CHECK(5,6,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("pop") #elif \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") # define HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") #elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) # define HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") # define HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") #else # define HEDLEY_DIAGNOSTIC_PUSH # define HEDLEY_DIAGNOSTIC_POP #endif /* HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ #if defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) # undef HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ #endif #if defined(__cplusplus) # if HEDLEY_HAS_WARNING("-Wc++98-compat") # if HEDLEY_HAS_WARNING("-Wc++17-extensions") # if HEDLEY_HAS_WARNING("-Wc++1z-extensions") # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # endif # else # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ xpr \ HEDLEY_DIAGNOSTIC_POP # endif # endif #endif #if !defined(HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) # define HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x #endif #if defined(HEDLEY_CONST_CAST) # undef HEDLEY_CONST_CAST #endif #if defined(__cplusplus) # define HEDLEY_CONST_CAST(T, expr) (const_cast(expr)) #elif \ HEDLEY_HAS_WARNING("-Wcast-qual") || \ HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ ((T) (expr)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else # define HEDLEY_CONST_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_REINTERPRET_CAST) # undef HEDLEY_REINTERPRET_CAST #endif #if defined(__cplusplus) # define HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast(expr)) #else # define HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_STATIC_CAST) # undef HEDLEY_STATIC_CAST #endif #if defined(__cplusplus) # define HEDLEY_STATIC_CAST(T, expr) 
(static_cast(expr)) #else # define HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) #endif #if defined(HEDLEY_CPP_CAST) # undef HEDLEY_CPP_CAST #endif #if defined(__cplusplus) # if HEDLEY_HAS_WARNING("-Wold-style-cast") # define HEDLEY_CPP_CAST(T, expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ ((T) (expr)) \ HEDLEY_DIAGNOSTIC_POP # elif HEDLEY_IAR_VERSION_CHECK(8,3,0) # define HEDLEY_CPP_CAST(T, expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("diag_suppress=Pe137") \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_CPP_CAST(T, expr) ((T) (expr)) # endif #else # define HEDLEY_CPP_CAST(T, expr) (expr) #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) # undef HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED #endif #if HEDLEY_HAS_WARNING("-Wdeprecated-declarations") # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:1478 1786)) #elif HEDLEY_PGI_VERSION_CHECK(20,7,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) #elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") #elif \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ 
(HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") #elif HEDLEY_PELLES_VERSION_CHECK(2,90,0) # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") #else # define HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) # undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") #elif 
HEDLEY_MSVC_VERSION_CHECK(15,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) #elif \ HEDLEY_TI_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") #elif HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") #elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") #else # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) # undef HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES #endif #if HEDLEY_HAS_WARNING("-Wunknown-attributes") # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") #elif HEDLEY_GCC_VERSION_CHECK(4,6,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") #elif HEDLEY_INTEL_VERSION_CHECK(17,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") #elif HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) #elif HEDLEY_PGI_VERSION_CHECK(20,7,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") #elif HEDLEY_PGI_VERSION_CHECK(17,10,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) # define 
HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") #elif \ HEDLEY_TI_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") #elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") #else # define HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) # undef HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL #endif #if HEDLEY_HAS_WARNING("-Wcast-qual") # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") #elif HEDLEY_GCC_VERSION_CHECK(3,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") #else # define HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL #endif #if defined(HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) # undef HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION #endif #if HEDLEY_HAS_WARNING("-Wunused-function") # define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") #elif HEDLEY_GCC_VERSION_CHECK(3,4,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") #elif HEDLEY_MSVC_VERSION_CHECK(1,0,0) # define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) #elif HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") #else # define HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION #endif #if defined(HEDLEY_DEPRECATED) # undef HEDLEY_DEPRECATED #endif #if 
defined(HEDLEY_DEPRECATED_FOR) # undef HEDLEY_DEPRECATED_FOR #endif #if \ HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) # define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) #elif \ (HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(HEDLEY_IAR_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_TI_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) # define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) #elif defined(__cplusplus) && (__cplusplus >= 201402L) # define HEDLEY_DEPRECATED(since) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) # define HEDLEY_DEPRECATED_FOR(since, replacement) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) #elif \ HEDLEY_HAS_ATTRIBUTE(deprecated) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && 
defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ HEDLEY_IAR_VERSION_CHECK(8,10,0) # define HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) # define HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_DEPRECATED(since) __declspec(deprecated) # define HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_DEPRECATED(since) _Pragma("deprecated") # define HEDLEY_DEPRECATED_FOR(since, replacement) _Pragma("deprecated") #else # define HEDLEY_DEPRECATED(since) # define HEDLEY_DEPRECATED_FOR(since, replacement) #endif #if defined(HEDLEY_UNAVAILABLE) # undef HEDLEY_UNAVAILABLE #endif #if \ HEDLEY_HAS_ATTRIBUTE(warning) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) #else # define HEDLEY_UNAVAILABLE(available_since) #endif #if defined(HEDLEY_WARN_UNUSED_RESULT) # undef HEDLEY_WARN_UNUSED_RESULT #endif #if defined(HEDLEY_WARN_UNUSED_RESULT_MSG) # undef HEDLEY_WARN_UNUSED_RESULT_MSG #endif #if \ HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ 
HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) #elif (HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) # define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) #elif HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) # define HEDLEY_WARN_UNUSED_RESULT HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) #elif defined(_Check_return_) /* SAL */ # define HEDLEY_WARN_UNUSED_RESULT _Check_return_ # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ #else # define HEDLEY_WARN_UNUSED_RESULT # define HEDLEY_WARN_UNUSED_RESULT_MSG(msg) #endif #if defined(HEDLEY_SENTINEL) # undef HEDLEY_SENTINEL #endif #if \ HEDLEY_HAS_ATTRIBUTE(sentinel) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) #else # define HEDLEY_SENTINEL(position) #endif #if defined(HEDLEY_NO_RETURN) # undef HEDLEY_NO_RETURN #endif #if HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_NO_RETURN __noreturn #elif \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_NO_RETURN __attribute__((__noreturn__)) #elif defined(__STDC_VERSION__) && 
__STDC_VERSION__ >= 201112L # define HEDLEY_NO_RETURN _Noreturn #elif defined(__cplusplus) && (__cplusplus >= 201103L) # define HEDLEY_NO_RETURN HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) #elif \ HEDLEY_HAS_ATTRIBUTE(noreturn) || \ HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_IAR_VERSION_CHECK(8,10,0) # define HEDLEY_NO_RETURN __attribute__((__noreturn__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_NO_RETURN _Pragma("does_not_return") #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_NO_RETURN __declspec(noreturn) #elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) # define HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") #elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) # define HEDLEY_NO_RETURN __attribute((noreturn)) #elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) # define HEDLEY_NO_RETURN __declspec(noreturn) #else # define HEDLEY_NO_RETURN #endif #if defined(HEDLEY_NO_ESCAPE) # undef HEDLEY_NO_ESCAPE #endif #if HEDLEY_HAS_ATTRIBUTE(noescape) # define HEDLEY_NO_ESCAPE __attribute__((__noescape__)) #else # define HEDLEY_NO_ESCAPE #endif #if defined(HEDLEY_UNREACHABLE) # undef HEDLEY_UNREACHABLE #endif #if defined(HEDLEY_UNREACHABLE_RETURN) # undef 
HEDLEY_UNREACHABLE_RETURN #endif #if defined(HEDLEY_ASSUME) # undef HEDLEY_ASSUME #endif #if \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_ASSUME(expr) __assume(expr) #elif HEDLEY_HAS_BUILTIN(__builtin_assume) # define HEDLEY_ASSUME(expr) __builtin_assume(expr) #elif \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) # if defined(__cplusplus) # define HEDLEY_ASSUME(expr) std::_nassert(expr) # else # define HEDLEY_ASSUME(expr) _nassert(expr) # endif #endif #if \ (HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(HEDLEY_ARM_VERSION))) || \ HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_UNREACHABLE() __builtin_unreachable() #elif defined(HEDLEY_ASSUME) # define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) #endif #if !defined(HEDLEY_ASSUME) # if defined(HEDLEY_UNREACHABLE) # define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, ((expr) ? 
1 : (HEDLEY_UNREACHABLE(), 1))) # else # define HEDLEY_ASSUME(expr) HEDLEY_STATIC_CAST(void, expr) # endif #endif #if defined(HEDLEY_UNREACHABLE) # if \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) # define HEDLEY_UNREACHABLE_RETURN(value) return (HEDLEY_STATIC_CAST(void, HEDLEY_ASSUME(0)), (value)) # else # define HEDLEY_UNREACHABLE_RETURN(value) HEDLEY_UNREACHABLE() # endif #else # define HEDLEY_UNREACHABLE_RETURN(value) return (value) #endif #if !defined(HEDLEY_UNREACHABLE) # define HEDLEY_UNREACHABLE() HEDLEY_ASSUME(0) #endif HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wpedantic") # pragma clang diagnostic ignored "-Wpedantic" #endif #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) # pragma clang diagnostic ignored "-Wc++98-compat-pedantic" #endif #if HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) # if defined(__clang__) # pragma clang diagnostic ignored "-Wvariadic-macros" # elif defined(HEDLEY_GCC_VERSION) # pragma GCC diagnostic ignored "-Wvariadic-macros" # endif #endif #if defined(HEDLEY_NON_NULL) # undef HEDLEY_NON_NULL #endif #if \ HEDLEY_HAS_ATTRIBUTE(nonnull) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) # define HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) #else # define HEDLEY_NON_NULL(...) 
#endif HEDLEY_DIAGNOSTIC_POP #if defined(HEDLEY_PRINTF_FORMAT) # undef HEDLEY_PRINTF_FORMAT #endif #if defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) #elif defined(__MINGW32__) && HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) #elif \ HEDLEY_HAS_ATTRIBUTE(format) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) #elif HEDLEY_PELLES_VERSION_CHECK(6,0,0) # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __declspec(vaformat(printf,string_idx,first_to_check)) #else # define HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) #endif #if defined(HEDLEY_CONSTEXPR) # undef HEDLEY_CONSTEXPR #endif #if defined(__cplusplus) # if __cplusplus >= 201103L # define HEDLEY_CONSTEXPR HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) # endif #endif #if 
!defined(HEDLEY_CONSTEXPR) # define HEDLEY_CONSTEXPR #endif #if defined(HEDLEY_PREDICT) # undef HEDLEY_PREDICT #endif #if defined(HEDLEY_LIKELY) # undef HEDLEY_LIKELY #endif #if defined(HEDLEY_UNLIKELY) # undef HEDLEY_UNLIKELY #endif #if defined(HEDLEY_UNPREDICTABLE) # undef HEDLEY_UNPREDICTABLE #endif #if HEDLEY_HAS_BUILTIN(__builtin_unpredictable) # define HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) #endif #if \ (HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(HEDLEY_PGI_VERSION) && !defined(HEDLEY_INTEL_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) # define HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) # define HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) # define HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) # define HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) #elif \ (HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_PREDICT(expr, expected, probability) \ (((probability) >= 0.9) ? 
__builtin_expect((expr), (expected)) : (HEDLEY_STATIC_CAST(void, expected), (expr))) # define HEDLEY_PREDICT_TRUE(expr, probability) \ (__extension__ ({ \ double hedley_probability_ = (probability); \ ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ })) # define HEDLEY_PREDICT_FALSE(expr, probability) \ (__extension__ ({ \ double hedley_probability_ = (probability); \ ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 1) : !!(expr))); \ })) # define HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) # define HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) #else # define HEDLEY_PREDICT(expr, expected, probability) (HEDLEY_STATIC_CAST(void, expected), (expr)) # define HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) # define HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) # define HEDLEY_LIKELY(expr) (!!(expr)) # define HEDLEY_UNLIKELY(expr) (!!(expr)) #endif #if !defined(HEDLEY_UNPREDICTABLE) # define HEDLEY_UNPREDICTABLE(expr) HEDLEY_PREDICT(expr, 1, 0.5) #endif #if defined(HEDLEY_MALLOC) # undef HEDLEY_MALLOC #endif #if \ HEDLEY_HAS_ATTRIBUTE(malloc) || \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ 
HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_MALLOC __attribute__((__malloc__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_MALLOC _Pragma("returns_new_memory") #elif \ HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_MALLOC __declspec(restrict) #else # define HEDLEY_MALLOC #endif #if defined(HEDLEY_PURE) # undef HEDLEY_PURE #endif #if \ HEDLEY_HAS_ATTRIBUTE(pure) || \ HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_PURE __attribute__((__pure__)) #elif HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_PURE _Pragma("does_not_write_global_data") #elif defined(__cplusplus) && \ ( \ HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ ) # define HEDLEY_PURE _Pragma("FUNC_IS_PURE;") #else # define HEDLEY_PURE #endif #if defined(HEDLEY_CONST) # undef HEDLEY_CONST #endif #if \ HEDLEY_HAS_ATTRIBUTE(const) || \ HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ 
HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_CONST __attribute__((__const__)) #elif \ HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) # define HEDLEY_CONST _Pragma("no_side_effect") #else # define HEDLEY_CONST HEDLEY_PURE #endif #if defined(HEDLEY_RESTRICT) # undef HEDLEY_RESTRICT #endif #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) # define HEDLEY_RESTRICT restrict #elif \ HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ defined(__clang__) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_RESTRICT __restrict #elif HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) # define HEDLEY_RESTRICT _Restrict #else # define HEDLEY_RESTRICT #endif #if 
defined(HEDLEY_INLINE) # undef HEDLEY_INLINE #endif #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ (defined(__cplusplus) && (__cplusplus >= 199711L)) # define HEDLEY_INLINE inline #elif \ defined(HEDLEY_GCC_VERSION) || \ HEDLEY_ARM_VERSION_CHECK(6,2,0) # define HEDLEY_INLINE __inline__ #elif \ HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_INLINE __inline #else # define HEDLEY_INLINE #endif #if defined(HEDLEY_ALWAYS_INLINE) # undef HEDLEY_ALWAYS_INLINE #endif #if \ HEDLEY_HAS_ATTRIBUTE(always_inline) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ HEDLEY_IAR_VERSION_CHECK(8,10,0) # define HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) HEDLEY_INLINE #elif \ HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define 
HEDLEY_ALWAYS_INLINE __forceinline #elif defined(__cplusplus) && \ ( \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ ) # define HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") #else # define HEDLEY_ALWAYS_INLINE HEDLEY_INLINE #endif #if defined(HEDLEY_NEVER_INLINE) # undef HEDLEY_NEVER_INLINE #endif #if \ HEDLEY_HAS_ATTRIBUTE(noinline) || \ HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ HEDLEY_TI_VERSION_CHECK(15,12,0) || \ (HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ (HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ (HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ HEDLEY_IAR_VERSION_CHECK(8,10,0) # define HEDLEY_NEVER_INLINE __attribute__((__noinline__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_NEVER_INLINE __declspec(noinline) #elif HEDLEY_PGI_VERSION_CHECK(10,2,0) # define HEDLEY_NEVER_INLINE _Pragma("noinline") #elif HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) # define HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define 
HEDLEY_NEVER_INLINE _Pragma("inline=never") #elif HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) # define HEDLEY_NEVER_INLINE __attribute((noinline)) #elif HEDLEY_PELLES_VERSION_CHECK(9,0,0) # define HEDLEY_NEVER_INLINE __declspec(noinline) #else # define HEDLEY_NEVER_INLINE #endif #if defined(HEDLEY_PRIVATE) # undef HEDLEY_PRIVATE #endif #if defined(HEDLEY_PUBLIC) # undef HEDLEY_PUBLIC #endif #if defined(HEDLEY_IMPORT) # undef HEDLEY_IMPORT #endif #if defined(_WIN32) || defined(__CYGWIN__) # define HEDLEY_PRIVATE # define HEDLEY_PUBLIC __declspec(dllexport) # define HEDLEY_IMPORT __declspec(dllimport) #else # if \ HEDLEY_HAS_ATTRIBUTE(visibility) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ ( \ defined(__TI_EABI__) && \ ( \ (HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ ) \ ) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) # define HEDLEY_PUBLIC __attribute__((__visibility__("default"))) # else # define HEDLEY_PRIVATE # define HEDLEY_PUBLIC # endif # define HEDLEY_IMPORT extern #endif #if defined(HEDLEY_NO_THROW) # undef HEDLEY_NO_THROW #endif #if \ HEDLEY_HAS_ATTRIBUTE(nothrow) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_NO_THROW __attribute__((__nothrow__)) #elif \ HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) # define HEDLEY_NO_THROW __declspec(nothrow) #else # define HEDLEY_NO_THROW #endif #if defined(HEDLEY_FALL_THROUGH) # undef HEDLEY_FALL_THROUGH #endif #if defined(HEDLEY_INTEL_VERSION) # define HEDLEY_FALL_THROUGH #elif \ HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ 
HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) #elif HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) # define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) #elif HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) # define HEDLEY_FALL_THROUGH HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) #elif defined(__fallthrough) /* SAL */ # define HEDLEY_FALL_THROUGH __fallthrough #else # define HEDLEY_FALL_THROUGH #endif #if defined(HEDLEY_RETURNS_NON_NULL) # undef HEDLEY_RETURNS_NON_NULL #endif #if \ HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) #elif defined(_Ret_notnull_) /* SAL */ # define HEDLEY_RETURNS_NON_NULL _Ret_notnull_ #else # define HEDLEY_RETURNS_NON_NULL #endif #if defined(HEDLEY_ARRAY_PARAM) # undef HEDLEY_ARRAY_PARAM #endif #if \ defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ !defined(__STDC_NO_VLA__) && \ !defined(__cplusplus) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_TINYC_VERSION) # define HEDLEY_ARRAY_PARAM(name) (name) #else # define HEDLEY_ARRAY_PARAM(name) #endif #if defined(HEDLEY_IS_CONSTANT) # undef HEDLEY_IS_CONSTANT #endif #if defined(HEDLEY_REQUIRE_CONSTEXPR) # undef HEDLEY_REQUIRE_CONSTEXPR #endif /* HEDLEY_IS_CONSTEXPR_ is for HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ #if defined(HEDLEY_IS_CONSTEXPR_) # undef HEDLEY_IS_CONSTEXPR_ #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) #endif #if !defined(__cplusplus) # if \ HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) # if defined(__INTPTR_TYPE__) # define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) # else # include # define HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) # endif # elif \ ( \ defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ !defined(HEDLEY_SUNPRO_VERSION) && \ !defined(HEDLEY_PGI_VERSION) && \ !defined(HEDLEY_IAR_VERSION)) || \ (HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(HEDLEY_IAR_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ HEDLEY_ARM_VERSION_CHECK(5,3,0) # if defined(__INTPTR_TYPE__) # define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) # else # include # define HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? 
(void*) ((intptr_t) * 0) : (int*) 0), int*: 1, void*: 0) # endif # elif \ defined(HEDLEY_GCC_VERSION) || \ defined(HEDLEY_INTEL_VERSION) || \ defined(HEDLEY_TINYC_VERSION) || \ defined(HEDLEY_TI_ARMCL_VERSION) || \ HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ defined(HEDLEY_TI_CL2000_VERSION) || \ defined(HEDLEY_TI_CL6X_VERSION) || \ defined(HEDLEY_TI_CL7X_VERSION) || \ defined(HEDLEY_TI_CLPRU_VERSION) || \ defined(__clang__) # define HEDLEY_IS_CONSTEXPR_(expr) ( \ sizeof(void) != \ sizeof(*( \ 1 ? \ ((void*) ((expr) * 0L) ) : \ ((struct { char v[sizeof(void) * 2]; } *) 1) \ ) \ ) \ ) # endif #endif #if defined(HEDLEY_IS_CONSTEXPR_) # if !defined(HEDLEY_IS_CONSTANT) # define HEDLEY_IS_CONSTANT(expr) HEDLEY_IS_CONSTEXPR_(expr) # endif # define HEDLEY_REQUIRE_CONSTEXPR(expr) (HEDLEY_IS_CONSTEXPR_(expr) ? (expr) : (-1)) #else # if !defined(HEDLEY_IS_CONSTANT) # define HEDLEY_IS_CONSTANT(expr) (0) # endif # define HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) #endif #if defined(HEDLEY_BEGIN_C_DECLS) # undef HEDLEY_BEGIN_C_DECLS #endif #if defined(HEDLEY_END_C_DECLS) # undef HEDLEY_END_C_DECLS #endif #if defined(HEDLEY_C_DECL) # undef HEDLEY_C_DECL #endif #if defined(__cplusplus) # define HEDLEY_BEGIN_C_DECLS extern "C" { # define HEDLEY_END_C_DECLS } # define HEDLEY_C_DECL extern "C" #else # define HEDLEY_BEGIN_C_DECLS # define HEDLEY_END_C_DECLS # define HEDLEY_C_DECL #endif #if defined(HEDLEY_STATIC_ASSERT) # undef HEDLEY_STATIC_ASSERT #endif #if \ !defined(__cplusplus) && ( \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ (HEDLEY_HAS_FEATURE(c_static_assert) && !defined(HEDLEY_INTEL_CL_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ defined(_Static_assert) \ ) # define HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_STATIC_ASSERT(expr, 
message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) #else # define HEDLEY_STATIC_ASSERT(expr, message) #endif #if defined(HEDLEY_NULL) # undef HEDLEY_NULL #endif #if defined(__cplusplus) # if __cplusplus >= 201103L # define HEDLEY_NULL HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) # elif defined(NULL) # define HEDLEY_NULL NULL # else # define HEDLEY_NULL HEDLEY_STATIC_CAST(void*, 0) # endif #elif defined(NULL) # define HEDLEY_NULL NULL #else # define HEDLEY_NULL ((void*) 0) #endif #if defined(HEDLEY_MESSAGE) # undef HEDLEY_MESSAGE #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_MESSAGE(msg) \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ HEDLEY_PRAGMA(message msg) \ HEDLEY_DIAGNOSTIC_POP #elif \ HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message msg) #elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(_CRI message msg) #elif HEDLEY_IAR_VERSION_CHECK(8,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) #elif HEDLEY_PELLES_VERSION_CHECK(2,0,0) # define HEDLEY_MESSAGE(msg) HEDLEY_PRAGMA(message(msg)) #else # define HEDLEY_MESSAGE(msg) #endif #if defined(HEDLEY_WARNING) # undef HEDLEY_WARNING #endif #if HEDLEY_HAS_WARNING("-Wunknown-pragmas") # define HEDLEY_WARNING(msg) \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ HEDLEY_PRAGMA(clang warning msg) \ HEDLEY_DIAGNOSTIC_POP #elif \ HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) # define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(GCC warning msg) #elif \ HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_WARNING(msg) HEDLEY_PRAGMA(message(msg)) #else # define HEDLEY_WARNING(msg) HEDLEY_MESSAGE(msg) #endif #if defined(HEDLEY_REQUIRE) # undef HEDLEY_REQUIRE #endif #if defined(HEDLEY_REQUIRE_MSG) # undef 
HEDLEY_REQUIRE_MSG #endif #if HEDLEY_HAS_ATTRIBUTE(diagnose_if) # if HEDLEY_HAS_WARNING("-Wgcc-compat") # define HEDLEY_REQUIRE(expr) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ __attribute__((diagnose_if(!(expr), #expr, "error"))) \ HEDLEY_DIAGNOSTIC_POP # define HEDLEY_REQUIRE_MSG(expr,msg) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ __attribute__((diagnose_if(!(expr), msg, "error"))) \ HEDLEY_DIAGNOSTIC_POP # else # define HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) # define HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) # endif #else # define HEDLEY_REQUIRE(expr) # define HEDLEY_REQUIRE_MSG(expr,msg) #endif #if defined(HEDLEY_FLAGS) # undef HEDLEY_FLAGS #endif #if HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) # define HEDLEY_FLAGS __attribute__((__flag_enum__)) #else # define HEDLEY_FLAGS #endif #if defined(HEDLEY_FLAGS_CAST) # undef HEDLEY_FLAGS_CAST #endif #if HEDLEY_INTEL_VERSION_CHECK(19,0,0) # define HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("warning(disable:188)") \ ((T) (expr)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else # define HEDLEY_FLAGS_CAST(T, expr) HEDLEY_STATIC_CAST(T, expr) #endif #if defined(HEDLEY_EMPTY_BASES) # undef HEDLEY_EMPTY_BASES #endif #if \ (HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) # define HEDLEY_EMPTY_BASES __declspec(empty_bases) #else # define HEDLEY_EMPTY_BASES #endif /* Remaining macros are deprecated. 
*/ #if defined(HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) # undef HEDLEY_GCC_NOT_CLANG_VERSION_CHECK #endif #if defined(__clang__) # define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) #else # define HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) HEDLEY_GCC_VERSION_CHECK(major,minor,patch) #endif #if defined(HEDLEY_CLANG_HAS_ATTRIBUTE) # undef HEDLEY_CLANG_HAS_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) HEDLEY_HAS_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) # undef HEDLEY_CLANG_HAS_CPP_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) HEDLEY_HAS_CPP_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_BUILTIN) # undef HEDLEY_CLANG_HAS_BUILTIN #endif #define HEDLEY_CLANG_HAS_BUILTIN(builtin) HEDLEY_HAS_BUILTIN(builtin) #if defined(HEDLEY_CLANG_HAS_FEATURE) # undef HEDLEY_CLANG_HAS_FEATURE #endif #define HEDLEY_CLANG_HAS_FEATURE(feature) HEDLEY_HAS_FEATURE(feature) #if defined(HEDLEY_CLANG_HAS_EXTENSION) # undef HEDLEY_CLANG_HAS_EXTENSION #endif #define HEDLEY_CLANG_HAS_EXTENSION(extension) HEDLEY_HAS_EXTENSION(extension) #if defined(HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) # undef HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE #endif #define HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) #if defined(HEDLEY_CLANG_HAS_WARNING) # undef HEDLEY_CLANG_HAS_WARNING #endif #define HEDLEY_CLANG_HAS_WARNING(warning) HEDLEY_HAS_WARNING(warning) #endif /* !defined(HEDLEY_VERSION) || (HEDLEY_VERSION < X) */ /* :: End hedley.h :: */ #define SIMDE_VERSION_MAJOR 0 #define SIMDE_VERSION_MINOR 7 #define SIMDE_VERSION_MICRO 6 #define SIMDE_VERSION HEDLEY_VERSION_ENCODE(SIMDE_VERSION_MAJOR, SIMDE_VERSION_MINOR, SIMDE_VERSION_MICRO) // Also update meson.build in the root directory of the repository #include #include /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-detect-clang.h :: */ /* Detect Clang Version * 
Created by Evan Nemerson * * To the extent possible under law, the author(s) have dedicated all * copyright and related and neighboring rights to this software to * the public domain worldwide. This software is distributed without * any warranty. * * For details, see . * SPDX-License-Identifier: CC0-1.0 */ /* This file was originally part of SIMDe * (). You're free to do with it as * you please, but I do have a few small requests: * * * If you make improvements, please submit them back to SIMDe * (at ) so others can * benefit from them. * * Please keep a link to SIMDe intact so people know where to submit * improvements. * * If you expose it publicly, please change the SIMDE_ prefix to * something specific to your project. * * The version numbers clang exposes (in the ___clang_major__, * __clang_minor__, and __clang_patchlevel__ macros) are unreliable. * Vendors such as Apple will define these values to their version * numbers; for example, "Apple Clang 4.0" is really clang 3.1, but * __clang_major__ and __clang_minor__ are defined to 4 and 0 * respectively, instead of 3 and 1. * * The solution is *usually* to use clang's feature detection macros * () * to determine if the feature you're interested in is available. This * generally works well, and it should probably be the first thing you * try. Unfortunately, it's not possible to check for everything. In * particular, compiler bugs. * * This file just uses the feature checking macros to detect features * added in specific versions of clang to identify which version of * clang the compiler is based on. * * Right now it only goes back to 3.6, but I'm happy to accept patches * to go back further. And, of course, newer versions are welcome if * they're not already present, and if you find a way to detect a point * release that would be great, too! */ #if !defined(SIMDE_DETECT_CLANG_H) #define SIMDE_DETECT_CLANG_H 1 /* Attempt to detect the upstream clang version number. 
I usually only * worry about major version numbers (at least for 4.0+), but if you * need more resolution I'm happy to accept patches that are able to * detect minor versions as well. That said, you'll probably have a * hard time with detection since AFAIK most minor releases don't add * anything we can detect. Updated based on * https://github.com/google/highway/blob/438c705a295176b96a50336527bb3e7ea365ffac/hwy/detect_compiler_arch.h#L73 * - would welcome patches/updates there as well. */ #if defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) # if __has_attribute(nouwtable) // no new warnings in 16.0 # define SIMDE_DETECT_CLANG_VERSION 160000 # elif __has_warning("-Warray-parameter") # define SIMDE_DETECT_CLANG_VERSION 150000 # elif __has_warning("-Wbitwise-instead-of-logical") # define SIMDE_DETECT_CLANG_VERSION 140000 # elif __has_warning("-Wwaix-compat") # define SIMDE_DETECT_CLANG_VERSION 130000 # elif __has_warning("-Wformat-insufficient-args") # define SIMDE_DETECT_CLANG_VERSION 120000 # elif __has_warning("-Wimplicit-const-int-float-conversion") # define SIMDE_DETECT_CLANG_VERSION 110000 # elif __has_warning("-Wmisleading-indentation") # define SIMDE_DETECT_CLANG_VERSION 100000 # elif defined(__FILE_NAME__) # define SIMDE_DETECT_CLANG_VERSION 90000 # elif __has_warning("-Wextra-semi-stmt") || __has_builtin(__builtin_rotateleft32) # define SIMDE_DETECT_CLANG_VERSION 80000 // For reasons unknown, XCode 10.3 (Apple LLVM version 10.0.1) is apparently // based on Clang 7, but does not support the warning we test. // See https://en.wikipedia.org/wiki/Xcode#Toolchain_versions and // https://trac.macports.org/wiki/XcodeVersionInfo. 
# elif __has_warning("-Wc++98-compat-extra-semi") || \ (defined(__apple_build_version__) && __apple_build_version__ >= 10010000) # define SIMDE_DETECT_CLANG_VERSION 70000 # elif __has_warning("-Wpragma-pack") # define SIMDE_DETECT_CLANG_VERSION 60000 # elif __has_warning("-Wbitfield-enum-conversion") # define SIMDE_DETECT_CLANG_VERSION 50000 # elif __has_attribute(diagnose_if) # define SIMDE_DETECT_CLANG_VERSION 40000 # elif __has_warning("-Wcomma") # define SIMDE_DETECT_CLANG_VERSION 39000 # elif __has_warning("-Wdouble-promotion") # define SIMDE_DETECT_CLANG_VERSION 38000 # elif __has_warning("-Wshift-negative-value") # define SIMDE_DETECT_CLANG_VERSION 37000 # elif __has_warning("-Wambiguous-ellipsis") # define SIMDE_DETECT_CLANG_VERSION 36000 # else # define SIMDE_DETECT_CLANG_VERSION 1 # endif #endif /* defined(__clang__) && !defined(SIMDE_DETECT_CLANG_VERSION) */ /* The SIMDE_DETECT_CLANG_VERSION_CHECK macro is pretty * straightforward; it returns true if the compiler is a derivative * of clang >= the specified version. * * Since this file is often (primarily?) useful for working around bugs * it is also helpful to have a macro which returns true if only if the * compiler is a version of clang *older* than the specified version to * make it a bit easier to ifdef regions to add code for older versions, * such as pragmas to disable a specific warning. 
*/ #if defined(SIMDE_DETECT_CLANG_VERSION) # define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION >= ((major * 10000) + (minor * 1000) + (revision))) # define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (SIMDE_DETECT_CLANG_VERSION < ((major * 10000) + (minor * 1000) + (revision))) #else # define SIMDE_DETECT_CLANG_VERSION_CHECK(major, minor, revision) (0) # define SIMDE_DETECT_CLANG_VERSION_NOT(major, minor, revision) (0) #endif #endif /* !defined(SIMDE_DETECT_CLANG_H) */ /* :: End simde-detect-clang.h :: */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-arch.h :: */ /* Architecture detection * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. For * details, see the Creative Commons Zero 1.0 Universal license at * * * SPDX-License-Identifier: CC0-1.0 * * Different compilers define different preprocessor macros for the * same architecture. This is an attempt to provide a single * interface which is usable on any compiler. * * In general, a macro named SIMDE_ARCH_* is defined for each * architecture the CPU supports. When there are multiple possible * versions, we try to define the macro to the target version. For * example, if you want to check for i586+, you could do something * like: * * #if defined(SIMDE_ARCH_X86) && (SIMDE_ARCH_X86 >= 5) * ... * #endif * * You could also just check that SIMDE_ARCH_X86 >= 5 without checking * if it's defined first, but some compilers may emit a warning about * an undefined macro being used (e.g., GCC with -Wundef). * * This was originally created for SIMDe * (hence the prefix), but this * header has no dependencies and may be used anywhere. It is * originally based on information from * , though it * has been enhanced with additional information. 
* * If you improve this file, or find a bug, please file the issue at * . If you copy this into * your project, even if you change the prefix, please keep the links * to SIMDe intact so others know where to report issues, submit * enhancements, and find the latest version. */ #if !defined(SIMDE_ARCH_H) #define SIMDE_ARCH_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* Alpha */ #if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) # if defined(__alpha_ev6__) # define SIMDE_ARCH_ALPHA 6 # elif defined(__alpha_ev5__) # define SIMDE_ARCH_ALPHA 5 # elif defined(__alpha_ev4__) # define SIMDE_ARCH_ALPHA 4 # else # define SIMDE_ARCH_ALPHA 1 # endif #endif #if defined(SIMDE_ARCH_ALPHA) # define SIMDE_ARCH_ALPHA_CHECK(version) ((version) <= SIMDE_ARCH_ALPHA) #else # define SIMDE_ARCH_ALPHA_CHECK(version) (0) #endif /* Atmel AVR */ #if defined(__AVR_ARCH__) # define SIMDE_ARCH_AVR __AVR_ARCH__ #endif /* AMD64 / x86_64 */ #if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) # if !defined(_M_ARM64EC) # define SIMDE_ARCH_AMD64 1000 # endif #endif /* ARM */ #if defined(__ARM_ARCH) # if __ARM_ARCH > 100 # define SIMDE_ARCH_ARM (__ARM_ARCH) # else # define SIMDE_ARCH_ARM (__ARM_ARCH * 100) # endif #elif defined(_M_ARM) # if _M_ARM > 100 # define SIMDE_ARCH_ARM (_M_ARM) # else # define SIMDE_ARCH_ARM (_M_ARM * 100) # endif #elif defined(_M_ARM64) || defined(_M_ARM64EC) # define SIMDE_ARCH_ARM 800 #elif defined(__arm__) || defined(__thumb__) || defined(__TARGET_ARCH_ARM) || defined(_ARM) || defined(_M_ARM) || defined(_M_ARM) # define SIMDE_ARCH_ARM 1 #endif #if defined(SIMDE_ARCH_ARM) # define SIMDE_ARCH_ARM_CHECK(major, minor) (((major * 100) + (minor)) <= SIMDE_ARCH_ARM) #else # define SIMDE_ARCH_ARM_CHECK(major, minor) (0) #endif /* AArch64 */ #if defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC) # define SIMDE_ARCH_AARCH64 1000 
#endif #if defined(SIMDE_ARCH_AARCH64) # define SIMDE_ARCH_AARCH64_CHECK(version) ((version) <= SIMDE_ARCH_AARCH64) #else # define SIMDE_ARCH_AARCH64_CHECK(version) (0) #endif /* ARM SIMD ISA extensions */ #if defined(__ARM_NEON) || defined(SIMDE_ARCH_AARCH64) # if defined(SIMDE_ARCH_AARCH64) # define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_AARCH64 # elif defined(SIMDE_ARCH_ARM) # define SIMDE_ARCH_ARM_NEON SIMDE_ARCH_ARM # endif #endif #if defined(__ARM_FEATURE_SVE) # define SIMDE_ARCH_ARM_SVE #endif /* Blackfin */ #if defined(__bfin) || defined(__BFIN__) || defined(__bfin__) # define SIMDE_ARCH_BLACKFIN 1 #endif /* CRIS */ #if defined(__CRIS_arch_version) # define SIMDE_ARCH_CRIS __CRIS_arch_version #elif defined(__cris__) || defined(__cris) || defined(__CRIS) || defined(__CRIS__) # define SIMDE_ARCH_CRIS 1 #endif /* Convex */ #if defined(__convex_c38__) # define SIMDE_ARCH_CONVEX 38 #elif defined(__convex_c34__) # define SIMDE_ARCH_CONVEX 34 #elif defined(__convex_c32__) # define SIMDE_ARCH_CONVEX 32 #elif defined(__convex_c2__) # define SIMDE_ARCH_CONVEX 2 #elif defined(__convex__) # define SIMDE_ARCH_CONVEX 1 #endif #if defined(SIMDE_ARCH_CONVEX) # define SIMDE_ARCH_CONVEX_CHECK(version) ((version) <= SIMDE_ARCH_CONVEX) #else # define SIMDE_ARCH_CONVEX_CHECK(version) (0) #endif /* Adapteva Epiphany */ #if defined(__epiphany__) # define SIMDE_ARCH_EPIPHANY 1 #endif /* Fujitsu FR-V */ #if defined(__frv__) # define SIMDE_ARCH_FRV 1 #endif /* H8/300 */ #if defined(__H8300__) # define SIMDE_ARCH_H8300 #endif /* Elbrus (8S, 8SV and successors) */ #if defined(__e2k__) # define SIMDE_ARCH_E2K #endif /* HP/PA / PA-RISC */ #if defined(__PA8000__) || defined(__HPPA20__) || defined(__RISC2_0__) || defined(_PA_RISC2_0) # define SIMDE_ARCH_HPPA 20 #elif defined(__PA7100__) || defined(__HPPA11__) || defined(_PA_RISC1_1) # define SIMDE_ARCH_HPPA 11 #elif defined(_PA_RISC1_0) # define SIMDE_ARCH_HPPA 10 #elif defined(__hppa__) || defined(__HPPA__) || defined(__hppa) # define 
SIMDE_ARCH_HPPA 1 #endif #if defined(SIMDE_ARCH_HPPA) # define SIMDE_ARCH_HPPA_CHECK(version) ((version) <= SIMDE_ARCH_HPPA) #else # define SIMDE_ARCH_HPPA_CHECK(version) (0) #endif /* x86 */ #if defined(_M_IX86) # define SIMDE_ARCH_X86 (_M_IX86 / 100) #elif defined(__I86__) # define SIMDE_ARCH_X86 __I86__ #elif defined(i686) || defined(__i686) || defined(__i686__) # define SIMDE_ARCH_X86 6 #elif defined(i586) || defined(__i586) || defined(__i586__) # define SIMDE_ARCH_X86 5 #elif defined(i486) || defined(__i486) || defined(__i486__) # define SIMDE_ARCH_X86 4 #elif defined(i386) || defined(__i386) || defined(__i386__) # define SIMDE_ARCH_X86 3 #elif defined(_X86_) || defined(__X86__) || defined(__THW_INTEL__) # define SIMDE_ARCH_X86 3 #endif #if defined(SIMDE_ARCH_X86) # define SIMDE_ARCH_X86_CHECK(version) ((version) <= SIMDE_ARCH_X86) #else # define SIMDE_ARCH_X86_CHECK(version) (0) #endif /* SIMD ISA extensions for x86/x86_64 and Elbrus */ #if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) || defined(SIMDE_ARCH_E2K) # if defined(_M_IX86_FP) # define SIMDE_ARCH_X86_MMX # if (_M_IX86_FP >= 1) # define SIMDE_ARCH_X86_SSE 1 # endif # if (_M_IX86_FP >= 2) # define SIMDE_ARCH_X86_SSE2 1 # endif # elif defined(_M_X64) # define SIMDE_ARCH_X86_SSE 1 # define SIMDE_ARCH_X86_SSE2 1 # else # if defined(__MMX__) # define SIMDE_ARCH_X86_MMX 1 # endif # if defined(__SSE__) # define SIMDE_ARCH_X86_SSE 1 # endif # if defined(__SSE2__) # define SIMDE_ARCH_X86_SSE2 1 # endif # endif # if defined(__SSE3__) # define SIMDE_ARCH_X86_SSE3 1 # endif # if defined(__SSSE3__) # define SIMDE_ARCH_X86_SSSE3 1 # endif # if defined(__SSE4_1__) # define SIMDE_ARCH_X86_SSE4_1 1 # endif # if defined(__SSE4_2__) # define SIMDE_ARCH_X86_SSE4_2 1 # endif # if defined(__XOP__) # define SIMDE_ARCH_X86_XOP 1 # endif # if defined(__AVX__) # define SIMDE_ARCH_X86_AVX 1 # if !defined(SIMDE_ARCH_X86_SSE3) # define SIMDE_ARCH_X86_SSE3 1 # endif # if !defined(SIMDE_ARCH_X86_SSE4_1) # define 
SIMDE_ARCH_X86_SSE4_1 1 # endif # if !defined(SIMDE_ARCH_X86_SSE4_1) # define SIMDE_ARCH_X86_SSE4_2 1 # endif # endif # if defined(__AVX2__) # define SIMDE_ARCH_X86_AVX2 1 # if defined(_MSC_VER) # define SIMDE_ARCH_X86_FMA 1 # endif # endif # if defined(__FMA__) # define SIMDE_ARCH_X86_FMA 1 # if !defined(SIMDE_ARCH_X86_AVX) # define SIMDE_ARCH_X86_AVX 1 # endif # endif # if defined(__AVX512VP2INTERSECT__) # define SIMDE_ARCH_X86_AVX512VP2INTERSECT 1 # endif # if defined(__AVX512BITALG__) # define SIMDE_ARCH_X86_AVX512BITALG 1 # endif # if defined(__AVX512VPOPCNTDQ__) # define SIMDE_ARCH_X86_AVX512VPOPCNTDQ 1 # endif # if defined(__AVX512VBMI__) # define SIMDE_ARCH_X86_AVX512VBMI 1 # endif # if defined(__AVX512VBMI2__) # define SIMDE_ARCH_X86_AVX512VBMI2 1 # endif # if defined(__AVX512VNNI__) # define SIMDE_ARCH_X86_AVX512VNNI 1 # endif # if defined(__AVX5124VNNIW__) # define SIMDE_ARCH_X86_AVX5124VNNIW 1 # endif # if defined(__AVX512BW__) # define SIMDE_ARCH_X86_AVX512BW 1 # endif # if defined(__AVX512BF16__) # define SIMDE_ARCH_X86_AVX512BF16 1 # endif # if defined(__AVX512CD__) # define SIMDE_ARCH_X86_AVX512CD 1 # endif # if defined(__AVX512DQ__) # define SIMDE_ARCH_X86_AVX512DQ 1 # endif # if defined(__AVX512F__) # define SIMDE_ARCH_X86_AVX512F 1 # endif # if defined(__AVX512VL__) # define SIMDE_ARCH_X86_AVX512VL 1 # endif # if defined(__GFNI__) # define SIMDE_ARCH_X86_GFNI 1 # endif # if defined(__PCLMUL__) # define SIMDE_ARCH_X86_PCLMUL 1 # endif # if defined(__VPCLMULQDQ__) # define SIMDE_ARCH_X86_VPCLMULQDQ 1 # endif # if defined(__F16C__) || (defined(HEDLEY_MSVC_VERSION) && HEDLEY_MSVC_VERSION_CHECK(19,30,0) && defined(SIMDE_ARCH_X86_AVX2) ) # define SIMDE_ARCH_X86_F16C 1 # endif #endif /* Itanium */ #if defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(__ia64) || defined(_M_IA64) || defined(__itanium__) # define SIMDE_ARCH_IA64 1 #endif /* Renesas M32R */ #if defined(__m32r__) || defined(__M32R__) # define SIMDE_ARCH_M32R #endif /* 
Motorola 68000 */ #if defined(__mc68060__) || defined(__MC68060__) # define SIMDE_ARCH_M68K 68060 #elif defined(__mc68040__) || defined(__MC68040__) # define SIMDE_ARCH_M68K 68040 #elif defined(__mc68030__) || defined(__MC68030__) # define SIMDE_ARCH_M68K 68030 #elif defined(__mc68020__) || defined(__MC68020__) # define SIMDE_ARCH_M68K 68020 #elif defined(__mc68010__) || defined(__MC68010__) # define SIMDE_ARCH_M68K 68010 #elif defined(__mc68000__) || defined(__MC68000__) # define SIMDE_ARCH_M68K 68000 #endif #if defined(SIMDE_ARCH_M68K) # define SIMDE_ARCH_M68K_CHECK(version) ((version) <= SIMDE_ARCH_M68K) #else # define SIMDE_ARCH_M68K_CHECK(version) (0) #endif /* Xilinx MicroBlaze */ #if defined(__MICROBLAZE__) || defined(__microblaze__) # define SIMDE_ARCH_MICROBLAZE #endif /* MIPS */ #if defined(_MIPS_ISA_MIPS64R2) # define SIMDE_ARCH_MIPS 642 #elif defined(_MIPS_ISA_MIPS64) # define SIMDE_ARCH_MIPS 640 #elif defined(_MIPS_ISA_MIPS32R2) # define SIMDE_ARCH_MIPS 322 #elif defined(_MIPS_ISA_MIPS32) # define SIMDE_ARCH_MIPS 320 #elif defined(_MIPS_ISA_MIPS4) # define SIMDE_ARCH_MIPS 4 #elif defined(_MIPS_ISA_MIPS3) # define SIMDE_ARCH_MIPS 3 #elif defined(_MIPS_ISA_MIPS2) # define SIMDE_ARCH_MIPS 2 #elif defined(_MIPS_ISA_MIPS1) # define SIMDE_ARCH_MIPS 1 #elif defined(_MIPS_ISA_MIPS) || defined(__mips) || defined(__MIPS__) # define SIMDE_ARCH_MIPS 1 #endif #if defined(SIMDE_ARCH_MIPS) # define SIMDE_ARCH_MIPS_CHECK(version) ((version) <= SIMDE_ARCH_MIPS) #else # define SIMDE_ARCH_MIPS_CHECK(version) (0) #endif #if defined(__mips_loongson_mmi) # define SIMDE_ARCH_MIPS_LOONGSON_MMI 1 #endif #if defined(__mips_msa) # define SIMDE_ARCH_MIPS_MSA 1 #endif /* Matsushita MN10300 */ #if defined(__MN10300__) || defined(__mn10300__) # define SIMDE_ARCH_MN10300 1 #endif /* POWER */ #if defined(_M_PPC) # define SIMDE_ARCH_POWER _M_PPC #elif defined(_ARCH_PWR9) # define SIMDE_ARCH_POWER 900 #elif defined(_ARCH_PWR8) # define SIMDE_ARCH_POWER 800 #elif defined(_ARCH_PWR7) # 
define SIMDE_ARCH_POWER 700 #elif defined(_ARCH_PWR6) # define SIMDE_ARCH_POWER 600 #elif defined(_ARCH_PWR5) # define SIMDE_ARCH_POWER 500 #elif defined(_ARCH_PWR4) # define SIMDE_ARCH_POWER 400 #elif defined(_ARCH_440) || defined(__ppc440__) # define SIMDE_ARCH_POWER 440 #elif defined(_ARCH_450) || defined(__ppc450__) # define SIMDE_ARCH_POWER 450 #elif defined(_ARCH_601) || defined(__ppc601__) # define SIMDE_ARCH_POWER 601 #elif defined(_ARCH_603) || defined(__ppc603__) # define SIMDE_ARCH_POWER 603 #elif defined(_ARCH_604) || defined(__ppc604__) # define SIMDE_ARCH_POWER 604 #elif defined(_ARCH_605) || defined(__ppc605__) # define SIMDE_ARCH_POWER 605 #elif defined(_ARCH_620) || defined(__ppc620__) # define SIMDE_ARCH_POWER 620 #elif defined(__powerpc) || defined(__powerpc__) || defined(__POWERPC__) || defined(__ppc__) || defined(__PPC__) || defined(_ARCH_PPC) || defined(__ppc) # define SIMDE_ARCH_POWER 1 #endif #if defined(SIMDE_ARCH_POWER) #define SIMDE_ARCH_POWER_CHECK(version) ((version) <= SIMDE_ARCH_POWER) #else #define SIMDE_ARCH_POWER_CHECK(version) (0) #endif #if defined(__ALTIVEC__) # define SIMDE_ARCH_POWER_ALTIVEC SIMDE_ARCH_POWER #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) ((version) <= SIMDE_ARCH_POWER) #else #define SIMDE_ARCH_POWER_ALTIVEC_CHECK(version) (0) #endif /* SPARC */ #if defined(__sparc_v9__) || defined(__sparcv9) # define SIMDE_ARCH_SPARC 9 #elif defined(__sparc_v8__) || defined(__sparcv8) # define SIMDE_ARCH_SPARC 8 #elif defined(__sparc_v7__) || defined(__sparcv7) # define SIMDE_ARCH_SPARC 7 #elif defined(__sparc_v6__) || defined(__sparcv6) # define SIMDE_ARCH_SPARC 6 #elif defined(__sparc_v5__) || defined(__sparcv5) # define SIMDE_ARCH_SPARC 5 #elif defined(__sparc_v4__) || defined(__sparcv4) # define SIMDE_ARCH_SPARC 4 #elif defined(__sparc_v3__) || defined(__sparcv3) # define SIMDE_ARCH_SPARC 3 #elif defined(__sparc_v2__) || defined(__sparcv2) # define SIMDE_ARCH_SPARC 2 #elif defined(__sparc_v1__) || defined(__sparcv1) # 
define SIMDE_ARCH_SPARC 1 #elif defined(__sparc__) || defined(__sparc) # define SIMDE_ARCH_SPARC 1 #endif #if defined(SIMDE_ARCH_SPARC) #define SIMDE_ARCH_SPARC_CHECK(version) ((version) <= SIMDE_ARCH_SPARC) #else #define SIMDE_ARCH_SPARC_CHECK(version) (0) #endif /* SuperH */ #if defined(__sh5__) || defined(__SH5__) # define SIMDE_ARCH_SUPERH 5 #elif defined(__sh4__) || defined(__SH4__) # define SIMDE_ARCH_SUPERH 4 #elif defined(__sh3__) || defined(__SH3__) # define SIMDE_ARCH_SUPERH 3 #elif defined(__sh2__) || defined(__SH2__) # define SIMDE_ARCH_SUPERH 2 #elif defined(__sh1__) || defined(__SH1__) # define SIMDE_ARCH_SUPERH 1 #elif defined(__sh__) || defined(__SH__) # define SIMDE_ARCH_SUPERH 1 #endif /* IBM System z */ #if defined(__370__) || defined(__THW_370__) || defined(__s390__) || defined(__s390x__) || defined(__zarch__) || defined(__SYSC_ZARCH__) # define SIMDE_ARCH_ZARCH __ARCH__ #endif #if defined(SIMDE_ARCH_ZARCH) #define SIMDE_ARCH_ZARCH_CHECK(version) ((version) <= SIMDE_ARCH_ZARCH) #else #define SIMDE_ARCH_ZARCH_CHECK(version) (0) #endif #if defined(SIMDE_ARCH_ZARCH) && defined(__VEC__) #define SIMDE_ARCH_ZARCH_ZVECTOR SIMDE_ARCH_ZARCH #endif /* TMS320 DSP */ #if defined(_TMS320C6740) || defined(__TMS320C6740__) # define SIMDE_ARCH_TMS320 6740 #elif defined(_TMS320C6700_PLUS) || defined(__TMS320C6700_PLUS__) # define SIMDE_ARCH_TMS320 6701 #elif defined(_TMS320C6700) || defined(__TMS320C6700__) # define SIMDE_ARCH_TMS320 6700 #elif defined(_TMS320C6600) || defined(__TMS320C6600__) # define SIMDE_ARCH_TMS320 6600 #elif defined(_TMS320C6400_PLUS) || defined(__TMS320C6400_PLUS__) # define SIMDE_ARCH_TMS320 6401 #elif defined(_TMS320C6400) || defined(__TMS320C6400__) # define SIMDE_ARCH_TMS320 6400 #elif defined(_TMS320C6200) || defined(__TMS320C6200__) # define SIMDE_ARCH_TMS320 6200 #elif defined(_TMS320C55X) || defined(__TMS320C55X__) # define SIMDE_ARCH_TMS320 550 #elif defined(_TMS320C54X) || defined(__TMS320C54X__) # define SIMDE_ARCH_TMS320 540 
#elif defined(_TMS320C28X) || defined(__TMS320C28X__) # define SIMDE_ARCH_TMS320 280 #endif #if defined(SIMDE_ARCH_TMS320) #define SIMDE_ARCH_TMS320_CHECK(version) ((version) <= SIMDE_ARCH_TMS320) #else #define SIMDE_ARCH_TMS320_CHECK(version) (0) #endif /* WebAssembly */ #if defined(__wasm__) # define SIMDE_ARCH_WASM 1 #endif #if defined(SIMDE_ARCH_WASM) && defined(__wasm_simd128__) # define SIMDE_ARCH_WASM_SIMD128 #endif /* Xtensa */ #if defined(__xtensa__) || defined(__XTENSA__) # define SIMDE_ARCH_XTENSA 1 #endif /* Availability of 16-bit floating-point arithmetic intrinsics */ #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) # define SIMDE_ARCH_ARM_NEON_FP16 #endif /* LoongArch */ #if defined(__loongarch32) # define SIMDE_ARCH_LOONGARCH 1 #elif defined(__loongarch64) # define SIMDE_ARCH_LOONGARCH 2 #endif /* LSX: LoongArch 128-bits SIMD extension */ #if defined(__loongarch_sx) # define SIMDE_ARCH_LOONGARCH_LSX 1 #endif /* LASX: LoongArch 256-bits SIMD extension */ #if defined(__loongarch_asx) # define SIMDE_ARCH_LOONGARCH_LASX 2 #endif #endif /* !defined(SIMDE_ARCH_H) */ /* :: End simde-arch.h :: */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-features.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. 
* * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* simde-arch.h is used to determine which features are available according to the compiler. However, we want to make it possible to forcibly enable or disable APIs */ #if !defined(SIMDE_FEATURES_H) #define SIMDE_FEATURES_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-diagnostic.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ /* SIMDe targets a very wide range of standards and compilers, and our * goal is to compile cleanly even with extremely aggressive warnings * (i.e., -Weverything in clang, -Wextra in GCC, /W4 for MSVC, etc.) * treated as errors. * * While our preference is to resolve the underlying issue a given * diagnostic is warning us about, sometimes that's not possible. * Fixing a warning in one compiler may cause problems in another. * Sometimes a warning doesn't really apply to us (false positives), * and sometimes adhering to a warning would mean dropping a feature * we *know* the compiler supports since we have tested specifically * for the compiler or feature. * * When practical, warnings are only disabled for specific code. For * a list of warnings which are enabled by default in all SIMDe code, * see SIMDE_DISABLE_UNWANTED_DIAGNOSTICS. Note that we restore the * warning stack when SIMDe is done parsing, so code which includes * SIMDe is not deprived of these warnings. */ #if !defined(SIMDE_DIAGNOSTIC_H) #define SIMDE_DIAGNOSTIC_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* This is only to help us implement functions like _mm_undefined_ps. 
*/ #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) #undef SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif #if HEDLEY_HAS_WARNING("-Wuninitialized") #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wuninitialized\"") #elif HEDLEY_GCC_VERSION_CHECK(4,2,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("GCC diagnostic ignored \"-Wuninitialized\"") #elif HEDLEY_PGI_VERSION_CHECK(19,10,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 549") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE,unassigned)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,SEC_UNINITIALIZED_MEM_READ,SEC_UNDEFINED_RETURN_VALUE)") #elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("error_messages(off,unassigned)") #elif \ HEDLEY_TI_VERSION_CHECK(16,9,9) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("diag_suppress 551") #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ _Pragma("warning(disable:592)") #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) && !defined(__MSVC_RUNTIME_CHECKS) #define SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ __pragma(warning(disable:4700)) #endif /* GCC emits a lot of "notes" about the ABI being different for things * in newer versions of GCC. We don't really care because all our * functions are inlined and don't generate ABI. 
*/ #if HEDLEY_GCC_VERSION_CHECK(7,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ _Pragma("GCC diagnostic ignored \"-Wpsabi\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PSABI_ #endif /* Since MMX uses x87 FP registers, you're supposed to call _mm_empty() * after each MMX function before any floating point instructions. * Some compilers warn about functions which use MMX functions but * don't call _mm_empty(). However, since SIMDe is implementyng the * MMX API we shouldn't be calling _mm_empty(); we leave it to the * caller to invoke simde_mm_empty(). */ #if HEDLEY_INTEL_VERSION_CHECK(19,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ _Pragma("warning(disable:13200 13203)") #elif defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ __pragma(warning(disable:4799)) #else #define SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ #endif /* Intel is pushing people to use OpenMP SIMD instead of Cilk+, so they * emit a diagnostic if you use #pragma simd instead of * #pragma omp simd. SIMDe supports OpenMP SIMD, you just need to * compile with -qopenmp or -qopenmp-simd and define * SIMDE_ENABLE_OPENMP. Cilk+ is just a fallback. */ #if HEDLEY_INTEL_VERSION_CHECK(18,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ _Pragma("warning(disable:3948)") #else #define SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ #endif /* MSVC emits a diagnostic when we call a function (like * simde_mm_set_epi32) while initializing a struct. We currently do * this a *lot* in the tests. */ #if \ defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ __pragma(warning(disable:4204)) #else #define SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ #endif /* This warning needs a lot of work. It is triggered if all you do is * pass the value to memcpy/__builtin_memcpy, or if you initialize a * member of the union, even if that member takes up the entire union. 
* Last tested with clang-10, hopefully things will improve in the * future; if clang fixes this I'd love to enable it. */ #if \ HEDLEY_HAS_WARNING("-Wconditional-uninitialized") #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ _Pragma("clang diagnostic ignored \"-Wconditional-uninitialized\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ #endif /* This warning is meant to catch things like `0.3 + 0.4 == 0.7`, which * will is false. However, SIMDe uses these operations exclusively * for things like _mm_cmpeq_ps, for which we really do want to check * for equality (or inequality). * * If someone wants to put together a SIMDE_FLOAT_EQUAL(a, op, b) macro * which just wraps a check in some code do disable this diagnostic I'd * be happy to accept it. */ #if \ HEDLEY_HAS_WARNING("-Wfloat-equal") || \ HEDLEY_GCC_VERSION_CHECK(3,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ #endif /* This is because we use HEDLEY_STATIC_ASSERT for static assertions. * If Hedley can't find an implementation it will preprocess to * nothing, which means there will be a trailing semi-colon. */ #if HEDLEY_HAS_WARNING("-Wextra-semi") #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("clang diagnostic ignored \"-Wextra-semi\"") #elif HEDLEY_GCC_VERSION_CHECK(8,1,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ _Pragma("GCC diagnostic ignored \"-Wextra-semi\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ #endif /* We do use a few variadic macros, which technically aren't available * until C99 and C++11, but every compiler I'm aware of has supported * them for much longer. That said, usage is isolated to the test * suite and compilers known to support them. 
*/ #if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ \ _Pragma("clang diagnostic ignored \"-Wvariadic-macros\"") \ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ _Pragma("GCC diagnostic ignored \"-Wvariadic-macros\"") #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ #endif /* emscripten requires us to use a __wasm_unimplemented_simd128__ macro * before we can access certain SIMD intrinsics, but this diagnostic * warns about it being a reserved name. It is a reserved name, but * it's reserved for the compiler and we are using it to convey * information to the compiler. * * This is also used when enabling native aliases since we don't get to * choose the macro names. */ #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ #endif /* Similar to above; types like simde__m128i are reserved due to the * double underscore, but we didn't choose them, Intel did. */ #if HEDLEY_HAS_WARNING("-Wreserved-identifier") #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ #endif /* clang 3.8 warns about the packed attribute being unnecessary when * used in the _mm_loadu_* functions. That *may* be true for version * 3.8, but for later versions it is crucial in order to make unaligned * access safe. */ #if HEDLEY_HAS_WARNING("-Wpacked") #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ _Pragma("clang diagnostic ignored \"-Wpacked\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PACKED_ #endif /* Triggered when assigning a float to a double implicitly. We use * explicit casts in SIMDe, this is only used in the test suite. 
*/ #if HEDLEY_HAS_WARNING("-Wdouble-promotion") #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ _Pragma("clang diagnostic ignored \"-Wdouble-promotion\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_DOUBLE_PROMOTION_ #endif /* Several compilers treat conformant array parameters as VLAs. We * test to make sure we're in C mode (C++ doesn't support CAPs), and * that the version of the standard supports CAPs. We also reject * some buggy compilers like MSVC (the logic is in Hedley if you want * to take a look), but with certain warnings enabled some compilers * still like to emit a diagnostic. */ #if HEDLEY_HAS_WARNING("-Wvla") #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("clang diagnostic ignored \"-Wvla\"") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ _Pragma("GCC diagnostic ignored \"-Wvla\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_VLA_ #endif /* If you add an unused attribute to a function and don't use it, clang * may emit this. */ #if HEDLEY_HAS_WARNING("-Wused-but-marked-unused") #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ _Pragma("clang diagnostic ignored \"-Wused-but-marked-unused\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ #endif #if HEDLEY_HAS_WARNING("-Wpass-failed") #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ _Pragma("clang diagnostic ignored \"-Wpass-failed\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ #endif #if HEDLEY_HAS_WARNING("-Wpadded") #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ _Pragma("clang diagnostic ignored \"-Wpadded\"") #elif HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Likely goes back further */ #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ __pragma(warning(disable:4324)) #else #define SIMDE_DIAGNOSTIC_DISABLE_PADDED_ #endif #if HEDLEY_HAS_WARNING("-Wzero-as-null-pointer-constant") #define SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ _Pragma("clang diagnostic ignored \"-Wzero-as-null-pointer-constant\"") #else #define 
SIMDE_DIAGNOSTIC_DISABLE_ZERO_AS_NULL_POINTER_CONSTANT_ #endif #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_OLD_STYLE_CAST_ #endif #if HEDLEY_HAS_WARNING("-Wcast-function-type") || HEDLEY_GCC_VERSION_CHECK(8,0,0) #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CAST_FUNCTION_TYPE_ #endif /* clang will emit this warning when we use C99 extensions whan not in * C99 mode, even though it does support this. In such cases we check * the compiler and version first, so we know it's not a problem. */ #if HEDLEY_HAS_WARNING("-Wc99-extensions") #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc99-extensions\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_C99_EXTENSIONS_ #endif /* Similar problm as above; we rely on some basic C99 support, but clang * has started warning obut this even in C17 mode with -Weverything. */ #if HEDLEY_HAS_WARNING("-Wdeclaration-after-statement") #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ _Pragma("clang diagnostic ignored \"-Wdeclaration-after-statement\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ #endif /* https://github.com/simd-everywhere/simde/issues/277 */ #if defined(HEDLEY_GCC_VERSION) && HEDLEY_GCC_VERSION_CHECK(4,6,0) && !HEDLEY_GCC_VERSION_CHECK(6,4,0) && defined(__cplusplus) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ _Pragma("GCC diagnostic ignored \"-Wunused-but-set-variable\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ #endif /* This is the warning that you normally define _CRT_SECURE_NO_WARNINGS * to silence, but you have to do that before including anything and * that would require reordering includes. 
*/ #if defined(_MSC_VER) #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ __pragma(warning(disable:4996)) #else #define SIMDE_DIAGNOSTIC_DISABLE_ANNEX_K_ #endif /* Some compilers, such as clang, may use `long long` for 64-bit * integers, but `long long` triggers a diagnostic with * -Wc++98-compat-pedantic which says 'long long' is incompatible with * C++98. */ #if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") #if HEDLEY_HAS_WARNING("-Wc++11-long-long") #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") \ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ _Pragma("clang diagnostic ignored \"-Wc++98-compat-pedantic\"") #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #endif /* Some problem as above */ #if HEDLEY_HAS_WARNING("-Wc++11-long-long") #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ _Pragma("clang diagnostic ignored \"-Wc++11-long-long\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ #endif /* emscripten emits this whenever stdin/stdout/stderr is used in a * macro. */ #if HEDLEY_HAS_WARNING("-Wdisabled-macro-expansion") #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ _Pragma("clang diagnostic ignored \"-Wdisabled-macro-expansion\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_DISABLED_MACRO_EXPANSION_ #endif /* Clang uses C11 generic selections to implement some AltiVec * functions, which triggers this diagnostic when not compiling * in C11 mode */ #if HEDLEY_HAS_WARNING("-Wc11-extensions") #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ #endif /* Clang sometimes triggers this warning in macros in the AltiVec and * NEON headers, or due to missing functions. 
*/ #if HEDLEY_HAS_WARNING("-Wvector-conversion") #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ _Pragma("clang diagnostic ignored \"-Wvector-conversion\"") /* For NEON, the situation with -Wvector-conversion in clang < 10 is * bad enough that we just disable the warning altogether. On x86, * clang has similar issues on several sse4.2+ intrinsics before 3.8. */ #if \ (defined(SIMDE_ARCH_ARM) && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0)) || \ SIMDE_DETECT_CLANG_VERSION_NOT(3,8,0) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif #else #define SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ #endif #if !defined(SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_) #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ #endif /* Prior to 5.0, clang didn't support disabling diagnostics in * statement exprs. As a result, some macros we use don't * properly silence warnings. */ #if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") && HEDLEY_HAS_WARNING("-Wcast-align") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") _Pragma("clang diagnostic ignored \"-Wcast-align\"") #elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-qual") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-qual\"") #elif SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) && HEDLEY_HAS_WARNING("-Wcast-align") #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ _Pragma("clang diagnostic ignored \"-Wcast-align\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ #endif /* SLEEF triggers this a *lot* in their headers */ #if HEDLEY_HAS_WARNING("-Wignored-qualifiers") #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("clang diagnostic ignored \"-Wignored-qualifiers\"") #elif HEDLEY_GCC_VERSION_CHECK(4,3,0) #define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ _Pragma("GCC diagnostic ignored \"-Wignored-qualifiers\"") #else 
#define SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ #endif /* GCC emits this under some circumstances when using __int128 */ #if HEDLEY_GCC_VERSION_CHECK(4,8,0) #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ _Pragma("GCC diagnostic ignored \"-Wpedantic\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ #endif /* MSVC doesn't like (__assume(0), code) and will warn about code being * unreachable, but we want it there because not all compilers * understand the unreachable macro and will complain if it is missing. * I'm planning on adding a new macro to Hedley to handle this a bit * more elegantly, but until then... */ #if defined(HEDLEY_MSVC_VERSION) #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ __pragma(warning(disable:4702)) #elif defined(__clang__) #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ HEDLEY_PRAGMA(clang diagnostic ignored "-Wunreachable-code") #else #define SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ #endif /* This is a false positive from GCC in a few places. */ #if HEDLEY_GCC_VERSION_CHECK(4,7,0) #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ _Pragma("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") #else #define SIMDE_DIAGNOSTIC_DISABLE_MAYBE_UNINITIAZILED_ #endif #if defined(SIMDE_ENABLE_NATIVE_ALIASES) #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_MACRO_ #else #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ #endif /* Some native functions on E2K with instruction set < v6 are declared * as deprecated due to inefficiency. Still they are more efficient * than SIMDe implementation. So we're using them, and switching off * these deprecation warnings. 
*/ #if defined(HEDLEY_MCST_LCC_VERSION) # define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS _Pragma("diag_suppress 1215,1444") # define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS _Pragma("diag_default 1215,1444") #else # define SIMDE_LCC_DISABLE_DEPRECATED_WARNINGS # define SIMDE_LCC_REVERT_DEPRECATED_WARNINGS #endif #define SIMDE_DISABLE_UNWANTED_DIAGNOSTICS \ HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION \ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS_NATIVE_ALIASES_ \ SIMDE_DIAGNOSTIC_DISABLE_PSABI_ \ SIMDE_DIAGNOSTIC_DISABLE_NO_EMMS_INSTRUCTION_ \ SIMDE_DIAGNOSTIC_DISABLE_SIMD_PRAGMA_DEPRECATED_ \ SIMDE_DIAGNOSTIC_DISABLE_CONDITIONAL_UNINITIALIZED_ \ SIMDE_DIAGNOSTIC_DISABLE_DECLARATION_AFTER_STATEMENT_ \ SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL_ \ SIMDE_DIAGNOSTIC_DISABLE_NON_CONSTANT_AGGREGATE_INITIALIZER_ \ SIMDE_DIAGNOSTIC_DISABLE_EXTRA_SEMI_ \ SIMDE_DIAGNOSTIC_DISABLE_VLA_ \ SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ SIMDE_DIAGNOSTIC_DISABLE_PASS_FAILED_ \ SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ \ SIMDE_DIAGNOSTIC_DISABLE_CPP11_LONG_LONG_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_UNUSED_BUT_SET_VARIBALE_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_CASTS_ \ SIMDE_DIAGNOSTIC_DISABLE_BUGGY_VECTOR_CONVERSION_ \ SIMDE_DIAGNOSTIC_DISABLE_RESERVED_ID_ #endif /* !defined(SIMDE_DIAGNOSTIC_H) */ /* :: End simde-diagnostic.h :: */ #if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SVML) #define SIMDE_X86_SVML_NATIVE #endif #endif #if defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512VP2INTERSECT_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VP2INTERSECT) #define SIMDE_X86_AVX512VP2INTERSECT_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if 
!defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512VPOPCNTDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VPOPCNTDQ) #define SIMDE_X86_AVX512VPOPCNTDQ_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512BITALG_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512BITALG) #define SIMDE_X86_AVX512BITALG_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512BITALG_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512VBMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VBMI) #define SIMDE_X86_AVX512VBMI_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VBMI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512VBMI2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VBMI2) #define SIMDE_X86_AVX512VBMI2_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VBMI2_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512VNNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VNNI) #define SIMDE_X86_AVX512VNNI_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VNNI_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX5124VNNIW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX5124VNNIW) #define SIMDE_X86_AVX5124VNNIW_NATIVE #endif #endif #if defined(SIMDE_X86_AVX5124VNNIW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if 
!defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512CD_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512CD) #define SIMDE_X86_AVX512CD_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512CD_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512DQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512DQ) #define SIMDE_X86_AVX512DQ_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512VL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512VL) #define SIMDE_X86_AVX512VL_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512BW_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512BW) #define SIMDE_X86_AVX512BW_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512BW_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512BF16_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512BF16) #define SIMDE_X86_AVX512BF16_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512BF16_NATIVE) && !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_NATIVE #endif #if !defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX512F_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX512F) #define SIMDE_X86_AVX512F_NATIVE #endif #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && !defined(SIMDE_X86_AVX2_NATIVE) #define SIMDE_X86_AVX2_NATIVE #endif #if !defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_FMA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if 
defined(SIMDE_ARCH_X86_FMA) #define SIMDE_X86_FMA_NATIVE #endif #endif #if defined(SIMDE_X86_FMA_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_NATIVE #endif #if !defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX2) #define SIMDE_X86_AVX2_NATIVE #endif #endif #if defined(SIMDE_X86_AVX2_NATIVE) && !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_NATIVE #endif #if !defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_AVX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_AVX) #define SIMDE_X86_AVX_NATIVE #endif #endif #if defined(SIMDE_X86_AVX_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_X86_SSE4_2_NATIVE #endif #if !defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_XOP_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_XOP) #define SIMDE_X86_XOP_NATIVE #endif #endif #if defined(SIMDE_X86_XOP_NATIVE) && !defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_X86_SSE4_2_NATIVE #endif #if !defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE4_2) #define SIMDE_X86_SSE4_2_NATIVE #endif #endif #if defined(SIMDE_X86_SSE4_2_NATIVE) && !defined(SIMDE_X86_SSE4_1_NATIVE) #define SIMDE_X86_SSE4_1_NATIVE #endif #if !defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSE4_1_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE4_1) #define SIMDE_X86_SSE4_1_NATIVE #endif #endif #if defined(SIMDE_X86_SSE4_1_NATIVE) && !defined(SIMDE_X86_SSSE3_NATIVE) #define SIMDE_X86_SSSE3_NATIVE #endif #if !defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSSE3_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSSE3) #define SIMDE_X86_SSSE3_NATIVE #endif #endif #if defined(SIMDE_X86_SSSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NATIVE) #define SIMDE_X86_SSE3_NATIVE #endif #if !defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE3_NO_NATIVE) 
&& !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE3) #define SIMDE_X86_SSE3_NATIVE #endif #endif #if defined(SIMDE_X86_SSE3_NATIVE) && !defined(SIMDE_X86_SSE2_NATIVE) #define SIMDE_X86_SSE2_NATIVE #endif #if !defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE2_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE2) #define SIMDE_X86_SSE2_NATIVE #endif #endif #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_SSE_NATIVE #endif #if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_SSE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_SSE) #define SIMDE_X86_SSE_NATIVE #endif #endif #if !defined(SIMDE_X86_MMX_NATIVE) && !defined(SIMDE_X86_MMX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_MMX) #define SIMDE_X86_MMX_NATIVE #endif #endif #if !defined(SIMDE_X86_GFNI_NATIVE) && !defined(SIMDE_X86_GFNI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_GFNI) #define SIMDE_X86_GFNI_NATIVE #endif #endif #if !defined(SIMDE_X86_PCLMUL_NATIVE) && !defined(SIMDE_X86_PCLMUL_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_PCLMUL) #define SIMDE_X86_PCLMUL_NATIVE #endif #endif #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) && !defined(SIMDE_X86_VPCLMULQDQ_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_VPCLMULQDQ) #define SIMDE_X86_VPCLMULQDQ_NATIVE #endif #endif #if !defined(SIMDE_X86_F16C_NATIVE) && !defined(SIMDE_X86_F16C_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_X86_F16C) #define SIMDE_X86_F16C_NATIVE #endif #endif #if !defined(SIMDE_X86_SVML_NATIVE) && !defined(SIMDE_X86_SVML_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(__INTEL_COMPILER) #define SIMDE_X86_SVML_NATIVE #endif #endif #if defined(HEDLEY_MSVC_VERSION) #pragma warning(push) #pragma warning(disable:4799) #endif #if \ defined(SIMDE_X86_AVX_NATIVE) || defined(SIMDE_X86_GFNI_NATIVE) #include #elif defined(SIMDE_X86_SSE4_2_NATIVE) #include 
#elif defined(SIMDE_X86_SSE4_1_NATIVE) #include #elif defined(SIMDE_X86_SSSE3_NATIVE) #include #elif defined(SIMDE_X86_SSE3_NATIVE) #include #elif defined(SIMDE_X86_SSE2_NATIVE) #include #elif defined(SIMDE_X86_SSE_NATIVE) #include #elif defined(SIMDE_X86_MMX_NATIVE) #include #endif #if defined(SIMDE_X86_XOP_NATIVE) #if defined(_MSC_VER) #include #else #include #endif #endif #if defined(HEDLEY_MSVC_VERSION) #pragma warning(pop) #endif #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_NEON) && defined(SIMDE_ARCH_AARCH64) && SIMDE_ARCH_ARM_CHECK(8,0) #define SIMDE_ARM_NEON_A64V8_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NATIVE) #define SIMDE_ARM_NEON_A32V8_NATIVE #endif #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(8,0) && (__ARM_NEON_FP & 0x02) #define SIMDE_ARM_NEON_A32V8_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_ARM_NEON_A32V7_NATIVE #endif #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A32V7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_NEON) && SIMDE_ARCH_ARM_CHECK(7,0) #define SIMDE_ARM_NEON_A32V7_NATIVE #endif #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) #include #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) #include #endif #endif #if !defined(SIMDE_ARM_SVE_NATIVE) && !defined(SIMDE_ARM_SVE_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_ARM_SVE) #define SIMDE_ARM_SVE_NATIVE #include #endif #endif #if !defined(SIMDE_WASM_SIMD128_NATIVE) && !defined(SIMDE_WASM_SIMD128_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_WASM_SIMD128) #define SIMDE_WASM_SIMD128_NATIVE #endif #endif #if defined(SIMDE_WASM_SIMD128_NATIVE) #include #endif #if 
!defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P9_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(900) #define SIMDE_POWER_ALTIVEC_P9_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P9_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8) #define SIMDE_POWER_ALTIVEC_P8_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P8_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(800) #define SIMDE_POWER_ALTIVEC_P8_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7) #define SIMDE_POWER_ALTIVEC_P7_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P7_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(700) #define SIMDE_POWER_ALTIVEC_P7_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6) #define SIMDE_POWER_ALTIVEC_P6_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P6_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(600) #define SIMDE_POWER_ALTIVEC_P6_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5) #define SIMDE_POWER_ALTIVEC_P5_NATIVE #endif #if !defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) && !defined(SIMDE_POWER_ALTIVEC_P5_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_POWER_ALTIVEC_CHECK(500) #define SIMDE_POWER_ALTIVEC_P5_NATIVE #endif #endif #if !defined(SIMDE_ZARCH_ZVECTOR_15_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_15_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_ZARCH_CHECK(13) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) #define SIMDE_ZARCH_ZVECTOR_15_NATIVE #endif #endif #if !defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_14_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_ZARCH_CHECK(12) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) #define 
SIMDE_ZARCH_ZVECTOR_14_NATIVE #endif #endif #if !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if SIMDE_ARCH_ZARCH_CHECK(11) && defined(SIMDE_ARCH_ZARCH_ZVECTOR) #define SIMDE_ZARCH_ZVECTOR_13_NATIVE #endif #endif #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) /* AltiVec conflicts with lots of stuff. The bool keyword conflicts * with the bool keyword in C++ and the bool macro in C99+ (defined * in stdbool.h). The vector keyword conflicts with std::vector in * C++ if you are `using std;`. * * Luckily AltiVec allows you to use `__vector`/`__bool`/`__pixel` * instead, but altivec.h will unconditionally define * `vector`/`bool`/`pixel` so we need to work around that. * * Unfortunately this means that if your code uses AltiVec directly * it may break. If this is the case you'll want to define * `SIMDE_POWER_ALTIVEC_NO_UNDEF` before including SIMDe. Or, even * better, port your code to use the double-underscore versions. */ #if defined(bool) #undef bool #endif #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #include #if !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) #if defined(vector) #undef vector #endif #if defined(pixel) #undef pixel #endif #if defined(bool) #undef bool #endif #endif /* !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) */ #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) #include #endif /* Use these intsead of vector/pixel/bool in SIMDe. 
*/ #define SIMDE_POWER_ALTIVEC_VECTOR(T) __vector T #define SIMDE_POWER_ALTIVEC_PIXEL __pixel #define SIMDE_POWER_ALTIVEC_BOOL __bool /* Re-define bool if we're using stdbool.h */ #if !defined(__cplusplus) && defined(__bool_true_false_are_defined) && !defined(SIMDE_POWER_ALTIVEC_NO_UNDEF) #define bool _Bool #endif #endif #if !defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) && !defined(SIMDE_MIPS_LOONGSON_MMI_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_MIPS_LOONGSON_MMI) #define SIMDE_MIPS_LOONGSON_MMI_NATIVE 1 #endif #endif #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) #include #endif #if !defined(SIMDE_MIPS_MSA_NATIVE) && !defined(SIMDE_MIPS_MSA_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_MIPS_MSA) #define SIMDE_MIPS_MSA_NATIVE 1 #endif #endif #if defined(SIMDE_MIPS_MSA_NATIVE) #include #endif /* This is used to determine whether or not to fall back on a vector * function in an earlier ISA extensions, as well as whether * we expected any attempts at vectorization to be fruitful or if we * expect to always be running serial code. * * Note that, for some architectures (okay, *one* architecture) there * can be a split where some types are supported for one vector length * but others only for a shorter length. Therefore, it is possible to * provide separate values for float/int/double types. 
*/ #if !defined(SIMDE_NATURAL_VECTOR_SIZE) #if defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (512) #elif defined(SIMDE_X86_AVX2_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (256) #elif defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (256) #define SIMDE_NATURAL_INT_VECTOR_SIZE (128) #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (128) #elif \ defined(SIMDE_X86_SSE2_NATIVE) || \ defined(SIMDE_ARM_NEON_A32V7_NATIVE) || \ defined(SIMDE_WASM_SIMD128_NATIVE) || \ defined(SIMDE_POWER_ALTIVEC_P5_NATIVE) || \ defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) || \ defined(SIMDE_MIPS_MSA_NATIVE) #define SIMDE_NATURAL_VECTOR_SIZE (128) #elif defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE (128) #define SIMDE_NATURAL_INT_VECTOR_SIZE (64) #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE (0) #endif #if !defined(SIMDE_NATURAL_VECTOR_SIZE) #if defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_FLOAT_VECTOR_SIZE #elif defined(SIMDE_NATURAL_INT_VECTOR_SIZE) #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_INT_VECTOR_SIZE #elif defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) #define SIMDE_NATURAL_VECTOR_SIZE SIMDE_NATURAL_DOUBLE_VECTOR_SIZE #else #define SIMDE_NATURAL_VECTOR_SIZE (0) #endif #endif #if !defined(SIMDE_NATURAL_FLOAT_VECTOR_SIZE) #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE #endif #if !defined(SIMDE_NATURAL_INT_VECTOR_SIZE) #define SIMDE_NATURAL_INT_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE #endif #if !defined(SIMDE_NATURAL_DOUBLE_VECTOR_SIZE) #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE SIMDE_NATURAL_VECTOR_SIZE #endif #endif #define SIMDE_NATURAL_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE <= (x))) #define SIMDE_NATURAL_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_VECTOR_SIZE > 0) && (SIMDE_NATURAL_VECTOR_SIZE >= (x))) #define SIMDE_NATURAL_FLOAT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE <= (x))) #define 
SIMDE_NATURAL_FLOAT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_FLOAT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_FLOAT_VECTOR_SIZE >= (x))) #define SIMDE_NATURAL_INT_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE <= (x))) #define SIMDE_NATURAL_INT_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_INT_VECTOR_SIZE > 0) && (SIMDE_NATURAL_INT_VECTOR_SIZE >= (x))) #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_LE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE <= (x))) #define SIMDE_NATURAL_DOUBLE_VECTOR_SIZE_GE(x) ((SIMDE_NATURAL_DOUBLE_VECTOR_SIZE > 0) && (SIMDE_NATURAL_DOUBLE_VECTOR_SIZE >= (x))) /* Native aliases */ #if defined(SIMDE_ENABLE_NATIVE_ALIASES) #if !defined(SIMDE_X86_MMX_NATIVE) #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE2_NATIVE) #define SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE3_NATIVE) #define SIMDE_X86_SSE3_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSSE3_NATIVE) #define SIMDE_X86_SSSE3_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE4_1_NATIVE) #define SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_SSE4_2_NATIVE) #define SIMDE_X86_SSE4_2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX_NATIVE) #define SIMDE_X86_AVX_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX2_NATIVE) #define SIMDE_X86_AVX2_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_FMA_NATIVE) #define SIMDE_X86_FMA_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512F_NATIVE) #define SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VL_NATIVE) #define SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VBMI_NATIVE) #define SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VBMI2_NATIVE) #define SIMDE_X86_AVX512VBMI2_ENABLE_NATIVE_ALIASES #endif #if 
!defined(SIMDE_X86_AVX512BW_NATIVE) #define SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VNNI_NATIVE) #define SIMDE_X86_AVX512VNNI_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX5124VNNIW_NATIVE) #define SIMDE_X86_AVX5124VNNIW_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512BF16_NATIVE) #define SIMDE_X86_AVX512BF16_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512BITALG_NATIVE) #define SIMDE_X86_AVX512BITALG_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512DQ_NATIVE) #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_AVX512CD_NATIVE) #define SIMDE_X86_AVX512CD_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_GFNI_NATIVE) #define SIMDE_X86_GFNI_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_PCLMUL_NATIVE) #define SIMDE_X86_PCLMUL_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_VPCLMULQDQ_NATIVE) #define SIMDE_X86_VPCLMULQDQ_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_X86_F16C_NATIVE) #define SIMDE_X86_F16C_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A32V8_NATIVE) #define SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_ARM_SVE_NATIVE) #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES #endif #if !defined(SIMDE_WASM_SIMD128_NATIVE) #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES #endif #endif /* Are floating point values stored using IEEE 754? Knowing * this at during preprocessing is a bit tricky, mostly because what * we're curious about is how values are stored and not whether the * implementation is fully conformant in terms of rounding, NaN * handling, etc. 
* * For example, if you use -ffast-math or -Ofast on * GCC or clang IEEE 754 isn't strictly followed, therefore IEE 754 * support is not advertised (by defining __STDC_IEC_559__). * * However, what we care about is whether it is safe to assume that * floating point values are stored in IEEE 754 format, in which case * we can provide faster implementations of some functions. * * Luckily every vaugely modern architecture I'm aware of uses IEEE 754- * so we just assume IEEE 754 for now. There is a test which verifies * this, if that test fails sowewhere please let us know and we'll add * an exception for that platform. Meanwhile, you can define * SIMDE_NO_IEEE754_STORAGE. */ #if !defined(SIMDE_IEEE754_STORAGE) && !defined(SIMDE_NO_IEE754_STORAGE) #define SIMDE_IEEE754_STORAGE #endif #if defined(SIMDE_ARCH_ARM_NEON_FP16) #define SIMDE_ARM_NEON_FP16 #endif #if !defined(SIMDE_LOONGARCH_LASX_NATIVE) && !defined(SIMDE_LOONGARCH_LASX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_LOONGARCH_LASX) #define SIMDE_LOONGARCH_LASX_NATIVE #endif #endif #if !defined(SIMDE_LOONGARCH_LSX_NATIVE) && !defined(SIMDE_LOONGARCH_LSX_NO_NATIVE) && !defined(SIMDE_NO_NATIVE) #if defined(SIMDE_ARCH_LOONGARCH_LSX) #define SIMDE_LOONGARCH_LSX_NATIVE #endif #endif #if defined(SIMDE_LOONGARCH_LASX_NATIVE) #include #endif #if defined(SIMDE_LOONGARCH_LSX_NATIVE) #include #endif #endif /* !defined(SIMDE_FEATURES_H) */ /* :: End simde-features.h :: */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-math.h :: */ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, 
distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2017-2020 Evan Nemerson */ /* Attempt to find math functions. Functions may be in , * , compiler built-ins/intrinsics, or platform/architecture * specific headers. In some cases, especially those not built in to * libm, we may need to define our own implementations. */ #if !defined(SIMDE_MATH_H) #define SIMDE_MATH_H 1 /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ #include #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #include #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS /* SLEEF support * https://sleef.org/ * * If you include prior to including SIMDe, SIMDe will use * SLEEF. You can also define SIMDE_MATH_SLEEF_ENABLE prior to * including SIMDe to force the issue. * * Note that SLEEF does requires linking to libsleef. * * By default, SIMDe will use the 1 ULP functions, but if you use * SIMDE_ACCURACY_PREFERENCE of 0 we will use up to 4 ULP. 
This is * only the case for the simde_math_* functions; for code in other * SIMDe headers which calls SLEEF directly we may use functions with * greater error if the API we're implementing is less precise (for * example, SVML guarantees 4 ULP, so we will generally use the 3.5 * ULP functions from SLEEF). */ #if !defined(SIMDE_MATH_SLEEF_DISABLE) #if defined(__SLEEF_H__) #define SIMDE_MATH_SLEEF_ENABLE #endif #endif #if defined(SIMDE_MATH_SLEEF_ENABLE) && !defined(__SLEEF_H__) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_IGNORED_QUALIFIERS_ #include HEDLEY_DIAGNOSTIC_POP #endif #if defined(SIMDE_MATH_SLEEF_ENABLE) && defined(__SLEEF_H__) #if defined(SLEEF_VERSION_MAJOR) #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(SLEEF_VERSION_MAJOR, SLEEF_VERSION_MINOR, SLEEF_VERSION_PATCHLEVEL) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #else #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (HEDLEY_VERSION_ENCODE(3,0,0) >= HEDLEY_VERSION_ENCODE(major, minor, patch)) #endif #else #define SIMDE_MATH_SLEEF_VERSION_CHECK(major, minor, patch) (0) #endif #if defined(__has_builtin) #define SIMDE_MATH_BUILTIN_LIBM(func) __has_builtin(__builtin_##func) #elif \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(4,4,0) #define SIMDE_MATH_BUILTIN_LIBM(func) (1) #else #define SIMDE_MATH_BUILTIN_LIBM(func) (0) #endif #if defined(HUGE_VAL) /* Looks like or has already been included. */ /* The math.h from libc++ (yes, the C header from the C++ standard * library) will define an isnan function, but not an isnan macro * like the C standard requires. So we detect the header guards * macro libc++ uses. 
*/ #if defined(isnan) || (defined(_LIBCPP_MATH_H) && !defined(_LIBCPP_CMATH)) #define SIMDE_MATH_HAVE_MATH_H #elif defined(__cplusplus) #define SIMDE_MATH_HAVE_CMATH #endif #elif defined(__has_include) #if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() #define SIMDE_MATH_HAVE_CMATH #include #elif __has_include() #define SIMDE_MATH_HAVE_MATH_H #include #elif !defined(SIMDE_MATH_NO_LIBM) #define SIMDE_MATH_NO_LIBM #endif #elif !defined(SIMDE_MATH_NO_LIBM) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define SIMDE_MATH_HAVE_CMATH HEDLEY_DIAGNOSTIC_PUSH #if defined(HEDLEY_MSVC_VERSION) /* VS 14 emits this diagnostic about noexcept being used on a * function, which we can't do anything about. */ #pragma warning(disable:4996) #endif #include HEDLEY_DIAGNOSTIC_POP #else #define SIMDE_MATH_HAVE_MATH_H #include #endif #endif #if !defined(SIMDE_MATH_INFINITY) #if \ HEDLEY_HAS_BUILTIN(__builtin_inf) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) #define SIMDE_MATH_INFINITY (__builtin_inf()) #elif defined(INFINITY) #define SIMDE_MATH_INFINITY INFINITY #endif #endif #if !defined(SIMDE_INFINITYF) #if \ HEDLEY_HAS_BUILTIN(__builtin_inff) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) #define SIMDE_MATH_INFINITYF (__builtin_inff()) #elif defined(INFINITYF) #define SIMDE_MATH_INFINITYF INFINITYF #elif defined(SIMDE_MATH_INFINITY) #define SIMDE_MATH_INFINITYF HEDLEY_STATIC_CAST(float, SIMDE_MATH_INFINITY) #endif #endif #if !defined(SIMDE_MATH_NAN) #if \ HEDLEY_HAS_BUILTIN(__builtin_nan) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) #define SIMDE_MATH_NAN (__builtin_nan("")) #elif defined(NAN) #define 
SIMDE_MATH_NAN NAN #endif #endif #if !defined(SIMDE_NANF) #if \ HEDLEY_HAS_BUILTIN(__builtin_nanf) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) #define SIMDE_MATH_NANF (__builtin_nanf("")) #elif defined(NANF) #define SIMDE_MATH_NANF NANF #elif defined(SIMDE_MATH_NAN) #define SIMDE_MATH_NANF HEDLEY_STATIC_CAST(float, SIMDE_MATH_NAN) #endif #endif #if !defined(SIMDE_MATH_PI) #if defined(M_PI) #define SIMDE_MATH_PI M_PI #else #define SIMDE_MATH_PI 3.14159265358979323846 #endif #endif #if !defined(SIMDE_MATH_PIF) #if defined(M_PI) #define SIMDE_MATH_PIF HEDLEY_STATIC_CAST(float, M_PI) #else #define SIMDE_MATH_PIF 3.14159265358979323846f #endif #endif #if !defined(SIMDE_MATH_PI_OVER_180) #define SIMDE_MATH_PI_OVER_180 0.0174532925199432957692369076848861271344287188854172545609719144 #endif #if !defined(SIMDE_MATH_PI_OVER_180F) #define SIMDE_MATH_PI_OVER_180F 0.0174532925199432957692369076848861271344287188854172545609719144f #endif #if !defined(SIMDE_MATH_180_OVER_PI) #define SIMDE_MATH_180_OVER_PI 57.295779513082320876798154814105170332405472466564321549160243861 #endif #if !defined(SIMDE_MATH_180_OVER_PIF) #define SIMDE_MATH_180_OVER_PIF 57.295779513082320876798154814105170332405472466564321549160243861f #endif #if !defined(SIMDE_MATH_FLT_MIN) #if defined(__FLT_MIN__) #define SIMDE_MATH_FLT_MIN __FLT_MIN__ #else #if !defined(FLT_MIN) #if defined(__cplusplus) #include #else #include #endif #endif #define SIMDE_MATH_FLT_MIN FLT_MIN #endif #endif #if !defined(SIMDE_MATH_FLT_MAX) #if defined(__FLT_MAX__) #define SIMDE_MATH_FLT_MAX __FLT_MAX__ #else #if !defined(FLT_MAX) #if defined(__cplusplus) #include #else #include #endif #endif #define SIMDE_MATH_FLT_MAX FLT_MAX #endif #endif #if !defined(SIMDE_MATH_DBL_MIN) #if defined(__DBL_MIN__) #define SIMDE_MATH_DBL_MIN __DBL_MIN__ #else #if !defined(DBL_MIN) #if defined(__cplusplus) #include #else #include #endif #endif 
#define SIMDE_MATH_DBL_MIN DBL_MIN #endif #endif #if !defined(SIMDE_MATH_DBL_MAX) #if defined(__DBL_MAX__) #define SIMDE_MATH_DBL_MAX __DBL_MAX__ #else #if !defined(DBL_MAX) #if defined(__cplusplus) #include #else #include #endif #endif #define SIMDE_MATH_DBL_MAX DBL_MAX #endif #endif /*** Classification macros from C99 ***/ #if !defined(simde_math_isinf) #if SIMDE_MATH_BUILTIN_LIBM(isinf) #define simde_math_isinf(v) __builtin_isinf(v) #elif defined(isinf) || defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_isinf(v) isinf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isinf(v) std::isinf(v) #endif #endif #if !defined(simde_math_isinff) #if HEDLEY_HAS_BUILTIN(__builtin_isinff) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) #define simde_math_isinff(v) __builtin_isinff(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isinff(v) std::isinf(v) #elif defined(simde_math_isinf) #define simde_math_isinff(v) simde_math_isinf(HEDLEY_STATIC_CAST(double, v)) #endif #endif #if !defined(simde_math_isnan) #if SIMDE_MATH_BUILTIN_LIBM(isnan) #define simde_math_isnan(v) __builtin_isnan(v) #elif defined(isnan) || defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_isnan(v) isnan(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isnan(v) std::isnan(v) #endif #endif #if !defined(simde_math_isnanf) #if HEDLEY_HAS_BUILTIN(__builtin_isnanf) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) /* XL C/C++ has __builtin_isnan but not __builtin_isnanf */ #define simde_math_isnanf(v) __builtin_isnanf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isnanf(v) std::isnan(v) #elif defined(simde_math_isnan) #define simde_math_isnanf(v) simde_math_isnan(HEDLEY_STATIC_CAST(double, v)) #endif #endif #if !defined(simde_math_isnormal) #if SIMDE_MATH_BUILTIN_LIBM(isnormal) #define simde_math_isnormal(v) __builtin_isnormal(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_isnormal(v) isnormal(v) 
#elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isnormal(v) std::isnormal(v) #endif #endif #if !defined(simde_math_isnormalf) #if HEDLEY_HAS_BUILTIN(__builtin_isnormalf) #define simde_math_isnormalf(v) __builtin_isnormalf(v) #elif SIMDE_MATH_BUILTIN_LIBM(isnormal) #define simde_math_isnormalf(v) __builtin_isnormal(v) #elif defined(isnormalf) #define simde_math_isnormalf(v) isnormalf(v) #elif defined(isnormal) || defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_isnormalf(v) isnormal(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_isnormalf(v) std::isnormal(v) #elif defined(simde_math_isnormal) #define simde_math_isnormalf(v) simde_math_isnormal(v) #endif #endif #if !defined(simde_math_issubnormalf) #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) #define simde_math_issubnormalf(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) #elif defined(fpclassify) #define simde_math_issubnormalf(v) (fpclassify(v) == FP_SUBNORMAL) #elif defined(SIMDE_IEEE754_STORAGE) #define simde_math_issubnormalf(v) (((simde_float32_as_uint32(v) & UINT32_C(0x7F800000)) == UINT32_C(0)) && ((simde_float32_as_uint32(v) & UINT32_C(0x007FFFFF)) != UINT32_C(0))) #endif #endif #if !defined(simde_math_issubnormal) #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) #define simde_math_issubnormal(v) __builtin_fpclassify(0, 0, 0, 1, 0, v) #elif defined(fpclassify) #define simde_math_issubnormal(v) (fpclassify(v) == FP_SUBNORMAL) #elif defined(SIMDE_IEEE754_STORAGE) #define simde_math_issubnormal(v) (((simde_float64_as_uint64(v) & UINT64_C(0x7FF0000000000000)) == UINT64_C(0)) && ((simde_float64_as_uint64(v) & UINT64_C(0x00FFFFFFFFFFFFF)) != UINT64_C(0))) #endif #endif #if defined(FP_NAN) #define SIMDE_MATH_FP_NAN FP_NAN #else #define SIMDE_MATH_FP_NAN 0 #endif #if defined(FP_INFINITE) #define SIMDE_MATH_FP_INFINITE FP_INFINITE #else #define SIMDE_MATH_FP_INFINITE 1 #endif #if defined(FP_ZERO) #define SIMDE_MATH_FP_ZERO FP_ZERO #else #define SIMDE_MATH_FP_ZERO 2 #endif #if defined(FP_SUBNORMAL) #define 
SIMDE_MATH_FP_SUBNORMAL FP_SUBNORMAL #else #define SIMDE_MATH_FP_SUBNORMAL 3 #endif #if defined(FP_NORMAL) #define SIMDE_MATH_FP_NORMAL FP_NORMAL #else #define SIMDE_MATH_FP_NORMAL 4 #endif static HEDLEY_INLINE int simde_math_fpclassifyf(float v) { #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); #elif defined(fpclassify) return fpclassify(v); #else return simde_math_isnormalf(v) ? SIMDE_MATH_FP_NORMAL : (v == 0.0f) ? SIMDE_MATH_FP_ZERO : simde_math_isnanf(v) ? SIMDE_MATH_FP_NAN : simde_math_isinff(v) ? SIMDE_MATH_FP_INFINITE : SIMDE_MATH_FP_SUBNORMAL; #endif } static HEDLEY_INLINE int simde_math_fpclassify(double v) { #if SIMDE_MATH_BUILTIN_LIBM(fpclassify) return __builtin_fpclassify(SIMDE_MATH_FP_NAN, SIMDE_MATH_FP_INFINITE, SIMDE_MATH_FP_NORMAL, SIMDE_MATH_FP_SUBNORMAL, SIMDE_MATH_FP_ZERO, v); #elif defined(fpclassify) return fpclassify(v); #else return simde_math_isnormal(v) ? SIMDE_MATH_FP_NORMAL : (v == 0.0) ? SIMDE_MATH_FP_ZERO : simde_math_isnan(v) ? SIMDE_MATH_FP_NAN : simde_math_isinf(v) ? 
SIMDE_MATH_FP_INFINITE : SIMDE_MATH_FP_SUBNORMAL; #endif } /*** Manipulation functions ***/ #if !defined(simde_math_nextafter) #if \ (HEDLEY_HAS_BUILTIN(__builtin_nextafter) && !defined(HEDLEY_IBM_VERSION)) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define simde_math_nextafter(x, y) __builtin_nextafter(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_nextafter(x, y) std::nextafter(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_nextafter(x, y) nextafter(x, y) #endif #endif #if !defined(simde_math_nextafterf) #if \ (HEDLEY_HAS_BUILTIN(__builtin_nextafterf) && !defined(HEDLEY_IBM_VERSION)) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define simde_math_nextafterf(x, y) __builtin_nextafterf(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_nextafterf(x, y) std::nextafter(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_nextafterf(x, y) nextafterf(x, y) #endif #endif /*** Functions from C99 ***/ #if !defined(simde_math_abs) #if SIMDE_MATH_BUILTIN_LIBM(abs) #define simde_math_abs(v) __builtin_abs(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_abs(v) std::abs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_abs(v) abs(v) #endif #endif #if !defined(simde_math_labs) #if SIMDE_MATH_BUILTIN_LIBM(labs) #define simde_math_labs(v) __builtin_labs(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_labs(v) std::labs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_labs(v) labs(v) #endif #endif #if !defined(simde_math_llabs) #if SIMDE_MATH_BUILTIN_LIBM(llabs) #define simde_math_llabs(v) __builtin_llabs(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_llabs(v) std::llabs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_llabs(v) llabs(v) #endif #endif #if !defined(simde_math_fabsf) #if SIMDE_MATH_BUILTIN_LIBM(fabsf) #define 
simde_math_fabsf(v) __builtin_fabsf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fabsf(v) std::abs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fabsf(v) fabsf(v) #endif #endif #if !defined(simde_math_acos) #if SIMDE_MATH_BUILTIN_LIBM(acos) #define simde_math_acos(v) __builtin_acos(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_acos(v) std::acos(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_acos(v) acos(v) #endif #endif #if !defined(simde_math_acosf) #if SIMDE_MATH_BUILTIN_LIBM(acosf) #define simde_math_acosf(v) __builtin_acosf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_acosf(v) std::acos(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_acosf(v) acosf(v) #endif #endif #if !defined(simde_math_acosh) #if SIMDE_MATH_BUILTIN_LIBM(acosh) #define simde_math_acosh(v) __builtin_acosh(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_acosh(v) std::acosh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_acosh(v) acosh(v) #endif #endif #if !defined(simde_math_acoshf) #if SIMDE_MATH_BUILTIN_LIBM(acoshf) #define simde_math_acoshf(v) __builtin_acoshf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_acoshf(v) std::acosh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_acoshf(v) acoshf(v) #endif #endif #if !defined(simde_math_asin) #if SIMDE_MATH_BUILTIN_LIBM(asin) #define simde_math_asin(v) __builtin_asin(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_asin(v) std::asin(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_asin(v) asin(v) #endif #endif #if !defined(simde_math_asinf) #if SIMDE_MATH_BUILTIN_LIBM(asinf) #define simde_math_asinf(v) __builtin_asinf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_asinf(v) std::asin(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_asinf(v) asinf(v) #endif #endif #if !defined(simde_math_asinh) #if SIMDE_MATH_BUILTIN_LIBM(asinh) #define simde_math_asinh(v) __builtin_asinh(v) #elif 
defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_asinh(v) std::asinh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_asinh(v) asinh(v) #endif #endif #if !defined(simde_math_asinhf) #if SIMDE_MATH_BUILTIN_LIBM(asinhf) #define simde_math_asinhf(v) __builtin_asinhf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_asinhf(v) std::asinh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_asinhf(v) asinhf(v) #endif #endif #if !defined(simde_math_atan) #if SIMDE_MATH_BUILTIN_LIBM(atan) #define simde_math_atan(v) __builtin_atan(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atan(v) std::atan(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atan(v) atan(v) #endif #endif #if !defined(simde_math_atan2) #if SIMDE_MATH_BUILTIN_LIBM(atan2) #define simde_math_atan2(y, x) __builtin_atan2(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atan2(y, x) std::atan2(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atan2(y, x) atan2(y, x) #endif #endif #if !defined(simde_math_atan2f) #if SIMDE_MATH_BUILTIN_LIBM(atan2f) #define simde_math_atan2f(y, x) __builtin_atan2f(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atan2f(y, x) std::atan2(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atan2f(y, x) atan2f(y, x) #endif #endif #if !defined(simde_math_atanf) #if SIMDE_MATH_BUILTIN_LIBM(atanf) #define simde_math_atanf(v) __builtin_atanf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atanf(v) std::atan(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atanf(v) atanf(v) #endif #endif #if !defined(simde_math_atanh) #if SIMDE_MATH_BUILTIN_LIBM(atanh) #define simde_math_atanh(v) __builtin_atanh(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atanh(v) std::atanh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atanh(v) atanh(v) #endif #endif #if !defined(simde_math_atanhf) #if SIMDE_MATH_BUILTIN_LIBM(atanhf) #define simde_math_atanhf(v) 
__builtin_atanhf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_atanhf(v) std::atanh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_atanhf(v) atanhf(v) #endif #endif #if !defined(simde_math_cbrt) #if SIMDE_MATH_BUILTIN_LIBM(cbrt) #define simde_math_cbrt(v) __builtin_cbrt(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_cbrt(v) std::cbrt(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_cbrt(v) cbrt(v) #endif #endif #if !defined(simde_math_cbrtf) #if SIMDE_MATH_BUILTIN_LIBM(cbrtf) #define simde_math_cbrtf(v) __builtin_cbrtf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_cbrtf(v) std::cbrt(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_cbrtf(v) cbrtf(v) #endif #endif #if !defined(simde_math_ceil) #if SIMDE_MATH_BUILTIN_LIBM(ceil) #define simde_math_ceil(v) __builtin_ceil(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_ceil(v) std::ceil(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_ceil(v) ceil(v) #endif #endif #if !defined(simde_math_ceilf) #if SIMDE_MATH_BUILTIN_LIBM(ceilf) #define simde_math_ceilf(v) __builtin_ceilf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_ceilf(v) std::ceil(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_ceilf(v) ceilf(v) #endif #endif #if !defined(simde_math_copysign) #if SIMDE_MATH_BUILTIN_LIBM(copysign) #define simde_math_copysign(x, y) __builtin_copysign(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_copysign(x, y) std::copysign(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_copysign(x, y) copysign(x, y) #endif #endif #if !defined(simde_math_copysignf) #if SIMDE_MATH_BUILTIN_LIBM(copysignf) #define simde_math_copysignf(x, y) __builtin_copysignf(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_copysignf(x, y) std::copysignf(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_copysignf(x, y) copysignf(x, y) #endif #endif #if !defined(simde_math_signbit) #if 
SIMDE_MATH_BUILTIN_LIBM(signbit) #if (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) #define simde_math_signbit(x) __builtin_signbit(x) #else #define simde_math_signbit(x) __builtin_signbit(HEDLEY_STATIC_CAST(double, (x))) #endif #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_signbit(x) std::signbit(x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_signbit(x) signbit(x) #endif #endif #if !defined(simde_math_cos) #if SIMDE_MATH_BUILTIN_LIBM(cos) #define simde_math_cos(v) __builtin_cos(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_cos(v) std::cos(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_cos(v) cos(v) #endif #endif #if !defined(simde_math_cosf) #if defined(SIMDE_MATH_SLEEF_ENABLE) #if SIMDE_ACCURACY_PREFERENCE < 1 #define simde_math_cosf(v) Sleef_cosf_u35(v) #else #define simde_math_cosf(v) Sleef_cosf_u10(v) #endif #elif SIMDE_MATH_BUILTIN_LIBM(cosf) #define simde_math_cosf(v) __builtin_cosf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_cosf(v) std::cos(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_cosf(v) cosf(v) #endif #endif #if !defined(simde_math_cosh) #if SIMDE_MATH_BUILTIN_LIBM(cosh) #define simde_math_cosh(v) __builtin_cosh(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_cosh(v) std::cosh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_cosh(v) cosh(v) #endif #endif #if !defined(simde_math_coshf) #if SIMDE_MATH_BUILTIN_LIBM(coshf) #define simde_math_coshf(v) __builtin_coshf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_coshf(v) std::cosh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_coshf(v) coshf(v) #endif #endif #if !defined(simde_math_erf) #if SIMDE_MATH_BUILTIN_LIBM(erf) #define simde_math_erf(v) __builtin_erf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_erf(v) std::erf(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_erf(v) erf(v) #endif #endif #if !defined(simde_math_erff) #if 
SIMDE_MATH_BUILTIN_LIBM(erff) #define simde_math_erff(v) __builtin_erff(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_erff(v) std::erf(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_erff(v) erff(v) #endif #endif #if !defined(simde_math_erfc) #if SIMDE_MATH_BUILTIN_LIBM(erfc) #define simde_math_erfc(v) __builtin_erfc(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_erfc(v) std::erfc(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_erfc(v) erfc(v) #endif #endif #if !defined(simde_math_erfcf) #if SIMDE_MATH_BUILTIN_LIBM(erfcf) #define simde_math_erfcf(v) __builtin_erfcf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_erfcf(v) std::erfc(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_erfcf(v) erfcf(v) #endif #endif #if !defined(simde_math_exp) #if SIMDE_MATH_BUILTIN_LIBM(exp) #define simde_math_exp(v) __builtin_exp(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp(v) std::exp(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp(v) exp(v) #endif #endif #if !defined(simde_math_expf) #if SIMDE_MATH_BUILTIN_LIBM(expf) #define simde_math_expf(v) __builtin_expf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expf(v) std::exp(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expf(v) expf(v) #endif #endif #if !defined(simde_math_expm1) #if SIMDE_MATH_BUILTIN_LIBM(expm1) #define simde_math_expm1(v) __builtin_expm1(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expm1(v) std::expm1(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expm1(v) expm1(v) #endif #endif #if !defined(simde_math_expm1f) #if SIMDE_MATH_BUILTIN_LIBM(expm1f) #define simde_math_expm1f(v) __builtin_expm1f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_expm1f(v) std::expm1(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_expm1f(v) expm1f(v) #endif #endif #if !defined(simde_math_exp2) #if SIMDE_MATH_BUILTIN_LIBM(exp2) #define simde_math_exp2(v) 
__builtin_exp2(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp2(v) std::exp2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp2(v) exp2(v) #endif #endif #if !defined(simde_math_exp2f) #if SIMDE_MATH_BUILTIN_LIBM(exp2f) #define simde_math_exp2f(v) __builtin_exp2f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_exp2f(v) std::exp2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_exp2f(v) exp2f(v) #endif #endif #if HEDLEY_HAS_BUILTIN(__builtin_exp10) || HEDLEY_GCC_VERSION_CHECK(3,4,0) # define simde_math_exp10(v) __builtin_exp10(v) #else # define simde_math_exp10(v) pow(10.0, (v)) #endif #if HEDLEY_HAS_BUILTIN(__builtin_exp10f) || HEDLEY_GCC_VERSION_CHECK(3,4,0) # define simde_math_exp10f(v) __builtin_exp10f(v) #else # define simde_math_exp10f(v) powf(10.0f, (v)) #endif #if !defined(simde_math_fabs) #if SIMDE_MATH_BUILTIN_LIBM(fabs) #define simde_math_fabs(v) __builtin_fabs(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fabs(v) std::fabs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fabs(v) fabs(v) #endif #endif #if !defined(simde_math_fabsf) #if SIMDE_MATH_BUILTIN_LIBM(fabsf) #define simde_math_fabsf(v) __builtin_fabsf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fabsf(v) std::fabs(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fabsf(v) fabsf(v) #endif #endif #if !defined(simde_math_floor) #if SIMDE_MATH_BUILTIN_LIBM(floor) #define simde_math_floor(v) __builtin_floor(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_floor(v) std::floor(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_floor(v) floor(v) #endif #endif #if !defined(simde_math_floorf) #if SIMDE_MATH_BUILTIN_LIBM(floorf) #define simde_math_floorf(v) __builtin_floorf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_floorf(v) std::floor(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_floorf(v) floorf(v) #endif #endif #if !defined(simde_math_fma) #if 
SIMDE_MATH_BUILTIN_LIBM(fma) #define simde_math_fma(x, y, z) __builtin_fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fma(x, y, z) std::fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fma(x, y, z) fma(x, y, z) #endif #endif #if !defined(simde_math_fmaf) #if SIMDE_MATH_BUILTIN_LIBM(fmaf) #define simde_math_fmaf(x, y, z) __builtin_fmaf(x, y, z) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fmaf(x, y, z) std::fma(x, y, z) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmaf(x, y, z) fmaf(x, y, z) #endif #endif #if !defined(simde_math_fmax) #if SIMDE_MATH_BUILTIN_LIBM(fmax) #define simde_math_fmax(x, y) __builtin_fmax(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fmax(x, y) std::fmax(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmax(x, y) fmax(x, y) #endif #endif #if !defined(simde_math_fmaxf) #if SIMDE_MATH_BUILTIN_LIBM(fmaxf) #define simde_math_fmaxf(x, y) __builtin_fmaxf(x, y) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_fmaxf(x, y) std::fmax(x, y) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_fmaxf(x, y) fmaxf(x, y) #endif #endif #if !defined(simde_math_hypot) #if SIMDE_MATH_BUILTIN_LIBM(hypot) #define simde_math_hypot(y, x) __builtin_hypot(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_hypot(y, x) std::hypot(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_hypot(y, x) hypot(y, x) #endif #endif #if !defined(simde_math_hypotf) #if SIMDE_MATH_BUILTIN_LIBM(hypotf) #define simde_math_hypotf(y, x) __builtin_hypotf(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_hypotf(y, x) std::hypot(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_hypotf(y, x) hypotf(y, x) #endif #endif #if !defined(simde_math_log) #if SIMDE_MATH_BUILTIN_LIBM(log) #define simde_math_log(v) __builtin_log(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log(v) std::log(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define 
simde_math_log(v) log(v) #endif #endif #if !defined(simde_math_logf) #if SIMDE_MATH_BUILTIN_LIBM(logf) #define simde_math_logf(v) __builtin_logf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_logf(v) std::log(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_logf(v) logf(v) #endif #endif #if !defined(simde_math_logb) #if SIMDE_MATH_BUILTIN_LIBM(logb) #define simde_math_logb(v) __builtin_logb(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_logb(v) std::logb(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_logb(v) logb(v) #endif #endif #if !defined(simde_math_logbf) #if SIMDE_MATH_BUILTIN_LIBM(logbf) #define simde_math_logbf(v) __builtin_logbf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_logbf(v) std::logb(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_logbf(v) logbf(v) #endif #endif #if !defined(simde_math_log1p) #if SIMDE_MATH_BUILTIN_LIBM(log1p) #define simde_math_log1p(v) __builtin_log1p(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log1p(v) std::log1p(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log1p(v) log1p(v) #endif #endif #if !defined(simde_math_log1pf) #if SIMDE_MATH_BUILTIN_LIBM(log1pf) #define simde_math_log1pf(v) __builtin_log1pf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log1pf(v) std::log1p(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log1pf(v) log1pf(v) #endif #endif #if !defined(simde_math_log2) #if SIMDE_MATH_BUILTIN_LIBM(log2) #define simde_math_log2(v) __builtin_log2(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log2(v) std::log2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log2(v) log2(v) #endif #endif #if !defined(simde_math_log2f) #if SIMDE_MATH_BUILTIN_LIBM(log2f) #define simde_math_log2f(v) __builtin_log2f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log2f(v) std::log2(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log2f(v) log2f(v) #endif #endif #if 
!defined(simde_math_log10) #if SIMDE_MATH_BUILTIN_LIBM(log10) #define simde_math_log10(v) __builtin_log10(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log10(v) std::log10(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log10(v) log10(v) #endif #endif #if !defined(simde_math_log10f) #if SIMDE_MATH_BUILTIN_LIBM(log10f) #define simde_math_log10f(v) __builtin_log10f(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_log10f(v) std::log10(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_log10f(v) log10f(v) #endif #endif #if !defined(simde_math_modf) #if SIMDE_MATH_BUILTIN_LIBM(modf) #define simde_math_modf(x, iptr) __builtin_modf(x, iptr) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_modf(x, iptr) std::modf(x, iptr) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_modf(x, iptr) modf(x, iptr) #endif #endif #if !defined(simde_math_modff) #if SIMDE_MATH_BUILTIN_LIBM(modff) #define simde_math_modff(x, iptr) __builtin_modff(x, iptr) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_modff(x, iptr) std::modf(x, iptr) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_modff(x, iptr) modff(x, iptr) #endif #endif #if !defined(simde_math_nearbyint) #if SIMDE_MATH_BUILTIN_LIBM(nearbyint) #define simde_math_nearbyint(v) __builtin_nearbyint(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_nearbyint(v) std::nearbyint(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_nearbyint(v) nearbyint(v) #endif #endif #if !defined(simde_math_nearbyintf) #if SIMDE_MATH_BUILTIN_LIBM(nearbyintf) #define simde_math_nearbyintf(v) __builtin_nearbyintf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_nearbyintf(v) std::nearbyint(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_nearbyintf(v) nearbyintf(v) #endif #endif #if !defined(simde_math_pow) #if SIMDE_MATH_BUILTIN_LIBM(pow) #define simde_math_pow(y, x) __builtin_pow(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_pow(y, x) 
std::pow(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_pow(y, x) pow(y, x) #endif #endif #if !defined(simde_math_powf) #if SIMDE_MATH_BUILTIN_LIBM(powf) #define simde_math_powf(y, x) __builtin_powf(y, x) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_powf(y, x) std::pow(y, x) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_powf(y, x) powf(y, x) #endif #endif #if !defined(simde_math_rint) #if SIMDE_MATH_BUILTIN_LIBM(rint) #define simde_math_rint(v) __builtin_rint(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_rint(v) std::rint(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_rint(v) rint(v) #endif #endif #if !defined(simde_math_rintf) #if SIMDE_MATH_BUILTIN_LIBM(rintf) #define simde_math_rintf(v) __builtin_rintf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_rintf(v) std::rint(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_rintf(v) rintf(v) #endif #endif #if !defined(simde_math_round) #if SIMDE_MATH_BUILTIN_LIBM(round) #define simde_math_round(v) __builtin_round(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_round(v) std::round(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_round(v) round(v) #endif #endif #if !defined(simde_math_roundf) #if SIMDE_MATH_BUILTIN_LIBM(roundf) #define simde_math_roundf(v) __builtin_roundf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_roundf(v) std::round(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_roundf(v) roundf(v) #endif #endif #if !defined(simde_math_roundeven) #if \ (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundeven)) || \ HEDLEY_GCC_VERSION_CHECK(10,0,0) #define simde_math_roundeven(v) __builtin_roundeven(v) #elif defined(simde_math_round) && defined(simde_math_fabs) static HEDLEY_INLINE double simde_math_roundeven(double v) { double rounded = simde_math_round(v); double diff = rounded - v; if (HEDLEY_UNLIKELY(simde_math_fabs(diff) == 0.5) && 
(HEDLEY_STATIC_CAST(int64_t, rounded) & 1)) { rounded = v - diff; } return rounded; } #define simde_math_roundeven simde_math_roundeven #endif #endif #if !defined(simde_math_roundevenf) #if \ (!defined(HEDLEY_EMSCRIPTEN_VERSION) && HEDLEY_HAS_BUILTIN(__builtin_roundevenf)) || \ HEDLEY_GCC_VERSION_CHECK(10,0,0) #define simde_math_roundevenf(v) __builtin_roundevenf(v) #elif defined(simde_math_roundf) && defined(simde_math_fabsf) static HEDLEY_INLINE float simde_math_roundevenf(float v) { float rounded = simde_math_roundf(v); float diff = rounded - v; if (HEDLEY_UNLIKELY(simde_math_fabsf(diff) == 0.5f) && (HEDLEY_STATIC_CAST(int32_t, rounded) & 1)) { rounded = v - diff; } return rounded; } #define simde_math_roundevenf simde_math_roundevenf #endif #endif #if !defined(simde_math_sin) #if SIMDE_MATH_BUILTIN_LIBM(sin) #define simde_math_sin(v) __builtin_sin(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_sin(v) std::sin(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sin(v) sin(v) #endif #endif #if !defined(simde_math_sinf) #if SIMDE_MATH_BUILTIN_LIBM(sinf) #define simde_math_sinf(v) __builtin_sinf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_sinf(v) std::sin(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sinf(v) sinf(v) #endif #endif #if !defined(simde_math_sinh) #if SIMDE_MATH_BUILTIN_LIBM(sinh) #define simde_math_sinh(v) __builtin_sinh(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_sinh(v) std::sinh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sinh(v) sinh(v) #endif #endif #if !defined(simde_math_sinhf) #if SIMDE_MATH_BUILTIN_LIBM(sinhf) #define simde_math_sinhf(v) __builtin_sinhf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_sinhf(v) std::sinh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sinhf(v) sinhf(v) #endif #endif #if !defined(simde_math_sqrt) #if SIMDE_MATH_BUILTIN_LIBM(sqrt) #define simde_math_sqrt(v) __builtin_sqrt(v) #elif defined(SIMDE_MATH_HAVE_CMATH) 
#define simde_math_sqrt(v) std::sqrt(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sqrt(v) sqrt(v) #endif #endif #if !defined(simde_math_sqrtf) #if SIMDE_MATH_BUILTIN_LIBM(sqrtf) #define simde_math_sqrtf(v) __builtin_sqrtf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_sqrtf(v) std::sqrt(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_sqrtf(v) sqrtf(v) #endif #endif #if !defined(simde_math_tan) #if SIMDE_MATH_BUILTIN_LIBM(tan) #define simde_math_tan(v) __builtin_tan(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_tan(v) std::tan(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_tan(v) tan(v) #endif #endif #if !defined(simde_math_tanf) #if SIMDE_MATH_BUILTIN_LIBM(tanf) #define simde_math_tanf(v) __builtin_tanf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_tanf(v) std::tan(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_tanf(v) tanf(v) #endif #endif #if !defined(simde_math_tanh) #if SIMDE_MATH_BUILTIN_LIBM(tanh) #define simde_math_tanh(v) __builtin_tanh(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_tanh(v) std::tanh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_tanh(v) tanh(v) #endif #endif #if !defined(simde_math_tanhf) #if SIMDE_MATH_BUILTIN_LIBM(tanhf) #define simde_math_tanhf(v) __builtin_tanhf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_tanhf(v) std::tanh(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_tanhf(v) tanhf(v) #endif #endif #if !defined(simde_math_trunc) #if SIMDE_MATH_BUILTIN_LIBM(trunc) #define simde_math_trunc(v) __builtin_trunc(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_trunc(v) std::trunc(v) #elif defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_trunc(v) trunc(v) #endif #endif #if !defined(simde_math_truncf) #if SIMDE_MATH_BUILTIN_LIBM(truncf) #define simde_math_truncf(v) __builtin_truncf(v) #elif defined(SIMDE_MATH_HAVE_CMATH) #define simde_math_truncf(v) std::trunc(v) #elif 
defined(SIMDE_MATH_HAVE_MATH_H) #define simde_math_truncf(v) truncf(v) #endif #endif /*** Comparison macros (which don't raise invalid errors) ***/ #if defined(isunordered) #define simde_math_isunordered(x, y) isunordered(x, y) #elif HEDLEY_HAS_BUILTIN(__builtin_isunordered) #define simde_math_isunordered(x, y) __builtin_isunordered(x, y) #else static HEDLEY_INLINE int simde_math_isunordered(double x, double y) { return (x != y) && (x != x || y != y); } #define simde_math_isunordered simde_math_isunordered static HEDLEY_INLINE int simde_math_isunorderedf(float x, float y) { return (x != y) && (x != x || y != y); } #define simde_math_isunorderedf simde_math_isunorderedf #endif #if !defined(simde_math_isunorderedf) #define simde_math_isunorderedf simde_math_isunordered #endif /*** Additional functions not in libm ***/ #if defined(simde_math_fabs) && defined(simde_math_sqrt) && defined(simde_math_exp) static HEDLEY_INLINE double simde_math_cdfnorm(double x) { /* https://www.johndcook.com/blog/cpp_phi/ * Public Domain */ static const double a1 = 0.254829592; static const double a2 = -0.284496736; static const double a3 = 1.421413741; static const double a4 = -1.453152027; static const double a5 = 1.061405429; static const double p = 0.3275911; const int sign = x < 0; x = simde_math_fabs(x) / simde_math_sqrt(2.0); /* A&S formula 7.1.26 */ double t = 1.0 / (1.0 + p * x); double y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_exp(-x * x); return 0.5 * (1.0 + (sign ? 
-y : y)); } #define simde_math_cdfnorm simde_math_cdfnorm #endif #if defined(simde_math_fabsf) && defined(simde_math_sqrtf) && defined(simde_math_expf) static HEDLEY_INLINE float simde_math_cdfnormf(float x) { /* https://www.johndcook.com/blog/cpp_phi/ * Public Domain */ static const float a1 = 0.254829592f; static const float a2 = -0.284496736f; static const float a3 = 1.421413741f; static const float a4 = -1.453152027f; static const float a5 = 1.061405429f; static const float p = 0.3275911f; const int sign = x < 0; x = simde_math_fabsf(x) / simde_math_sqrtf(2.0f); /* A&S formula 7.1.26 */ float t = 1.0f / (1.0f + p * x); float y = 1.0f - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * simde_math_expf(-x * x); return 0.5f * (1.0f + (sign ? -y : y)); } #define simde_math_cdfnormf simde_math_cdfnormf #endif #if !defined(simde_math_cdfnorminv) && defined(simde_math_log) && defined(simde_math_sqrt) /*https://web.archive.org/web/20150910081113/http://home.online.no/~pjacklam/notes/invnorm/impl/sprouse/ltqnorm.c*/ static HEDLEY_INLINE double simde_math_cdfnorminv(double p) { static const double a[] = { -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00 }; static const double b[] = { -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01 }; static const double c[] = { -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00 }; static const double d[] = { 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00 }; static const double low = 0.02425; static const double high = 0.97575; double q, r; if (p < 0 || p > 1) { return 0.0; } else if (p == 0) { return -SIMDE_MATH_INFINITY; } else if (p == 1) { return SIMDE_MATH_INFINITY; } else if (p < low) { q = simde_math_sqrt(-2.0 * simde_math_log(p)); return 
(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); } else if (p > high) { q = simde_math_sqrt(-2.0 * simde_math_log(1.0 - p)); return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); } else { q = p - 0.5; r = q * q; return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); } } #define simde_math_cdfnorminv simde_math_cdfnorminv #endif #if !defined(simde_math_cdfnorminvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) static HEDLEY_INLINE float simde_math_cdfnorminvf(float p) { static const float a[] = { -3.969683028665376e+01f, 2.209460984245205e+02f, -2.759285104469687e+02f, 1.383577518672690e+02f, -3.066479806614716e+01f, 2.506628277459239e+00f }; static const float b[] = { -5.447609879822406e+01f, 1.615858368580409e+02f, -1.556989798598866e+02f, 6.680131188771972e+01f, -1.328068155288572e+01f }; static const float c[] = { -7.784894002430293e-03f, -3.223964580411365e-01f, -2.400758277161838e+00f, -2.549732539343734e+00f, 4.374664141464968e+00f, 2.938163982698783e+00f }; static const float d[] = { 7.784695709041462e-03f, 3.224671290700398e-01f, 2.445134137142996e+00f, 3.754408661907416e+00f }; static const float low = 0.02425f; static const float high = 0.97575f; float q, r; if (p < 0 || p > 1) { return 0.0f; } else if (p == 0) { return -SIMDE_MATH_INFINITYF; } else if (p == 1) { return SIMDE_MATH_INFINITYF; } else if (p < low) { q = simde_math_sqrtf(-2.0f * simde_math_logf(p)); return (((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / (((((d[0] * q + d[1]) * q + d[2]) * q + d[3]) * q + 1)); } else if (p > high) { q = simde_math_sqrtf(-2.0f * simde_math_logf(1.0f - p)); return -(((((c[0] * q + c[1]) * q + c[2]) * q + c[3]) * q + c[4]) * q + c[5]) / (((((d[0] * q + d[1]) * q + d[2]) * q 
+ d[3]) * q + 1)); } else { q = p - 0.5f; r = q * q; return (((((a[0] * r + a[1]) * r + a[2]) * r + a[3]) * r + a[4]) * r + a[5]) * q / (((((b[0] * r + b[1]) * r + b[2]) * r + b[3]) * r + b[4]) * r + 1); } } #define simde_math_cdfnorminvf simde_math_cdfnorminvf #endif #if !defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_copysign) && defined(simde_math_sqrt) static HEDLEY_INLINE double simde_math_erfinv(double x) { /* https://stackoverflow.com/questions/27229371/inverse-error-function-in-c * * The original answer on SO uses a constant of 0.147, but in my * testing 0.14829094707965850830078125 gives a lower average absolute error * (0.0001410958211636170744895935 vs. 0.0001465479290345683693885803). * That said, if your goal is to minimize the *maximum* absolute * error, 0.15449436008930206298828125 provides significantly better * results; 0.0009250640869140625000000000 vs ~ 0.005. */ double tt1, tt2, lnx; double sgn = simde_math_copysign(1.0, x); x = (1.0 - x) * (1.0 + x); lnx = simde_math_log(x); tt1 = 2.0 / (SIMDE_MATH_PI * 0.14829094707965850830078125) + 0.5 * lnx; tt2 = (1.0 / 0.14829094707965850830078125) * lnx; return sgn * simde_math_sqrt(-tt1 + simde_math_sqrt(tt1 * tt1 - tt2)); } #define simde_math_erfinv simde_math_erfinv #endif #if !defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_copysignf) && defined(simde_math_sqrtf) static HEDLEY_INLINE float simde_math_erfinvf(float x) { float tt1, tt2, lnx; float sgn = simde_math_copysignf(1.0f, x); x = (1.0f - x) * (1.0f + x); lnx = simde_math_logf(x); tt1 = 2.0f / (SIMDE_MATH_PIF * 0.14829094707965850830078125f) + 0.5f * lnx; tt2 = (1.0f / 0.14829094707965850830078125f) * lnx; return sgn * simde_math_sqrtf(-tt1 + simde_math_sqrtf(tt1 * tt1 - tt2)); } #define simde_math_erfinvf simde_math_erfinvf #endif #if !defined(simde_math_erfcinv) && defined(simde_math_erfinv) && defined(simde_math_log) && defined(simde_math_sqrt) static HEDLEY_INLINE double 
simde_math_erfcinv(double x) { if(x >= 0.0625 && x < 2.0) { return simde_math_erfinv(1.0 - x); } else if (x < 0.0625 && x >= 1.0e-100) { static const double p[6] = { 0.1550470003116, 1.382719649631, 0.690969348887, -1.128081391617, 0.680544246825, -0.16444156791 }; static const double q[3] = { 0.155024849822, 1.385228141995, 1.000000000000 }; const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / (q[0] + t * (q[1] + t * (q[2]))); } else if (x < 1.0e-100 && x >= SIMDE_MATH_DBL_MIN) { static const double p[4] = { 0.00980456202915, 0.363667889171, 0.97302949837, -0.5374947401 }; static const double q[3] = { 0.00980451277802, 0.363699971544, 1.000000000000 }; const double t = 1.0 / simde_math_sqrt(-simde_math_log(x)); return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / (q[0] + t * (q[1] + t * (q[2]))); } else if (!simde_math_isnormal(x)) { return SIMDE_MATH_INFINITY; } else { return -SIMDE_MATH_INFINITY; } } #define simde_math_erfcinv simde_math_erfcinv #endif #if !defined(simde_math_erfcinvf) && defined(simde_math_erfinvf) && defined(simde_math_logf) && defined(simde_math_sqrtf) static HEDLEY_INLINE float simde_math_erfcinvf(float x) { if(x >= 0.0625f && x < 2.0f) { return simde_math_erfinvf(1.0f - x); } else if (x < 0.0625f && x >= SIMDE_MATH_FLT_MIN) { static const float p[6] = { 0.1550470003116f, 1.382719649631f, 0.690969348887f, -1.128081391617f, 0.680544246825f -0.164441567910f }; static const float q[3] = { 0.155024849822f, 1.385228141995f, 1.000000000000f }; const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); return (p[0] / t + p[1] + t * (p[2] + t * (p[3] + t * (p[4] + t * p[5])))) / (q[0] + t * (q[1] + t * (q[2]))); } else if (x < SIMDE_MATH_FLT_MIN && simde_math_isnormalf(x)) { static const float p[4] = { 0.00980456202915f, 0.36366788917100f, 0.97302949837000f, -0.5374947401000f }; static const float q[3] = { 0.00980451277802f, 0.36369997154400f, 1.00000000000000f }; 
const float t = 1.0f / simde_math_sqrtf(-simde_math_logf(x)); return (p[0] / t + p[1] + t * (p[2] + t * p[3])) / (q[0] + t * (q[1] + t * (q[2]))); } else { return simde_math_isnormalf(x) ? -SIMDE_MATH_INFINITYF : SIMDE_MATH_INFINITYF; } } #define simde_math_erfcinvf simde_math_erfcinvf #endif static HEDLEY_INLINE double simde_math_rad2deg(double radians) { return radians * SIMDE_MATH_180_OVER_PI; } static HEDLEY_INLINE float simde_math_rad2degf(float radians) { return radians * SIMDE_MATH_180_OVER_PIF; } static HEDLEY_INLINE double simde_math_deg2rad(double degrees) { return degrees * SIMDE_MATH_PI_OVER_180; } static HEDLEY_INLINE float simde_math_deg2radf(float degrees) { return degrees * (SIMDE_MATH_PI_OVER_180F); } /*** Saturated arithmetic ***/ static HEDLEY_INLINE int8_t simde_math_adds_i8(int8_t a, int8_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddb_s8(a, b); #else uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a); uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b); uint8_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT8_MAX; if (HEDLEY_STATIC_CAST(int8_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int8_t, r_); #endif } static HEDLEY_INLINE int16_t simde_math_adds_i16(int16_t a, int16_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqaddh_s16(a, b); #else uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a); uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b); uint16_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT16_MAX; if (HEDLEY_STATIC_CAST(int16_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ = a_; } return HEDLEY_STATIC_CAST(int16_t, r_); #endif } static HEDLEY_INLINE int32_t simde_math_adds_i32(int32_t a, int32_t b) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqadds_s32(a, b); #else uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a); uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b); uint32_t r_ = a_ + b_; a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT32_MAX; if (HEDLEY_STATIC_CAST(int32_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) { r_ 
= a_;
    }

    return HEDLEY_STATIC_CAST(int32_t, r_);
  #endif
}

static HEDLEY_INLINE
int64_t
simde_math_adds_i64(int64_t a, int64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqaddd_s64(a, b);
  #else
    uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);
    uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);
    uint64_t r_ = a_ + b_;

    /* a_ becomes the saturation value: INT64_MAX if a >= 0, else the
     * bit pattern of INT64_MIN. */
    a_ = (a_ >> ((8 * sizeof(r_)) - 1)) + INT64_MAX;

    /* Signed overflow iff a and b share a sign that r_ does not; then
     * substitute the saturation value. */
    if (HEDLEY_STATIC_CAST(int64_t, ((a_ ^ b_) | ~(b_ ^ r_))) >= 0) {
      r_ = a_;
    }

    return HEDLEY_STATIC_CAST(int64_t, r_);
  #endif
}

/* Saturating unsigned additions: (r < a) detects wrap-around, and
 * -(r < a) is all-ones in that case, forcing the result to the type's
 * maximum. */
static HEDLEY_INLINE
uint8_t
simde_math_adds_u8(uint8_t a, uint8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqaddb_u8(a, b);
  #else
    uint8_t r = a + b;
    r |= -(r < a);
    return r;
  #endif
}

static HEDLEY_INLINE
uint16_t
simde_math_adds_u16(uint16_t a, uint16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqaddh_u16(a, b);
  #else
    uint16_t r = a + b;
    r |= -(r < a);
    return r;
  #endif
}

static HEDLEY_INLINE
uint32_t
simde_math_adds_u32(uint32_t a, uint32_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqadds_u32(a, b);
  #else
    uint32_t r = a + b;
    r |= -(r < a);
    return r;
  #endif
}

static HEDLEY_INLINE
uint64_t
simde_math_adds_u64(uint64_t a, uint64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqaddd_u64(a, b);
  #else
    uint64_t r = a + b;
    r |= -(r < a);
    return r;
  #endif
}

/* Saturating signed subtractions: clamp to the type's range instead of
 * wrapping on overflow. */
static HEDLEY_INLINE
int8_t
simde_math_subs_i8(int8_t a, int8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubb_s8(a, b);
  #else
    uint8_t a_ = HEDLEY_STATIC_CAST(uint8_t, a);
    uint8_t b_ = HEDLEY_STATIC_CAST(uint8_t, b);
    uint8_t r_ = a_ - b_;

    /* a_ becomes the saturation value: INT8_MAX if a >= 0, else the bit
     * pattern of INT8_MIN. */
    a_ = (a_ >> 7) + INT8_MAX;

    /* Subtraction overflow iff a and b differ in sign and the raw
     * difference r_ differs in sign from a. */
    if (HEDLEY_STATIC_CAST(int8_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
      r_ = a_;
    }

    return HEDLEY_STATIC_CAST(int8_t, r_);
  #endif
}

static HEDLEY_INLINE
int16_t
simde_math_subs_i16(int16_t a, int16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubh_s16(a, b);
  #else
    uint16_t a_ = HEDLEY_STATIC_CAST(uint16_t, a);
    uint16_t b_ = HEDLEY_STATIC_CAST(uint16_t, b);
    uint16_t r_ = a_ - b_;

    a_ = (a_ >> 15) + INT16_MAX;

    if (HEDLEY_STATIC_CAST(int16_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
      r_ = a_;
    }

    return HEDLEY_STATIC_CAST(int16_t, r_);
  #endif
}

static HEDLEY_INLINE
int32_t
simde_math_subs_i32(int32_t a, int32_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubs_s32(a, b);
  #else
    uint32_t a_ = HEDLEY_STATIC_CAST(uint32_t, a);
    uint32_t b_ = HEDLEY_STATIC_CAST(uint32_t, b);
    uint32_t r_ = a_ - b_;

    a_ = (a_ >> 31) + INT32_MAX;

    if (HEDLEY_STATIC_CAST(int32_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
      r_ = a_;
    }

    return HEDLEY_STATIC_CAST(int32_t, r_);
  #endif
}

static HEDLEY_INLINE
int64_t
simde_math_subs_i64(int64_t a, int64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubd_s64(a, b);
  #else
    uint64_t a_ = HEDLEY_STATIC_CAST(uint64_t, a);
    uint64_t b_ = HEDLEY_STATIC_CAST(uint64_t, b);
    uint64_t r_ = a_ - b_;

    a_ = (a_ >> 63) + INT64_MAX;

    if (HEDLEY_STATIC_CAST(int64_t, (a_ ^ b_) & (a_ ^ r_)) < 0) {
      r_ = a_;
    }

    return HEDLEY_STATIC_CAST(int64_t, r_);
  #endif
}

/* Saturating unsigned subtractions: (res <= a) is false only when the
 * subtraction wrapped, so res &= -(res <= a) clamps the result to 0 on
 * underflow. */
static HEDLEY_INLINE
uint8_t
simde_math_subs_u8(uint8_t a, uint8_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubb_u8(a, b);
  #else
    uint8_t res = a - b;
    res &= -(res <= a);
    return res;
  #endif
}

static HEDLEY_INLINE
uint16_t
simde_math_subs_u16(uint16_t a, uint16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubh_u16(a, b);
  #else
    uint16_t res = a - b;
    res &= -(res <= a);
    return res;
  #endif
}

static HEDLEY_INLINE
uint32_t
simde_math_subs_u32(uint32_t a, uint32_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubs_u32(a, b);
  #else
    uint32_t res = a - b;
    res &= -(res <= a);
    return res;
  #endif
}

static HEDLEY_INLINE
uint64_t
simde_math_subs_u64(uint64_t a, uint64_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vqsubd_u64(a, b);
  #else
    uint64_t res = a - b;
    res &= -(res <= a);
    return res;
  #endif
}

HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_MATH_H) */
/* :: End simde-math.h :: */
/* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */
/* 8025b06b07f4789aad472563d363f86671d9e372 */
/* :: Begin simde-constify.h ::
*/ /* SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, copy, * modify, merge, publish, distribute, sublicense, and/or sell copies * of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Copyright: * 2020 Evan Nemerson */ /* Constify macros. For internal use only. * * These are used to make it possible to call a function which takes * an Integer Constant Expression (ICE) using a compile time constant. * Technically it would also be possible to use a value not trivially * known by the compiler, but there would be a siginficant performance * hit (a switch switch is used). * * The basic idea is pretty simple; we just emit a do while loop which * contains a switch with a case for every possible value of the * constant. * * As long as the value you pass to the function in constant, pretty * much any copmiler shouldn't have a problem generating exactly the * same code as if you had used an ICE. 
* * This is intended to be used in the SIMDe implementations of * functions the compilers require to be an ICE, but the other benefit * is that if we also disable the warnings from * SIMDE_REQUIRE_CONSTANT_RANGE we can actually just allow the tests * to use non-ICE parameters */ #if !defined(SIMDE_CONSTIFY_H) #define SIMDE_CONSTIFY_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #define SIMDE_CONSTIFY_2_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_4_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_8_(func_name, result, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_16_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_32_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ case 16: result = func_name(__VA_ARGS__, 16); break; \ case 17: result = func_name(__VA_ARGS__, 17); break; \ case 18: result = func_name(__VA_ARGS__, 18); break; \ case 19: result = func_name(__VA_ARGS__, 19); break; \ case 20: result = func_name(__VA_ARGS__, 20); break; \ case 21: result = func_name(__VA_ARGS__, 21); break; \ case 22: result = func_name(__VA_ARGS__, 22); break; \ case 23: result = func_name(__VA_ARGS__, 23); break; \ case 24: result = func_name(__VA_ARGS__, 24); break; \ case 25: result = func_name(__VA_ARGS__, 25); break; \ case 26: result = func_name(__VA_ARGS__, 26); break; \ case 27: result = func_name(__VA_ARGS__, 27); break; \ case 28: result = func_name(__VA_ARGS__, 28); break; \ case 29: result = func_name(__VA_ARGS__, 29); break; \ case 30: result = func_name(__VA_ARGS__, 30); break; \ case 31: result = func_name(__VA_ARGS__, 31); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_64_(func_name, result, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: result = func_name(__VA_ARGS__, 0); break; \ case 1: result = func_name(__VA_ARGS__, 1); break; \ case 2: result = func_name(__VA_ARGS__, 2); break; \ case 3: result = func_name(__VA_ARGS__, 3); break; \ case 4: result = func_name(__VA_ARGS__, 4); break; \ case 5: result = func_name(__VA_ARGS__, 5); break; \ case 6: result = func_name(__VA_ARGS__, 6); break; \ case 7: result = func_name(__VA_ARGS__, 7); break; \ case 8: result = func_name(__VA_ARGS__, 8); break; \ case 9: result = func_name(__VA_ARGS__, 9); break; \ case 10: result = func_name(__VA_ARGS__, 10); break; \ case 11: result = func_name(__VA_ARGS__, 11); break; \ case 12: result = func_name(__VA_ARGS__, 12); break; \ case 13: result = func_name(__VA_ARGS__, 13); break; \ case 14: result = func_name(__VA_ARGS__, 14); break; \ case 15: result = func_name(__VA_ARGS__, 15); break; \ case 16: result = func_name(__VA_ARGS__, 16); break; \ case 17: result = func_name(__VA_ARGS__, 17); break; \ case 18: result = func_name(__VA_ARGS__, 18); break; \ case 19: result = func_name(__VA_ARGS__, 19); break; \ case 20: result = func_name(__VA_ARGS__, 20); break; \ case 21: result = func_name(__VA_ARGS__, 21); break; \ case 22: result = func_name(__VA_ARGS__, 22); break; \ case 23: result = func_name(__VA_ARGS__, 23); break; \ case 24: result = func_name(__VA_ARGS__, 24); break; \ case 25: result = func_name(__VA_ARGS__, 25); break; \ case 26: result = func_name(__VA_ARGS__, 26); break; \ case 27: result = func_name(__VA_ARGS__, 27); break; \ case 28: result = func_name(__VA_ARGS__, 28); break; \ case 29: result = func_name(__VA_ARGS__, 29); break; \ case 30: result = func_name(__VA_ARGS__, 30); break; \ case 31: result = func_name(__VA_ARGS__, 31); break; \ case 32: result = func_name(__VA_ARGS__, 32); break; \ case 33: result = func_name(__VA_ARGS__, 33); break; \ case 34: result = func_name(__VA_ARGS__, 34); break; \ case 35: result = func_name(__VA_ARGS__, 35); break; \ case 36: 
result = func_name(__VA_ARGS__, 36); break; \ case 37: result = func_name(__VA_ARGS__, 37); break; \ case 38: result = func_name(__VA_ARGS__, 38); break; \ case 39: result = func_name(__VA_ARGS__, 39); break; \ case 40: result = func_name(__VA_ARGS__, 40); break; \ case 41: result = func_name(__VA_ARGS__, 41); break; \ case 42: result = func_name(__VA_ARGS__, 42); break; \ case 43: result = func_name(__VA_ARGS__, 43); break; \ case 44: result = func_name(__VA_ARGS__, 44); break; \ case 45: result = func_name(__VA_ARGS__, 45); break; \ case 46: result = func_name(__VA_ARGS__, 46); break; \ case 47: result = func_name(__VA_ARGS__, 47); break; \ case 48: result = func_name(__VA_ARGS__, 48); break; \ case 49: result = func_name(__VA_ARGS__, 49); break; \ case 50: result = func_name(__VA_ARGS__, 50); break; \ case 51: result = func_name(__VA_ARGS__, 51); break; \ case 52: result = func_name(__VA_ARGS__, 52); break; \ case 53: result = func_name(__VA_ARGS__, 53); break; \ case 54: result = func_name(__VA_ARGS__, 54); break; \ case 55: result = func_name(__VA_ARGS__, 55); break; \ case 56: result = func_name(__VA_ARGS__, 56); break; \ case 57: result = func_name(__VA_ARGS__, 57); break; \ case 58: result = func_name(__VA_ARGS__, 58); break; \ case 59: result = func_name(__VA_ARGS__, 59); break; \ case 60: result = func_name(__VA_ARGS__, 60); break; \ case 61: result = func_name(__VA_ARGS__, 61); break; \ case 62: result = func_name(__VA_ARGS__, 62); break; \ case 63: result = func_name(__VA_ARGS__, 63); break; \ default: result = default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_2_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_4_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_8_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_16_NO_RESULT_(func_name, default_case, imm, ...) \ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_32_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ case 16: func_name(__VA_ARGS__, 16); break; \ case 17: func_name(__VA_ARGS__, 17); break; \ case 18: func_name(__VA_ARGS__, 18); break; \ case 19: func_name(__VA_ARGS__, 19); break; \ case 20: func_name(__VA_ARGS__, 20); break; \ case 21: func_name(__VA_ARGS__, 21); break; \ case 22: func_name(__VA_ARGS__, 22); break; \ case 23: func_name(__VA_ARGS__, 23); break; \ case 24: func_name(__VA_ARGS__, 24); break; \ case 25: func_name(__VA_ARGS__, 25); break; \ case 26: func_name(__VA_ARGS__, 26); break; \ case 27: func_name(__VA_ARGS__, 27); break; \ case 28: func_name(__VA_ARGS__, 28); break; \ case 29: func_name(__VA_ARGS__, 29); break; \ case 30: func_name(__VA_ARGS__, 30); break; \ case 31: func_name(__VA_ARGS__, 31); break; \ default: default_case; break; \ } \ } while (0) #define SIMDE_CONSTIFY_64_NO_RESULT_(func_name, default_case, imm, ...) 
\ do { \ switch(imm) { \ case 0: func_name(__VA_ARGS__, 0); break; \ case 1: func_name(__VA_ARGS__, 1); break; \ case 2: func_name(__VA_ARGS__, 2); break; \ case 3: func_name(__VA_ARGS__, 3); break; \ case 4: func_name(__VA_ARGS__, 4); break; \ case 5: func_name(__VA_ARGS__, 5); break; \ case 6: func_name(__VA_ARGS__, 6); break; \ case 7: func_name(__VA_ARGS__, 7); break; \ case 8: func_name(__VA_ARGS__, 8); break; \ case 9: func_name(__VA_ARGS__, 9); break; \ case 10: func_name(__VA_ARGS__, 10); break; \ case 11: func_name(__VA_ARGS__, 11); break; \ case 12: func_name(__VA_ARGS__, 12); break; \ case 13: func_name(__VA_ARGS__, 13); break; \ case 14: func_name(__VA_ARGS__, 14); break; \ case 15: func_name(__VA_ARGS__, 15); break; \ case 16: func_name(__VA_ARGS__, 16); break; \ case 17: func_name(__VA_ARGS__, 17); break; \ case 18: func_name(__VA_ARGS__, 18); break; \ case 19: func_name(__VA_ARGS__, 19); break; \ case 20: func_name(__VA_ARGS__, 20); break; \ case 21: func_name(__VA_ARGS__, 21); break; \ case 22: func_name(__VA_ARGS__, 22); break; \ case 23: func_name(__VA_ARGS__, 23); break; \ case 24: func_name(__VA_ARGS__, 24); break; \ case 25: func_name(__VA_ARGS__, 25); break; \ case 26: func_name(__VA_ARGS__, 26); break; \ case 27: func_name(__VA_ARGS__, 27); break; \ case 28: func_name(__VA_ARGS__, 28); break; \ case 29: func_name(__VA_ARGS__, 29); break; \ case 30: func_name(__VA_ARGS__, 30); break; \ case 31: func_name(__VA_ARGS__, 31); break; \ case 32: func_name(__VA_ARGS__, 32); break; \ case 33: func_name(__VA_ARGS__, 33); break; \ case 34: func_name(__VA_ARGS__, 34); break; \ case 35: func_name(__VA_ARGS__, 35); break; \ case 36: func_name(__VA_ARGS__, 36); break; \ case 37: func_name(__VA_ARGS__, 37); break; \ case 38: func_name(__VA_ARGS__, 38); break; \ case 39: func_name(__VA_ARGS__, 39); break; \ case 40: func_name(__VA_ARGS__, 40); break; \ case 41: func_name(__VA_ARGS__, 41); break; \ case 42: func_name(__VA_ARGS__, 42); break; \ case 43: 
func_name(__VA_ARGS__, 43); break; \ case 44: func_name(__VA_ARGS__, 44); break; \ case 45: func_name(__VA_ARGS__, 45); break; \ case 46: func_name(__VA_ARGS__, 46); break; \ case 47: func_name(__VA_ARGS__, 47); break; \ case 48: func_name(__VA_ARGS__, 48); break; \ case 49: func_name(__VA_ARGS__, 49); break; \ case 50: func_name(__VA_ARGS__, 50); break; \ case 51: func_name(__VA_ARGS__, 51); break; \ case 52: func_name(__VA_ARGS__, 52); break; \ case 53: func_name(__VA_ARGS__, 53); break; \ case 54: func_name(__VA_ARGS__, 54); break; \ case 55: func_name(__VA_ARGS__, 55); break; \ case 56: func_name(__VA_ARGS__, 56); break; \ case 57: func_name(__VA_ARGS__, 57); break; \ case 58: func_name(__VA_ARGS__, 58); break; \ case 59: func_name(__VA_ARGS__, 59); break; \ case 60: func_name(__VA_ARGS__, 60); break; \ case 61: func_name(__VA_ARGS__, 61); break; \ case 62: func_name(__VA_ARGS__, 62); break; \ case 63: func_name(__VA_ARGS__, 63); break; \ default: default_case; break; \ } \ } while (0) HEDLEY_DIAGNOSTIC_POP #endif /* :: End simde-constify.h :: */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin simde-align.h :: */ /* Alignment * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. For * details, see the Creative Commons Zero 1.0 Universal license at * * * SPDX-License-Identifier: CC0-1.0 * ********************************************************************** * * This is portability layer which should help iron out some * differences across various compilers, as well as various verisons of * C and C++. * * It was originally developed for SIMD Everywhere * (), but since its only * dependency is Hedley (, also CC0) * it can easily be used in other projects, so please feel free to do * so. 
* * If you do use this in your project, please keep a link to SIMDe in * your code to remind you where to report any bugs and/or check for * updated versions. * * # API Overview * * The API has several parts, and most macros have a few variations. * There are APIs for declaring aligned fields/variables, optimization * hints, and run-time alignment checks. * * Briefly, macros ending with "_TO" take numeric values and are great * when you know the value you would like to use. Macros ending with * "_LIKE", on the other hand, accept a type and are used when you want * to use the alignment of a type instead of hardcoding a value. * * Documentation for each section of the API is inline. * * True to form, MSVC is the main problem and imposes several * limitations on the effectiveness of the APIs. Detailed descriptions * of the limitations of each macro are inline, but in general: * * * On C11+ or C++11+ code written using this API will work. The * ASSUME macros may or may not generate a hint to the compiler, but * that is only an optimization issue and will not actually cause * failures. * * If you're using pretty much any compiler other than MSVC, * everything should basically work as well as in C11/C++11. */ #if !defined(SIMDE_ALIGN_H) #define SIMDE_ALIGN_H /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* I know this seems a little silly, but some non-hosted compilers * don't have stddef.h, so we try to accomodate them. 
*/ #if !defined(SIMDE_ALIGN_SIZE_T_) #if defined(__SIZE_TYPE__) #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ #elif defined(__SIZE_T_TYPE__) #define SIMDE_ALIGN_SIZE_T_ __SIZE_TYPE__ #elif defined(__cplusplus) #include #define SIMDE_ALIGN_SIZE_T_ size_t #else #include #define SIMDE_ALIGN_SIZE_T_ size_t #endif #endif #if !defined(SIMDE_ALIGN_INTPTR_T_) #if defined(__INTPTR_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __INTPTR_TYPE__ #elif defined(__PTRDIFF_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_TYPE__ #elif defined(__PTRDIFF_T_TYPE__) #define SIMDE_ALIGN_INTPTR_T_ __PTRDIFF_T_TYPE__ #elif defined(__cplusplus) #include #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t #else #include #define SIMDE_ALIGN_INTPTR_T_ ptrdiff_t #endif #endif #if defined(SIMDE_ALIGN_DEBUG) #if defined(__cplusplus) #include #else #include #endif #endif /* SIMDE_ALIGN_OF(Type) * * The SIMDE_ALIGN_OF macro works like alignof, or _Alignof, or * __alignof, or __alignof__, or __ALIGNOF__, depending on the compiler. * It isn't defined everywhere (only when the compiler has some alignof- * like feature we can use to implement it), but it should work in most * modern compilers, as well as C11 and C++11. * * If we can't find an implementation for SIMDE_ALIGN_OF then the macro * will not be defined, so if you can handle that situation sensibly * you may need to sprinkle some ifdefs into your code. 
*/ #if \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ (0 && HEDLEY_HAS_FEATURE(c_alignof)) #define SIMDE_ALIGN_OF(Type) _Alignof(Type) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ (0 && HEDLEY_HAS_FEATURE(cxx_alignof)) #define SIMDE_ALIGN_OF(Type) alignof(Type) #elif \ HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ HEDLEY_PGI_VERSION_CHECK(19,10,0) || \ HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ defined(__IBM__ALIGNOF__) || \ defined(__clang__) #define SIMDE_ALIGN_OF(Type) __alignof__(Type) #elif \ HEDLEY_IAR_VERSION_CHECK(8,40,0) #define SIMDE_ALIGN_OF(Type) __ALIGNOF__(Type) #elif \ HEDLEY_MSVC_VERSION_CHECK(19,0,0) /* Probably goes back much further, but MS takes down their old docs. * If you can verify that this works in earlier versions please let * me know! */ #define SIMDE_ALIGN_OF(Type) __alignof(Type) #endif /* SIMDE_ALIGN_MAXIMUM: * * This is the maximum alignment that the compiler supports. You can * define the value prior to including SIMDe if necessary, but in that * case *please* submit an issue so we can add the platform to the * detection code. * * Most compilers are okay with types which are aligned beyond what * they think is the maximum, as long as the alignment is a power * of two. Older versions of MSVC is the exception, so we need to cap * the alignment requests at values that the implementation supports. 
* * XL C/C++ will accept values larger than 16 (which is the alignment * of an AltiVec vector), but will not reliably align to the larger * value, so so we cap the value at 16 there. * * If the compiler accepts any power-of-two value within reason then * this macro should be left undefined, and the SIMDE_ALIGN_CAP * macro will just return the value passed to it. */ #if !defined(SIMDE_ALIGN_MAXIMUM) #if defined(HEDLEY_MSVC_VERSION) #if HEDLEY_MSVC_VERSION_CHECK(19, 16, 0) // Visual studio 2017 and newer does not need a max #else #if defined(_M_IX86) || defined(_M_AMD64) #if HEDLEY_MSVC_VERSION_CHECK(19,14,0) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 64 #elif HEDLEY_MSVC_VERSION_CHECK(16,0,0) /* VS 2010 is really a guess based on Wikipedia; if anyone can * test with old VS versions I'd really appreciate it. */ #define SIMDE_ALIGN_PLATFORM_MAXIMUM 32 #else #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 #endif #elif defined(_M_ARM) || defined(_M_ARM64) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 8 #endif #endif #elif defined(HEDLEY_IBM_VERSION) #define SIMDE_ALIGN_PLATFORM_MAXIMUM 16 #endif #endif /* You can mostly ignore these; they're intended for internal use. * If you do need to use them please let me know; if they fulfill * a common use case I'll probably drop the trailing underscore * and make them part of the public API. 
*/ #if defined(SIMDE_ALIGN_PLATFORM_MAXIMUM) #if SIMDE_ALIGN_PLATFORM_MAXIMUM >= 64 #define SIMDE_ALIGN_64_ 64 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 32 #define SIMDE_ALIGN_64_ 32 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 16 #define SIMDE_ALIGN_64_ 16 #define SIMDE_ALIGN_32_ 16 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #elif SIMDE_ALIGN_PLATFORM_MAXIMUM >= 8 #define SIMDE_ALIGN_64_ 8 #define SIMDE_ALIGN_32_ 8 #define SIMDE_ALIGN_16_ 8 #define SIMDE_ALIGN_8_ 8 #else #error Max alignment expected to be >= 8 #endif #else #define SIMDE_ALIGN_64_ 64 #define SIMDE_ALIGN_32_ 32 #define SIMDE_ALIGN_16_ 16 #define SIMDE_ALIGN_8_ 8 #endif /** * SIMDE_ALIGN_CAP(Alignment) * * Returns the minimum of Alignment or SIMDE_ALIGN_MAXIMUM. */ #if defined(SIMDE_ALIGN_MAXIMUM) #define SIMDE_ALIGN_CAP(Alignment) (((Alignment) < (SIMDE_ALIGN_PLATFORM_MAXIMUM)) ? (Alignment) : (SIMDE_ALIGN_PLATFORM_MAXIMUM)) #else #define SIMDE_ALIGN_CAP(Alignment) (Alignment) #endif /* SIMDE_ALIGN_TO(Alignment) * * SIMDE_ALIGN_TO is used to declare types or variables. It basically * maps to the align attribute in most compilers, the align declspec * in MSVC, or _Alignas/alignas in C11/C++11. * * Example: * * struct i32x4 { * SIMDE_ALIGN_TO(16) int32_t values[4]; * } * * Limitations: * * MSVC requires that the Alignment parameter be numeric; you can't do * something like `SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(int))`. This is * unfortunate because that's really how the LIKE macros are * implemented, and I am not aware of a way to get anything like this * to work without using the C11/C++11 keywords. * * It also means that we can't use SIMDE_ALIGN_CAP to limit the * alignment to the value specified, which MSVC also requires, so on * MSVC you should use the `SIMDE_ALIGN_TO_8/16/32/64` macros instead. 
* They work like `SIMDE_ALIGN_TO(SIMDE_ALIGN_CAP(Alignment))` would, * but should be safe to use on MSVC. * * All this is to say that, if you want your code to work on MSVC, you * should use the SIMDE_ALIGN_TO_8/16/32/64 macros below instead of * SIMDE_ALIGN_TO(8/16/32/64). */ #if \ HEDLEY_HAS_ATTRIBUTE(aligned) || \ HEDLEY_GCC_VERSION_CHECK(2,95,0) || \ HEDLEY_CRAY_VERSION_CHECK(8,4,0) || \ HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_PGI_VERSION_CHECK(19,4,0) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,24) || \ HEDLEY_TI_ARMCL_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL2000_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ HEDLEY_TI_CL430_VERSION_CHECK(16,9,0) || \ HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,2) #define SIMDE_ALIGN_TO(Alignment) __attribute__((__aligned__(SIMDE_ALIGN_CAP(Alignment)))) #elif \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) #define SIMDE_ALIGN_TO(Alignment) _Alignas(SIMDE_ALIGN_CAP(Alignment)) #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) #define SIMDE_ALIGN_TO(Alignment) alignas(SIMDE_ALIGN_CAP(Alignment)) #elif \ defined(HEDLEY_MSVC_VERSION) #define SIMDE_ALIGN_TO(Alignment) __declspec(align(Alignment)) /* Unfortunately MSVC can't handle __declspec(align(__alignof(Type))); * the alignment passed to the declspec has to be an integer. */ #define SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE #endif #define SIMDE_ALIGN_TO_64 SIMDE_ALIGN_TO(SIMDE_ALIGN_64_) #define SIMDE_ALIGN_TO_32 SIMDE_ALIGN_TO(SIMDE_ALIGN_32_) #define SIMDE_ALIGN_TO_16 SIMDE_ALIGN_TO(SIMDE_ALIGN_16_) #define SIMDE_ALIGN_TO_8 SIMDE_ALIGN_TO(SIMDE_ALIGN_8_) /* SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) * * SIMDE_ALIGN_ASSUME_TO is semantically similar to C++20's * std::assume_aligned, or __builtin_assume_aligned. It tells the * compiler to assume that the provided pointer is aligned to an * `Alignment`-byte boundary. 
* * If you define SIMDE_ALIGN_DEBUG prior to including this header then * SIMDE_ALIGN_ASSUME_TO will turn into a runtime check. We don't * integrate with NDEBUG in this header, but it may be a good idea to * put something like this in your code: * * #if !defined(NDEBUG) * #define SIMDE_ALIGN_DEBUG * #endif * #include <.../simde-align.h> */ #if \ HEDLEY_HAS_BUILTIN(__builtin_assume_aligned) || \ HEDLEY_GCC_VERSION_CHECK(4,7,0) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) \ HEDLEY_REINTERPRET_CAST(__typeof__(Pointer), __builtin_assume_aligned(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), Alignment)) #elif HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) (__extension__ ({ \ __typeof__(v) simde_assume_aligned_t_ = (Pointer); \ __assume_aligned(simde_assume_aligned_t_, Alignment); \ simde_assume_aligned_t_; \ })) #elif defined(__cplusplus) && (__cplusplus > 201703L) #include #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) std::assume_aligned(Pointer) #else #if defined(__cplusplus) template HEDLEY_ALWAYS_INLINE static T* simde_align_assume_to_unchecked(T* ptr, const size_t alignment) #else HEDLEY_ALWAYS_INLINE static void* simde_align_assume_to_unchecked(void* ptr, const size_t alignment) #endif { HEDLEY_ASSUME((HEDLEY_REINTERPRET_CAST(size_t, (ptr)) % SIMDE_ALIGN_CAP(alignment)) == 0); return ptr; } #if defined(__cplusplus) #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked((Pointer), (Alignment)) #else #define SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) simde_align_assume_to_unchecked(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment)) #endif #endif #if !defined(SIMDE_ALIGN_DEBUG) #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) SIMDE_ALIGN_ASSUME_TO_UNCHECKED(Pointer, Alignment) #else #include #if defined(__cplusplus) template static HEDLEY_ALWAYS_INLINE T* 
simde_align_assume_to_checked_uncapped(T* ptr, const size_t alignment, const char* file, int line, const char* ptrname) #else static HEDLEY_ALWAYS_INLINE void* simde_align_assume_to_checked_uncapped(void* ptr, const size_t alignment, const char* file, int line, const char* ptrname) #endif { if (HEDLEY_UNLIKELY((HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment))) != 0)) { fprintf(stderr, "%s:%d: alignment check failed for `%s' (%p %% %u == %u)\n", file, line, ptrname, HEDLEY_REINTERPRET_CAST(const void*, ptr), HEDLEY_STATIC_CAST(unsigned int, SIMDE_ALIGN_CAP(alignment)), HEDLEY_STATIC_CAST(unsigned int, HEDLEY_REINTERPRET_CAST(SIMDE_ALIGN_INTPTR_T_, (ptr)) % HEDLEY_STATIC_CAST(SIMDE_ALIGN_INTPTR_T_, SIMDE_ALIGN_CAP(alignment)))); } return ptr; } #if defined(__cplusplus) #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped((Pointer), (Alignment), __FILE__, __LINE__, #Pointer) #else #define SIMDE_ALIGN_ASSUME_TO(Pointer, Alignment) simde_align_assume_to_checked_uncapped(HEDLEY_CONST_CAST(void*, HEDLEY_REINTERPRET_CAST(const void*, Pointer)), (Alignment), __FILE__, __LINE__, #Pointer) #endif #endif /* SIMDE_ALIGN_LIKE(Type) * SIMDE_ALIGN_LIKE_#(Type) * * The SIMDE_ALIGN_LIKE macros are similar to the SIMDE_ALIGN_TO macros * except instead of an integer they take a type; basically, it's just * a more convenient way to do something like: * * SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) * * The versions with a numeric suffix will fall back on using a numeric * value in the event we can't use SIMDE_ALIGN_OF(Type). This is * mainly for MSVC, where __declspec(align()) can't handle anything * other than hard-coded numeric values. 
*/ #if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_TO) && !defined(SIMDE_ALIGN_OF_UNUSABLE_FOR_LIKE) #define SIMDE_ALIGN_LIKE(Type) SIMDE_ALIGN_TO(SIMDE_ALIGN_OF(Type)) #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_LIKE(Type) #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_LIKE(Type) #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_LIKE(Type) #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_LIKE(Type) #else #define SIMDE_ALIGN_LIKE_64(Type) SIMDE_ALIGN_TO_64 #define SIMDE_ALIGN_LIKE_32(Type) SIMDE_ALIGN_TO_32 #define SIMDE_ALIGN_LIKE_16(Type) SIMDE_ALIGN_TO_16 #define SIMDE_ALIGN_LIKE_8(Type) SIMDE_ALIGN_TO_8 #endif /* SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) * * Tihs is similar to SIMDE_ALIGN_ASSUME_TO, except that it takes a * type instead of a numeric value. */ #if defined(SIMDE_ALIGN_OF) && defined(SIMDE_ALIGN_ASSUME_TO) #define SIMDE_ALIGN_ASSUME_LIKE(Pointer, Type) SIMDE_ALIGN_ASSUME_TO(Pointer, SIMDE_ALIGN_OF(Type)) #endif /* SIMDE_ALIGN_CAST(Type, Pointer) * * SIMDE_ALIGN_CAST is like C++'s reinterpret_cast, but it will try * to silence warnings that some compilers may produce if you try * to assign to a type with increased alignment requirements. * * Note that it does *not* actually attempt to tell the compiler that * the pointer is aligned like the destination should be; that's the * job of the next macro. This macro is necessary for stupid APIs * like _mm_loadu_si128 where the input is a __m128i* but the function * is specifically for data which isn't necessarily aligned to * _Alignof(__m128i). 
*/ #if HEDLEY_HAS_WARNING("-Wcast-align") || defined(__clang__) || HEDLEY_GCC_VERSION_CHECK(3,4,0) #define SIMDE_ALIGN_CAST(Type, Pointer) (__extension__({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("GCC diagnostic ignored \"-Wcast-align\"") \ Type simde_r_ = HEDLEY_REINTERPRET_CAST(Type, Pointer); \ HEDLEY_DIAGNOSTIC_POP \ simde_r_; \ })) #else #define SIMDE_ALIGN_CAST(Type, Pointer) HEDLEY_REINTERPRET_CAST(Type, Pointer) #endif /* SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) * * This is sort of like a combination of a reinterpret_cast and a * SIMDE_ALIGN_ASSUME_LIKE. It uses SIMDE_ALIGN_ASSUME_LIKE to tell * the compiler that the pointer is aligned like the specified type * and casts the pointer to the specified type while suppressing any * warnings from the compiler about casting to a type with greater * alignment requirements. */ #define SIMDE_ALIGN_ASSUME_CAST(Type, Pointer) SIMDE_ALIGN_ASSUME_LIKE(SIMDE_ALIGN_CAST(Type, Pointer), Type) #endif /* !defined(SIMDE_ALIGN_H) */ /* :: End simde-align.h :: */ /* In some situations, SIMDe has to make large performance sacrifices * for small increases in how faithfully it reproduces an API, but * only a relatively small number of users will actually need the API * to be completely accurate. The SIMDE_FAST_* options can be used to * disable these trade-offs. * * They can be enabled by passing -DSIMDE_FAST_MATH to the compiler, or * the individual defines (e.g., -DSIMDE_FAST_NANS) if you only want to * enable some optimizations. Using -ffast-math and/or * -ffinite-math-only will also enable the relevant options. If you * don't want that you can pass -DSIMDE_NO_FAST_* to disable them. */ /* Most programs avoid NaNs by never passing values which can result in * a NaN; for example, if you only pass non-negative values to the sqrt * functions, it won't generate a NaN. 
On some platforms, similar * functions handle NaNs differently; for example, the _mm_min_ps SSE * function will return 0.0 if you pass it (0.0, NaN), but the NEON * vminq_f32 function will return NaN. Making them behave like one * another is expensive; it requires generating a mask of all lanes * with NaNs, then performing the operation (e.g., vminq_f32), then * blending together the result with another vector using the mask. * * If you don't want SIMDe to worry about the differences between how * NaNs are handled on the two platforms, define this (or pass * -ffinite-math-only) */ #if !defined(SIMDE_FAST_MATH) && !defined(SIMDE_NO_FAST_MATH) && defined(__FAST_MATH__) #define SIMDE_FAST_MATH #endif #if !defined(SIMDE_FAST_NANS) && !defined(SIMDE_NO_FAST_NANS) #if defined(SIMDE_FAST_MATH) #define SIMDE_FAST_NANS #elif defined(__FINITE_MATH_ONLY__) #if __FINITE_MATH_ONLY__ #define SIMDE_FAST_NANS #endif #endif #endif /* Many functions are defined as using the current rounding mode * (i.e., the SIMD version of fegetround()) when converting to * an integer. For example, _mm_cvtpd_epi32. Unfortunately, * on some platforms (such as ARMv8+ where round-to-nearest is * always used, regardless of the FPSCR register) this means we * have to first query the current rounding mode, then choose * the proper function (rounnd , ceil, floor, etc.) */ #if !defined(SIMDE_FAST_ROUND_MODE) && !defined(SIMDE_NO_FAST_ROUND_MODE) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_ROUND_MODE #endif /* This controls how ties are rounded. For example, does 10.5 round to * 10 or 11? IEEE 754 specifies round-towards-even, but ARMv7 (for * example) doesn't support it and it must be emulated (which is rather * slow). If you're okay with just using the default for whatever arch * you're on, you should definitely define this. 
* * Note that we don't use this macro to avoid correct implementations * in functions which are explicitly about rounding (such as vrnd* on * NEON, _mm_round_* on x86, etc.); it is only used for code where * rounding is a component in another function, and even then it isn't * usually a problem since such functions will use the current rounding * mode. */ #if !defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_NO_FAST_ROUND_TIES) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_ROUND_TIES #endif /* For functions which convert from one type to another (mostly from * floating point to integer types), sometimes we need to do a range * check and potentially return a different result if the value * falls outside that range. Skipping this check can provide a * performance boost, at the expense of faithfulness to the API we're * emulating. */ #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_NO_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_CONVERSION_RANGE #endif /* Due to differences across platforms, sometimes it can be much * faster for us to allow spurious floating point exceptions, * or to no generate them when we should. 
*/ #if !defined(SIMDE_FAST_EXCEPTIONS) && !defined(SIMDE_NO_FAST_EXCEPTIONS) && defined(SIMDE_FAST_MATH) #define SIMDE_FAST_EXCEPTIONS #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ (HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) #define SIMDE_CHECK_CONSTANT_(expr) (__builtin_constant_p(expr)) #elif defined(__cplusplus) && (__cplusplus > 201703L) #include #define SIMDE_CHECK_CONSTANT_(expr) (std::is_constant_evaluated()) #endif #if !defined(SIMDE_NO_CHECK_IMMEDIATE_CONSTANT) #if defined(SIMDE_CHECK_CONSTANT_) && \ SIMDE_DETECT_CLANG_VERSION_CHECK(9,0,0) && \ (!defined(__apple_build_version__) || ((__apple_build_version__ < 11000000) || (__apple_build_version__ >= 12000000))) #define SIMDE_REQUIRE_CONSTANT(arg) HEDLEY_REQUIRE_MSG(SIMDE_CHECK_CONSTANT_(arg), "`" #arg "' must be constant") #else #define SIMDE_REQUIRE_CONSTANT(arg) #endif #else #define SIMDE_REQUIRE_CONSTANT(arg) #endif #define SIMDE_REQUIRE_RANGE(arg, min, max) \ HEDLEY_REQUIRE_MSG((((arg) >= (min)) && ((arg) <= (max))), "'" #arg "' must be in [" #min ", " #max "]") #define SIMDE_REQUIRE_CONSTANT_RANGE(arg, min, max) \ SIMDE_REQUIRE_CONSTANT(arg) \ SIMDE_REQUIRE_RANGE(arg, min, max) /* A copy of HEDLEY_STATIC_ASSERT, except we don't define an empty * fallback if we can't find an implementation; instead we have to * check if SIMDE_STATIC_ASSERT is defined before using it. 
*/ #if \ !defined(__cplusplus) && ( \ (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ HEDLEY_HAS_FEATURE(c_static_assert) || \ HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ defined(_Static_assert) \ ) /* Sometimes _Static_assert is defined (in cdefs.h) using a symbol which * starts with a double-underscore. This is a system header so we have no * control over it, but since it's a macro it will emit a diagnostic which * prevents compilation with -Werror. */ #if HEDLEY_HAS_WARNING("-Wreserved-identifier") #define SIMDE_STATIC_ASSERT(expr, message) (__extension__({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wreserved-identifier\"") \ _Static_assert(expr, message); \ HEDLEY_DIAGNOSTIC_POP \ })) #else #define SIMDE_STATIC_ASSERT(expr, message) _Static_assert(expr, message) #endif #elif \ (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ HEDLEY_MSVC_VERSION_CHECK(16,0,0) #define SIMDE_STATIC_ASSERT(expr, message) HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) #endif /* Statement exprs */ #if \ HEDLEY_GNUC_VERSION_CHECK(2,95,0) || \ HEDLEY_TINYC_VERSION_CHECK(0,9,26) || \ HEDLEY_INTEL_VERSION_CHECK(9,0,0) || \ HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) || \ HEDLEY_IBM_VERSION_CHECK(11,1,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) #define SIMDE_STATEMENT_EXPR_(expr) (__extension__ expr) #endif /* This is just a convenience macro to make it easy to call a single * function with a specific diagnostic disabled. 
*/ #if defined(SIMDE_STATEMENT_EXPR_) #define SIMDE_DISABLE_DIAGNOSTIC_EXPR_(diagnostic, expr) \ SIMDE_STATEMENT_EXPR_(({ \ HEDLEY_DIAGNOSTIC_PUSH \ diagnostic \ (expr); \ HEDLEY_DIAGNOSTIC_POP \ })) #endif #if defined(SIMDE_CHECK_CONSTANT_) && defined(SIMDE_STATIC_ASSERT) #define SIMDE_ASSERT_CONSTANT_(v) SIMDE_STATIC_ASSERT(SIMDE_CHECK_CONSTANT_(v), #v " must be constant.") #endif #if \ (HEDLEY_HAS_ATTRIBUTE(may_alias) && !defined(HEDLEY_SUNPRO_VERSION)) || \ HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) # define SIMDE_MAY_ALIAS __attribute__((__may_alias__)) #else # define SIMDE_MAY_ALIAS #endif /* Lots of compilers support GCC-style vector extensions, but many don't support all the features. Define different macros depending on support for * SIMDE_VECTOR - Declaring a vector. * SIMDE_VECTOR_OPS - basic operations (binary and unary). * SIMDE_VECTOR_NEGATE - negating a vector * SIMDE_VECTOR_SCALAR - For binary operators, the second argument can be a scalar, in which case the result is as if that scalar had been broadcast to all lanes of a vector. * SIMDE_VECTOR_SUBSCRIPT - Supports array subscript notation for extracting/inserting a single element.= SIMDE_VECTOR can be assumed if any others are defined, the others are independent. 
*/ #if !defined(SIMDE_NO_VECTOR) # if \ HEDLEY_GCC_VERSION_CHECK(4,8,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE # define SIMDE_VECTOR_SCALAR # define SIMDE_VECTOR_SUBSCRIPT # elif HEDLEY_INTEL_VERSION_CHECK(16,0,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE /* ICC only supports SIMDE_VECTOR_SCALAR for constants */ # define SIMDE_VECTOR_SUBSCRIPT # elif \ HEDLEY_GCC_VERSION_CHECK(4,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # elif HEDLEY_SUNPRO_VERSION_CHECK(5,12,0) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # elif HEDLEY_HAS_ATTRIBUTE(vector_size) # define SIMDE_VECTOR(size) __attribute__((__vector_size__(size))) # define SIMDE_VECTOR_OPS # define SIMDE_VECTOR_NEGATE # define SIMDE_VECTOR_SUBSCRIPT # if SIMDE_DETECT_CLANG_VERSION_CHECK(5,0,0) # define SIMDE_VECTOR_SCALAR # endif # endif /* GCC and clang have built-in functions to handle shuffling and converting of vectors, but the implementations are slightly different. This macro is just an abstraction over them. Note that elem_size is in bits but vec_size is in bytes. */ # if !defined(SIMDE_NO_SHUFFLE_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) HEDLEY_DIAGNOSTIC_PUSH /* We don't care about -Wvariadic-macros; all compilers that support * shufflevector/shuffle support them. */ # if HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") # pragma clang diagnostic ignored "-Wc++98-compat-pedantic" # endif # if HEDLEY_HAS_WARNING("-Wvariadic-macros") || HEDLEY_GCC_VERSION_CHECK(4,0,0) # pragma GCC diagnostic ignored "-Wvariadic-macros" # endif # if HEDLEY_HAS_BUILTIN(__builtin_shufflevector) # define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) 
__builtin_shufflevector(a, b, __VA_ARGS__) # elif HEDLEY_GCC_HAS_BUILTIN(__builtin_shuffle,4,7,0) && !defined(__INTEL_COMPILER) # define SIMDE_SHUFFLE_VECTOR_(elem_size, vec_size, a, b, ...) (__extension__ ({ \ int##elem_size##_t SIMDE_VECTOR(vec_size) simde_shuffle_ = { __VA_ARGS__ }; \ __builtin_shuffle(a, b, simde_shuffle_); \ })) # endif HEDLEY_DIAGNOSTIC_POP # endif /* TODO: this actually works on XL C/C++ without SIMDE_VECTOR_SUBSCRIPT but the code needs to be refactored a bit to take advantage. */ # if !defined(SIMDE_NO_CONVERT_VECTOR) && defined(SIMDE_VECTOR_SUBSCRIPT) # if HEDLEY_HAS_BUILTIN(__builtin_convertvector) || HEDLEY_GCC_VERSION_CHECK(9,0,0) # if HEDLEY_GCC_VERSION_CHECK(9,0,0) && !HEDLEY_GCC_VERSION_CHECK(9,3,0) /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93557 */ # define SIMDE_CONVERT_VECTOR_(to, from) ((to) = (__extension__({ \ __typeof__(from) from_ = (from); \ ((void) from_); \ __builtin_convertvector(from_, __typeof__(to)); \ }))) # else # define SIMDE_CONVERT_VECTOR_(to, from) ((to) = __builtin_convertvector((from), __typeof__(to))) # endif # endif # endif #endif /* Since we currently require SUBSCRIPT before using a vector in a union, we define these as dependencies of SUBSCRIPT. They are likely to disappear in the future, once SIMDe learns how to make use of vectors without using the union members. Do not use them in your code unless you're okay with it breaking when SIMDe changes. 
*/ #if defined(SIMDE_VECTOR_SUBSCRIPT) # if defined(SIMDE_VECTOR_OPS) # define SIMDE_VECTOR_SUBSCRIPT_OPS # endif # if defined(SIMDE_VECTOR_SCALAR) # define SIMDE_VECTOR_SUBSCRIPT_SCALAR # endif #endif #if !defined(SIMDE_DISABLE_OPENMP) #if !defined(SIMDE_ENABLE_OPENMP) && ((defined(_OPENMP) && (_OPENMP >= 201307L)) || (defined(_OPENMP_SIMD) && (_OPENMP_SIMD >= 201307L))) || defined(HEDLEY_MCST_LCC_VERSION) #define SIMDE_ENABLE_OPENMP #endif #endif #if !defined(SIMDE_ENABLE_CILKPLUS) && (defined(__cilk) || defined(HEDLEY_INTEL_VERSION)) # define SIMDE_ENABLE_CILKPLUS #endif #if defined(SIMDE_ENABLE_OPENMP) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(omp simd) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(omp simd safelen(l)) # if defined(__clang__) # define SIMDE_VECTORIZE_REDUCTION(r) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wsign-conversion\"") \ HEDLEY_PRAGMA(omp simd reduction(r)) \ HEDLEY_DIAGNOSTIC_POP # else # define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(omp simd reduction(r)) # endif # if !defined(HEDLEY_MCST_LCC_VERSION) # define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd aligned(a)) # else # define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(omp simd) # endif #elif defined(SIMDE_ENABLE_CILKPLUS) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(simd) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(simd vectorlength(l)) # define SIMDE_VECTORIZE_REDUCTION(r) HEDLEY_PRAGMA(simd reduction(r)) # define SIMDE_VECTORIZE_ALIGNED(a) HEDLEY_PRAGMA(simd aligned(a)) #elif defined(__clang__) && !defined(HEDLEY_IBM_VERSION) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(clang loop vectorize(enable)) # define SIMDE_VECTORIZE_SAFELEN(l) HEDLEY_PRAGMA(clang loop vectorize_width(l)) # define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #elif HEDLEY_GCC_VERSION_CHECK(4,9,0) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(GCC ivdep) # define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_REDUCTION(r) 
SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #elif HEDLEY_CRAY_VERSION_CHECK(5,0,0) # define SIMDE_VECTORIZE HEDLEY_PRAGMA(_CRI ivdep) # define SIMDE_VECTORIZE_SAFELEN(l) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_REDUCTION(r) SIMDE_VECTORIZE # define SIMDE_VECTORIZE_ALIGNED(a) #else # define SIMDE_VECTORIZE # define SIMDE_VECTORIZE_SAFELEN(l) # define SIMDE_VECTORIZE_REDUCTION(r) # define SIMDE_VECTORIZE_ALIGNED(a) #endif #define SIMDE_MASK_NZ_(v, mask) (((v) & (mask)) | !((v) & (mask))) /* Intended for checking coverage, you should never use this in production. */ #if defined(SIMDE_NO_INLINE) # define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static #else # define SIMDE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static #endif #if defined(SIMDE_NO_INLINE) # define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_NEVER_INLINE static #elif defined(SIMDE_CONSTRAINED_COMPILATION) # define SIMDE_HUGE_FUNCTION_ATTRIBUTES static #else # define SIMDE_HUGE_FUNCTION_ATTRIBUTES HEDLEY_ALWAYS_INLINE static #endif #if \ HEDLEY_HAS_ATTRIBUTE(unused) || \ HEDLEY_GCC_VERSION_CHECK(2,95,0) # define SIMDE_FUNCTION_POSSIBLY_UNUSED_ __attribute__((__unused__)) #else # define SIMDE_FUNCTION_POSSIBLY_UNUSED_ #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ #if defined(_MSC_VER) # define SIMDE_BEGIN_DECLS_ HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(disable:4996 4204)) HEDLEY_BEGIN_C_DECLS # define SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP HEDLEY_END_C_DECLS #else # define SIMDE_BEGIN_DECLS_ \ HEDLEY_DIAGNOSTIC_PUSH \ SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ \ HEDLEY_BEGIN_C_DECLS # define SIMDE_END_DECLS_ \ HEDLEY_END_C_DECLS \ HEDLEY_DIAGNOSTIC_POP #endif #if defined(__SIZEOF_INT128__) # define SIMDE_HAVE_INT128_ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_PEDANTIC_ typedef __int128 simde_int128; typedef unsigned __int128 simde_uint128; HEDLEY_DIAGNOSTIC_POP #endif #if !defined(SIMDE_ENDIAN_LITTLE) # define SIMDE_ENDIAN_LITTLE 1234 #endif #if 
!defined(SIMDE_ENDIAN_BIG) # define SIMDE_ENDIAN_BIG 4321 #endif #if !defined(SIMDE_ENDIAN_ORDER) /* GCC (and compilers masquerading as GCC) define __BYTE_ORDER__. */ # if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG /* TI defines _BIG_ENDIAN or _LITTLE_ENDIAN */ # elif defined(_BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # elif defined(_LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE /* We know the endianness of some common architectures. Common * architectures not listed (ARM, POWER, MIPS, etc.) here are * bi-endian. */ # elif defined(__amd64) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__s390x__) || defined(__zarch__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG /* Looks like we'll have to rely on the platform. If we're missing a * platform, please let us know. 
*/ # elif defined(_WIN32) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(sun) || defined(__sun) /* Solaris */ # include # if defined(_LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(_BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__APPLE__) # include # if defined(__LITTLE_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BIG_ENDIAN__) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__bsdi__) || defined(__DragonFly__) || defined(BSD) # include # if defined(__BYTE_ORDER) && (__BYTE_ORDER == __LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER) && (__BYTE_ORDER == __BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # elif defined(__linux__) || defined(__linux) || defined(__gnu_linux__) # include # if defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_LITTLE # elif defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN) # define SIMDE_ENDIAN_ORDER SIMDE_ENDIAN_BIG # endif # endif #endif #if \ HEDLEY_HAS_BUILTIN(__builtin_bswap64) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) #define simde_bswap64(v) __builtin_bswap64(v) #elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) #define simde_bswap64(v) _byteswap_uint64(v) #else SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_bswap64(uint64_t v) { return ((v & (((uint64_t) 0xff) << 56)) >> 56) | ((v & (((uint64_t) 0xff) << 48)) >> 40) | ((v & (((uint64_t) 0xff) << 40)) >> 24) | ((v & (((uint64_t) 0xff) << 32)) >> 8) | ((v & (((uint64_t) 0xff) << 24)) << 8) | ((v & (((uint64_t) 0xff) << 16)) << 24) | ((v & (((uint64_t) 0xff) << 8)) << 40) | ((v & (((uint64_t) 0xff) )) << 56); } #endif #if !defined(SIMDE_ENDIAN_ORDER) # error Unknown 
byte order; please file a bug #else # if SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE # define simde_endian_bswap64_be(value) simde_bswap64(value) # define simde_endian_bswap64_le(value) (value) # elif SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG # define simde_endian_bswap64_be(value) (value) # define simde_endian_bswap64_le(value) simde_bswap64(value) # endif #endif /* TODO: we should at least make an attempt to detect the correct types for simde_float32/float64 instead of just assuming float and double. */ #if !defined(SIMDE_FLOAT32_TYPE) # define SIMDE_FLOAT32_TYPE float # define SIMDE_FLOAT32_C(value) value##f #else # define SIMDE_FLOAT32_C(value) ((SIMDE_FLOAT32_TYPE) value) #endif typedef SIMDE_FLOAT32_TYPE simde_float32; #if !defined(SIMDE_FLOAT64_TYPE) # define SIMDE_FLOAT64_TYPE double # define SIMDE_FLOAT64_C(value) value #else # define SIMDE_FLOAT64_C(value) ((SIMDE_FLOAT64_TYPE) value) #endif typedef SIMDE_FLOAT64_TYPE simde_float64; #if defined(__cplusplus) typedef bool simde_bool; #elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) typedef _Bool simde_bool; #elif defined(bool) typedef bool simde_bool; #else #include typedef bool simde_bool; #endif #if HEDLEY_HAS_WARNING("-Wbad-function-cast") # define SIMDE_CONVERT_FTOI(T,v) \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wbad-function-cast\"") \ HEDLEY_STATIC_CAST(T, (v)) \ HEDLEY_DIAGNOSTIC_POP #else # define SIMDE_CONVERT_FTOI(T,v) ((T) (v)) #endif /* TODO: detect compilers which support this outside of C11 mode */ #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) #define SIMDE_CHECKED_STATIC_CAST(to, from, value) _Generic((value), to: (value), default: (_Generic((value), from: ((to) (value))))) #else #define SIMDE_CHECKED_REINTERPRET_CAST(to, from, value) HEDLEY_REINTERPRET_CAST(to, value) #define SIMDE_CHECKED_STATIC_CAST(to, 
from, value) HEDLEY_STATIC_CAST(to, value) #endif #if HEDLEY_HAS_WARNING("-Wfloat-equal") # define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("clang diagnostic ignored \"-Wfloat-equal\"") #elif HEDLEY_GCC_VERSION_CHECK(3,0,0) # define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL _Pragma("GCC diagnostic ignored \"-Wfloat-equal\"") #else # define SIMDE_DIAGNOSTIC_DISABLE_FLOAT_EQUAL #endif /* Some functions can trade accuracy for speed. For those functions you can control the trade-off using this macro. Possible values: 0: prefer speed 1: reasonable trade-offs 2: prefer accuracy */ #if !defined(SIMDE_ACCURACY_PREFERENCE) # define SIMDE_ACCURACY_PREFERENCE 1 #endif #if defined(__STDC_HOSTED__) # define SIMDE_STDC_HOSTED __STDC_HOSTED__ #else # if \ defined(HEDLEY_PGI_VERSION) || \ defined(HEDLEY_MSVC_VERSION) # define SIMDE_STDC_HOSTED 1 # else # define SIMDE_STDC_HOSTED 0 # endif #endif /* Try to deal with environments without a standard library. */ #if !defined(simde_memcpy) #if HEDLEY_HAS_BUILTIN(__builtin_memcpy) #define simde_memcpy(dest, src, n) __builtin_memcpy(dest, src, n) #endif #endif #if !defined(simde_memset) #if HEDLEY_HAS_BUILTIN(__builtin_memset) #define simde_memset(s, c, n) __builtin_memset(s, c, n) #endif #endif #if !defined(simde_memcmp) #if HEDLEY_HAS_BUILTIN(__builtin_memcmp) #define simde_memcmp(s1, s2, n) __builtin_memcmp(s1, s2, n) #endif #endif #if !defined(simde_memcpy) || !defined(simde_memset) || !defined(simde_memcmp) #if !defined(SIMDE_NO_STRING_H) #if defined(__has_include) #if !__has_include() #define SIMDE_NO_STRING_H #endif #elif (SIMDE_STDC_HOSTED == 0) #define SIMDE_NO_STRING_H #endif #endif #if !defined(SIMDE_NO_STRING_H) #include #if !defined(simde_memcpy) #define simde_memcpy(dest, src, n) memcpy(dest, src, n) #endif #if !defined(simde_memset) #define simde_memset(s, c, n) memset(s, c, n) #endif #if !defined(simde_memcmp) #define simde_memcmp(s1, s2, n) memcmp(s1, s2, n) #endif #else /* These are meant to be portable, not fast. 
If you're hitting them you * should think about providing your own (by defining the simde_memcpy * macro prior to including any SIMDe files) or submitting a patch to * SIMDe so we can detect your system-provided memcpy/memset, like by * adding your compiler to the checks for __builtin_memcpy and/or * __builtin_memset. */ #if !defined(simde_memcpy) SIMDE_FUNCTION_ATTRIBUTES void simde_memcpy_(void* dest, const void* src, size_t len) { char* dest_ = HEDLEY_STATIC_CAST(char*, dest); char* src_ = HEDLEY_STATIC_CAST(const char*, src); for (size_t i = 0 ; i < len ; i++) { dest_[i] = src_[i]; } } #define simde_memcpy(dest, src, n) simde_memcpy_(dest, src, n) #endif #if !defined(simde_memset) SIMDE_FUNCTION_ATTRIBUTES void simde_memset_(void* s, int c, size_t len) { char* s_ = HEDLEY_STATIC_CAST(char*, s); char c_ = HEDLEY_STATIC_CAST(char, c); for (size_t i = 0 ; i < len ; i++) { s_[i] = c_[i]; } } #define simde_memset(s, c, n) simde_memset_(s, c, n) #endif #if !defined(simde_memcmp) SIMDE_FUCTION_ATTRIBUTES int simde_memcmp_(const void *s1, const void *s2, size_t n) { unsigned char* s1_ = HEDLEY_STATIC_CAST(unsigned char*, s1); unsigned char* s2_ = HEDLEY_STATIC_CAST(unsigned char*, s2); for (size_t i = 0 ; i < len ; i++) { if (s1_[i] != s2_[i]) { return (int) (s1_[i] - s2_[i]); } } return 0; } #define simde_memcmp(s1, s2, n) simde_memcmp_(s1, s2, n) #endif #endif #endif /*** Functions that quiet a signaling NaN ***/ static HEDLEY_INLINE double simde_math_quiet(double x) { uint64_t tmp, mask; if (!simde_math_isnan(x)) { return x; } simde_memcpy(&tmp, &x, 8); mask = 0x7ff80000; mask <<= 32; tmp |= mask; simde_memcpy(&x, &tmp, 8); return x; } static HEDLEY_INLINE float simde_math_quietf(float x) { uint32_t tmp; if (!simde_math_isnanf(x)) { return x; } simde_memcpy(&tmp, &x, 4); tmp |= 0x7fc00000lu; simde_memcpy(&x, &tmp, 4); return x; } #if defined(FE_ALL_EXCEPT) #define SIMDE_HAVE_FENV_H #elif defined(__has_include) #if __has_include() #include #define SIMDE_HAVE_FENV_H 
#endif #elif SIMDE_STDC_HOSTED == 1 #include #define SIMDE_HAVE_FENV_H #endif #if defined(EXIT_FAILURE) #define SIMDE_HAVE_STDLIB_H #elif defined(__has_include) #if __has_include() #include #define SIMDE_HAVE_STDLIB_H #endif #elif SIMDE_STDC_HOSTED == 1 #include #define SIMDE_HAVE_STDLIB_H #endif #if defined(__has_include) # if defined(__cplusplus) && (__cplusplus >= 201103L) && __has_include() # include # elif __has_include() # include # endif # if __has_include() # include # endif #elif SIMDE_STDC_HOSTED == 1 # include # include #endif #define SIMDE_DEFINE_CONVERSION_FUNCTION_(Name, T_To, T_From) \ static HEDLEY_ALWAYS_INLINE HEDLEY_CONST SIMDE_FUNCTION_POSSIBLY_UNUSED_ \ T_To \ Name (T_From value) { \ T_To r; \ simde_memcpy(&r, &value, sizeof(r)); \ return r; \ } SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float32_as_uint32, uint32_t, simde_float32) SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint32_as_float32, simde_float32, uint32_t) SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_float64_as_uint64, uint64_t, simde_float64) SIMDE_DEFINE_CONVERSION_FUNCTION_(simde_uint64_as_float64, simde_float64, uint64_t) /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin check.h :: */ /* Check (assertions) * Portable Snippets - https://github.com/nemequ/portable-snippets * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. 
For * details, see the Creative Commons Zero 1.0 Universal license at * https://creativecommons.org/publicdomain/zero/1.0/ * * SPDX-License-Identifier: CC0-1.0 */ #if !defined(SIMDE_CHECK_H) #define SIMDE_CHECK_H #if !defined(SIMDE_NDEBUG) && !defined(SIMDE_DEBUG) # define SIMDE_NDEBUG 1 #endif /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ #include #if !defined(_WIN32) # define SIMDE_SIZE_MODIFIER "z" # define SIMDE_CHAR_MODIFIER "hh" # define SIMDE_SHORT_MODIFIER "h" #else # if defined(_M_X64) || defined(__amd64__) # define SIMDE_SIZE_MODIFIER "I64" # else # define SIMDE_SIZE_MODIFIER "" # endif # define SIMDE_CHAR_MODIFIER "" # define SIMDE_SHORT_MODIFIER "" #endif #if defined(_MSC_VER) && (_MSC_VER >= 1500) # define SIMDE_PUSH_DISABLE_MSVC_C4127_ __pragma(warning(push)) __pragma(warning(disable:4127)) # define SIMDE_POP_DISABLE_MSVC_C4127_ __pragma(warning(pop)) #else # define SIMDE_PUSH_DISABLE_MSVC_C4127_ # define SIMDE_POP_DISABLE_MSVC_C4127_ #endif #if !defined(simde_errorf) # if defined(__has_include) # if __has_include() # include # endif # elif defined(SIMDE_STDC_HOSTED) # if SIMDE_STDC_HOSTED == 1 # include # endif # elif defined(__STDC_HOSTED__) # if __STDC_HOSTETD__ == 1 # include # endif # endif /* AUTOMATICALLY GENERATED FILE, DO NOT MODIFY */ /* 8025b06b07f4789aad472563d363f86671d9e372 */ /* :: Begin debug-trap.h :: */ /* Debugging assertions and traps * Portable Snippets - https://github.com/nemequ/portable-snippets * Created by Evan Nemerson * * To the extent possible under law, the authors have waived all * copyright and related or neighboring rights to this code. 
For * details, see the Creative Commons Zero 1.0 Universal license at * https://creativecommons.org/publicdomain/zero/1.0/ * * SPDX-License-Identifier: CC0-1.0 */ #if !defined(SIMDE_DEBUG_TRAP_H) #define SIMDE_DEBUG_TRAP_H #if !defined(SIMDE_NDEBUG) && defined(NDEBUG) && !defined(SIMDE_DEBUG) # define SIMDE_NDEBUG 1 #endif #if defined(__has_builtin) && !defined(__ibmxl__) # if __has_builtin(__builtin_debugtrap) # define simde_trap() __builtin_debugtrap() # elif __has_builtin(__debugbreak) # define simde_trap() __debugbreak() # endif #endif #if !defined(simde_trap) # if defined(_MSC_VER) || defined(__INTEL_COMPILER) # define simde_trap() __debugbreak() # elif defined(__ARMCC_VERSION) # define simde_trap() __breakpoint(42) # elif defined(__ibmxl__) || defined(__xlC__) # include # define simde_trap() __trap(42) # elif defined(__DMC__) && defined(_M_IX86) static inline void simde_trap(void) { __asm int 3h; } # elif defined(__i386__) || defined(__x86_64__) static inline void simde_trap(void) { __asm__ __volatile__("int $03"); } # elif defined(__thumb__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xde01"); } # elif defined(__aarch64__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xd4200000"); } # elif defined(__arm__) static inline void simde_trap(void) { __asm__ __volatile__(".inst 0xe7f001f0"); } # elif defined (__alpha__) && !defined(__osf__) static inline void simde_trap(void) { __asm__ __volatile__("bpt"); } # elif defined(_54_) static inline void simde_trap(void) { __asm__ __volatile__("ESTOP"); } # elif defined(_55_) static inline void simde_trap(void) { __asm__ __volatile__(";\n .if (.MNEMONIC)\n ESTOP_1\n .else\n ESTOP_1()\n .endif\n NOP"); } # elif defined(_64P_) static inline void simde_trap(void) { __asm__ __volatile__("SWBP 0"); } # elif defined(_6x_) static inline void simde_trap(void) { __asm__ __volatile__("NOP\n .word 0x10000000"); } # elif defined(__STDC_HOSTED__) && (__STDC_HOSTED__ == 0) && 
defined(__GNUC__) # define simde_trap() __builtin_trap() # else # include # if defined(SIGTRAP) # define simde_trap() raise(SIGTRAP) # else # define simde_trap() raise(SIGABRT) # endif # endif #endif #if defined(HEDLEY_LIKELY) # define SIMDE_DBG_LIKELY(expr) HEDLEY_LIKELY(expr) #elif defined(__GNUC__) && (__GNUC__ >= 3) # define SIMDE_DBG_LIKELY(expr) __builtin_expect(!!(expr), 1) #else # define SIMDE_DBG_LIKELY(expr) (!!(expr)) #endif #if !defined(SIMDE_NDEBUG) || (SIMDE_NDEBUG == 0) # define simde_dbg_assert(expr) do { \ if (!SIMDE_DBG_LIKELY(expr)) { \ simde_trap(); \ } \ } while (0) #else # define simde_dbg_assert(expr) #endif #endif /* !defined(SIMDE_DEBUG_TRAP_H) */ /* :: End debug-trap.h :: */ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VARIADIC_MACROS_ # if defined(EOF) # define simde_errorf(format, ...) (fprintf(stderr, format, __VA_ARGS__), abort()) # else # define simde_errorf(format, ...) (simde_trap()) # endif HEDLEY_DIAGNOSTIC_POP #endif #define simde_error(msg) simde_errorf("%s", msg) #if defined(SIMDE_NDEBUG) || \ (defined(__cplusplus) && (__cplusplus < 201103L)) || \ (defined(__STDC__) && (__STDC__ < 199901L)) # if defined(SIMDE_CHECK_FAIL_DEFINED) # define simde_assert(expr) # else # if defined(HEDLEY_ASSUME) # define simde_assert(expr) HEDLEY_ASSUME(expr) # elif HEDLEY_GCC_VERSION_CHECK(4,5,0) # define simde_assert(expr) ((void) (!!(expr) ? 
1 : (__builtin_unreachable(), 1))) # elif HEDLEY_MSVC_VERSION_CHECK(13,10,0) # define simde_assert(expr) __assume(expr) # else # define simde_assert(expr) # endif # endif # define simde_assert_true(expr) simde_assert(expr) # define simde_assert_false(expr) simde_assert(!(expr)) # define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) simde_assert(((a) op (b))) # define simde_assert_double_equal(a, b, precision) # define simde_assert_string_equal(a, b) # define simde_assert_string_not_equal(a, b) # define simde_assert_memory_equal(size, a, b) # define simde_assert_memory_not_equal(size, a, b) #else # define simde_assert(expr) \ do { \ if (!HEDLEY_LIKELY(expr)) { \ simde_error("assertion failed: " #expr "\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_true(expr) \ do { \ if (!HEDLEY_LIKELY(expr)) { \ simde_error("assertion failed: " #expr " is not true\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_false(expr) \ do { \ if (!HEDLEY_LIKELY(!(expr))) { \ simde_error("assertion failed: " #expr " is not false\n"); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_type_full(prefix, suffix, T, fmt, a, op, b) \ do { \ T simde_tmp_a_ = (a); \ T simde_tmp_b_ = (b); \ if (!(simde_tmp_a_ op simde_tmp_b_)) { \ simde_errorf("assertion failed: %s %s %s (" prefix "%" fmt suffix " %s " prefix "%" fmt suffix ")\n", \ #a, #op, #b, simde_tmp_a_, #op, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_double_equal(a, b, precision) \ do { \ const double simde_tmp_a_ = (a); \ const double simde_tmp_b_ = (b); \ const double simde_tmp_diff_ = ((simde_tmp_a_ - simde_tmp_b_) < 0) ? 
\ -(simde_tmp_a_ - simde_tmp_b_) : \ (simde_tmp_a_ - simde_tmp_b_); \ if (HEDLEY_UNLIKELY(simde_tmp_diff_ > 1e-##precision)) { \ simde_errorf("assertion failed: %s == %s (%0." #precision "g == %0." #precision "g)\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # include # define simde_assert_string_equal(a, b) \ do { \ const char* simde_tmp_a_ = a; \ const char* simde_tmp_b_ = b; \ if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) != 0)) { \ simde_errorf("assertion failed: string %s == %s (\"%s\" == \"%s\")\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_string_not_equal(a, b) \ do { \ const char* simde_tmp_a_ = a; \ const char* simde_tmp_b_ = b; \ if (HEDLEY_UNLIKELY(strcmp(simde_tmp_a_, simde_tmp_b_) == 0)) { \ simde_errorf("assertion failed: string %s != %s (\"%s\" == \"%s\")\n", \ #a, #b, simde_tmp_a_, simde_tmp_b_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_memory_equal(size, a, b) \ do { \ const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ const size_t simde_tmp_size_ = (size); \ if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) != 0) { \ size_t simde_tmp_pos_; \ for (simde_tmp_pos_ = 0 ; simde_tmp_pos_ < simde_tmp_size_ ; simde_tmp_pos_++) { \ if (simde_tmp_a_[simde_tmp_pos_] != simde_tmp_b_[simde_tmp_pos_]) { \ simde_errorf("assertion failed: memory %s == %s, at offset %" SIMDE_SIZE_MODIFIER "u\n", \ #a, #b, simde_tmp_pos_); \ break; \ } \ } \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ # define simde_assert_memory_not_equal(size, a, b) \ do { \ const unsigned char* simde_tmp_a_ = (const unsigned char*) (a); \ const unsigned char* simde_tmp_b_ = (const unsigned char*) (b); \ 
const size_t simde_tmp_size_ = (size); \ if (HEDLEY_UNLIKELY(memcmp(simde_tmp_a_, simde_tmp_b_, simde_tmp_size_)) == 0) { \ simde_errorf("assertion failed: memory %s != %s (%" SIMDE_SIZE_MODIFIER "u bytes)\n", \ #a, #b, simde_tmp_size_); \ } \ SIMDE_PUSH_DISABLE_MSVC_C4127_ \ } while (0) \ SIMDE_POP_DISABLE_MSVC_C4127_ #endif #define simde_assert_type(T, fmt, a, op, b) \ simde_assert_type_full("", "", T, fmt, a, op, b) #define simde_assert_char(a, op, b) \ simde_assert_type_full("'\\x", "'", char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) #define simde_assert_uchar(a, op, b) \ simde_assert_type_full("'\\x", "'", unsigned char, "02" SIMDE_CHAR_MODIFIER "x", a, op, b) #define simde_assert_short(a, op, b) \ simde_assert_type(short, SIMDE_SHORT_MODIFIER "d", a, op, b) #define simde_assert_ushort(a, op, b) \ simde_assert_type(unsigned short, SIMDE_SHORT_MODIFIER "u", a, op, b) #define simde_assert_int(a, op, b) \ simde_assert_type(int, "d", a, op, b) #define simde_assert_uint(a, op, b) \ simde_assert_type(unsigned int, "u", a, op, b) #define simde_assert_long(a, op, b) \ simde_assert_type(long int, "ld", a, op, b) #define simde_assert_ulong(a, op, b) \ simde_assert_type(unsigned long int, "lu", a, op, b) #define simde_assert_llong(a, op, b) \ simde_assert_type(long long int, "lld", a, op, b) #define simde_assert_ullong(a, op, b) \ simde_assert_type(unsigned long long int, "llu", a, op, b) #define simde_assert_size(a, op, b) \ simde_assert_type(size_t, SIMDE_SIZE_MODIFIER "u", a, op, b) #define simde_assert_float(a, op, b) \ simde_assert_type(float, "f", a, op, b) #define simde_assert_double(a, op, b) \ simde_assert_type(double, "g", a, op, b) #define simde_assert_ptr(a, op, b) \ simde_assert_type(const void*, "p", a, op, b) #define simde_assert_int8(a, op, b) \ simde_assert_type(int8_t, PRIi8, a, op, b) #define simde_assert_uint8(a, op, b) \ simde_assert_type(uint8_t, PRIu8, a, op, b) #define simde_assert_int16(a, op, b) \ simde_assert_type(int16_t, PRIi16, a, op, b) 
#define simde_assert_uint16(a, op, b) \ simde_assert_type(uint16_t, PRIu16, a, op, b) #define simde_assert_int32(a, op, b) \ simde_assert_type(int32_t, PRIi32, a, op, b) #define simde_assert_uint32(a, op, b) \ simde_assert_type(uint32_t, PRIu32, a, op, b) #define simde_assert_int64(a, op, b) \ simde_assert_type(int64_t, PRIi64, a, op, b) #define simde_assert_uint64(a, op, b) \ simde_assert_type(uint64_t, PRIu64, a, op, b) #define simde_assert_ptr_equal(a, b) \ simde_assert_ptr(a, ==, b) #define simde_assert_ptr_not_equal(a, b) \ simde_assert_ptr(a, !=, b) #define simde_assert_null(ptr) \ simde_assert_ptr(ptr, ==, NULL) #define simde_assert_not_null(ptr) \ simde_assert_ptr(ptr, !=, NULL) #define simde_assert_ptr_null(ptr) \ simde_assert_ptr(ptr, ==, NULL) #define simde_assert_ptr_not_null(ptr) \ simde_assert_ptr(ptr, !=, NULL) #endif /* !defined(SIMDE_CHECK_H) */ /* :: End check.h :: */ /* GCC/clang have a bunch of functionality in builtins which we would * like to access, but the suffixes indicate whether the operate on * int, long, or long long, not fixed width types (e.g., int32_t). * we use these macros to attempt to map from fixed-width to the * names GCC uses. Note that you should still cast the input(s) and * return values (to/from SIMDE_BUILTIN_TYPE_*_) since often even if * types are the same size they may not be compatible according to the * compiler. For example, on x86 long and long lonsg are generally * both 64 bits, but platforms vary on whether an int64_t is mapped * to a long or long long. 
*/ #include HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ #if (INT8_MAX == INT_MAX) && (INT8_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ #define SIMDE_BUILTIN_TYPE_8_ int #elif (INT8_MAX == LONG_MAX) && (INT8_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ l #define SIMDE_BUILTIN_TYPE_8_ long #elif (INT8_MAX == LLONG_MAX) && (INT8_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_8_ ll #define SIMDE_BUILTIN_TYPE_8_ long long #endif #if (INT16_MAX == INT_MAX) && (INT16_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ #define SIMDE_BUILTIN_TYPE_16_ int #elif (INT16_MAX == LONG_MAX) && (INT16_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ l #define SIMDE_BUILTIN_TYPE_16_ long #elif (INT16_MAX == LLONG_MAX) && (INT16_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_16_ ll #define SIMDE_BUILTIN_TYPE_16_ long long #endif #if (INT32_MAX == INT_MAX) && (INT32_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ #define SIMDE_BUILTIN_TYPE_32_ int #elif (INT32_MAX == LONG_MAX) && (INT32_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ l #define SIMDE_BUILTIN_TYPE_32_ long #elif (INT32_MAX == LLONG_MAX) && (INT32_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_32_ ll #define SIMDE_BUILTIN_TYPE_32_ long long #endif #if (INT64_MAX == INT_MAX) && (INT64_MIN == INT_MIN) #define SIMDE_BUILTIN_SUFFIX_64_ #define SIMDE_BUILTIN_TYPE_64_ int #elif (INT64_MAX == LONG_MAX) && (INT64_MIN == LONG_MIN) #define SIMDE_BUILTIN_SUFFIX_64_ l #define SIMDE_BUILTIN_TYPE_64_ long #elif (INT64_MAX == LLONG_MAX) && (INT64_MIN == LLONG_MIN) #define SIMDE_BUILTIN_SUFFIX_64_ ll #define SIMDE_BUILTIN_TYPE_64_ long long #endif /* SIMDE_DIAGNOSTIC_DISABLE_CPP98_COMPAT_PEDANTIC_ */ HEDLEY_DIAGNOSTIC_POP #if defined(SIMDE_BUILTIN_SUFFIX_8_) #define SIMDE_BUILTIN_8_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_) #define SIMDE_BUILTIN_HAS_8_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_8_)) #else #define SIMDE_BUILTIN_HAS_8_(name) 0 
#endif #if defined(SIMDE_BUILTIN_SUFFIX_16_) #define SIMDE_BUILTIN_16_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_) #define SIMDE_BUILTIN_HAS_16_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_16_)) #else #define SIMDE_BUILTIN_HAS_16_(name) 0 #endif #if defined(SIMDE_BUILTIN_SUFFIX_32_) #define SIMDE_BUILTIN_32_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_) #define SIMDE_BUILTIN_HAS_32_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_32_)) #else #define SIMDE_BUILTIN_HAS_32_(name) 0 #endif #if defined(SIMDE_BUILTIN_SUFFIX_64_) #define SIMDE_BUILTIN_64_(name) HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_) #define SIMDE_BUILTIN_HAS_64_(name) HEDLEY_HAS_BUILTIN(HEDLEY_CONCAT3(__builtin_, name, SIMDE_BUILTIN_SUFFIX_64_)) #else #define SIMDE_BUILTIN_HAS_64_(name) 0 #endif #if !defined(__cplusplus) #if defined(__clang__) #if HEDLEY_HAS_WARNING("-Wc11-extensions") #define SIMDE_GENERIC_(...) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc11-extensions\"") \ _Generic(__VA_ARGS__); \ HEDLEY_DIAGNOSTIC_POP \ })) #elif HEDLEY_HAS_WARNING("-Wc1x-extensions") #define SIMDE_GENERIC_(...) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("clang diagnostic ignored \"-Wc1x-extensions\"") \ _Generic(__VA_ARGS__); \ HEDLEY_DIAGNOSTIC_POP \ })) #endif #elif \ defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) || \ HEDLEY_HAS_EXTENSION(c_generic_selections) || \ HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ HEDLEY_ARM_VERSION_CHECK(5,3,0) #define SIMDE_GENERIC_(...) _Generic(__VA_ARGS__) #endif #endif /* Sometimes we run into problems with specific versions of compilers which make the native versions unusable for us. Often this is due to missing functions, sometimes buggy implementations, etc. These macros are how we check for specific bugs. 
As they are fixed we'll start only defining them for problematic compiler versions. */ #if !defined(SIMDE_IGNORE_COMPILER_BUGS) # if defined(HEDLEY_GCC_VERSION) # if !HEDLEY_GCC_VERSION_CHECK(4,9,0) # define SIMDE_BUG_GCC_REV_208793 # endif # if !HEDLEY_GCC_VERSION_CHECK(5,0,0) # define SIMDE_BUG_GCC_BAD_MM_SRA_EPI32 /* TODO: find relevant bug or commit */ # endif # if !HEDLEY_GCC_VERSION_CHECK(6,0,0) # define SIMDE_BUG_GCC_SIZEOF_IMMEDIATE # endif # if !HEDLEY_GCC_VERSION_CHECK(4,6,0) # define SIMDE_BUG_GCC_BAD_MM_EXTRACT_EPI8 /* TODO: find relevant bug or commit */ # endif # if !HEDLEY_GCC_VERSION_CHECK(8,0,0) # define SIMDE_BUG_GCC_REV_247851 # endif # if !HEDLEY_GCC_VERSION_CHECK(10,0,0) # define SIMDE_BUG_GCC_REV_274313 # define SIMDE_BUG_GCC_91341 # define SIMDE_BUG_GCC_92035 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_ARM_SHIFT_SCALAR # endif # if !HEDLEY_GCC_VERSION_CHECK(9,0,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_BAD_VEXT_REV32 # endif # if defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64) # define SIMDE_BUG_GCC_94482 # endif # if (defined(SIMDE_ARCH_X86) && !defined(SIMDE_ARCH_AMD64)) || defined(SIMDE_ARCH_ZARCH) # define SIMDE_BUG_GCC_53784 # endif # if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if HEDLEY_GCC_VERSION_CHECK(4,3,0) /* -Wsign-conversion */ # define SIMDE_BUG_GCC_95144 # endif # if !HEDLEY_GCC_VERSION_CHECK(11,2,0) # define SIMDE_BUG_GCC_95483 # endif # if defined(__OPTIMIZE__) # define SIMDE_BUG_GCC_100927 # endif # define SIMDE_BUG_GCC_98521 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,4,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_94488 # endif # if !HEDLEY_GCC_VERSION_CHECK(9,1,0) && defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_GCC_REV_264019 # endif # if defined(SIMDE_ARCH_ARM) # define SIMDE_BUG_GCC_95399 # define SIMDE_BUG_GCC_95471 # elif defined(SIMDE_ARCH_POWER) # define SIMDE_BUG_GCC_95227 # define SIMDE_BUG_GCC_95782 # if 
!HEDLEY_GCC_VERSION_CHECK(12,0,0) # define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS # endif # elif defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if !HEDLEY_GCC_VERSION_CHECK(10,2,0) && !defined(__OPTIMIZE__) # define SIMDE_BUG_GCC_96174 # endif # elif defined(SIMDE_ARCH_ZARCH) # define SIMDE_BUG_GCC_95782 # if HEDLEY_GCC_VERSION_CHECK(10,0,0) # define SIMDE_BUG_GCC_101614 # endif # endif # if defined(SIMDE_ARCH_MIPS_MSA) # define SIMDE_BUG_GCC_97248 # if !HEDLEY_GCC_VERSION_CHECK(12,1,0) # define SIMDE_BUG_GCC_100760 # define SIMDE_BUG_GCC_100761 # define SIMDE_BUG_GCC_100762 # endif # endif # define SIMDE_BUG_GCC_95399 # if !defined(__OPTIMIZE__) # define SIMDE_BUG_GCC_105339 # endif # elif defined(__clang__) # if defined(SIMDE_ARCH_AARCH64) # define SIMDE_BUG_CLANG_45541 # define SIMDE_BUG_CLANG_48257 # if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) # define SIMDE_BUG_CLANG_46840 # define SIMDE_BUG_CLANG_46844 # endif # if SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_BAD_VI64_OPS # endif # if SIMDE_DETECT_CLANG_VERSION_NOT(9,0,0) # define SIMDE_BUG_CLANG_GIT_4EC445B8 # define SIMDE_BUG_CLANG_REV_365298 /* 0464e07c8f6e3310c28eb210a4513bc2243c2a7e */ # endif # endif # if defined(SIMDE_ARCH_ARM) # if !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) # define SIMDE_BUG_CLANG_BAD_VGET_SET_LANE_TYPES # endif # endif # if defined(SIMDE_ARCH_POWER) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) # define SIMDE_BUG_CLANG_46770 # endif # if defined(SIMDE_ARCH_POWER) && (SIMDE_ARCH_POWER == 700) && (SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)) # if !SIMDE_DETECT_CLANG_VERSION_CHECK(13,0,0) # define SIMDE_BUG_CLANG_50893 # define SIMDE_BUG_CLANG_50901 # endif # endif # if defined(_ARCH_PWR9) && !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) && !defined(__OPTIMIZE__) # define SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT # endif # if defined(SIMDE_ARCH_POWER) # define SIMDE_BUG_CLANG_50932 # if !SIMDE_DETECT_CLANG_VERSION_CHECK(12,0,0) # 
define SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS # endif # endif # if defined(SIMDE_ARCH_X86) || defined(SIMDE_ARCH_AMD64) # if SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) # define SIMDE_BUG_CLANG_REV_298042 /* 6afc436a7817a52e78ae7bcdc3faafd460124cac */ # endif # if SIMDE_DETECT_CLANG_VERSION_NOT(3,7,0) # define SIMDE_BUG_CLANG_REV_234560 /* b929ad7b1726a32650a8051f69a747fb6836c540 */ # endif # if SIMDE_DETECT_CLANG_VERSION_CHECK(3,8,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) # define SIMDE_BUG_CLANG_BAD_MADD # endif # if SIMDE_DETECT_CLANG_VERSION_CHECK(4,0,0) && SIMDE_DETECT_CLANG_VERSION_NOT(5,0,0) # define SIMDE_BUG_CLANG_REV_299346 /* ac9959eb533a58482ea4da6c4db1e635a98de384 */ # endif # if SIMDE_DETECT_CLANG_VERSION_NOT(8,0,0) # define SIMDE_BUG_CLANG_REV_344862 /* eae26bf73715994c2bd145f9b6dc3836aa4ffd4f */ # endif # if HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_45931 # endif # if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0) # define SIMDE_BUG_CLANG_44589 # endif # define SIMDE_BUG_CLANG_48673 # endif # define SIMDE_BUG_CLANG_45959 # if defined(SIMDE_ARCH_WASM_SIMD128) # define SIMDE_BUG_CLANG_60655 # endif # elif defined(HEDLEY_MSVC_VERSION) # if defined(SIMDE_ARCH_X86) # define SIMDE_BUG_MSVC_ROUND_EXTRACT # endif # elif defined(HEDLEY_INTEL_VERSION) # define SIMDE_BUG_INTEL_857088 # elif defined(HEDLEY_MCST_LCC_VERSION) # define SIMDE_BUG_MCST_LCC_MISSING_AVX_LOAD_STORE_M128_FUNCS # define SIMDE_BUG_MCST_LCC_MISSING_CMOV_M256 # define SIMDE_BUG_MCST_LCC_FMA_WRONG_RESULT # elif defined(HEDLEY_PGI_VERSION) # define SIMDE_BUG_PGI_30104 # define SIMDE_BUG_PGI_30107 # define SIMDE_BUG_PGI_30106 # endif #endif /* GCC and Clang both have the same issue: * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95144 * https://bugs.llvm.org/show_bug.cgi?id=45931 * This is just an easy way to work around it. 
*/ #if \ (HEDLEY_HAS_WARNING("-Wsign-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(11,0,0)) || \ HEDLEY_GCC_VERSION_CHECK(4,3,0) # define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (__extension__ ({ \ HEDLEY_DIAGNOSTIC_PUSH \ _Pragma("GCC diagnostic ignored \"-Wsign-conversion\"") \ __typeof__(expr) simde_bug_ignore_sign_conversion_v_= (expr); \ HEDLEY_DIAGNOSTIC_POP \ simde_bug_ignore_sign_conversion_v_; \ })) #else # define SIMDE_BUG_IGNORE_SIGN_CONVERSION(expr) (expr) #endif /* Usually the shift count is signed (for example, NEON or SSE). * OTOH, unsigned is good for PPC (vec_srl uses unsigned), and the only option for E2K. * Further info: https://github.com/simd-everywhere/simde/pull/700 */ #if defined(SIMDE_ARCH_E2K) || defined(SIMDE_ARCH_POWER) #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(uint##width##_t, (value)) #else #define SIMDE_CAST_VECTOR_SHIFT_COUNT(width, value) HEDLEY_STATIC_CAST(int##width##_t, (value)) #endif /* SIMDE_DIAGNOSTIC_DISABLE_USED_BUT_MARKED_UNUSED_ */ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_COMMON_H) */ /* :: End simde-common.h :: */ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS #if defined(SIMDE_X86_MMX_NATIVE) #define SIMDE_X86_MMX_USE_NATIVE_TYPE #elif defined(SIMDE_X86_SSE_NATIVE) #define SIMDE_X86_MMX_USE_NATIVE_TYPE #endif #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) #include #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #include #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) #include #endif #include #include SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_8 int8_t i8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int16_t i16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int32_t i32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int64_t i64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint8_t u8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint16_t u16 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint32_t u32 SIMDE_VECTOR(8) 
SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint64_t u64 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 simde_float32 f32 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 int_fast32_t i32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_8 uint_fast32_t u32f SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_8 int8_t i8[8]; SIMDE_ALIGN_TO_8 int16_t i16[4]; SIMDE_ALIGN_TO_8 int32_t i32[2]; SIMDE_ALIGN_TO_8 int64_t i64[1]; SIMDE_ALIGN_TO_8 uint8_t u8[8]; SIMDE_ALIGN_TO_8 uint16_t u16[4]; SIMDE_ALIGN_TO_8 uint32_t u32[2]; SIMDE_ALIGN_TO_8 uint64_t u64[1]; SIMDE_ALIGN_TO_8 simde_float32 f32[2]; SIMDE_ALIGN_TO_8 int_fast32_t i32f[8 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_8 uint_fast32_t u32f[8 / sizeof(uint_fast32_t)]; #endif #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) __m64 n; #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8_t neon_i8; int16x4_t neon_i16; int32x2_t neon_i32; int64x1_t neon_i64; uint8x8_t neon_u8; uint16x4_t neon_u16; uint32x2_t neon_u32; uint64x1_t neon_u64; float32x2_t neon_f32; #endif #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) int8x8_t mmi_i8; int16x4_t mmi_i16; int32x2_t mmi_i32; int64_t mmi_i64; uint8x8_t mmi_u8; uint16x4_t mmi_u16; uint32x2_t mmi_u32; uint64_t mmi_u64; #endif } simde__m64_private; #if defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) typedef __m64 simde__m64; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) typedef int32x2_t simde__m64; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) typedef int32x2_t simde__m64; #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef int32_t simde__m64 SIMDE_ALIGN_TO_8 SIMDE_VECTOR(8) SIMDE_MAY_ALIAS; #else typedef simde__m64_private simde__m64; #endif #if !defined(SIMDE_X86_MMX_USE_NATIVE_TYPE) && defined(SIMDE_ENABLE_NATIVE_ALIASES) #define SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES typedef simde__m64 __m64; #endif HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64), "__m64 size incorrect"); HEDLEY_STATIC_ASSERT(8 == sizeof(simde__m64_private), "__m64 size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) 
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64) == 8, "simde__m64 is not 8-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m64_private) == 8, "simde__m64_private is not 8-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde__m64_from_private(simde__m64_private v) { simde__m64 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m64_private simde__m64_to_private(simde__m64 v) { simde__m64_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #define SIMDE_X86_GENERATE_CONVERSION_FUNCTION(simde_type, source_type, isax, fragment) \ SIMDE_FUNCTION_ATTRIBUTES \ simde__##simde_type \ simde__##simde_type##_from_##isax##_##fragment(source_type value) { \ simde__##simde_type##_private r_; \ r_.isax##_##fragment = value; \ return simde__##simde_type##_from_private(r_); \ } \ \ SIMDE_FUNCTION_ATTRIBUTES \ source_type \ simde__##simde_type##_to_##isax##_##fragment(simde__##simde_type value) { \ simde__##simde_type##_private r_ = simde__##simde_type##_to_private(value); \ return r_.isax##_##fragment; \ } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64x1_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64x1_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, float32x2_t, neon, f32) #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int8x8_t, mmi, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int16x4_t, mmi, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int32x2_t, mmi, i32) 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, int64_t, mmi, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint8x8_t, mmi, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint16x4_t, mmi, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint32x2_t, mmi, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m64, uint64_t, mmi, u64) #endif /* defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) */ SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vadd_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = paddb_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 + b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] + b_.i8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddb(a, b) simde_mm_add_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi8(a, b) simde_mm_add_pi8(a, b) # define _m_paddb(a, b) simde_m_paddb(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vadd_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = paddh_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 + b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] + b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define 
simde_m_paddw(a, b) simde_mm_add_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi16(a, b) simde_mm_add_pi16(a, b) # define _m_paddw(a, b) simde_mm_add_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_add_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vadd_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = paddw_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 + b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] + b_.i32[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddd(a, b) simde_mm_add_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_add_pi32(a, b) simde_mm_add_pi32(a, b) # define _m_paddd(a, b) simde_mm_add_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqadd_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = paddsb(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { if ((((b_.i8[i]) > 0) && ((a_.i8[i]) > (INT8_MAX - (b_.i8[i]))))) { r_.i8[i] = INT8_MAX; } else if ((((b_.i8[i]) < 0) && ((a_.i8[i]) < (INT8_MIN - (b_.i8[i]))))) { r_.i8[i] = INT8_MIN; } else { r_.i8[i] = (a_.i8[i]) + (b_.i8[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddsb(a, b) simde_mm_adds_pi8(a, b) #if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pi8(a, b) simde_mm_adds_pi8(a, b) # define _m_paddsb(a, b) simde_mm_adds_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqadd_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = paddusb(a_.mmi_u8, b_.mmi_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { const uint_fast16_t x = HEDLEY_STATIC_CAST(uint_fast16_t, a_.u8[i]) + HEDLEY_STATIC_CAST(uint_fast16_t, b_.u8[i]); if (x > UINT8_MAX) r_.u8[i] = UINT8_MAX; else r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddusb(a, b) simde_mm_adds_pu8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pu8(a, b) simde_mm_adds_pu8(a, b) # define _m_paddusb(a, b) simde_mm_adds_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqadd_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = paddsh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if ((((b_.i16[i]) > 0) && ((a_.i16[i]) > (INT16_MAX - (b_.i16[i]))))) { r_.i16[i] = INT16_MAX; } else if ((((b_.i16[i]) < 0) && ((a_.i16[i]) < (SHRT_MIN - (b_.i16[i]))))) { r_.i16[i] = SHRT_MIN; } else { r_.i16[i] = (a_.i16[i]) + (b_.i16[i]); } } #endif return 
simde__m64_from_private(r_); #endif } #define simde_m_paddsw(a, b) simde_mm_adds_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pi16(a, b) simde_mm_adds_pi16(a, b) # define _m_paddsw(a, b) simde_mm_adds_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_adds_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_adds_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqadd_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u16 = paddush(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { const uint32_t x = a_.u16[i] + b_.u16[i]; if (x > UINT16_MAX) r_.u16[i] = UINT16_MAX; else r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_paddusw(a, b) simde_mm_adds_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_adds_pu16(a, b) simde_mm_adds_pu16(a, b) # define _m_paddusw(a, b) simde_mm_adds_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_and_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_and_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vand_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 & b_.i64; #else r_.i64[0] = a_.i64[0] & b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_pand(a, b) simde_mm_and_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_and_si64(a, b) simde_mm_and_si64(a, b) # define _m_pand(a, b) simde_mm_and_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 
simde_mm_andnot_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_andnot_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbic_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pandn_sw(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f & b_.i32f; #else r_.u64[0] = (~(a_.u64[0])) & (b_.u64[0]); #endif return simde__m64_from_private(r_); #endif } #define simde_m_pandn(a, b) simde_mm_andnot_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_andnot_si64(a, b) simde_mm_andnot_si64(a, b) # define _m_pandn(a, b) simde_mm_andnot_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vceq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = pcmpeqb_s(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? 
~INT8_C(0) : INT8_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi8(a, b) simde_mm_cmpeq_pi8(a, b) # define _m_pcmpeqb(a, b) simde_mm_cmpeq_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vceq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pcmpeqh_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] == b_.i16[i]) ? ~INT16_C(0) : INT16_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi16(a, b) simde_mm_cmpeq_pi16(a, b) # define _m_pcmpeqw(a, b) simde_mm_cmpeq_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpeq_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpeq_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pcmpeqw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] == b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_pi32(a, b) simde_mm_cmpeq_pi32(a, b) # define _m_pcmpeqd(a, b) simde_mm_cmpeq_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcgt_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = pcmpgtb_s(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = (a_.i8[i] > b_.i8[i]) ? ~INT8_C(0) : INT8_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi8(a, b) simde_mm_cmpgt_pi8(a, b) # define _m_pcmpgtb(a, b) simde_mm_cmpgt_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgt_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pcmpgth_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi16(a, b) simde_mm_cmpgt_pi16(a, b) # define _m_pcmpgtw(a, b) simde_mm_cmpgt_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cmpgt_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cmpgt_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgt_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pcmpgtw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? ~INT32_C(0) : INT32_C(0); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_pi32(a, b) simde_mm_cmpgt_pi32(a, b) # define _m_pcmpgtd(a, b) simde_mm_cmpgt_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtm64_si64 (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) return _mm_cvtm64_si64(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s64(a_.neon_i64, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i64[0]; #endif #endif } #define simde_m_to_int64(a) simde_mm_cvtm64_si64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) # define _mm_cvtm64_si64(a) simde_mm_cvtm64_si64(a) # define _m_to_int64(a) 
simde_mm_cvtm64_si64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtsi32_si64 (int32_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtsi32_si64(a); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32_t av[2] = { a, 0 }; r_.neon_i32 = vld1_s32(av); #else r_.i32[0] = a; r_.i32[1] = 0; #endif return simde__m64_from_private(r_); #endif } #define simde_m_from_int(a) simde_mm_cvtsi32_si64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi32_si64(a) simde_mm_cvtsi32_si64(a) # define _m_from_int(a) simde_mm_cvtsi32_si64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtsi64_m64 (int64_t a) { #if defined(SIMDE_X86_MMX_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(__PGI) return _mm_cvtsi64_m64(a); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vld1_s64(&a); #else r_.i64[0] = a; #endif return simde__m64_from_private(r_); #endif } #define simde_m_from_int64(a) simde_mm_cvtsi64_m64(a) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) # define _mm_cvtsi64_m64(a) simde_mm_cvtsi64_m64(a) # define _m_from_int64(a) simde_mm_cvtsi64_m64(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsi64_si32 (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtsi64_si32(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s32(a_.neon_i32, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi64_si32(a) simde_mm_cvtsi64_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_empty (void) { #if defined(SIMDE_X86_MMX_NATIVE) _mm_empty(); #else /* noop */ #endif } #define simde_m_empty() 
simde_mm_empty() #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_empty() simde_mm_empty() # define _m_empty() simde_mm_empty() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_madd_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_madd_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4_t i1 = vmull_s16(a_.neon_i16, b_.neon_i16); r_.neon_i32 = vpadd_s32(vget_low_s32(i1), vget_high_s32(i1)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = pmaddhw(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_madd_pi16(a, b) simde_mm_madd_pi16(a, b) # define _m_pmaddwd(a, b) simde_mm_madd_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mulhi_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_mulhi_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); const uint32x4_t t2 = vshrq_n_u32(vreinterpretq_u32_s32(t1), 16); const uint16x4_t t3 = vmovn_u32(t2); r_.neon_u16 = t3; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pmulhh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) >> 16)); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) #if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_mulhi_pi16(a, b) simde_mm_mulhi_pi16(a, b) # define _m_pmulhw(a, b) simde_mm_mulhi_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mullo_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_mullo_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32x4_t t1 = vmull_s16(a_.neon_i16, b_.neon_i16); const uint16x4_t t2 = vmovn_u32(vreinterpretq_u32_s32(t1)); r_.neon_u16 = t2; #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = pmullh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, ((a_.i16[i] * b_.i16[i]) & 0xffff)); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmullw(a, b) simde_mm_mullo_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_mullo_pi16(a, b) simde_mm_mullo_pi16(a, b) # define _m_pmullw(a, b) simde_mm_mullo_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_or_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_or_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vorr_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 | b_.i64; #else r_.i64[0] = a_.i64[0] | b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_por(a, b) simde_mm_or_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_or_si64(a, b) simde_mm_or_si64(a, b) # define _m_por(a, b) simde_mm_or_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) 
return _mm_packs_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqmovn_s16(vcombine_s16(a_.neon_i16, b_.neon_i16)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = packsshb(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (a_.i16[i] < INT8_MIN) { r_.i8[i] = INT8_MIN; } else if (a_.i16[i] > INT8_MAX) { r_.i8[i] = INT8_MAX; } else { r_.i8[i] = HEDLEY_STATIC_CAST(int8_t, a_.i16[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (b_.i16[i] < INT8_MIN) { r_.i8[i + 4] = INT8_MIN; } else if (b_.i16[i] > INT8_MAX) { r_.i8[i + 4] = INT8_MAX; } else { r_.i8[i + 4] = HEDLEY_STATIC_CAST(int8_t, b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_packsswb(a, b) simde_mm_packs_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pi16(a, b) simde_mm_packs_pi16(a, b) # define _m_packsswb(a, b) simde_mm_packs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_packs_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqmovn_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = packsswh(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (8 / sizeof(a_.i32[0])) ; i++) { if (a_.i32[i] < SHRT_MIN) { r_.i16[i] = SHRT_MIN; } else if (a_.i32[i] > INT16_MAX) { r_.i16[i] = INT16_MAX; } else { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i32[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (8 / sizeof(b_.i32[0])) ; i++) { if (b_.i32[i] < SHRT_MIN) { r_.i16[i 
+ 2] = SHRT_MIN; } else if (b_.i32[i] > INT16_MAX) { r_.i16[i + 2] = INT16_MAX; } else { r_.i16[i + 2] = HEDLEY_STATIC_CAST(int16_t, b_.i32[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_packssdw(a, b) simde_mm_packs_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pi32(a, b) simde_mm_packs_pi32(a, b) # define _m_packssdw(a, b) simde_mm_packs_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_packs_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_packs_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) const int16x8_t t1 = vcombine_s16(a_.neon_i16, b_.neon_i16); /* Set elements which are < 0 to 0 */ const int16x8_t t2 = vandq_s16(t1, vreinterpretq_s16_u16(vcgezq_s16(t1))); /* Vector with all s16 elements set to UINT8_MAX */ const int16x8_t vmax = vmovq_n_s16(HEDLEY_STATIC_CAST(int16_t, UINT8_MAX)); /* Elements which are within the acceptable range */ const int16x8_t le_max = vandq_s16(t2, vreinterpretq_s16_u16(vcleq_s16(t2, vmax))); const int16x8_t gt_max = vandq_s16(vmax, vreinterpretq_s16_u16(vcgtq_s16(t2, vmax))); /* Final values as 16-bit integers */ const int16x8_t values = vorrq_s16(le_max, gt_max); r_.neon_u8 = vmovn_u16(vreinterpretq_u16_s16(values)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = packushb(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (a_.i16[i] > UINT8_MAX) { r_.u8[i] = UINT8_MAX; } else if (a_.i16[i] < 0) { r_.u8[i] = 0; } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, a_.i16[i]); } } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (b_.i16[i] > UINT8_MAX) { r_.u8[i + 4] = UINT8_MAX; } else if (b_.i16[i] < 0) { r_.u8[i + 4] = 0; } else { r_.u8[i + 4] = HEDLEY_STATIC_CAST(uint8_t, 
b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_packuswb(a, b) simde_mm_packs_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_packs_pu16(a, b) simde_mm_packs_pu16(a, b) # define _m_packuswb(a, b) simde_mm_packs_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int8_t v[sizeof(r_.i8) / sizeof(r_.i8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_i8 = vld1_s8(v); #else r_.i8[0] = e0; r_.i8[1] = e1; r_.i8[2] = e2; r_.i8[3] = e3; r_.i8[4] = e4; r_.i8[5] = e5; r_.i8[6] = e6; r_.i8[7] = e7; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_pi8(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu8 (uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi8( HEDLEY_STATIC_CAST(int8_t, e7), HEDLEY_STATIC_CAST(int8_t, e6), HEDLEY_STATIC_CAST(int8_t, e5), HEDLEY_STATIC_CAST(int8_t, e4), HEDLEY_STATIC_CAST(int8_t, e3), HEDLEY_STATIC_CAST(int8_t, e2), HEDLEY_STATIC_CAST(int8_t, e1), HEDLEY_STATIC_CAST(int8_t, e0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint8_t v[sizeof(r_.u8) / sizeof(r_.u8[0])] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_u8 = vld1_u8(v); #else r_.u8[0] = e0; r_.u8[1] = e1; r_.u8[2] = e2; r_.u8[3] = e3; r_.u8[4] = e4; r_.u8[5] = e5; r_.u8[6] = e6; r_.u8[7] = e7; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) 
return _mm_set_pi16(e3, e2, e1, e0); #else simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int16_t v[sizeof(r_.i16) / sizeof(r_.i16[0])] = { e0, e1, e2, e3 }; r_.neon_i16 = vld1_s16(v); #else r_.i16[0] = e0; r_.i16[1] = e1; r_.i16[2] = e2; r_.i16[3] = e3; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi16(e3, e2, e1, e0) simde_mm_set_pi16(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu16 (uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi16( HEDLEY_STATIC_CAST(int16_t, e3), HEDLEY_STATIC_CAST(int16_t, e2), HEDLEY_STATIC_CAST(int16_t, e1), HEDLEY_STATIC_CAST(int16_t, e0) ); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint16_t v[sizeof(r_.u16) / sizeof(r_.u16[0])] = { e0, e1, e2, e3 }; r_.neon_u16 = vld1_u16(v); #else r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pu32 (uint32_t e1, uint32_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi32( HEDLEY_STATIC_CAST(int32_t, e1), HEDLEY_STATIC_CAST(int32_t, e0)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32_t v[sizeof(r_.u32) / sizeof(r_.u32[0])] = { e0, e1 }; r_.neon_u32 = vld1_u32(v); #else r_.u32[0] = e0; r_.u32[1] = e1; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set_pi32 (int32_t e1, int32_t e0) { simde__m64_private r_; #if defined(SIMDE_X86_MMX_NATIVE) r_.n = _mm_set_pi32(e1, e0); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int32_t v[sizeof(r_.i32) / sizeof(r_.i32[0])] = { e0, e1 }; r_.neon_i32 = vld1_s32(v); #else r_.i32[0] = e0; r_.i32[1] = e1; #endif return simde__m64_from_private(r_); } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set_pi32(e1, e0) simde_mm_set_pi32(e1, e0) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_pi64 (int64_t e0) { simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int64_t v[sizeof(r_.i64) / sizeof(r_.i64[0])] = { e0 }; r_.neon_i64 = vld1_s64(v); #else r_.i64[0] = e0; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_set_f32x2 (simde_float32 e1, simde_float32 e0) { simde__m64_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const simde_float32 v[sizeof(r_.f32) / sizeof(r_.f32[0])] = { e0, e1 }; r_.neon_f32 = vld1_f32(v); #else r_.f32[0] = e0; r_.f32[1] = e1; #endif return simde__m64_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi8 (int8_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi8(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i8 = vmov_n_s8(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi8(a, a, a, a, a, a, a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi8(a) simde_mm_set1_pi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi16 (int16_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi16(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i16 = vmov_n_s16(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi16(a, a, a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi16(a) simde_mm_set1_pi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_set1_pi32 (int32_t a) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_pi32(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_i32 = vmov_n_s32(a); return simde__m64_from_private(r_); #else return simde_mm_set_pi32(a, a); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_set1_pi32(a) simde_mm_set1_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi8 (int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, 
int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_pi8(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_pi8(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi16 (int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_pi16(e3, e2, e1, e0); #else return simde_mm_set_pi16(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi16(e3, e2, e1, e0) simde_mm_setr_pi16(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setr_pi32 (int32_t e1, int32_t e0) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_pi32(e1, e0); #else return simde_mm_set_pi32(e0, e1); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setr_pi32(e1, e0) simde_mm_setr_pi32(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_setzero_si64 (void) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_setzero_si64(); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) simde__m64_private r_; r_.neon_u32 = vmov_n_u32(0); return simde__m64_from_private(r_); #else return simde_mm_set_pi32(0, 0); #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_setzero_si64() simde_mm_setzero_si64() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_load_si64 (const void* mem_addr) { simde__m64 r; simde_memcpy(&r, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_loadu_si64 (const void* mem_addr) { simde__m64 r; simde_memcpy(&r, mem_addr, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES void simde_x_mm_store_si64 (void* mem_addr, simde__m64 value) { simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m64), &value, sizeof(value)); } SIMDE_FUNCTION_ATTRIBUTES void 
simde_x_mm_storeu_si64 (void* mem_addr, simde__m64 value) { simde_memcpy(mem_addr, &value, sizeof(value)); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_x_mm_setone_si64 (void) { return simde_mm_set1_pi32(~INT32_C(0)); } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) return simde_mm_setzero_si64(); r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count_.u64[0]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count_.u64[0]; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count_.u64[0]); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllw(a, count) simde_mm_sll_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_pi16(a, count) simde_mm_sll_pi16(a, count) # define _m_psllw(a, count) simde_mm_sll_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = 
simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << count_.u64[0]; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] << count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pslld(a, count) simde_mm_sll_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_pi32(a, count) simde_mm_sll_pi32(a, count) # define _m_pslld(a, count) simde_mm_sll_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_slli_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psllh_s(a_.mmi_i16, count); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count > 15)) return simde_mm_setzero_si64(); r_.i16 = a_.i16 << HEDLEY_STATIC_CAST(int16_t, count); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16((int16_t) count)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, a_.u16[i] << count); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllwi(a, count) simde_mm_slli_pi16(a, count) #if 
defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_pi16(a, count) simde_mm_slli_pi16(a, count) # define _m_psllwi(a, count) simde_mm_slli_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_slli_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32((int32_t) count)); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psllw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] << count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pslldi(a, b) simde_mm_slli_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_pi32(a, count) simde_mm_slli_pi32(a, count) # define _m_pslldi(a, count) simde_mm_slli_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_slli_si64 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_slli_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vshl_s64(a_.neon_i64, vmov_n_s64((int64_t) count)); #else r_.u64[0] = a_.u64[0] << count; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllqi(a, count) simde_mm_slli_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_slli_si64(a, count) simde_mm_slli_si64(a, count) # define _m_psllqi(a, count) simde_mm_slli_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sll_si64 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sll_si64(a, count); #else 
simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vshl_s64(a_.neon_i64, count_.neon_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 << count_.i64; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } r_.u64[0] = a_.u64[0] << count_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psllq(a, count) simde_mm_sll_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sll_si64(a, count) simde_mm_sll_si64(a, count) # define _m_psllq(a, count) simde_mm_sll_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_BUG_CLANG_POWER9_16x4_BAD_SHIFT) if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) return simde_mm_setzero_si64(); r_.u16 = a_.u16 >> HEDLEY_STATIC_CAST(uint16_t, count_.u64[0]); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count_.u64[0]; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) vget_lane_u64(count_.neon_u64, 0)))); #else if (HEDLEY_UNLIKELY(count_.u64[0] > 15)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.u16) / sizeof(r_.u16[0]) ; i++) { r_.u16[i] = a_.u16[i] >> count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlw(a, count) simde_mm_srl_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_pi16(a, count) simde_mm_srl_pi16(a, count) # define _m_psrlw(a, count) 
simde_mm_srl_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> count_.u64[0]; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) vget_lane_u64(count_.neon_u64, 0)))); #else if (HEDLEY_UNLIKELY(count_.u64[0] > 31)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.u32) / sizeof(r_.u32[0]) ; i++) { r_.u32[i] = a_.u32[i] >> count_.u64[0]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrld(a, count) simde_mm_srl_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_pi32(a, count) simde_mm_srl_pi32(a, count) # define _m_psrld(a, count) simde_mm_srl_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshl_u16(a_.neon_u16, vmov_n_s16(-((int16_t) count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psrlh_s(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlwi(a, count) simde_mm_srli_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_pi16(a, count) simde_mm_srli_pi16(a, count) # define _m_psrlwi(a, count) simde_mm_srli_pi16(a, 
count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> count; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshl_u32(a_.neon_u32, vmov_n_s32(-((int32_t) count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psrlw_s(a_.mmi_i32, b_.mmi_i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> count; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrldi(a, count) simde_mm_srli_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_pi32(a, count) simde_mm_srli_pi32(a, count) # define _m_psrldi(a, count) simde_mm_srli_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srli_si64 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srli_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshl_u64(a_.neon_u64, vmov_n_s64(-count)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u64 = a_.u64 >> count; #else r_.u64[0] = a_.u64[0] >> count; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlqi(a, count) simde_mm_srli_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srli_si64(a, count) simde_mm_srli_si64(a, count) # define _m_psrlqi(a, count) simde_mm_srli_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srl_si64 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_srl_si64(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vshl_u64(a_.neon_u64, vneg_s64(count_.neon_i64)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u64 = a_.u64 >> count_.u64; #else if (HEDLEY_UNLIKELY(count_.u64[0] > 63)) { simde_memset(&r_, 0, sizeof(r_)); return simde__m64_from_private(r_); } r_.u64[0] = a_.u64[0] >> count_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrlq(a, count) simde_mm_srl_si64(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srl_si64(a, count) simde_mm_srl_si64(a, count) # define _m_psrlq(a, count) simde_mm_srl_si64(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi16 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srai_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> (count & 0xff); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psrah_s(a_.mmi_i16, count); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> (count & 0xff); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrawi(a, count) simde_mm_srai_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srai_pi16(a, count) simde_mm_srai_pi16(a, count) # define _m_psrawi(a, count) simde_mm_srai_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_srai_pi32 (simde__m64 a, int count) { #if defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_srai_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> (count & 0xff); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, 
vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, count))); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psraw_s(a_.mmi_i32, count); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> (count & 0xff); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psradi(a, count) simde_mm_srai_pi32(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_srai_pi32(a, count) simde_mm_srai_pi32(a, count) # define _m_psradi(a, count) simde_mm_srai_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi16 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sra_pi16(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 15 : count_.i64[0])); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 >> cnt; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshl_s16(a_.neon_i16, vmov_n_s16(-HEDLEY_STATIC_CAST(int16_t, vget_lane_u64(count_.neon_u64, 0)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psraw(a, count) simde_mm_sra_pi16(a, count) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sra_pi16(a, count) simde_mm_sra_pi16(a, count) # define _m_psraw(a, count) simde_mm_sra_pi16(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sra_pi32 (simde__m64 a, simde__m64 count) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sra_pi32(a, count); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private count_ = simde__m64_to_private(count); const int32_t cnt = (count_.u64[0] > 31) ? 
31 : HEDLEY_STATIC_CAST(int32_t, count_.u64[0]); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 >> cnt; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshl_s32(a_.neon_i32, vmov_n_s32(-HEDLEY_STATIC_CAST(int32_t, vget_lane_u64(count_.neon_u64, 0)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psrad(a, b) simde_mm_sra_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sra_pi32(a, count) simde_mm_sra_pi32(a, count) # define _m_psrad(a, count) simde_mm_sra_pi32(a, count) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vsub_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = psubb_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] - b_.i8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubb(a, b) simde_mm_sub_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi8(a, b) simde_mm_sub_pi8(a, b) # define _m_psubb(a, b) simde_mm_sub_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsub_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = 
psubh_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] - b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubw(a, b) simde_mm_sub_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi16(a, b) simde_mm_sub_pi16(a, b) # define _m_psubw(a, b) simde_mm_sub_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsub_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = psubw_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] - b_.i32[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubd(a, b) simde_mm_sub_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_sub_pi32(a, b) simde_mm_sub_pi32(a, b) # define _m_psubd(a, b) simde_mm_sub_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqsub_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = psubsb(a_.mmi_i8, b_.mmi_i8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { if (((b_.i8[i]) > 0 && (a_.i8[i]) < INT8_MIN + 
(b_.i8[i]))) { r_.i8[i] = INT8_MIN; } else if ((b_.i8[i]) < 0 && (a_.i8[i]) > INT8_MAX + (b_.i8[i])) { r_.i8[i] = INT8_MAX; } else { r_.i8[i] = (a_.i8[i]) - (b_.i8[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubsb(a, b) simde_mm_subs_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pi8(a, b) simde_mm_subs_pi8(a, b) # define _m_psubsb(a, b) simde_mm_subs_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqsub_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u8 = psubusb(a_.mmi_u8, b_.mmi_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { const int32_t x = a_.u8[i] - b_.u8[i]; if (x < 0) { r_.u8[i] = 0; } else if (x > UINT8_MAX) { r_.u8[i] = UINT8_MAX; } else { r_.u8[i] = HEDLEY_STATIC_CAST(uint8_t, x); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubusb(a, b) simde_mm_subs_pu8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pu8(a, b) simde_mm_subs_pu8(a, b) # define _m_psubusb(a, b) simde_mm_subs_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqsub_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = psubsh(a_.mmi_i16, b_.mmi_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { if (((b_.i16[i]) > 0 && 
(a_.i16[i]) < SHRT_MIN + (b_.i16[i]))) { r_.i16[i] = SHRT_MIN; } else if ((b_.i16[i]) < 0 && (a_.i16[i]) > INT16_MAX + (b_.i16[i])) { r_.i16[i] = INT16_MAX; } else { r_.i16[i] = (a_.i16[i]) - (b_.i16[i]); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubsw(a, b) simde_mm_subs_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pi16(a, b) simde_mm_subs_pi16(a, b) # define _m_psubsw(a, b) simde_mm_subs_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_subs_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_subs_pu16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqsub_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_u16 = psubush(a_.mmi_u16, b_.mmi_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { const int x = a_.u16[i] - b_.u16[i]; if (x < 0) { r_.u16[i] = 0; } else if (x > UINT16_MAX) { r_.u16[i] = UINT16_MAX; } else { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, x); } } #endif return simde__m64_from_private(r_); #endif } #define simde_m_psubusw(a, b) simde_mm_subs_pu16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_subs_pu16(a, b) simde_mm_subs_pu16(a, b) # define _m_psubusw(a, b) simde_mm_subs_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip2_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = punpckhbh_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = 
SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 4, 12, 5, 13, 6, 14, 7, 15); #else r_.i8[0] = a_.i8[4]; r_.i8[1] = b_.i8[4]; r_.i8[2] = a_.i8[5]; r_.i8[3] = b_.i8[5]; r_.i8[4] = a_.i8[6]; r_.i8[5] = b_.i8[6]; r_.i8[6] = a_.i8[7]; r_.i8[7] = b_.i8[7]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi8(a, b) simde_mm_unpackhi_pi8(a, b) # define _m_punpckhbw(a, b) simde_mm_unpackhi_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip2_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = punpckhhw_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 2, 6, 3, 7); #else r_.i16[0] = a_.i16[2]; r_.i16[1] = b_.i16[2]; r_.i16[2] = a_.i16[3]; r_.i16[3] = b_.i16[3]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi16(a, b) simde_mm_unpackhi_pi16(a, b) # define _m_punpckhwd(a, b) simde_mm_unpackhi_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpackhi_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpackhi_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip2_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = punpckhwd_s(a_.mmi_i32, b_.mmi_i32); #elif 
defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 1, 3); #else r_.i32[0] = a_.i32[1]; r_.i32[1] = b_.i32[1]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_pi32(a, b) simde_mm_unpackhi_pi32(a, b) # define _m_punpckhdq(a, b) simde_mm_unpackhi_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi8(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip1_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i8 = punpcklbh_s(a_.mmi_i8, b_.mmi_i8); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 8, a_.i8, b_.i8, 0, 8, 1, 9, 2, 10, 3, 11); #else r_.i8[0] = a_.i8[0]; r_.i8[1] = b_.i8[0]; r_.i8[2] = a_.i8[1]; r_.i8[3] = b_.i8[1]; r_.i8[4] = a_.i8[2]; r_.i8[5] = b_.i8[2]; r_.i8[6] = a_.i8[3]; r_.i8[7] = b_.i8[3]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi8(a, b) simde_mm_unpacklo_pi8(a, b) # define _m_punpcklbw(a, b) simde_mm_unpacklo_pi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi16(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip1_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i16 = punpcklhw_s(a_.mmi_i16, b_.mmi_i16); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = 
SIMDE_SHUFFLE_VECTOR_(16, 8, a_.i16, b_.i16, 0, 4, 1, 5); #else r_.i16[0] = a_.i16[0]; r_.i16[1] = b_.i16[0]; r_.i16[2] = a_.i16[1]; r_.i16[3] = b_.i16[1]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi16(a, b) simde_mm_unpacklo_pi16(a, b) # define _m_punpcklwd(a, b) simde_mm_unpacklo_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_unpacklo_pi32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_unpacklo_pi32(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip1_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_MIPS_LOONGSON_MMI_NATIVE) r_.mmi_i32 = punpcklwd_s(a_.mmi_i32, b_.mmi_i32); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 8, a_.i32, b_.i32, 0, 2); #else r_.i32[0] = a_.i32[0]; r_.i32[1] = b_.i32[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_pi32(a, b) simde_mm_unpacklo_pi32(a, b) # define _m_punpckldq(a, b) simde_mm_unpacklo_pi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_xor_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_MMX_NATIVE) return _mm_xor_si64(a, b); #else simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veor_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else r_.u64[0] = a_.u64[0] ^ b_.u64[0]; #endif return simde__m64_from_private(r_); #endif } #define simde_m_pxor(a, b) simde_mm_xor_si64(a, b) #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define 
_mm_xor_si64(a, b) simde_mm_xor_si64(a, b) # define _m_pxor(a, b) simde_mm_xor_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_m_to_int (simde__m64 a) { #if defined(SIMDE_X86_MMX_NATIVE) return _m_to_int(a); #else simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wvector-conversion") && SIMDE_DETECT_CLANG_VERSION_NOT(10,0,0) #pragma clang diagnostic ignored "-Wvector-conversion" #endif return vget_lane_s32(a_.neon_i32, 0); HEDLEY_DIAGNOSTIC_POP #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_MMX_ENABLE_NATIVE_ALIASES) # define _m_to_int(a) simde_m_to_int(a) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_MMX_H) */ /* :: End x86/mmx.h :: */ #if defined(_WIN32) && !defined(SIMDE_X86_SSE_NATIVE) && defined(_MSC_VER) #define NOMINMAX #include #endif #if defined(__ARM_ACLE) #include #endif HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ typedef union { #if defined(SIMDE_VECTOR_SUBSCRIPT) SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #endif SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else SIMDE_ALIGN_TO_16 int8_t 
i8[16]; SIMDE_ALIGN_TO_16 int16_t i16[8]; SIMDE_ALIGN_TO_16 int32_t i32[4]; SIMDE_ALIGN_TO_16 int64_t i64[2]; SIMDE_ALIGN_TO_16 uint8_t u8[16]; SIMDE_ALIGN_TO_16 uint16_t u16[8]; SIMDE_ALIGN_TO_16 uint32_t u32[4]; SIMDE_ALIGN_TO_16 uint64_t u64[2]; #if defined(SIMDE_HAVE_INT128_) SIMDE_ALIGN_TO_16 simde_int128 i128[1]; SIMDE_ALIGN_TO_16 simde_uint128 u128[1]; #endif SIMDE_ALIGN_TO_16 simde_float32 f32[4]; SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)]; SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)]; #endif SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2]; SIMDE_ALIGN_TO_16 simde__m64 m64[2]; #if defined(SIMDE_X86_SSE_NATIVE) SIMDE_ALIGN_TO_16 __m128 n; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_TO_16 int8x16_t neon_i8; SIMDE_ALIGN_TO_16 int16x8_t neon_i16; SIMDE_ALIGN_TO_16 int32x4_t neon_i32; SIMDE_ALIGN_TO_16 int64x2_t neon_i64; SIMDE_ALIGN_TO_16 uint8x16_t neon_u8; SIMDE_ALIGN_TO_16 uint16x8_t neon_u16; SIMDE_ALIGN_TO_16 uint32x4_t neon_u32; SIMDE_ALIGN_TO_16 uint64x2_t neon_u64; SIMDE_ALIGN_TO_16 float32x4_t neon_f32; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_ALIGN_TO_16 float64x2_t neon_f64; #endif #elif defined(SIMDE_MIPS_MSA_NATIVE) v16i8 msa_i8; v8i16 msa_i16; v4i32 msa_i32; v2i64 msa_i64; v16u8 msa_u8; v8u16 msa_u16; v4u32 msa_u32; v2u64 msa_u64; #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_ALIGN_TO_16 v128_t wasm_v128; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32; #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64; SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64; #endif #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) v16i8 lsx_i8; v8i16 lsx_i16; v4i32 lsx_i32; v2i64 lsx_i64; v16u8 lsx_u8; v8u16 lsx_u16; v4u32 lsx_u32; v2u64 lsx_u64; v4f32 lsx_f32; v2f64 lsx_f64; #endif } simde__m128_private; #if defined(SIMDE_X86_SSE_NATIVE) typedef __m128 simde__m128; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) typedef float32x4_t simde__m128; #elif defined(SIMDE_WASM_SIMD128_NATIVE) typedef v128_t simde__m128; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128; #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) typedef v4f32 simde__m128; #elif defined(SIMDE_VECTOR_SUBSCRIPT) typedef simde_float32 simde__m128 SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS; #else typedef simde__m128_private simde__m128; #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) typedef simde__m128 __m128; #endif HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128), "simde__m128 size incorrect"); HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128_private), "simde__m128_private size incorrect"); #if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF) HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128) == 16, "simde__m128 is not 16-byte aligned"); HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128_private) == 16, "simde__m128_private is not 16-byte aligned"); #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde__m128_from_private(simde__m128_private v) { simde__m128 r; simde_memcpy(&r, &v, sizeof(r)); return r; } SIMDE_FUNCTION_ATTRIBUTES simde__m128_private simde__m128_to_private(simde__m128 v) { simde__m128_private r; simde_memcpy(&r, &v, sizeof(r)); return r; } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) 
SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int8x16_t, neon, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int16x8_t, neon, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int32x4_t, neon, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, int64x2_t, neon, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint8x16_t, neon, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint16x8_t, neon, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint32x4_t, neon, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, uint64x2_t, neon, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float32x4_t, neon, f32) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, float64x2_t, neon, f64) #endif #endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */ #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32) #if defined(SIMDE_BUG_GCC_95782) SIMDE_FUNCTION_ATTRIBUTES SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128_to_altivec_f32(simde__m128 value) { simde__m128_private r_ = simde__m128_to_private(value); return r_.altivec_f32; } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde__m128_from_altivec_f32(SIMDE_POWER_ALTIVEC_VECTOR(float) value) { simde__m128_private r_; r_.altivec_f32 = value; return simde__m128_from_private(r_); } #else SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(float), altivec, f32) #endif #if 
defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64) #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v128_t, wasm, v128); #endif /* defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) */ #if defined(SIMDE_LOONGARCH_LSX_NATIVE) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16i8, lsx, i8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8i16, lsx, i16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4i32, lsx, i32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2i64, lsx, i64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v16u8, lsx, u8) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v8u16, lsx, u16) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4u32, lsx, u32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2u64, lsx, u64) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v4f32, lsx, f32) SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128, v2f64, lsx, f64) #endif /* defined(SIMDE_LOONGARCH_LSX_NATIVE) */ enum { #if defined(SIMDE_X86_SSE_NATIVE) SIMDE_MM_ROUND_NEAREST = _MM_ROUND_NEAREST, SIMDE_MM_ROUND_DOWN = _MM_ROUND_DOWN, SIMDE_MM_ROUND_UP = _MM_ROUND_UP, SIMDE_MM_ROUND_TOWARD_ZERO = _MM_ROUND_TOWARD_ZERO #else SIMDE_MM_ROUND_NEAREST = 0x0000, SIMDE_MM_ROUND_DOWN = 0x2000, SIMDE_MM_ROUND_UP = 0x4000, SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000 #endif }; #if defined(_MM_FROUND_TO_NEAREST_INT) # define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT # define SIMDE_MM_FROUND_TO_NEG_INF _MM_FROUND_TO_NEG_INF # define SIMDE_MM_FROUND_TO_POS_INF _MM_FROUND_TO_POS_INF # define SIMDE_MM_FROUND_TO_ZERO _MM_FROUND_TO_ZERO # define SIMDE_MM_FROUND_CUR_DIRECTION _MM_FROUND_CUR_DIRECTION # define SIMDE_MM_FROUND_RAISE_EXC _MM_FROUND_RAISE_EXC # define SIMDE_MM_FROUND_NO_EXC _MM_FROUND_NO_EXC #else # define 
SIMDE_MM_FROUND_TO_NEAREST_INT 0x00 # define SIMDE_MM_FROUND_TO_NEG_INF 0x01 # define SIMDE_MM_FROUND_TO_POS_INF 0x02 # define SIMDE_MM_FROUND_TO_ZERO 0x03 # define SIMDE_MM_FROUND_CUR_DIRECTION 0x04 # define SIMDE_MM_FROUND_RAISE_EXC 0x00 # define SIMDE_MM_FROUND_NO_EXC 0x08 #endif #define SIMDE_MM_FROUND_NINT \ (SIMDE_MM_FROUND_TO_NEAREST_INT | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_FLOOR \ (SIMDE_MM_FROUND_TO_NEG_INF | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_CEIL \ (SIMDE_MM_FROUND_TO_POS_INF | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_TRUNC \ (SIMDE_MM_FROUND_TO_ZERO | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_RINT \ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_RAISE_EXC) #define SIMDE_MM_FROUND_NEARBYINT \ (SIMDE_MM_FROUND_CUR_DIRECTION | SIMDE_MM_FROUND_NO_EXC) #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) && !defined(_MM_FROUND_TO_NEAREST_INT) # define _MM_FROUND_TO_NEAREST_INT SIMDE_MM_FROUND_TO_NEAREST_INT # define _MM_FROUND_TO_NEG_INF SIMDE_MM_FROUND_TO_NEG_INF # define _MM_FROUND_TO_POS_INF SIMDE_MM_FROUND_TO_POS_INF # define _MM_FROUND_TO_ZERO SIMDE_MM_FROUND_TO_ZERO # define _MM_FROUND_CUR_DIRECTION SIMDE_MM_FROUND_CUR_DIRECTION # define _MM_FROUND_RAISE_EXC SIMDE_MM_FROUND_RAISE_EXC # define _MM_FROUND_NINT SIMDE_MM_FROUND_NINT # define _MM_FROUND_FLOOR SIMDE_MM_FROUND_FLOOR # define _MM_FROUND_CEIL SIMDE_MM_FROUND_CEIL # define _MM_FROUND_TRUNC SIMDE_MM_FROUND_TRUNC # define _MM_FROUND_RINT SIMDE_MM_FROUND_RINT # define _MM_FROUND_NEARBYINT SIMDE_MM_FROUND_NEARBYINT #endif #if defined(_MM_EXCEPT_INVALID) # define SIMDE_MM_EXCEPT_INVALID _MM_EXCEPT_INVALID #else # define SIMDE_MM_EXCEPT_INVALID (0x0001) #endif #if defined(_MM_EXCEPT_DENORM) # define SIMDE_MM_EXCEPT_DENORM _MM_EXCEPT_DENORM #else # define SIMDE_MM_EXCEPT_DENORM (0x0002) #endif #if defined(_MM_EXCEPT_DIV_ZERO) # define SIMDE_MM_EXCEPT_DIV_ZERO _MM_EXCEPT_DIV_ZERO #else # define SIMDE_MM_EXCEPT_DIV_ZERO (0x0004) #endif #if 
defined(_MM_EXCEPT_OVERFLOW) # define SIMDE_MM_EXCEPT_OVERFLOW _MM_EXCEPT_OVERFLOW #else # define SIMDE_MM_EXCEPT_OVERFLOW (0x0008) #endif #if defined(_MM_EXCEPT_UNDERFLOW) # define SIMDE_MM_EXCEPT_UNDERFLOW _MM_EXCEPT_UNDERFLOW #else # define SIMDE_MM_EXCEPT_UNDERFLOW (0x0010) #endif #if defined(_MM_EXCEPT_INEXACT) # define SIMDE_MM_EXCEPT_INEXACT _MM_EXCEPT_INEXACT #else # define SIMDE_MM_EXCEPT_INEXACT (0x0020) #endif #if defined(_MM_EXCEPT_MASK) # define SIMDE_MM_EXCEPT_MASK _MM_EXCEPT_MASK #else # define SIMDE_MM_EXCEPT_MASK \ (SIMDE_MM_EXCEPT_INVALID | SIMDE_MM_EXCEPT_DENORM | \ SIMDE_MM_EXCEPT_DIV_ZERO | SIMDE_MM_EXCEPT_OVERFLOW | \ SIMDE_MM_EXCEPT_UNDERFLOW | SIMDE_MM_EXCEPT_INEXACT) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_EXCEPT_INVALID SIMDE_MM_EXCEPT_INVALID #define _MM_EXCEPT_DENORM SIMDE_MM_EXCEPT_DENORM #define _MM_EXCEPT_DIV_ZERO SIMDE_MM_EXCEPT_DIV_ZERO #define _MM_EXCEPT_OVERFLOW SIMDE_MM_EXCEPT_OVERFLOW #define _MM_EXCEPT_UNDERFLOW SIMDE_MM_EXCEPT_UNDERFLOW #define _MM_EXCEPT_INEXACT SIMDE_MM_EXCEPT_INEXACT #define _MM_EXCEPT_MASK SIMDE_MM_EXCEPT_MASK #endif #if defined(_MM_MASK_INVALID) # define SIMDE_MM_MASK_INVALID _MM_MASK_INVALID #else # define SIMDE_MM_MASK_INVALID (0x0080) #endif #if defined(_MM_MASK_DENORM) # define SIMDE_MM_MASK_DENORM _MM_MASK_DENORM #else # define SIMDE_MM_MASK_DENORM (0x0100) #endif #if defined(_MM_MASK_DIV_ZERO) # define SIMDE_MM_MASK_DIV_ZERO _MM_MASK_DIV_ZERO #else # define SIMDE_MM_MASK_DIV_ZERO (0x0200) #endif #if defined(_MM_MASK_OVERFLOW) # define SIMDE_MM_MASK_OVERFLOW _MM_MASK_OVERFLOW #else # define SIMDE_MM_MASK_OVERFLOW (0x0400) #endif #if defined(_MM_MASK_UNDERFLOW) # define SIMDE_MM_MASK_UNDERFLOW _MM_MASK_UNDERFLOW #else # define SIMDE_MM_MASK_UNDERFLOW (0x0800) #endif #if defined(_MM_MASK_INEXACT) # define SIMDE_MM_MASK_INEXACT _MM_MASK_INEXACT #else # define SIMDE_MM_MASK_INEXACT (0x1000) #endif #if defined(_MM_MASK_MASK) # define SIMDE_MM_MASK_MASK _MM_MASK_MASK #else # 
define SIMDE_MM_MASK_MASK \ (SIMDE_MM_MASK_INVALID | SIMDE_MM_MASK_DENORM | \ SIMDE_MM_MASK_DIV_ZERO | SIMDE_MM_MASK_OVERFLOW | \ SIMDE_MM_MASK_UNDERFLOW | SIMDE_MM_MASK_INEXACT) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_MASK_INVALID SIMDE_MM_MASK_INVALID #define _MM_MASK_DENORM SIMDE_MM_MASK_DENORM #define _MM_MASK_DIV_ZERO SIMDE_MM_MASK_DIV_ZERO #define _MM_MASK_OVERFLOW SIMDE_MM_MASK_OVERFLOW #define _MM_MASK_UNDERFLOW SIMDE_MM_MASK_UNDERFLOW #define _MM_MASK_INEXACT SIMDE_MM_MASK_INEXACT #define _MM_MASK_MASK SIMDE_MM_MASK_MASK #endif #if defined(_MM_FLUSH_ZERO_MASK) # define SIMDE_MM_FLUSH_ZERO_MASK _MM_FLUSH_ZERO_MASK #else # define SIMDE_MM_FLUSH_ZERO_MASK (0x8000) #endif #if defined(_MM_FLUSH_ZERO_ON) # define SIMDE_MM_FLUSH_ZERO_ON _MM_FLUSH_ZERO_ON #else # define SIMDE_MM_FLUSH_ZERO_ON (0x8000) #endif #if defined(_MM_FLUSH_ZERO_OFF) # define SIMDE_MM_FLUSH_ZERO_OFF _MM_FLUSH_ZERO_OFF #else # define SIMDE_MM_FLUSH_ZERO_OFF (0x0000) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_FLUSH_ZERO_MASK SIMDE_MM_FLUSH_ZERO_MASK #define _MM_FLUSH_ZERO_ON SIMDE_MM_FLUSH_ZERO_ON #define _MM_FLUSH_ZERO_OFF SIMDE_MM_FLUSH_ZERO_OFF #endif SIMDE_FUNCTION_ATTRIBUTES unsigned int SIMDE_MM_GET_ROUNDING_MODE(void) { #if defined(SIMDE_X86_SSE_NATIVE) return _MM_GET_ROUNDING_MODE(); #elif defined(SIMDE_HAVE_FENV_H) unsigned int vfe_mode; switch (fegetround()) { #if defined(FE_TONEAREST) case FE_TONEAREST: vfe_mode = SIMDE_MM_ROUND_NEAREST; break; #endif #if defined(FE_TOWARDZERO) case FE_TOWARDZERO: vfe_mode = SIMDE_MM_ROUND_DOWN; break; #endif #if defined(FE_UPWARD) case FE_UPWARD: vfe_mode = SIMDE_MM_ROUND_UP; break; #endif #if defined(FE_DOWNWARD) case FE_DOWNWARD: vfe_mode = SIMDE_MM_ROUND_TOWARD_ZERO; break; #endif default: vfe_mode = SIMDE_MM_ROUND_NEAREST; break; } return vfe_mode; #else return SIMDE_MM_ROUND_NEAREST; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_GET_ROUNDING_MODE() 
SIMDE_MM_GET_ROUNDING_MODE() #endif SIMDE_FUNCTION_ATTRIBUTES void SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) { #if defined(SIMDE_X86_SSE_NATIVE) _MM_SET_ROUNDING_MODE(a); #elif defined(SIMDE_HAVE_FENV_H) int fe_mode = FE_TONEAREST; switch (a) { #if defined(FE_TONEAREST) case SIMDE_MM_ROUND_NEAREST: fe_mode = FE_TONEAREST; break; #endif #if defined(FE_TOWARDZERO) case SIMDE_MM_ROUND_TOWARD_ZERO: fe_mode = FE_TOWARDZERO; break; #endif #if defined(FE_DOWNWARD) case SIMDE_MM_ROUND_DOWN: fe_mode = FE_DOWNWARD; break; #endif #if defined(FE_UPWARD) case SIMDE_MM_ROUND_UP: fe_mode = FE_UPWARD; break; #endif default: return; } fesetround(fe_mode); #else (void) a; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_ROUNDING_MODE(a) SIMDE_MM_SET_ROUNDING_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t SIMDE_MM_GET_FLUSH_ZERO_MODE (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_getcsr() & _MM_FLUSH_ZERO_MASK; #else return SIMDE_MM_FLUSH_ZERO_OFF; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES void SIMDE_MM_SET_FLUSH_ZERO_MODE (uint32_t a) { #if defined(SIMDE_X86_SSE_NATIVE) _MM_SET_FLUSH_ZERO_MODE(a); #else (void) a; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a) #endif SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_mm_getcsr (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_getcsr(); #else return SIMDE_MM_GET_ROUNDING_MODE(); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_getcsr() simde_mm_getcsr() #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_setcsr (uint32_t a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_setcsr(a); #else SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a)); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_setcsr(a) simde_mm_setcsr(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 
simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding) SIMDE_REQUIRE_CONSTANT_RANGE(rounding, 0, 15) SIMDE_REQUIRE_CONSTANT_RANGE(lax_rounding, 0, 1) { simde__m128_private r_, a_ = simde__m128_to_private(a); (void) lax_rounding; /* For architectures which lack a current direction SIMD instruction. * * Note that NEON actually has a current rounding mode instruction, * but in ARMv8+ the rounding mode is ignored and nearest is always * used, so we treat ARMv7 as having a rounding mode but ARMv8 as * not. */ #if \ defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || \ defined(SIMDE_ARM_NEON_A32V8) if ((rounding & 7) == SIMDE_MM_FROUND_CUR_DIRECTION) rounding = HEDLEY_STATIC_CAST(int, SIMDE_MM_GET_ROUNDING_MODE()) << 13; #endif switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) { case SIMDE_MM_FROUND_CUR_DIRECTION: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_round(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_f32 = vrndiq_f32(a_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); #elif defined(simde_math_nearbyintf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_nearbyintf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEAREST_INT: #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_rint(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndnq_f32(a_.neon_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfrintrne_s(a_.lsx_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_nearest(a_.wasm_v128); #elif defined(simde_math_roundevenf) SIMDE_VECTORIZE for 
(size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_roundevenf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_NEG_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_floor(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndmq_f32(a_.neon_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfrintrm_s(a_.lsx_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_floor(a_.wasm_v128); #elif defined(simde_math_floorf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_floorf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_POS_INF: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_ceil(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndpq_f32(a_.neon_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfrintrp_s(a_.lsx_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_ceil(a_.wasm_v128); #elif defined(simde_math_ceilf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_ceilf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; case SIMDE_MM_FROUND_TO_ZERO: #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_trunc(a_.altivec_f32)); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) r_.neon_f32 = vrndq_f32(a_.neon_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfrintrz_s(a_.lsx_f32); #elif 
defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_trunc(a_.wasm_v128); #elif defined(simde_math_truncf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_truncf(a_.f32[i]); } #else HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); #endif break; default: HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd()); } return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE4_1_NATIVE) #define simde_mm_round_ps(a, rounding) _mm_round_ps((a), (rounding)) #else #define simde_mm_round_ps(a, rounding) simde_x_mm_round_ps((a), (rounding), 0) #endif #if defined(SIMDE_X86_SSE4_1_ENABLE_NATIVE_ALIASES) #define _mm_round_ps(a, rounding) simde_mm_round_ps((a), (rounding)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_set_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ps(e3, e2, e1, e0); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_TO_16 simde_float32 data[4] = { e0, e1, e2, e3 }; r_.neon_f32 = vld1q_f32(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_make(e0, e1, e2, e3); #else r_.f32[0] = e0; r_.f32[1] = e1; r_.f32[2] = e2; r_.f32[3] = e3; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ps(e3, e2, e1, e0) simde_mm_set_ps(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_set_ps1 (simde_float32 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ps1(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdupq_n_f32(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) (void) a; return vec_splats(a); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) return (simde__m128)__lsx_vldrepl_w(&a, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_splat(a); #else return simde_mm_set_ps(a, a, a, a); #endif } #define simde_mm_set1_ps(a) 
simde_mm_set_ps1(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ps1(a) simde_mm_set_ps1(a) # define _mm_set1_ps(a) simde_mm_set1_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_move_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_move_ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(b_.neon_f32, 0), a_.neon_f32, 0); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) m = { ~0U, 0U, 0U, 0U }; r_.altivec_f32 = vec_sel(a_.altivec_f32, b_.altivec_f32, m); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_shuffle(b_.wasm_v128, a_.wasm_v128, 0, 1, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vextrins_w(a_.lsx_i64, b_.lsx_i64, 0); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 4, 1, 2, 3); #else r_.f32[0] = b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_move_ss(a, b) simde_mm_move_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_broadcastlow_ps(simde__m128 a) { /* This function broadcasts the first element in the inpu vector to * all lanes. It is used to avoid generating spurious exceptions in * *_ss functions since there may be garbage in the upper lanes. 
*/ #if defined(SIMDE_X86_SSE_NATIVE) return _mm_shuffle_ps(a, a, 0); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vdupq_laneq_f32(a_.neon_f32, 0); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_splat(a_.altivec_f32, 0); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vreplvei_w(a_.lsx_i64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_splat(a_.f32[0]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[0]; } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_add_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_add_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vaddq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_add(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfadd_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 + b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] + b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_add_ps(a, b) simde_mm_add_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_add_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_add_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, 
simde_mm_add_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_add_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t b0 = vgetq_lane_f32(b_.neon_f32, 0); float32x4_t value = vsetq_lane_f32(b0, vdupq_n_f32(0), 0); // the upper values in the result must be the remnants of . r_.neon_f32 = vaddq_f32(a_.neon_f32, value); #else r_.f32[0] = a_.f32[0] + b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_add_ss(a, b) simde_mm_add_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_and_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_and_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vand_v(a_.lsx_i64, b_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 & b_.i32; #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_and(a_.altivec_f32, b_.altivec_f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] & b_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_and_ps(a, b) simde_mm_and_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_andnot_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_andnot_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = 
simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = vec_andc(b_.altivec_f32, a_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vandn_v(a_.lsx_i64, b_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32 & b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ~(a_.i32[i]) & b_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_andnot_ps(a, b) simde_mm_andnot_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_xor_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_xor_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vxor_v(a_.lsx_i64, b_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] ^ b_.u32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_xor_ps(a, b) simde_mm_xor_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_or_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_or_ps(a, b); #else simde__m128_private r_, a_ = 
simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vor_v(a_.lsx_i64, b_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] | b_.u32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_or_ps(a, b) simde_mm_or_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_not_ps(simde__m128 a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) __m128i ai = _mm_castps_si128(a); return _mm_castsi128_ps(_mm_ternarylogic_epi32(ai, ai, ai, 0x55)); #elif defined(SIMDE_X86_SSE2_NATIVE) /* Note: we use ints instead of floats because we don't want cmpeq * to return false for (NaN, NaN) */ __m128i ai = _mm_castps_si128(a); return _mm_castsi128_ps(_mm_andnot_si128(ai, _mm_cmpeq_epi32(ai, ai))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmvnq_s32(a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vnor_v(a_.lsx_i64, a_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = ~a_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = ~(a_.i32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_select_ps(simde__m128 a, 
simde__m128 b, simde__m128 mask) { /* This function is for when you want to blend two elements together * according to a mask. It is similar to _mm_blendv_ps, except that * it is undefined whether the blend is based on the highest bit in * each lane (like blendv) or just bitwise operations. This allows * us to implement the function efficiently everywhere. * * Basically, you promise that all the lanes in mask are either 0 or * ~0. */ #if defined(SIMDE_X86_SSE4_1_NATIVE) return _mm_blendv_ps(a, b, mask); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b), mask_ = simde__m128_to_private(mask); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vbslq_s32(mask_.neon_u32, b_.neon_i32, a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_bitselect(b_.wasm_v128, a_.wasm_v128, mask_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i32 = vec_sel(a_.altivec_i32, b_.altivec_i32, mask_.altivec_u32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vbitsel_v(a_.lsx_i64, b_.lsx_i64, mask_.lsx_i64); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 ^ ((a_.i32 ^ b_.i32) & mask_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] ^ ((a_.i32[i] ^ b_.i32[i]) & mask_.i32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_avg_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_avg_pu16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vrhadd_u16(b_.neon_u16, a_.neon_u16); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) uint32_t wa SIMDE_VECTOR(16); 
uint32_t wb SIMDE_VECTOR(16); uint32_t wr SIMDE_VECTOR(16); SIMDE_CONVERT_VECTOR_(wa, a_.u16); SIMDE_CONVERT_VECTOR_(wb, b_.u16); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u16, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pavgw(a, b) simde_mm_avg_pu16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_avg_pu16(a, b) simde_mm_avg_pu16(a, b) # define _m_pavgw(a, b) simde_mm_avg_pu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_avg_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_avg_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vrhadd_u8(b_.neon_u8, a_.neon_u8); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) && !defined(SIMDE_BUG_GCC_100761) uint16_t wa SIMDE_VECTOR(16); uint16_t wb SIMDE_VECTOR(16); uint16_t wr SIMDE_VECTOR(16); SIMDE_CONVERT_VECTOR_(wa, a_.u8); SIMDE_CONVERT_VECTOR_(wb, b_.u8); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u8, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pavgb(a, b) simde_mm_avg_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_avg_pu8(a, b) simde_mm_avg_pu8(a, b) # define _m_pavgb(a, b) simde_mm_avg_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_abs_ps(simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) simde_float32 mask_; uint32_t u32_ = UINT32_C(0x7FFFFFFF); simde_memcpy(&mask_, &u32_, sizeof(u32_)); return _mm_and_ps(_mm_set1_ps(mask_), a); #else simde__m128_private r_, a_ = 
simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vabsq_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = vec_abs(a_.altivec_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_abs(a_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_fabsf(a_.f32[i]); } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpeq_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpeq_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vceqq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_ceq_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), a_.f32 == b_.f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] == b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_ps(a, b) simde_mm_cmpeq_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpeq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpeq_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpeq_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpeq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] == b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpeq_ss(a, b) simde_mm_cmpeq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpge_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpge_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgeq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_ge(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpge(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_cle_s(b_.lsx_f32, a_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 >= b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] >= b_.f32[i]) ? 
~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpge_ps(a, b) simde_mm_cmpge_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpge_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpge_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpge_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpge_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] >= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpge_ss(a, b) simde_mm_cmpge_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpgt_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpgt_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_clt_s(b_.lsx_f32, a_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 > b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] > 
b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_ps(a, b) simde_mm_cmpgt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpgt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpgt_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpgt_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpgt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] > b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpgt_ss(a, b) simde_mm_cmpgt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmple_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmple_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcleq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_le(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmple(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_cle_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 <= b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = 
(a_.f32[i] <= b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmple_ps(a, b) simde_mm_cmple_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmple_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmple_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmple_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmple_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] <= b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmple_ss(a, b) simde_mm_cmple_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmplt_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmplt_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcltq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_lt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmplt(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_clt_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 < b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = 
(a_.f32[i] < b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmplt_ps(a, b) simde_mm_cmplt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmplt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmplt_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmplt_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmplt_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] < b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmplt_ss(a, b) simde_mm_cmplt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpneq_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpneq_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_ne(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_cmpeq(a_.altivec_f32, b_.altivec_f32)); r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nor(r_.altivec_f32, r_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_cune_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = 
HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.f32 != b_.f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (a_.f32[i] != b_.f32[i]) ? ~UINT32_C(0) : UINT32_C(0); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpneq_ps(a, b) simde_mm_cmpneq_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpneq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpneq_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpneq_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpneq_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.u32[0] = (a_.f32[0] != b_.f32[0]) ? ~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpneq_ss(a, b) simde_mm_cmpneq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnge_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmplt_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnge_ps(a, b) simde_mm_cmpnge_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnge_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmplt_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnge_ss(a, b) simde_mm_cmpnge_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpngt_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmple_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpngt_ps(a, b) simde_mm_cmpngt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m128 simde_mm_cmpngt_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmple_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpngt_ss(a, b) simde_mm_cmpngt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnle_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmpgt_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnle_ps(a, b) simde_mm_cmpnle_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnle_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmpgt_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnle_ss(a, b) simde_mm_cmpnle_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnlt_ps (simde__m128 a, simde__m128 b) { return simde_mm_cmpge_ps(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnlt_ps(a, b) simde_mm_cmpnlt_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpnlt_ss (simde__m128 a, simde__m128 b) { return simde_mm_cmpge_ss(a, b); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpnlt_ss(a, b) simde_mm_cmpnlt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpord_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpord_ps(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_and(wasm_f32x4_eq(a, a), wasm_f32x4_eq(b, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* Note: NEON does not have ordered compare builtin Need to compare a eq a and b eq b to check for NaN Do AND of results to get final */ uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); r_.neon_u32 = vandq_u32(ceqaa, ceqbb); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_eq(b_.wasm_v128, 
b_.wasm_v128)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); r_.lsx_i64 = __lsx_vnor_v(r_.lsx_i64, r_.lsx_i64); #elif defined(simde_math_isnanf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? UINT32_C(0) : ~UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpord_ps(a, b) simde_mm_cmpord_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpunord_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpunord_ps(a, b); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_v128_or(wasm_f32x4_ne(a, a), wasm_f32x4_ne(b, b)); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t ceqaa = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t ceqbb = vceqq_f32(b_.neon_f32, b_.neon_f32); r_.neon_u32 = vmvnq_u32(vandq_u32(ceqaa, ceqbb)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(wasm_f32x4_ne(a_.wasm_v128, a_.wasm_v128), wasm_f32x4_ne(b_.wasm_v128, b_.wasm_v128)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_nand(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_and(vec_cmpeq(a_.altivec_f32, a_.altivec_f32), vec_cmpeq(b_.altivec_f32, b_.altivec_f32))); r_.altivec_f32 = vec_nor(r_.altivec_f32, 
r_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vfcmp_cun_s(a_.lsx_f32, b_.lsx_f32); #elif defined(simde_math_isnanf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = (simde_math_isnanf(a_.f32[i]) || simde_math_isnanf(b_.f32[i])) ? ~UINT32_C(0) : UINT32_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpunord_ps(a, b) simde_mm_cmpunord_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpunord_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) return _mm_cmpunord_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpunord_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpunord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(a_.f32[0]) || simde_math_isnanf(b_.f32[0])) ? 
~UINT32_C(0) : UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpunord_ss(a, b) simde_mm_cmpunord_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comieq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comieq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] == b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comieq_ss(a, b) simde_mm_comieq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comige_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comige_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] >= b_.f32[0]; #endif #endif } 
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comige_ss(a, b) simde_mm_comige_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comigt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comigt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] > b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comigt_ss(a, b) simde_mm_comigt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comile_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comile_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] <= b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comile_ss(a, b) simde_mm_comile_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comilt_ss (simde__m128 a, simde__m128 b) { #if 
defined(SIMDE_X86_SSE_NATIVE) return _mm_comilt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); return !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] < b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comilt_ss(a, b) simde_mm_comilt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comineq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_comineq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); return !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); #else return a_.f32[0] != b_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_comineq_ss(a, b) simde_mm_comineq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_copysign_ps(simde__m128 dest, simde__m128 src) { simde__m128_private r_, dest_ = simde__m128_to_private(dest), src_ = simde__m128_to_private(src); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32x4_t sign_pos = 
vreinterpretq_u32_f32(vdupq_n_f32(-SIMDE_FLOAT32_C(0.0))); r_.neon_u32 = vbslq_u32(sign_pos, src_.neon_u32, dest_.neon_u32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) const v128_t sign_pos = wasm_f32x4_splat(-0.0f); r_.wasm_v128 = wasm_v128_bitselect(src_.wasm_v128, dest_.wasm_v128, sign_pos); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS) r_.altivec_f32 = vec_cpsgn(dest_.altivec_f32, src_.altivec_f32); #else r_.altivec_f32 = vec_cpsgn(src_.altivec_f32, dest_.altivec_f32); #endif #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) const SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) sign_pos = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), vec_splats(-0.0f)); r_.altivec_f32 = vec_sel(dest_.altivec_f32, src_.altivec_f32, sign_pos); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) const v4f32 sign_pos = {-0.0f, -0.0f, -0.0f, -0.0f}; r_.lsx_i64 = __lsx_vbitsel_v(dest_.lsx_i64, src_.lsx_i64, (v2i64)sign_pos); #elif defined(SIMDE_IEEE754_STORAGE) (void) src_; (void) dest_; simde__m128 sign_pos = simde_mm_set1_ps(-0.0f); r_ = simde__m128_to_private(simde_mm_xor_ps(dest, simde_mm_and_ps(simde_mm_xor_ps(dest, src), sign_pos))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = simde_math_copysignf(dest_.f32[i], src_.f32[i]); } #endif return simde__m128_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_xorsign_ps(simde__m128 dest, simde__m128 src) { return simde_mm_xor_ps(simde_mm_and_ps(simde_mm_set1_ps(-0.0f), src), dest); } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvt_pi2ps (simde__m128 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvt_pi2ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vcvt_f32_s32(b_.neon_i32), 
vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); r_.m64_private[1] = a_.m64_private[1]; #else r_.f32[0] = (simde_float32) b_.i32[0]; r_.f32[1] = (simde_float32) b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_pi2ps(a, b) simde_mm_cvt_pi2ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvt_ps2pi (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvt_ps2pi(a); #else simde__m64_private r_; simde__m128_private a_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) && SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_BUG_GCC_100761) a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); SIMDE_CONVERT_VECTOR_(r_.i32, a_.m64_private[0].f32); #else a_ = simde__m128_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = HEDLEY_STATIC_CAST(int32_t, simde_math_nearbyintf(a_.f32[i])); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_ps2pi(a) simde_mm_cvt_ps2pi((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvt_si2ss (simde__m128 a, int32_t b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvt_si2ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float, b), a_.neon_f32, 0); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); r_.i32[1] = a_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_si2ss(a, b) simde_mm_cvt_si2ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvt_ss2si (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvt_ss2si(a); #elif defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) return vgetq_lane_s32(vcvtnq_s32_f32(simde__m128_to_neon_f32(a)), 0); #else simde__m128_private a_ = simde__m128_to_private(simde_mm_round_ps(a, SIMDE_MM_FROUND_CUR_DIRECTION)); #if !defined(SIMDE_FAST_CONVERSION_RANGE) return ((a_.f32[0] > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (a_.f32[0] < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]) : INT32_MIN; #else return SIMDE_CONVERT_FTOI(int32_t, a_.f32[0]); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvt_ss2si(a) simde_mm_cvt_ss2si((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi16_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi16_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(a_.neon_i16)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f32, a_.i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { simde_float32 v = a_.i16[i]; r_.f32[i] = v; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi16_ps(a) simde_mm_cvtpi16_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi32_ps (simde__m128 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m64_private b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 
= vcombine_f32(vcvt_f32_s32(b_.neon_i32), vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, b_.i32); r_.m64_private[1] = a_.m64_private[1]; #else r_.f32[0] = (simde_float32) b_.i32[0]; r_.f32[1] = (simde_float32) b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi32_ps(a, b) simde_mm_cvtpi32_ps((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi32x2_ps (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32x2_ps(a, b); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vcombine_s32(a_.neon_i32, b_.neon_i32)); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.m64_private[0].f32, a_.i32); SIMDE_CONVERT_VECTOR_(r_.m64_private[1].f32, b_.i32); #else r_.f32[0] = (simde_float32) a_.i32[0]; r_.f32[1] = (simde_float32) a_.i32[1]; r_.f32[2] = (simde_float32) b_.i32[0]; r_.f32[3] = (simde_float32) b_.i32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi32x2_ps(a, b) simde_mm_cvtpi32x2_ps(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpi8_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi8_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(vmovl_s16(vget_low_s16(vmovl_s8(a_.neon_i8)))); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[0]); r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[1]); r_.f32[2] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[2]); r_.f32[3] = HEDLEY_STATIC_CAST(simde_float32, a_.i8[3]); #endif return 
simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpi8_ps(a) simde_mm_cvtpi8_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi16 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi16(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_i16 = vmovn_s32(vcvtq_s32_f32(vrndiq_f32(a_.neon_f32))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = SIMDE_CONVERT_FTOI(int16_t, simde_math_roundf(a_.f32[i])); } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi16(a) simde_mm_cvtps_pi16((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi32(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_BUG_GCC_95399) r_.neon_i32 = vcvt_s32_f32(vget_low_f32(vrndiq_f32(a_.neon_f32))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = simde_math_roundf(a_.f32[i]); #if !defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #endif } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi32(a) simde_mm_cvtps_pi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtps_pi8 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtps_pi8(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_GCC_95471) /* Clamp the input to [INT8_MIN, INT8_MAX], round, convert to i32, narrow to * i16, combine with an all-zero vector of i16 (which will become the upper * half), narrow to i8. */ float32x4_t max = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)); float32x4_t min = vdupq_n_f32(HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)); float32x4_t values = vrndnq_f32(vmaxq_f32(vminq_f32(max, a_.neon_f32), min)); r_.neon_i8 = vmovn_s16(vcombine_s16(vmovn_s32(vcvtq_s32_f32(values)), vdup_n_s16(0))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.f32) / sizeof(a_.f32[0])) ; i++) { if (a_.f32[i] > HEDLEY_STATIC_CAST(simde_float32, INT8_MAX)) r_.i8[i] = INT8_MAX; else if (a_.f32[i] < HEDLEY_STATIC_CAST(simde_float32, INT8_MIN)) r_.i8[i] = INT8_MIN; else r_.i8[i] = SIMDE_CONVERT_FTOI(int8_t, simde_math_roundf(a_.f32[i])); } /* Note: the upper half is undefined */ #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtps_pi8(a) simde_mm_cvtps_pi8((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpu16_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpu16_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(a_.neon_u16)); #elif defined(SIMDE_CONVERT_VECTOR_) 
SIMDE_CONVERT_VECTOR_(r_.f32, a_.u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (simde_float32) a_.u16[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpu16_ps(a) simde_mm_cvtpu16_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpu8_ps (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpu8_ps(a); #else simde__m128_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_u32(vmovl_u16(vget_low_u16(vmovl_u8(a_.neon_u8)))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = HEDLEY_STATIC_CAST(simde_float32, a_.u8[i]); } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtpu8_ps(a) simde_mm_cvtpu8_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsi32_ss (simde__m128 a, int32_t b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtsi32_ss(a, b); #else simde__m128_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); #else r_ = a_; r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtsi32_ss(a, b) simde_mm_cvtsi32_ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsi64_ss (simde__m128 a, int64_t b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_ss(a, b); #else return _mm_cvtsi64x_ss(a, b); #endif #else simde__m128_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = 
vsetq_lane_f32(HEDLEY_STATIC_CAST(float32_t, b), a_.neon_f32, 0); #else r_ = a_; r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) # define _mm_cvtsi64_ss(a, b) simde_mm_cvtsi64_ss((a), b) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float32 simde_mm_cvtss_f32 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtss_f32(a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_f32(a_.neon_f32, 0); #else return a_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtss_f32(a) simde_mm_cvtss_f32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtss_si32 (simde__m128 a) { return simde_mm_cvt_ss2si(a); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtss_si32(a) simde_mm_cvtss_si32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtss_si64 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtss_si64(a); #else return _mm_cvtss_si64x(a); #endif #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(vgetq_lane_f32(a_.neon_f32, 0))); #else return SIMDE_CONVERT_FTOI(int64_t, simde_math_roundf(a_.f32[0])); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) # define _mm_cvtss_si64(a) simde_mm_cvtss_si64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvtt_ps2pi (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtt_ps2pi(a); #else simde__m64_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && 
defined(SIMDE_FAST_CONVERSION_RANGE) r_.neon_i32 = vcvt_s32_f32(vget_low_f32(a_.neon_f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { simde_float32 v = a_.f32[i]; #if !defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #endif } #endif return simde__m64_from_private(r_); #endif } #define simde_mm_cvttps_pi32(a) simde_mm_cvtt_ps2pi(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtt_ps2pi(a) simde_mm_cvtt_ps2pi((a)) # define _mm_cvttps_pi32(a) simde_mm_cvttps_pi32((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtt_ss2si (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cvtt_ss2si(a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, vgetq_lane_f32(a_.neon_f32, 0)); #else simde_float32 v = a_.f32[0]; #if !defined(SIMDE_FAST_CONVERSION_RANGE) return ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #else return SIMDE_CONVERT_FTOI(int32_t, v); #endif #endif #endif } #define simde_mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cvtt_ss2si(a) simde_mm_cvtt_ss2si((a)) # define _mm_cvttss_si32(a) simde_mm_cvtt_ss2si((a)) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvttss_si64 (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(_MSC_VER) #if defined(__PGI) return _mm_cvttss_si64x(a); #else return _mm_cvttss_si64(a); #endif #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return SIMDE_CONVERT_FTOI(int64_t, vgetq_lane_f32(a_.neon_f32, 0)); #else return SIMDE_CONVERT_FTOI(int64_t, a_.f32[0]); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) # define _mm_cvttss_si64(a) simde_mm_cvttss_si64((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cmpord_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_cmpord_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_cmpord_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_cmpord_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(simde_math_isnanf) r_.u32[0] = (simde_math_isnanf(simde_mm_cvtss_f32(a)) || simde_math_isnanf(simde_mm_cvtss_f32(b))) ? 
UINT32_C(0) : ~UINT32_C(0); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.u32[i] = a_.u32[i]; } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_cmpord_ss(a, b) simde_mm_cmpord_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_div_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_div_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vdivq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t recip0 = vrecpeq_f32(b_.neon_f32); float32x4_t recip1 = vmulq_f32(recip0, vrecpsq_f32(recip0, b_.neon_f32)); r_.neon_f32 = vmulq_f32(a_.neon_f32, recip1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_div(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_div(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LASX_NATIVE) r_.lsx_f32 = __lsx_vfdiv_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 / b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] / b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_div_ps(a, b) simde_mm_div_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_div_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_div_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_div_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_div_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = 
simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(simde__m128_to_private(simde_mm_div_ps(a, b)).neon_f32, 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = a_.f32[0] / b_.f32[0]; SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_div_ss(a, b) simde_mm_div_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_mm_extract_pi16 (simde__m64 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m64_private a_ = simde__m64_to_private(a); return a_.i16[imm8]; } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(HEDLEY_PGI_VERSION) && !defined(SIMDE_BUG_CLANG_44589) #define simde_mm_extract_pi16(a, imm8) HEDLEY_STATIC_CAST(int16_t, _mm_extract_pi16(a, imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_extract_pi16(a, imm8) vget_lane_s16(simde__m64_to_private(a).neon_i16, imm8) #endif #define simde_m_pextrw(a, imm8) simde_mm_extract_pi16(a, imm8) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_extract_pi16(a, imm8) simde_mm_extract_pi16((a), (imm8)) # define _m_pextrw(a, imm8) simde_mm_extract_pi16((a), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_insert_pi16 (simde__m64 a, int16_t i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m64_private a_ = simde__m64_to_private(a); a_.i16[imm8] = i; return simde__m64_from_private(a_); } #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) && !defined(SIMDE_BUG_CLANG_44589) #define simde_mm_insert_pi16(a, i, imm8) _mm_insert_pi16(a, i, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_insert_pi16(a, i, imm8) simde__m64_from_neon_i16(vset_lane_s16((i), simde__m64_to_neon_i16(a), (imm8))) #endif 
#define simde_m_pinsrw(a, i, imm8) (simde_mm_insert_pi16(a, i, imm8)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_insert_pi16(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) # define _m_pinsrw(a, i, imm8) simde_mm_insert_pi16(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ps(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_f32(mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_vsx_ld(0, mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_ld(0, mem_addr); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vld(mem_addr, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load(mem_addr); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128), sizeof(r_)); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ps(mem_addr) simde_mm_load_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load1_ps (simde_float32 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ps1(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_dup_f32(mem_addr); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vldrepl_w(mem_addr, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load32_splat(mem_addr); #else r_ = simde__m128_to_private(simde_mm_set1_ps(*mem_addr)); #endif return simde__m128_from_private(r_); #endif } #define simde_mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ps1(mem_addr) simde_mm_load1_ps(mem_addr) # define _mm_load1_ps(mem_addr) simde_mm_load1_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_load_ss (simde_float32 const* 
mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_load_ss(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(*mem_addr, vdupq_n_f32(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load32_zero(mem_addr); #else r_.f32[0] = *mem_addr; r_.i32[1] = 0; r_.i32[2] = 0; r_.i32[3] = 0; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_load_ss(mem_addr) simde_mm_load_ss(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadh_pi (simde__m128 a, simde__m64 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadh_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vget_low_f32(a_.neon_f32), vld1_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 1); #else simde__m64_private b_ = *HEDLEY_REINTERPRET_CAST(simde__m64_private const*, mem_addr); r_.f32[0] = a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = b_.f32[0]; r_.f32[3] = b_.f32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) #else #define _mm_loadh_pi(a, mem_addr) simde_mm_loadh_pi((a), (simde__m64 const*) (mem_addr)) #endif #endif /* The SSE documentation says that there are no alignment requirements for mem_addr. Unfortunately they used the __m64 type for the argument which is supposed to be 8-byte aligned, so some compilers (like clang with -Wcast-align) will generate a warning if you try to cast, say, a simde_float32* to a simde__m64* for this function. 
I think the choice of argument type is unfortunate, but I do think we need to stick to it here. If there is demand I can always add something like simde_x_mm_loadl_f32(simde__m128, simde_float32 mem_addr[2]) */ SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadl_pi (simde__m128 a, simde__m64 const* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadl_pi(a, HEDLEY_REINTERPRET_CAST(__m64 const*, mem_addr)); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcombine_f32(vld1_f32( HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)), vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load64_lane(mem_addr, a_.wasm_v128, 0); #else simde__m64_private b_; simde_memcpy(&b_, mem_addr, sizeof(b_)); r_.i32[0] = b_.i32[0]; r_.i32[1] = b_.i32[1]; r_.i32[2] = a_.i32[2]; r_.i32[3] = a_.i32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #if HEDLEY_HAS_WARNING("-Wold-style-cast") #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), HEDLEY_REINTERPRET_CAST(simde__m64 const*, (mem_addr))) #else #define _mm_loadl_pi(a, mem_addr) simde_mm_loadl_pi((a), (simde__m64 const*) (mem_addr)) #endif #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadr_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadr_ps(mem_addr); #else simde__m128_private r_, v_ = simde__m128_to_private(simde_mm_load_ps(mem_addr)); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vrev64q_f32(v_.neon_f32); r_.neon_f32 = vextq_f32(r_.neon_f32, r_.neon_f32, 2); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) r_.altivec_f32 = vec_reve(v_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vshuf4i_w(v_.lsx_i64, 0x1b); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, v_.f32, v_.f32, 3, 2, 1, 0); #else r_.f32[0] = 
v_.f32[3]; r_.f32[1] = v_.f32[2]; r_.f32[2] = v_.f32[1]; r_.f32[3] = v_.f32[0]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_loadr_ps(mem_addr) simde_mm_loadr_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_loadu_ps (simde_float32 const mem_addr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_loadu_ps(mem_addr); #else simde__m128_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vld1q_f32(HEDLEY_REINTERPRET_CAST(const float32_t*, mem_addr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load(mem_addr); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && defined(__PPC64__) r_.altivec_f32 = vec_vsx_ld(0, mem_addr); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vld(mem_addr, 0); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_loadu_ps(mem_addr) simde_mm_loadu_ps(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskmove_si64 (simde__m64 a, simde__m64 mask, int8_t* mem_addr) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) _mm_maskmove_si64(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); #else simde__m64_private a_ = simde__m64_to_private(a), mask_ = simde__m64_to_private(mask); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) if (mask_.i8[i] < 0) mem_addr[i] = a_.i8[i]; #endif } #define simde_m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64(a, mask, mem_addr) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_maskmove_si64(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) # define _m_maskmovq(a, mask, mem_addr) simde_mm_maskmove_si64((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 
simde_mm_max_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_max_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmax_s16(a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? a_.i16[i] : b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaxsw(a, b) simde_mm_max_pi16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_pi16(a, b) simde_mm_max_pi16(a, b) # define _m_pmaxsw(a, b) simde_mm_max_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_max_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_max_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_FAST_NANS) r_.neon_f32 = vmaxq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vbslq_f32(vcgtq_f32(a_.neon_f32, b_.neon_f32), a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_NANS) r_.wasm_v128 = wasm_f32x4_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_bitselect(a_.wasm_v128, b_.wasm_v128, wasm_f32x4_gt(a_.wasm_v128, b_.wasm_v128)); #elif (defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE)) && defined(SIMDE_FAST_NANS) r_.altivec_f32 = vec_max(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(a_.altivec_f32, b_.altivec_f32)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) && defined(SIMDE_FAST_NANS) r_.lsx_f32 = __lsx_vfmax_s(a_.lsx_f32, b_.lsx_f32); #else 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] > b_.f32[i]) ? a_.f32[i] : b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_ps(a, b) simde_mm_max_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_max_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_max_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmax_u8(a_.neon_u8, b_.neon_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmaxub(a, b) simde_mm_max_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_pu8(a, b) simde_mm_max_pu8(a, b) # define _m_pmaxub(a, b) simde_mm_max_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_max_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_max_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_max_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_max_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(maxq_f32(a_.neon_f32, b_.neon_f32), 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = (a_.f32[0] > b_.f32[0]) ? 
a_.f32[0] : b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_max_ss(a, b) simde_mm_max_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_min_pi16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_min_pi16(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmin_s16(a_.neon_i16, b_.neon_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? a_.i16[i] : b_.i16[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pminsw(a, b) simde_mm_min_pi16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_pi16(a, b) simde_mm_min_pi16(a, b) # define _m_pminsw(a, b) simde_mm_min_pi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_min_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_min_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_FAST_NANS) && defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vminq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_pmin(b_.wasm_v128, a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) #if defined(SIMDE_FAST_NANS) r_.altivec_f32 = vec_min(a_.altivec_f32, b_.altivec_f32); #else r_.altivec_f32 = vec_sel(b_.altivec_f32, a_.altivec_f32, vec_cmpgt(b_.altivec_f32, a_.altivec_f32)); #endif #elif defined(SIMDE_FAST_NANS) && defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfmin_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) uint32_t SIMDE_VECTOR(16) m = 
HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32 < b_.f32); r_.f32 = HEDLEY_REINTERPRET_CAST( __typeof__(r_.f32), ( (HEDLEY_REINTERPRET_CAST(__typeof__(m), a_.f32) & m) | (HEDLEY_REINTERPRET_CAST(__typeof__(m), b_.f32) & ~m) ) ); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (a_.f32[i] < b_.f32[i]) ? a_.f32[i] : b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_ps(a, b) simde_mm_min_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_min_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_min_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmin_u8(a_.neon_u8, b_.neon_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? 
a_.u8[i] : b_.u8[i]; } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pminub(a, b) simde_mm_min_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_pu8(a, b) simde_mm_min_pu8(a, b) # define _m_pminub(a, b) simde_mm_min_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_min_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_min_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_min_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_min_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(vminq_f32(a_.neon_f32, b_.neon_f32), 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #else r_.f32[0] = (a_.f32[0] < b_.f32[0]) ? a_.f32[0] : b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_min_ss(a, b) simde_mm_min_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_movehl_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_movehl_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a32 = vget_high_f32(a_.neon_f32); float32x2_t b32 = vget_high_f32(b_.neon_f32); r_.neon_f32 = vcombine_f32(b32, a32); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_mergel(b_.altivec_i64, a_.altivec_i64)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vilvh_d(a_.lsx_i64, b_.lsx_i64); #elif 
defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 6, 7, 2, 3); #else r_.f32[0] = b_.f32[2]; r_.f32[1] = b_.f32[3]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movehl_ps(a, b) simde_mm_movehl_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_movelh_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_movelh_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a10 = vget_low_f32(a_.neon_f32); float32x2_t b10 = vget_low_f32(b_.neon_f32); r_.neon_f32 = vcombine_f32(a10, b10); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), vec_mergeh(a_.altivec_i64, b_.altivec_i64)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vilvl_d(b_.lsx_i64, a_.lsx_i64); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 1, 4, 5); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = b_.f32[0]; r_.f32[3] = b_.f32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movelh_ps(a, b) simde_mm_movelh_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_movemask_pi8 (simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movemask_pi8(a); #else simde__m64_private a_ = simde__m64_to_private(a); int r = 0; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint8x8_t input = a_.neon_u8; const int8_t xr[8] = {-7, -6, -5, -4, -3, -2, -1, 0}; const uint8x8_t mask_and = vdup_n_u8(0x80); const int8x8_t mask_shift = vld1_s8(xr); const uint8x8_t mask_result = vshl_u8(vand_u8(input, mask_and), mask_shift); uint8x8_t lo = mask_result; r 
= vaddv_u8(lo); #else const size_t nmemb = sizeof(a_.i8) / sizeof(a_.i8[0]); SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < nmemb ; i++) { r |= (a_.u8[nmemb - 1 - i] >> 7) << (nmemb - 1 - i); } #endif return r; #endif } #define simde_m_pmovmskb(a) simde_mm_movemask_pi8(a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movemask_pi8(a) simde_mm_movemask_pi8(a) # define _m_pmovmskb(a) simde_mm_movemask_pi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_movemask_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_movemask_ps(a); #else int r = 0; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) // Shift out everything but the sign bits with a 32-bit unsigned shift right. uint64x2_t high_bits = vreinterpretq_u64_u32(vshrq_n_u32(a_.neon_u32, 31)); // Merge the two pairs together with a 64-bit unsigned shift right + add. uint8x16_t paired = vreinterpretq_u8_u64(vsraq_n_u64(high_bits, high_bits, 31)); // Extract the result. 
return vgetq_lane_u8(paired, 0) | (vgetq_lane_u8(paired, 8) << 2); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) static const uint32_t md[4] = { 1 << 0, 1 << 1, 1 << 2, 1 << 3 }; uint32x4_t extended = vreinterpretq_u32_s32(vshrq_n_s32(a_.neon_i32, 31)); uint32x4_t masked = vandq_u32(vld1q_u32(md), extended); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(int32_t, vaddvq_u32(masked)); #else uint64x2_t t64 = vpaddlq_u32(masked); return HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 0)) + HEDLEY_STATIC_CAST(int, vgetq_lane_u64(t64, 1)); #endif #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 96, 64, 32, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); return HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) v2i64 t64 = __lsx_vmskltz_w(a_.lsx_i64); r = __lsx_vpickve2gr_wu(t64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_bitmask(a_.wasm_v128)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < sizeof(a_.u32) / sizeof(a_.u32[0]) ; i++) { r |= (a_.u32[i] >> ((sizeof(a_.u32[i]) * CHAR_BIT) - 1)) << i; } #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_movemask_ps(a) 
simde_mm_movemask_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mul_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vmulq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_mul(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 * b_.f32; #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_mul(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfmul_s(a_.lsx_f32, b_.lsx_f32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] * b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mul_ps(a, b) simde_mm_mul_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_mul_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_mul_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_mul_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_mul_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[0] * b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mul_ss(a, b) simde_mm_mul_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mulhi_pu16 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_mulhi_pu16(a, b); #else simde__m64_private r_, 
a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint32x4_t t1 = vmull_u16(a_.neon_u16, b_.neon_u16); const uint32x4_t t2 = vshrq_n_u32(t1, 16); const uint16x4_t t3 = vmovn_u32(t2); r_.neon_u16 = t3; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, ((HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])) >> UINT32_C(16))); } #endif return simde__m64_from_private(r_); #endif } #define simde_m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_mulhi_pu16(a, b) simde_mm_mulhi_pu16(a, b) # define _m_pmulhuw(a, b) simde_mm_mulhi_pu16(a, b) #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(HEDLEY_GCC_VERSION) #define SIMDE_MM_HINT_NTA HEDLEY_STATIC_CAST(enum _mm_hint, 0) #define SIMDE_MM_HINT_T0 HEDLEY_STATIC_CAST(enum _mm_hint, 1) #define SIMDE_MM_HINT_T1 HEDLEY_STATIC_CAST(enum _mm_hint, 2) #define SIMDE_MM_HINT_T2 HEDLEY_STATIC_CAST(enum _mm_hint, 3) #define SIMDE_MM_HINT_ENTA HEDLEY_STATIC_CAST(enum _mm_hint, 4) #define SIMDE_MM_HINT_ET0 HEDLEY_STATIC_CAST(enum _mm_hint, 5) #define SIMDE_MM_HINT_ET1 HEDLEY_STATIC_CAST(enum _mm_hint, 6) #define SIMDE_MM_HINT_ET2 HEDLEY_STATIC_CAST(enum _mm_hint, 7) #else #define SIMDE_MM_HINT_NTA 0 #define SIMDE_MM_HINT_T0 1 #define SIMDE_MM_HINT_T1 2 #define SIMDE_MM_HINT_T2 3 #define SIMDE_MM_HINT_ENTA 4 #define SIMDE_MM_HINT_ET0 5 #define SIMDE_MM_HINT_ET1 6 #define SIMDE_MM_HINT_ET2 7 #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wreserved-id-macro") _Pragma("clang diagnostic ignored \"-Wreserved-id-macro\"") #endif #undef _MM_HINT_NTA #define _MM_HINT_NTA SIMDE_MM_HINT_NTA #undef _MM_HINT_T0 #define _MM_HINT_T0 SIMDE_MM_HINT_T0 #undef _MM_HINT_T1 #define _MM_HINT_T1 SIMDE_MM_HINT_T1 #undef _MM_HINT_T2 #define _MM_HINT_T2 SIMDE_MM_HINT_T2 #undef 
_MM_HINT_ENTA #define _MM_HINT_ETNA SIMDE_MM_HINT_ENTA #undef _MM_HINT_ET0 #define _MM_HINT_ET0 SIMDE_MM_HINT_ET0 #undef _MM_HINT_ET1 #define _MM_HINT_ET1 SIMDE_MM_HINT_ET1 #undef _MM_HINT_ET1 #define _MM_HINT_ET2 SIMDE_MM_HINT_ET2 HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_prefetch (const void* p, int i) { #if \ HEDLEY_HAS_BUILTIN(__builtin_prefetch) || \ HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ HEDLEY_INTEL_VERSION_CHECK(13,0,0) switch(i) { case SIMDE_MM_HINT_NTA: __builtin_prefetch(p, 0, 0); break; case SIMDE_MM_HINT_T0: __builtin_prefetch(p, 0, 3); break; case SIMDE_MM_HINT_T1: __builtin_prefetch(p, 0, 2); break; case SIMDE_MM_HINT_T2: __builtin_prefetch(p, 0, 1); break; case SIMDE_MM_HINT_ENTA: __builtin_prefetch(p, 1, 0); break; case SIMDE_MM_HINT_ET0: __builtin_prefetch(p, 1, 3); break; case SIMDE_MM_HINT_ET1: __builtin_prefetch(p, 1, 2); break; case SIMDE_MM_HINT_ET2: __builtin_prefetch(p, 0, 1); break; } #elif defined(__ARM_ACLE) #if (__ARM_ACLE >= 101) switch(i) { case SIMDE_MM_HINT_NTA: __pldx(0, 0, 1, p); break; case SIMDE_MM_HINT_T0: __pldx(0, 0, 0, p); break; case SIMDE_MM_HINT_T1: __pldx(0, 1, 0, p); break; case SIMDE_MM_HINT_T2: __pldx(0, 2, 0, p); break; case SIMDE_MM_HINT_ENTA: __pldx(1, 0, 1, p); break; case SIMDE_MM_HINT_ET0: __pldx(1, 0, 0, p); break; case SIMDE_MM_HINT_ET1: __pldx(1, 1, 0, p); break; case SIMDE_MM_HINT_ET2: __pldx(1, 2, 0, p); break; } #else (void) i; __pld(p) #endif #elif HEDLEY_PGI_VERSION_CHECK(10,0,0) (void) i; #pragma mem prefetch p #elif HEDLEY_CRAY_VERSION_CHECK(8,1,0) switch (i) { case SIMDE_MM_HINT_NTA: #pragma _CRI prefetch (nt) p break; case SIMDE_MM_HINT_T0: case SIMDE_MM_HINT_T1: case SIMDE_MM_HINT_T2: #pragma _CRI prefetch p break; case SIMDE_MM_HINT_ENTA: #pragma _CRI prefetch (write, nt) p break; case SIMDE_MM_HINT_ET0: case SIMDE_MM_HINT_ET1: case SIMDE_MM_HINT_ET2: #pragma _CRI prefetch (write) p break; } #elif HEDLEY_IBM_VERSION_CHECK(11,0,0) switch(i) { case SIMDE_MM_HINT_NTA: 
__prefetch_by_load(p, 0, 0); break; case SIMDE_MM_HINT_T0: __prefetch_by_load(p, 0, 3); break; case SIMDE_MM_HINT_T1: __prefetch_by_load(p, 0, 2); break; case SIMDE_MM_HINT_T2: __prefetch_by_load(p, 0, 1); break; case SIMDE_MM_HINT_ENTA: __prefetch_by_load(p, 1, 0); break; case SIMDE_MM_HINT_ET0: __prefetch_by_load(p, 1, 3); break; case SIMDE_MM_HINT_ET1: __prefetch_by_load(p, 1, 2); break; case SIMDE_MM_HINT_ET2: __prefetch_by_load(p, 0, 1); break; } #elif HEDLEY_MSVC_VERSION (void) i; (void) p; #endif } #if defined(SIMDE_X86_SSE_NATIVE) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) /* https://reviews.llvm.org/D71718 */ #define simde_mm_prefetch(p, i) \ (__extension__({ \ HEDLEY_DIAGNOSTIC_PUSH \ HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ _mm_prefetch((p), (i)); \ HEDLEY_DIAGNOSTIC_POP \ })) #else #define simde_mm_prefetch(p, i) _mm_prefetch(p, i) #endif #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) #define _mm_prefetch(p, i) simde_mm_prefetch(p, i) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_negate_ps(simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return simde_mm_xor_ps(a, _mm_set1_ps(SIMDE_FLOAT32_C(-0.0))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vnegq_f32(a_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_neg(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_f32 = vec_neg(a_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) const v4f32 f32 = {0.0f, 0.0f, 0.0f, 0.0f}; r_.lsx_f32 = __lsx_vfsub_s(f32, a_.lsx_f32); #elif defined(SIMDE_VECTOR_NEGATE) r_.f32 = -a_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = -a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rcp_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rcp_ps(a); #else simde__m128_private r_, 
a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t recip = vrecpeq_f32(a_.neon_f32); #if SIMDE_ACCURACY_PREFERENCE > 0 for (int i = 0; i < SIMDE_ACCURACY_PREFERENCE ; ++i) { recip = vmulq_f32(recip, vrecpsq_f32(recip, a_.neon_f32)); } #endif r_.neon_f32 = recip; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_re(a_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfrecip_s(a_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.f32 = 1.0f / a_.f32; #elif defined(SIMDE_IEEE754_STORAGE) /* https://stackoverflow.com/questions/12227126/division-as-multiply-and-lut-fast-float-division-reciprocal/12228234#12228234 */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { int32_t ix; simde_float32 fx = a_.f32[i]; simde_memcpy(&ix, &fx, sizeof(ix)); int32_t x = INT32_C(0x7EF311C3) - ix; simde_float32 temp; simde_memcpy(&temp, &x, sizeof(temp)); r_.f32[i] = temp * (SIMDE_FLOAT32_C(2.0) - temp * fx); } #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = 1.0f / a_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rcp_ps(a) simde_mm_rcp_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rcp_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rcp_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_rcp_ps(a)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_rcp_ps(simde_x_mm_broadcastlow_ps(a))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); r_.f32[0] = 1.0f / a_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rcp_ss(a) simde_mm_rcp_ss((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rsqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rsqrt_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vrsqrteq_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_rsqrte(a_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfrsqrt_s(a_.lsx_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_div(simde_mm_set1_ps(1.0f), wasm_f32x4_sqrt(a_.wasm_v128)); #elif defined(SIMDE_IEEE754_STORAGE) /* https://basesandframes.files.wordpress.com/2020/04/even_faster_math_functions_green_2020.pdf Pages 100 - 103 */ SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { #if SIMDE_ACCURACY_PREFERENCE <= 0 r_.i32[i] = INT32_C(0x5F37624F) - (a_.i32[i] >> 1); #else simde_float32 x = a_.f32[i]; simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; int32_t ix; simde_memcpy(&ix, &x, sizeof(ix)); #if SIMDE_ACCURACY_PREFERENCE == 1 ix = INT32_C(0x5F375A82) - (ix >> 1); #else ix = INT32_C(0x5F37599E) - (ix >> 1); #endif simde_memcpy(&x, &ix, sizeof(x)); #if SIMDE_ACCURACY_PREFERENCE >= 2 x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); #endif x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); r_.f32[i] = x; #endif } #elif defined(simde_math_sqrtf) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = 1.0f / simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rsqrt_ps(a) simde_mm_rsqrt_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_rsqrt_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_rsqrt_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && 
defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_rsqrt_ps(a)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_rsqrt_ps(simde_x_mm_broadcastlow_ps(a))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsetq_lane_f32(vgetq_lane_f32(simde_mm_rsqrt_ps(a).neon_f32, 0), a_.neon_f32, 0); #elif defined(SIMDE_IEEE754_STORAGE) { #if SIMDE_ACCURACY_PREFERENCE <= 0 r_.i32[0] = INT32_C(0x5F37624F) - (a_.i32[0] >> 1); #else simde_float32 x = a_.f32[0]; simde_float32 xhalf = SIMDE_FLOAT32_C(0.5) * x; int32_t ix; simde_memcpy(&ix, &x, sizeof(ix)); #if SIMDE_ACCURACY_PREFERENCE == 1 ix = INT32_C(0x5F375A82) - (ix >> 1); #else ix = INT32_C(0x5F37599E) - (ix >> 1); #endif simde_memcpy(&x, &ix, sizeof(x)); #if SIMDE_ACCURACY_PREFERENCE >= 2 x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); #endif x = x * (SIMDE_FLOAT32_C(1.5008909) - xhalf * x * x); r_.f32[0] = x; #endif } r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #elif defined(simde_math_sqrtf) r_.f32[0] = 1.0f / simde_math_sqrtf(a_.f32[0]); r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_rsqrt_ss(a) simde_mm_rsqrt_ss((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sad_pu8 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sad_pu8(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint64x1_t t = vpaddl_u32(vpaddl_u16(vpaddl_u8(vabd_u8(a_.neon_u8, b_.neon_u8)))); r_.neon_u16 = vset_lane_u16(HEDLEY_STATIC_CAST(uint64_t, vget_lane_u64(t, 0)), vdup_n_u16(0), 0); #else uint16_t sum = 0; SIMDE_VECTORIZE_REDUCTION(+:sum) for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; 
i++) { sum += HEDLEY_STATIC_CAST(uint8_t, simde_math_abs(a_.u8[i] - b_.u8[i])); } r_.i16[0] = HEDLEY_STATIC_CAST(int16_t, sum); r_.i16[1] = 0; r_.i16[2] = 0; r_.i16[3] = 0; #endif return simde__m64_from_private(r_); #endif } #define simde_m_psadbw(a, b) simde_mm_sad_pu8(a, b) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sad_pu8(a, b) simde_mm_sad_pu8(a, b) # define _m_psadbw(a, b) simde_mm_sad_pu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_set_ss (simde_float32 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_set_ss(a); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vsetq_lane_f32(a, vdupq_n_f32(SIMDE_FLOAT32_C(0.0)), 0); #else return simde_mm_set_ps(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(0.0), a); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_set_ss(a) simde_mm_set_ss(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_setr_ps (simde_float32 e3, simde_float32 e2, simde_float32 e1, simde_float32 e0) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_setr_ps(e3, e2, e1, e0); #else return simde_mm_set_ps(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_setr_ps(e3, e2, e1, e0) simde_mm_setr_ps(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_setzero_ps (void) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_setzero_ps(); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vdupq_n_f32(SIMDE_FLOAT32_C(0.0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return vec_splats(SIMDE_FLOAT32_C(0.0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f32x4_const(0.f, 0.f, 0.f, 0.f); #else simde__m128 r; simde_memset(&r, 0, sizeof(r)); return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_setzero_ps() simde_mm_setzero_ps() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_undefined_ps 
(void) { simde__m128_private r_; #if defined(SIMDE_HAVE_UNDEFINED128) r_.n = _mm_undefined_ps(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128_to_private(simde_mm_setzero_ps()); #endif return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_undefined_ps() simde_mm_undefined_ps() #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_x_mm_setone_ps (void) { simde__m128 t = simde_mm_setzero_ps(); return simde_mm_cmpeq_ps(t, t); } SIMDE_FUNCTION_ATTRIBUTES void simde_mm_sfence (void) { /* TODO: Use Hedley. */ #if defined(SIMDE_X86_SSE_NATIVE) _mm_sfence(); #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7)) __atomic_thread_fence(__ATOMIC_SEQ_CST); #elif !defined(__INTEL_COMPILER) && defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_ATOMICS__) #if defined(__GNUC__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 9) __atomic_thread_fence(__ATOMIC_SEQ_CST); #else atomic_thread_fence(memory_order_seq_cst); #endif #elif defined(_MSC_VER) MemoryBarrier(); #elif HEDLEY_HAS_EXTENSION(c_atomic) __c11_atomic_thread_fence(__ATOMIC_SEQ_CST); #elif defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) __sync_synchronize(); #elif defined(_OPENMP) #pragma omp critical(simde_mm_sfence_) { } #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sfence() simde_mm_sfence() #endif #define SIMDE_MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _MM_SHUFFLE(z, y, x, w) SIMDE_MM_SHUFFLE(z, y, x, w) #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) # define simde_mm_shuffle_pi16(a, imm8) _mm_shuffle_pi16(a, imm8) #elif defined(SIMDE_SHUFFLE_VECTOR_) # define simde_mm_shuffle_pi16(a, imm8) (__extension__ ({ \ const 
simde__m64_private simde_tmp_a_ = simde__m64_to_private(a); \ simde__m64_from_private((simde__m64_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 8, \ (simde_tmp_a_).i16, \ (simde_tmp_a_).i16, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3), \ (((imm8) >> 6) & 3)) }); })) #else SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_shuffle_pi16 (simde__m64 a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m64_private r_; simde__m64_private a_ = simde__m64_to_private(a); for (size_t i = 0 ; i < sizeof(r_.i16) / sizeof(r_.i16[0]) ; i++) { r_.i16[i] = a_.i16[(imm8 >> (i * 2)) & 3]; } HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wconditional-uninitialized") # pragma clang diagnostic ignored "-Wconditional-uninitialized" #endif return simde__m64_from_private(r_); HEDLEY_DIAGNOSTIC_POP } #endif #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) # define simde_m_pshufw(a, imm8) _m_pshufw(a, imm8) #else # define simde_m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_pi16(a, imm8) simde_mm_shuffle_pi16(a, imm8) # define _m_pshufw(a, imm8) simde_mm_shuffle_pi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_shuffle_ps (simde__m128 a, simde__m128 b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[(imm8 >> 0) & 3]; r_.f32[1] = a_.f32[(imm8 >> 2) & 3]; r_.f32[2] = b_.f32[(imm8 >> 4) & 3]; r_.f32[3] = b_.f32[(imm8 >> 6) & 3]; return simde__m128_from_private(r_); } #if defined(SIMDE_X86_SSE_NATIVE) && !defined(__PGI) # define simde_mm_shuffle_ps(a, b, imm8) _mm_shuffle_ps(a, b, imm8) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ simde__m128_from_private((simde__m128_private) { .wasm_v128 = \ wasm_i32x4_shuffle( \ simde__m128_to_private(a).wasm_v128, \ 
simde__m128_to_private(b).wasm_v128, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) #define simde_mm_shuffle_ps(a, b, imm8) \ (__extension__({ \ float32x4_t simde_mm_shuffle_ps_a_ = simde__m128_to_neon_f32(a); \ float32x4_t simde_mm_shuffle_ps_b_ = simde__m128_to_neon_f32(b); \ float32x4_t simde_mm_shuffle_ps_r_; \ \ simde_mm_shuffle_ps_r_ = vmovq_n_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, (imm8) & (0x3))); \ simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_ps_r_, 1); \ simde_mm_shuffle_ps_r_ = vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_ps_r_, 2); \ vsetq_lane_f32(vgetq_lane_f32(simde_mm_shuffle_ps_b_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_ps_r_, 3); \ })) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_ps(a, b, imm8) (__extension__ ({ \ simde__m128_from_private((simde__m128_private) { .f32 = \ SIMDE_SHUFFLE_VECTOR_(32, 16, \ simde__m128_to_private(a).f32, \ simde__m128_to_private(b).f32, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #endif #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_shuffle_ps(a, b, imm8) simde_mm_shuffle_ps((a), (b), imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sqrt_ps (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sqrt_ps(a); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vsqrtq_f32(a_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t est = vrsqrteq_f32(a_.neon_f32); for (int i = 0 ; i <= SIMDE_ACCURACY_PREFERENCE ; i++) { est = vmulq_f32(vrsqrtsq_f32(vmulq_f32(a_.neon_f32, est), est), est); } r_.neon_f32 = vmulq_f32(a_.neon_f32, est); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = 
wasm_f32x4_sqrt(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_14_NATIVE) r_.altivec_f32 = vec_sqrt(a_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfsqrt_s(a_.lsx_f32); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < sizeof(r_.f32) / sizeof(r_.f32[0]) ; i++) { r_.f32[i] = simde_math_sqrtf(a_.f32[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sqrt_ps(a) simde_mm_sqrt_ps((a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sqrt_ss (simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sqrt_ss(a); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_sqrt_ps(a)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_sqrt_ps(simde_x_mm_broadcastlow_ps(a))); #else simde__m128_private r_, a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32_t value = vgetq_lane_f32(simde__m128_to_private(simde_mm_sqrt_ps(a)).neon_f32, 0); r_.neon_f32 = vsetq_lane_f32(value, a_.neon_f32, 0); #elif defined(simde_math_sqrtf) r_.f32[0] = simde_math_sqrtf(a_.f32[0]); r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sqrt_ss(a) simde_mm_sqrt_ss((a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr, a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(a_.altivec_f32, 0, mem_addr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr, a_.wasm_v128); #elif 
defined(SIMDE_LOONGARCH_LSX_NATIVE) __lsx_vst(a_.lsx_f32, mem_addr, 0); #else simde_memcpy(mem_addr, &a_, sizeof(a)); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ps(mem_addr, a) simde_mm_store_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store1_ps (simde_float32 mem_addr[4], simde__m128 a) { simde_float32* mem_addr_ = SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128); #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ps1(mem_addr_, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr_, vdupq_lane_f32(vget_low_f32(a_.neon_f32), 0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr_, wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 0, 0, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(vec_splat(a_.altivec_f32, 0), 0, mem_addr_); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) __lsx_vst(__lsx_vreplvei_w(a_.lsx_f32, 0), mem_addr_, 0); #elif defined(SIMDE_SHUFFLE_VECTOR_) simde__m128_private tmp_; tmp_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 0, 0, 0, 0); simde_mm_store_ps(mem_addr_, tmp_.f32); #else SIMDE_VECTORIZE_ALIGNED(mem_addr_:16) for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { mem_addr_[i] = a_.f32[0]; } #endif #endif } #define simde_mm_store_ps1(mem_addr, a) simde_mm_store1_ps(mem_addr, a) #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ps1(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) # define _mm_store1_ps(mem_addr, a) simde_mm_store1_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_ss (simde_float32* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_store_ss(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_lane_f32(mem_addr, a_.neon_f32, 0); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) __lsx_vstelm_w(a_.lsx_f32, mem_addr, 0, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store32_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); #else *mem_addr = a_.f32[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_store_ss(mem_addr, a) simde_mm_store_ss(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeh_pi (simde__m64* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storeh_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1_f32(HEDLEY_REINTERPRET_CAST(float32_t*, mem_addr), vget_high_f32(a_.neon_f32)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); #else simde_memcpy(mem_addr, &(a_.m64[1]), sizeof(a_.m64[1])); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storeh_pi(mem_addr, a) simde_mm_storeh_pi(mem_addr, (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_pi (simde__m64* mem_addr, simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storel_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) simde__m128_private a_ = simde__m128_to_private(a); wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); #else simde__m64_private* dest_ = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr); simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) dest_->neon_f32 = vget_low_f32(a_.neon_f32); #else dest_->f32[0] = a_.f32[0]; dest_->f32[1] = a_.f32[1]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storel_pi(mem_addr, a) simde_mm_storel_pi(mem_addr, (a)) 
#endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storer_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storer_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) vec_st(vec_reve(a_.altivec_f32), 0, mem_addr); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x4_t tmp = vrev64q_f32(a_.neon_f32); vst1q_f32(mem_addr, vextq_f32(tmp, tmp, 2)); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) __lsx_vst(__lsx_vshuf4i_w(a_.lsx_f32, 0x1b), mem_addr, 0); #elif defined(SIMDE_SHUFFLE_VECTOR_) a_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, a_.f32, 3, 2, 1, 0); simde_mm_store_ps(mem_addr, simde__m128_from_private(a_)); #else SIMDE_VECTORIZE_ALIGNED(mem_addr:16) for (size_t i = 0 ; i < sizeof(a_.f32) / sizeof(a_.f32[0]) ; i++) { mem_addr[i] = a_.f32[((sizeof(a_.f32) / sizeof(a_.f32[0])) - 1) - i]; } #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storer_ps(mem_addr, a) simde_mm_storer_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_storeu_ps(mem_addr, a); #else simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_f32(mem_addr, a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) vec_vsx_st(a_.altivec_f32, 0, mem_addr); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) __lsx_vst(a_.lsx_f32, mem_addr, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store(mem_addr, a_.wasm_v128); #else simde_memcpy(mem_addr, &a_, sizeof(a_)); #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_storeu_ps(mem_addr, a) simde_mm_storeu_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sub_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return 
_mm_sub_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vsubq_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_sub(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_f32 = __lsx_vfsub_s(a_.lsx_f32, b_.lsx_f32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f32 = a_.f32 - b_.f32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = a_.f32[i] - b_.f32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sub_ps(a, b) simde_mm_sub_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_sub_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_sub_ss(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_ss(a, simde_mm_sub_ps(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_ss(a, simde_mm_sub_ps(simde_x_mm_broadcastlow_ps(a), simde_x_mm_broadcastlow_ps(b))); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); r_.f32[0] = a_.f32[0] - b_.f32[0]; r_.f32[1] = a_.f32[1]; r_.f32[2] = a_.f32[2]; r_.f32[3] = a_.f32[3]; return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_sub_ss(a, b) simde_mm_sub_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomieq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomieq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = 
vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_eq_b = vceqq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_eq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) == wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] == b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] == b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomieq_ss(a, b) simde_mm_ucomieq_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomige_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomige_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_ge_b = vcgeq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_ge_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) >= wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] >= b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] >= b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomige_ss(a, b) simde_mm_ucomige_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomigt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomigt_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_gt_b = vcgtq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_gt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) > wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] > b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] > b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomigt_ss(a, b) simde_mm_ucomigt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomile_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomile_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_le_b = vcleq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_le_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) <= wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] <= b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] <= b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomile_ss(a, b) simde_mm_ucomile_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomilt_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomilt_ss(a, b); #else 
simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_or_b_nan = vmvnq_u32(vandq_u32(a_not_nan, b_not_nan)); uint32x4_t a_lt_b = vcltq_f32(a_.neon_f32, b_.neon_f32); r = !!(vgetq_lane_u32(vorrq_u32(a_or_b_nan, a_lt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) < wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] < b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] < b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomilt_ss(a, b) simde_mm_ucomilt_ss((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomineq_ss (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_ucomineq_ss(a, b); #else simde__m128_private a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x4_t a_not_nan = vceqq_f32(a_.neon_f32, a_.neon_f32); uint32x4_t b_not_nan = vceqq_f32(b_.neon_f32, b_.neon_f32); uint32x4_t a_and_b_not_nan = vandq_u32(a_not_nan, b_not_nan); uint32x4_t a_neq_b = vmvnq_u32(vceqq_f32(a_.neon_f32, b_.neon_f32)); r = !!(vgetq_lane_u32(vandq_u32(a_and_b_not_nan, a_neq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = wasm_f32x4_extract_lane(a_.wasm_v128, 0) != wasm_f32x4_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f32[0] != b_.f32[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f32[0] != b_.f32[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_ucomineq_ss(a, b) simde_mm_ucomineq_ss((a), (b)) #endif #if defined(SIMDE_X86_SSE_NATIVE) # if 
defined(__has_builtin) # if __has_builtin(__builtin_ia32_undef128) # define SIMDE_HAVE_UNDEFINED128 # endif # elif !defined(__PGI) && !defined(SIMDE_BUG_GCC_REV_208793) && !defined(_MSC_VER) # define SIMDE_HAVE_UNDEFINED128 # endif #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_unpackhi_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_unpackhi_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vzip2q_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a1 = vget_high_f32(a_.neon_f32); float32x2_t b1 = vget_high_f32(b_.neon_f32); float32x2x2_t result = vzip_f32(a1, b1); r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vilvh_w(b_.lsx_i64, a_.lsx_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 2, 6, 3, 7); #else r_.f32[0] = a_.f32[2]; r_.f32[1] = b_.f32[2]; r_.f32[2] = a_.f32[3]; r_.f32[3] = b_.f32[3]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_unpackhi_ps(a, b) simde_mm_unpackhi_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_unpacklo_ps (simde__m128 a, simde__m128 b) { #if defined(SIMDE_X86_SSE_NATIVE) return _mm_unpacklo_ps(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a), b_ = simde__m128_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vzip1q_f32(a_.neon_f32, b_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_f32 = vec_mergeh(a_.altivec_f32, b_.altivec_f32); #elif defined(SIMDE_LOONGARCH_LSX_NATIVE) r_.lsx_i64 = __lsx_vilvl_w(b_.lsx_i64, a_.lsx_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = 
wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) float32x2_t a1 = vget_low_f32(a_.neon_f32); float32x2_t b1 = vget_low_f32(b_.neon_f32); float32x2x2_t result = vzip_f32(a1, b1); r_.neon_f32 = vcombine_f32(result.val[0], result.val[1]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.f32, b_.f32, 0, 4, 1, 5); #else r_.f32[0] = a_.f32[0]; r_.f32[1] = b_.f32[0]; r_.f32[2] = a_.f32[1]; r_.f32[3] = b_.f32[1]; #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_unpacklo_ps(a, b) simde_mm_unpacklo_ps((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_pi (simde__m64* mem_addr, simde__m64 a) { #if defined(SIMDE_X86_SSE_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) _mm_stream_pi(HEDLEY_REINTERPRET_CAST(__m64*, mem_addr), a); #else simde__m64_private* dest = HEDLEY_REINTERPRET_CAST(simde__m64_private*, mem_addr), a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) dest->i64[0] = vget_lane_s64(a_.neon_i64, 0); #else dest->i64[0] = a_.i64[0]; #endif #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_stream_pi(mem_addr, a) simde_mm_stream_pi(mem_addr, (a)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_ps (simde_float32 mem_addr[4], simde__m128 a) { #if defined(SIMDE_X86_SSE_NATIVE) _mm_stream_ps(mem_addr, a); #elif HEDLEY_HAS_BUILTIN(__builtin_nontemporal_store) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) simde__m128_private a_ = simde__m128_to_private(a); __builtin_nontemporal_store(a_.f32, SIMDE_ALIGN_CAST(__typeof__(a_.f32)*, mem_addr)); #else simde_mm_store_ps(mem_addr, a); #endif } #if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES) # define _mm_stream_ps(mem_addr, a) simde_mm_stream_ps(SIMDE_CHECKED_REINTERPRET_CAST(float*, simde_float32*, mem_addr), (a)) #endif #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define SIMDE_MM_TRANSPOSE4_PS(row0, 
row1, row2, row3) \
    do { \
      float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW01 = vtrnq_f32(row0, row1); \
      float32x4x2_t SIMDE_MM_TRANSPOSE4_PS_ROW23 = vtrnq_f32(row2, row3); \
      row0 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \
                          vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \
      row1 = vcombine_f32(vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \
                          vget_low_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \
      row2 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[0]), \
                          vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[0])); \
      row3 = vcombine_f32(vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW01.val[1]), \
                          vget_high_f32(SIMDE_MM_TRANSPOSE4_PS_ROW23.val[1])); \
    } while (0)
#else
  /* Generic fallback: 4x4 single-precision transpose built from the
   * unpacklo/unpackhi + movelh/movehl primitives defined earlier. */
  #define SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
    do { \
      simde__m128 SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp0; \
      SIMDE_MM_TRANSPOSE4_PS_tmp0 = simde_mm_unpacklo_ps((row0), (row1)); \
      SIMDE_MM_TRANSPOSE4_PS_tmp2 = simde_mm_unpacklo_ps((row2), (row3)); \
      SIMDE_MM_TRANSPOSE4_PS_tmp1 = simde_mm_unpackhi_ps((row0), (row1)); \
      SIMDE_MM_TRANSPOSE4_PS_tmp3 = simde_mm_unpackhi_ps((row2), (row3)); \
      row0 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp0, SIMDE_MM_TRANSPOSE4_PS_tmp2); \
      row1 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp2, SIMDE_MM_TRANSPOSE4_PS_tmp0); \
      row2 = simde_mm_movelh_ps(SIMDE_MM_TRANSPOSE4_PS_tmp1, SIMDE_MM_TRANSPOSE4_PS_tmp3); \
      row3 = simde_mm_movehl_ps(SIMDE_MM_TRANSPOSE4_PS_tmp3, SIMDE_MM_TRANSPOSE4_PS_tmp1); \
    } while (0)
#endif
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
# define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) SIMDE_MM_TRANSPOSE4_PS(row0, row1, row2, row3)
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_X86_SSE_H) */
/* :: End x86/sse.h :: */

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Private representation of a 128-bit integer vector (__m128i).  A union of
 * lane views (i8..u64, floats, fast-int) plus, per platform, the native
 * vector type so implementations can reinterpret freely. */
typedef union {
  #if defined(SIMDE_VECTOR_SUBSCRIPT)
    /* GCC-style vector extensions: lane views as subscriptable vectors. */
    SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    #if defined(SIMDE_HAVE_INT128_)
    SIMDE_ALIGN_TO_16 simde_int128 i128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_uint128 u128 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    #endif
    SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  #else
    /* Plain-array fallback when vector extensions are unavailable. */
    SIMDE_ALIGN_TO_16 int8_t i8[16];
    SIMDE_ALIGN_TO_16 int16_t i16[8];
    SIMDE_ALIGN_TO_16 int32_t i32[4];
    SIMDE_ALIGN_TO_16 int64_t i64[2];
    SIMDE_ALIGN_TO_16 uint8_t u8[16];
    SIMDE_ALIGN_TO_16 uint16_t u16[8];
    SIMDE_ALIGN_TO_16 uint32_t u32[4];
    SIMDE_ALIGN_TO_16 uint64_t u64[2];
    #if defined(SIMDE_HAVE_INT128_)
    SIMDE_ALIGN_TO_16 simde_int128 i128[1];
    SIMDE_ALIGN_TO_16 simde_uint128 u128[1];
    #endif
    SIMDE_ALIGN_TO_16 simde_float32 f32[4];
    SIMDE_ALIGN_TO_16 simde_float64 f64[2];
    SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
  #endif

    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
    SIMDE_ALIGN_TO_16 simde__m64 m64[2];

  #if defined(SIMDE_X86_SSE2_NATIVE)
    SIMDE_ALIGN_TO_16 __m128i n;
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
    SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
    SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
    SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
    SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
    SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
    SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
    SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
    #if defined(__ARM_FP16_FORMAT_IEEE)
    SIMDE_ALIGN_TO_16 float16x8_t neon_f16;
    #endif
    SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
    #endif
  #elif defined(SIMDE_MIPS_MSA_NATIVE)
    v16i8 msa_i8;
    v8i16 msa_i16;
    v4i32 msa_i32;
    v2i64 msa_i64;
    v16u8 msa_u8;
    v8u16 msa_u16;
    v4u32 msa_u32;
    v2u64 msa_u64;
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    SIMDE_ALIGN_TO_16 v128_t wasm_v128;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
    #else
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
    #else
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
    #endif
  #endif
} simde__m128i_private;

/* Private representation of a 128-bit double-precision vector (__m128d).
 * Same layout idea as simde__m128i_private, minus the 128-bit-int views. */
typedef union {
  #if defined(SIMDE_VECTOR_SUBSCRIPT)
    SIMDE_ALIGN_TO_16 int8_t i8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int16_t i16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int32_t i32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int64_t i64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint8_t u8 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint16_t u16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint32_t u32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint64_t u64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float32 f32 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 simde_float64 f64 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 int_fast32_t i32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  #else
    SIMDE_ALIGN_TO_16 int8_t i8[16];
    SIMDE_ALIGN_TO_16 int16_t i16[8];
    SIMDE_ALIGN_TO_16 int32_t i32[4];
    SIMDE_ALIGN_TO_16 int64_t i64[2];
    SIMDE_ALIGN_TO_16 uint8_t u8[16];
    SIMDE_ALIGN_TO_16 uint16_t u16[8];
    SIMDE_ALIGN_TO_16 uint32_t u32[4];
    SIMDE_ALIGN_TO_16 uint64_t u64[2];
    SIMDE_ALIGN_TO_16 simde_float32 f32[4];
    SIMDE_ALIGN_TO_16 simde_float64 f64[2];
    SIMDE_ALIGN_TO_16 int_fast32_t i32f[16 / sizeof(int_fast32_t)];
    SIMDE_ALIGN_TO_16 uint_fast32_t u32f[16 / sizeof(uint_fast32_t)];
  #endif

    SIMDE_ALIGN_TO_16 simde__m64_private m64_private[2];
    SIMDE_ALIGN_TO_16 simde__m64 m64[2];

  #if defined(SIMDE_X86_SSE2_NATIVE)
    SIMDE_ALIGN_TO_16 __m128d n;
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    SIMDE_ALIGN_TO_16 int8x16_t neon_i8;
    SIMDE_ALIGN_TO_16 int16x8_t neon_i16;
    SIMDE_ALIGN_TO_16 int32x4_t neon_i32;
    SIMDE_ALIGN_TO_16 int64x2_t neon_i64;
    SIMDE_ALIGN_TO_16 uint8x16_t neon_u8;
    SIMDE_ALIGN_TO_16 uint16x8_t neon_u16;
    SIMDE_ALIGN_TO_16 uint32x4_t neon_u32;
    SIMDE_ALIGN_TO_16 uint64x2_t neon_u64;
    SIMDE_ALIGN_TO_16 float32x4_t neon_f32;
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_ALIGN_TO_16 float64x2_t neon_f64;
    #endif
  #elif defined(SIMDE_MIPS_MSA_NATIVE)
    v16i8 msa_i8;
    v8i16 msa_i16;
    v4i32 msa_i32;
    v2i64 msa_i64;
    v16u8 msa_u8;
    v8u16 msa_u16;
    v4u32 msa_u32;
    v2u64 msa_u64;
  #elif defined(SIMDE_WASM_SIMD128_NATIVE)
    SIMDE_ALIGN_TO_16 v128_t wasm_v128;
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed char) altivec_i8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed short) altivec_i16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32;
    /* NOTE(review): guard here is __INT_FAST32_TYPE__, while the m128i
     * counterpart guards on __UINT_FAST32_TYPE__ — inherited as-is. */
    #if defined(__INT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__INT_FAST32_TYPE__) altivec_i32f;
    #else
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed int) altivec_i32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) altivec_u8;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned short) altivec_u16;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32;
    #if defined(__UINT_FAST32_TYPE__) && (defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE))
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(__UINT_FAST32_TYPE__) altivec_u32f;
    #else
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned int) altivec_u32f;
    #endif
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(float) altivec_f32;
    #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(signed long long) altivec_i64;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long) altivec_u64;
    SIMDE_ALIGN_TO_16 SIMDE_POWER_ALTIVEC_VECTOR(double) altivec_f64;
    #endif
  #endif
} simde__m128d_private;

/* Public simde__m128i / simde__m128d types: prefer the platform's native
 * vector type, then compiler vector extensions, then the private struct. */
#if defined(SIMDE_X86_SSE2_NATIVE)
  typedef __m128i simde__m128i;
  typedef __m128d simde__m128d;
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
   typedef int64x2_t simde__m128i;
#  if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
     typedef float64x2_t simde__m128d;
#  elif defined(SIMDE_VECTOR_SUBSCRIPT)
     typedef simde_float64 simde__m128d SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#  else
     typedef simde__m128d_private simde__m128d;
#  endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
   typedef v128_t simde__m128i;
   typedef v128_t simde__m128d;
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
   typedef SIMDE_POWER_ALTIVEC_VECTOR(float) simde__m128i;
   #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
     typedef SIMDE_POWER_ALTIVEC_VECTOR(double) simde__m128d;
   #else
     typedef simde__m128d_private simde__m128d;
   #endif
#elif defined(SIMDE_VECTOR_SUBSCRIPT)
  typedef int64_t simde__m128i SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
  typedef simde_float64 simde__m128d SIMDE_ALIGN_TO_16 SIMDE_VECTOR(16) SIMDE_MAY_ALIAS;
#else
  typedef simde__m128i_private simde__m128i;
  typedef simde__m128d_private simde__m128d;
#endif

#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  typedef simde__m128i __m128i;
  typedef simde__m128d __m128d;
#endif

/* Sanity checks: all four types must be exactly 16 bytes and (when the
 * compiler can tell us) 16-byte aligned. */
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i), "simde__m128i size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128i_private), "simde__m128i_private size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d), "simde__m128d size incorrect");
HEDLEY_STATIC_ASSERT(16 == sizeof(simde__m128d_private), "simde__m128d_private size incorrect");
#if defined(SIMDE_CHECK_ALIGNMENT) && defined(SIMDE_ALIGN_OF)
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i) == 16, "simde__m128i is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128i_private) == 16, "simde__m128i_private is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d) == 16, "simde__m128d is not 16-byte aligned");
HEDLEY_STATIC_ASSERT(SIMDE_ALIGN_OF(simde__m128d_private) == 16, "simde__m128d_private is not 16-byte aligned");
#endif

/* Bit-exact conversion (via memcpy) from the private union to the public
 * type.  memcpy avoids strict-aliasing violations. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde__m128i_from_private(simde__m128i_private v) {
  simde__m128i r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

/* Inverse of simde__m128i_from_private. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i_private
simde__m128i_to_private(simde__m128i v) {
  simde__m128i_private r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

/* Bit-exact conversion from the private double-vector union to simde__m128d. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde__m128d_from_private(simde__m128d_private v) {
  simde__m128d r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

/* Inverse of simde__m128d_from_private. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d_private
simde__m128d_to_private(simde__m128d v) {
  simde__m128d_private r;
  simde_memcpy(&r, &v, sizeof(r));
  return r;
}

/* Generate bit-cast helpers between simde__m128i and each native vector
 * type available on the target. */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int8x16_t, neon, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int16x8_t, neon, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int32x4_t, neon, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, int64x2_t, neon, i64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint8x16_t, neon, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint16x8_t, neon, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint32x4_t, neon, u32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, uint64x2_t, neon, u64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float32x4_t, neon, f32)
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, float64x2_t, neon, f64)
  #endif
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)
  #endif
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */

/* Same bit-cast helpers for simde__m128d. */
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int8x16_t, neon, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int16x8_t, neon, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int32x4_t, neon, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, int64x2_t, neon, i64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint8x16_t, neon, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint16x8_t, neon, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint32x4_t, neon, u32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, uint64x2_t, neon, u64)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float32x4_t, neon, f32)
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, float64x2_t, neon, f64)
  #endif
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed char), altivec, i8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed short), altivec, i16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed int), altivec, i32)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), altivec, u8)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned short), altivec, u16)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned int), altivec, u32)
  #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(unsigned long long), altivec, u64)
    SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(signed long long), altivec, i64)

    #if defined(SIMDE_BUG_GCC_95782)
      /* Hand-written f64 converters to work around a compiler bug
       * (SIMDE_BUG_GCC_95782) that the generator macro would trip. */
      SIMDE_FUNCTION_ATTRIBUTES
      SIMDE_POWER_ALTIVEC_VECTOR(double)
      simde__m128d_to_altivec_f64(simde__m128d value) {
        simde__m128d_private r_ = simde__m128d_to_private(value);
        return r_.altivec_f64;
      }

      SIMDE_FUNCTION_ATTRIBUTES
      simde__m128d
      simde__m128d_from_altivec_f64(SIMDE_POWER_ALTIVEC_VECTOR(double) value) {
        simde__m128d_private r_;
        r_.altivec_f64 = value;
        return simde__m128d_from_private(r_);
      }
    #else
      SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, SIMDE_POWER_ALTIVEC_VECTOR(double), altivec, f64)
    #endif
  #endif
#elif defined(SIMDE_WASM_SIMD128_NATIVE)
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128d, v128_t, wasm, v128);
  SIMDE_X86_GENERATE_CONVERSION_FUNCTION(m128i, v128_t, wasm, v128);
#endif /* defined(SIMDE_ARM_NEON_A32V7_NATIVE) */

/* _mm_set_pd: build {e0, e1} with e0 in the low lane, e1 in the high lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_set_pd (simde_float64 e1, simde_float64 e0) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set_pd(e1, e0);
  #else
    simde__m128d_private r_;

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_make(e0, e1);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      SIMDE_ALIGN_TO_16 simde_float64 data[2] = { e0, e1 };
      r_.neon_f64 = vld1q_f64(data);
    #else
      r_.f64[0] = e0;
      r_.f64[1] = e1;
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set_pd(e1, e0) simde_mm_set_pd(e1, e0)
#endif

/* _mm_set1_pd: broadcast a into both double lanes. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_set1_pd (simde_float64 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_set1_pd(a);
  #else
    simde__m128d_private r_;

    #if defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_splat(a);
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vdupq_n_f64(a);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_splats(HEDLEY_STATIC_CAST(double, a));
    #else
      SIMDE_VECTORIZE
      /* Bound uses the i64 view's element count; i64 and f64 both have
       * two lanes in this union, so it matches f64. */
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.f64[i] = a;
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#define simde_mm_set_pd1(a) simde_mm_set1_pd(a)
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_set1_pd(a) simde_mm_set1_pd(a)
  #define _mm_set_pd1(a) simde_mm_set1_pd(a)
#endif

/* Lane-wise absolute value of doubles (SIMDe extension, x_ prefix).
 * On SSE2 this clears the sign bit by ANDing with 0x7FFFFFFFFFFFFFFF. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_abs_pd(simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    simde_float64 mask_;
    uint64_t u64_ = UINT64_C(0x7FFFFFFFFFFFFFFF);
    simde_memcpy(&mask_, &u64_, sizeof(u64_));
    return _mm_and_pd(_mm_set1_pd(mask_), a);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_f64 = vabsq_f64(a_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_abs(a_.altivec_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_abs(a_.wasm_v128);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.f64[i] = simde_math_fabs(a_.f64[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

/* Bitwise NOT of all 128 bits (SIMDe extension, x_ prefix). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_not_pd(simde__m128d a) {
  #if defined(SIMDE_X86_AVX512VL_NATIVE)
    /* Ternary-logic immediate 0x55 computes ~a. */
    __m128i ai = _mm_castpd_si128(a);
    return _mm_castsi128_pd(_mm_ternarylogic_epi64(ai, ai, ai, 0x55));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vmvnq_s32(a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_nor(a_.altivec_f64, a_.altivec_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32 = vec_nor(a_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_not(a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_select_pd(simde__m128d a, simde__m128d b, simde__m128d mask) {
  /* This function is for when you want to blend two elements together
   * according to a mask. It is similar to _mm_blendv_pd, except that
   * it is undefined whether the blend is based on the highest bit in
   * each lane (like blendv) or just bitwise operations. This allows
   * us to implement the function efficiently everywhere.
   *
   * Basically, you promise that all the lanes in mask are either 0 or
   * ~0. */
  #if defined(SIMDE_X86_SSE4_1_NATIVE)
    return _mm_blendv_pd(a, b, mask);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b),
      mask_ = simde__m128d_to_private(mask);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      /* Branchless select: a XOR ((a XOR b) AND mask). */
      r_.i64 = a_.i64 ^ ((a_.i64 ^ b_.i64) & mask_.i64);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = vbslq_s64(mask_.neon_u64, b_.neon_i64, a_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) {
        r_.i64[i] = a_.i64[i] ^ ((a_.i64[i] ^ b_.i64[i]) & mask_.i64[i]);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}

/* _mm_add_epi8: lane-wise addition of sixteen signed 8-bit integers
 * (wraps on overflow). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_add_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_add_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vaddq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_add(a_.altivec_i8, b_.altivec_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_add(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = a_.i8 + b_.i8;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = a_.i8[i] + b_.i8[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_add_epi8(a, b) simde_mm_add_epi8(a, b)
#endif
SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vaddq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_add(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 + b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] + b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi16(a, b) simde_mm_add_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vaddq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_add(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 + b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] + b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi32(a, b) simde_mm_add_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_add_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_epi64(a, b); #else 
simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vaddq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) r_.altivec_i64 = vec_add(a_.altivec_i64, b_.altivec_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 + b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] + b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_epi64(a, b) simde_mm_add_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_add_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vaddq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_add(a_.altivec_f64, b_.altivec_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_add(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 + b_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] + b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_pd(a, b) simde_mm_add_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_move_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_move_sd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(vgetq_lane_f64(b_.neon_f64, 0), a_.neon_f64, 0); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) #if defined(HEDLEY_IBM_VERSION) r_.altivec_f64 = vec_xxpermdi(a_.altivec_f64, b_.altivec_f64, 1); #else r_.altivec_f64 = vec_xxpermdi(b_.altivec_f64, a_.altivec_f64, 1); #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 1); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 2, 1); #else r_.f64[0] = b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_move_sd(a, b) simde_mm_move_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_broadcastlow_pd(simde__m128d a) { /* This function broadcasts the first element in the input vector to * all lanes. It is used to avoid generating spurious exceptions in * *_sd functions since there may be garbage in the upper lanes. 
*/ #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_castsi128_pd(_mm_shuffle_epi32(_mm_castpd_si128(a), 0x44)); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdupq_laneq_f64(a_.neon_f64, 0); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f64 = vec_splat(a_.altivec_f64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_splat(a_.f64[0]); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 0, 0); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[0]; } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_add_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_add_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_add_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_add_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = a_.f64[0] + b_.f64[0]; r_.f64[1] = a_.f64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_sd(a, b) simde_mm_add_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_add_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_add_si64(a, b); #else simde__m64_private r_, a_ = simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vadd_s64(a_.neon_i64, b_.neon_i64); #else r_.i64[0] = a_.i64[0] + b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_add_si64(a, b) simde_mm_add_si64(a, b) #endif 
/* _mm_adds_epi8: saturating addition of sixteen signed 8-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i8 = vqaddq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i8 = vec_adds(a_.altivec_i8, b_.altivec_i8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = simde_math_adds_i8(a_.i8[i], b_.i8[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epi8(a, b) simde_mm_adds_epi8(a, b)
#endif

/* _mm_adds_epi16: saturating addition of eight signed 16-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i16 = vqaddq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i16 = vec_adds(a_.altivec_i16, b_.altivec_i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = simde_math_adds_i16(a_.i16[i], b_.i16[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epi16(a, b) simde_mm_adds_epi16(a, b)
#endif

/* _mm_adds_epu8: saturating addition of sixteen unsigned 8-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vqaddq_u8(a_.neon_u8, b_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_u8 = vec_adds(a_.altivec_u8, b_.altivec_u8);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = simde_math_adds_u8(a_.u8[i], b_.u8[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epu8(a, b) simde_mm_adds_epu8(a, b)
#endif

/* _mm_adds_epu16: saturating addition of eight unsigned 16-bit integers. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_adds_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_adds_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vqaddq_u16(a_.neon_u16, b_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_add_sat(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u16 = vec_adds(a_.altivec_u16, b_.altivec_u16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) {
        r_.u16[i] = simde_math_adds_u16(a_.u16[i], b_.u16[i]);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_adds_epu16(a, b) simde_mm_adds_epu16(a, b)
#endif

/* _mm_and_pd: bitwise AND of all 128 bits (operates on integer views). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_and_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_and_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vandq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
      r_.altivec_f64 = vec_and(a_.altivec_f64, b_.altivec_f64);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_and_pd(a, b) simde_mm_and_pd(a, b)
#endif

/* _mm_and_si128: bitwise AND of all 128 bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_and_si128 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_and_si128(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* AND is commutative, so the (b, a) operand order is equivalent. */
      r_.neon_i32 = vandq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_u32f = vec_and(a_.altivec_u32f, b_.altivec_u32f);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_and(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] & b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_and_si128(a, b) simde_mm_and_si128(a, b)
#endif

/* _mm_andnot_pd: computes (~a) & b, bitwise over all 128 bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_andnot_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_andnot_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      /* vbicq computes first & ~second, hence the (b, a) order. */
      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = vec_andc(b_.altivec_f64, a_.altivec_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
      r_.altivec_i32f = vec_andc(b_.altivec_i32f, a_.altivec_i32f);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) {
        r_.u64[i] = ~a_.u64[i] & b_.u64[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_andnot_pd(a, b) simde_mm_andnot_pd(a, b)
#endif

/* _mm_andnot_si128: computes (~a) & b, bitwise over all 128 bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_andnot_si128 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_andnot_si128(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i32 = vbicq_s32(b_.neon_i32, a_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = vec_andc(b_.altivec_i32, a_.altivec_i32);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_andnot(b_.wasm_v128, a_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = ~a_.i32f & b_.i32f;
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = ~(a_.i32f[i]) & b_.i32f[i];
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_andnot_si128(a, b) simde_mm_andnot_si128(a, b)
#endif

/* _mm_xor_pd: bitwise XOR of all 128 bits. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_xor_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_xor_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32f = a_.i32f ^ b_.i32f;
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_v128_xor(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_i64 = veorq_s64(a_.neon_i64, b_.neon_i64);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) {
        r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i];
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_xor_pd(a, b) simde_mm_xor_pd(a, b)
#endif

/* _mm_avg_epu8: rounded average of sixteen unsigned 8-bit integers,
 * i.e. (a + b + 1) >> 1 per lane. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_avg_epu8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_avg_epu8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vrhaddq_u8(b_.neon_u8, a_.neon_u8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u8x16_avgr(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u8 = vec_avg(a_.altivec_u8, b_.altivec_u8);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_)
      /* Widen to 16 bits so (a + b + 1) cannot overflow before the shift. */
      uint16_t wa SIMDE_VECTOR(32);
      uint16_t wb SIMDE_VECTOR(32);
      uint16_t wr SIMDE_VECTOR(32);
      SIMDE_CONVERT_VECTOR_(wa, a_.u8);
      SIMDE_CONVERT_VECTOR_(wb, b_.u8);
      wr = (wa + wb + 1) >> 1;
      SIMDE_CONVERT_VECTOR_(r_.u8, wr);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) {
        r_.u8[i] = (a_.u8[i] + b_.u8[i] + 1) >> 1;
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_avg_epu8(a, b) simde_mm_avg_epu8(a, b)
#endif

/* _mm_avg_epu16: rounded average of eight unsigned 16-bit integers.
 * (Definition continues beyond this chunk.) */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_avg_epu16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_avg_epu16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vrhaddq_u16(b_.neon_u16, a_.neon_u16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_u16x8_avgr(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_u16 = vec_avg(a_.altivec_u16, b_.altivec_u16);
    #elif
defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && defined(SIMDE_CONVERT_VECTOR_) uint32_t wa SIMDE_VECTOR(32); uint32_t wb SIMDE_VECTOR(32); uint32_t wr SIMDE_VECTOR(32); SIMDE_CONVERT_VECTOR_(wa, a_.u16); SIMDE_CONVERT_VECTOR_(wb, b_.u16); wr = (wa + wb + 1) >> 1; SIMDE_CONVERT_VECTOR_(r_.u16, wr); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = (a_.u16[i] + b_.u16[i] + 1) >> 1; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_avg_epu16(a, b) simde_mm_avg_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setzero_si128 (void) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setzero_si128(); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vdupq_n_s32(0); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, 0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_splat(INT32_C(0)); #elif defined(SIMDE_VECTOR_SUBSCRIPT) r_.i32 = __extension__ (__typeof__(r_.i32)) { 0, 0, 0, 0 }; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = 0; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setzero_si128() (simde_mm_setzero_si128()) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_bslli_si128 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & ~15))) { return simde_mm_setzero_si128(); } #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) r_.altivec_i8 = #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) vec_slo #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ vec_sro #endif (a_.altivec_i8, 
vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i8 = vec_srb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); #elif defined(SIMDE_HAVE_INT128_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) r_.u128[0] = a_.u128[0] << (imm8 * 8); #else r_ = simde__m128i_to_private(simde_mm_setzero_si128()); for (int i = imm8 ; i < HEDLEY_STATIC_CAST(int, sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i - imm8]; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_bslli_si128(a, imm8) _mm_slli_si128(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) #define simde_mm_bslli_si128(a, imm8) \ simde__m128i_from_neon_i8(((imm8) <= 0) ? simde__m128i_to_neon_i8(a) : (((imm8) > 15) ? (vdupq_n_s8(0)) : (vextq_s8(vdupq_n_s8(0), simde__m128i_to_neon_i8(a), 16 - (imm8))))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_bslli_si128(a, imm8) __extension__ ({ \ simde__m128i_from_wasm_v128( \ wasm_i8x16_shuffle(wasm_i32x4_splat(INT32_C(0)), \ simde__m128i_to_wasm_v128((a)), \ ((imm8)&0xF0) ? 0 : 16 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 17 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 18 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 19 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 20 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 21 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 22 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 23 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 24 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 25 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 26 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 27 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 28 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 29 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 0 : 30 - ((imm8)&0xF), \ ((imm8)&0xF0) ? 
0 : 31 - ((imm8)&0xF))); }) #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) #define simde_mm_bslli_si128(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ simde__m128i_private simde_tmp_r_; \ if (HEDLEY_UNLIKELY(imm8 > 15)) { \ simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ } else { \ simde_tmp_r_.i8 = \ SIMDE_SHUFFLE_VECTOR_(8, 16, \ simde_tmp_z_.i8, \ (simde_tmp_a_).i8, \ HEDLEY_STATIC_CAST(int8_t, (16 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (17 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (18 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (19 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (20 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (21 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (22 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (23 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (24 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (25 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (26 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (27 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (28 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (29 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (30 - imm8) & 31), \ HEDLEY_STATIC_CAST(int8_t, (31 - imm8) & 31)); \ } \ simde__m128i_from_private(simde_tmp_r_); })) #endif #define simde_mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_bslli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #define _mm_slli_si128(a, imm8) simde_mm_bslli_si128(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_bsrli_si128 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & ~15))) { return simde_mm_setzero_si128(); } #if 
defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_ENDIAN_ORDER) r_.altivec_i8 = #if (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) vec_sro #else /* SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG */ vec_slo #endif (a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, imm8 * 8))); #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i8 = vec_slb(a_.altivec_i8, vec_splats(HEDLEY_STATIC_CAST(unsigned char, (imm8 & 15) << 3))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { const int e = HEDLEY_STATIC_CAST(int, i) + imm8; r_.i8[i] = (e < 16) ? a_.i8[e] : 0; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_bsrli_si128(a, imm8) _mm_srli_si128(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(__clang__) #define simde_mm_bsrli_si128(a, imm8) \ simde__m128i_from_neon_i8(((imm8 < 0) || (imm8 > 15)) ? vdupq_n_s8(0) : (vextq_s8(simde__m128i_to_private(a).neon_i8, vdupq_n_s8(0), ((imm8 & 15) != 0) ? 
imm8 : (imm8 & 15)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ if (HEDLEY_UNLIKELY(imm8 > 15)) { \ simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ } else { \ simde_tmp_r_.wasm_v128 = \ wasm_i8x16_shuffle( \ simde_tmp_z_.wasm_v128, \ simde_tmp_a_.wasm_v128, \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ } \ simde__m128i_from_private(simde_tmp_r_); })) #elif defined(SIMDE_SHUFFLE_VECTOR_) && !defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && !defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) #define simde_mm_bsrli_si128(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ const simde__m128i_private simde_tmp_z_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ simde__m128i_private simde_tmp_r_ = simde__m128i_to_private(a); \ if (HEDLEY_UNLIKELY(imm8 > 15)) { \ simde_tmp_r_ = simde__m128i_to_private(simde_mm_setzero_si128()); \ } else { \ simde_tmp_r_.i8 = \ SIMDE_SHUFFLE_VECTOR_(8, 16, \ simde_tmp_z_.i8, \ 
(simde_tmp_a_).i8, \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 16) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 17) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 18) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 19) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 20) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 21) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 22) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 23) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 24) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 25) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 26) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 27) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 28) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 29) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 30) & 31), \ HEDLEY_STATIC_CAST(int8_t, (imm8 + 31) & 31)); \ } \ simde__m128i_from_private(simde_tmp_r_); })) #endif #define simde_mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_bsrli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #define _mm_srli_si128(a, imm8) simde_mm_bsrli_si128((a), (imm8)) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_clflush (void const* p) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_clflush(p); #else (void) p; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_clflush(p) simde_mm_clflush(p) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_comieq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_comieq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return !!vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); #else return a_.f64[0] == b_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_comieq_sd(a, b) simde_mm_comieq_sd(a, b) #endif 
/* simde_mm_comige_sd: 1 if low(a) >= low(b), else 0 (_mm_comige_sd). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comige_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comige_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcgeq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] >= b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comige_sd(a, b) simde_mm_comige_sd(a, b)
#endif

/* simde_mm_comigt_sd: 1 if low(a) > low(b), else 0 (_mm_comigt_sd). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comigt_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comigt_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcgtq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] > b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comigt_sd(a, b) simde_mm_comigt_sd(a, b)
#endif

/* simde_mm_comile_sd: 1 if low(a) <= low(b), else 0 (_mm_comile_sd). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comile_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comile_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcleq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] <= b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comile_sd(a, b) simde_mm_comile_sd(a, b)
#endif

/* simde_mm_comilt_sd: 1 if low(a) < low(b), else 0 (_mm_comilt_sd). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comilt_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comilt_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      return !!vgetq_lane_u64(vcltq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] < b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comilt_sd(a, b) simde_mm_comilt_sd(a, b)
#endif

/* simde_mm_comineq_sd: 1 if the low doubles differ, else 0
 * (_mm_comineq_sd). */
SIMDE_FUNCTION_ATTRIBUTES
int
simde_mm_comineq_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_comineq_sd(a, b);
  #else
    simde__m128d_private
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* Single ! negates the equality mask: all-ones -> 0, 0 -> 1. */
      return !vgetq_lane_u64(vceqq_f64(a_.neon_f64, b_.neon_f64), 0);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0);
    #else
      return a_.f64[0] != b_.f64[0];
    #endif
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_comineq_sd(a, b) simde_mm_comineq_sd(a, b)
#endif

/* simde_x_mm_copysign_pd (SIMDE extension, no SSE2 equivalent): each lane
 * takes its magnitude from dest and its sign from src — see the
 * simde_math_copysign(dest, src) fallback. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_copysign_pd(simde__m128d dest, simde__m128d src) {
  simde__m128d_private
    r_,
    dest_ = simde__m128d_to_private(dest),
    src_ = simde__m128d_to_private(src);

  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      /* -0.0 has only the sign bit set; reinterpret it as the bit mask. */
      uint64x2_t sign_pos = vreinterpretq_u64_f64(vdupq_n_f64(-SIMDE_FLOAT64_C(0.0)));
    #else
      /* No f64 vector ops on A32V7: build the sign mask via memcpy. */
      simde_float64 dbl_nz = -SIMDE_FLOAT64_C(0.0);
      uint64_t u64_nz;
      simde_memcpy(&u64_nz, &dbl_nz, sizeof(u64_nz));
      uint64x2_t sign_pos = vdupq_n_u64(u64_nz);
    #endif
    /* Bitwise select: sign bit from src, everything else from dest. */
    r_.neon_u64 = vbslq_u64(sign_pos, src_.neon_u64, dest_.neon_u64);
  #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
    #if defined(SIMDE_BUG_VEC_CPSGN_REVERSED_ARGS)
      r_.altivec_f64 = vec_cpsgn(dest_.altivec_f64, src_.altivec_f64);
    #else
      r_.altivec_f64 = vec_cpsgn(src_.altivec_f64, dest_.altivec_f64);
    #endif
  #elif defined(simde_math_copysign)
    SIMDE_VECTORIZE
    for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
      r_.f64[i] = simde_math_copysign(dest_.f64[i], src_.f64[i]);
    }
  #else
    /* (src & signbit) | (dest & ~signbit), built from the pd bit ops. */
    simde__m128d sgnbit = simde_mm_set1_pd(-SIMDE_FLOAT64_C(0.0));
    return simde_mm_xor_pd(simde_mm_and_pd(sgnbit, src), simde_mm_andnot_pd(sgnbit, dest));
  #endif

  return simde__m128d_from_private(r_);
}

/* simde_x_mm_xorsign_pd (SIMDE extension): flip dest's sign bits wherever
 * src is negative, i.e. dest ^ (src & signbit). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_x_mm_xorsign_pd(simde__m128d dest, simde__m128d src) {
  return simde_mm_xor_pd(simde_mm_and_pd(simde_mm_set1_pd(-0.0), src), dest);
}

/* simde_mm_castpd_ps: bit-level reinterpretation, no conversion
 * (_mm_castpd_ps). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_castpd_ps (simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castpd_ps(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f32_f64(a);
  #else
    /* memcpy is the portable, strict-aliasing-safe way to type-pun. */
    simde__m128 r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castpd_ps(a) simde_mm_castpd_ps(a)
#endif

/* simde_mm_castpd_si128: bit-level reinterpretation (_mm_castpd_si128). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_castpd_si128 (simde__m128d a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castpd_si128(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_s64_f64(a);
  #else
    simde__m128i r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castpd_si128(a) simde_mm_castpd_si128(a)
#endif

/* simde_mm_castps_pd: bit-level reinterpretation (_mm_castps_pd). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_castps_pd (simde__m128 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castps_pd(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f64_f32(a);
  #else
    simde__m128d r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castps_pd(a) simde_mm_castps_pd(a)
#endif

/* simde_mm_castps_si128: bit-level reinterpretation (_mm_castps_si128). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_castps_si128 (simde__m128 a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castps_si128(a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return simde__m128i_from_neon_i32(simde__m128_to_private(a).neon_i32);
  #else
    simde__m128i r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castps_si128(a) simde_mm_castps_si128(a)
#endif

/* simde_mm_castsi128_pd: bit-level reinterpretation (_mm_castsi128_pd). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_castsi128_pd (simde__m128i a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castsi128_pd(a);
  #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    return vreinterpretq_f64_s64(a);
  #else
    simde__m128d r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castsi128_pd(a) simde_mm_castsi128_pd(a)
#endif

/* simde_mm_castsi128_ps: bit-level reinterpretation (_mm_castsi128_ps). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128
simde_mm_castsi128_ps (simde__m128i a) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_castsi128_ps(a);
  #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
    return HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(float), a);
  #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
    return simde__m128_from_neon_i32(simde__m128i_to_private(a).neon_i32);
  #else
    simde__m128 r;
    simde_memcpy(&r, &a, sizeof(a));
    return r;
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_castsi128_ps(a) simde_mm_castsi128_ps(a)
#endif

/* simde_mm_cmpeq_epi8: lanewise byte equality (_mm_cmpeq_epi8); equal
 * lanes become all-ones (~0), unequal lanes become 0. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpeq_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vceqq_s8(b_.neon_i8, a_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_eq(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpeq(a_.altivec_i8, b_.altivec_i8));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 == b_.i8));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] == b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpeq_epi8(a, b) simde_mm_cmpeq_epi8(a, b)
#endif

/* simde_mm_cmpeq_epi16: lanewise 16-bit equality (_mm_cmpeq_epi16);
 * all-ones on equal, zero otherwise. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpeq_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpeq_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vceqq_s16(b_.neon_i16, a_.neon_i16);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_eq(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpeq(a_.altivec_i16, b_.altivec_i16));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = (a_.i16 == b_.i16);
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        /* Continues on the next source line: ~INT16_C(0) on equal, else 0. */
        r_.i16[i] = (a_.i16[i] == b_.i16[i]) ?
~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_epi32(a, b) simde_mm_cmpeq_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpeq_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vceqq_f64(b_.neon_f64, a_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_eq(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpeq(a_.altivec_f64, b_.altivec_f64)); #elif defined(SIMDE_MIPS_MSA_NATIVE) r_.msa_i32 = __msa_addv_w(a_.msa_i32, b_.msa_i32); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 == b_.f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] == b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_pd(a, b) simde_mm_cmpeq_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpeq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpeq_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpeq_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpeq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.u64[0] == b_.u64[0]) ? 
~UINT64_C(0) : 0; r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpeq_sd(a, b) simde_mm_cmpeq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpneq_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpneq_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u32 = vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(b_.neon_f64, a_.neon_f64))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_ne(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 != b_.f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] != b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpneq_pd(a, b) simde_mm_cmpneq_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpneq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpneq_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpneq_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpneq_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] != b_.f64[0]) ? 
/* Tail of simde_mm_cmpneq_sd (its definition begins on the previous
 * source line): low lane is the != mask, upper lane is copied from a. */
      ~UINT64_C(0) : UINT64_C(0);
    r_.u64[1] = a_.u64[1];

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmpneq_sd(a, b) simde_mm_cmpneq_sd(a, b)
#endif

/* simde_mm_cmplt_epi8: lanewise signed byte less-than (_mm_cmplt_epi8);
 * all-ones where a < b, else zero. */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmplt_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmplt_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vcltq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char),vec_cmplt(a_.altivec_i8, b_.altivec_i8));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_lt(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 < b_.i8));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] < b_.i8[i]) ? ~INT8_C(0) : INT8_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmplt_epi8(a, b) simde_mm_cmplt_epi8(a, b)
#endif

/* simde_mm_cmplt_epi16: lanewise signed 16-bit less-than
 * (_mm_cmplt_epi16). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmplt_epi16 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmplt_epi16(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u16 = vcltq_s16(a_.neon_i16, b_.neon_i16);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmplt(a_.altivec_i16, b_.altivec_i16));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i16x8_lt(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 < b_.i16));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) {
        r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? ~INT16_C(0) : INT16_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmplt_epi16(a, b) simde_mm_cmplt_epi16(a, b)
#endif

/* simde_mm_cmplt_epi32: lanewise signed 32-bit less-than
 * (_mm_cmplt_epi32). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmplt_epi32 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmplt_epi32(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u32 = vcltq_s32(a_.neon_i32, b_.neon_i32);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmplt(a_.altivec_i32, b_.altivec_i32));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i32x4_lt(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 < b_.i32));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) {
        r_.i32[i] = (a_.i32[i] < b_.i32[i]) ? ~INT32_C(0) : INT32_C(0);
      }
    #endif

    return simde__m128i_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmplt_epi32(a, b) simde_mm_cmplt_epi32(a, b)
#endif

/* simde_mm_cmplt_pd: lanewise double less-than (_mm_cmplt_pd); all-ones
 * where a < b, else zero (NaN lanes compare false, hence zero). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cmplt_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmplt_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcltq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmplt(a_.altivec_f64, b_.altivec_f64));
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_lt(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 < b_.f64));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.u64[i] = (a_.f64[i] < b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmplt_pd(a, b) simde_mm_cmplt_pd(a, b)
#endif

/* simde_mm_cmplt_sd: low-lane double less-than; upper lane copied from a
 * (_mm_cmplt_sd). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cmplt_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmplt_sd(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
    return simde_mm_move_sd(a, simde_mm_cmplt_pd(a, b));
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    /* Broadcast the low lane first so the upper lane cannot raise FP
     * exceptions when SIMDE_FAST_EXCEPTIONS is not set. */
    return simde_mm_move_sd(a, simde_mm_cmplt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)))
;
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    r_.u64[0] = (a_.f64[0] < b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
    r_.u64[1] = a_.u64[1];

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmplt_sd(a, b) simde_mm_cmplt_sd(a, b)
#endif

/* simde_mm_cmple_pd: lanewise double less-than-or-equal (_mm_cmple_pd). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cmple_pd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmple_pd(a, b);
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 <= b_.f64));
    #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE)
      r_.neon_u64 = vcleq_f64(a_.neon_f64, b_.neon_f64);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_f64x2_le(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      /* NOTE(review): this guard uses P6 while the sibling cmplt_pd above
       * guards its altivec_f64/vec_cmplt branch with P7 — verify against
       * upstream SIMDE whether double-precision vec_cmple is valid pre-P7. */
      r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmple(a_.altivec_f64, b_.altivec_f64));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) {
        r_.u64[i] = (a_.f64[i] <= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0);
      }
    #endif

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmple_pd(a, b) simde_mm_cmple_pd(a, b)
#endif

/* simde_mm_cmple_sd: low-lane double <=; upper lane copied from a
 * (_mm_cmple_sd). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128d
simde_mm_cmple_sd (simde__m128d a, simde__m128d b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmple_sd(a, b);
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS)
    return simde_mm_move_sd(a, simde_mm_cmple_pd(a, b));
  #elif (SIMDE_NATURAL_VECTOR_SIZE > 0)
    return simde_mm_move_sd(a, simde_mm_cmple_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b)));
  #else
    simde__m128d_private
      r_,
      a_ = simde__m128d_to_private(a),
      b_ = simde__m128d_to_private(b);

    r_.u64[0] = (a_.f64[0] <= b_.f64[0]) ? ~UINT64_C(0) : UINT64_C(0);
    r_.u64[1] = a_.u64[1];

    return simde__m128d_from_private(r_);
  #endif
}
#if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES)
  #define _mm_cmple_sd(a, b) simde_mm_cmple_sd(a, b)
#endif

/* simde_mm_cmpgt_epi8 (continues past this chunk): lanewise signed byte
 * greater-than (_mm_cmpgt_epi8). */
SIMDE_FUNCTION_ATTRIBUTES
simde__m128i
simde_mm_cmpgt_epi8 (simde__m128i a, simde__m128i b) {
  #if defined(SIMDE_X86_SSE2_NATIVE)
    return _mm_cmpgt_epi8(a, b);
  #else
    simde__m128i_private
      r_,
      a_ = simde__m128i_to_private(a),
      b_ = simde__m128i_to_private(b);

    #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
      r_.neon_u8 = vcgtq_s8(a_.neon_i8, b_.neon_i8);
    #elif defined(SIMDE_WASM_SIMD128_NATIVE)
      r_.wasm_v128 = wasm_i8x16_gt(a_.wasm_v128, b_.wasm_v128);
    #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
      r_.altivec_i8 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed char), vec_cmpgt(a_.altivec_i8, b_.altivec_i8));
    #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS)
      r_.i8 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i8), (a_.i8 > b_.i8));
    #else
      SIMDE_VECTORIZE
      for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) {
        r_.i8[i] = (a_.i8[i] > b_.i8[i]) ?
~INT8_C(0) : INT8_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi8(a, b) simde_mm_cmpgt_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vcgtq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i16 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed short), vec_cmpgt(a_.altivec_i16, b_.altivec_i16)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i16), (a_.i16 > b_.i16)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
~INT16_C(0) : INT16_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi16(a, b) simde_mm_cmpgt_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cmpgt_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vcgtq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i32 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), vec_cmpgt(a_.altivec_i32, b_.altivec_i32)); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i32), (a_.i32 > b_.i32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = (a_.i32[i] > b_.i32[i]) ? 
~INT32_C(0) : INT32_C(0); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_epi32(a, b) simde_mm_cmpgt_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpgt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpgt_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 > b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgtq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_gt(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpgt(a_.altivec_f64, b_.altivec_f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] > b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_pd(a, b) simde_mm_cmpgt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpgt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpgt_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpgt_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpgt_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] > b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpgt_sd(a, b) simde_mm_cmpgt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpge_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpge_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.i64), (a_.f64 >= b_.f64)); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_u64 = vcgeq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_ge(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(double), vec_cmpge(a_.altivec_f64, b_.altivec_f64)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (a_.f64[i] >= b_.f64[i]) ? ~UINT64_C(0) : UINT64_C(0); } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpge_pd(a, b) simde_mm_cmpge_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpge_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpge_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpge_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpge_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.u64[0] = (a_.f64[0] >= b_.f64[0]) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpge_sd(a, b) simde_mm_cmpge_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpngt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpngt_pd(a, b); #else return simde_mm_cmple_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpngt_pd(a, b) simde_mm_cmpngt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpngt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpngt_sd(a, b); #else return simde_mm_cmple_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpngt_sd(a, b) simde_mm_cmpngt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnge_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnge_pd(a, b); #else return simde_mm_cmplt_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnge_pd(a, b) simde_mm_cmpnge_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnge_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cmpnge_sd(a, b); #else return simde_mm_cmplt_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnge_sd(a, b) simde_mm_cmpnge_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnlt_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnlt_pd(a, b); #else return simde_mm_cmpge_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnlt_pd(a, b) simde_mm_cmpnlt_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnlt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnlt_sd(a, b); #else return simde_mm_cmpge_sd(a, 
b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnlt_sd(a, b) simde_mm_cmpnlt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnle_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnle_pd(a, b); #else return simde_mm_cmpgt_pd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnle_pd(a, b) simde_mm_cmpnle_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpnle_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpnle_sd(a, b); #else return simde_mm_cmpgt_sd(a, b); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpnle_sd(a, b) simde_mm_cmpnle_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpord_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpord_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) /* Note: NEON does not have ordered compare builtin Need to compare a eq a and b eq b to check for NaN Do AND of results to get final */ uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); r_.neon_u64 = vandq_u64(ceqaa, ceqbb); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_and(wasm_f64x2_eq(a_.wasm_v128, a_.wasm_v128), wasm_f64x2_eq(b_.wasm_v128, b_.wasm_v128)); #elif defined(simde_math_isnan) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (!simde_math_isnan(a_.f64[i]) && !simde_math_isnan(b_.f64[i])) ? 
~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpord_pd(a, b) simde_mm_cmpord_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde_float64 simde_mm_cvtsd_f64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_cvtsd_f64(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return HEDLEY_STATIC_CAST(simde_float64, vgetq_lane_f64(a_.neon_f64, 0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(simde_float64, wasm_f64x2_extract_lane(a_.wasm_v128, 0)); #else return a_.f64[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_f64(a) simde_mm_cvtsd_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpord_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpord_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpord_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_isnan) r_.u64[0] = (!simde_math_isnan(a_.f64[0]) && !simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpord_sd(a, b) simde_mm_cmpord_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpunord_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpunord_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t ceqaa = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t ceqbb = vceqq_f64(b_.neon_f64, b_.neon_f64); r_.neon_u64 = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(ceqaa, ceqbb)))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(wasm_f64x2_ne(a_.wasm_v128, a_.wasm_v128), wasm_f64x2_ne(b_.wasm_v128, b_.wasm_v128)); #elif defined(simde_math_isnan) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.u64[i] = (simde_math_isnan(a_.f64[i]) || simde_math_isnan(b_.f64[i])) ? ~UINT64_C(0) : UINT64_C(0); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpunord_pd(a, b) simde_mm_cmpunord_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cmpunord_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cmpunord_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_cmpunord_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_cmpunord_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_isnan) r_.u64[0] = (simde_math_isnan(a_.f64[0]) || simde_math_isnan(b_.f64[0])) ? 
~UINT64_C(0) : UINT64_C(0); r_.u64[1] = a_.u64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cmpunord_sd(a, b) simde_mm_cmpunord_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtepi32_pd (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtepi32_pd(a); #else simde__m128d_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_convert_low_i32x4(a_.wasm_v128); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (simde_float64) a_.i32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtepi32_pd(a) simde_mm_cvtepi32_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtepi32_ps (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtepi32_ps(a); #else simde__m128_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_f32 = vcvtq_f32_s32(a_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_convert_i32x4(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) HEDLEY_DIAGNOSTIC_PUSH #if HEDLEY_HAS_WARNING("-Wc11-extensions") #pragma clang diagnostic ignored "-Wc11-extensions" #endif r_.altivec_f32 = vec_ctf(a_.altivec_i32, 0); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f32, a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f32) / sizeof(r_.f32[0])) ; i++) { r_.f32[i] = (simde_float32) a_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtepi32_ps(a) simde_mm_cvtepi32_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES 
simde__m64 simde_mm_cvtpd_pi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpd_pi32(a); #else simde__m64_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float64 v = simde_math_round(a_.f64[i]); #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_pi32(a) simde_mm_cvtpd_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtpd_epi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_PGI_30107) return _mm_cvtpd_epi32(a); #else simde__m128i_private r_; r_.m64[0] = simde_mm_cvtpd_pi32(a); r_.m64[1] = simde_mm_setzero_si64(); return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_epi32(a) simde_mm_cvtpd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtpd_ps (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtpd_ps(a); #else simde__m128_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vcombine_f32(vcvt_f32_f64(a_.neon_f64), vdup_n_f32(0.0f)); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) r_.altivec_f32 = vec_float2(a_.altivec_f64, vec_splats(0.0)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f32x4_demote_f64x2_zero(a_.wasm_v128); #elif HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && HEDLEY_HAS_BUILTIN(__builtin_convertvector) float __attribute__((__vector_size__(8))) z = { 0.0f, 0.0f }; r_.f32 = __builtin_shufflevector( __builtin_convertvector(__builtin_shufflevector(a_.f64, 
a_.f64, 0, 1), __typeof__(z)), z, 0, 1, 2, 3 ); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[0]); r_.f32[1] = HEDLEY_STATIC_CAST(simde_float32, a_.f64[1]); r_.f32[2] = SIMDE_FLOAT32_C(0.0); r_.f32[3] = SIMDE_FLOAT32_C(0.0); #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpd_ps(a) simde_mm_cvtpd_ps(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtpi32_pd (simde__m64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_cvtpi32_pd(a); #else simde__m128d_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.i32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (simde_float64) a_.i32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtpi32_pd(a) simde_mm_cvtpi32_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtps_epi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtps_epi32(a); #else simde__m128i_private r_; simde__m128_private a_; #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) && !defined(SIMDE_BUG_GCC_95399) a_ = simde__m128_to_private(a); r_.neon_i32 = vcvtnq_s32_f32(a_.neon_f32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) a_ = simde__m128_to_private(a); HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_C11_EXTENSIONS_ SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ r_.altivec_i32 = vec_cts(a_.altivec_f32, 1); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_WASM_SIMD128_NATIVE) && defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_ROUND_TIES) a_ = simde__m128_to_private(a); r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); #else a_ = 
simde__m128_to_private(simde_x_mm_round_ps(a, SIMDE_MM_FROUND_TO_NEAREST_INT, 1)); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = simde_math_roundf(a_.f32[i]); #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtps_epi32(a) simde_mm_cvtps_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtps_pd (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtps_pd(a); #else simde__m128d_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_promote_low_f32x4(a_.wasm_v128); #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.f64, a_.m64_private[0].f32); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcvt_f64_f32(vget_low_f32(a_.neon_f32)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f32[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtps_pd(a) simde_mm_cvtps_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsd_si32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsd_si32(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 v = simde_math_round(a_.f64[0]); #if defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, v); #else return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_si32(a) simde_mm_cvtsd_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtsd_si64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if defined(__PGI) return _mm_cvtsd_si64x(a); #else return _mm_cvtsd_si64(a); #endif #else simde__m128d_private a_ = simde__m128d_to_private(a); return SIMDE_CONVERT_FTOI(int64_t, simde_math_round(a_.f64[0])); #endif } #define simde_mm_cvtsd_si64x(a) simde_mm_cvtsd_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_cvtsd_si64(a) simde_mm_cvtsd_si64(a) #define _mm_cvtsd_si64x(a) simde_mm_cvtsd_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128 simde_mm_cvtsd_ss (simde__m128 a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsd_ss(a, b); #else simde__m128_private r_, a_ = simde__m128_to_private(a); simde__m128d_private b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f32 = vsetq_lane_f32(vcvtxd_f32_f64(vgetq_lane_f64(b_.neon_f64, 0)), a_.neon_f32, 0); #else r_.f32[0] = HEDLEY_STATIC_CAST(simde_float32, b_.f64[0]); SIMDE_VECTORIZE for (size_t i = 1 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i]; } #endif return simde__m128_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsd_ss(a, b) simde_mm_cvtsd_ss(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int16_t simde_x_mm_cvtsi128_si16 (simde__m128i a) { simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s16(a_.neon_i16, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int16_t, wasm_i16x8_extract_lane(a_.wasm_v128, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif return 
vec_extract(a_.altivec_i16, 0); #else return a_.i16[0]; #endif } SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvtsi128_si32 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi128_si32(a); #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s32(a_.neon_i32, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int32_t, wasm_i32x4_extract_lane(a_.wasm_v128, 0)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif return vec_extract(a_.altivec_i32, 0); #else return a_.i32[0]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi128_si32(a) simde_mm_cvtsi128_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvtsi128_si64 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if defined(__PGI) return _mm_cvtsi128_si64x(a); #else return _mm_cvtsi128_si64(a); #endif #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) && !defined(HEDLEY_IBM_VERSION) return vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed long long), a_.i64), 0); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vgetq_lane_s64(a_.neon_i64, 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return HEDLEY_STATIC_CAST(int64_t, wasm_i64x2_extract_lane(a_.wasm_v128, 0)); #endif return a_.i64[0]; #endif } #define simde_mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_cvtsi128_si64(a) simde_mm_cvtsi128_si64(a) #define _mm_cvtsi128_si64x(a) simde_mm_cvtsi128_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtsi32_sd (simde__m128d a, int32_t b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi32_sd(a, b); #else simde__m128d_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); #else r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); r_.i64[1] = a_.i64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi32_sd(a, b) simde_mm_cvtsi32_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_cvtsi16_si128 (int16_t a) { simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsetq_lane_s16(a, vdupq_n_s16(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_make(a, 0, 0, 0, 0, 0, 0, 0); #else r_.i16[0] = a; r_.i16[1] = 0; r_.i16[2] = 0; r_.i16[3] = 0; r_.i16[4] = 0; r_.i16[5] = 0; r_.i16[6] = 0; r_.i16[7] = 0; #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsi32_si128 (int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtsi32_si128(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsetq_lane_s32(a, vdupq_n_s32(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_make(a, 0, 0, 0); #else r_.i32[0] = a; r_.i32[1] = 0; r_.i32[2] = 0; r_.i32[3] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtsi32_si128(a) simde_mm_cvtsi32_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtsi64_sd (simde__m128d a, int64_t b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_sd(a, b); #else return _mm_cvtsi64x_sd(a, b); #endif #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(HEDLEY_STATIC_CAST(float64_t, b), a_.neon_f64, 0); #else r_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b); r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #define 
simde_mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64_sd(a, b) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_cvtsi64_sd(a, b) simde_mm_cvtsi64_sd(a, b) #define _mm_cvtsi64x_sd(a, b) simde_mm_cvtsi64x_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvtsi64_si128 (int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvtsi64_si128(a); #else return _mm_cvtsi64x_si128(a); #endif #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsetq_lane_s64(a, vdupq_n_s64(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_make(a, 0); #else r_.i64[0] = a; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #define simde_mm_cvtsi64x_si128(a) simde_mm_cvtsi64_si128(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_cvtsi64_si128(a) simde_mm_cvtsi64_si128(a) #define _mm_cvtsi64x_si128(a) simde_mm_cvtsi64x_si128(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_cvtss_sd (simde__m128d a, simde__m128 b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvtss_sd(a, b); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vcvt_f64_f32(vset_lane_f32(vgetq_lane_f32(simde__m128_to_private(b).neon_f32, 0), vdup_n_f32(0), 0)); return vsetq_lane_f64(vgetq_lane_f64(simde__m128d_to_private(a).neon_f64, 1), temp, 1); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde__m128_private b_ = simde__m128_to_private(b); a_.f64[0] = HEDLEY_STATIC_CAST(simde_float64, b_.f32[0]); return simde__m128d_from_private(a_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvtss_sd(a, b) simde_mm_cvtss_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_cvttpd_pi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) 
return _mm_cvttpd_pi32(a); #else simde__m64_private r_; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_CONVERT_VECTOR_) && defined(SIMDE_FAST_CONVERSION_RANGE) SIMDE_CONVERT_VECTOR_(r_.i32, a_.f64); #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float64 v = a_.f64[i]; #if defined(SIMDE_FAST_CONVERSION_RANGE) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttpd_pi32(a) simde_mm_cvttpd_pi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvttpd_epi32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttpd_epi32(a); #else simde__m128i_private r_; r_.m64[0] = simde_mm_cvttpd_pi32(a); r_.m64[1] = simde_mm_setzero_si64(); return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttpd_epi32(a) simde_mm_cvttpd_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_cvttps_epi32 (simde__m128 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttps_epi32(a); #else simde__m128i_private r_; simde__m128_private a_ = simde__m128_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vcvtq_s32_f32(a_.neon_f32); #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) /* Values below INT32_MIN saturate anyways, so we don't need to * test for that. 
*/ #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) uint32x4_t valid_input = vandq_u32( vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))), vceqq_f32(a_.neon_f32, a_.neon_f32) ); #elif !defined(SIMDE_FAST_CONVERSION_RANGE) uint32x4_t valid_input = vcltq_f32(a_.neon_f32, vdupq_n_f32(SIMDE_FLOAT32_C(2147483648.0))); #elif !defined(SIMDE_FAST_NANS) uint32x4_t valid_input = vceqq_f32(a_.neon_f32, a_.neon_f32); #endif r_.neon_i32 = vbslq_s32(valid_input, r_.neon_i32, vdupq_n_s32(INT32_MIN)); #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_trunc_sat_f32x4(a_.wasm_v128); #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) #if !defined(SIMDE_FAST_CONVERSION_RANGE) && !defined(SIMDE_FAST_NANS) v128_t valid_input = wasm_v128_and( wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))), wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128) ); #elif !defined(SIMDE_FAST_CONVERSION_RANGE) v128_t valid_input = wasm_f32x4_lt(a_.wasm_v128, wasm_f32x4_splat(SIMDE_FLOAT32_C(2147483648.0))); #elif !defined(SIMDE_FAST_NANS) v128_t valid_input = wasm_f32x4_eq(a_.wasm_v128, a_.wasm_v128); #endif r_.wasm_v128 = wasm_v128_bitselect(r_.wasm_v128, wasm_i32x4_splat(INT32_MIN), valid_input); #endif #elif defined(SIMDE_CONVERT_VECTOR_) SIMDE_CONVERT_VECTOR_(r_.i32, a_.f32); #if !defined(SIMDE_FAST_CONVERSION_RANGE) || !defined(SIMDE_FAST_NANS) #if !defined(SIMDE_FAST_CONVERSION_RANGE) static const simde_float32 SIMDE_VECTOR(16) first_too_high = { SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0), SIMDE_FLOAT32_C(2147483648.0) }; __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(r_.i32), (a_.f32 < first_too_high) & (a_.f32 >= -first_too_high) ); #elif !defined(SIMDE_FAST_NANS) __typeof__(r_.i32) valid_input = HEDLEY_REINTERPRET_CAST( __typeof__(valid_input), a_.f32 == a_.f32); #endif __typeof__(r_.i32) invalid_output = { INT32_MIN, INT32_MIN, 
INT32_MIN, INT32_MIN }; r_.i32 = (r_.i32 & valid_input) | (invalid_output & ~valid_input); #endif #else for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { simde_float32 v = a_.f32[i]; #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_FAST_NANS) r_.i32[i] = SIMDE_CONVERT_FTOI(int32_t, v); #else r_.i32[i] = ((v > HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) ? SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttps_epi32(a) simde_mm_cvttps_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_cvttsd_si32 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_cvttsd_si32(a); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 v = a_.f64[0]; #if defined(SIMDE_FAST_CONVERSION_RANGE) return SIMDE_CONVERT_FTOI(int32_t, v); #else return ((v > HEDLEY_STATIC_CAST(simde_float64, INT32_MIN)) && (v < HEDLEY_STATIC_CAST(simde_float64, INT32_MAX))) ? 
SIMDE_CONVERT_FTOI(int32_t, v) : INT32_MIN; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_cvttsd_si32(a) simde_mm_cvttsd_si32(a) #endif SIMDE_FUNCTION_ATTRIBUTES int64_t simde_mm_cvttsd_si64 (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) #if !defined(__PGI) return _mm_cvttsd_si64(a); #else return _mm_cvttsd_si64x(a); #endif #else simde__m128d_private a_ = simde__m128d_to_private(a); return SIMDE_CONVERT_FTOI(int64_t, a_.f64[0]); #endif } #define simde_mm_cvttsd_si64x(a) simde_mm_cvttsd_si64(a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_cvttsd_si64(a) simde_mm_cvttsd_si64(a) #define _mm_cvttsd_si64x(a) simde_mm_cvttsd_si64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_div_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 / b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vdivq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_div(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] / b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_div_pd(a, b) simde_mm_div_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_div_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_div_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_div_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_div_pd(simde_x_mm_broadcastlow_pd(a), 
simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vdivq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = a_.f64[0] / b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_div_sd(a, b) simde_mm_div_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_extract_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { uint16_t r; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; (void) imm8; #endif r = HEDLEY_STATIC_CAST(uint16_t, vec_extract(a_.altivec_i16, imm8)); #else r = a_.u16[imm8 & 7]; #endif return HEDLEY_STATIC_CAST(int32_t, r); } #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(4,6,0)) #define simde_mm_extract_epi16(a, imm8) _mm_extract_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_extract_epi16(a, imm8) (HEDLEY_STATIC_CAST(int32_t, vgetq_lane_s16(simde__m128i_to_private(a).neon_i16, (imm8))) & (INT32_C(0x0000ffff))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_extract_epi16(a, imm8) HEDLEY_STATIC_CAST(int32_t, wasm_u16x8_extract_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_extract_epi16(a, imm8) simde_mm_extract_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_insert_epi16 (simde__m128i a, int16_t i, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 7) { simde__m128i_private a_ = simde__m128i_to_private(a); a_.i16[imm8 & 7] = i; return simde__m128i_from_private(a_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_insert_epi16(a, i, 
imm8) _mm_insert_epi16((a), (i), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_insert_epi16(a, i, imm8) simde__m128i_from_neon_i16(vsetq_lane_s16((i), simde__m128i_to_neon_i16(a), (imm8))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_insert_epi16(a, i, imm8) wasm_i16x8_replace_lane(simde__m128i_to_wasm_v128((a)), (imm8) & 7, (i)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_insert_epi16(a, i, imm8) simde_mm_insert_epi16(a, i, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load_pd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vld1q_f64(mem_addr); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vld1q_u32(HEDLEY_REINTERPRET_CAST(uint32_t const*, mem_addr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load(mem_addr); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), sizeof(r_)); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_pd(mem_addr) simde_mm_load_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load1_pd (simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load1_pd(mem_addr); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return simde__m128d_from_neon_f64(vld1q_dup_f64(mem_addr)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128d_from_wasm_v128(wasm_v128_load64_splat(mem_addr)); #else return simde_mm_set1_pd(*mem_addr); #endif } #define simde_mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_pd1(mem_addr) simde_mm_load1_pd(mem_addr) #define _mm_load1_pd(mem_addr) simde_mm_load1_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_load_sd (simde_float64 const* mem_addr) { #if 
defined(SIMDE_X86_SSE2_NATIVE) return _mm_load_sd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsetq_lane_f64(*mem_addr, vdupq_n_f64(0), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load64_zero(HEDLEY_REINTERPRET_CAST(const void*, mem_addr)); #else r_.f64[0] = *mem_addr; r_.u64[1] = UINT64_C(0); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_sd(mem_addr) simde_mm_load_sd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_load_si128 (simde__m128i const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_load_si128(HEDLEY_REINTERPRET_CAST(__m128i const*, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_ld(0, HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(int) const*, mem_addr)); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vld1q_s32(HEDLEY_REINTERPRET_CAST(int32_t const*, mem_addr)); #else simde_memcpy(&r_, SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), sizeof(simde__m128i)); #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_load_si128(mem_addr) simde_mm_load_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadh_pd (simde__m128d a, simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadh_pd(a, mem_addr); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcombine_f64(vget_low_f64(a_.neon_f64), vld1_f64(HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 1); #else simde_float64 t; simde_memcpy(&t, mem_addr, sizeof(t)); r_.f64[0] = a_.f64[0]; r_.f64[1] = t; #endif return simde__m128d_from_private(r_); 
#endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadh_pd(a, mem_addr) simde_mm_loadh_pd(a, mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadl_epi64 (simde__m128i const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadl_epi64(mem_addr); #else simde__m128i_private r_; int64_t value; simde_memcpy(&value, mem_addr, sizeof(value)); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vcombine_s64(vld1_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)), vdup_n_s64(0)); #else r_.i64[0] = value; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadl_epi64(mem_addr) simde_mm_loadl_epi64(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadl_pd (simde__m128d a, simde_float64 const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadl_pd(a, mem_addr); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vcombine_f64(vld1_f64( HEDLEY_REINTERPRET_CAST(const float64_t*, mem_addr)), vget_high_f64(a_.neon_f64)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_load64_lane(HEDLEY_REINTERPRET_CAST(const void*, mem_addr), a_.wasm_v128, 0); #else r_.f64[0] = *mem_addr; r_.u64[1] = a_.u64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadl_pd(a, mem_addr) simde_mm_loadl_pd(a, mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadr_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadr_pd(mem_addr); #else simde__m128d_private r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vld1q_f64(mem_addr); r_.neon_f64 = vextq_f64(r_.neon_f64, r_.neon_f64, 1); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vld1q_s64(HEDLEY_REINTERPRET_CAST(int64_t const *, mem_addr)); r_.neon_i64 = 
vextq_s64(r_.neon_i64, r_.neon_i64, 1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) v128_t tmp = wasm_v128_load(mem_addr); r_.wasm_v128 = wasm_i64x2_shuffle(tmp, tmp, 1, 0); #else r_.f64[0] = mem_addr[1]; r_.f64[1] = mem_addr[0]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadr_pd(mem_addr) simde_mm_loadr_pd(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_loadu_pd (simde_float64 const mem_addr[HEDLEY_ARRAY_PARAM(2)]) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_pd(mem_addr); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vld1q_f64(mem_addr); #else simde__m128d_private r_; simde_memcpy(&r_, mem_addr, sizeof(r_)); return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_pd(mem_addr) simde_mm_loadu_pd(mem_addr) #endif #if defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) #define simde_mm_loadu_epi8(mem_addr) _mm_loadu_epi8(mem_addr) #else SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_epi8(void const * mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #endif #define simde_x_mm_loadu_epi8(mem_addr) simde_mm_loadu_epi8(mem_addr) #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) #undef _mm_loadu_epi8 #define _mm_loadu_epi8(a) simde_mm_loadu_epi8(a) #endif #if 
defined(SIMDE_X86_AVX512VL_NATIVE) && defined(SIMDE_X86_AVX512BW_NATIVE) \ && !defined(SIMDE_BUG_GCC_95483) && !defined(SIMDE_BUG_CLANG_REV_344862) \ && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) #define simde_mm_loadu_epi16(mem_addr) _mm_loadu_epi16(mem_addr) #else SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_epi16(void const * mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vreinterpretq_s16_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #endif #define simde_x_mm_loadu_epi16(mem_addr) simde_mm_loadu_epi16(mem_addr) #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) #undef _mm_loadu_epi16 #define _mm_loadu_epi16(a) simde_mm_loadu_epi16(a) #endif #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ && !defined(SIMDE_BUG_CLANG_REV_344862) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) #define simde_mm_loadu_epi32(mem_addr) _mm_loadu_epi32(mem_addr) #else SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_epi32(void const * mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vreinterpretq_s32_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #endif #define simde_x_mm_loadu_epi32(mem_addr) simde_mm_loadu_epi32(mem_addr) #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || 
(defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) #undef _mm_loadu_epi32 #define _mm_loadu_epi32(a) simde_mm_loadu_epi32(a) #endif #if defined(SIMDE_X86_AVX512VL_NATIVE) && !defined(SIMDE_BUG_GCC_95483) \ && !defined(SIMDE_BUG_CLANG_REV_344862) \ && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,20,0)) #define simde_mm_loadu_epi64(mem_addr) _mm_loadu_epi64(mem_addr) #else SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_epi64(void const * mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(SIMDE_ALIGN_CAST(__m128i const *, mem_addr)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vreinterpretq_s64_s8(vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr))); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #endif #define simde_x_mm_loadu_epi64(mem_addr) simde_mm_loadu_epi64(mem_addr) #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && (defined(SIMDE_BUG_GCC_95483) || defined(SIMDE_BUG_CLANG_REV_344862))) #undef _mm_loadu_epi64 #define _mm_loadu_epi64(a) simde_mm_loadu_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si128 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_loadu_si128(HEDLEY_STATIC_CAST(__m128i const*, mem_addr)); #else simde__m128i_private r_; #if HEDLEY_GNUC_HAS_ATTRIBUTE(may_alias,3,3,0) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_PACKED_ struct simde_mm_loadu_si128_s { __typeof__(r_) v; } __attribute__((__packed__, __may_alias__)); r_ = HEDLEY_REINTERPRET_CAST(const struct simde_mm_loadu_si128_s *, mem_addr)->v; HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vld1q_s8(HEDLEY_REINTERPRET_CAST(int8_t const*, mem_addr)); #else simde_memcpy(&r_, mem_addr, sizeof(r_)); #endif return simde__m128i_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si128(mem_addr) simde_mm_loadu_si128(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_madd_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_madd_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t ph = vmull_high_s16(a_.neon_i16, b_.neon_i16); r_.neon_i32 = vpaddq_s32(pl, ph); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x4_t pl = vmull_s16(vget_low_s16(a_.neon_i16), vget_low_s16(b_.neon_i16)); int32x4_t ph = vmull_s16(vget_high_s16(a_.neon_i16), vget_high_s16(b_.neon_i16)); int32x2_t rl = vpadd_s32(vget_low_s32(pl), vget_high_s32(pl)); int32x2_t rh = vpadd_s32(vget_low_s32(ph), vget_high_s32(ph)); r_.neon_i32 = vcombine_s32(rl, rh); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_msum(a_.altivec_i16, b_.altivec_i16, vec_splats(0)); #elif defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i32 = vec_mule(a_.altivec_i16, b_.altivec_i16) + vec_mulo(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_dot_i16x8(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) int32_t SIMDE_VECTOR(32) a32, b32, p32; SIMDE_CONVERT_VECTOR_(a32, a_.i16); SIMDE_CONVERT_VECTOR_(b32, b_.i16); p32 = a32 * b32; r_.i32 = __builtin_shufflevector(p32, p32, 0, 2, 4, 6) + __builtin_shufflevector(p32, p32, 1, 3, 5, 7); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i += 2) { r_.i32[i / 2] = (a_.i16[i] * b_.i16[i]) + (a_.i16[i + 1] * b_.i16[i + 1]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_madd_epi16(a, b) 
simde_mm_madd_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_maskmoveu_si128 (simde__m128i a, simde__m128i mask, int8_t mem_addr[HEDLEY_ARRAY_PARAM(16)]) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_maskmoveu_si128(a, mask, HEDLEY_REINTERPRET_CAST(char*, mem_addr)); #else simde__m128i_private a_ = simde__m128i_to_private(a), mask_ = simde__m128i_to_private(mask); for (size_t i = 0 ; i < (sizeof(a_.i8) / sizeof(a_.i8[0])) ; i++) { if (mask_.u8[i] & 0x80) { mem_addr[i] = a_.i8[i]; } } #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_maskmoveu_si128(a, mask, mem_addr) simde_mm_maskmoveu_si128((a), (mask), SIMDE_CHECKED_REINTERPRET_CAST(int8_t*, char*, (mem_addr))) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_movemask_epi8 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__INTEL_COMPILER) /* ICC has trouble with _mm_movemask_epi8 at -O2 and above: */ return _mm_movemask_epi8(a); #else int32_t r = 0; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) /* https://github.com/WebAssembly/simd/pull/201#issue-380682845 */ static const uint8_t md[16] = { 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, 1 << 0, 1 << 1, 1 << 2, 1 << 3, 1 << 4, 1 << 5, 1 << 6, 1 << 7, }; /* Extend sign bit over entire lane */ uint8x16_t extended = vreinterpretq_u8_s8(vshrq_n_s8(a_.neon_i8, 7)); /* Clear all but the bit we're interested in. 
*/ uint8x16_t masked = vandq_u8(vld1q_u8(md), extended); /* Alternate bytes from low half and high half */ uint8x8x2_t tmp = vzip_u8(vget_low_u8(masked), vget_high_u8(masked)); uint16x8_t x = vreinterpretq_u16_u8(vcombine_u8(tmp.val[0], tmp.val[1])); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r = vaddvq_u16(x); #else uint64x2_t t64 = vpaddlq_u32(vpaddlq_u16(x)); r = HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 0)) + HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(t64, 1)); #endif #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 1)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && !defined(HEDLEY_IBM_VERSION) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_BIG) static const SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) perm = { 120, 112, 104, 96, 88, 80, 72, 64, 56, 48, 40, 32, 24, 16, 8, 0 }; r = HEDLEY_STATIC_CAST(int32_t, vec_extract(vec_vbpermq(a_.altivec_u8, perm), 14)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = HEDLEY_STATIC_CAST(int32_t, wasm_i8x16_bitmask(a_.wasm_v128)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u8) / sizeof(a_.u8[0])) ; i++) { r |= (a_.u8[15 - i] >> 7) << (15 - i); } #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movemask_epi8(a) simde_mm_movemask_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES int32_t simde_mm_movemask_pd (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_movemask_pd(a); #else int32_t r = 0; simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_ uint64x2_t shifted = vshrq_n_u64(a_.neon_u64, 63); r = HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 0)) + 
(HEDLEY_STATIC_CAST(int32_t, vgetq_lane_u64(shifted, 1)) << 1); HEDLEY_DIAGNOSTIC_POP #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && defined(SIMDE_BUG_CLANG_50932) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned char), vec_bperm(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(unsigned __int128), a_.altivec_u64), idx)); r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); #elif defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) idx = { 64, 0, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; SIMDE_POWER_ALTIVEC_VECTOR(unsigned char) res = vec_bperm(a_.altivec_u8, idx); r = HEDLEY_STATIC_CAST(int32_t, vec_extract(HEDLEY_REINTERPRET_CAST(SIMDE_POWER_ALTIVEC_VECTOR(signed int), res), 2)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r = HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_bitmask(a_.wasm_v128)); #else SIMDE_VECTORIZE_REDUCTION(|:r) for (size_t i = 0 ; i < (sizeof(a_.u64) / sizeof(a_.u64[0])) ; i++) { r |= (a_.u64[i] >> 63) << i; } #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movemask_pd(a) simde_mm_movemask_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_movepi64_pi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movepi64_pi64(a); #else simde__m64_private r_; simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i64 = vget_low_s64(a_.neon_i64); #else r_.i64[0] = a_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movepi64_pi64(a) simde_mm_movepi64_pi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_movpi64_epi64 (simde__m64 a) { 
#if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_movpi64_epi64(a); #else simde__m128i_private r_; simde__m64_private a_ = simde__m64_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vcombine_s64(a_.neon_i64, vdup_n_s64(0)); #else r_.i64[0] = a_.i64[0]; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_movpi64_epi64(a) simde_mm_movpi64_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_min_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vminq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_min(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i16 = vec_min(a_.altivec_i16, b_.altivec_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] < b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_epi16(a, b) simde_mm_min_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_min_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vminq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_min(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_u8 = vec_min(a_.altivec_u8, b_.altivec_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] < b_.u8[i]) ? a_.u8[i] : b_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_epu8(a, b) simde_mm_min_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_min_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = vec_min(a_.altivec_f64, b_.altivec_f64); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vminq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_min(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] < b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_pd(a, b) simde_mm_min_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_min_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_min_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_min_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_min_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vminq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = (a_.f64[0] < b_.f64[0]) ? a_.f64[0] : b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_min_sd(a, b) simde_mm_min_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_max_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmaxq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i16 = vec_max(a_.altivec_i16, b_.altivec_i16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = (a_.i16[i] > b_.i16[i]) ? 
a_.i16[i] : b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_epi16(a, b) simde_mm_max_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_max_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vmaxq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_u8 = vec_max(a_.altivec_u8, b_.altivec_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u8) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = (a_.u8[i] > b_.u8[i]) ? a_.u8[i] : b_.u8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_epu8(a, b) simde_mm_max_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_max_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = vec_max(a_.altivec_f64, b_.altivec_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_max(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmaxq_f64(a_.neon_f64, b_.neon_f64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = (a_.f64[i] > b_.f64[i]) ? 
a_.f64[i] : b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_pd(a, b) simde_mm_max_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_max_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_max_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_max_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_max_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vmaxq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = (a_.f64[0] > b_.f64[0]) ? a_.f64[0] : b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_max_sd(a, b) simde_mm_max_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_move_epi64 (simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_move_epi64(a); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsetq_lane_s64(0, a_.neon_i64, 1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, wasm_i64x2_const(0, 0), 0, 2); #else r_.i64[0] = a_.i64[0]; r_.i64[1] = 0; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_move_epi64(a) simde_mm_move_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mul_epu32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_epu32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint32x2_t a_lo = vmovn_u64(a_.neon_u64); uint32x2_t b_lo = vmovn_u64(b_.neon_u64); r_.neon_u64 = vmull_u32(a_lo, b_lo); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u64x2_extmul_low_u32x4( wasm_i32x4_shuffle(a_.wasm_v128, a_.wasm_v128, 0, 2, 0, 2), wasm_i32x4_shuffle(b_.wasm_v128, b_.wasm_v128, 0, 2, 0, 2)); #elif defined(SIMDE_SHUFFLE_VECTOR_) && (SIMDE_ENDIAN_ORDER == SIMDE_ENDIAN_LITTLE) __typeof__(a_.u32) z = { 0, }; a_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.u32, z, 0, 4, 2, 6); b_.u32 = SIMDE_SHUFFLE_VECTOR_(32, 16, b_.u32, z, 0, 4, 2, 6); r_.u64 = HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), a_.u32) * HEDLEY_REINTERPRET_CAST(__typeof__(r_.u64), b_.u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[i * 2]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[i * 2]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_epu32(a, b) simde_mm_mul_epu32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_mul_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_mul(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 * b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] * b_.i64[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_mod_epi64 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && !defined(SIMDE_BUG_PGI_30104) r_.i64 = a_.i64 % b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = 
a_.i64[i] % b_.i64[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mul_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 * b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vmulq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_mul(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] * b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_pd(a, b) simde_mm_mul_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_mul_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mul_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_mul_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_mul_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) float64x2_t temp = vmulq_f64(a_.neon_f64, b_.neon_f64); r_.neon_f64 = vsetq_lane_f64(vgetq_lane(a_.neon_f64, 1), temp, 1); #else r_.f64[0] = a_.f64[0] * b_.f64[0]; r_.f64[1] = a_.f64[1]; #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_sd(a, b) simde_mm_mul_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_mul_su32 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) && !defined(__PGI) return _mm_mul_su32(a, b); #else simde__m64_private r_, a_ 
= simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.u64[0] = vget_lane_u64(vget_low_u64(vmull_u32(vreinterpret_u32_s64(a_.neon_i64), vreinterpret_u32_s64(b_.neon_i64))), 0); #else r_.u64[0] = HEDLEY_STATIC_CAST(uint64_t, a_.u32[0]) * HEDLEY_STATIC_CAST(uint64_t, b_.u32[0]); #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mul_su32(a, b) simde_mm_mul_su32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mulhi_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mulhi_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4_t a3210 = vget_low_s16(a_.neon_i16); int16x4_t b3210 = vget_low_s16(b_.neon_i16); int32x4_t ab3210 = vmull_s16(a3210, b3210); /* 3333222211110000 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) int32x4_t ab7654 = vmull_high_s16(a_.neon_i16, b_.neon_i16); r_.neon_i16 = vuzp2q_s16(vreinterpretq_s16_s32(ab3210), vreinterpretq_s16_s32(ab7654)); #else int16x4_t a7654 = vget_high_s16(a_.neon_i16); int16x4_t b7654 = vget_high_s16(b_.neon_i16); int32x4_t ab7654 = vmull_s16(a7654, b7654); /* 7777666655554444 */ uint16x8x2_t rv = vuzpq_u16(vreinterpretq_u16_s32(ab3210), vreinterpretq_u16_s32(ab7654)); r_.neon_u16 = rv.val[1]; #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) const v128_t lo = wasm_i32x4_extmul_low_i16x8(a_.wasm_v128, b_.wasm_v128); const v128_t hi = wasm_i32x4_extmul_high_i16x8(a_.wasm_v128, b_.wasm_v128); r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (HEDLEY_STATIC_CAST(uint32_t, HEDLEY_STATIC_CAST(int32_t, a_.i16[i]) * HEDLEY_STATIC_CAST(int32_t, b_.i16[i])) >> 16)); } #endif return simde__m128i_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mulhi_epi16(a, b) simde_mm_mulhi_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mulhi_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) return _mm_mulhi_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) uint16x4_t a3210 = vget_low_u16(a_.neon_u16); uint16x4_t b3210 = vget_low_u16(b_.neon_u16); uint32x4_t ab3210 = vmull_u16(a3210, b3210); /* 3333222211110000 */ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint32x4_t ab7654 = vmull_high_u16(a_.neon_u16, b_.neon_u16); r_.neon_u16 = vuzp2q_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); #else uint16x4_t a7654 = vget_high_u16(a_.neon_u16); uint16x4_t b7654 = vget_high_u16(b_.neon_u16); uint32x4_t ab7654 = vmull_u16(a7654, b7654); /* 7777666655554444 */ uint16x8x2_t neon_r = vuzpq_u16(vreinterpretq_u16_u32(ab3210), vreinterpretq_u16_u32(ab7654)); r_.neon_u16 = neon_r.val[1]; #endif #elif defined(SIMDE_WASM_SIMD128_NATIVE) const v128_t lo = wasm_u32x4_extmul_low_u16x8(a_.wasm_v128, b_.wasm_v128); const v128_t hi = wasm_u32x4_extmul_high_u16x8(a_.wasm_v128, b_.wasm_v128); r_.wasm_v128 = wasm_i16x8_shuffle(lo, hi, 1, 3, 5, 7, 9, 11, 13, 15); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i]) >> 16); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mulhi_epu16(a, b) simde_mm_mulhi_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_mullo_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_mullo_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vmulq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) (void) a_; (void) b_; r_.altivec_i16 = vec_mul(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_mul(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, HEDLEY_STATIC_CAST(uint32_t, a_.u16[i]) * HEDLEY_STATIC_CAST(uint32_t, b_.u16[i])); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mullo_epi16(a, b) simde_mm_mullo_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_or_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_or(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vorrq_s64(a_.neon_i64, b_.neon_i64); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_or_pd(a, b) simde_mm_or_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_or_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_or_si128(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vorrq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_or(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = 
wasm_v128_or(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f | b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] | b_.i32f[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_or_si128(a, b) simde_mm_or_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_packs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_packs_epi16(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vqmovn_high_s16(vqmovn_s16(a_.neon_i16), b_.neon_i16); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vcombine_s8(vqmovn_s16(a_.neon_i16), vqmovn_s16(b_.neon_i16)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i8 = vec_packs(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) int16_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); const int16_t SIMDE_VECTOR(32) min = { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }; const int16_t SIMDE_VECTOR(32) max = { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }; int16_t m SIMDE_VECTOR(32); m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); v = (v & ~m) | (min & m); m = v > max; v = (v & ~m) | (max & m); SIMDE_CONVERT_VECTOR_(r_.i8, v); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { int16_t 
v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; r_.i8[i] = (v < INT8_MIN) ? INT8_MIN : ((v > INT8_MAX) ? INT8_MAX : HEDLEY_STATIC_CAST(int8_t, v)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_packs_epi16(a, b) simde_mm_packs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_packs_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_packs_epi32(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vqmovn_high_s32(vqmovn_s32(a_.neon_i32), b_.neon_i32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vcombine_s16(vqmovn_s32(a_.neon_i32), vqmovn_s32(b_.neon_i32)); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i16 = vec_packs(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_X86_SSE2_NATIVE) r_.sse_m128i = _mm_packs_epi32(a_.sse_m128i, b_.sse_m128i); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_narrow_i32x4(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) int32_t SIMDE_VECTOR(32) v = SIMDE_SHUFFLE_VECTOR_(32, 32, a_.i32, b_.i32, 0, 1, 2, 3, 4, 5, 6, 7); const int32_t SIMDE_VECTOR(32) min = { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }; const int32_t SIMDE_VECTOR(32) max = { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX }; int32_t m SIMDE_VECTOR(32); m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v < min); v = (v & ~m) | (min & m); m = HEDLEY_REINTERPRET_CAST(__typeof__(m), v > max); v = (v & ~m) | (max & m); SIMDE_CONVERT_VECTOR_(r_.i16, v); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { int32_t v = (i < (sizeof(a_.i32) / sizeof(a_.i32[0]))) ? 
a_.i32[i] : b_.i32[i & 3]; r_.i16[i] = (v < INT16_MIN) ? INT16_MIN : ((v > INT16_MAX) ? INT16_MAX : HEDLEY_STATIC_CAST(int16_t, v)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_packs_epi32(a, b) simde_mm_packs_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_packus_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_packus_epi16(a, b); #else simde__m128i_private a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b), r_; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #if defined(SIMDE_BUG_CLANG_46840) r_.neon_u8 = vqmovun_high_s16(vreinterpret_s8_u8(vqmovun_s16(a_.neon_i16)), b_.neon_i16); #else r_.neon_u8 = vqmovun_high_s16(vqmovun_s16(a_.neon_i16), b_.neon_i16); #endif #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vcombine_u8( vqmovun_s16(a_.neon_i16), vqmovun_s16(b_.neon_i16) ); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u8 = vec_packsu(a_.altivec_i16, b_.altivec_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_narrow_i16x8(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_CONVERT_VECTOR_) && HEDLEY_HAS_BUILTIN(__builtin_shufflevector) && defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) int16_t v SIMDE_VECTOR(32) = SIMDE_SHUFFLE_VECTOR_(16, 32, a_.i16, b_.i16, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); v &= ~(v >> 15); v |= HEDLEY_REINTERPRET_CAST(__typeof__(v), v > UINT8_MAX); SIMDE_CONVERT_VECTOR_(r_.i8, v); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { int16_t v = (i < (sizeof(a_.i16) / sizeof(a_.i16[0]))) ? a_.i16[i] : b_.i16[i & 7]; r_.u8[i] = (v < 0) ? UINT8_C(0) : ((v > UINT8_MAX) ? 
UINT8_MAX : HEDLEY_STATIC_CAST(uint8_t, v)); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_packus_epi16(a, b) simde_mm_packus_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_pause (void) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_pause(); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_pause() (simde_mm_pause()) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sad_epu8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sad_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) const uint16x8_t t = vpaddlq_u8(vabdq_u8(a_.neon_u8, b_.neon_u8)); r_.neon_u64 = vcombine_u64( vpaddl_u32(vpaddl_u16(vget_low_u16(t))), vpaddl_u32(vpaddl_u16(vget_high_u16(t)))); #else for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { uint16_t tmp = 0; SIMDE_VECTORIZE_REDUCTION(+:tmp) for (size_t j = 0 ; j < ((sizeof(r_.u8) / sizeof(r_.u8[0])) / 2) ; j++) { const size_t e = j + (i * 8); tmp += (a_.u8[e] > b_.u8[e]) ? 
(a_.u8[e] - b_.u8[e]) : (b_.u8[e] - a_.u8[e]); } r_.i64[i] = tmp; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sad_epu8(a, b) simde_mm_sad_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi8( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_make( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(int8x16_t) int8_t data[16] = { e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15}; r_.neon_i8 = vld1q_s8(data); #else r_.i8[ 0] = e0; r_.i8[ 1] = e1; r_.i8[ 2] = e2; r_.i8[ 3] = e3; r_.i8[ 4] = e4; r_.i8[ 5] = e5; r_.i8[ 6] = e6; r_.i8[ 7] = e7; r_.i8[ 8] = e8; r_.i8[ 9] = e9; r_.i8[10] = e10; r_.i8[11] = e11; r_.i8[12] = e12; r_.i8[13] = e13; r_.i8[14] = e14; r_.i8[15] = e15; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(int16x8_t) int16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_i16 = vld1q_s16(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) 
r_.wasm_v128 = wasm_i16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); #else r_.i16[0] = e0; r_.i16[1] = e1; r_.i16[2] = e2; r_.i16[3] = e3; r_.i16[4] = e4; r_.i16[5] = e5; r_.i16[6] = e6; r_.i16[7] = e7; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_set_epi16(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si16 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ HEDLEY_GCC_VERSION_CHECK(12,1,0)) return _mm_loadu_si16(mem_addr); #else int16_t val; simde_memcpy(&val, mem_addr, sizeof(val)); return simde_x_mm_cvtsi16_si128(val); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si16(mem_addr) simde_mm_loadu_si16(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi32(e3, e2, e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(int32x4_t) int32_t data[4] = { e0, e1, e2, e3 }; r_.neon_i32 = vld1q_s32(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_make(e0, e1, e2, e3); #else r_.i32[0] = e0; r_.i32[1] = e1; r_.i32[2] = e2; r_.i32[3] = e3; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi32(e3, e2, e1, e0) simde_mm_set_epi32(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si32 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1) || \ HEDLEY_GCC_VERSION_CHECK(12,1,0)) return _mm_loadu_si32(mem_addr); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
simde__m128i_from_wasm_v128(wasm_v128_load32_zero(mem_addr)); #else int32_t val; simde_memcpy(&val, mem_addr, sizeof(val)); return simde_mm_cvtsi32_si128(val); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si32(mem_addr) simde_mm_loadu_si32(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi64 (simde__m64 e1, simde__m64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_set_epi64(e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vcombine_s64(simde__m64_to_neon_i64(e0), simde__m64_to_neon_i64(e1)); #else r_.m64[0] = e0; r_.m64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi64(e1, e0) (simde_mm_set_epi64((e1), (e0))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set_epi64x (int64_t e1, int64_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set_epi64x(e1, e0); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(int64x2_t) int64_t data[2] = {e0, e1}; r_.neon_i64 = vld1q_s64(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_make(e0, e1); #else r_.i64[0] = e0; r_.i64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_epi64x(e1, e0) simde_mm_set_epi64x(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_loadu_si64 (void const* mem_addr) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) return _mm_loadu_si64(mem_addr); #else int64_t val; simde_memcpy(&val, mem_addr, sizeof(val)); return simde_mm_cvtsi64_si128(val); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_loadu_si64(mem_addr) 
simde_mm_loadu_si64(mem_addr) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu8 (uint8_t e15, uint8_t e14, uint8_t e13, uint8_t e12, uint8_t e11, uint8_t e10, uint8_t e9, uint8_t e8, uint8_t e7, uint8_t e6, uint8_t e5, uint8_t e4, uint8_t e3, uint8_t e2, uint8_t e1, uint8_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi8( HEDLEY_STATIC_CAST(char, e15), HEDLEY_STATIC_CAST(char, e14), HEDLEY_STATIC_CAST(char, e13), HEDLEY_STATIC_CAST(char, e12), HEDLEY_STATIC_CAST(char, e11), HEDLEY_STATIC_CAST(char, e10), HEDLEY_STATIC_CAST(char, e9), HEDLEY_STATIC_CAST(char, e8), HEDLEY_STATIC_CAST(char, e7), HEDLEY_STATIC_CAST(char, e6), HEDLEY_STATIC_CAST(char, e5), HEDLEY_STATIC_CAST(char, e4), HEDLEY_STATIC_CAST(char, e3), HEDLEY_STATIC_CAST(char, e2), HEDLEY_STATIC_CAST(char, e1), HEDLEY_STATIC_CAST(char, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint8x16_t) uint8_t data[16] = { e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15}; r_.neon_u8 = vld1q_u8(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_make(e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); #else r_.u8[ 0] = e0; r_.u8[ 1] = e1; r_.u8[ 2] = e2; r_.u8[ 3] = e3; r_.u8[ 4] = e4; r_.u8[ 5] = e5; r_.u8[ 6] = e6; r_.u8[ 7] = e7; r_.u8[ 8] = e8; r_.u8[ 9] = e9; r_.u8[10] = e10; r_.u8[11] = e11; r_.u8[12] = e12; r_.u8[13] = e13; r_.u8[14] = e14; r_.u8[15] = e15; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu16 (uint16_t e7, uint16_t e6, uint16_t e5, uint16_t e4, uint16_t e3, uint16_t e2, uint16_t e1, uint16_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi16( HEDLEY_STATIC_CAST(short, e7), HEDLEY_STATIC_CAST(short, e6), HEDLEY_STATIC_CAST(short, e5), HEDLEY_STATIC_CAST(short, e4), HEDLEY_STATIC_CAST(short, e3), HEDLEY_STATIC_CAST(short, e2), HEDLEY_STATIC_CAST(short, e1), HEDLEY_STATIC_CAST(short, 
e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint16x8_t) uint16_t data[8] = { e0, e1, e2, e3, e4, e5, e6, e7 }; r_.neon_u16 = vld1q_u16(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x8_make(e0, e1, e2, e3, e4, e5, e6, e7); #else r_.u16[0] = e0; r_.u16[1] = e1; r_.u16[2] = e2; r_.u16[3] = e3; r_.u16[4] = e4; r_.u16[5] = e5; r_.u16[6] = e6; r_.u16[7] = e7; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu32 (uint32_t e3, uint32_t e2, uint32_t e1, uint32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_epi32( HEDLEY_STATIC_CAST(int, e3), HEDLEY_STATIC_CAST(int, e2), HEDLEY_STATIC_CAST(int, e1), HEDLEY_STATIC_CAST(int, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint32x4_t) uint32_t data[4] = { e0, e1, e2, e3 }; r_.neon_u32 = vld1q_u32(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u32x4_make(e0, e1, e2, e3); #else r_.u32[0] = e0; r_.u32[1] = e1; r_.u32[2] = e2; r_.u32[3] = e3; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set_epu64x (uint64_t e1, uint64_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set_epi64x(HEDLEY_STATIC_CAST(int64_t, e1), HEDLEY_STATIC_CAST(int64_t, e0)); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) SIMDE_ALIGN_LIKE_16(uint64x2_t) uint64_t data[2] = {e0, e1}; r_.neon_u64 = vld1q_u64(data); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u64x2_make(e0, e1); #else r_.u64[0] = e0; r_.u64[1] = e1; #endif return simde__m128i_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_set_sd (simde_float64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set_sd(a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vsetq_lane_f64(a, 
vdupq_n_f64(SIMDE_FLOAT64_C(0.0)), 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128d_from_wasm_v128(wasm_f64x2_make(a, 0)); #else return simde_mm_set_pd(SIMDE_FLOAT64_C(0.0), a); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set_sd(a) simde_mm_set_sd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi8 (int8_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi8(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vdupq_n_s8(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i8 = vec_splats(HEDLEY_STATIC_CAST(signed char, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi8(a) simde_mm_set1_epi8(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi16 (int16_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi16(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vdupq_n_s16(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i16 = vec_splats(HEDLEY_STATIC_CAST(signed short, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi16(a) simde_mm_set1_epi16(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi32 (int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_set1_epi32(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vdupq_n_s32(a); 
#elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i32 = vec_splats(HEDLEY_STATIC_CAST(signed int, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi32(a) simde_mm_set1_epi32(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi64x (int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && (!defined(HEDLEY_MSVC_VERSION) || HEDLEY_MSVC_VERSION_CHECK(19,0,0)) return _mm_set1_epi64x(a); #else simde__m128i_private r_; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vdupq_n_s64(a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_splat(a); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_i64 = vec_splats(HEDLEY_STATIC_CAST(signed long long, a)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi64x(a) simde_mm_set1_epi64x(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_set1_epi64 (simde__m64 a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_set1_epi64(a); #else simde__m64_private a_ = simde__m64_to_private(a); return simde_mm_set1_epi64x(a_.i64[0]); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_set1_epi64(a) simde_mm_set1_epi64(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu8 (uint8_t value) { #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return simde__m128i_from_altivec_u8(vec_splats(HEDLEY_STATIC_CAST(unsigned char, value))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return 
simde__m128i_from_wasm_v128(wasm_u8x16_splat(value)); #else return simde_mm_set1_epi8(HEDLEY_STATIC_CAST(int8_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu16 (uint16_t value) { #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return simde__m128i_from_altivec_u16(vec_splats(HEDLEY_STATIC_CAST(unsigned short, value))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128i_from_wasm_v128(wasm_u16x8_splat(value)); #else return simde_mm_set1_epi16(HEDLEY_STATIC_CAST(int16_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu32 (uint32_t value) { #if defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) return simde__m128i_from_altivec_u32(vec_splats(HEDLEY_STATIC_CAST(unsigned int, value))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128i_from_wasm_v128(wasm_u32x4_splat(value)); #else return simde_mm_set1_epi32(HEDLEY_STATIC_CAST(int32_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_set1_epu64 (uint64_t value) { #if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) return simde__m128i_from_altivec_u64(vec_splats(HEDLEY_STATIC_CAST(unsigned long long, value))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128i_from_wasm_v128(wasm_u64x2_splat(value)); #else return simde_mm_set1_epi64x(HEDLEY_STATIC_CAST(int64_t, value)); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi8 (int8_t e15, int8_t e14, int8_t e13, int8_t e12, int8_t e11, int8_t e10, int8_t e9, int8_t e8, int8_t e7, int8_t e6, int8_t e5, int8_t e4, int8_t e3, int8_t e2, int8_t e1, int8_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi8( e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_epi8( e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi8(e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi8(e15, e14, e13, e12, e11, e10, 
e9, e8, e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi16 (int16_t e7, int16_t e6, int16_t e5, int16_t e4, int16_t e3, int16_t e2, int16_t e1, int16_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0); #else return simde_mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) simde_mm_setr_epi16(e7, e6, e5, e4, e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi32 (int32_t e3, int32_t e2, int32_t e1, int32_t e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_epi32(e3, e2, e1, e0); #else return simde_mm_set_epi32(e0, e1, e2, e3); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi32(e3, e2, e1, e0) simde_mm_setr_epi32(e3, e2, e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_setr_epi64 (simde__m64 e1, simde__m64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_setr_epi64(e1, e0); #else return simde_mm_set_epi64(e0, e1); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_epi64(e1, e0) (simde_mm_setr_epi64((e1), (e0))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_setr_pd (simde_float64 e1, simde_float64 e0) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setr_pd(e1, e0); #else return simde_mm_set_pd(e0, e1); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setr_pd(e1, e0) simde_mm_setr_pd(e1, e0) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_setzero_pd (void) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_setzero_pd(); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return simde__m128d_from_wasm_v128(wasm_f64x2_const(0.0, 0.0)); #else return simde_mm_castsi128_pd(simde_mm_setzero_si128()); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_setzero_pd() simde_mm_setzero_pd() #endif #if 
defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_ #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_undefined_pd (void) { simde__m128d_private r_; #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) r_.n = _mm_undefined_pd(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128d_to_private(simde_mm_setzero_pd()); #endif return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_undefined_pd() simde_mm_undefined_pd() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_undefined_si128 (void) { simde__m128i_private r_; #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE__HAVE_UNDEFINED128) r_.n = _mm_undefined_si128(); #elif !defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) r_ = simde__m128i_to_private(simde_mm_setzero_si128()); #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_undefined_si128() (simde_mm_undefined_si128()) #endif #if defined(SIMDE_DIAGNOSTIC_DISABLE_UNINITIALIZED_) HEDLEY_DIAGNOSTIC_POP #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_setone_pd (void) { return simde_mm_castps_pd(simde_x_mm_setone_ps()); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_setone_si128 (void) { return simde_mm_castps_si128(simde_x_mm_setone_ps()); } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shuffle_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[(imm8 >> (i * 2)) & 3]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shuffle_epi32(a, imm8) _mm_shuffle_epi32((a), (imm8)) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = 
simde__m128i_to_private(a); \ simde__m128i_from_wasm_v128( \ wasm_i32x4_shuffle( \ (simde_tmp_a_).wasm_v128, \ (simde_tmp_a_).wasm_v128, \ ((imm8) ) & 3, \ ((imm8) >> 2) & 3, \ ((imm8) >> 4) & 3, \ ((imm8) >> 6) & 3)); })) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) #define simde_mm_shuffle_epi32(a, imm8) \ (__extension__ ({ \ const int32x4_t simde_mm_shuffle_epi32_a_ = simde__m128i_to_neon_i32(a); \ int32x4_t simde_mm_shuffle_epi32_r_; \ simde_mm_shuffle_epi32_r_ = vmovq_n_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, (imm8) & (0x3))); \ simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 2) & 0x3), simde_mm_shuffle_epi32_r_, 1); \ simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 4) & 0x3), simde_mm_shuffle_epi32_r_, 2); \ simde_mm_shuffle_epi32_r_ = vsetq_lane_s32(vgetq_lane_s32(simde_mm_shuffle_epi32_a_, ((imm8) >> 6) & 0x3), simde_mm_shuffle_epi32_r_, 3); \ vreinterpretq_s64_s32(simde_mm_shuffle_epi32_r_); \ })) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_epi32(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .i32 = \ SIMDE_SHUFFLE_VECTOR_(32, 16, \ (simde_tmp_a_).i32, \ (simde_tmp_a_).i32, \ ((imm8) ) & 3, \ ((imm8) >> 2) & 3, \ ((imm8) >> 4) & 3, \ ((imm8) >> 6) & 3) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shuffle_epi32(a, imm8) simde_mm_shuffle_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_shuffle_pd (simde__m128d a, simde__m128d b, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 3) { simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = ((imm8 & 1) == 0) ? a_.f64[0] : a_.f64[1]; r_.f64[1] = ((imm8 & 2) == 0) ? 
b_.f64[0] : b_.f64[1]; return simde__m128d_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(__PGI) #define simde_mm_shuffle_pd(a, b, imm8) _mm_shuffle_pd((a), (b), (imm8)) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shuffle_pd(a, b, imm8) (__extension__ ({ \ simde__m128d_from_private((simde__m128d_private) { .f64 = \ SIMDE_SHUFFLE_VECTOR_(64, 16, \ simde__m128d_to_private(a).f64, \ simde__m128d_to_private(b).f64, \ (((imm8) ) & 1), \ (((imm8) >> 1) & 1) + 2) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shuffle_pd(a, b, imm8) simde_mm_shuffle_pd(a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shufflehi_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i++) { r_.i16[i] = a_.i16[i]; } for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[((imm8 >> ((i - 4) * 2)) & 3) + 4]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflehi_epi16(a, imm8) _mm_shufflehi_epi16((a), (imm8)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) #define simde_mm_shufflehi_epi16(a, imm8) \ (__extension__ ({ \ int16x8_t simde_mm_shufflehi_epi16_a_ = simde__m128i_to_neon_i16(a); \ int16x8_t simde_mm_shufflehi_epi16_r_ = simde_mm_shufflehi_epi16_a_; \ simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) ) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 4); \ simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 2) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 5); \ simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 4) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 6); \ 
simde_mm_shufflehi_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflehi_epi16_a_, (((imm8) >> 6) & 0x3) + 4), simde_mm_shufflehi_epi16_r_, 7); \ simde__m128i_from_neon_i16(simde_mm_shufflehi_epi16_r_); \ })) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .wasm_v128 = \ wasm_i16x8_shuffle( \ (simde_tmp_a_).wasm_v128, \ (simde_tmp_a_).wasm_v128, \ 0, 1, 2, 3, \ (((imm8) ) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflehi_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 16, \ (simde_tmp_a_).i16, \ (simde_tmp_a_).i16, \ 0, 1, 2, 3, \ (((imm8) ) & 3) + 4, \ (((imm8) >> 2) & 3) + 4, \ (((imm8) >> 4) & 3) + 4, \ (((imm8) >> 6) & 3) + 4) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shufflehi_epi16(a, imm8) simde_mm_shufflehi_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_shufflelo_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); for (size_t i = 0 ; i < ((sizeof(r_.i16) / sizeof(r_.i16[0])) / 2) ; i++) { r_.i16[i] = a_.i16[((imm8 >> (i * 2)) & 3)]; } SIMDE_VECTORIZE for (size_t i = ((sizeof(a_.i16) / sizeof(a_.i16[0])) / 2) ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i]; } return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_shufflelo_epi16(a, imm8) _mm_shufflelo_epi16((a), (imm8)) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_shufflelo_epi16(a, imm8) \ simde__m128i_from_wasm_v128( \ wasm_i16x8_shuffle( \ 
simde__m128i_to_wasm_v128((a)), \ wasm_i16x8_splat(0), \ (((imm8) & 0x03) ), \ (((imm8) & 0x0c) >> 2), \ (((imm8) & 0x30) >> 4), \ (((imm8) & 0xc0) >> 6), \ 4, 5, 6, 7)) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_STATEMENT_EXPR_) #define simde_mm_shufflelo_epi16(a, imm8) \ (__extension__({ \ int16x8_t simde_mm_shufflelo_epi16_a_ = simde__m128i_to_neon_i16(a); \ int16x8_t simde_mm_shufflelo_epi16_r_ = simde_mm_shufflelo_epi16_a_; \ simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) ) & 0x3)), simde_mm_shufflelo_epi16_r_, 0); \ simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 2) & 0x3)), simde_mm_shufflelo_epi16_r_, 1); \ simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 4) & 0x3)), simde_mm_shufflelo_epi16_r_, 2); \ simde_mm_shufflelo_epi16_r_ = vsetq_lane_s16(vgetq_lane_s16(simde_mm_shufflelo_epi16_a_, (((imm8) >> 6) & 0x3)), simde_mm_shufflelo_epi16_r_, 3); \ simde__m128i_from_neon_i16(simde_mm_shufflelo_epi16_r_); \ })) #elif defined(SIMDE_SHUFFLE_VECTOR_) #define simde_mm_shufflelo_epi16(a, imm8) (__extension__ ({ \ const simde__m128i_private simde_tmp_a_ = simde__m128i_to_private(a); \ simde__m128i_from_private((simde__m128i_private) { .i16 = \ SIMDE_SHUFFLE_VECTOR_(16, 16, \ (simde_tmp_a_).i16, \ (simde_tmp_a_).i16, \ (((imm8) ) & 3), \ (((imm8) >> 2) & 3), \ (((imm8) >> 4) & 3), \ (((imm8) >> 6) & 3), \ 4, 5, 6, 7) }); })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_shufflelo_epi16(a, imm8) simde_mm_shufflelo_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 15) return simde_mm_setzero_si128(); #if 
defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = (a_.u16 << count_.u64[0]); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, count_.u64[0]))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 16) ? wasm_i16x8_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i16x8_const(0,0,0,0,0,0,0,0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = HEDLEY_STATIC_CAST(uint16_t, (a_.u16[i] << count_.u64[0])); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi16(a, count) simde_mm_sll_epi16((a), (count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 31) return simde_mm_setzero_si128(); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = (a_.u32 << count_.u64[0]); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, count_.u64[0]))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = ((wasm_i64x2_extract_lane(count_.wasm_v128, 0) < 32) ? 
wasm_i32x4_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(int32_t, wasm_i64x2_extract_lane(count_.wasm_v128, 0))) : wasm_i32x4_const(0,0,0,0)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = HEDLEY_STATIC_CAST(uint32_t, (a_.u32[i] << count_.u64[0])); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi32(a, count) (simde_mm_sll_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sll_epi64 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sll_epi64(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); if (count_.u64[0] > 63) return simde_mm_setzero_si128(); const int_fast16_t s = HEDLEY_STATIC_CAST(int_fast16_t, count_.u64[0]); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, s))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = (s < 64) ? 
wasm_i64x2_shl(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, s)) : wasm_i64x2_const(0,0); #else #if !defined(SIMDE_BUG_GCC_94488) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] << s; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sll_epi64(a, count) (simde_mm_sll_epi64(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sqrt_pd (simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sqrt_pd(a); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsqrtq_f64(a_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_sqrt(a_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE) r_.altivec_f64 = vec_sqrt(a_.altivec_f64); #elif defined(simde_math_sqrt) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = simde_math_sqrt(a_.f64[i]); } #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sqrt_pd(a) simde_mm_sqrt_pd(a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sqrt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sqrt_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_sqrt_pd(b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_sqrt_pd(simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(simde_math_sqrt) r_.f64[0] = simde_math_sqrt(b_.f64[0]); r_.f64[1] = a_.f64[1]; #else HEDLEY_UNREACHABLE(); #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sqrt_sd(a, b) 
simde_mm_sqrt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 16 ? 16 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vshlq_u16(a_.neon_u16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u16) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi16(a, count) (simde_mm_srl_epi16(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 32 ? 
32 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vshlq_u32(a_.neon_u32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi32(a, count) (simde_mm_srl_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srl_epi64 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_srl_epi64(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 64 ? 64 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(HEDLEY_STATIC_CAST(int64_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u64x2_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else #if !defined(SIMDE_BUG_GCC_94488) SIMDE_VECTORIZE #endif for (size_t i = 0 ; i < (sizeof(r_.u64) / sizeof(r_.u64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srl_epi64(a, count) (simde_mm_srl_epi64(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srai_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { /* MSVC requires a range of (0, 255). */ simde__m128i_private r_, a_ = simde__m128i_to_private(a); const int cnt = (imm8 & ~15) ? 
15 : imm8; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srai_epi16(a, imm8) _mm_srai_epi16((a), (imm8)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srai_epi16(a, imm8) simde_mm_srai_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srai_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_RANGE(imm8, 0, 255) { /* MSVC requires a range of (0, 255). */ simde__m128i_private r_, a_ = simde__m128i_to_private(a); const int cnt = (imm8 & ~31) ? 31 : imm8; #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(-cnt)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srai_epi32(a, imm8) _mm_srai_epi32((a), (imm8)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srai_epi32(a, imm8) simde_mm_srai_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sra_epi16 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sra_epi16(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = HEDLEY_STATIC_CAST(int, (count_.i64[0] > 15 ? 
15 : count_.i64[0])); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vshlq_s16(a_.neon_i16, vdupq_n_s16(HEDLEY_STATIC_CAST(int16_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sra_epi16(a, count) (simde_mm_sra_epi16(a, count)) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sra_epi32 (simde__m128i a, simde__m128i count) { #if defined(SIMDE_X86_SSE2_NATIVE) && !defined(SIMDE_BUG_GCC_BAD_MM_SRA_EPI32) return _mm_sra_epi32(a, count); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), count_ = simde__m128i_to_private(count); const int cnt = count_.u64[0] > 31 ? 31 : HEDLEY_STATIC_CAST(int, count_.u64[0]); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vshlq_s32(a_.neon_i32, vdupq_n_s32(HEDLEY_STATIC_CAST(int32_t, -cnt))); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shr(a_.wasm_v128, HEDLEY_STATIC_CAST(uint32_t, cnt)); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] >> cnt; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sra_epi32(a, count) (simde_mm_sra_epi32(a, (count))) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 15))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i16 = a_.i16 << SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); #else const int s = (imm8 > HEDLEY_STATIC_CAST(int, sizeof(r_.i16[0]) * CHAR_BIT) - 1) ? 
0 : imm8; SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = HEDLEY_STATIC_CAST(int16_t, a_.i16[i] << s); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi16(a, imm8) _mm_slli_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_i16( \ ((imm8) > 15) ? \ vandq_s16(simde__m128i_to_neon_i16(a), vdupq_n_s16(0)) : \ vshlq_n_s16(simde__m128i_to_neon_i16(a), ((imm8) & 15)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ ((imm8 < 16) ? wasm_i16x8_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_slli_epi16(a, imm8) \ ((imm8 & ~15) ? simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sl(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi16(a, imm8) simde_mm_slli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 31))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i32 = a_.i32 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] << (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi32(a, imm8) _mm_slli_epi32(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_i32( \ ((imm8) > 31) ? 
\ vandq_s32(simde__m128i_to_neon_i32(a), vdupq_n_s32(0)) : \ vshlq_n_s32(simde__m128i_to_neon_i32(a), ((imm8) & 31)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ ((imm8 < 32) ? wasm_i32x4_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_slli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_altivec_i32( \ vec_sl(simde__m128i_to_altivec_i32(a), \ vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ } \ ret; \ })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi32(a, imm8) simde_mm_slli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_slli_epi64 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 63))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.i64 = a_.i64 << imm8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] << (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_slli_epi64(a, imm8) _mm_slli_epi64(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_slli_epi64(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_i64( \ ((imm8) > 63) ? \ vandq_s64(simde__m128i_to_neon_i64(a), vdupq_n_s64(0)) : \ vshlq_n_s64(simde__m128i_to_neon_i64(a), ((imm8) & 63)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_slli_epi64(a, imm8) \ ((imm8 < 64) ? 
wasm_i64x2_shl(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_slli_epi64(a, imm8) simde_mm_slli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi16 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 15))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u16 = a_.u16 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.u16[i] = a_.u16[i] >> (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi16(a, imm8) _mm_srli_epi16(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_u16( \ ((imm8) > 15) ? \ vandq_u16(simde__m128i_to_neon_u16(a), vdupq_n_u16(0)) : \ vshrq_n_u16(simde__m128i_to_neon_u16(a), ((imm8) & 15) | (((imm8) & 15) == 0)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ ((imm8 < 16) ? wasm_u16x8_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i16x8_const(0,0,0,0,0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_srli_epi16(a, imm8) \ ((imm8 & ~15) ? 
simde_mm_setzero_si128() : simde__m128i_from_altivec_i16(vec_sr(simde__m128i_to_altivec_i16(a), vec_splat_u16(HEDLEY_STATIC_CAST(unsigned short, imm8))))) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi16(a, imm8) simde_mm_srli_epi16(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi32 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { if (HEDLEY_UNLIKELY((imm8 > 31))) { return simde_mm_setzero_si128(); } simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) r_.u32 = a_.u32 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8 & 0xff); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.u32[i] = a_.u32[i] >> (imm8 & 0xff); } #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi32(a, imm8) _mm_srli_epi32(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_u32( \ ((imm8) > 31) ? \ vandq_u32(simde__m128i_to_neon_u32(a), vdupq_n_u32(0)) : \ vshrq_n_u32(simde__m128i_to_neon_u32(a), ((imm8) & 31) | (((imm8) & 31) == 0)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ ((imm8 < 32) ? 
wasm_u32x4_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i32x4_const(0,0,0,0)) #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) #define simde_mm_srli_epi32(a, imm8) \ (__extension__ ({ \ simde__m128i ret; \ if ((imm8) <= 0) { \ ret = a; \ } else if ((imm8) > 31) { \ ret = simde_mm_setzero_si128(); \ } else { \ ret = simde__m128i_from_altivec_i32( \ vec_sr(simde__m128i_to_altivec_i32(a), \ vec_splats(HEDLEY_STATIC_CAST(unsigned int, (imm8) & 31)))); \ } \ ret; \ })) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi32(a, imm8) simde_mm_srli_epi32(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_srli_epi64 (simde__m128i a, const int imm8) SIMDE_REQUIRE_CONSTANT_RANGE(imm8, 0, 255) { simde__m128i_private r_, a_ = simde__m128i_to_private(a); if (HEDLEY_UNLIKELY((imm8 & 63) != imm8)) return simde_mm_setzero_si128(); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u64 = vshlq_u64(a_.neon_u64, vdupq_n_s64(-imm8)); #else #if defined(SIMDE_VECTOR_SUBSCRIPT_SCALAR) && !defined(SIMDE_BUG_GCC_94488) r_.u64 = a_.u64 >> SIMDE_CAST_VECTOR_SHIFT_COUNT(8, imm8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.u64[i] = a_.u64[i] >> imm8; } #endif #endif return simde__m128i_from_private(r_); } #if defined(SIMDE_X86_SSE2_NATIVE) #define simde_mm_srli_epi64(a, imm8) _mm_srli_epi64(a, imm8) #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) #define simde_mm_srli_epi64(a, imm8) \ (((imm8) <= 0) ? \ (a) : \ simde__m128i_from_neon_u64( \ ((imm8) > 63) ? \ vandq_u64(simde__m128i_to_neon_u64(a), vdupq_n_u64(0)) : \ vshrq_n_u64(simde__m128i_to_neon_u64(a), ((imm8) & 63) | (((imm8) & 63) == 0)))) #elif defined(SIMDE_WASM_SIMD128_NATIVE) #define simde_mm_srli_epi64(a, imm8) \ ((imm8 < 64) ? 
wasm_u64x2_shr(simde__m128i_to_private(a).wasm_v128, imm8) : wasm_i64x2_const(0,0)) #endif #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_srli_epi64(a, imm8) simde_mm_srli_epi64(a, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_pd(mem_addr, a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), simde__m128d_to_private(a).neon_i64); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128d), &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_pd(mem_addr, a) simde_mm_store_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store1_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store1_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, vdupq_laneq_f64(a_.neon_f64, 0)); #else mem_addr[0] = a_.f64[0]; mem_addr[1] = a_.f64[0]; #endif #endif } #define simde_mm_store_pd1(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store1_pd(mem_addr, a) simde_mm_store1_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #define _mm_store_pd1(mem_addr, a) simde_mm_store_pd1(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_sd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_sd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) const simde_float64 v = vgetq_lane_f64(a_.neon_f64, 0); simde_memcpy(mem_addr, &v, sizeof(v)); #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) const int64_t v = vgetq_lane_s64(a_.neon_i64, 0); simde_memcpy(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), &v, sizeof(v)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 0); #else simde_float64 v = a_.f64[0]; simde_memcpy(mem_addr, &v, sizeof(simde_float64)); #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_sd(mem_addr, a) simde_mm_store_sd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_store_si128 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_store_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde__m128i_private a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s32(HEDLEY_REINTERPRET_CAST(int32_t*, mem_addr), a_.neon_i32); #else simde_memcpy(SIMDE_ALIGN_ASSUME_LIKE(mem_addr, simde__m128i), &a_, sizeof(a_)); #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_store_si128(mem_addr, a) simde_mm_store_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeh_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeh_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) *mem_addr = vgetq_lane_f64(a_.neon_f64, 1); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), a_.wasm_v128, 1); #else *mem_addr = a_.f64[1]; #endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeh_pd(mem_addr, a) simde_mm_storeh_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_epi64 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storel_epi64(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde__m128i_private a_ = 
simde__m128i_to_private(a); int64_t tmp; /* memcpy to prevent aliasing, tmp because we can't take the * address of a vector element. */ #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) tmp = vgetq_lane_s64(a_.neon_i64, 0); #elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) #if defined(SIMDE_BUG_GCC_95227) (void) a_; #endif tmp = vec_extract(a_.altivec_i64, 0); #else tmp = a_.i64[0]; #endif simde_memcpy(mem_addr, &tmp, sizeof(tmp)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storel_epi64(mem_addr, a) simde_mm_storel_epi64(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storel_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storel_pd(mem_addr, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store64_lane(HEDLEY_REINTERPRET_CAST(void*, mem_addr), simde__m128d_to_wasm_v128(a), 0); #else simde__m128d_private a_ = simde__m128d_to_private(a); simde_float64 tmp; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) tmp = vgetq_lane_f64(a_.neon_f64, 0); #else tmp = a_.f64[0]; #endif simde_memcpy(mem_addr, &tmp, sizeof(tmp)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storel_pd(mem_addr, a) simde_mm_storel_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storer_pd (simde_float64 mem_addr[2], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storer_pd(mem_addr, a); #else simde__m128d_private a_ = simde__m128d_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) vst1q_s64(HEDLEY_REINTERPRET_CAST(int64_t*, mem_addr), vextq_s64(a_.neon_i64, a_.neon_i64, 1)); #elif defined(SIMDE_WASM_SIMD128_NATIVE) a_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, a_.wasm_v128, 1, 0); simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); #elif defined(SIMDE_SHUFFLE_VECTOR_) a_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, a_.f64, 1, 0); simde_mm_store_pd(mem_addr, simde__m128d_from_private(a_)); #else mem_addr[0] = a_.f64[1]; mem_addr[1] = a_.f64[0]; 
#endif #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storer_pd(mem_addr, a) simde_mm_storer_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_pd (simde_float64* mem_addr, simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeu_pd(mem_addr, a); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) vst1q_f64(mem_addr, simde__m128d_to_private(a).neon_f64); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_pd(mem_addr, a) simde_mm_storeu_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si128 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_storeu_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si128(mem_addr, a) simde_mm_storeu_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si16 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si16(mem_addr, a); #else int16_t val = simde_x_mm_cvtsi128_si16(a); simde_memcpy(mem_addr, &val, sizeof(val)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si16(mem_addr, a) simde_mm_storeu_si16(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si32 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si32(mem_addr, a); #elif defined(SIMDE_WASM_SIMD128_NATIVE) wasm_v128_store32_lane(mem_addr, simde__m128i_to_wasm_v128(a), 0); #else int32_t val = simde_mm_cvtsi128_si32(a); simde_memcpy(mem_addr, &val, sizeof(val)); 
#endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si32(mem_addr, a) simde_mm_storeu_si32(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_storeu_si64 (void* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && ( \ SIMDE_DETECT_CLANG_VERSION_CHECK(8,0,0) || \ HEDLEY_GCC_VERSION_CHECK(11,0,0) || \ HEDLEY_INTEL_VERSION_CHECK(20,21,1)) _mm_storeu_si64(mem_addr, a); #else int64_t val = simde_mm_cvtsi128_si64(a); simde_memcpy(mem_addr, &val, sizeof(val)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_storeu_si64(mem_addr, a) simde_mm_storeu_si64(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_pd (simde_float64 mem_addr[HEDLEY_ARRAY_PARAM(2)], simde__m128d a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_stream_pd(mem_addr, a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_pd(mem_addr, a) simde_mm_stream_pd(HEDLEY_REINTERPRET_CAST(double*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si128 (simde__m128i* mem_addr, simde__m128i a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) _mm_stream_si128(HEDLEY_STATIC_CAST(__m128i*, mem_addr), a); #else simde_memcpy(mem_addr, &a, sizeof(a)); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_si128(mem_addr, a) simde_mm_stream_si128(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si32 (int32_t* mem_addr, int32_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_stream_si32(mem_addr, a); #else *mem_addr = a; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_stream_si32(mem_addr, a) simde_mm_stream_si32(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_stream_si64 (int64_t* mem_addr, int64_t a) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_ARCH_AMD64) && !defined(HEDLEY_MSVC_VERSION) _mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(long long int*, 
int64_t*, mem_addr), a); #else *mem_addr = a; #endif } #define simde_mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(mem_addr, a) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) || (defined(SIMDE_ENABLE_NATIVE_ALIASES) && !defined(SIMDE_ARCH_AMD64)) #define _mm_stream_si64(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) #define _mm_stream_si64x(mem_addr, a) simde_mm_stream_si64(SIMDE_CHECKED_REINTERPRET_CAST(int64_t*, __int64*, mem_addr), a) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vsubq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i8 = a_.i8 - b_.i8; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i8) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = a_.i8[i] - b_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi8(a, b) simde_mm_sub_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vsubq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i16 = a_.i16 - b_.i16; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i16) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = a_.i16[i] - b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if 
defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi16(a, b) simde_mm_sub_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vsubq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32 = a_.i32 - b_.i32; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32) / sizeof(r_.i32[0])) ; i++) { r_.i32[i] = a_.i32[i] - b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi32(a, b) simde_mm_sub_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_sub_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsubq_s64(a_.neon_i64, b_.neon_i64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_sub(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i64) / sizeof(r_.i64[0])) ; i++) { r_.i64[i] = a_.i64[i] - b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_epi64(a, b) simde_mm_sub_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_sub_epu32 (simde__m128i a, simde__m128i b) { simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.u32 = a_.u32 - b_.u32; #elif 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u32 = vsubq_u32(a_.neon_u32, b_.neon_u32); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.u32) / sizeof(r_.u32[0])) ; i++) { r_.u32[i] = a_.u32[i] - b_.u32[i]; } #endif return simde__m128i_from_private(r_); } SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sub_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.f64 = a_.f64 - b_.f64; #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vsubq_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_sub(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = a_.f64[i] - b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_pd(a, b) simde_mm_sub_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_sub_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_sub_sd(a, b); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) return simde_mm_move_sd(a, simde_mm_sub_pd(a, b)); #elif (SIMDE_NATURAL_VECTOR_SIZE > 0) return simde_mm_move_sd(a, simde_mm_sub_pd(simde_x_mm_broadcastlow_pd(a), simde_x_mm_broadcastlow_pd(b))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); r_.f64[0] = a_.f64[0] - b_.f64[0]; r_.f64[1] = a_.f64[1]; return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_sd(a, b) simde_mm_sub_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m64 simde_mm_sub_si64 (simde__m64 a, simde__m64 b) { #if defined(SIMDE_X86_SSE2_NATIVE) && defined(SIMDE_X86_MMX_NATIVE) return _mm_sub_si64(a, b); #else simde__m64_private r_, a_ = 
simde__m64_to_private(a), b_ = simde__m64_to_private(b); #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i64 = a_.i64 - b_.i64; #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i64 = vsub_s64(a_.neon_i64, b_.neon_i64); #else r_.i64[0] = a_.i64[0] - b_.i64[0]; #endif return simde__m64_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_sub_si64(a, b) simde_mm_sub_si64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i8 = vqsubq_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i8[0])) ; i++) { r_.i8[i] = simde_math_subs_i8(a_.i8[i], b_.i8[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epi8(a, b) simde_mm_subs_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i16 = vqsubq_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.i16[0])) ; i++) { r_.i16[i] = simde_math_subs_i16(a_.i16[i], b_.i16[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epi16(a, b) simde_mm_subs_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epu8 
(simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u8 = vqsubq_u8(a_.neon_u8, b_.neon_u8); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u8x16_sub_sat(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u8 = vec_subs(a_.altivec_u8, b_.altivec_u8); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u8[0])) ; i++) { r_.u8[i] = simde_math_subs_u8(a_.u8[i], b_.u8[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epu8(a, b) simde_mm_subs_epu8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_subs_epu16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_subs_epu16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_u16 = vqsubq_u16(a_.neon_u16, b_.neon_u16); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_u16x8_sub_sat(a_.wasm_v128, b_.wasm_v128); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_u16 = vec_subs(a_.altivec_u16, b_.altivec_u16); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_) / sizeof(r_.u16[0])) ; i++) { r_.u16[i] = simde_math_subs_u16(a_.u16[i], b_.u16[i]); } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_subs_epu16(a, b) simde_mm_subs_epu16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomieq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomieq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, 
a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_eq_b = vceqq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_eq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) == wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] == b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] == b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomieq_sd(a, b) simde_mm_ucomieq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomige_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomige_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_ge_b = vcgeq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_ge_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) >= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] >= b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] >= b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomige_sd(a, b) simde_mm_ucomige_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomigt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomigt_sd(a, b); #else simde__m128d_private a_ = 
simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_gt_b = vcgtq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_gt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) > wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] > b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] > b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomigt_sd(a, b) simde_mm_ucomigt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomile_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomile_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_le_b = vcleq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_le_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) <= wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] <= b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] <= b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomile_sd(a, b) simde_mm_ucomile_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int 
simde_mm_ucomilt_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomilt_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_or_b_nan = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vandq_u64(a_not_nan, b_not_nan)))); uint64x2_t a_lt_b = vcltq_f64(a_.neon_f64, b_.neon_f64); r = !!(vgetq_lane_u64(vorrq_u64(a_or_b_nan, a_lt_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) < wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] < b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) fesetenv(&envp); #else r = a_.f64[0] < b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomilt_sd(a, b) simde_mm_ucomilt_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES int simde_mm_ucomineq_sd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_ucomineq_sd(a, b); #else simde__m128d_private a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); int r; #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) uint64x2_t a_not_nan = vceqq_f64(a_.neon_f64, a_.neon_f64); uint64x2_t b_not_nan = vceqq_f64(b_.neon_f64, b_.neon_f64); uint64x2_t a_and_b_not_nan = vandq_u64(a_not_nan, b_not_nan); uint64x2_t a_neq_b = vreinterpretq_u64_u32(vmvnq_u32(vreinterpretq_u32_u64(vceqq_f64(a_.neon_f64, b_.neon_f64)))); r = !!(vgetq_lane_u64(vandq_u64(a_and_b_not_nan, a_neq_b), 0) != 0); #elif defined(SIMDE_WASM_SIMD128_NATIVE) return wasm_f64x2_extract_lane(a_.wasm_v128, 0) != wasm_f64x2_extract_lane(b_.wasm_v128, 0); #elif defined(SIMDE_HAVE_FENV_H) fenv_t envp; int x = feholdexcept(&envp); r = a_.f64[0] != b_.f64[0]; if (HEDLEY_LIKELY(x == 0)) 
fesetenv(&envp); #else r = a_.f64[0] != b_.f64[0]; #endif return r; #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_ucomineq_sd(a, b) simde_mm_ucomineq_sd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_lfence (void) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_lfence(); #else simde_mm_sfence(); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_lfence() simde_mm_lfence() #endif SIMDE_FUNCTION_ATTRIBUTES void simde_mm_mfence (void) { #if defined(SIMDE_X86_SSE2_NATIVE) _mm_mfence(); #else simde_mm_sfence(); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_mfence() simde_mm_mfence() #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpackhi_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip2q_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8_t a1 = vreinterpret_s8_s16(vget_high_s16(a_.neon_i16)); int8x8_t b1 = vreinterpret_s8_s16(vget_high_s16(b_.neon_i16)); int8x8x2_t result = vzip_s8(a1, b1); r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { r_.i8[(i * 2)] = a_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; r_.i8[(i * 2) + 1] = b_.i8[i + ((sizeof(r_) / sizeof(r_.i8[0])) / 2)]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_epi8(a, b) simde_mm_unpackhi_epi8(a, b) #endif 
SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpackhi_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip2q_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4_t a1 = vget_high_s16(a_.neon_i16); int16x4_t b1 = vget_high_s16(b_.neon_i16); int16x4x2_t result = vzip_s16(a1, b1); r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 4, 12, 5, 13, 6, 14, 7, 15); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 4, 12, 5, 13, 6, 14, 7, 15); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { r_.i16[(i * 2)] = a_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; r_.i16[(i * 2) + 1] = b_.i16[i + ((sizeof(r_) / sizeof(r_.i16[0])) / 2)]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_epi16(a, b) simde_mm_unpackhi_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpackhi_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip2q_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2_t a1 = vget_high_s32(a_.neon_i32); int32x2_t b1 = vget_high_s32(b_.neon_i32); int32x2x2_t result = vzip_s32(a1, b1); r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 2, 6, 3, 7); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = 
SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 2, 6, 3, 7); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { r_.i32[(i * 2)] = a_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; r_.i32[(i * 2) + 1] = b_.i32[i + ((sizeof(r_) / sizeof(r_.i32[0])) / 2)]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_epi32(a, b) simde_mm_unpackhi_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpackhi_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x1_t a_h = vget_high_s64(a_.neon_i64); int64x1_t b_h = vget_high_s64(b_.neon_i64); r_.neon_i64 = vcombine_s64(a_h, b_h); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 1, 3); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { r_.i64[(i * 2)] = a_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; r_.i64[(i * 2) + 1] = b_.i64[i + ((sizeof(r_) / sizeof(r_.i64[0])) / 2)]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_epi64(a, b) simde_mm_unpackhi_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_unpackhi_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpackhi_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vzip2q_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 1, 3); #elif 
defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 1, 3); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { r_.f64[(i * 2)] = a_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; r_.f64[(i * 2) + 1] = b_.f64[i + ((sizeof(r_) / sizeof(r_.f64[0])) / 2)]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpackhi_pd(a, b) simde_mm_unpackhi_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi8 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi8(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i8 = vzip1q_s8(a_.neon_i8, b_.neon_i8); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int8x8_t a1 = vreinterpret_s8_s16(vget_low_s16(a_.neon_i16)); int8x8_t b1 = vreinterpret_s8_s16(vget_low_s16(b_.neon_i16)); int8x8x2_t result = vzip_s8(a1, b1); r_.neon_i8 = vcombine_s8(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i8x16_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i8 = SIMDE_SHUFFLE_VECTOR_(8, 16, a_.i8, b_.i8, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i8[0])) / 2) ; i++) { r_.i8[(i * 2)] = a_.i8[i]; r_.i8[(i * 2) + 1] = b_.i8[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_epi8(a, b) simde_mm_unpacklo_epi8(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi16 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi16(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = 
simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i16 = vzip1q_s16(a_.neon_i16, b_.neon_i16); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int16x4_t a1 = vget_low_s16(a_.neon_i16); int16x4_t b1 = vget_low_s16(b_.neon_i16); int16x4x2_t result = vzip_s16(a1, b1); r_.neon_i16 = vcombine_s16(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i16x8_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 8, 1, 9, 2, 10, 3, 11); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i16 = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.i16, b_.i16, 0, 8, 1, 9, 2, 10, 3, 11); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i16[0])) / 2) ; i++) { r_.i16[(i * 2)] = a_.i16[i]; r_.i16[(i * 2) + 1] = b_.i16[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_epi16(a, b) simde_mm_unpacklo_epi16(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi32 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi32(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_i32 = vzip1q_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) int32x2_t a1 = vget_low_s32(a_.neon_i32); int32x2_t b1 = vget_low_s32(b_.neon_i32); int32x2x2_t result = vzip_s32(a1, b1); r_.neon_i32 = vcombine_s32(result.val[0], result.val[1]); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i32x4_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 4, 1, 5); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i32 = SIMDE_SHUFFLE_VECTOR_(32, 16, a_.i32, b_.i32, 0, 4, 1, 5); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i32[0])) / 2) ; i++) { r_.i32[(i * 2)] = a_.i32[i]; r_.i32[(i * 2) + 1] = b_.i32[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define 
_mm_unpacklo_epi32(a, b) simde_mm_unpacklo_epi32(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_unpacklo_epi64 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_epi64(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) int64x1_t a_l = vget_low_s64(a_.neon_i64); int64x1_t b_l = vget_low_s64(b_.neon_i64); r_.neon_i64 = vcombine_s64(a_l, b_l); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.i64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.i64, b_.i64, 0, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.i64[0])) / 2) ; i++) { r_.i64[(i * 2)] = a_.i64[i]; r_.i64[(i * 2) + 1] = b_.i64[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_epi64(a, b) simde_mm_unpacklo_epi64(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_mm_unpacklo_pd (simde__m128d a, simde__m128d b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_unpacklo_pd(a, b); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a), b_ = simde__m128d_to_private(b); #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vzip1q_f64(a_.neon_f64, b_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_i64x2_shuffle(a_.wasm_v128, b_.wasm_v128, 0, 2); #elif defined(SIMDE_SHUFFLE_VECTOR_) r_.f64 = SIMDE_SHUFFLE_VECTOR_(64, 16, a_.f64, b_.f64, 0, 2); #else SIMDE_VECTORIZE for (size_t i = 0 ; i < ((sizeof(r_) / sizeof(r_.f64[0])) / 2) ; i++) { r_.f64[(i * 2)] = a_.f64[i]; r_.f64[(i * 2) + 1] = b_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_unpacklo_pd(a, b) simde_mm_unpacklo_pd(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128d simde_x_mm_negate_pd(simde__m128d a) { #if 
defined(SIMDE_X86_SSE2_NATIVE) return simde_mm_xor_pd(a, _mm_set1_pd(SIMDE_FLOAT64_C(-0.0))); #else simde__m128d_private r_, a_ = simde__m128d_to_private(a); #if defined(SIMDE_POWER_ALTIVEC_P8_NATIVE) && \ (!defined(HEDLEY_GCC_VERSION) || HEDLEY_GCC_VERSION_CHECK(8,1,0)) r_.altivec_f64 = vec_neg(a_.altivec_f64); #elif defined(SIMDE_ARM_NEON_A64V8_NATIVE) r_.neon_f64 = vnegq_f64(a_.neon_f64); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_f64x2_neg(a_.wasm_v128); #elif defined(SIMDE_VECTOR_NEGATE) r_.f64 = -a_.f64; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.f64) / sizeof(r_.f64[0])) ; i++) { r_.f64[i] = -a_.f64[i]; } #endif return simde__m128d_from_private(r_); #endif } SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_mm_xor_si128 (simde__m128i a, simde__m128i b) { #if defined(SIMDE_X86_SSE2_NATIVE) return _mm_xor_si128(a, b); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a), b_ = simde__m128i_to_private(b); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = veorq_s32(a_.neon_i32, b_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = vec_xor(a_.altivec_i32, b_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_xor(b_.wasm_v128, a_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = a_.i32f ^ b_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = a_.i32f[i] ^ b_.i32f[i]; } #endif return simde__m128i_from_private(r_); #endif } #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _mm_xor_si128(a, b) simde_mm_xor_si128(a, b) #endif SIMDE_FUNCTION_ATTRIBUTES simde__m128i simde_x_mm_not_si128 (simde__m128i a) { #if defined(SIMDE_X86_AVX512VL_NATIVE) return _mm_ternarylogic_epi32(a, a, a, 0x55); #else simde__m128i_private r_, a_ = simde__m128i_to_private(a); #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) r_.neon_i32 = vmvnq_s32(a_.neon_i32); #elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE) r_.altivec_i32 = 
vec_nor(a_.altivec_i32, a_.altivec_i32); #elif defined(SIMDE_WASM_SIMD128_NATIVE) r_.wasm_v128 = wasm_v128_not(a_.wasm_v128); #elif defined(SIMDE_VECTOR_SUBSCRIPT_OPS) r_.i32f = ~a_.i32f; #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.i32f) / sizeof(r_.i32f[0])) ; i++) { r_.i32f[i] = ~(a_.i32f[i]); } #endif return simde__m128i_from_private(r_); #endif } #define SIMDE_MM_SHUFFLE2(x, y) (((x) << 1) | (y)) #if defined(SIMDE_X86_SSE2_ENABLE_NATIVE_ALIASES) #define _MM_SHUFFLE2(x, y) SIMDE_MM_SHUFFLE2(x, y) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP #endif /* !defined(SIMDE_X86_SSE2_H) */ /* :: End x86/sse2.h :: */ scikit-bio-0.6.2/skbio/000077500000000000000000000000001464262511300146675ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/__init__.py000066400000000000000000000047471464262511300170140ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- # ruff: noqa: D104 # Add skbio.io to sys.modules to prevent cycles in our imports import skbio.io # noqa # imports included for convenience from skbio.sequence import Sequence, DNA, RNA, Protein, GeneticCode, SubstitutionMatrix from skbio.stats.distance import DistanceMatrix from skbio.alignment import local_pairwise_align_ssw, TabularMSA from skbio.tree import TreeNode, nj from skbio.embedding import ProteinEmbedding from skbio.io import read, write from skbio.stats.ordination import OrdinationResults from skbio.table import Table from skbio.metadata import SampleMetadata import skbio.diversity # noqa import skbio.stats.evolve # noqa __all__ = [ "Sequence", "DNA", "RNA", "Protein", "GeneticCode", "SubstitutionMatrix", "DistanceMatrix", "TabularMSA", "local_pairwise_align_ssw", "TreeNode", "nj", "read", "write", "OrdinationResults", "Table", "SampleMetadata", ] __credits__ = "https://github.com/scikit-bio/scikit-bio/graphs/contributors" __version__ = "0.6.2" mottos = [ # 03/15/2014 "It's gonna get weird, bro.", # 05/14/2014 "no cog yay", # 03/18/2015 "bincount!", ] motto = mottos[-1] # Created at patorjk.com title = r""" * * _ _ _ _ _ _ (_) | (_) | | | (_) ___ ___ _| | ___| |_ ______| |__ _ ___ / __|/ __| | |/ / | __|______| '_ \| |/ _ \ \__ \ (__| | <| | |_ | |_) | | (_) | |___/\___|_|_|\_\_|\__| |_.__/|_|\___/ * * """ # Created by @gregcaporaso art = r""" Opisthokonta \ Amoebozoa \ / * Euryarchaeota \ |_ Crenarchaeota \ * \ / * / / / * / \ / \ Proteobacteria \ Cyanobacteria """ __doc__ = title + art + (__doc__ or "") scikit-bio-0.6.2/skbio/_base.py000066400000000000000000000032301464262511300163100ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import abc class SkbioObject(metaclass=abc.ABCMeta): """Abstract base class defining core API common to all scikit-bio objects. Public scikit-bio classes should subclass this class to ensure a common, core API is present. All abstract methods and properties defined here must be implemented in subclasses, otherwise they will not be instantiable. """ @abc.abstractmethod def __str__(self): raise NotImplementedError class ElasticLines: """Store blocks of content separated by dashed lines. Each dashed line (separator) is as long as the longest content (non-separator) line. """ def __init__(self): self._lines = [] self._separator_idxs = [] self._max_line_len = -1 def add_line(self, line): line_len = len(line) if line_len > self._max_line_len: self._max_line_len = line_len self._lines.append(line) def add_lines(self, lines): for line in lines: self.add_line(line) def add_separator(self): self._lines.append(None) self._separator_idxs.append(len(self._lines) - 1) def to_str(self): separator = "-" * self._max_line_len for idx in self._separator_idxs: self._lines[idx] = separator return "\n".join(self._lines) scikit-bio-0.6.2/skbio/alignment/000077500000000000000000000000001464262511300166455ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/alignment/__init__.py000066400000000000000000000150621464262511300207620ustar00rootroot00000000000000r"""Sequence Alignments (:mod:`skbio.alignment`) ============================================ .. currentmodule:: skbio.alignment This module provides functionality for computing and manipulating sequence alignments. DNA, RNA, and protein sequences can be aligned, as well as sequences with custom alphabets. Alignment structure ------------------- .. autosummary:: :toctree: generated/ TabularMSA AlignPath PairAlignPath Alignment algorithms -------------------- .. 
rubric:: Optimized (i.e., production-ready) algorithms .. autosummary:: :toctree: generated/ StripedSmithWaterman AlignmentStructure local_pairwise_align_ssw .. rubric:: Slow (i.e., educational-purposes only) algorithms .. autosummary:: :toctree: generated/ global_pairwise_align_nucleotide global_pairwise_align_protein global_pairwise_align local_pairwise_align_nucleotide local_pairwise_align_protein local_pairwise_align Deprecated functionality ^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: generated/ make_identity_substitution_matrix Tutorial -------- Alignment data structure ^^^^^^^^^^^^^^^^^^^^^^^^ Load two DNA sequences that have been previously aligned into a ``TabularMSA`` object, using sequence IDs as the MSA's index: >>> from skbio import TabularMSA, DNA >>> seqs = [DNA("ACC--G-GGTA..", metadata={'id': "seq1"}), ... DNA("TCC--G-GGCA..", metadata={'id': "seq2"})] >>> msa = TabularMSA(seqs, minter='id') >>> msa TabularMSA[DNA] ---------------------- Stats: sequence count: 2 position count: 13 ---------------------- ACC--G-GGTA.. TCC--G-GGCA.. >>> msa.index Index(['seq1', 'seq2'], dtype='object') Using the optimized alignment algorithm ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Using the convenient ``local_pairwise_align_ssw`` function: >>> from skbio.alignment import local_pairwise_align_ssw >>> alignment, score, start_end_positions = local_pairwise_align_ssw( ... DNA("ACTAAGGCTCTCTACCCCTCTCAGAGA"), ... DNA("ACTAAGGCTCCTAACCCCCTTTTCTCAGA") ... 
) >>> alignment TabularMSA[DNA] ------------------------------ Stats: sequence count: 2 position count: 30 ------------------------------ ACTAAGGCTCTCT-ACCCC----TCTCAGA ACTAAGGCTC-CTAACCCCCTTTTCTCAGA >>> score 27 >>> start_end_positions [(0, 24), (0, 28)] Using the ``StripedSmithWaterman`` object: >>> from skbio.alignment import StripedSmithWaterman >>> query = StripedSmithWaterman("ACTAAGGCTCTCTACCCCTCTCAGAGA") >>> alignment = query("AAAAAACTCTCTAAACTCACTAAGGCTCTCTACCCCTCTTCAGAGAAGTCGA") >>> print(alignment) ACTAAGGCTC... ACTAAGGCTC... Score: 49 Length: 28 Using the ``StripedSmithWaterman`` object for multiple targets in an efficient way and finding the aligned sequence representations: >>> from skbio.alignment import StripedSmithWaterman >>> alignments = [] >>> target_sequences = [ ... "GCTAACTAGGCTCCCTTCTACCCCTCTCAGAGA", ... "GCCCAGTAGCTTCCCAATATGAGAGCATCAATTGTAGATCGGGCC", ... "TCTATAAGATTCCGCATGCGTTACTTATAAGATGTCTCAACGG", ... "TAGAGATTAATTGCCACTGCCAAAATTCTG" ... ] >>> query_sequence = "ACTAAGGCTCTCTACCCCTCTCAGAGA" >>> query = StripedSmithWaterman(query_sequence) >>> for target_sequence in target_sequences: ... alignment = query(target_sequence) ... alignments.append(alignment) ... >>> print(alignments[0]) ACTAAGGCTC... ACT-AGGCTC... Score: 38 Length: 30 >>> print(alignments[0].aligned_query_sequence) ACTAAGGCTC---TCTACCCCTCTCAGAGA >>> print(alignments[0].aligned_target_sequence) ACT-AGGCTCCCTTCTACCCCTCTCAGAGA Using the slow alignment algorithm ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ scikit-bio also provides pure-Python implementations of Smith-Waterman and Needleman-Wunsch alignment. These are much slower than the methods described above, but serve as useful educational examples as they're simpler to experiment with. Functions are provided for local and global alignment of protein and nucleotide sequences. 
The ``global*`` and ``local*`` functions differ in the underlying algorithm that is applied (``global*`` uses Needleman- Wunsch while ``local*`` uses Smith-Waterman), and ``*protein`` and ``*nucleotide`` differ in their default scoring of matches, mismatches, and gaps. Here we locally align a pair of protein sequences using gap open penalty of 11 and a gap extend penalty of 1 (in other words, it is much more costly to open a new gap than extend an existing one). >>> from skbio import Protein >>> from skbio.alignment import local_pairwise_align_protein >>> s1 = Protein("HEAGAWGHEE") >>> s2 = Protein("PAWHEAE") >>> alignment, score, start_end_positions = local_pairwise_align_protein( ... s1, s2, 11, 1) This returns an ``skbio.TabularMSA`` object, the alignment score, and start/end positions of each aligned sequence: >>> alignment TabularMSA[Protein] --------------------- Stats: sequence count: 2 position count: 5 --------------------- AWGHE AW-HE >>> score 25.0 >>> start_end_positions [(4, 8), (1, 4)] Similarly, we can perform global alignment of nucleotide sequences: >>> from skbio import DNA >>> from skbio.alignment import global_pairwise_align_nucleotide >>> s1 = DNA("GCGTGCCTAAGGTATGCAAG") >>> s2 = DNA("ACGTGCCTAGGTACGCAAG") >>> alignment, score, start_end_positions = global_pairwise_align_nucleotide( ... s1, s2) >>> alignment TabularMSA[DNA] ---------------------- Stats: sequence count: 2 position count: 20 ---------------------- GCGTGCCTAAGGTATGCAAG ACGTGCCTA-GGTACGCAAG """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._tabular_msa import TabularMSA from ._pairwise import ( local_pairwise_align_nucleotide, local_pairwise_align_protein, local_pairwise_align, global_pairwise_align_nucleotide, global_pairwise_align_protein, global_pairwise_align, make_identity_substitution_matrix, local_pairwise_align_ssw, ) from skbio.alignment._ssw_wrapper import StripedSmithWaterman, AlignmentStructure from skbio.alignment._path import AlignPath, PairAlignPath __all__ = [ "TabularMSA", "StripedSmithWaterman", "AlignmentStructure", "AlignPath", "PairAlignPath", "local_pairwise_align_ssw", "global_pairwise_align", "global_pairwise_align_nucleotide", "global_pairwise_align_protein", "local_pairwise_align", "local_pairwise_align_nucleotide", "local_pairwise_align_protein", "make_identity_substitution_matrix", ] scikit-bio-0.6.2/skbio/alignment/_indexing.py000066400000000000000000000210561464262511300211670ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from abc import ABCMeta, abstractmethod import collections.abc import numpy as np import pandas as pd class _Indexing(metaclass=ABCMeta): def __init__(self, instance, axis=None): self._obj = instance self._axis = axis def __call__(self, axis=None): """Set the axis to index on.""" # verify axis param, discard value self._obj._is_sequence_axis(axis) return self.__class__(self._obj, axis=axis) def __getitem__(self, indexable): if self._axis is not None: if self._obj._is_sequence_axis(self._axis): return self._slice_on_first_axis(self._obj, indexable) else: return self._slice_on_second_axis(self._obj, indexable) if type(indexable) is tuple: if len(indexable) > 2: raise ValueError( "Can only slice on two axes. Tuple is length:" " %r" % len(indexable) ) elif len(indexable) > 1: return self._handle_both_axes(*indexable) else: (indexable,) = indexable return self._slice_on_first_axis(self._obj, indexable) def _handle_both_axes(self, seq_index, pos_index): seq_index = self._convert_ellipsis(seq_index) pos_index = self._convert_ellipsis(pos_index) if not hasattr(seq_index, "__iter__") and seq_index == slice(None): # Only slice second axis return self._slice_on_second_axis(self._obj, pos_index) else: r = self._slice_on_first_axis(self._obj, seq_index) if type(r) is self._obj.dtype: # [1, 1] [1, *] return r[pos_index] else: # [*, 1] [*, *] return self._slice_on_second_axis(r, pos_index) def _slice_on_second_axis(self, obj, indexable): indexable = self._convert_ellipsis(indexable) if self.is_scalar(indexable, axis=1): # [..., 1] return self._get_position(obj, indexable) else: # [..., *] return self._slice_positions(obj, indexable) def _slice_on_first_axis(self, obj, indexable): indexable = self._convert_ellipsis(indexable) if self.is_scalar(indexable, axis=0): # [1] return self._get_sequence(obj, indexable) else: # [*] return self._slice_sequences(obj, indexable) def _convert_ellipsis(self, 
indexable): if indexable is Ellipsis: return slice(None) return indexable @abstractmethod def is_scalar(self, indexable, axis): raise NotImplementedError @abstractmethod def _get_sequence(self, obj, indexable): raise NotImplementedError @abstractmethod def _slice_sequences(self, obj, indexable): raise NotImplementedError def _get_position(self, obj, indexable): return obj._get_position_(indexable) def _slice_positions(self, obj, indexable): indexable = self._assert_bool_vector_right_size(indexable, axis=1) indexable = self._convert_iterable_of_slices(indexable) return obj._slice_positions_(indexable) def _convert_iterable_of_slices(self, indexable): # _assert_bool_vector_right_size will have converted the iterable to # an ndarray if it wasn't yet. if isinstance(indexable, np.ndarray) and indexable.dtype == object: indexable = np.r_[tuple(indexable)] return indexable def _assert_bool_vector_right_size(self, indexable, axis): if isinstance(indexable, np.ndarray): pass elif hasattr(indexable, "__iter__"): indexable = np.asarray(list(indexable)) else: return indexable if indexable.dtype == bool and len(indexable) != self._obj.shape[axis]: raise IndexError( "Boolean index's length (%r) does not match the" " axis length (%r)" % (len(indexable), self._obj.shape[axis]) ) return indexable class TabularMSAILoc(_Indexing): def is_scalar(self, indexable, axis): return np.isscalar(indexable) def _get_sequence(self, obj, indexable): return obj._get_sequence_iloc_(indexable) def _slice_sequences(self, obj, indexable): indexable = self._assert_bool_vector_right_size(indexable, axis=0) indexable = self._convert_iterable_of_slices(indexable) return obj._slice_sequences_iloc_(indexable) class TabularMSALoc(_Indexing): def is_scalar(self, indexable, axis): """Check if an indexable object is scalar or not. Sometimes (MultiIndex!) something that looks like a scalar, isn't and vice-versa. Consider: A 0 1 2 B 0 1 2 'A' looks like a scalar, but isn't. 
('A', 0) doesn't look like a scalar, but it is. """ index = self._obj.index complete_key = False partial_key = False duplicated_key = False if not isinstance(indexable, collections.abc.Hashable): return False if axis == 0 and self._has_fancy_index(): try: if type(indexable) is tuple: complete_key = ( len(indexable) == len(index.levshape) and indexable in index ) partial_key = not complete_key and indexable in index except ( TypeError, pd.errors.InvalidIndexError, ): # Unhashable type, no biggie pass if index.has_duplicates: # for a given Index object index, # index[index.duplicated()].unique() is pandas' recommended # replacement for index.get_duplicates(), per the pandas 0.23 docs duplicated_key = indexable in index[index.duplicated()].unique() return not duplicated_key and ( (np.isscalar(indexable) and not partial_key) or complete_key ) def _get_sequence(self, obj, indexable): self._assert_tuple_rules(indexable) return obj._get_sequence_loc_(indexable) def _slice_sequences(self, obj, indexable): self._assert_tuple_rules(indexable) if ( self._has_fancy_index() and type(indexable) is not tuple and pd.api.types.is_list_like(indexable) and len(indexable) > 0 ): if not self.is_scalar(indexable[0], axis=0): raise TypeError( "A list is used with complete labels, try" " using a tuple to indicate independent" " selections of a `pd.MultiIndex`." ) # prevents # pd.Series.loc[['x', 'b', 'b', 'a']] from being interepereted as # pd.Series.loc[[('a', 0), ('b', 1)]] who knows why it does this. elif indexable[0] not in self._obj.index: raise KeyError(repr(indexable[0])) # pandas acts normal if the first element is actually a scalar self._assert_bool_vector_right_size(indexable, axis=0) return obj._slice_sequences_loc_(indexable) def _assert_tuple_rules(self, indexable): # pandas is scary in what it will accept sometimes... 
if type(indexable) is tuple: if not self._has_fancy_index(): # prevents unfriendly errors raise TypeError( "Cannot provide a tuple to the first axis of" " `loc` unless the MSA's `index` is a" " `pd.MultiIndex`." ) elif self.is_scalar(indexable[0], axis=0): # prevents unreasonable results # pd.Series.loc[('a', 0), ('b', 1)] would be interpreted as # pd.Series.loc[('a', 1)] which is horrifying. raise TypeError( "A tuple provided to the first axis of `loc`" " represents a selection for each index of a" " `pd.MultiIndex`; it should not contain a" " complete label." ) def _has_fancy_index(self): return hasattr(self._obj.index, "levshape") scikit-bio-0.6.2/skbio/alignment/_lib/000077500000000000000000000000001464262511300175525ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/alignment/_lib/__init__.py000066400000000000000000000005411464262511300216630ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/alignment/_lib/ssw.c000066400000000000000000000764061464262511300205470ustar00rootroot00000000000000/* The MIT License Copyright (c) 2012-1015 Boston College. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ /* Contact: Mengyao Zhao */ /* * ssw.c * * Created by Mengyao Zhao on 6/22/10. * Copyright 2010 Boston College. All rights reserved. * Version 0.1.4 * Last revision by Mengyao Zhao on 12/07/12. * */ #define SIMDE_ENABLE_NATIVE_ALIASES #include "simde-sse2.h" #include #include #include #include #include #include "ssw.h" #ifdef __GNUC__ #define LIKELY(x) __builtin_expect((x),1) #define UNLIKELY(x) __builtin_expect((x),0) #else #define LIKELY(x) (x) #define UNLIKELY(x) (x) #endif /* Convert the coordinate in the scoring matrix into the coordinate in one line of the band. */ #define set_u(u, w, i, j) { int x=(i)-(w); x=x>0?x:0; (u)=(j)-x+1; } /* Convert the coordinate in the direction matrix into the coordinate in one line of the band. */ #define set_d(u, w, i, j, p) { int x=(i)-(w); x=x>0?x:0; x=(j)-x; (u)=x*3+p; } /*! @function @abstract Round an integer to the next closest power-2 integer. @param x integer to be rounded (in place) @discussion x will be modified. 
*/ #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) typedef struct { uint16_t score; int32_t ref; //0-based position int32_t read; //alignment ending position on read, 0-based } alignment_end; typedef struct { uint32_t* seq; int32_t length; } cigar; struct _profile{ __m128i* profile_byte; // 0: none __m128i* profile_word; // 0: none const int8_t* read; const int8_t* mat; int32_t readLen; int32_t n; uint8_t bias; }; /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. */ __m128i* qP_byte (const int8_t* read_num, const int8_t* mat, const int32_t readLen, const int32_t n, /* the edge length of the squre matrix mat */ uint8_t bias) { int32_t segLen = (readLen + 15) / 16; /* Split the 128 bit register into 16 pieces. Each piece is 8 bit. Split the read into 16 segments. Calculat 16 segments in parallel. */ __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i)); int8_t* t = (int8_t*)vProfile; int32_t nt, i, j, segNum; /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */ for (nt = 0; LIKELY(nt < n); nt ++) { for (i = 0; i < segLen; i ++) { j = i; for (segNum = 0; LIKELY(segNum < 16) ; segNum ++) { *t++ = j>= readLen ? bias : mat[nt * n + read_num[j]] + bias; j += segLen; } } } return vProfile; } /* Striped Smith-Waterman Record the highest score of each reference position. Return the alignment score and ending position of the best alignment, 2nd best alignment, etc. Gap begin and gap extension are different. wight_match > 0, all other weights < 0. The returned positions are 0-based. 
*/ alignment_end* sw_sse2_byte (const int8_t* ref, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t refLen, int32_t readLen, const uint8_t weight_gapO, /* will be used as - */ const uint8_t weight_gapE, /* will be used as - */ __m128i* vProfile, uint8_t terminate, /* the best alignment score: used to terminate the matrix calculation when locating the alignment beginning point. If this score is set to 0, it will not be used */ uint8_t bias, /* Shift 0 point to a positive value. */ int32_t maskLen) { #define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \ (m) = _mm_extract_epi16((vm), 0) uint8_t max = 0; /* the max alignment score */ int32_t end_read = readLen - 1; int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */ int32_t segLen = (readLen + 15) / 16; /* number of segment */ /* array to record the largest score of each reference position */ uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1); /* array to record the alignment read ending position of the largest score of each reference position */ int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t)); /* Define 16 byte 0 vector. */ __m128i vZero = _mm_set1_epi32(0); __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i)); int32_t i, j; /* 16 byte insertion begin vector */ __m128i vGapO = _mm_set1_epi8(weight_gapO); /* 16 byte insertion extension vector */ __m128i vGapE = _mm_set1_epi8(weight_gapE); /* 16 byte bias vector */ __m128i vBias = _mm_set1_epi8(bias); __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. 
*/ __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */ __m128i vTemp; int32_t edge, begin = 0, end = refLen, step = 1; // int32_t distance = readLen * 2 / 3; // int32_t distance = readLen / 2; // int32_t distance = readLen; /* outer loop to process the reference sequence */ if (ref_dir == 1) { begin = refLen - 1; end = -1; step = -1; } for (i = begin; LIKELY(i != end); i += step) { int32_t cmp; __m128i e = vZero, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0. Any errors to vH values will be corrected in the Lazy_F loop. */ // max16(maxColumn[i], vMaxColumn); // fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]); __m128i vH = pvHStore[segLen - 1]; vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */ __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */ /* Swap the 2 H buffers. */ __m128i* pv = pvHLoad; pvHLoad = pvHStore; pvHStore = pv; /* inner loop to process the query sequence */ for (j = 0; LIKELY(j < segLen); ++j) { vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j)); vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */ // max16(maxColumn[i], vH); // fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]); // int8_t* t; // int32_t ti; //for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++); /* Get max from vH, vE and vF. */ e = _mm_load_si128(pvE + j); vH = _mm_max_epu8(vH, e); vH = _mm_max_epu8(vH, vF); vMaxColumn = _mm_max_epu8(vMaxColumn, vH); // max16(maxColumn[i], vMaxColumn); // fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]); // for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++); /* Save vH values. */ _mm_store_si128(pvHStore + j, vH); /* Update vE value. */ vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */ e = _mm_subs_epu8(e, vGapE); e = _mm_max_epu8(e, vH); _mm_store_si128(pvE + j, e); /* Update vF value. */ vF = _mm_subs_epu8(vF, vGapE); vF = _mm_max_epu8(vF, vH); /* Load the next vH. 
*/ vH = _mm_load_si128(pvHLoad + j); } /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */ /* reset pointers to the start of the saved data */ j = 0; vH = _mm_load_si128 (pvHStore + j); /* the computed vF value is for the given column. since */ /* we are at the end, we need to shift the vF value over */ /* to the next column. */ vF = _mm_slli_si128 (vF, 1); vTemp = _mm_subs_epu8 (vH, vGapO); vTemp = _mm_subs_epu8 (vF, vTemp); vTemp = _mm_cmpeq_epi8 (vTemp, vZero); cmp = _mm_movemask_epi8 (vTemp); while (cmp != 0xffff) { vH = _mm_max_epu8 (vH, vF); vMaxColumn = _mm_max_epu8(vMaxColumn, vH); _mm_store_si128 (pvHStore + j, vH); vF = _mm_subs_epu8 (vF, vGapE); j++; if (j >= segLen) { j = 0; vF = _mm_slli_si128 (vF, 1); } vH = _mm_load_si128 (pvHStore + j); vTemp = _mm_subs_epu8 (vH, vGapO); vTemp = _mm_subs_epu8 (vF, vTemp); vTemp = _mm_cmpeq_epi8 (vTemp, vZero); cmp = _mm_movemask_epi8 (vTemp); } vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn); vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore); cmp = _mm_movemask_epi8(vTemp); if (cmp != 0xffff) { uint8_t temp; vMaxMark = vMaxScore; max16(temp, vMaxScore); vMaxScore = vMaxMark; if (LIKELY(temp > max)) { max = temp; if (max + bias >= 255) break; //overflow end_ref = i; /* Store the column with the highest alignment score in order to trace the alignment ending position on read. */ for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j]; } } /* Record the max score of current column. */ max16(maxColumn[i], vMaxColumn); // fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]); if (maxColumn[i] == terminate) break; } /* Trace the alignment ending position on read. 
*/ uint8_t *t = (uint8_t*)pvHmax; int32_t column_len = segLen * 16; for (i = 0; LIKELY(i < column_len); ++i, ++t) { int32_t temp; if (*t == max) { temp = i / 16 + i % 16 * segLen; if (temp < end_read) end_read = temp; } } free(pvHmax); free(pvE); free(pvHLoad); free(pvHStore); /* Find the most possible 2nd best alignment. */ alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end)); bests[0].score = max + bias >= 255 ? 255 : max; bests[0].ref = end_ref; bests[0].read = end_read; bests[1].score = 0; bests[1].ref = 0; bests[1].read = 0; edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0; for (i = 0; i < edge; i ++) { // fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]); if (maxColumn[i] > bests[1].score) { bests[1].score = maxColumn[i]; bests[1].ref = i; } } edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen); for (i = edge + 1; i < refLen; i ++) { // fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]); if (maxColumn[i] > bests[1].score) { bests[1].score = maxColumn[i]; bests[1].ref = i; } } free(maxColumn); free(end_read_column); return bests; } __m128i* qP_word (const int8_t* read_num, const int8_t* mat, const int32_t readLen, const int32_t n) { int32_t segLen = (readLen + 7) / 8; __m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i)); int16_t* t = (int16_t*)vProfile; int32_t nt, i, j; int32_t segNum; /* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */ for (nt = 0; LIKELY(nt < n); nt ++) { for (i = 0; i < segLen; i ++) { j = i; for (segNum = 0; LIKELY(segNum < 8) ; segNum ++) { *t++ = j>= readLen ? 
0 : mat[nt * n + read_num[j]]; j += segLen; } } } return vProfile; } alignment_end* sw_sse2_word (const int8_t* ref, int8_t ref_dir, // 0: forward ref; 1: reverse ref int32_t refLen, int32_t readLen, const uint8_t weight_gapO, /* will be used as - */ const uint8_t weight_gapE, /* will be used as - */ __m128i* vProfile, uint16_t terminate, int32_t maskLen) { #define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \ (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \ (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \ (m) = _mm_extract_epi16((vm), 0) uint16_t max = 0; /* the max alignment score */ int32_t end_read = readLen - 1; int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */ int32_t segLen = (readLen + 7) / 8; /* number of segment */ /* array to record the largest score of each reference position */ uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2); /* array to record the alignment read ending position of the largest score of each reference position */ int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t)); /* Define 16 byte 0 vector. */ __m128i vZero = _mm_set1_epi32(0); __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i)); __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i)); int32_t i, j, k; /* 16 byte insertion begin vector */ __m128i vGapO = _mm_set1_epi16(weight_gapO); /* 16 byte insertion extension vector */ __m128i vGapE = _mm_set1_epi16(weight_gapE); /* 16 byte bias vector */ __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */ __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. 
*/ __m128i vTemp; int32_t edge, begin = 0, end = refLen, step = 1; /* outer loop to process the reference sequence */ if (ref_dir == 1) { begin = refLen - 1; end = -1; step = -1; } for (i = begin; LIKELY(i != end); i += step) { int32_t cmp; __m128i e = vZero, vF = vZero; /* Initialize F value to 0. Any errors to vH values will be corrected in the Lazy_F loop. */ __m128i vH = pvHStore[segLen - 1]; vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */ /* Swap the 2 H buffers. */ __m128i* pv = pvHLoad; __m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */ __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */ pvHLoad = pvHStore; pvHStore = pv; /* inner loop to process the query sequence */ for (j = 0; LIKELY(j < segLen); j ++) { vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j)); /* Get max from vH, vE and vF. */ e = _mm_load_si128(pvE + j); vH = _mm_max_epi16(vH, e); vH = _mm_max_epi16(vH, vF); vMaxColumn = _mm_max_epi16(vMaxColumn, vH); /* Save vH values. */ _mm_store_si128(pvHStore + j, vH); /* Update vE value. */ vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */ e = _mm_subs_epu16(e, vGapE); e = _mm_max_epi16(e, vH); _mm_store_si128(pvE + j, e); /* Update vF value. */ vF = _mm_subs_epu16(vF, vGapE); vF = _mm_max_epi16(vF, vH); /* Load the next vH. */ vH = _mm_load_si128(pvHLoad + j); } /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */ for (k = 0; LIKELY(k < 8); ++k) { vF = _mm_slli_si128 (vF, 2); for (j = 0; LIKELY(j < segLen); ++j) { vH = _mm_load_si128(pvHStore + j); vH = _mm_max_epi16(vH, vF); _mm_store_si128(pvHStore + j, vH); vH = _mm_subs_epu16(vH, vGapO); vF = _mm_subs_epu16(vF, vGapE); if (UNLIKELY(! 
_mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end; } } end: vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn); vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore); cmp = _mm_movemask_epi8(vTemp); if (cmp != 0xffff) { uint16_t temp; vMaxMark = vMaxScore; max8(temp, vMaxScore); vMaxScore = vMaxMark; if (LIKELY(temp > max)) { max = temp; end_ref = i; for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j]; } } /* Record the max score of current column. */ max8(maxColumn[i], vMaxColumn); if (maxColumn[i] == terminate) break; } /* Trace the alignment ending position on read. */ uint16_t *t = (uint16_t*)pvHmax; int32_t column_len = segLen * 8; for (i = 0; LIKELY(i < column_len); ++i, ++t) { int32_t temp; if (*t == max) { temp = i / 8 + i % 8 * segLen; if (temp < end_read) end_read = temp; } } free(pvHmax); free(pvE); free(pvHLoad); free(pvHStore); /* Find the most possible 2nd best alignment. */ alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end)); bests[0].score = max; bests[0].ref = end_ref; bests[0].read = end_read; bests[1].score = 0; bests[1].ref = 0; bests[1].read = 0; edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0; for (i = 0; i < edge; i ++) { if (maxColumn[i] > bests[1].score) { bests[1].score = maxColumn[i]; bests[1].ref = i; } } edge = (end_ref + maskLen) > refLen ? 
refLen : (end_ref + maskLen); for (i = edge; i < refLen; i ++) { if (maxColumn[i] > bests[1].score) { bests[1].score = maxColumn[i]; bests[1].ref = i; } } free(maxColumn); free(end_read_column); return bests; } cigar* banded_sw (const int8_t* ref, const int8_t* read, int32_t refLen, int32_t readLen, int32_t score, const uint32_t weight_gapO, /* will be used as - */ const uint32_t weight_gapE, /* will be used as - */ int32_t band_width, const int8_t* mat, /* pointer to the weight matrix */ int32_t n) { uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1; int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, s2 = 1024, l, max = 0; int32_t width, width_d, *h_b, *e_b, *h_c; int8_t *direction, *direction_line; cigar* result = (cigar*)malloc(sizeof(cigar)); h_b = (int32_t*)malloc(s1 * sizeof(int32_t)); e_b = (int32_t*)malloc(s1 * sizeof(int32_t)); h_c = (int32_t*)malloc(s1 * sizeof(int32_t)); direction = (int8_t*)malloc(s2 * sizeof(int8_t)); do { width = band_width * 2 + 3, width_d = band_width * 2 + 1; while (width >= s1) { ++s1; kroundup32(s1); h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t)); e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t)); h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t)); } while (width_d * readLen * 3 >= s2) { ++s2; kroundup32(s2); if (s2 < 0) { fprintf(stderr, "Alignment score and position are not consensus.\n"); exit(1); } direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t)); } direction_line = direction; for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0; for (i = 0; LIKELY(i < readLen); i ++) { int32_t beg = 0, end = refLen - 1, u = 0, edge; j = i - band_width; beg = beg > j ? beg : j; // band start j = i + band_width; end = end < j ? end : j; // band end edge = end + 1 < width - 1 ? 
end + 1 : width - 1; f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0; direction_line = direction + width_d * i * 3; for (j = beg; LIKELY(j <= end); j ++) { int32_t b, e1, f1, d, de, df, dh; set_u(u, band_width, i, j); set_u(e, band_width, i - 1, j); set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1); set_d(de, band_width, i, j, 0); set_d(df, band_width, i, j, 1); set_d(dh, band_width, i, j, 2); temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO; temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE; e_b[u] = temp1 > temp2 ? temp1 : temp2; direction_line[de] = temp1 > temp2 ? 3 : 2; temp1 = h_c[b] - weight_gapO; temp2 = f - weight_gapE; f = temp1 > temp2 ? temp1 : temp2; direction_line[df] = temp1 > temp2 ? 5 : 4; e1 = e_b[u] > 0 ? e_b[u] : 0; f1 = f > 0 ? f : 0; temp1 = e1 > f1 ? e1 : f1; temp2 = h_b[d] + mat[ref[j] * n + read[i]]; h_c[u] = temp1 > temp2 ? temp1 : temp2; if (h_c[u] > max) max = h_c[u]; if (temp1 <= temp2) direction_line[dh] = 1; else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df]; } for (j = 1; j <= u; j ++) h_b[j] = h_c[j]; } band_width *= 2; } while (LIKELY(max < score)); band_width /= 2; // trace back i = readLen - 1; j = refLen - 1; e = 0; // Count the number of M, D or I. 
l = 0; // record length of current cigar f = max = 0; // M temp2 = 2; // h while (LIKELY(i > 0)) { set_d(temp1, band_width, i, j, temp2); switch (direction_line[temp1]) { case 1: --i; --j; temp2 = 2; direction_line -= width_d * 3; f = 0; // M break; case 2: --i; temp2 = 0; // e direction_line -= width_d * 3; f = 1; // I break; case 3: --i; temp2 = 2; direction_line -= width_d * 3; f = 1; // I break; case 4: --j; temp2 = 1; f = 2; // D break; case 5: --j; temp2 = 2; f = 2; // D break; default: fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]); return 0; } if (f == max) ++e; else { ++l; while (l >= s) { ++s; kroundup32(s); c = (uint32_t*)realloc(c, s * sizeof(uint32_t)); } c[l - 1] = e<<4|max; max = f; e = 1; } } if (f == 0) { ++l; while (l >= s) { ++s; kroundup32(s); c = (uint32_t*)realloc(c, s * sizeof(uint32_t)); } c[l - 1] = (e+1)<<4; }else { l += 2; while (l >= s) { ++s; kroundup32(s); c = (uint32_t*)realloc(c, s * sizeof(uint32_t)); } c[l - 2] = e<<4|f; c[l - 1] = 16; // 1M } // reverse cigar c1 = (uint32_t*)malloc(l * sizeof(uint32_t)); s = 0; e = l - 1; while (LIKELY(s <= e)) { c1[s] = c[e]; c1[e] = c[s]; ++ s; -- e; } result->seq = c1; result->length = l; free(direction); free(h_c); free(e_b); free(h_b); free(c); return result; } int8_t* seq_reverse(const int8_t* seq, int32_t end) /* end is 0-based alignment ending position */ { int8_t* reverse = (int8_t*)calloc(end + 1, sizeof(int8_t)); int32_t start = 0; while (LIKELY(start <= end)) { reverse[start] = seq[end]; reverse[end] = seq[start]; ++ start; -- end; } return reverse; } s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) { s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile)); p->profile_byte = 0; p->profile_word = 0; p->bias = 0; if (score_size == 0 || score_size == 2) { /* Find the bias to use in the substitution matrix */ int32_t bias = 0, i; for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = 
mat[i]; bias = abs(bias); p->bias = bias; p->profile_byte = qP_byte (read, mat, readLen, n, bias); } if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n); p->read = read; p->mat = mat; p->readLen = readLen; p->n = n; return p; } void init_destroy (s_profile* p) { free(p->profile_byte); free(p->profile_word); free(p); } s_align* ssw_align (const s_profile* prof, const int8_t* ref, int32_t refLen, const uint8_t weight_gapO, const uint8_t weight_gapE, const uint8_t flag, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled const uint16_t filters, const int32_t filterd, const int32_t maskLen) { alignment_end* bests = 0, *bests_reverse = 0; __m128i* vP = 0; int32_t word = 0, band_width = 0, readLen = prof->readLen; int8_t* read_reverse = 0; cigar* path; s_align* r = (s_align*)calloc(1, sizeof(s_align)); r->ref_begin1 = -1; r->read_begin1 = -1; r->cigar = 0; r->cigarLen = 0; if (maskLen < 15) { fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n"); } // Find the alignment scores and ending positions if (prof->profile_byte) { bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen); if (prof->profile_word && bests[0].score == 255) { free(bests); bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen); word = 1; } else if (bests[0].score == 255) { fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n"); return 0; } }else if (prof->profile_word) { bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, 
maskLen); word = 1; }else { fprintf(stderr, "Please call the function ssw_init before ssw_align.\n"); return 0; } r->score1 = bests[0].score; r->ref_end1 = bests[0].ref; r->read_end1 = bests[0].read; if (maskLen >= 15) { r->score2 = bests[1].score; r->ref_end2 = bests[1].ref; } else { r->score2 = 0; r->ref_end2 = -1; } free(bests); if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end; // Find the beginning position of the best alignment. read_reverse = seq_reverse(prof->read, r->read_end1); if (word == 0) { vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias); bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen); } else { vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n); bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen); } free(vP); free(read_reverse); r->ref_begin1 = bests_reverse[0].ref; r->read_begin1 = r->read_end1 - bests_reverse[0].read; free(bests_reverse); if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end; // Generate cigar. refLen = r->ref_end1 - r->ref_begin1 + 1; readLen = r->read_end1 - r->read_begin1 + 1; band_width = abs(refLen - readLen) + 1; path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n); if (path == 0) r = 0; else { r->cigar = path->seq; r->cigarLen = path->length; free(path); } end: return r; } void align_destroy (s_align* a) { free(a->cigar); free(a); } scikit-bio-0.6.2/skbio/alignment/_lib/ssw.h000066400000000000000000000157641464262511300205540ustar00rootroot00000000000000/* * ssw.h * * Created by Mengyao Zhao on 6/22/10. * Copyright 2010 Boston College. All rights reserved. 
* Version 0.1.4 * Last revision by Mengyao Zhao on 01/30/13. * */ #ifndef SSW_H #define SSW_H #include #include #include #define SIMDE_ENABLE_NATIVE_ALIASES #include "simde-sse2.h" /*! @typedef structure of the query profile */ struct _profile; typedef struct _profile s_profile; /*! @typedef structure of the alignment result @field score1 the best alignment score @field score2 sub-optimal alignment score @field ref_begin1 0-based best alignment beginning position on reference; ref_begin1 = -1 when the best alignment beginning position is not available @field ref_end1 0-based best alignment ending position on reference @field read_begin1 0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning position is not available @field read_end1 0-based best alignment ending position on read @field read_end2 0-based sub-optimal alignment ending position on read @field cigar best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2); cigar = 0 when the best alignment path is not available @field cigarLen length of the cigar string; cigarLen = 0 when the best alignment path is not available */ typedef struct { uint16_t score1; uint16_t score2; int32_t ref_begin1; int32_t ref_end1; int32_t read_begin1; int32_t read_end1; int32_t ref_end2; uint32_t* cigar; int32_t cigarLen; } s_align; #ifdef __cplusplus extern "C" { #endif // __cplusplus /*! @function Create the query profile using the query sequence. 
@param read pointer to the query sequence; the query sequence needs to be numbers @param readLen length of the query sequence @param mat pointer to the substitution matrix; mat needs to be corresponding to the read sequence @param n the square root of the number of elements in mat (mat has n*n elements) @param score_size estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if your estimated best alignment score >= 255, please set 1; if you don't know, please set 2 @return pointer to the query profile structure @note example for parameter read and mat: If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142 Then if the penalty for match is 2 and for mismatch is -2, the substitution matrix of parameter mat will be: //A C G T 2 -2 -2 -2 //A -2 2 -2 -2 //C -2 -2 2 -2 //G -2 -2 -2 2 //T mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2} */ s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size); /*! @function Release the memory allocated by function ssw_init. @param p pointer to the query profile structure */ void init_destroy (s_profile* p); // @function ssw alignment. /*! @function Do Striped Smith-Waterman alignment. 
@param prof pointer to the query profile structure @param ref pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of function ssw_init @param refLen length of the target sequence @param weight_gapO the absolute value of gap open penalty @param weight_gapE the absolute value of gap extension penalty @param flag bitwise FLAG; (from high to low) bit 5: when setted as 1, function ssw_align will return the best alignment beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1 < filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only the optimal and sub-optimal scores and the optimal alignment ending position will be returned. @param filters score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the decription of the flag parameter for detailed usage.) @param filterd distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check the decription of the flag parameter for detailed usage.) @param maskLen The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT return the suboptimal alignment information. 
Detailed description of maskLen: After locating the optimal alignment ending position, the suboptimal alignment score can be heuristically found by checking the second largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest score from the unmasked elements. @return pointer to the alignment result structure @note Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score, and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are 0-based coordinate. */ s_align* ssw_align (const s_profile* prof, const int8_t* ref, int32_t refLen, const uint8_t weight_gapO, const uint8_t weight_gapE, const uint8_t flag, const uint16_t filters, const int32_t filterd, const int32_t maskLen); /*! @function Release the memory allocated by function ssw_align. @param a pointer to the alignment result structure */ void align_destroy (s_align* a); #ifdef __cplusplus } #endif // __cplusplus #endif // SSW_H scikit-bio-0.6.2/skbio/alignment/_pairwise.py000066400000000000000000001132311464262511300212020ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from warnings import warn from itertools import product import numpy as np from skbio.alignment import TabularMSA from skbio.alignment._ssw_wrapper import StripedSmithWaterman from skbio.sequence import DNA, RNA, Protein from skbio.sequence import GrammaredSequence from skbio.sequence import SubstitutionMatrix from skbio.util import EfficiencyWarning from skbio.util._warning import _warn_deprecated def local_pairwise_align_nucleotide( seq1, seq2, gap_open_penalty=5, gap_extend_penalty=2, match_score=2, mismatch_score=-3, substitution_matrix=None, ): """Locally align exactly two nucleotide seqs with Smith-Waterman. Parameters ---------- seq1 : DNA or RNA The first unaligned sequence. seq2 : DNA or RNA The second unaligned sequence. gap_open_penalty : int or float, optional Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). gap_extend_penalty : int or float, optional Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). match_score : int or float, optional The score to add for a match between a pair of bases (this is added to the previous best alignment score, so is typically positive). mismatch_score : int or float, optional The score to add for a mismatch between a pair of bases (this is added to the previous best alignment score, so is typically negative). substitution_matrix: 2D dict (or similar) Lookup for substitution scores (these values are added to the previous best alignment score). If provided, this overrides ``match_score`` and ``mismatch_score``. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. 
See Also -------- local_pairwise_align local_pairwise_align_protein skbio.alignment.local_pairwise_align_ssw global_pairwise_align global_pairwise_align_protein global_pairwise_align_nucelotide Notes ----- Default ``match_score``, ``mismatch_score``, ``gap_open_penalty`` and ``gap_extend_penalty`` parameters are derived from the NCBI BLAST Server [1]_. References ---------- .. [1] http://blast.ncbi.nlm.nih.gov/Blast.cgi """ for seq in seq1, seq2: if not isinstance(seq, (DNA, RNA)): raise TypeError( "`seq1` and `seq2` must be DNA or RNA, not type %r" % type(seq).__name__ ) # use the substitution matrix provided by the user, or compute from # match_score and mismatch_score if a substitution matrix was not provided if substitution_matrix is None: substitution_matrix = SubstitutionMatrix.identity( "ACGTU", match_score, mismatch_score ).to_dict() return local_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix ) def local_pairwise_align_protein( seq1, seq2, gap_open_penalty=11, gap_extend_penalty=1, substitution_matrix=None ): """Locally align exactly two protein seqs with Smith-Waterman. Parameters ---------- seq1 : Protein The first unaligned sequence. seq2 : Protein The second unaligned sequence. gap_open_penalty : int or float, optional Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). gap_extend_penalty : int or float, optional Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). substitution_matrix: 2D dict (or similar), optional Lookup for substitution scores (these values are added to the previous best alignment score); default is BLOSUM 50. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. 
See Also -------- local_pairwise_align local_pairwise_align_nucleotide skbio.alignment.local_pairwise_align_ssw global_pairwise_align global_pairwise_align_protein global_pairwise_align_nucelotide Notes ----- Default ``gap_open_penalty`` and ``gap_extend_penalty`` parameters are derived from the NCBI BLAST Server [1]_. The BLOSUM (blocks substitution matrices) amino acid substitution matrices were originally defined in [2]_. References ---------- .. [1] http://blast.ncbi.nlm.nih.gov/Blast.cgi .. [2] Amino acid substitution matrices from protein blocks. S Henikoff and J G Henikoff. Proc Natl Acad Sci U S A. Nov 15, 1992; 89(22): 10915-10919. """ for seq in seq1, seq2: if not isinstance(seq, Protein): raise TypeError( "`seq1` and `seq2` must be Protein, not type %r" % type(seq).__name__ ) if substitution_matrix is None: substitution_matrix = SubstitutionMatrix.by_name("BLOSUM50").to_dict() return local_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix ) def local_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix ): """Locally align exactly two seqs with Smith-Waterman. Parameters ---------- seq1 : GrammaredSequence The first unaligned sequence. seq2 : GrammaredSequence The second unaligned sequence. gap_open_penalty : int or float Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). gap_extend_penalty : int or float Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). substitution_matrix: 2D dict (or similar) Lookup for substitution scores (these values are added to the previous best alignment score). Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. 
See Also -------- local_pairwise_align_protein local_pairwise_align_nucleotide skbio.alignment.local_pairwise_align_ssw global_pairwise_align global_pairwise_align_protein global_pairwise_align_nucelotide Notes ----- This algorithm was originally described in [1]_. The scikit-bio implementation was validated against the EMBOSS water web server [2]_. References ---------- .. [1] Identification of common molecular subsequences. Smith TF, Waterman MS. J Mol Biol. 1981 Mar 25;147(1):195-7. .. [2] http://www.ebi.ac.uk/Tools/psa/emboss_water/ """ warn( "You're using skbio's python implementation of Smith-Waterman " "alignment. This will be very slow (e.g., thousands of times slower) " "than skbio.alignment.local_pairwise_align_ssw.", EfficiencyWarning, ) for seq in seq1, seq2: if not isinstance(seq, GrammaredSequence): raise TypeError( "`seq1` and `seq2` must be %r subclasses, not type %r" % (GrammaredSequence.__name__, type(seq).__name__) ) if type(seq1) is not type(seq2): raise TypeError( "`seq1` and `seq2` must be the same type: %r != %r" % (type(seq1).__name__, type(seq2).__name__) ) seq1 = _coerce_alignment_input_type(seq1) seq2 = _coerce_alignment_input_type(seq2) score_matrix, traceback_matrix = _compute_score_and_traceback_matrices( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix, new_alignment_score=0.0, init_matrices_f=_init_matrices_sw, ) end_row_position, end_col_position = np.unravel_index( np.argmax(score_matrix), score_matrix.shape ) aligned1, aligned2, score, seq1_start_position, seq2_start_position = _traceback( traceback_matrix, score_matrix, seq1, seq2, end_row_position, end_col_position ) start_end_positions = [ (seq1_start_position, end_col_position - 1), (seq2_start_position, end_row_position - 1), ] msa = TabularMSA(aligned1 + aligned2) return msa, score, start_end_positions def global_pairwise_align_nucleotide( seq1, seq2, gap_open_penalty=5, gap_extend_penalty=2, match_score=1, mismatch_score=-2, substitution_matrix=None, 
penalize_terminal_gaps=False, ): """Globally align nucleotide seqs or alignments with Needleman-Wunsch. Parameters ---------- seq1 : DNA, RNA, or TabularMSA[DNA|RNA] The first unaligned sequence(s). seq2 : DNA, RNA, or TabularMSA[DNA|RNA] The second unaligned sequence(s). gap_open_penalty : int or float, optional Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). gap_extend_penalty : int or float, optional Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). match_score : int or float, optional The score to add for a match between a pair of bases (this is added to the previous best alignment score, so is typically positive). mismatch_score : int or float, optional The score to add for a mismatch between a pair of bases (this is added to the previous best alignment score, so is typically negative). substitution_matrix: 2D dict (or similar) Lookup for substitution scores (these values are added to the previous best alignment score). If provided, this overrides ``match_score`` and ``mismatch_score``. penalize_terminal_gaps: bool, optional If True, will continue to penalize gaps even after one sequence has been aligned through its end. This behavior is true Needleman-Wunsch alignment, but results in (biologically irrelevant) artifacts when the sequences being aligned are of different length. This is ``False`` by default, which is very likely to be the behavior you want in all or nearly all cases. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. 
See Also -------- local_pairwise_align local_pairwise_align_protein local_pairwise_align_nucleotide skbio.alignment.local_pairwise_align_ssw global_pairwise_align global_pairwise_align_protein Notes ----- Default ``match_score``, ``mismatch_score``, ``gap_open_penalty`` and ``gap_extend_penalty`` parameters are derived from the NCBI BLAST Server [1]_. This function can be use to align either a pair of sequences, a pair of alignments, or a sequence and an alignment. References ---------- .. [1] http://blast.ncbi.nlm.nih.gov/Blast.cgi """ for seq in seq1, seq2: if not isinstance(seq, (DNA, RNA, TabularMSA)): raise TypeError( "`seq1` and `seq2` must be DNA, RNA, or TabularMSA, not type " "%r" % type(seq).__name__ ) if isinstance(seq, TabularMSA) and not issubclass(seq.dtype, (DNA, RNA)): raise TypeError( "`seq1` and `seq2` must be TabularMSA with DNA or RNA dtype, " "not dtype %r" % seq.dtype.__name__ ) # use the substitution matrix provided by the user, or compute from # match_score and mismatch_score if a substitution matrix was not provided if substitution_matrix is None: substitution_matrix = make_identity_substitution_matrix( match_score, mismatch_score ) return global_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix, penalize_terminal_gaps=penalize_terminal_gaps, ) def global_pairwise_align_protein( seq1, seq2, gap_open_penalty=11, gap_extend_penalty=1, substitution_matrix=None, penalize_terminal_gaps=False, ): """Globally align pair of protein seqs or alignments with Needleman-Wunsch. Parameters ---------- seq1 : Protein or TabularMSA[Protein] The first unaligned sequence(s). seq2 : Protein or TabularMSA[Protein] The second unaligned sequence(s). gap_open_penalty : int or float, optional Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). 
gap_extend_penalty : int or float, optional Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). substitution_matrix: 2D dict (or similar), optional Lookup for substitution scores (these values are added to the previous best alignment score); default is BLOSUM 50. penalize_terminal_gaps: bool, optional If True, will continue to penalize gaps even after one sequence has been aligned through its end. This behavior is true Needleman-Wunsch alignment, but results in (biologically irrelevant) artifacts when the sequences being aligned are of different length. This is ``False`` by default, which is very likely to be the behavior you want in all or nearly all cases. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. See Also -------- local_pairwise_align local_pairwise_align_protein local_pairwise_align_nucleotide skbio.alignment.local_pairwise_align_ssw global_pairwise_align global_pairwise_align_nucelotide Notes ----- Default ``gap_open_penalty`` and ``gap_extend_penalty`` parameters are derived from the NCBI BLAST Server [1]_. The BLOSUM (blocks substitution matrices) amino acid substitution matrices were originally defined in [2]_. This function can be use to align either a pair of sequences, a pair of alignments, or a sequence and an alignment. References ---------- .. [1] http://blast.ncbi.nlm.nih.gov/Blast.cgi .. [2] Amino acid substitution matrices from protein blocks. S Henikoff and J G Henikoff. Proc Natl Acad Sci U S A. Nov 15, 1992; 89(22): 10915-10919. 
""" for seq in seq1, seq2: if not isinstance(seq, (Protein, TabularMSA)): raise TypeError( "`seq1` and `seq2` must be Protein or TabularMSA, not type %r" % type(seq).__name__ ) if isinstance(seq, TabularMSA) and not issubclass(seq.dtype, Protein): raise TypeError( "`seq1` and `seq2` must be TabularMSA with Protein dtype, " "not dtype %r" % seq.dtype.__name__ ) if substitution_matrix is None: substitution_matrix = SubstitutionMatrix.by_name("BLOSUM50").to_dict() return global_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix, penalize_terminal_gaps=penalize_terminal_gaps, ) def global_pairwise_align( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix, penalize_terminal_gaps=False, ): """Globally align a pair of seqs or alignments with Needleman-Wunsch. Parameters ---------- seq1 : GrammaredSequence or TabularMSA The first unaligned sequence(s). seq2 : GrammaredSequence or TabularMSA The second unaligned sequence(s). gap_open_penalty : int or float Penalty for opening a gap (this is substracted from previous best alignment score, so is typically positive). gap_extend_penalty : int or float Penalty for extending a gap (this is substracted from previous best alignment score, so is typically positive). substitution_matrix: 2D dict (or similar) Lookup for substitution scores (these values are added to the previous best alignment score). penalize_terminal_gaps: bool, optional If True, will continue to penalize gaps even after one sequence has been aligned through its end. This behavior is true Needleman-Wunsch alignment, but results in (biologically irrelevant) artifacts when the sequences being aligned are of different length. This is ``False`` by default, which is very likely to be the behavior you want in all or nearly all cases. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). 
Note that start/end positions are indexes into the unaligned sequences. See Also -------- local_pairwise_align local_pairwise_align_protein local_pairwise_align_nucleotide skbio.alignment.local_pairwise_align_ssw global_pairwise_align_protein global_pairwise_align_nucelotide Notes ----- This algorithm (in a slightly more basic form) was originally described in [1]_. The scikit-bio implementation was validated against the EMBOSS needle web server [2]_. This function can be use to align either a pair of sequences, a pair of alignments, or a sequence and an alignment. References ---------- .. [1] A general method applicable to the search for similarities in the amino acid sequence of two proteins. Needleman SB, Wunsch CD. J Mol Biol. 1970 Mar;48(3):443-53. .. [2] http://www.ebi.ac.uk/Tools/psa/emboss_needle/ """ warn( "You're using skbio's python implementation of Needleman-Wunsch " "alignment. This is known to be very slow (e.g., thousands of times " "slower than a native C implementation). We'll be adding a faster " "version soon (see https://github.com/scikit-bio/scikit-bio/issues/" "254 to track progress on this).", EfficiencyWarning, ) for seq in seq1, seq2: # We don't need to check the case where `seq` is a `TabularMSA` with a # dtype that isn't a subclass of `GrammaredSequence`, this is # guaranteed by `TabularMSA`. 
if not isinstance(seq, (GrammaredSequence, TabularMSA)): raise TypeError( "`seq1` and `seq2` must be GrammaredSequence subclasses or " "TabularMSA, not type %r" % type(seq).__name__ ) seq1 = _coerce_alignment_input_type(seq1) seq2 = _coerce_alignment_input_type(seq2) if seq1.dtype is not seq2.dtype: raise TypeError( "`seq1` and `seq2` must have the same dtype: %r != %r" % (seq1.dtype.__name__, seq2.dtype.__name__) ) if penalize_terminal_gaps: init_matrices_f = _init_matrices_nw else: init_matrices_f = _init_matrices_nw_no_terminal_gap_penalty score_matrix, traceback_matrix = _compute_score_and_traceback_matrices( seq1, seq2, gap_open_penalty, gap_extend_penalty, substitution_matrix, new_alignment_score=-np.inf, init_matrices_f=init_matrices_f, penalize_terminal_gaps=penalize_terminal_gaps, ) end_row_position = traceback_matrix.shape[0] - 1 end_col_position = traceback_matrix.shape[1] - 1 aligned1, aligned2, score, seq1_start_position, seq2_start_position = _traceback( traceback_matrix, score_matrix, seq1, seq2, end_row_position, end_col_position ) start_end_positions = [ (seq1_start_position, end_col_position - 1), (seq2_start_position, end_row_position - 1), ] msa = TabularMSA(aligned1 + aligned2) return msa, score, start_end_positions def local_pairwise_align_ssw(sequence1, sequence2, **kwargs): """Align query and target sequences with Striped Smith-Waterman. Parameters ---------- sequence1 : DNA, RNA, or Protein The first unaligned sequence sequence2 : DNA, RNA, or Protein The second unaligned sequence kwargs : dict Additional keyword arguments to pass to ``StripedSmithWaterman``. Returns ------- tuple ``TabularMSA`` object containing the aligned sequences, alignment score (float), and start/end positions of each input sequence (iterable of two-item tuples). Note that start/end positions are indexes into the unaligned sequences. 
Warnings -------- ``local_pairwise_align_ssw`` is deprecated as of ``0.5.8`` and will be removed in favor of more general-purpose and performant aligners. Additional details at :repo:`issues/1814`. Notes ----- This is a wrapper for the SSW package [1]_. For a complete list of optional keyword-arguments that can be provided, see ``skbio.alignment.StripedSmithWaterman``. The following kwargs will not have any effect: `suppress_sequences`, `zero_index`, and `protein` If an alignment does not meet a provided filter, `None` will be returned. References ---------- .. [1] Zhao, Mengyao, Wan-Ping Lee, Erik P. Garrison, & Gabor T. Marth. "SSW Library: An SIMD Smith-Waterman C/C++ Library for Applications". PLOS ONE (2013). Web. 11 July 2014. http://www.plosone.org/article/info:doi/10.1371/journal.pone.0082138 See Also -------- skbio.alignment.StripedSmithWaterman """ # @deprecated _warn_deprecated( local_pairwise_align_ssw, "0.5.8", msg="It will be removed in favor of more general purpose and performant " "aligners. Additional details at " "https://github.com/scikit-bio/scikit-bio/issues/1814.", ) for seq in sequence1, sequence2: if not isinstance(seq, (DNA, RNA, Protein)): raise TypeError( "`sequence1` and `sequence2` must be DNA, RNA, or Protein, " "not type %r" % type(seq).__name__ ) if type(sequence1) is not type(sequence2): raise TypeError( "`sequence1` and `sequence2` must be the same type: %r != %r" % (type(sequence1).__name__, type(sequence2).__name__) ) # We need the sequences for `TabularMSA` to make sense, so don't let the # user suppress them. kwargs["suppress_sequences"] = False kwargs["zero_index"] = True kwargs["protein"] = False if isinstance(sequence1, Protein): kwargs["protein"] = True query = StripedSmithWaterman(str(sequence1), **kwargs) alignment = query(str(sequence2)) # If there is no cigar, then it has failed a filter. Return None. 
if not alignment.cigar: return None start_end = None if alignment.query_begin != -1: start_end = [ (alignment.query_begin, alignment.query_end), (alignment.target_begin, alignment.target_end_optimal), ] metadata1 = metadata2 = None if sequence1.has_metadata(): metadata1 = sequence1.metadata if sequence2.has_metadata(): metadata2 = sequence2.metadata constructor = type(sequence1) msa = TabularMSA( [ constructor( alignment.aligned_query_sequence, metadata=metadata1, validate=False ), constructor( alignment.aligned_target_sequence, metadata=metadata2, validate=False ), ] ) return msa, alignment.optimal_alignment_score, start_end def make_identity_substitution_matrix(match_score, mismatch_score, alphabet="ACGTU"): """Generate substitution matrix where all matches are scored equally. Parameters ---------- match_score : int, float The score that should be assigned for all matches. This value is typically positive. mismatch_score : int, float The score that should be assigned for all mismatches. This value is typically negative. alphabet : iterable of str, optional The characters that should be included in the substitution matrix. Returns ------- dict of dicts All characters in alphabet are keys in both dictionaries, so that any pair of characters can be looked up to get their match or mismatch score. Warnings -------- ``make_identity_substitution_matrix`` is deprecated as of ``0.4.0``. It has been replaced by a SubstitutionMatrix class. Additional details at :repo:`pull/1913`. """ # @deprecated _warn_deprecated( make_identity_substitution_matrix, "0.4.0", msg="It has been " "replaced by the SubstitutionMatrix class. Additional " "details at " "https://github.com/scikit-bio/scikit-bio/pull/1913.", ) result = {} for c1 in alphabet: row = {} for c2 in alphabet: if c1 == c2: row[c2] = match_score else: row[c2] = mismatch_score result[c1] = row return result # Functions from here allow for generalized (global or local) alignment. 
I # will likely want to put these in a single object to make the naming a little # less clunky. def _coerce_alignment_input_type(seq): if isinstance(seq, GrammaredSequence): return TabularMSA([seq]) else: return seq _traceback_encoding = { "match": 1, "vertical-gap": 2, "horizontal-gap": 3, "uninitialized": -1, "alignment-end": 0, } def _init_matrices_sw(aln1, aln2, gap_open_penalty, gap_extend_penalty): shape = (aln2.shape.position + 1, aln1.shape.position + 1) score_matrix = np.zeros(shape) traceback_matrix = np.zeros(shape, dtype=int) traceback_matrix += _traceback_encoding["uninitialized"] traceback_matrix[0, :] = _traceback_encoding["alignment-end"] traceback_matrix[:, 0] = _traceback_encoding["alignment-end"] return score_matrix, traceback_matrix def _init_matrices_nw(aln1, aln2, gap_open_penalty, gap_extend_penalty): shape = (aln2.shape.position + 1, aln1.shape.position + 1) score_matrix = np.zeros(shape) traceback_matrix = np.zeros(shape, dtype=int) traceback_matrix += _traceback_encoding["uninitialized"] traceback_matrix[0, 0] = _traceback_encoding["alignment-end"] # cache some values for quicker access vgap = _traceback_encoding["vertical-gap"] hgap = _traceback_encoding["horizontal-gap"] for i in range(1, shape[0]): score_matrix[i, 0] = -gap_open_penalty - ((i - 1) * gap_extend_penalty) traceback_matrix[i, 0] = vgap for i in range(1, shape[1]): score_matrix[0, i] = -gap_open_penalty - ((i - 1) * gap_extend_penalty) traceback_matrix[0, i] = hgap return score_matrix, traceback_matrix def _init_matrices_nw_no_terminal_gap_penalty( aln1, aln2, gap_open_penalty, gap_extend_penalty ): shape = (aln2.shape.position + 1, aln1.shape.position + 1) score_matrix = np.zeros(shape) traceback_matrix = np.zeros(shape, dtype=int) traceback_matrix += _traceback_encoding["uninitialized"] traceback_matrix[0, 0] = _traceback_encoding["alignment-end"] # cache some values for quicker access vgap = _traceback_encoding["vertical-gap"] hgap = _traceback_encoding["horizontal-gap"] 
for i in range(1, shape[0]): traceback_matrix[i, 0] = vgap for i in range(1, shape[1]): traceback_matrix[0, i] = hgap return score_matrix, traceback_matrix def _compute_substitution_score( aln1_chars, aln2_chars, substitution_matrix, gap_substitution_score, gap_chars ): substitution_score = 0 for aln1_char, aln2_char in product(aln1_chars, aln2_chars): if aln1_char in gap_chars or aln2_char in gap_chars: substitution_score += gap_substitution_score else: try: substitution_score += substitution_matrix[aln1_char][aln2_char] except KeyError: offending_chars = [ c for c in (aln1_char, aln2_char) if c not in substitution_matrix ] raise ValueError( "One of the sequences contains a character that is " "not contained in the substitution matrix. Are you " "using an appropriate substitution matrix for your " "sequence type (e.g., a nucleotide substitution " "matrix does not make sense for aligning protein " "sequences)? Does your sequence contain invalid " "characters? The offending character(s) is: " " %s." % ", ".join(offending_chars) ) substitution_score /= len(aln1_chars) * len(aln2_chars) return substitution_score def _compute_score_and_traceback_matrices( aln1, aln2, gap_open_penalty, gap_extend_penalty, substitution_matrix, new_alignment_score=-np.inf, init_matrices_f=_init_matrices_nw, penalize_terminal_gaps=True, gap_substitution_score=0, ): """Return dynamic programming (score) and traceback matrices. A note on the ``penalize_terminal_gaps`` parameter. When this value is ``False``, this function is no longer true Smith-Waterman/Needleman-Wunsch scoring, but when ``True`` it can result in biologically irrelevant artifacts in Needleman-Wunsch (global) alignments. Specifically, if one sequence is longer than the other (e.g., if aligning a primer sequence to an amplification product, or searching for a gene in a genome) the shorter sequence will have a long gap inserted. 
The parameter is ``True`` by default (so that this function computes the score and traceback matrices as described by the original authors) but the global alignment wrappers pass ``False`` by default, so that the global alignment API returns the result that users are most likely to be looking for. """ aln1_length = aln1.shape.position aln2_length = aln2.shape.position # cache some values for quicker/simpler access aend = _traceback_encoding["alignment-end"] match = _traceback_encoding["match"] vgap = _traceback_encoding["vertical-gap"] hgap = _traceback_encoding["horizontal-gap"] new_alignment_score = (new_alignment_score, aend) # Initialize a matrix to use for scoring the alignment and for tracing # back the best alignment score_matrix, traceback_matrix = init_matrices_f( aln1, aln2, gap_open_penalty, gap_extend_penalty ) # Iterate over the characters in aln2 (which corresponds to the vertical # sequence in the matrix) for aln2_pos, aln2_chars in enumerate(aln2.iter_positions(ignore_metadata=True), 1): aln2_chars = str(aln2_chars) # Iterate over the characters in aln1 (which corresponds to the # horizontal sequence in the matrix) for aln1_pos, aln1_chars in enumerate( aln1.iter_positions(ignore_metadata=True), 1 ): aln1_chars = str(aln1_chars) # compute the score for a match/mismatch substitution_score = _compute_substitution_score( aln1_chars, aln2_chars, substitution_matrix, gap_substitution_score, aln1.dtype.gap_chars, ) diag_score = ( score_matrix[aln2_pos - 1, aln1_pos - 1] + substitution_score, match, ) # compute the score for adding a gap in aln2 (vertical) if not penalize_terminal_gaps and (aln1_pos == aln1_length): # we've reached the end of aln1, so adding vertical gaps # (which become gaps in aln1) should no longer # be penalized (if penalize_terminal_gaps == False) up_score = (score_matrix[aln2_pos - 1, aln1_pos], vgap) elif traceback_matrix[aln2_pos - 1, aln1_pos] == vgap: # gap extend, because the cell above was also a gap up_score = ( 
score_matrix[aln2_pos - 1, aln1_pos] - gap_extend_penalty, vgap, ) else: # gap open, because the cell above was not a gap up_score = ( score_matrix[aln2_pos - 1, aln1_pos] - gap_open_penalty, vgap, ) # compute the score for adding a gap in aln1 (horizontal) if not penalize_terminal_gaps and (aln2_pos == aln2_length): # we've reached the end of aln2, so adding horizontal gaps # (which become gaps in aln2) should no longer # be penalized (if penalize_terminal_gaps == False) left_score = (score_matrix[aln2_pos, aln1_pos - 1], hgap) elif traceback_matrix[aln2_pos, aln1_pos - 1] == hgap: # gap extend, because the cell to the left was also a gap left_score = ( score_matrix[aln2_pos, aln1_pos - 1] - gap_extend_penalty, hgap, ) else: # gap open, because the cell to the left was not a gap left_score = ( score_matrix[aln2_pos, aln1_pos - 1] - gap_open_penalty, hgap, ) # identify the largest score, and use that information to populate # the score and traceback matrices best_score = _first_largest( [new_alignment_score, left_score, diag_score, up_score] ) score_matrix[aln2_pos, aln1_pos] = best_score[0] traceback_matrix[aln2_pos, aln1_pos] = best_score[1] return score_matrix, traceback_matrix def _traceback(traceback_matrix, score_matrix, aln1, aln2, start_row, start_col): # cache some values for simpler reference aend = _traceback_encoding["alignment-end"] match = _traceback_encoding["match"] vgap = _traceback_encoding["vertical-gap"] hgap = _traceback_encoding["horizontal-gap"] gap_character = aln1.dtype.default_gap_char # initialize the result alignments aln1_sequence_count = aln1.shape.sequence aligned_seqs1 = [[] for e in range(aln1_sequence_count)] aln2_sequence_count = aln2.shape.sequence aligned_seqs2 = [[] for e in range(aln2_sequence_count)] current_row = start_row current_col = start_col best_score = score_matrix[current_row, current_col] current_value = None while current_value != aend: current_value = traceback_matrix[current_row, current_col] if current_value == 
match: for aligned_seq, input_seq in zip(aligned_seqs1, aln1): aligned_seq.append(str(input_seq[current_col - 1])) for aligned_seq, input_seq in zip(aligned_seqs2, aln2): aligned_seq.append(str(input_seq[current_row - 1])) current_row -= 1 current_col -= 1 elif current_value == vgap: for aligned_seq in aligned_seqs1: aligned_seq.append(gap_character) for aligned_seq, input_seq in zip(aligned_seqs2, aln2): aligned_seq.append(str(input_seq[current_row - 1])) current_row -= 1 elif current_value == hgap: for aligned_seq, input_seq in zip(aligned_seqs1, aln1): aligned_seq.append(str(input_seq[current_col - 1])) for aligned_seq in aligned_seqs2: aligned_seq.append(gap_character) current_col -= 1 elif current_value == aend: continue else: raise ValueError("Invalid value in traceback matrix: %s" % current_value) for i, (aligned_seq, original) in enumerate(zip(aligned_seqs1, aln1)): aligned_seq = "".join(aligned_seq)[::-1] constructor = aln1.dtype metadata = None if original.has_metadata(): metadata = original.metadata aligned_seqs1[i] = constructor(aligned_seq, metadata=metadata, validate=False) for i, (aligned_seq, original) in enumerate(zip(aligned_seqs2, aln2)): aligned_seq = "".join(aligned_seq)[::-1] constructor = aln2.dtype metadata = None if original.has_metadata(): metadata = original.metadata aligned_seqs2[i] = constructor(aligned_seq, metadata=metadata, validate=False) return aligned_seqs1, aligned_seqs2, best_score, current_col, current_row def _first_largest(scores): """Similar to max, but returns the first element achieving the high score. If max receives a tuple, it will break a tie for the highest value of entry[i] with entry[i+1]. We don't want that here - to better match with the results of other tools, we want to be able to define which entry is returned in the case of a tie. 
""" result = scores[0] for score, direction in scores[1:]: if score > result[0]: result = (score, direction) return result scikit-bio-0.6.2/skbio/alignment/_path.py000066400000000000000000000603601464262511300203170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import collections import numpy as np from skbio._base import SkbioObject from skbio.util._decorator import classonlymethod from skbio.sequence import Sequence _Shape = collections.namedtuple("Shape", ["sequence", "position"]) # CIGAR codes indexed by states in PairAlignPath _cigar_codes = np.array(["M", "I", "D", "P"]) # Mapping of CIGAR codes to states in PairAlignPath _cigar_mapping = { "M": 0, "I": 1, "D": 2, "P": 3, "=": 0, "X": 0, "N": 2, "S": 1, "H": 3, } class AlignPath(SkbioObject): r"""Create an alignment path from segment lengths and states. The underliying data structure of the ``AlignPath`` class efficiently represents a sequence alignment as two equal-length vectors: lengths and gap status. The lengths vector contains the lengths of individual segments of the alignment with consistent gap status. The gap status vector contains the encoded bits of the gap (1) and character (0) status for each position in the alignment. This data structure is detached from the original sequences and is highly memory efficient. It permits fully vectorized operations and enables efficient conversion between various formats such as CIGAR, tabular, indices (Biotite), and coordinates (Biopython). Parameters ---------- lengths : array_like of int of shape (n_segments,) Length of each segment in the alignment. 
states : array_like of uint8 of shape (n_segments,) or (n_packs, n_segments) Packed bits representing character (0) or gap (1) status per sequence per segment in the alignment. starts : array_like of int of shape (n_sequences,) Start position (0-based) of each sequence in the alignment. See Also -------- skbio.sequence.Sequence skbio.alignment.TabularMSA Examples -------- Create an ``AlignPath`` object from a ``TabularMSA`` object with three DNA sequences and 20 positions. >>> from skbio import DNA, TabularMSA >>> from skbio.alignment import AlignPath >>> seqs = [ ... DNA('CGGTCGTAACGCGTA---CA'), ... DNA('CAG--GTAAG-CATACCTCA'), ... DNA('CGGTCGTCAC-TGTACACTA') ... ] >>> msa = TabularMSA(seqs) >>> msa TabularMSA[DNA] ---------------------- Stats: sequence count: 3 position count: 20 ---------------------- CGGTCGTAACGCGTA---CA CAG--GTAAG-CATACCTCA CGGTCGTCAC-TGTACACTA >>> path = AlignPath.from_tabular(msa) >>> path AlignPath Shape(sequence=3, position=20) lengths: [3 2 5 1 4 3 2] states: [0 2 0 6 0 1 0] Notes ----- The underlying logic of the ``AlignPath`` data structure is rooted in two concepts: run length encoding and bit arrays. The lengths array is calculated by performing run length encoding on the alignment, considering each segment with consistent gap status to be an individual unit in the encoding. In the above example, the first three positions of the alignment contain no gaps, so the first value in the lengths array is 3, and so on. The states array is calculated by turning the alignment segments into a bit array where gaps become 1's, and characters become zeros. Then, the 0's and 1's are converted into bytes. In the above example, the fourth segment, which has length 1, would become [0, 1, 1], which then becomes 6. 
""" def __init__(self, lengths, states, starts): self._lengths = np.asarray(lengths, dtype=np.int64) self._states = np.atleast_2d(np.asarray(states, dtype=np.uint8)) # start positions # Number of sequences needs to be explicitly provided, because the packed bits # does not contain this information. (It is merely in multiples of 8.) self._starts = np.asarray(starts, dtype=np.int64) if self._starts.ndim > 1: raise TypeError("`starts` must be a 1-D vector.") n_seqs = self._starts.size if np.ceil(n_seqs / 8) != self._states.shape[0]: raise ValueError("Sizes of `starts` and `states` do not match.") # Shape is n_seqs (rows) x n_positions (columns), which is consistent with # TabularMSA n_positions = self._lengths.sum() self._shape = _Shape(sequence=n_seqs, position=n_positions) def __str__(self): r"""String representation of this AlignPath.""" # Not sure if this makes sense for this class, but it is needed for all # SkbioObjects. return self.__repr__() def __repr__(self): r"""Summary of the alignment path.""" return ( f"{self.__class__.__name__}\n{self._shape}\nlengths: " f"{self._lengths}\nstates: {np.squeeze(self._states)}" ) @property def lengths(self): """Array of lengths of segments in alignment path.""" return self._lengths @property def states(self): """Array of gap status of segments in alignment path.""" return self._states @property def starts(self): """Array of start positions of sequences in the alignment.""" return self._starts @property def shape(self): """Number of sequences (rows) and positions (columns). Notes ----- This property is not writeable. """ return self._shape def to_bits(self): r"""Unpack states into an array of bits. Returns ------- ndarray of (0, 1) of shape (n_seqs, n_positions) Array of zeros (character) and ones (gap) which represent the alignment. Examples -------- >>> from skbio import DNA, TabularMSA >>> from skbio.alignment import AlignPath >>> seqs = [ ... DNA('CGTCGTGC'), ... DNA('CA--GT-C'), ... DNA('CGTCGT-T') ... 
] >>> msa = TabularMSA(seqs) >>> path = AlignPath.from_tabular(msa) >>> path.to_bits() array([[0, 0, 0, 0, 0], [0, 1, 0, 1, 0], [0, 0, 0, 1, 0]], dtype=uint8) """ return np.unpackbits( np.atleast_2d(self._states), axis=0, count=self._shape[0], bitorder="little" ) @classonlymethod def from_bits(cls, bits, starts=None): r"""Create an alignment path from a bit array (0 - character, 1 - gap). Parameters ---------- bits : array_like of (0, 1) of shape (n_seqs, n_positions) Array of zeros (character) and ones (gap) which represent the alignment. starts : array_like of int of shape (n_sequences,), optional Start position (0-based) of each sequence in the alignment. If omitted, will set as zeros. Returns ------- AlignPath The alignment path created from the given bit array. Examples -------- >>> import numpy as np >>> from skbio.alignment import AlignPath >>> bit_arr = np.array([[0, 0, 0, 0, 0, 0, 0, 0], ... [0, 0, 1, 1, 0, 0, 1, 0], ... [0, 0, 0, 0, 0, 0, 1, 0]]) >>> path = AlignPath.from_bits(bit_arr) >>> path AlignPath Shape(sequence=3, position=8) lengths: [2 2 2 1 1] states: [0 2 0 6 0] """ # Pack bits into integers. ints = np.packbits(bits, axis=0, bitorder="little") # If there are 8 or less sequences, squeeze the 2D array into 1D. # This is an optional optimization especially for pairwise alignment. if ints.shape[0] == 1: ints = np.squeeze(ints, axis=0) # Get indices where segments start. Equivalent to but faster than: # idx = np.where(np.diff(ints, prepend=np.nan))[0] idx = np.append(0, np.where(ints[:-1] != ints[1:])[0] + 1) # Get lengths of segments. Equivalent to but faster than: # lens = np.diff(idx, append=ints.size) lens = np.append(idx[1:] - idx[:-1], ints.size - idx[-1]) # Keep indices of segment starts. ints = ints[idx] # This is the 2D equivalent of the above code. 
else: idx = np.append( 0, np.where((ints[:, :-1] != ints[:, 1:]).any(axis=0))[0] + 1 ) lens = np.append(idx[1:] - idx[:-1], ints.shape[1] - idx[-1]) ints = ints[:, idx] # set start positions as zeros if not specified if starts is None: starts = np.zeros(bits.shape[0], dtype=int) # return per-segment lengths and bits return cls(lens, ints, starts) @classonlymethod def from_tabular(cls, msa): r"""Create an alignment path from a `TabularMSA` object. Parameters ---------- msa : TabularMSA TabularMSA to be converted into AlignPath object. Returns ------- AlignPath The alignment path created from the TabularMSA object. Notes ----- The returned alignment path will span across the entire tabular MSA. Its start positions will be uniformly zeros. See Also -------- skbio.TabularMSA.from_path_seqs """ # Convert TabularMSA into a 2D array of bytes. # TODO: TabularMSA itself should be refactored to have this as the default data # structure. byte_arr = np.stack([x._bytes for x in msa._seqs]) # Get gap character code. gap_chars = [ord(x) for x in msa.dtype.gap_chars] # Identify gap positions, and convert them into a bit array, then create an # alignment path based on it. return cls.from_bits(np.isin(byte_arr, gap_chars)) def to_indices(self, gap=-1): r"""Generate an array of indices of characters in the original sequences. Parameters ---------- gap : int, np.nan, np.inf, "del", or "mask", optional Method to encode gaps in the alignment. If numeric, replace gaps with this value. If "del", delete columns that have any gap. If "mask", return an ``np.ma.MaskedArray``, with gaps masked. Default is -1. Returns ------- ndarray of int of shape (n_seqs, n_positions) Array of indices of characters in the original sequences. Examples -------- >>> from skbio.alignment import AlignPath >>> path = AlignPath(lengths=[1, 2, 2, 1], ... states=[0, 5, 2, 6], ... 
starts=[0, 0, 0]) >>> path.to_indices() array([[ 0, -1, -1, 1, 2, 3], [ 0, 1, 2, -1, -1, -1], [ 0, -1, -1, 1, 2, -1]]) """ errmsg = "Gap must be an integer, np.nan, np.inf, 'del', or 'mask'." valid_gaps = {"del", "mask"} if isinstance(gap, str): if gap not in valid_gaps: raise TypeError(errmsg) elif isinstance(gap, float): if not (np.isnan(gap) or np.isinf(gap)): raise TypeError(errmsg) elif not np.issubdtype(type(gap), np.integer): raise TypeError(errmsg) bits = np.squeeze(self.to_bits()) # TODO: Consider optimization using np.arange. # thought: initiate [-1, -1, -1 ... -1], then add slices of arange into it pos = np.repeat(1 - bits, self._lengths, axis=1) idx = np.cumsum(pos, axis=1, dtype=int) - 1 if self._starts.any(): idx += self._starts.reshape(-1, 1) if gap == "del": keep = np.repeat(self._states == 0, self._lengths) return idx[:, keep] elif gap == "mask": return np.ma.array(idx, mask=(1 - pos)) else: return np.where(pos, idx, gap) @classonlymethod def from_indices(cls, indices, gap=-1): r"""Create an alignment path from character indices in the original sequences. Parameters ---------- indices : array_like of int of shape (n_seqs, n_positions) Each element in the array is the index in the corresponding sequence. gap : int or "mask", optional The value which represents a gap in the alignment. Defaults to -1, but can be other values. If "mask", ``indices`` must be an ``np.ma.MaskedArray``. Cannot use "del". Returns ------- AlignPath The alignment path created from the given indices. Notes ----- If a sequence in the alignment consists of entirely gap characters, its start position will be equal to the gap character. Examples -------- >>> import numpy as np >>> from skbio.alignment import AlignPath >>> indices = np.array([[0, -1, -1, 1, 2, 3], ... [0, 1, 2, -1, -1, -1], ... 
[0, -1, -1, 1, 2, -1]]) >>> path = AlignPath.from_indices(indices) >>> path AlignPath Shape(sequence=3, position=6) lengths: [1 2 2 1] states: [0 5 2 6] """ if gap == "mask": return cls.from_bits( np.ma.getmask(indices), indices[ np.arange(indices.shape[0]), np.argmax(indices != indices.fill_value, axis=1), ], ) else: if isinstance(indices, np.ma.MaskedArray): raise TypeError("For masked arrays, gap must be 'mask'.") indices = np.asarray(indices) return cls.from_bits( indices == gap, indices[np.arange(indices.shape[0]), np.argmax(indices != gap, axis=1)], ) # TODO # n. optimization def to_coordinates(self): r"""Generate an array of segment coordinates in the original sequences. Returns ------- ndarray of int of shape (n_seqs, n_segments) Array where each value defines the start positions (index) of each segment for each sequence. Examples -------- >>> from skbio.alignment import AlignPath >>> path = AlignPath(lengths=[1, 2, 2, 1], ... states=[0, 5, 2, 6], ... starts=[0, 0, 0]) >>> path.to_coordinates() # doctest: +ELLIPSIS array([[0, 1, 1, 3, 4], [0, 1, 3, 3, 3], [0, 1, 1, 3, 3]]... """ lens = self._lengths * (1 - self.to_bits()) col0 = np.zeros((self._shape[0], 1), dtype=int) lens = np.append(col0, lens, axis=1) if self.starts.any(): return lens.cumsum(axis=1) + self.starts.reshape(-1, 1) else: return lens.cumsum(axis=1) @classonlymethod def from_coordinates(cls, coords): r"""Generate an alignment path from an array of segment coordinates. Parameters ---------- coords : array_like of int of shape (n_seqs, n_segments) Array where each value defines the start positions (index) of each segment for each sequence. Returns ------- AlignPath The alignment path created from the given coordinates. Examples -------- >>> import numpy as np >>> from skbio.alignment import AlignPath >>> coordinates = np.array([[0, 1, 1, 3, 4], ... [0, 1, 3, 3, 3], ... 
[0, 1, 1, 3, 3]]) >>> path = AlignPath.from_coordinates(coordinates) >>> path AlignPath Shape(sequence=3, position=6) lengths: [1 2 2 1] states: [0 5 2 6] """ starts = coords[:, 0] diff = np.diff(coords) bits = diff == 0 lens = diff[bits.argmin(axis=0), np.arange(diff.shape[1])] ints = np.packbits(bits, axis=0, bitorder="little") if ints.shape[0] == 1: ints = np.squeeze(ints, axis=0) return cls(lens, ints, starts) class PairAlignPath(AlignPath): r"""Create a pairwise alignment path from segment lengths and states. Parameters ---------- lengths : array_like of int of shape (n_segments,) Length of each segment in the alignment. states : array_like of uint8 of shape (n_segments,) Bits representing character (0) or gap (1) status per sequence per segment in the alignment. starts : array_like of (int, int), optional Start position (0-based) of each sequence in the alignment. See Also -------- skbio.sequence.Sequence skbio.alignment.TabularMSA """ def __str__(self): r"""Return string representation of this AlignPath.""" return self.__repr__() def __repr__(self): r"""Return summary of the alignment path.""" return ( f"<{self.__class__.__name__}, shape: {self._shape}, " f"CIGAR: '{self.to_cigar()}'>" ) @classonlymethod def from_bits(cls, bits): r"""Create a pairwise alignment path from a bit array. Parameters ---------- bits : array_like of 0's and 1's of shape (n_seqs, n_positions) Array of zeros (character) and ones (gap) which represent the alignment. Returns ------- PairAlignPath The pairwise alignment path of the provided bit array. """ # Ensure bits is a 2D array-like of ones and zeros. 
if not isinstance(bits, np.ndarray): bits = np.atleast_2d(bits) if bits.ndim != 2 or bits.shape[1] == 0: raise TypeError("Input 'bits' must be a non-empty 2D numpy array.") if not (np.logical_or(bits == 0, bits == 1).all()): raise ValueError("Input 'bits' must contain only zeros and ones.") ints = bits[0] + (bits[1] << 1) idx = np.append(0, np.where(ints[:-1] != ints[1:])[0] + 1) lens = np.append(idx[1:] - idx[:-1], ints.size - idx[-1]) return cls(lens, ints[idx], np.zeros(bits.shape[0], dtype=int)) def to_bits(self): r"""Unpack states into an array of bits.""" if not np.all(np.isin(self._states, [0, 1, 2, 3])): raise ValueError( "For pairwise alignment, `states` must only contain " "zeros, ones, twos, or threes." ) return np.stack([self._states & 1, self._states >> 1]) def to_cigar(self, seqs=None): r"""Generate a CIGAR string representing the pairwise alignment path. Parameters ---------- seqs : list of skbio.Sequence or string A pair of sequences to generate CIGAR string. If provided, will distinguish match (``=``) and mismatch (``X``). Otherwise, will uniformly note them as (mis)match (``M``). The first sequence in the list is the query sequence, the second is the reference sequence. Returns ------- str CIGAR string representing the pairwise alignment path. Examples -------- >>> from skbio.alignment import PairAlignPath >>> path = PairAlignPath(lengths=[2, 5, 3, 1], ... states=[0, 3, 2, 1], ... 
starts=[0, 0]) >>> path.to_cigar() '2M5P3D1I' """ cigar = [] states = np.squeeze(self._states) if seqs is not None: # test if seqs is strings or Sequence object or something else if isinstance(seqs[0], str) and isinstance(seqs[1], str): seq1 = np.frombuffer(seqs[0].encode("ascii"), dtype=np.uint8) seq2 = np.frombuffer(seqs[1].encode("ascii"), dtype=np.uint8) elif isinstance(seqs[0], Sequence) and isinstance(seqs[1], Sequence): seq1 = seqs[0]._bytes seq2 = seqs[1]._bytes else: raise TypeError("`seqs` must be strings or Sequence objects.") idx1, idx2 = self._starts for length, state in zip(self._lengths, states): if state == 0: match_arr = seq1[idx1 : idx1 + length] == seq2[idx2 : idx2 + length] char_arr = np.where(match_arr, "=", "X") n = len(char_arr) count = 1 curr_char = char_arr[0] for i in range(1, n): if char_arr[i] == curr_char: count += 1 else: cigar.append(str(count) + curr_char) curr_char = char_arr[i] count = 1 cigar.append(str(count) + curr_char) idx1 += length idx2 += length elif state == 1: cigar.append(str(length) + "I") idx2 += length elif state == 2: cigar.append(str(length) + "D") idx1 += length elif state == 3: cigar.append(str(length) + "P") return "".join(cigar) else: return "".join( f"{L}{C}" for L, C in zip(self._lengths, _cigar_codes[states]) ) @classonlymethod def from_cigar(cls, cigar, starts=None): r"""Create a pairwise alignment path from a CIGAR string. Parameters ---------- cigar : str CIGAR format string used to build the PairAlignPath. starts : array_like of (int, int), optional Start position (0-based) of each sequence in the alignment. If omitted, will set as zeros. Returns ------- PairAlignPath The pairwise alignment path created from the given CIGAR string. Examples -------- >>> from skbio.alignment import PairAlignPath >>> cigar = "2M5P3D1I" >>> path = PairAlignPath.from_cigar(cigar) >>> path """ # Make sure cigar is not empty. 
if not cigar: raise ValueError("CIGAR string must not be empty.") lengths = [] gaps = [] current_length = 0 no_ones = True for char in cigar: if char.isdigit(): no_ones = False current_length = current_length * 10 + int(char) elif char in _cigar_mapping: if no_ones: lengths.append(current_length + 1) else: lengths.append(current_length) gaps.append(_cigar_mapping[char]) current_length = 0 no_ones = True else: raise ValueError("CIGAR string contains invalid character(s).") lengths, gaps = _fix_arrays(lengths=np.array(lengths), gaps=np.array(gaps)) return cls(lengths, gaps, [0, 0] if starts is None else starts) def _fix_arrays(lengths, gaps): r"""Merge consecutive same values from gaps array and sum corresponding values in lengths array. Parameters ---------- lengths : array_like of int of shape (n_segments,) Length of each segment in the alignment. gaps : array_like of uint8 of shape (n_segments,) or (n_packs, n_segments) Packed bits representing character (0) or gap (1) status per sequence per segment in the alignment. """ idx = np.diff(gaps, prepend=np.array([True])) != 0 gaps_out = np.asarray(gaps[idx]) groups = np.cumsum(idx) lengths_out = np.asarray(np.bincount(groups, weights=lengths).astype(int)[1:]) return lengths_out, gaps_out def _run_length_encode(s): r"""Perform run length encoding on a string. Parameters ---------- s : str String on which to perform run length encoding. """ input_arr = np.array(list(s)) idx = np.append(0, np.where(input_arr[:-1] != input_arr[1:])[0] + 1) count = np.diff(np.concatenate((idx, [len(s)]))) unique = input_arr[idx] return "".join(str(c) + u for c, u in zip(count, unique)) scikit-bio-0.6.2/skbio/alignment/_repr.py000066400000000000000000000050321464262511300203260ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from skbio.metadata._repr import _MetadataReprBuilder class _TabularMSAReprBuilder(_MetadataReprBuilder): def __init__(self, msa, width, indent): super(_TabularMSAReprBuilder, self).__init__(msa, width, indent) self._ellipse_insert = " ... " def _process_header(self): cls_name = self._obj.__class__.__name__ if self._obj.dtype is not None: dtype_class = "[" + self._obj.dtype.__name__ + "]" else: dtype_class = "" self._lines.add_line(cls_name + dtype_class) self._lines.add_separator() def _process_data(self): num_sequences = self._obj.shape.sequence num_positions = self._obj.shape.position # catch case of all empty sequences if num_positions > 0: # display all sequences if we can, else display the first two and # last two sequences separated by ellipsis if num_sequences <= 5: self._lines.add_lines(self._format_sequences(range(num_sequences))) else: self._lines.add_lines(self._format_sequences(range(2))) self._lines.add_line("...") self._lines.add_lines( self._format_sequences(range(num_sequences - 2, num_sequences)) ) def _format_sequences(self, sequence_indices): lines = [] for line_index in sequence_indices: seq_str = str(self._obj._get_sequence_iloc_(line_index)) if len(seq_str) <= self._width: formatted_seq = seq_str else: formatted_seq = ( seq_str[0 : self._num_characters_before_ellipse()] + self._ellipse_insert + seq_str[-self._num_characters_after_ellipse() :] ) lines.append(formatted_seq) return lines def _num_characters_before_ellipse(self): return int(self._num_characters_to_display() / 2) def _num_characters_after_ellipse(self): return self._num_characters_to_display() - self._num_characters_before_ellipse() def _num_characters_to_display(self): return self._width - len(self._ellipse_insert) 
scikit-bio-0.6.2/skbio/alignment/_ssw_wrapper.pyx000066400000000000000000000615441464262511300221340ustar00rootroot00000000000000# ----------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ----------------------------------------------------------------------------- from cpython cimport bool import numpy as np cimport numpy as cnp from skbio.sequence import Protein, Sequence cdef extern from "_lib/ssw.h": ctypedef struct s_align: cnp.uint16_t score1 cnp.uint16_t score2 cnp.int32_t ref_begin1 cnp.int32_t ref_end1 cnp.int32_t read_begin1 cnp.int32_t read_end1 cnp.int32_t ref_end2 cnp.uint32_t* cigar cnp.int32_t cigarLen ctypedef struct s_profile: pass cdef s_profile* ssw_init(const cnp.int8_t* read, const cnp.int32_t readLen, const cnp.int8_t* mat, const cnp.int32_t n, const cnp.int8_t score_size) cdef void init_destroy(s_profile* p) cdef s_align* ssw_align(const s_profile* prof, const cnp.int8_t* ref, cnp.int32_t refLen, const cnp.uint8_t weight_gapO, const cnp.uint8_t weight_gapE, const cnp.uint8_t flag, const cnp.uint16_t filters, const cnp.int32_t filterd, const cnp.int32_t maskLen) cdef void align_destroy(s_align* a) np_aa_table = np.array([ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23, 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23, 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23, 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23]) np_nt_table = np.array([ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]) mid_table = np.array(['M', 'I', 'D']) cdef class AlignmentStructure: """Wraps the result of an alignment c struct so it is accessible to Python Notes ----- `cigar` may be empty depending on parameters used. `target_begin` and `query_begin` may be -1 depending on parameters used. Developer note: `read_sequence` is an alias for `query_sequence` used by ssw.c as is `reference_sequence` for `target_sequence` """ cdef s_align *p cdef str read_sequence cdef str reference_sequence cdef int index_starts_at cdef str _cigar_string def __cinit__(self, read_sequence, reference_sequence, index_starts_at): # We use `read_sequence` and `reference_sequence` here as they are # treated sematically as a private output of ssw.c like the `s_align` # struct self.read_sequence = read_sequence self.reference_sequence = reference_sequence self.index_starts_at = index_starts_at cdef __constructor(self, s_align* pointer): self.p = pointer def __dealloc__(self): if self.p is not NULL: align_destroy(self.p) def __getitem__(self, key): return getattr(self, key) def __repr__(self): data = ['optimal_alignment_score', 'suboptimal_alignment_score', 'query_begin', 'query_end', 'target_begin', 'target_end_optimal', 'target_end_suboptimal', 'cigar', 'query_sequence', 'target_sequence'] return "{\n%s\n}" % ',\n'.join([ " {!r}: {!r}".format(k, self[k]) for k in data]) def __str__(self): score = "Score: %d" % self.optimal_alignment_score if self.query_sequence and self.cigar: target = self.aligned_target_sequence query = self.aligned_query_sequence align_len = len(query) if align_len > 13: target = target[:10] + "..." query = query[:10] + "..." 
length = "Length: %d" % align_len return "\n".join([query, target, score, length]) return score @property def optimal_alignment_score(self): """Optimal alignment score Returns ------- int The optimal alignment score """ return self.p.score1 @property def suboptimal_alignment_score(self): """Suboptimal alignment score Returns ------- int The suboptimal alignment score """ return self.p.score2 @property def target_begin(self): """Character index where the target's alignment begins Returns ------- int The character index of the target sequence's alignment's beginning Notes ----- The result is a 0-based index by default """ return self.p.ref_begin1 + self.index_starts_at if (self.p.ref_begin1 >= 0) else -1 @property def target_end_optimal(self): """Character index where the target's optimal alignment ends Returns ------- int The character index of the target sequence's optimal alignment's end Notes ----- The result is a 0-based index by default """ return self.p.ref_end1 + self.index_starts_at @property def target_end_suboptimal(self): """Character index where the target's suboptimal alignment ends Returns ------- int The character index of the target sequence's suboptimal alignment's end Notes ----- The result is a 0-based index by default """ return self.p.ref_end2 + self.index_starts_at @property def query_begin(self): """Returns the character index at which the query sequence begins Returns ------- int The character index of the query sequence beginning Notes ----- The result is a 0-based index by default """ return self.p.read_begin1 + self.index_starts_at if (self.p.read_begin1 >= 0) else -1 @property def query_end(self): """Character index at where query sequence ends Returns ------- int The character index of the query sequence ending Notes ----- The result is a 0-based index by default """ return self.p.read_end1 + self.index_starts_at @property def cigar(self): """Cigar formatted string for the optimal alignment Returns ------- str The cigar string of the 
optimal alignment Notes ----- The cigar string format is described in [1]_ and [2]_. If there is no cigar or optimal alignment, this will return an empty string References ---------- .. [1] http://genome.sph.umich.edu/wiki/SAM .. [2] http://samtools.github.io/hts-specs/SAMv1.pdf """ # Memoization! (1/2) if self._cigar_string is not None: return self._cigar_string cigar_list = [] for i in range(self.p.cigarLen): # stored the same as that in BAM format, # high 28 bits: length, low 4 bits: M/I/D (0/1/2) # Length, remove first 4 bits cigar_list.append(str(self.p.cigar[i] >> 4)) # M/I/D, lookup first 4 bits in the mid_table cigar_list.append(mid_table[self.p.cigar[i] & 0xf]) # Memoization! (2/2) self._cigar_string = "".join(cigar_list) return self._cigar_string @property def query_sequence(self): """Query sequence Returns ------- str The query sequence """ return self.read_sequence @property def target_sequence(self): """Target sequence Returns ------- str The target sequence """ return self.reference_sequence @property def aligned_query_sequence(self): """Returns the query sequence aligned by the cigar Returns ------- str Aligned query sequence Notes ----- This will return `None` if `suppress_sequences` was True when this object was created """ if self.query_sequence: return self._get_aligned_sequence(self.query_sequence, self._tuples_from_cigar(), self.query_begin, self.query_end, "D") return None @property def aligned_target_sequence(self): """Returns the target sequence aligned by the cigar Returns ------- str Aligned target sequence Notes ----- This will return `None` if `suppress_sequences` was True when this object was created """ if self.target_sequence: return self._get_aligned_sequence(self.target_sequence, self._tuples_from_cigar(), self.target_begin, self.target_end_optimal, "I") return None def set_zero_based(self, is_zero_based): """Set the aligment indices to start at 0 if True else 1 if False """ if is_zero_based: self.index_starts_at = 0 else: 
self.index_starts_at = 1 def is_zero_based(self): """Returns True if alignment inidices start at 0 else False Returns ------- bool Whether the alignment inidices start at 0 """ return self.index_starts_at == 0 def _get_aligned_sequence(self, sequence, tuple_cigar, begin, end, gap_type): # Save the original index scheme and then set it to 0 (1/2) orig_z_base = self.is_zero_based() self.set_zero_based(True) aligned_sequence = [] seq = sequence[begin:end + 1] index = 0 for length, mid in tuple_cigar: if mid == gap_type: aligned_sequence += ['-' * length] else: aligned_sequence += [seq[index:index + length]] index += length # Our sequence end is sometimes beyond the cigar: aligned_sequence += [seq[index:end - begin + 1]] # Revert our index scheme to the original (2/2) self.set_zero_based(orig_z_base) return "".join(aligned_sequence) def _tuples_from_cigar(self): tuples = [] length_stack = [] for character in self.cigar: if character.isdigit(): length_stack.append(character) else: tuples.append((int("".join(length_stack)), character)) length_stack = [] return tuples cdef class StripedSmithWaterman: """Performs a striped (banded) Smith Waterman Alignment. First a StripedSmithWaterman object must be instantiated with a query sequence. The resulting object is then callable with a target sequence and may be reused on a large collection of target sequences. Parameters ---------- query_sequence : string The query sequence, this may be upper or lowercase from the set of {A, C, G, T, N} (nucleotide) or from the set of {A, R, N, D, C, Q, E, G, H, I, L, K, M, F, P, S, T, W, Y, V, B, Z, X, * } (protein) gap_open_penalty : int, optional The penalty applied to creating a gap in the alignment. This CANNOT be 0. Default is 5. gap_extend_penalty : int, optional The penalty applied to extending a gap in the alignment. This CANNOT be 0. Default is 2. score_size : int, optional If your estimated best alignment score is < 255 this should be 0. 
If your estimated best alignment score is >= 255, this should be 1. If you don't know, this should be 2. Default is 2. mask_length : int, optional The distance between the optimal and suboptimal alignment ending position >= mask_length. We suggest to use len(query_sequence)/2, if you don't have special concerns. Detailed description of mask_length: After locating the optimal alignment ending position, the suboptimal alignment score can be heuristically found by checking the second largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the reference loci nearby (mask length = mask_length) the best alignment ending position and locates the second largest score from the unmasked elements. Default is 15. mask_auto : bool, optional This will automatically set the used mask length to be max(int(len(`query_sequence`)/2), `mask_length`). Default is True. score_only : bool, optional This will prevent the best alignment beginning positions (BABP) and the cigar from being returned as a result. This overrides any setting on `score_filter`, `distance_filter`, and `override_skip_babp`. It has the highest precedence. Default is False. score_filter : int, optional If set, this will prevent the cigar and best alignment beginning positions (BABP) from being returned if the optimal alignment score is less than `score_filter` saving some time computationally. This filter may be overridden by `score_only` (prevents BABP and cigar, regardless of other arguments), `distance_filter` (may prevent cigar, but will cause BABP to be calculated), and `override_skip_babp` (will ensure BABP) returned. Default is None. distance_filter : int, optional If set, this will prevent the cigar from being returned if the length of the `query_sequence` or the `target_sequence` is less than `distance_filter` saving some time computationally. 
The results of this filter may be overridden by `score_only` (prevents BABP and cigar, regardless of other arguments), and `score_filter` (may prevent cigar). `override_skip_babp` has no effect with this filter applied, as BABP must be calculated to perform the filter. Default is None. override_skip_babp : bool, optional When True, the best alignment beginning positions (BABP) will always be returned unless `score_only` is set to True. Default is False. protein : bool, optional When True, the `query_sequence` and `target_sequence` will be read as protein sequence. When False, the `query_sequence` and `target_sequence` will be read as nucleotide sequence. If True, a `substitution_matrix` must be supplied. Default is False. match_score : int, optional When using a nucleotide sequence, the match_score is the score added when a match occurs. This is ignored if `substitution_matrix` is provided. Default is 2. mismatch_score : int, optional When using a nucleotide sequence, the mismatch is the score subtracted when a mismatch occurs. This should be a negative integer. This is ignored if `substitution_matrix` is provided. Default is -3. substitution_matrix : 2D dict, optional Provides the score for each possible substitution of sequence characters. This may be used for protein or nucleotide sequences. The entire set of possible combinations for the relevant sequence type MUST be enumerated in the dict of dicts. This will override `match_score` and `mismatch_score`. Required when `protein` is True. Default is None. suppress_sequences : bool, optional If True, the query and target sequences will not be returned for convenience. Default is False. zero_index : bool, optional If True, all inidices will start at 0. If False, all inidices will start at 1. Default is True. Notes ----- This is a wrapper for the SSW package [1]_. `mask_length` has to be >= 15, otherwise the suboptimal alignment information will NOT be returned. 
`match_score` is a positive integer and `mismatch_score` is a negative integer. `match_score` and `mismatch_score` are only meaningful in the context of nucleotide sequences. A substitution matrix must be provided when working with protein sequences. References ---------- .. [1] Zhao, Mengyao, Wan-Ping Lee, Erik P. Garrison, & Gabor T. Marth. "SSW Library: An SIMD Smith-Waterman C/C++ Library for Applications". PLOS ONE (2013). Web. 11 July 2014. http://www.plosone.org/article/info:doi/10.1371/journal.pone.0082138 """ cdef s_profile *profile cdef cnp.uint8_t gap_open_penalty cdef cnp.uint8_t gap_extend_penalty cdef cnp.uint8_t bit_flag cdef cnp.uint16_t score_filter cdef cnp.int32_t distance_filter cdef cnp.int32_t mask_length cdef str read_sequence cdef int index_starts_at cdef bool is_protein cdef bool suppress_sequences cdef cnp.ndarray __KEEP_IT_IN_SCOPE_read cdef cnp.ndarray __KEEP_IT_IN_SCOPE_matrix def __cinit__(self, query_sequence, gap_open_penalty=5, # BLASTN Default gap_extend_penalty=2, # BLASTN Default score_size=2, # BLASTN Default mask_length=15, # Minimum length for a suboptimal alignment mask_auto=True, score_only=False, score_filter=None, distance_filter=None, override_skip_babp=False, protein=False, match_score=2, # BLASTN Default mismatch_score=-3, # BLASTN Default substitution_matrix=None, suppress_sequences=False, zero_index=True): # initialize our values self.read_sequence = query_sequence if gap_open_penalty <= 0: raise ValueError("`gap_open_penalty` must be > 0") self.gap_open_penalty = gap_open_penalty if gap_extend_penalty <= 0: raise ValueError("`gap_extend_penalty` must be > 0") self.gap_extend_penalty = gap_extend_penalty self.distance_filter = 0 if distance_filter is None else \ distance_filter self.score_filter = 0 if score_filter is None else score_filter self.suppress_sequences = suppress_sequences self.is_protein = protein self.bit_flag = self._get_bit_flag(override_skip_babp, score_only) # 
http://www.cs.utexas.edu/users/EWD/transcriptions/EWD08xx/EWD831.html # Dijkstra knows what's up: self.index_starts_at = 0 if zero_index else 1 # set up our matrix cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] matrix if substitution_matrix is None: if protein: raise Exception("Must provide a substitution matrix for" " protein sequences") matrix = self._build_match_matrix(match_score, mismatch_score) else: matrix = self._convert_dict2d_to_matrix(substitution_matrix) # Set up our mask_length # Mask is recommended to be max(query_sequence/2, 15) if mask_auto: self.mask_length = len(query_sequence) // 2 if self.mask_length < mask_length: self.mask_length = mask_length else: self.mask_length = mask_length cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] read_seq read_seq = self._seq_converter(query_sequence) cdef cnp.int32_t read_length read_length = len(query_sequence) cdef cnp.int8_t s_size s_size = score_size cdef cnp.int32_t m_width m_width = 24 if self.is_protein else 5 cdef s_profile* p self.profile = ssw_init( read_seq.data, read_length, matrix.data, m_width, s_size) # A hack to keep the python GC from eating our data self.__KEEP_IT_IN_SCOPE_read = read_seq self.__KEEP_IT_IN_SCOPE_matrix = matrix def __call__(self, target_sequence): """Align `target_sequence` to `query_sequence` Parameters ---------- target_sequence : str Returns ------- skbio.alignment.AlignmentStructure The resulting alignment. 
""" reference_sequence = target_sequence cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] reference reference = self._seq_converter(reference_sequence) cdef cnp.int32_t ref_length ref_length = len(reference_sequence) cdef s_align *align align = ssw_align(self.profile, reference.data, ref_length, self.gap_open_penalty, self.gap_extend_penalty, self.bit_flag, self.score_filter, self.distance_filter, self.mask_length) # Cython won't let me do this correctly, so duplicate code ahoy: if self.suppress_sequences: alignment = AlignmentStructure("", "", self.index_starts_at) else: alignment = AlignmentStructure(self.read_sequence, reference_sequence, self.index_starts_at) alignment.__constructor(align) # Hack to get a pointer through return alignment def __dealloc__(self): if self.profile is not NULL: init_destroy(self.profile) def _get_bit_flag(self, override_skip_babp, score_only): bit_flag = 0 if score_only: return bit_flag if override_skip_babp: bit_flag = bit_flag | 0x8 if self.distance_filter != 0: bit_flag = bit_flag | 0x4 if self.score_filter != 0: bit_flag = bit_flag | 0x2 if bit_flag == 0 or bit_flag == 8: bit_flag = bit_flag | 0x1 return bit_flag cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] _seq_converter( self, sequence): cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] seq seq = np.empty(len(sequence), dtype=np.int8) if self.is_protein: for i, char in enumerate(sequence): seq[i] = np_aa_table[ord(char)] else: for i, char in enumerate(sequence): seq[i] = np_nt_table[ord(char)] return seq cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] \ _build_match_matrix(self, match_score, mismatch_score): sequence_order = "ACGTN" dict2d = {} for row in sequence_order: dict2d[row] = {} for column in sequence_order: if column == 'N' or row == 'N': dict2d[row][column] = 0 else: dict2d[row][column] = match_score if row == column \ else mismatch_score return self._convert_dict2d_to_matrix(dict2d) cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] \ 
_convert_dict2d_to_matrix(self, dict2d): if self.is_protein: sequence_order = "ARNDCQEGHILKMFPSTWYVBZX*" else: sequence_order = "ACGTN" cdef int i = 0 length = len(sequence_order) cdef cnp.ndarray[cnp.int8_t, ndim = 1, mode = "c"] py_list_matrix = \ np.empty(length*length, dtype=np.int8) for row in sequence_order: for column in sequence_order: py_list_matrix[i] = dict2d[row][column] i += 1 return py_list_matrix scikit-bio-0.6.2/skbio/alignment/_tabular_msa.py000066400000000000000000002364331464262511300216630ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import collections import copy import numpy as np import pandas as pd import scipy.stats from skbio._base import SkbioObject from skbio.metadata._mixin import MetadataMixin, PositionalMetadataMixin from skbio.sequence import Sequence from skbio.sequence._grammared_sequence import GrammaredSequence from skbio.util._decorator import classonlymethod, overrides from skbio.util._misc import resolve_key from skbio.alignment._indexing import TabularMSAILoc, TabularMSALoc from skbio.alignment._repr import _TabularMSAReprBuilder _Shape = collections.namedtuple("Shape", ["sequence", "position"]) class TabularMSA(MetadataMixin, PositionalMetadataMixin, SkbioObject): """Store a multiple sequence alignment in tabular (row/column) form. Parameters ---------- sequences : iterable of GrammaredSequence, TabularMSA Aligned sequences in the MSA. Sequences must all be the same type and length. For example, `sequences` could be an iterable of ``DNA``, ``RNA``, or ``Protein`` sequences. 
If `sequences` is a ``TabularMSA``, its `metadata`, `positional_metadata`, and `index` will be used unless overridden by parameters `metadata`, `positional_metadata`, and `minter`/`index`, respectively. metadata : dict, optional Arbitrary metadata which applies to the entire MSA. A shallow copy of the ``dict`` will be made. positional_metadata : pd.DataFrame consumable, optional Arbitrary metadata which applies to each position in the MSA. Must be able to be passed directly to ``pd.DataFrame`` constructor. Each column of metadata must be the same length as the number of positions in the MSA. A shallow copy of the positional metadata will be made. minter : callable or metadata key, optional If provided, defines an index label for each sequence in `sequences`. Can either be a callable accepting a single argument (each sequence) or a key into each sequence's ``metadata`` attribute. Note that `minter` cannot be combined with `index`. index : pd.Index consumable, optional Index containing labels for `sequences`. Must be the same length as `sequences`. Must be able to be passed directly to ``pd.Index`` constructor. Note that `index` cannot be combined with `minter` and the contents of `index` must be hashable. Raises ------ ValueError If `minter` and `index` are both provided. ValueError If `index` is not the same length as `sequences`. TypeError If `sequences` contains an object that isn't a ``GrammaredSequence``. TypeError If `sequences` does not contain exactly the same type of ``GrammaredSequence`` objects. ValueError If `sequences` does not contain ``GrammaredSequence`` objects of the same length. See Also -------- skbio.sequence.DNA skbio.sequence.RNA skbio.sequence.Protein pandas.DataFrame pandas.Index reassign_index Notes ----- If neither `minter` nor `index` are provided, default index labels will be used: ``pd.RangeIndex(start=0, stop=len(sequences), step=1)``. 
Examples -------- Create a ``TabularMSA`` object with three DNA sequences and four positions: >>> from skbio import DNA, TabularMSA >>> seqs = [ ... DNA('ACGT'), ... DNA('AG-T'), ... DNA('-C-T') ... ] >>> msa = TabularMSA(seqs) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 4 --------------------- ACGT AG-T -C-T Since `minter` or `index` wasn't provided, the MSA has default index labels: >>> msa.index RangeIndex(start=0, stop=3, step=1) Create an MSA with metadata, positional metadata, and non-default index labels: >>> msa = TabularMSA(seqs, index=['seq1', 'seq2', 'seq3'], ... metadata={'id': 'msa-id'}, ... positional_metadata={'prob': [3, 4, 2, 2]}) >>> msa TabularMSA[DNA] -------------------------- Metadata: 'id': 'msa-id' Positional metadata: 'prob': Stats: sequence count: 3 position count: 4 -------------------------- ACGT AG-T -C-T >>> msa.index Index(['seq1', 'seq2', 'seq3'], dtype='object') """ default_write_format = "fasta" __hash__ = None @property def dtype(self): """Data type of the stored sequences. Notes ----- This property is not writeable. Examples -------- >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> msa.dtype >>> msa.dtype is DNA True """ return type(self._get_sequence_iloc_(0)) if len(self) > 0 else None @property def shape(self): """Number of sequences (rows) and positions (columns). Notes ----- This property is not writeable. 
Examples -------- >>> from skbio import DNA, TabularMSA Create a ``TabularMSA`` object with 2 sequences and 3 positions: >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> msa.shape Shape(sequence=2, position=3) >>> msa.shape == (2, 3) True Dimensions can be accessed by index or by name: >>> msa.shape[0] 2 >>> msa.shape.sequence 2 >>> msa.shape[1] 3 >>> msa.shape.position 3 """ sequence_count = len(self) if sequence_count > 0: position_count = len(self._get_sequence_iloc_(0)) else: position_count = 0 return _Shape(sequence=sequence_count, position=position_count) @property def index(self): """Index containing labels along the sequence axis. Returns ------- pd.Index Index containing sequence labels. See Also -------- reassign_index Notes ----- This property can be set and deleted. Deleting the index will reset the index to the ``TabularMSA`` constructor's default. Examples -------- Create a ``TabularMSA`` object with sequences labeled by sequence identifier: >>> from skbio import DNA, TabularMSA >>> seqs = [DNA('ACG', metadata={'id': 'a'}), ... DNA('AC-', metadata={'id': 'b'}), ... DNA('AC-', metadata={'id': 'c'})] >>> msa = TabularMSA(seqs, minter='id') Retrieve index: >>> msa.index Index(['a', 'b', 'c'], dtype='object') Set index: >>> msa.index = ['seq1', 'seq2', 'seq3'] >>> msa.index Index(['seq1', 'seq2', 'seq3'], dtype='object') Deleting the index resets it to the ``TabularMSA`` constructor's default: >>> del msa.index >>> msa.index RangeIndex(start=0, stop=3, step=1) """ return self._seqs.index @index.setter def index(self, index): # Cast to Index to identify tuples as a MultiIndex to match # pandas constructor. Just setting would make an index of tuples. if not isinstance(index, pd.Index): index = pd.Index(index) self._seqs.index = index @index.deleter def index(self): # Create a memory-efficient integer index as the default MSA index. 
self._seqs.index = pd.RangeIndex(start=0, stop=len(self), step=1) @property def loc(self): """Slice the MSA on first axis by index label, second axis by position. This will return an object with the following interface: .. code-block:: python msa.loc[seq_idx] msa.loc[seq_idx, pos_idx] msa.loc(axis='sequence')[seq_idx] msa.loc(axis='position')[pos_idx] Parameters ---------- seq_idx : label, slice, 1D array_like (bool or label) Slice the first axis of the MSA. When this value is a scalar, a sequence of ``msa.dtype`` will be returned. This may be further sliced by `pos_idx`. pos_idx : (same as seq_idx), optional Slice the second axis of the MSA. When this value is a scalar, a sequence of type :class:`skbio.sequence.Sequence` will be returned. This represents a column of the MSA and may have been additionally sliced by `seq_idx`. axis : {'sequence', 'position', 0, 1, None}, optional Limit the axis to slice on. When set, a tuple as the argument will no longer be split into `seq_idx` and `pos_idx`. Returns ------- TabularMSA, GrammaredSequence, Sequence A ``TabularMSA`` is returned when `seq_idx` and `pos_idx` are non-scalars. A ``GrammaredSequence`` of type ``msa.dtype`` is returned when `seq_idx` is a scalar (this object will match the dtype of the MSA). A ``Sequence`` is returned when `seq_idx` is non-scalar and `pos_idx` is scalar. See Also -------- iloc __getitem__ Notes ----- If the slice operation results in a ``TabularMSA`` without any sequences, the MSA's ``positional_metadata`` will be unset. When the MSA's index is a ``pd.MultiIndex`` a tuple may be given to `seq_idx` to indicate the slicing operations to perform on each component index. Examples -------- First we need to set up an MSA to slice: >>> from skbio import TabularMSA, DNA >>> msa = TabularMSA([DNA("ACGT"), DNA("A-GT"), DNA("AC-T"), ... 
DNA("ACGA")], index=['a', 'b', 'c', 'd']) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 4 position count: 4 --------------------- ACGT A-GT AC-T ACGA >>> msa.index Index(['a', 'b', 'c', 'd'], dtype='object') When we slice by a scalar we get the original sequence back out of the MSA: >>> msa.loc['b'] DNA -------------------------- Stats: length: 4 has gaps: True has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 A-GT Similarly when we slice the second axis by a scalar we get a column of the MSA: >>> msa.loc[..., 1] Sequence ------------- Stats: length: 4 ------------- 0 C-CC Note: we return an ``skbio.Sequence`` object because the column of an alignment has no biological meaning and many operations defined for the MSA's sequence `dtype` would be meaningless. When we slice both axes by a scalar, operations are applied left to right: >>> msa.loc['a', 0] DNA -------------------------- Stats: length: 1 has gaps: False has degenerates: False has definites: True GC-content: 0.00% -------------------------- 0 A In other words, it exactly matches slicing the resulting sequence object directly: >>> msa.loc['a'][0] DNA -------------------------- Stats: length: 1 has gaps: False has degenerates: False has definites: True GC-content: 0.00% -------------------------- 0 A When our slice is non-scalar we get back an MSA of the same `dtype`: >>> msa.loc[['a', 'c']] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- ACGT AC-T We can similarly slice out a column of that: >>> msa.loc[['a', 'c'], 2] Sequence ------------- Stats: length: 2 ------------- 0 G- Slice syntax works as well: >>> msa.loc[:'c'] TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 4 --------------------- ACGT A-GT AC-T Notice how the end label is included in the results. 
This is different from how positional slices behave: >>> msa.loc[[True, False, False, True], 2:3] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 1 --------------------- G G Here we sliced the first axis by a boolean vector, but then restricted the columns to a single column. Because the second axis was given a nonscalar we still recieve an MSA even though only one column is present. Duplicate labels can be an unfortunate reality in the real world, however `loc` is capable of handling this: >>> msa.index = ['a', 'a', 'b', 'c'] Notice how the label 'a' happens twice. If we were to access 'a' we get back an MSA with both sequences: >>> msa.loc['a'] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- ACGT A-GT Remember that `iloc` can always be used to differentiate sequences with duplicate labels. More advanced slicing patterns are possible with different index types. Let's use a `pd.MultiIndex`: >>> msa.index = [('a', 0), ('a', 1), ('b', 0), ('b', 1)] Here we will explicitly set the axis that we are slicing by to make things easier to read: >>> msa.loc(axis='sequence')['a', 0] DNA -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True GC-content: 50.00% -------------------------- 0 ACGT This selected the first sequence because the complete label was provided. In other words `('a', 0)` was treated as a scalar for this index. We can also slice along the component indices of the multi-index: >>> msa.loc(axis='sequence')[:, 1] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- A-GT ACGA If we were to do that again without the `axis` argument, it would look like this: >>> msa.loc[(slice(None), 1), ...] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- A-GT ACGA Notice how we needed to specify the second axis. 
If we had left that out we would have simply gotten the 2nd column back instead. We also lost the syntactic sugar for slice objects. These are a few of the reasons specifying the `axis` preemptively can be useful. """ return self._loc @property def iloc(self): """Slice the MSA on either axis by index position. This will return an object with the following interface: .. code-block:: python msa.iloc[seq_idx] msa.iloc[seq_idx, pos_idx] msa.iloc(axis='sequence')[seq_idx] msa.iloc(axis='position')[pos_idx] Parameters ---------- seq_idx : int, slice, iterable (int and slice), 1D array_like (bool) Slice the first axis of the MSA. When this value is a scalar, a sequence of ``msa.dtype`` will be returned. This may be further sliced by `pos_idx`. pos_idx : (same as seq_idx), optional Slice the second axis of the MSA. When this value is a scalar, a sequence of type :class:`skbio.sequence.Sequence` will be returned. This represents a column of the MSA and may have been additionally sliced by `seq_idx`. axis : {'sequence', 'position', 0, 1, None}, optional Limit the axis to slice on. When set, a tuple as the argument will no longer be split into `seq_idx` and `pos_idx`. Returns ------- TabularMSA, GrammaredSequence, Sequence A ``TabularMSA`` is returned when `seq_idx` and `pos_idx` are non-scalars. A ``GrammaredSequence`` of type ``msa.dtype`` is returned when `seq_idx` is a scalar (this object will match the dtype of the MSA). A ``Sequence`` is returned when `seq_idx` is non-scalar and `pos_idx` is scalar. See Also -------- __getitem__ loc Notes ----- If the slice operation results in a ``TabularMSA`` without any sequences, the MSA's ``positional_metadata`` will be unset. Examples -------- First we need to set up an MSA to slice: >>> from skbio import TabularMSA, DNA >>> msa = TabularMSA([DNA("ACGT"), DNA("A-GT"), DNA("AC-T"), ... 
DNA("ACGA")]) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 4 position count: 4 --------------------- ACGT A-GT AC-T ACGA When we slice by a scalar we get the original sequence back out of the MSA: >>> msa.iloc[1] DNA -------------------------- Stats: length: 4 has gaps: True has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 A-GT Similarly when we slice the second axis by a scalar we get a column of the MSA: >>> msa.iloc[..., 1] Sequence ------------- Stats: length: 4 ------------- 0 C-CC Note: we return an ``skbio.Sequence`` object because the column of an alignment has no biological meaning and many operations defined for the MSA's sequence `dtype` would be meaningless. When we slice both axes by a scalar, operations are applied left to right: >>> msa.iloc[0, 0] DNA -------------------------- Stats: length: 1 has gaps: False has degenerates: False has definites: True GC-content: 0.00% -------------------------- 0 A In other words, it exactly matches slicing the resulting sequence object directly: >>> msa.iloc[0][0] DNA -------------------------- Stats: length: 1 has gaps: False has degenerates: False has definites: True GC-content: 0.00% -------------------------- 0 A When our slice is non-scalar we get back an MSA of the same `dtype`: >>> msa.iloc[[0, 2]] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- ACGT AC-T We can similarly slice out a column of that: >>> msa.iloc[[0, 2], 2] Sequence ------------- Stats: length: 2 ------------- 0 G- Slice syntax works as well: >>> msa.iloc[:3] TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 4 --------------------- ACGT A-GT AC-T We can also use boolean vectors: >>> msa.iloc[[True, False, False, True], 2:3] TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 1 --------------------- G G Here we sliced the first axis by a boolean vector, but then 
restricted the columns to a single column. Because the second axis was given a nonscalar we still recieve an MSA even though only one column is present. """ return self._iloc @classonlymethod def from_dict(cls, dictionary): """Create a ``TabularMSA`` from a ``dict``. Parameters ---------- dictionary : dict Dictionary mapping keys to ``GrammaredSequence`` sequence objects. The ``TabularMSA`` object will have its index labels set to the keys in the dictionary. Returns ------- TabularMSA ``TabularMSA`` object constructed from the keys and sequences in `dictionary`. See Also -------- to_dict sort Notes ----- The order of sequences and index labels in the resulting ``TabularMSA`` object is arbitrary. Use ``TabularMSA.sort`` to set a different order. Examples -------- >>> from skbio import DNA, TabularMSA >>> seqs = {'a': DNA('ACGT'), 'b': DNA('A--T')} >>> msa = TabularMSA.from_dict(seqs) >>> msa.shape Shape(sequence=2, position=4) >>> 'a' in msa True >>> 'b' in msa True """ # Python 3 guarantees same order of iteration as long as no # modifications are made to the dictionary between calls: # https://docs.python.org/3/library/stdtypes.html# # dictionary-view-objects return cls(dictionary.values(), index=dictionary.keys()) def __init__( self, sequences, metadata=None, positional_metadata=None, minter=None, index=None, ): if isinstance(sequences, TabularMSA): if metadata is None and sequences.has_metadata(): metadata = sequences.metadata if positional_metadata is None and sequences.has_positional_metadata(): positional_metadata = sequences.positional_metadata if minter is None and index is None: index = sequences.index # Give a better error message than the one raised by `extend` (it # references `reset_index`, which isn't a constructor parameter). 
if minter is not None and index is not None: raise ValueError("Cannot use both `minter` and `index` at the same time.") self._seqs = pd.Series([], dtype=object) self.extend( sequences, minter=minter, index=index, reset_index=minter is None and index is None, ) MetadataMixin._init_(self, metadata=metadata) PositionalMetadataMixin._init_(self, positional_metadata=positional_metadata) # Set up our indexers self._loc = TabularMSALoc(self) self._iloc = TabularMSAILoc(self) def _constructor_( self, sequences=NotImplemented, metadata=NotImplemented, positional_metadata=NotImplemented, index=NotImplemented, ): """Return new copy of the MSA with overridden properties. NotImplemented is used as a sentinel so that None may be used to override values. """ if metadata is NotImplemented: if self.has_metadata(): metadata = self.metadata else: metadata = None if positional_metadata is NotImplemented: if self.has_positional_metadata(): positional_metadata = self.positional_metadata else: positional_metadata = None if index is NotImplemented: if isinstance(sequences, pd.Series): index = sequences.index else: index = self.index if sequences is NotImplemented: sequences = self._seqs sequences = [copy.copy(s) for s in sequences] return self.__class__( sequences, metadata=metadata, positional_metadata=positional_metadata, index=index, ) def __repr__(self): """Return string summary of this MSA.""" pep8_line_length_limit = 79 length_taken_by_docstring_indent = 8 width = pep8_line_length_limit - length_taken_by_docstring_indent return _TabularMSAReprBuilder(msa=self, width=width, indent=4).build() def _repr_stats(self): return [ ("sequence count", str(self.shape.sequence)), ("position count", str(self.shape.position)), ] def __bool__(self): """Boolean indicating whether the MSA is empty or not. Returns ------- bool ``False`` if there are no sequences, OR if there are no positions (i.e., all sequences are empty). ``True`` otherwise. 
Examples -------- >>> from skbio import DNA, TabularMSA MSA with sequences and positions: >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> bool(msa) True No sequences: >>> msa = TabularMSA([]) >>> bool(msa) False No positions: >>> msa = TabularMSA([DNA(''), DNA('')]) >>> bool(msa) False """ # It is impossible to have 0 sequences and >0 positions. # TODO: change for #1198 return self.shape.position > 0 def __contains__(self, label): """Determine if an index label is in this MSA. Parameters ---------- label : hashable Label to search for in this MSA. Returns ------- bool Indicates whether `label` is in this MSA. Examples -------- >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')], index=['l1', 'l2']) >>> 'l1' in msa True >>> 'l2' in msa True >>> 'l3' in msa False """ return label in self.index def __len__(self): """Return number of sequences in the MSA. Returns ------- int Number of sequences in the MSA (i.e., size of the 1st dimension). Notes ----- This is equivalent to ``msa.shape[0]``. Examples -------- >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> len(msa) 2 >>> msa = TabularMSA([]) >>> len(msa) 0 """ return len(self._seqs) def __iter__(self): """Iterate over sequences in the MSA. Yields ------ GrammaredSequence Each sequence in the order they are stored in the MSA. Examples -------- >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> for seq in msa: ... str(seq) 'ACG' 'AC-' """ return iter(self._seqs) def __reversed__(self): """Iterate in reverse order over sequences in the MSA. Yields ------ GrammaredSequence Each sequence in reverse order from how they are stored in the MSA. Examples -------- >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> for seq in reversed(msa): ... 
str(seq) 'AC-' 'ACG' """ return reversed(self._seqs) def __str__(self): """Return string summary of this MSA.""" return self.__repr__() def __eq__(self, other): """Determine if this MSA is equal to another. ``TabularMSA`` objects are equal if their sequences, index, metadata, and positional metadata are equal. Parameters ---------- other : TabularMSA MSA to test for equality against. Returns ------- bool Indicates whether this MSA is equal to `other`. Examples -------- >>> from skbio import DNA, RNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> msa == msa True MSAs with different sequence characters are not equal: >>> msa == TabularMSA([DNA('ACG'), DNA('--G')]) False MSAs with different types of sequences (different ``dtype``) are not equal: >>> msa == TabularMSA([RNA('ACG'), RNA('AC-')]) False MSAs with different sequence metadata are not equal: >>> msa == TabularMSA([DNA('ACG', metadata={'id': 'a'}), DNA('AC-')]) False MSAs with different index labels are not equal: >>> msa == TabularMSA([DNA('ACG'), DNA('AC-')], minter=str) False MSAs with different metadata are not equal: >>> msa == TabularMSA([DNA('ACG'), DNA('AC-')], ... metadata={'id': 'msa-id'}) False MSAs with different positional metadata are not equal: >>> msa == TabularMSA([DNA('ACG'), DNA('AC-')], ... positional_metadata={'prob': [3, 2, 1]}) False """ if not isinstance(other, TabularMSA): return False if not MetadataMixin._eq_(self, other): return False if not PositionalMetadataMixin._eq_(self, other): return False return self._seqs.equals(other._seqs) def __ne__(self, other): """Determine if this MSA is not equal to another. ``TabularMSA`` objects are not equal if their sequences, index, metadata, or positional metadata are not equal. Parameters ---------- other : TabularMSA MSA to test for inequality against. Returns ------- bool Indicates whether this MSA is not equal to `other`. 
See Also -------- __eq__ """ return not (self == other) def __copy__(self): """Return a shallow copy of this MSA. Returns ------- TabularMSA Shallow copy of this MSA. Sequence objects will be shallow-copied. See Also -------- __deepcopy__ Examples -------- >>> import copy >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> msa_copy = copy.copy(msa) >>> msa_copy == msa True >>> msa_copy is msa False """ msa_copy = self._constructor_() msa_copy._metadata = MetadataMixin._copy_(self) msa_copy._positional_metadata = PositionalMetadataMixin._copy_(self) return msa_copy def __deepcopy__(self, memo): """Return a deep copy of this MSA. Returns ------- TabularMSA Deep copy of this MSA. Sequence objects will be deep-copied. See Also -------- __copy__ Examples -------- >>> import copy >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), DNA('AC-')]) >>> msa_copy = copy.deepcopy(msa) >>> msa_copy == msa True >>> msa_copy is msa False """ seqs = (copy.deepcopy(seq, memo) for seq in self._seqs) msa_copy = self._constructor_(sequences=seqs) msa_copy._metadata = MetadataMixin._deepcopy_(self, memo) msa_copy._positional_metadata = PositionalMetadataMixin._deepcopy_(self, memo) return msa_copy def __getitem__(self, indexable): """Slice the MSA on either axis. This is a pass-through for :func:`skbio.alignment.TabularMSA.iloc`. Please refer to the associated documentation. See Also -------- iloc loc Notes ----- Axis restriction is not possible for this method. To slice by labels, use ``loc``. 
        """
        return self.iloc[indexable]

    # Helpers for TabularMSAILoc and TabularMSALoc
    def _get_sequence_iloc_(self, i):
        # Positional lookup of a single stored sequence (scalar i) or a
        # pd.Series of sequences (non-scalar i).
        return self._seqs.iloc[i]

    def _slice_sequences_iloc_(self, i):
        # Positional slice along the sequence axis, returning a new MSA.
        new_seqs = self._seqs.iloc[i]
        # An empty slice must drop positional metadata, since the resulting
        # MSA has zero positions.
        # TODO: change for #1198
        if len(new_seqs) == 0:
            return self._constructor_(new_seqs, positional_metadata=None)
        return self._constructor_(new_seqs)

    def _get_sequence_loc_(self, ids):
        # Label-based lookup of a single sequence. `.loc` may return either
        # the sequence itself or a length-1 Series depending on index type.
        new_seqs = self._seqs.loc[ids]
        if type(new_seqs) is self.dtype:
            return new_seqs
        else:
            # Thanks CategoricalIndex, you understand no such thing as a scalar
            if len(new_seqs) == 1:
                return new_seqs.iloc[0]
            else:
                # This was a common failure mode; shouldn't happen anymore, but
                # it could strike again.
                raise AssertionError(
                    "Something went wrong with the index %r provided to"
                    " `_get_sequence_loc_`, please report this stack trace to"
                    "\nhttps://github.com/scikit-bio/scikit-bio/issues" % ids
                )

    def _slice_sequences_loc_(self, ids):
        # Label-based slice along the sequence axis, returning a new MSA.
        new_seqs = self._seqs.loc[ids]
        try:
            # TODO: change for #1198
            if len(new_seqs) == 0:
                return self._constructor_(new_seqs, positional_metadata=None)
            return self._constructor_(new_seqs)
        except TypeError:
            # NaN hit the constructor, key was bad... probably
            raise KeyError("Part of `%r` was not in the index." % ids)

    def _get_position_(self, i, ignore_metadata=False):
        # Extract column `i` of the alignment as a generic ``Sequence``.
        # The fast path skips per-sequence metadata handling entirely.
        if ignore_metadata:
            return Sequence("".join([str(s[i]) for s in self._seqs]))

        # Outer-join concatenation preserves each sequence's positional
        # metadata columns even when they differ across sequences.
        seq = Sequence.concat([s[i] for s in self._seqs], how="outer")
        # TODO: change for #1198
        if len(self) and self.has_positional_metadata():
            # The MSA's positional metadata for this column becomes the
            # returned sequence's (non-positional) metadata.
            seq.metadata = dict(self.positional_metadata.iloc[i])
        return seq

    def _slice_positions_(self, i):
        # Slice every sequence along the position axis and carry the
        # corresponding rows of positional metadata into the new MSA.
        seqs = self._seqs.apply(lambda seq: seq[i])
        # TODO: change for #1198
        pm = None
        if len(self) and self.has_positional_metadata():
            pm = self.positional_metadata.iloc[i]
        return self._constructor_(seqs, positional_metadata=pm)

    # end of helpers
    def iter_positions(self, reverse=False, ignore_metadata=False):
        """Iterate over positions (columns) in the MSA.
Parameters ---------- reverse : bool, optional If ``True``, iterate over positions in reverse order. ignore_metadata : bool, optional If ``True``, ``Sequence.metadata`` and ``Sequence.positional_metadata`` will not be included. This can significantly improve performance if metadata is not needed. Yields ------ Sequence Each position in the order they are stored in the MSA. See Also -------- __iter__ __reversed__ skbio.sequence.Sequence.concat Notes ----- Each position will be yielded as *exactly* a ``Sequence`` object, regardless of this MSA's ``dtype``. ``Sequence`` is used because a position is an artifact of multiple sequence alignment and is not a real biological sequence. Each ``Sequence`` object will have its corresponding MSA positional metadata stored as ``metadata`` unless ``ignore_metadata`` is set to ``True``. Sequences will have their positional metadata concatenated using an outer join unless ``ignore_metadata`` is set to ``True``. See ``Sequence.concat(how='outer')`` for details. Examples -------- Create an MSA with positional metadata: >>> from skbio import DNA, TabularMSA >>> sequences = [DNA('ACG'), ... DNA('A-T')] >>> msa = TabularMSA(sequences, ... positional_metadata={'prob': [3, 1, 2]}) Iterate over positions: >>> for position in msa.iter_positions(): ... position ... print() Sequence ------------- Metadata: 'prob': 3 Stats: length: 2 ------------- 0 AA Sequence ------------- Metadata: 'prob': 1 Stats: length: 2 ------------- 0 C- Sequence ------------- Metadata: 'prob': 2 Stats: length: 2 ------------- 0 GT Note that MSA positional metadata is stored as ``metadata`` on each ``Sequence`` object. Iterate over positions in reverse order: >>> for position in msa.iter_positions(reverse=True): ... position ... 
print('') Sequence ------------- Metadata: 'prob': 2 Stats: length: 2 ------------- 0 GT Sequence ------------- Metadata: 'prob': 1 Stats: length: 2 ------------- 0 C- Sequence ------------- Metadata: 'prob': 3 Stats: length: 2 ------------- 0 AA """ indices = range(self.shape.position) if reverse: indices = reversed(indices) return ( self._get_position_(index, ignore_metadata=ignore_metadata) for index in indices ) def consensus(self): """Compute the majority consensus sequence for this MSA. The majority consensus sequence contains the most common character at each position in this MSA. Ties will be broken in an arbitrary manner. Returns ------- Sequence The majority consensus sequence for this MSA. The type of sequence returned will be the same as this MSA's ``dtype`` or ``Sequence`` if this MSA does not contain any sequences. The majority consensus sequence will have its positional metadata set to this MSA's positional metadata if present. Notes ----- The majority consensus sequence will use this MSA's default gap character (``dtype.default_gap_char``) to represent gap majority at a position, regardless of the gap characters present at that position. Different gap characters at a position are **not** treated as distinct characters. All gap characters at a position contribute to that position's gap consensus. Examples -------- >>> from skbio import DNA, TabularMSA >>> sequences = [DNA('AC---'), ... DNA('AT-C.'), ... DNA('TT-CG')] >>> msa = TabularMSA(sequences, ... positional_metadata={'prob': [2, 1, 2, 3, 5]}) >>> msa.consensus() DNA -------------------------- Positional metadata: 'prob': Stats: length: 5 has gaps: True has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 AT-C- Note that the last position in the MSA has more than one type of gap character. These are not treated as distinct characters; both types of gap characters contribute to the position's consensus. 
Also note that ``DNA.default_gap_char`` is used to represent gap majority at a position (``'-'``). """ dtype = self.dtype if dtype is None: dtype = Sequence positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata consensus = [] for position in self.iter_positions(ignore_metadata=True): freqs = position.frequencies() gap_freq = 0 for gap_char in dtype.gap_chars: if gap_char in freqs: gap_freq += freqs.pop(gap_char) assert dtype.default_gap_char not in freqs freqs[dtype.default_gap_char] = gap_freq consensus.append(collections.Counter(freqs).most_common(1)[0][0]) return dtype("".join(consensus), positional_metadata=positional_metadata) def _build_inverse_shannon_uncertainty_f(self, include_gaps): base = len(self.dtype.definite_chars) if include_gaps: # Increment the base by one to reflect the possible inclusion of # the default gap character. base += 1 def f(p): freqs = list(p.frequencies().values()) return 1.0 - scipy.stats.entropy(freqs, base=base) return f def conservation( self, metric="inverse_shannon_uncertainty", degenerate_mode="error", gap_mode="nan", ): """Apply metric to compute conservation for all alignment positions. Parameters ---------- metric : {'inverse_shannon_uncertainty'}, optional Metric that should be applied for computing conservation. Resulting values should be larger when a position is more conserved. degenerate_mode : {'nan', 'error'}, optional Mode for handling positions with degenerate characters. If ``"nan"``, positions with degenerate characters will be assigned a conservation score of ``np.nan``. If ``"error"``, an error will be raised if one or more degenerate characters are present. gap_mode : {'nan', 'ignore', 'error', 'include'}, optional Mode for handling positions with gap characters. If ``"nan"``, positions with gaps will be assigned a conservation score of ``np.nan``. If ``"ignore"``, positions with gaps will be filtered to remove gaps before ``metric`` is applied. 
If ``"error"``, an error will be raised if one or more gap characters are present. If ``"include"``, conservation will be computed on alignment positions with gaps included. In this case, it is up to the metric to ensure that gaps are handled as they should be or to raise an error if gaps are not supported by that metric. Returns ------- np.array of floats Values resulting from the application of ``metric`` to each position in the alignment. Raises ------ ValueError If an unknown ``metric``, ``degenerate_mode`` or ``gap_mode`` is provided. ValueError If any degenerate characters are present in the alignment when ``degenerate_mode`` is ``"error"``. ValueError If any gaps are present in the alignment when ``gap_mode`` is ``"error"``. Notes ----- Users should be careful interpreting results when ``gap_mode = "include"`` as the results may be misleading. For example, as pointed out in [1]_, a protein alignment position composed of 90% gaps and 10% tryptophans would score as more highly conserved than a position composed of alanine and glycine in equal frequencies with the ``"inverse_shannon_uncertainty"`` metric. ``gap_mode = "include"`` will result in all gap characters being recoded to ``TabularMSA.dtype.default_gap_char``. Because no conservation metrics that we are aware of consider different gap characters differently (e.g., none of the metrics described in [1]_), they are all treated the same within this method. The ``inverse_shannon_uncertainty`` metric is simply one minus Shannon's uncertainty metric. This method uses the inverse of Shannon's uncertainty so that larger values imply higher conservation. Shannon's uncertainty is also referred to as Shannon's entropy, but when making computations from symbols, as is done here, "uncertainty" is the preferred term ([2]_). References ---------- .. [1] Valdar WS. Scoring residue conservation. Proteins. (2002) .. [2] Schneider T. Pitfalls in information theory (website, ca. 2015). 
https://schneider.ncifcrf.gov/glossary.html#Shannon_entropy """ if gap_mode not in {"nan", "error", "include", "ignore"}: raise ValueError("Unknown gap_mode provided: %s" % gap_mode) if degenerate_mode not in {"nan", "error"}: raise ValueError("Unknown degenerate_mode provided: %s" % degenerate_mode) if metric not in {"inverse_shannon_uncertainty"}: raise ValueError("Unknown metric provided: %s" % metric) if self.shape[0] == 0: # handle empty alignment to avoid error on lookup of character sets return np.array([]) # Since the only currently allowed metric is # inverse_shannon_uncertainty, and we already know that a valid metric # was provided, we just define metric_f here. When additional metrics # are supported, this will be handled differently (e.g., via a lookup # or if/elif/else). metric_f = self._build_inverse_shannon_uncertainty_f(gap_mode == "include") result = [] for p in self.iter_positions(ignore_metadata=True): cons = None # cast p to self.dtype for access to gap/degenerate related # functionality pos_seq = self.dtype(p) # handle degenerate characters if present if pos_seq.has_degenerates(): if degenerate_mode == "nan": cons = np.nan else: # degenerate_mode == 'error' is the only choice left degenerate_chars = pos_seq[pos_seq.degenerates()] raise ValueError( "Conservation is undefined for positions " "with degenerate characters. The " "following degenerate characters were " "observed: %s." % degenerate_chars ) # handle gap characters if present if pos_seq.has_gaps(): if gap_mode == "nan": cons = np.nan elif gap_mode == "error": raise ValueError("Gap characters present in alignment.") elif gap_mode == "ignore": pos_seq = pos_seq.degap() else: # gap_mode == 'include' is the only choice left # Recode all gap characters with pos_seq.default_gap_char. 
pos_seq = pos_seq.replace(pos_seq.gaps(), pos_seq.default_gap_char) if cons is None: cons = metric_f(pos_seq) result.append(cons) return np.array(result) def gap_frequencies(self, axis="sequence", relative=False): """Compute frequency of gap characters across an axis. Parameters ---------- axis : {'sequence', 'position'}, optional Axis to compute gap character frequencies across. If 'sequence' or 0, frequencies are computed for each position in the MSA. If 'position' or 1, frequencies are computed for each sequence. relative : bool, optional If ``True``, return the relative frequency of gap characters instead of the count. Returns ------- 1D np.ndarray (int or float) Vector of gap character frequencies across the specified axis. Will have ``int`` dtype if ``relative=False`` and ``float`` dtype if ``relative=True``. Raises ------ ValueError If `axis` is invalid. Notes ----- If there are no positions in the MSA, ``axis='position'``, **and** ``relative=True``, the relative frequency of gap characters in each sequence will be ``np.nan``. Examples -------- Compute frequency of gap characters for each position in the MSA (i.e., *across* the sequence axis): >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACG'), ... DNA('A--'), ... DNA('AC.'), ... DNA('AG.')]) >>> msa.gap_frequencies() array([0, 1, 3]) Compute relative frequencies across the same axis: >>> msa.gap_frequencies(relative=True) array([ 0. , 0.25, 0.75]) Compute frequency of gap characters for each sequence (i.e., *across* the position axis): >>> msa.gap_frequencies(axis='position') array([0, 2, 1, 1]) """ if self._is_sequence_axis(axis): seq_iterator = self.iter_positions(ignore_metadata=True) length = self.shape.sequence else: seq_iterator = self length = self.shape.position gap_freqs = [] for seq in seq_iterator: # Not using Sequence.frequencies(relative=relative) because each # gap character's relative frequency is computed separately and # must be summed. 
This is less precise than summing the absolute # frequencies of gap characters and dividing by the length. Likely # not a big deal for typical gap characters ('-', '.') but can be # problematic as the number of gap characters grows (we aren't # guaranteed to always have two gap characters). See unit tests for # an example. freqs = seq.frequencies(chars=self.dtype.gap_chars) gap_freqs.append(sum(freqs.values())) gap_freqs = np.asarray(gap_freqs, dtype=float if relative else int) if relative: gap_freqs /= length return gap_freqs def reassign_index(self, mapping=None, minter=None): """Reassign index labels to sequences in this MSA. Parameters ---------- mapping : dict or callable, optional Dictionary or callable that maps existing labels to new labels. Any label without a mapping will remain the same. minter : callable or metadata key, optional If provided, defines an index label for each sequence. Can either be a callable accepting a single argument (each sequence) or a key into each sequence's ``metadata`` attribute. Raises ------ ValueError If `mapping` and `minter` are both provided. See Also -------- index Notes ----- If neither `mapping` nor `minter` are provided, index labels will be reset to the ``TabularMSA`` constructor's default. Examples -------- Create a ``TabularMSA`` object with default index labels: >>> from skbio import DNA, TabularMSA >>> seqs = [DNA('ACG', metadata={'id': 'a'}), ... DNA('AC-', metadata={'id': 'b'}), ... 
DNA('CCG', metadata={'id': 'c'})] >>> msa = TabularMSA(seqs) >>> msa.index RangeIndex(start=0, stop=3, step=1) Assign new index to the MSA using each sequence's ID as a label: >>> msa.reassign_index(minter='id') >>> msa.index Index(['a', 'b', 'c'], dtype='object') Assign default index: >>> msa.reassign_index() >>> msa.index RangeIndex(start=0, stop=3, step=1) Alternatively, a mapping of existing labels to new labels may be passed via `mapping`: >>> msa.reassign_index(mapping={0: 'seq1', 1: 'seq2'}) >>> msa.index Index(['seq1', 'seq2', 2], dtype='object') """ if mapping is not None and minter is not None: raise ValueError("Cannot use both `mapping` and `minter` at the same time.") if mapping is not None: if isinstance(mapping, dict): self.index = [ mapping[label] if label in mapping else label for label in self.index ] elif callable(mapping): self.index = [mapping(label) for label in self.index] else: raise TypeError( "`mapping` must be a dict or callable, not type %r" % type(mapping).__name__ ) elif minter is not None: self.index = [resolve_key(seq, minter) for seq in self._seqs] else: del self.index def append(self, sequence, minter=None, index=None, reset_index=False): """Append a sequence to the MSA without recomputing alignment. Parameters ---------- sequence : GrammaredSequence Sequence to be appended. Must match the dtype of the MSA and the number of positions in the MSA. minter : callable or metadata key, optional Used to create an index label for the sequence being appended. If callable, it generates a label directly. Otherwise it's treated as a key into the sequence metadata. Note that `minter` cannot be combined with `index` nor `reset_index`. index : object, optional Index label to use for the appended sequence. Note that `index` cannot be combined with `minter` nor `reset_index`. reset_index : bool, optional If ``True``, this MSA's index is reset to the ``TabularMSA`` constructor's default after appending. 
Note that `reset_index` cannot be combined with `minter` nor `index`. Raises ------ ValueError If exactly one choice of `minter`, `index`, or `reset_index` is not provided. TypeError If the sequence object isn't a ``GrammaredSequence``. TypeError If the type of the sequence does not match the dtype of the MSA. ValueError If the length of the sequence does not match the number of positions in the MSA. See Also -------- extend reassign_index Notes ----- The MSA is not automatically re-aligned when a sequence is appended. Therefore, this operation is not necessarily meaningful on its own. Examples -------- Create an MSA with a single sequence labeled ``'seq1'``: >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACGT')], index=['seq1']) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 1 position count: 4 --------------------- ACGT >>> msa.index Index(['seq1'], dtype='object') Append a new sequence to the MSA, providing its index label via `index`: >>> msa.append(DNA('AG-T'), index='seq2') >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 4 --------------------- ACGT AG-T >>> msa.index Index(['seq1', 'seq2'], dtype='object') Append another sequence, this time resetting the MSA's index labels to the default with `reset_index`. Note that since the MSA's index is reset, we do not need to provide an index label for the new sequence via `index` or `minter`: >>> msa.append(DNA('ACGA'), reset_index=True) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 4 --------------------- ACGT AG-T ACGA >>> msa.index RangeIndex(start=0, stop=3, step=1) """ if index is not None: index = [index] self.extend([sequence], minter=minter, index=index, reset_index=reset_index) def extend(self, sequences, minter=None, index=None, reset_index=False): """Extend this MSA with sequences without recomputing alignment. 
Parameters ---------- sequences : iterable of GrammaredSequence Sequences to be appended. Must match the dtype of the MSA and the number of positions in the MSA. minter : callable or metadata key, optional Used to create index labels for the sequences being appended. If callable, it generates a label directly. Otherwise it's treated as a key into the sequence metadata. Note that `minter` cannot be combined with `index` nor `reset_index`. index : pd.Index consumable, optional Index labels to use for the appended sequences. Must be the same length as `sequences`. Must be able to be passed directly to ``pd.Index`` constructor. Note that `index` cannot be combined with `minter` nor `reset_index`. reset_index : bool, optional If ``True``, this MSA's index is reset to the ``TabularMSA`` constructor's default after extending. Note that `reset_index` cannot be combined with `minter` nor `index`. Raises ------ ValueError If exactly one choice of `minter`, `index`, or `reset_index` is not provided. ValueError If `index` is not the same length as `sequences`. TypeError If `sequences` contains an object that isn't a ``GrammaredSequence``. TypeError If `sequences` contains a type that does not match the dtype of the MSA. ValueError If the length of a sequence does not match the number of positions in the MSA. See Also -------- append reassign_index Notes ----- The MSA is not automatically re-aligned when appending sequences. Therefore, this operation is not necessarily meaningful on its own. 
Examples -------- Create an MSA with a single sequence labeled ``'seq1'``: >>> from skbio import DNA, TabularMSA >>> msa = TabularMSA([DNA('ACGT')], index=['seq1']) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 1 position count: 4 --------------------- ACGT >>> msa.index Index(['seq1'], dtype='object') Extend the MSA with sequences, providing their index labels via `index`: >>> msa.extend([DNA('AG-T'), DNA('-G-T')], index=['seq2', 'seq3']) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 4 --------------------- ACGT AG-T -G-T >>> msa.index Index(['seq1', 'seq2', 'seq3'], dtype='object') Extend with more sequences, this time resetting the MSA's index labels to the default with `reset_index`. Note that since the MSA's index is reset, we do not need to provide index labels for the new sequences via `index` or `minter`: >>> msa.extend([DNA('ACGA'), DNA('AC-T'), DNA('----')], ... reset_index=True) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 6 position count: 4 --------------------- ACGT AG-T ... AC-T ---- >>> msa.index RangeIndex(start=0, stop=6, step=1) """ if sum([minter is not None, index is not None, bool(reset_index)]) != 1: raise ValueError( "Must provide exactly one of the following parameters: " "`minter`, `index`, `reset_index`" ) # Verify `sequences` first because `minter` could interact with each # sequence's `metadata`. sequences = list(sequences) self._assert_valid_sequences(sequences) if minter is not None: # Convert to Index to identify tuples as a MultiIndex instead of an # index of tuples. index = pd.Index([resolve_key(seq, minter) for seq in sequences]) elif index is not None: # Convert to Index to identify tuples as a MultiIndex instead of an # index of tuples. if not isinstance(index, pd.Index): index = pd.Index(index) # pandas doesn't give a user-friendly error message if we pass # through. 
if len(sequences) != len(index): raise ValueError( "Number of sequences (%d) must match index length (%d)" % (len(sequences), len(index)) ) else: # Case for `reset_index=True`. We could simply set `index=None` # since it will be reset after appending below, but we can avoid a # memory spike if Series.append creates a new RangeIndex from # adjacent RangeIndexes in the future (pandas 0.18.0 creates an # Int64Index). index = pd.RangeIndex( start=len(self), stop=len(self) + len(sequences), step=1 ) if len(self): self._seqs = pd.concat( [self._seqs, pd.Series(sequences, index=index, dtype=object)] ) else: # Not using Series.append to avoid turning a RangeIndex supplied # via `index` parameter into an Int64Index (this happens in pandas # 0.18.0). self._seqs = pd.Series(sequences, index=index, dtype=object) # When extending a TabularMSA without sequences, the number of # positions in the TabularMSA may change from zero to non-zero. If # this happens, the TabularMSA's positional_metadata must be reset # to its default "empty" representation for the new number of # positions, otherwise the number of positions in the TabularMSA # and positional_metadata will differ. # # TODO: change for #1198 if self.shape.position > 0: del self.positional_metadata if reset_index: self.reassign_index() def _assert_valid_sequences(self, sequences): if not sequences: return if len(self): expected_dtype = self.dtype expected_length = self.shape.position else: sequence = sequences[0] expected_dtype = type(sequence) if not issubclass(expected_dtype, GrammaredSequence): raise TypeError( "Each sequence must be of type %r, not type %r" % (GrammaredSequence.__name__, expected_dtype.__name__) ) expected_length = len(sequence) for sequence in sequences: dtype = type(sequence) if dtype is not expected_dtype: raise TypeError( "Sequences in MSA must have matching type. 
Type %r does " "not match type %r" % (dtype.__name__, expected_dtype.__name__) ) length = len(sequence) if length != expected_length: raise ValueError( "Each sequence's length must match the number of " "positions in the MSA: %d != %d" % (length, expected_length) ) def join(self, other, how="strict"): """Join this MSA with another by sequence (horizontally). Sequences will be joined by index labels. MSA ``positional_metadata`` will be joined by columns. Use `how` to control join behavior. Alignment is **not** recomputed during join operation (see *Notes* section for details). Parameters ---------- other : TabularMSA MSA to join with. Must have same ``dtype`` as this MSA. how : {'strict', 'inner', 'outer', 'left', 'right'}, optional How to join the sequences and MSA `positional_metadata`: * ``'strict'``: MSA indexes and `positional_metadata` columns must match * ``'inner'``: an inner-join of the MSA indexes and ``positional_metadata`` columns (only the shared set of index labels and columns are used) * ``'outer'``: an outer-join of the MSA indexes and ``positional_metadata`` columns (all index labels and columns are used). Unshared sequences will be padded with the MSA's default gap character (``TabularMSA.dtype.default_gap_char``). Unshared columns will be padded with NaN. * ``'left'``: a left-outer-join of the MSA indexes and ``positional_metadata`` columns (this MSA's index labels and columns are used). Padding of unshared data is handled the same as ``'outer'``. * ``'right'``: a right-outer-join of the MSA indexes and ``positional_metadata`` columns (`other` index labels and columns are used). Padding of unshared data is handled the same as ``'outer'``. Returns ------- TabularMSA Joined MSA. There is no guaranteed ordering to its index (call ``sort`` to define one). Raises ------ ValueError If `how` is invalid. ValueError If either the index of this MSA or the index of `other` contains duplicates. 
ValueError If ``how='strict'`` and this MSA's index doesn't match with `other`. ValueError If ``how='strict'`` and this MSA's ``positional_metadata`` columns don't match with `other`. TypeError If `other` is not a subclass of ``TabularMSA``. TypeError If the ``dtype`` of `other` does not match this MSA's ``dtype``. See Also -------- extend sort skbio.sequence.Sequence.concat Notes ----- The join operation does not automatically perform re-alignment; sequences are simply joined together. Therefore, this operation is not necessarily meaningful on its own. The index labels of this MSA must be unique. Likewise, the index labels of `other` must be unique. The MSA-wide and per-sequence metadata (``TabularMSA.metadata`` and ``Sequence.metadata``) are not retained on the joined ``TabularMSA``. The positional metadata of the sequences will be outer-joined, regardless of `how` (using ``Sequence.concat(how='outer')``). If the join operation results in a ``TabularMSA`` without any sequences, the MSA's ``positional_metadata`` will not be set. Examples -------- .. note:: The following examples call `.sort()` on the joined MSA because there isn't a guaranteed ordering to the index. The joined MSA is sorted in these examples to make the output reproducible. When using this method with your own data, sorting the joined MSA is not necessary. Join MSAs by sequence: >>> from skbio import DNA, TabularMSA >>> msa1 = TabularMSA([DNA('AC'), ... DNA('A-')]) >>> msa2 = TabularMSA([DNA('G-T'), ... DNA('T--')]) >>> joined = msa1.join(msa2) >>> joined.sort() # unnecessary in practice, see note above >>> joined TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 5 --------------------- ACG-T A-T-- Sequences are joined based on MSA index labels: >>> msa1 = TabularMSA([DNA('AC'), ... DNA('A-')], index=['a', 'b']) >>> msa2 = TabularMSA([DNA('G-T'), ... 
DNA('T--')], index=['b', 'a']) >>> joined = msa1.join(msa2) >>> joined.sort() # unnecessary in practice, see note above >>> joined TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 5 --------------------- ACT-- A-G-T >>> joined.index Index(['a', 'b'], dtype='object') By default both MSA indexes must match. Use ``how`` to specify an inner join: >>> msa1 = TabularMSA([DNA('AC'), ... DNA('A-'), ... DNA('-C')], index=['a', 'b', 'c'], ... positional_metadata={'col1': [42, 43], ... 'col2': [1, 2]}) >>> msa2 = TabularMSA([DNA('G-T'), ... DNA('T--'), ... DNA('ACG')], index=['b', 'a', 'z'], ... positional_metadata={'col2': [3, 4, 5], ... 'col3': ['f', 'o', 'o']}) >>> joined = msa1.join(msa2, how='inner') >>> joined.sort() # unnecessary in practice, see note above >>> joined TabularMSA[DNA] -------------------------- Positional metadata: 'col2': Stats: sequence count: 2 position count: 5 -------------------------- ACT-- A-G-T >>> joined.index Index(['a', 'b'], dtype='object') >>> joined.positional_metadata col2 0 1 1 2 2 3 3 4 4 5 When performing an outer join (``'outer'``, ``'left'``, or ``'right'``), unshared sequences are padded with gaps and unshared ``positional_metadata`` columns are padded with NaN: >>> joined = msa1.join(msa2, how='outer') >>> joined.sort() # unnecessary in practice, see note above >>> joined TabularMSA[DNA] ---------------------------- Positional metadata: 'col1': 'col2': 'col3': Stats: sequence count: 4 position count: 5 ---------------------------- ACT-- A-G-T -C--- --ACG >>> joined.index Index(['a', 'b', 'c', 'z'], dtype='object') >>> joined.positional_metadata col1 col2 col3 0 42.0 1 NaN 1 43.0 2 NaN 2 NaN 3 f 3 NaN 4 o 4 NaN 5 o """ if how not in {"strict", "inner", "outer", "left", "right"}: raise ValueError( "`how` must be 'strict', 'inner', 'outer', 'left', or " "'right'." 
) self._assert_joinable(other) join_index, concat_kwargs = self._get_join_index(other, how) joined_seqs = [] for label in join_index: left_seq = self._get_sequence_for_join(label) right_seq = other._get_sequence_for_join(label) joined_seqs.append(self.dtype.concat([left_seq, right_seq], how="outer")) # TODO: update when #1198 is implemented. joined_positional_metadata = None if joined_seqs: if how == "left": joined_positional_metadata = pd.concat( [ self.positional_metadata, other.positional_metadata.reindex( columns=self.positional_metadata.columns ), ], ignore_index=True, sort=True, ) elif how == "right": joined_positional_metadata = pd.concat( [ self.positional_metadata.reindex( columns=other.positional_metadata.columns ), other.positional_metadata, ], ignore_index=True, sort=True, ) else: joined_positional_metadata = pd.concat( [self.positional_metadata, other.positional_metadata], ignore_index=True, sort=True, **concat_kwargs, ) if not self.has_positional_metadata(): del self.positional_metadata if not other.has_positional_metadata(): del other.positional_metadata joined = self.__class__( joined_seqs, index=join_index, positional_metadata=joined_positional_metadata, ) if not joined.has_positional_metadata(): del joined.positional_metadata return joined def _assert_joinable(self, other): if not isinstance(other, TabularMSA): raise TypeError( "`other` must be a `TabularMSA` object, not type %r" % type(other).__name__ ) if self.dtype is not other.dtype: raise TypeError( "`other` dtype %r does not match this MSA's dtype %r" % ( other.dtype if other.dtype is None else other.dtype.__name__, self.dtype if self.dtype is None else self.dtype.__name__, ) ) if not self.index.is_unique: raise ValueError("This MSA's index labels must be unique.") if not other.index.is_unique: raise ValueError("`other`'s index labels must be unique.") def _get_join_index(self, other, how): if how == "strict": diff = self.index.symmetric_difference(other.index) if len(diff) > 0: raise 
ValueError("Index labels must all match with `how='strict'`") diff = self.positional_metadata.columns.symmetric_difference( other.positional_metadata.columns ) if not self.has_positional_metadata(): del self.positional_metadata if not other.has_positional_metadata(): del other.positional_metadata if len(diff) > 0: raise ValueError( "Positional metadata columns must all match with " "`how='strict'`" ) join_index = self.index concat_kwargs = {"join": "inner"} elif how == "inner": join_index = self.index.intersection(other.index) concat_kwargs = {"join": "inner"} elif how == "outer": join_index = self.index.union(other.index) concat_kwargs = {"join": "outer"} elif how == "left": join_index = self.index concat_kwargs = {"join_axes": [self.positional_metadata.columns]} else: # how='right' join_index = other.index concat_kwargs = {"join_axes": [other.positional_metadata.columns]} return join_index, concat_kwargs def _get_sequence_for_join(self, label): if label in self.index: return self.loc[label] else: return self.dtype(self.dtype.default_gap_char * self.shape.position) def sort(self, level=None, ascending=True): """Sort sequences by index label in-place. Parameters ---------- level : int or object, optional Index level to sort on when index is a ``pd.MultiIndex``. Does nothing otherwise. ascending: bool, optional If ``False``, sort in descending (i.e., reverse) order. See Also -------- index reassign_index pandas.Series.sort_index Notes ----- This is a passthrough to ``pd.Series.sort_index`` internally. Examples -------- Create a ``TabularMSA`` object with sequence identifiers as index labels: >>> from skbio import DNA, TabularMSA >>> seqs = [DNA('ACG', metadata={'id': 'c'}), ... DNA('AC-', metadata={'id': 'b'}), ... 
DNA('AC-', metadata={'id': 'a'})] >>> msa = TabularMSA(seqs, minter='id') >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 3 --------------------- ACG AC- AC- >>> msa.index Index(['c', 'b', 'a'], dtype='object') Sort the sequences in alphabetical order by index label: >>> msa.sort() >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 3 position count: 3 --------------------- AC- AC- ACG >>> msa.index Index(['a', 'b', 'c'], dtype='object') Note that since the sort is in-place, the ``TabularMSA`` object is modified (a new object is *not* returned). """ self._seqs.sort_index(ascending=ascending, level=level, inplace=True) self.positional_metadata.sort_index(axis=1, inplace=True) def to_dict(self): """Create a ``dict`` from this ``TabularMSA``. Returns ------- dict Dictionary constructed from the index labels and sequences in this ``TabularMSA``. Raises ------ ValueError If index labels are not unique. See Also -------- from_dict index reassign_index Examples -------- >>> from skbio import DNA, TabularMSA >>> seqs = [DNA('ACGT'), DNA('A--T')] >>> msa = TabularMSA(seqs, index=['a', 'b']) >>> dictionary = msa.to_dict() >>> dictionary == {'a': DNA('ACGT'), 'b': DNA('A--T')} True """ if self.index.is_unique: return self._seqs.to_dict() else: raise ValueError("Cannot convert to dict. 
Index labels are not" " unique.") def _is_sequence_axis(self, axis): if axis == "sequence" or axis == 0: return True elif axis == "position" or axis == 1: return False else: raise ValueError( "`axis` must be 'sequence' (0) or 'position' (1), not %r" % axis ) @overrides(PositionalMetadataMixin) def _positional_metadata_axis_len_(self): return self.shape.position @classonlymethod def from_path_seqs(cls, path, seqs): """Create a tabular MSA from an alignment path and sequences.""" if not all(isinstance(x, GrammaredSequence) for x in seqs): raise ValueError("`seqs` must be of skbio.Sequence type.") else: seqtype = seqs[0].__class__ bits = path.to_bits() gaps = np.repeat(bits, path.lengths, axis=1) gap_char = ord(seqtype.default_gap_char) byte_arr = np.full(path.shape, gap_char, dtype=np.uint8) byte_arr[gaps == 0] = np.concatenate( [x._bytes[x._bytes != gap_char] for x in seqs] ) return cls([seqtype(x) for x in byte_arr]) scikit-bio-0.6.2/skbio/alignment/tests/000077500000000000000000000000001464262511300200075ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/alignment/tests/__init__.py000066400000000000000000000005411464262511300221200ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/alignment/tests/data/000077500000000000000000000000001464262511300207205ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/alignment/tests/data/il6.prot.aln000066400000000000000000000034151464262511300230740ustar00rootroot00000000000000>human MNS------------------FSTSAFGPVAF---SLGLLLVLPAAFPAPVPPGED---- SKDVAAPHRQPLTSSERIDKQIRYILDGISALRKETCNKSNMCESSKEALAENNLNLPKM AEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSE-EQARAVQMSTKVLIQFL QKKAKNLDAITTPDPTTNASLLTKLQAQNQWLQDMTTHLILRSFKEFLQSSLRALRQM-- ------ >chimp MNS------------------VSTSAFGPVAF---SLGLLLVLPAAFPAPVPPGED---- SKDVAAPHRQPLTSSERIDKQIRYILDGISALRKETCNKSNMCESSKEALAENNLNLPKM AEKDGCFQSGFNEETCLVKIITGLLEFEVYLEYLQNRFESSE-EQARAVQMSTKVLIQFL QKKAKNLDAITTPDPTTNASLLTKLQAQNQWLQDMTTHLILRSFKEFLQSSLRALRQM-- ------ >monkey MNS------------------VSTSAFGPVAF---SLGLLLVLPAAFPAPVLPGED---- SKDVAAPHSQPLTSSERIDKHIRYILDGISALRKETCNRSNMCESSKEALAENNLNLPKM AEKDGCFQSGFNEDTCLVKIITGLLEFEVYLEYLQNRFESSE-EQARAVQMSTKVLIQFL QKKAKNLDAITTPEPTTNASLLTKLQAQNQWLQDMTTHLILRSFKEFLQSSLRALRQM-- ------ >pig MNS------------------LSTSAFSPVAF---SLGLLLVMATAFPTPGRLEED---- AKGDATSDKMLFTSPDKTEELIKYILGKISAMRKEMCEKYEKCENSKEVLAENNLNLPKM AEKDGCFQSGFNQETCLMRITTGLVEFQIYLDYLQKEYESNK-GNVEAVQISTKALIQTL RQKGKNPDKATTPNPTTNAGLLDKLQSQNEWMKNTKIILILRSLEDFLQFSLRAIRIM-- ------ >mouse MKF------------------LSARDFHPVAF----LGLMLVTTTAFPTSQVRRGD---- FTEDTTPNRPVYT-TSQVGGLITHVLWEIVEMRKELCNGNSDCMNNDDALAENNLKLPEI QRNDGCYQTGYNQEICLLKISSGLLEYHSYLEYMKNNLKDNKKDKARVLQRDTETLIHIF NQEVKDLHKIVLPTPISNALLTDKLESQKEWLRTKTIQFILKSLEEFLKVTLRSTRQT-- ------ >rat MKF------------------LSARDFQPVAF----LGLMLLTATAFPTSQVRRGD---- FTEDTTHNRPVYT-TSQVGGLITYVLREILEMRKELCNGNSDCMNSDDALSENNLKLPEI QRNDGCFQTGYNQEICLLKICSGLLEFRFYLEFVKNNLQDNKKDKARVIQSNTETLVHIF KQEIKDSYKIVLPTPTSNALLMEKLESQKEWLRTKTIQLILKALEEFLKVTMRSTRQT-- ------ >chicken MNFTEGCEATGRRPGSAGSRRRRAPRPGPVALLPLLLPLLLPPAAAVPLPAAADSSGEVG 
# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

from unittest import TestCase, main
import warnings

import numpy as np

from skbio import Sequence, Protein, DNA, RNA, TabularMSA, SubstitutionMatrix
from skbio.alignment import (
    global_pairwise_align_protein, local_pairwise_align_protein,
    global_pairwise_align_nucleotide, local_pairwise_align_nucleotide,
    make_identity_substitution_matrix, local_pairwise_align,
    global_pairwise_align)
from skbio.alignment._pairwise import (
    _init_matrices_sw, _init_matrices_nw,
    _compute_score_and_traceback_matrices, _traceback, _first_largest,
    _compute_substitution_score)
from skbio.sequence import GrammaredSequence
from skbio.util import classproperty
from skbio.util._decorator import overrides


class CustomSequence(GrammaredSequence):
    # Minimal grammared sequence type over the alphabet {W, X, Y, Z},
    # gapped with '^' and '$', used to exercise custom-alphabet alignment.

    @classproperty
    @overrides(GrammaredSequence)
    def gap_chars(cls):
        return set('^$')

    @classproperty
    @overrides(GrammaredSequence)
    def default_gap_char(cls):
        return '^'

    @classproperty
    @overrides(GrammaredSequence)
    def definite_chars(cls):
        return set('WXYZ')

    @classproperty
    @overrides(GrammaredSequence)
    def degenerate_map(cls):
        return {}


class PairwiseAlignmentTests(TestCase):
    """Tests for the high-level pairwise aligners and their helpers.

    Note: In the high-level tests, the expected results were derived with
    assistance from the EMBOSS web server:
    http://www.ebi.ac.uk/Tools/psa/emboss_needle/
    http://www.ebi.ac.uk/Tools/psa/emboss_water/
    In some cases, placement of non-gap characters surrounded by gap
    characters are slightly different between scikit-bio and the EMBOSS
    server. These differences arise from arbitrary implementation
    differences, and always result in the same score (which tells us that
    the alignments are equivalent). In cases where the expected results
    included here differ from those generated by the EMBOSS server, the
    EMBOSS result is noted as a comment below the expected value.
    """

    def setUp(self):
        """Suppress all warnings while each test runs."""
        warnings.simplefilter("ignore")

    def tearDown(self):
        """Remove every warning filter so no filters remain active."""
        warnings.resetwarnings()

    def test_make_identity_substitution_matrix(self):
        exp = {'A': {'A': 1, 'C': -2, 'G': -2, 'T': -2, 'U': -2},
               'C': {'A': -2, 'C': 1, 'G': -2, 'T': -2, 'U': -2},
               'G': {'A': -2, 'C': -2, 'G': 1, 'T': -2, 'U': -2},
               'T': {'A': -2, 'C': -2, 'G': -2, 'T': 1, 'U': -2},
               'U': {'A': -2, 'C': -2, 'G': -2, 'T': -2, 'U': 1}}
        self.assertEqual(make_identity_substitution_matrix(1, -2), exp)

        exp = {'A': {'A': 5, 'C': -4, 'G': -4, 'T': -4, 'U': -4},
               'C': {'A': -4, 'C': 5, 'G': -4, 'T': -4, 'U': -4},
               'G': {'A': -4, 'C': -4, 'G': 5, 'T': -4, 'U': -4},
               'T': {'A': -4, 'C': -4, 'G': -4, 'T': 5, 'U': -4},
               'U': {'A': -4, 'C': -4, 'G': -4, 'T': -4, 'U': 5}}
        self.assertEqual(make_identity_substitution_matrix(5, -4), exp)

    # TODO: duplicate of test_global_pairwise_align_custom_alphabet; remove
    # once nondegenerate_chars is removed.
    def test_global_pairwise_align_custom_alphabet_nondegenerate_chars(self):
        submat = make_identity_substitution_matrix(
            1, -1, alphabet=CustomSequence.nondegenerate_chars)

        msa, score, start_end = global_pairwise_align(
            CustomSequence("WXYZ"), CustomSequence("WXYYZZ"),
            10.0, 5.0, submat)

        # Expected values were computed by running an equivalent alignment
        # with the DNA alphabet under the character mapping:
        #
        #     W X Y Z
        #     | | | |
        #     A C G T
        #
        self.assertEqual(msa, TabularMSA([CustomSequence('WXYZ^^'),
                                          CustomSequence('WXYYZZ')]))
        self.assertEqual(score, 2.0)
        self.assertEqual(start_end, [(0, 3), (0, 5)])

    def test_global_pairwise_align_custom_alphabet(self):
        submat = make_identity_substitution_matrix(
            1, -1, alphabet=CustomSequence.definite_chars)

        msa, score, start_end = global_pairwise_align(
            CustomSequence("WXYZ"), CustomSequence("WXYYZZ"),
            10.0, 5.0, submat)

        # Expected values were computed by running an equivalent alignment
        # with the DNA alphabet under the character mapping:
        #
        #     W X Y Z
        #     | | | |
        #     A C G T
        #
        self.assertEqual(msa, TabularMSA([CustomSequence('WXYZ^^'),
                                          CustomSequence('WXYYZZ')]))
        self.assertEqual(score, 2.0)
        self.assertEqual(start_end, [(0, 3), (0, 5)])

    # TODO: duplicate of test_local_pairwise_align_custom_alphabet; remove
    # once nondegenerate_chars is removed.
    def test_local_pairwise_align_custom_alphabet_nondegenerate_chars(self):
        submat = make_identity_substitution_matrix(
            5, -4, alphabet=CustomSequence.nondegenerate_chars)

        msa, score, start_end = local_pairwise_align(
            CustomSequence("YWXXZZYWXXWYYZWXX"),
            CustomSequence("YWWXZZZYWXYZWWX"),
            5.0, 0.5, submat)

        # Expected values were computed by running an equivalent alignment
        # with the DNA alphabet under the character mapping:
        #
        #     W X Y Z
        #     | | | |
        #     A C G T
        #
        self.assertEqual(
            msa,
            TabularMSA([CustomSequence('WXXZZYWXXWYYZWXX'),
                        CustomSequence('WXZZZYWX^^^YZWWX')]))
        self.assertEqual(score, 41.0)
        self.assertEqual(start_end, [(1, 16), (2, 14)])

    def test_local_pairwise_align_custom_alphabet(self):
        submat = make_identity_substitution_matrix(
            5, -4, alphabet=CustomSequence.definite_chars)

        msa, score, start_end = local_pairwise_align(
            CustomSequence("YWXXZZYWXXWYYZWXX"),
            CustomSequence("YWWXZZZYWXYZWWX"),
            5.0, 0.5, submat)

        # Expected values were computed by running an equivalent alignment
        # with the DNA alphabet under the character mapping:
        #
        #     W X Y Z
        #     | | | |
        #     A C G T
        #
        self.assertEqual(
            msa,
            TabularMSA([CustomSequence('WXXZZYWXXWYYZWXX'),
                        CustomSequence('WXZZZYWX^^^YZWWX')]))
        self.assertEqual(score, 41.0)
        self.assertEqual(start_end, [(1, 16), (2, 14)])

    def test_global_pairwise_align_invalid_type(self):
        with self.assertRaisesRegex(TypeError, r"GrammaredSequence.*"
                                    "TabularMSA.*'Sequence'"):
            global_pairwise_align(DNA('ACGT'), Sequence('ACGT'),
                                  1.0, 1.0, {})

    def test_global_pairwise_align_dtype_mismatch(self):
        with self.assertRaisesRegex(TypeError,
                                    r"same dtype: 'DNA' != 'RNA'"):
            global_pairwise_align(DNA('ACGT'), TabularMSA([RNA('ACGU')]),
                                  1.0, 1.0, {})

        with self.assertRaisesRegex(TypeError,
                                    r"same dtype: 'DNA' != 'RNA'"):
            global_pairwise_align(TabularMSA([DNA('ACGT')]),
                                  TabularMSA([RNA('ACGU')]),
                                  1.0, 1.0, {})

    def test_global_pairwise_align_protein(self):
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(msa, TabularMSA([Protein("HEAGAWGHEE-"),
                                          Protein("---PAW-HEAE")]))
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # EMBOSS result: P---AW-HEAE
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=5., gap_extend_penalty=0.5)

        self.assertEqual(msa, TabularMSA([Protein("HEAGAWGHE-E"),
                                          Protein("---PAW-HEAE")]))
        self.assertEqual(score, 30.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # Alternative substitution matrix
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=11., gap_extend_penalty=1.,
            substitution_matrix=SubstitutionMatrix.by_name(
                'BLOSUM62').to_dict())

        self.assertEqual(msa, TabularMSA([Protein("---HEAGAWGHEE"),
                                          Protein("PAWHEAE------")]))
        self.assertEqual(score, 15.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # Protein sequences with metadata
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE", metadata={'id': "s1"}),
            Protein("PAWHEAE", metadata={'id': "s2"}),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(
            msa,
            TabularMSA([Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
                        Protein("---PAW-HEAE", metadata={'id': "s2"})]))
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # One TabularMSA and one Protein as input
        msa, score, start_end = global_pairwise_align_protein(
            TabularMSA([Protein("HEAGAWGHEE", metadata={'id': "s1"})]),
            Protein("PAWHEAE", metadata={'id': "s2"}),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(
            msa,
            TabularMSA([Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
                        Protein("---PAW-HEAE", metadata={'id': "s2"})]))
        self.assertEqual(score, 23.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # One single-sequence alignment as input and one double-sequence
        # alignment as input. Score confirmed manually.
        msa, score, start_end = global_pairwise_align_protein(
            TabularMSA([Protein("HEAGAWGHEE", metadata={'id': "s1"}),
                        Protein("HDAGAWGHDE", metadata={'id': "s2"})]),
            TabularMSA([Protein("PAWHEAE", metadata={'id': "s3"})]),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(
            msa,
            TabularMSA([Protein("HEAGAWGHEE-", metadata={'id': "s1"}),
                        Protein("HDAGAWGHDE-", metadata={'id': "s2"}),
                        Protein("---PAW-HEAE", metadata={'id': "s3"})]))
        self.assertEqual(score, 21.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

        # TypeError on invalid input
        self.assertRaises(TypeError, global_pairwise_align_protein,
                          42, Protein("HEAGAWGHEE"))
        self.assertRaises(TypeError, global_pairwise_align_protein,
                          Protein("HEAGAWGHEE"), 42)

    def test_global_pairwise_align_protein_invalid_dtype(self):
        with self.assertRaisesRegex(TypeError,
                                    r"TabularMSA with Protein dtype.*dtype "
                                    "'DNA'"):
            global_pairwise_align_protein(TabularMSA([Protein('PAW')]),
                                          TabularMSA([DNA('ACGT')]))

    def test_global_pairwise_align_protein_penalize_terminal_gaps(self):
        msa, score, start_end = global_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=10., gap_extend_penalty=5.,
            penalize_terminal_gaps=True)

        self.assertEqual(msa, TabularMSA([Protein("HEAGAWGHEE"),
                                          Protein("---PAWHEAE")]))
        self.assertEqual(score, 1.0)
        self.assertEqual(start_end, [(0, 9), (0, 6)])

    def test_global_pairwise_align_nucleotide_penalize_terminal_gaps(self):
        # In these tests one sequence is roughly 3x the length of the other.
        # We toggle penalize_terminal_gaps to confirm that it yields
        # different alignments and different alignment scores.
        seq1 = DNA("ACCGTGGACCGTTAGGATTGGACCCAAGGTTG")
        seq2 = DNA("T"*25 + "ACCGTGGACCGTAGGATTGGACCAAGGTTA" + "A"*25)

        msa, score, start_end = global_pairwise_align_nucleotide(
            seq1, seq2, gap_open_penalty=5., gap_extend_penalty=0.5,
            match_score=5, mismatch_score=-4, penalize_terminal_gaps=False)

        self.assertEqual(
            msa,
            TabularMSA([DNA("-------------------------ACCGTGGACCGTTAGGA"
                            "TTGGACCCAAGGTTG-------------------------"),
                        DNA("TTTTTTTTTTTTTTTTTTTTTTTTTACCGTGGACCGT-AGGA"
                            "TTGGACC-AAGGTTAAAAAAAAAAAAAAAAAAAAAAAAAA")]))
        self.assertEqual(score, 131.0)

        msa, score, start_end = global_pairwise_align_nucleotide(
            seq1, seq2, gap_open_penalty=5., gap_extend_penalty=0.5,
            match_score=5, mismatch_score=-4, penalize_terminal_gaps=True)

        self.assertEqual(
            msa,
            TabularMSA([DNA("-------------------------ACCGTGGACCGTTAGGA"
                            "TTGGACCCAAGGTT-------------------------G"),
                        DNA("TTTTTTTTTTTTTTTTTTTTTTTTTACCGTGGACCGT-AGGA"
                            "TTGGACC-AAGGTTAAAAAAAAAAAAAAAAAAAAAAAAAA")]))
        self.assertEqual(score, 97.0)

    def test_local_pairwise_align_protein(self):
        msa, score, start_end = local_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(msa, TabularMSA([Protein("AWGHE"),
                                          Protein("AW-HE")]))
        self.assertEqual(score, 26.0)
        self.assertEqual(start_end, [(4, 8), (1, 4)])

        msa, score, start_end = local_pairwise_align_protein(
            Protein("HEAGAWGHEE"), Protein("PAWHEAE"),
            gap_open_penalty=5., gap_extend_penalty=0.5)

        self.assertEqual(msa, TabularMSA([Protein("AWGHE-E"),
                                          Protein("AW-HEAE")]))
        self.assertEqual(score, 32.0)
        self.assertEqual(start_end, [(4, 9), (1, 6)])

        # Protein sequences with metadata
        msa, score, start_end = local_pairwise_align_protein(
            Protein("HEAGAWGHEE", metadata={'id': "s1"}),
            Protein("PAWHEAE", metadata={'id': "s2"}),
            gap_open_penalty=10., gap_extend_penalty=5.)

        self.assertEqual(
            msa,
            TabularMSA([Protein("AWGHE", metadata={'id': "s1"}),
                        Protein("AW-HE", metadata={'id': "s2"})]))
        self.assertEqual(score, 26.0)
        self.assertEqual(start_end, [(4, 8), (1, 4)])

        # Fails when either input is passed as a TabularMSA
        self.assertRaises(TypeError, local_pairwise_align_protein,
                          TabularMSA([Protein("HEAGAWGHEE",
                                              metadata={'id': "s1"})]),
                          Protein("PAWHEAE", metadata={'id': "s2"}),
                          gap_open_penalty=10., gap_extend_penalty=5.)
        self.assertRaises(TypeError, local_pairwise_align_protein,
                          Protein("HEAGAWGHEE", metadata={'id': "s1"}),
                          TabularMSA([Protein("PAWHEAE",
                                              metadata={'id': "s2"})]),
                          gap_open_penalty=10., gap_extend_penalty=5.)

        # TypeError on invalid input
        self.assertRaises(TypeError, local_pairwise_align_protein,
                          42, Protein("HEAGAWGHEE"))
        self.assertRaises(TypeError, local_pairwise_align_protein,
                          Protein("HEAGAWGHEE"), 42)

    def test_global_pairwise_align_nucleotide(self):
        msa, score, start_end = global_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            gap_open_penalty=5., gap_extend_penalty=0.5, match_score=5,
            mismatch_score=-4)

        self.assertEqual(msa, TabularMSA([DNA("G-ACCTTGACCAGGTACC"),
                                          DNA("GAACTTTGAC---GTAAC")]))
        self.assertEqual(score, 41.0)
        self.assertEqual(start_end, [(0, 16), (0, 14)])

        msa, score, start_end = global_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            gap_open_penalty=10., gap_extend_penalty=0.5, match_score=5,
            mismatch_score=-4)

        self.assertEqual(msa, TabularMSA([DNA("-GACCTTGACCAGGTACC"),
                                          DNA("GAACTTTGAC---GTAAC")]))
        self.assertEqual(score, 32.0)
        self.assertEqual(start_end, [(0, 16), (0, 14)])

        # Use a substitution matrix
        msa, score, start_end = global_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            gap_open_penalty=5., gap_extend_penalty=2.,
            substitution_matrix=SubstitutionMatrix.by_name(
                'NUC.4.4').to_dict())

        self.assertEqual(msa, TabularMSA([DNA("G-ACCTTGACCAGGTACC"),
                                          DNA("GAACTTTGAC---GTAAC")]))
        self.assertEqual(score, 38.0)
        self.assertEqual(start_end, [(0, 16), (0, 14)])

        # DNA sequences with metadata
        msa, score, start_end = global_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC", metadata={'id': "s1"}),
            DNA("GAACTTTGACGTAAC", metadata={'id': "s2"}),
            gap_open_penalty=10., gap_extend_penalty=0.5, match_score=5,
            mismatch_score=-4)

        self.assertEqual(
            msa,
            TabularMSA([DNA("-GACCTTGACCAGGTACC", metadata={'id': "s1"}),
                        DNA("GAACTTTGAC---GTAAC", metadata={'id': "s2"})]))
        self.assertEqual(score, 32.0)
        self.assertEqual(start_end, [(0, 16), (0, 14)])

        # Align one DNA sequence and one TabularMSA, score computed manually
        msa, score, start_end = global_pairwise_align_nucleotide(
            TabularMSA([DNA("GACCTTGACCAGGTACC", metadata={'id': "s1"}),
                        DNA("GACCATGACCAGGTACC", metadata={'id': "s2"})]),
            DNA("GAACTTTGACGTAAC", metadata={'id': "s3"}),
            gap_open_penalty=10., gap_extend_penalty=0.5, match_score=5,
            mismatch_score=-4)

        self.assertEqual(
            msa,
            TabularMSA([DNA("-GACCTTGACCAGGTACC", metadata={'id': "s1"}),
                        DNA("-GACCATGACCAGGTACC", metadata={'id': "s2"}),
                        DNA("GAACTTTGAC---GTAAC", metadata={'id': "s3"})]))
        self.assertEqual(score, 27.5)
        self.assertEqual(start_end, [(0, 16), (0, 14)])

        # TypeError on invalid input
        self.assertRaises(TypeError, global_pairwise_align_nucleotide,
                          42, DNA("ACGT"))
        self.assertRaises(TypeError, global_pairwise_align_nucleotide,
                          DNA("ACGT"), 42)

    def test_global_pairwise_align_nucleotide_invalid_dtype(self):
        with self.assertRaisesRegex(TypeError,
                                    r"TabularMSA with DNA or RNA dtype.*dtype "
                                    "'Protein'"):
            global_pairwise_align_nucleotide(TabularMSA([DNA('ACGT')]),
                                             TabularMSA([Protein('PAW')]))

    def test_local_pairwise_align_nucleotide(self):
        msa, score, start_end = local_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            gap_open_penalty=5., gap_extend_penalty=0.5, match_score=5,
            mismatch_score=-4)

        self.assertEqual(msa, TabularMSA([DNA("ACCTTGACCAGGTACC"),
                                          DNA("ACTTTGAC---GTAAC")]))
        self.assertEqual(score, 41.0)
        self.assertEqual(start_end, [(1, 16), (2, 14)])

        msa, score, start_end = local_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
            gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
            mismatch_score=-4)

        self.assertEqual(msa, TabularMSA([DNA("ACCTTGAC"),
                                          DNA("ACTTTGAC")]))
        self.assertEqual(score, 31.0)
        self.assertEqual(start_end, [(1, 8), (2, 9)])

        # DNA sequences with metadata
        msa, score, start_end = local_pairwise_align_nucleotide(
            DNA("GACCTTGACCAGGTACC", metadata={'id': "s1"}),
            DNA("GAACTTTGACGTAAC", metadata={'id': "s2"}),
            gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
            mismatch_score=-4)

        self.assertEqual(
            msa,
            TabularMSA([DNA("ACCTTGAC", metadata={'id': "s1"}),
                        DNA("ACTTTGAC", metadata={'id': "s2"})]))
        self.assertEqual(score, 31.0)
        self.assertEqual(start_end, [(1, 8), (2, 9)])

        # Fails when either input is passed as a TabularMSA
        self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                          TabularMSA([DNA("GACCTTGACCAGGTACC",
                                          metadata={'id': "s1"})]),
                          DNA("GAACTTTGACGTAAC", metadata={'id': "s2"}),
                          gap_open_penalty=10., gap_extend_penalty=5.,
                          match_score=5, mismatch_score=-4)
        self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                          DNA("GACCTTGACCAGGTACC", metadata={'id': "s1"}),
                          TabularMSA([DNA("GAACTTTGACGTAAC",
                                          metadata={'id': "s2"})]),
                          gap_open_penalty=10., gap_extend_penalty=5.,
                          match_score=5, mismatch_score=-4)

        # TypeError on invalid input
        self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                          42, DNA("ACGT"))
        self.assertRaises(TypeError, local_pairwise_align_nucleotide,
                          DNA("ACGT"), 42)

    def test_nucleotide_aligners_use_substitution_matrices(self):
        alt_sub = make_identity_substitution_matrix(10, -10)

        # An alternate substitution matrix yields a different alignment
        # (both the aligned sequences and the scores differ) with local
        # alignment.
        msa_no_sub, score_no_sub, start_end_no_sub = \
            local_pairwise_align_nucleotide(
                DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4)

        msa_alt_sub, score_alt_sub, start_end_alt_sub = \
            local_pairwise_align_nucleotide(
                DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4, substitution_matrix=alt_sub)

        self.assertNotEqual(msa_no_sub, msa_alt_sub)
        self.assertNotEqual(score_no_sub, score_alt_sub)
        self.assertNotEqual(start_end_no_sub, start_end_alt_sub)

        # An alternate substitution matrix yields a different alignment
        # (both the aligned sequences and the scores differ) with global
        # alignment.
        msa_no_sub, score_no_sub, start_end_no_sub = \
            global_pairwise_align_nucleotide(
                DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4)

        msa_alt_sub, score_alt_sub, start_end_alt_sub = \
            global_pairwise_align_nucleotide(
                DNA("GACCTTGACCAGGTACC"), DNA("GAACTTTGACGTAAC"),
                gap_open_penalty=10., gap_extend_penalty=5., match_score=5,
                mismatch_score=-4, substitution_matrix=alt_sub)

        self.assertNotEqual(msa_no_sub, msa_alt_sub)
        self.assertNotEqual(score_no_sub, score_alt_sub)
        self.assertEqual(start_end_no_sub, start_end_alt_sub)

    def test_local_pairwise_align_invalid_type(self):
        with self.assertRaisesRegex(TypeError,
                                    r'GrammaredSequence.*Sequence'):
            local_pairwise_align(DNA('ACGT'), Sequence('ACGT'),
                                 1.0, 1.0, {})

    def test_local_pairwise_align_type_mismatch(self):
        with self.assertRaisesRegex(TypeError,
                                    r"same type: 'DNA' != 'RNA'"):
            local_pairwise_align(DNA('ACGT'), RNA('ACGU'),
                                 1.0, 1.0, {})

    def test_init_matrices_sw(self):
        exp_score_m = np.zeros((5, 4))
        exp_tback_m = [[0, 0, 0, 0],
                       [0, -1, -1, -1],
                       [0, -1, -1, -1],
                       [0, -1, -1, -1],
                       [0, -1, -1, -1]]
        score_m, tback_m = _init_matrices_sw(
            TabularMSA([DNA('AAA', metadata={'id': 'id'})]),
            TabularMSA([DNA('AAAA', metadata={'id': 'id'})]), 5, 2)
        np.testing.assert_array_equal(score_m, exp_score_m)
        np.testing.assert_array_equal(tback_m, exp_tback_m)

    def test_init_matrices_nw(self):
        exp_score_m = [[0, -5, -7, -9],
                       [-5, 0, 0, 0],
                       [-7, 0, 0, 0],
                       [-9, 0, 0, 0],
                       [-11, 0, 0, 0]]
        exp_tback_m = [[0, 3, 3, 3],
                       [2, -1, -1, -1],
                       [2, -1, -1, -1],
                       [2, -1, -1, -1],
                       [2, -1, -1, -1]]
        score_m, tback_m = _init_matrices_nw(
            TabularMSA([DNA('AAA', metadata={'id': 'id'})]),
            TabularMSA([DNA('AAAA', metadata={'id': 'id'})]), 5, 2)
        np.testing.assert_array_equal(score_m, exp_score_m)
        np.testing.assert_array_equal(tback_m, exp_tback_m)

    def test_compute_substitution_score(self):
        # These results were computed manually.
        subs_m = make_identity_substitution_matrix(5, -4)
        gap_chars = set('-.')

        self.assertEqual(
            _compute_substitution_score(['A'], ['A'], subs_m, 0, gap_chars),
            5.0)

        self.assertEqual(
            _compute_substitution_score(['A', 'A'], ['A'], subs_m, 0,
                                        gap_chars),
            5.0)

        self.assertEqual(
            _compute_substitution_score(['A', 'C'], ['A'], subs_m, 0,
                                        gap_chars),
            0.5)

        self.assertEqual(
            _compute_substitution_score(['A', 'C'], ['A', 'C'], subs_m, 0,
                                        gap_chars),
            0.5)

        self.assertEqual(
            _compute_substitution_score(['A', 'A'], ['A', '-'], subs_m, 0,
                                        gap_chars),
            2.5)

        self.assertEqual(
            _compute_substitution_score(['A', 'A'], ['A', '-'], subs_m, 1,
                                        gap_chars),
            3)

        # Alternate substitution matrix.
        subs_m = make_identity_substitution_matrix(1, -2)
        self.assertEqual(
            _compute_substitution_score(['A', 'A'], ['A', '-'], subs_m, 0,
                                        gap_chars),
            0.5)

    def test_compute_score_and_traceback_matrices(self):
        # These results were computed manually.
        exp_score_m = [[0, -5, -7, -9],
                       [-5, 2, -3, -5],
                       [-7, -3, 4, -1],
                       [-9, -5, -1, 6],
                       [-11, -7, -3, 1]]
        exp_tback_m = [[0, 3, 3, 3],
                       [2, 1, 3, 3],
                       [2, 2, 1, 3],
                       [2, 2, 2, 1],
                       [2, 2, 2, 2]]
        m = make_identity_substitution_matrix(2, -1)
        score_m, tback_m = _compute_score_and_traceback_matrices(
            TabularMSA([DNA('ACG', metadata={'id': 'id'})]),
            TabularMSA([DNA('ACGT', metadata={'id': 'id'})]), 5, 2, m)
        np.testing.assert_array_equal(score_m, exp_score_m)
        np.testing.assert_array_equal(tback_m, exp_tback_m)

        # Different sequences.
        # These results were computed manually.
        exp_score_m = [[0, -5, -7, -9],
                       [-5, 2, -3, -5],
                       [-7, -3, 4, -1],
                       [-9, -5, -1, 3],
                       [-11, -7, -3, -2]]
        exp_tback_m = [[0, 3, 3, 3],
                       [2, 1, 3, 3],
                       [2, 2, 1, 3],
                       [2, 2, 2, 1],
                       [2, 2, 2, 1]]
        m = make_identity_substitution_matrix(2, -1)
        score_m, tback_m = _compute_score_and_traceback_matrices(
            TabularMSA([DNA('ACC', metadata={'id': 'id'})]),
            TabularMSA([DNA('ACGT', metadata={'id': 'id'})]), 5, 2, m)
        np.testing.assert_array_equal(score_m, exp_score_m)
        np.testing.assert_array_equal(tback_m, exp_tback_m)

        # Four sequences provided in two alignments.
        # These results were computed manually.
        exp_score_m = [[0, -5, -7, -9],
                       [-5, 2, -3, -5],
                       [-7, -3, 4, -1],
                       [-9, -5, -1, 3],
                       [-11, -7, -3, -2]]
        exp_tback_m = [[0, 3, 3, 3],
                       [2, 1, 3, 3],
                       [2, 2, 1, 3],
                       [2, 2, 2, 1],
                       [2, 2, 2, 1]]
        m = make_identity_substitution_matrix(2, -1)
        score_m, tback_m = _compute_score_and_traceback_matrices(
            TabularMSA([DNA('ACC', metadata={'id': 's1'}),
                        DNA('ACC', metadata={'id': 's2'})]),
            TabularMSA([DNA('ACGT', metadata={'id': 's3'}),
                        DNA('ACGT', metadata={'id': 's4'})]), 5, 2, m)
        np.testing.assert_array_equal(score_m, exp_score_m)
        np.testing.assert_array_equal(tback_m, exp_tback_m)

    def test_compute_score_and_traceback_matrices_invalid(self):
        # If a sequence contains a character that is absent from the
        # substitution matrix, an informative error should be raised.
        m = make_identity_substitution_matrix(2, -1)
        self.assertRaises(ValueError, _compute_score_and_traceback_matrices,
                          TabularMSA([DNA('AWG', metadata={'id': 'id'})]),
                          TabularMSA([DNA('ACGT', metadata={'id': 'id'})]),
                          5, 2, m)

    def test_traceback(self):
        score_m = np.array([[0, -5, -7, -9],
                            [-5, 2, -3, -5],
                            [-7, -3, 4, -1],
                            [-9, -5, -1, 6],
                            [-11, -7, -3, 1]])
        tback_m = np.array([[0, 3, 3, 3],
                            [2, 1, 3, 3],
                            [2, 2, 1, 3],
                            [2, 2, 2, 1],
                            [2, 2, 2, 2]])

        # Start at bottom-right.
        exp = ([DNA("ACG-", metadata={'id': 'foo'})],
               [DNA("ACGT", metadata={'id': 'bar'})],
               1, 0, 0)
        obs = _traceback(tback_m, score_m,
                         TabularMSA([DNA('ACG', metadata={'id': 'foo'})]),
                         TabularMSA([DNA('ACGT', metadata={'id': 'bar'})]),
                         4, 3)
        self.assertEqual(obs, exp)

        # Four sequences in two alignments.
        score_m = np.array([[0, -5, -7, -9],
                            [-5, 2, -3, -5],
                            [-7, -3, 4, -1],
                            [-9, -5, -1, 6],
                            [-11, -7, -3, 1]])
        tback_m = np.array([[0, 3, 3, 3],
                            [2, 1, 3, 3],
                            [2, 2, 1, 3],
                            [2, 2, 2, 1],
                            [2, 2, 2, 2]])

        # Start at bottom-right.
        exp = ([DNA("ACG-", metadata={'id': 's1'}),
                DNA("ACG-", metadata={'id': 's2'})],
               [DNA("ACGT", metadata={'id': 's3'}),
                DNA("ACGT", metadata={'id': 's4'})],
               1, 0, 0)
        obs = _traceback(tback_m, score_m,
                         TabularMSA([DNA('ACG', metadata={'id': 's1'}),
                                     DNA('ACG', metadata={'id': 's2'})]),
                         TabularMSA([DNA('ACGT', metadata={'id': 's3'}),
                                     DNA('ACGT', metadata={'id': 's4'})]),
                         4, 3)
        self.assertEqual(obs, exp)

        # Start at highest-score.
        exp = ([DNA("ACG", metadata={'id': 'foo'})],
               [DNA("ACG", metadata={'id': 'bar'})],
               6, 0, 0)
        obs = _traceback(tback_m, score_m,
                         TabularMSA([DNA('ACG', metadata={'id': 'foo'})]),
                         TabularMSA([DNA('ACGT', metadata={'id': 'bar'})]),
                         3, 3)
        self.assertEqual(obs, exp)

        # Terminate traceback before top-right.
        tback_m = np.array([[0, 3, 3, 3],
                            [2, 1, 3, 3],
                            [2, 2, 0, 3],
                            [2, 2, 2, 1],
                            [2, 2, 2, 2]])
        exp = ([DNA("G", metadata={'id': 'a'})],
               [DNA("G", metadata={'id': 'a'})],
               6, 2, 2)
        obs = _traceback(tback_m, score_m,
                         TabularMSA([DNA('ACG', metadata={'id': 'a'})]),
                         TabularMSA([DNA('ACGT', metadata={'id': 'a'})]),
                         3, 3)
        self.assertEqual(obs, exp)

    def test_first_largest(self):
        pairs = [(5, 'a'), (5, 'b'), (5, 'c')]
        self.assertEqual(_first_largest(pairs), (5, 'a'))
        pairs = [(5, 'c'), (5, 'b'), (5, 'a')]
        self.assertEqual(_first_largest(pairs), (5, 'c'))
        pairs = [(5, 'c'), (6, 'b'), (5, 'a')]
        self.assertEqual(_first_largest(pairs), (6, 'b'))
        # Works for more than three entries.
        pairs = [(5, 'c'), (6, 'b'), (5, 'a'), (7, 'd')]
        self.assertEqual(_first_largest(pairs), (7, 'd'))
        # Note that max([(5, 'a'), (5, 'c')]) == max([(5, 'c'), (5, 'a')]),
        # but for the purposes needed here, the max must be the same
        # regardless of what the second item in the tuple is.


if __name__ == "__main__":
    main()
# ---------------------------------------------------------------------------- import unittest import numpy as np import numpy.testing as npt from skbio.alignment._path import PairAlignPath, AlignPath, _run_length_encode from skbio.alignment import TabularMSA from skbio.sequence import DNA class TestAlignPath(unittest.TestCase): def test_init(self): # test 1-D starts vector with self.assertRaises(TypeError, msg="`starts` must be a 1-D vector."): path = AlignPath(lengths=[1, 2, 3], states=[1, 2, 3], starts=[[0], [0]]) # test states and starts matching with self.assertRaises(ValueError, msg="Sizes of `starts` and `states` do not " "match."): path = AlignPath(lengths=[1, 2, 3], states=[1, 2, 3], starts=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) def test_lengths(self): obs = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]).lengths npt.assert_array_equal(obs, np.array([3, 2, 5, 1, 4, 3, 2], dtype=np.int64)) def test_states(self): obs = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]).states npt.assert_array_equal(obs, np.array([[0, 2, 0, 6, 0, 1, 0]], dtype=np.uint8)) def test_starts(self): obs = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]).starts npt.assert_array_equal(obs, np.array([0, 0, 0], dtype=np.int64)) def test_shape(self): obs = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]).shape self.assertEqual(obs.sequence, 3) self.assertEqual(obs.position, 20) def test_to_bits(self): path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]) exp = np.array(([0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0])) obs = path.to_bits() npt.assert_array_equal(obs, exp) def test_from_bits(self): # test 1D base case, less than 8 sequences bits = np.array(([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])) exp = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]) obs = AlignPath.from_bits(bits) npt.assert_array_equal(obs.lengths, exp.lengths) npt.assert_array_equal(obs.states, exp.states) # test starts parameter starts = [1, 2, 3] obs = AlignPath.from_bits(bits, starts) npt.assert_array_equal(obs.lengths, exp.lengths) npt.assert_array_equal(obs.states, exp.states) npt.assert_array_equal(obs.starts, starts) # test 2D base case, more than 8 sequences rng = np.random.default_rng(seed=42) bits = rng.choice([0, 1], size=(10, 10), p=[0.85, 0.15]) exp = AlignPath(lengths=[1, 1, 1, 1, 1, 1, 3, 1], states=[[0, 10, 133, 4, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 2]], starts=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) obs = AlignPath.from_bits(bits) npt.assert_array_equal(obs.lengths, exp.lengths) npt.assert_array_equal(obs.states, exp.states) def test_from_tabular(self): msa = ('CGGTCGTAACGCGTA---CA', 'CAG--GTAAG-CATACCTCA', 'CGGTCGTCAC-TGTACACTA') tabular = TabularMSA([DNA(x) for x in msa]) path = AlignPath.from_tabular(tabular) lengths = [3, 2, 5, 1, 4, 3, 2] states = [0, 2, 0, 6, 0, 1, 0] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) def test_to_indices(self): # test gap = -1 path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]) exp = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1, -1, -1, 15, 16], [0, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13, 14, 15, 16, 17, 18]]) obs = path.to_indices() npt.assert_array_equal(obs, exp) # test gap = 'del' exp = np.array([[0, 1, 2, 5, 6, 7, 8, 9, 11, 12, 13, 14, 15, 16], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15, 16], [0, 1, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 17, 18]]) obs = path.to_indices(gap='del') npt.assert_array_equal(obs, exp) # test gap = 'mask' exp = 
np.ma.array(data=[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 14, 14, 14, 15, 16], [0, 1, 2, 2, 2, 3, 4, 5, 6, 7, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]], mask=[[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, False, False], [False, False, False, True, True, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False]], fill_value=999999) obs = path.to_indices(gap='mask') npt.assert_array_equal(obs, exp) # test with starts as non-zero path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[1, 35, 28]) exp = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, 16, 17], [35, 36, 37, -1, -1, 38, 39, 40, 41, 42, -1, 43, 44, 45, 46, 47, 48, 49, 50, 51], [28, 29, 30, 31, 32, 33, 34, 35, 36, 37, -1, 38, 39, 40, 41, 42, 43, 44, 45, 46]]) obs = path.to_indices() npt.assert_array_equal(obs, exp) # test 'del' with non-zero starts exp = np.array([[1, 2, 3, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17], [35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 50, 51], [28, 29, 30, 33, 34, 35, 36, 37, 38, 39, 40, 41, 45, 46]]) obs = path.to_indices(gap='del') npt.assert_array_equal(obs, exp) # test 'mask' with non-zero starts exp = np.ma.array(data=[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15, 15, 16, 17], [35, 36, 37, 37, 37, 38, 39, 40, 41, 42, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51], [28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46]], mask=[[False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, True, True, False, False], [False, False, False, True, True, False, False, False, False, False, True, False, False, 
False, False, False, False, False, False, False], [False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False]], fill_value=999999) obs = path.to_indices(gap='mask') npt.assert_array_equal(obs, exp) # test invalid gap with self.assertRaises(TypeError, msg="Gap must be an integer, np.nan, np.inf, 'del', " "or 'mask'."): path.to_indices(gap="no") def test_from_indices(self): # test no mask indices = np.array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, -1, -1, -1, 15, 16], [0, 1, 2, -1, -1, 3, 4, 5, 6, 7, -1, 8, 9, 10, 11, 12, 13, 14, 15, 16], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, 10, 11, 12, 13, 14, 15, 16, 17, 18]]) path = AlignPath.from_indices(indices) lengths = [3, 2, 5, 1, 4, 3, 2] states = [0, 2, 0, 6, 0, 1, 0] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) # test masked array masked = np.ma.array(indices, mask=(indices == -1)) path = AlignPath.from_indices(masked, gap="mask") npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) # test non-zero indices indices = np.array([[25, 26, -1, -1, 27, 28, 29, 30], [-1, 79, 80, 81, 82, 83, 84, -1]]) path = AlignPath.from_indices(indices) lengths = [1, 1, 2, 3, 1] states = [2, 0, 1, 0, 2] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) npt.assert_array_equal(path.starts, [25, 79]) # test masked array and non-zero indices # TODO # test indices all gaps indices = np.array([[25, 26, -1, -1, 27, 28, 29, 30], [-1, -1, -1, -1, -1, -1, -1, -1]]) path = AlignPath.from_indices(indices) lengths = [2, 2, 4] states = [2, 3, 2] starts = [25, -1] npt.assert_array_equal(path.lengths, lengths) npt.assert_array_equal(np.squeeze(path.states), states) npt.assert_array_equal(path.starts, starts) def test_to_coordinates(self): # test base case exp = np.array([[0, 3, 5, 10, 11, 15, 15, 17], [0, 3, 3, 8, 8, 
12, 15, 17], [0, 3, 5, 10, 10, 14, 17, 19]]) path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]) obs = path.to_coordinates() npt.assert_array_equal(obs, exp) # test non-zero starts exp = np.array([[2, 5, 7, 12, 13, 17, 17, 19], [512, 515, 515, 520, 520, 524, 527, 529], [28, 31, 33, 38, 38, 42, 45, 47]]) path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[2, 512, 28]) obs = path.to_coordinates() npt.assert_array_equal(obs, exp) def test_from_coordinates(self): # test base case coords = np.array([[0, 3, 5, 10, 11, 15, 15, 17], [0, 3, 3, 8, 8, 12, 15, 17], [0, 3, 5, 10, 10, 14, 17, 19]]) path = AlignPath.from_coordinates(coords) lengths = [3, 2, 5, 1, 4, 3, 2] states = [0, 2, 0, 6, 0, 1, 0] starts = [0, 0, 0] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) npt.assert_array_equal(starts, path.starts) # test non-zero starts coords = np.array([[2, 5, 7, 12, 13, 17, 17, 19], [512, 515, 515, 520, 520, 524, 527, 529], [28, 31, 33, 38, 38, 42, 45, 47]]) path = AlignPath.from_coordinates(coords) lengths = [3, 2, 5, 1, 4, 3, 2] states = [0, 2, 0, 6, 0, 1, 0] starts = [2, 512, 28] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) npt.assert_array_equal(starts, path.starts) class TestPairAlignPath(unittest.TestCase): def test_from_cigar(self): # test valid cigar with no = or X cigar = "3M42I270M32D" path = PairAlignPath.from_cigar(cigar) lengths = [3, 42, 270, 32] states = [0, 1, 0, 2] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) # test valid cigar with = or X cigar = "3M42I270M23X663=32D24X43=" path = PairAlignPath.from_cigar(cigar) lengths = [3, 42, 956, 32, 67] states = [0, 1, 0, 2, 0] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) # test empty cigar string with 
self.assertRaises(ValueError, msg="CIGAR string must not be empty."): PairAlignPath.from_cigar("") # test invalid cigar string with self.assertRaises(ValueError, msg="Invalid characters in CIGAR string."): PairAlignPath.from_cigar("23M45B13X") # test valid cigar with no 1's cigar = "MID12MI" path = PairAlignPath.from_cigar(cigar) lengths = [1, 1, 1, 12, 1] states = [0, 1, 2, 0, 1] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) # test cigar with all possible valid codes cigar = "1M2I3D4P5=6X7N8S9H" path = PairAlignPath.from_cigar(cigar) lengths = [1, 2, 3, 4, 11, 7, 8, 9] states = [0, 1, 2, 3, 0, 2, 1, 3] npt.assert_array_equal(lengths, path.lengths) npt.assert_array_equal(states, np.squeeze(path.states)) def test_to_cigar(self): # test base case lengths = [1, 2, 3, 4, 50, 234] gaps = [1, 0, 2, 1, 0, 1] ExamplePairAlignPath = PairAlignPath(lengths=lengths, states=gaps, starts=[0, 0]) obs = ExamplePairAlignPath.to_cigar() exp = "1I2M3D4I50M234I" self.assertEqual(obs, exp) # test if seqs are provided seq1 = "-AATCT----" + "C"*50 + "-"*234 seq2 = "TAC---GGCC" + "C"*20 + "A"*264 seqs = [DNA(seq1), DNA(seq2)] obs = ExamplePairAlignPath.to_cigar(seqs=seqs) exp = "1I2X3D4I5X18=27X234I" self.assertEqual(obs, exp) # test if alignment has two gaps in same position lengths = [1, 2, 3, 4, 1] gaps = [1, 0, 2, 1, 3] path = PairAlignPath(lengths=lengths, states=gaps, starts=[0, 0]) obs = path.to_cigar() exp = "1I2M3D4I1P" self.assertEqual(obs, exp) # two gaps with seqs provided seq1 = '-ATCGC-----' seq2 = 'GTA---ATTA-' seqs = [DNA(seq1), DNA(seq2)] obs = path.to_cigar(seqs=seqs) exp = "1I1X1=3D4I1P" self.assertEqual(obs, exp) # test sequences as strings seq1 = '-ATCGC-----' seq2 = 'GTA---ATTA-' seqs = [seq1, seq2] obs = path.to_cigar(seqs=seqs) exp = "1I1X1=3D4I1P" self.assertEqual(obs, exp) # test invalid sequence input seq1 = 1 seq2 = 'GTA---ATTA-' seqs = [seq1, seq2] with self.assertRaises(TypeError, msg="`seqs` must be of 
type string or Sequence object."): obs = path.to_cigar(seqs=seqs) def test_from_bits(self): # test base case bits = np.array(([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])) exp = PairAlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 2, 0, 1, 0], starts=[0, 0]) obs = PairAlignPath.from_bits(bits) npt.assert_array_equal(obs.lengths, exp.lengths) npt.assert_array_equal(obs.states, exp.states) # test empty bit array bits = np.array(([], [])) with self.assertRaises(TypeError, msg="Input 'bits' must be a non-empty 2D numpy array."): PairAlignPath.from_bits(bits) # test 1D bit array bits = np.array([0, 0, 1]) with self.assertRaises(TypeError, msg="Input 'bits' must be a non-empty 2D numpy array."): PairAlignPath.from_bits(bits) # test array with invalid values bits = np.array(([1, 2, 3], [0, 5, 1])) with self.assertRaises(ValueError, msg="Input 'bits' must contain only zeros and ones."): PairAlignPath.from_bits(bits) # test non numpy array input bits = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]] exp = PairAlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 2, 0, 1, 0], starts=[0, 0, 0]) obs = PairAlignPath.from_bits(bits) npt.assert_array_equal(obs.lengths, exp.lengths) npt.assert_array_equal(obs.states, exp.states) def test_to_bits(self): # test input with invalid values with self.assertRaises(ValueError, msg="For pairwise alignment, `states` must only " "contain zeros, ones, twos, or threes."): PairAlignPath(lengths=[1, 2, 3], states=[1, 2, 4], starts=[0, 0]).to_bits() # test base case path = PairAlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 2, 0, 1, 0], starts=[0, 0]) exp = np.array(([0, 0, 0, 0, 0, 1, 0], [0, 1, 0, 1, 0, 0, 0])) obs = path.to_bits() npt.assert_array_equal(np.squeeze(obs), exp) def 
test_run_length_encode(self): obs = _run_length_encode("ABBCCCDDDD") exp = "1A2B3C4D" self.assertEqual(obs, exp) if __name__ == "__main__": unittest.main() scikit-bio-0.6.2/skbio/alignment/tests/test_ssw.py000066400000000000000000000776171464262511300222560ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # Special thanks to http://www.faculty.ucr.edu/~mmaduro/random.htm for the # random DNA generator. # These tests confirm that StripedSmithWaterman returns the same results as # SSW. We don't test for correctness of those results (i.e., we assume that # ssw.c and ssw.h are correct) as that testing is beyond the scope of skbio. # Furthermore all expected results are created by running StripedSmithWaterman # the resulting alignments are verified by hand. Creating tests from the base # C API is impractical at this time. 
from unittest import TestCase, main from skbio import (local_pairwise_align_ssw, Sequence, DNA, RNA, Protein, SubstitutionMatrix, TabularMSA) from skbio.alignment import StripedSmithWaterman, AlignmentStructure class TestSSW(TestCase): align_attributes = [ "optimal_alignment_score", "suboptimal_alignment_score", "target_begin", "target_end_optimal", "target_end_suboptimal", "query_begin", "query_end", "cigar", "query_sequence", "target_sequence" ] blosum50 = SubstitutionMatrix.by_name('BLOSUM50').to_dict() def _check_alignment(self, alignment, expected): for attribute in self.align_attributes: # The first element of this tuple is to identify # the broken sequence if one should fail self.assertEqual((expected['target_sequence'], expected[attribute]), (alignment['target_sequence'], alignment[attribute])) def _check_argument_with_inequality_on_optimal_align_score( self, query_sequences=None, target_sequences=None, arg=None, default=None, i_range=None, compare_lt=None, compare_gt=None): iterable_kwarg = {} default_kwarg = {} default_kwarg[arg] = default for query_sequence in query_sequences: for target_sequence in target_sequences: for i in i_range: iterable_kwarg[arg] = i query1 = StripedSmithWaterman(query_sequence, **iterable_kwarg) align1 = query1(target_sequence) query2 = StripedSmithWaterman(query_sequence, **default_kwarg) align2 = query2(target_sequence) if i == default: self.assertEqual(align1.optimal_alignment_score, align2.optimal_alignment_score) if i < default: compare_lt(align1.optimal_alignment_score, align2.optimal_alignment_score) if i > default: compare_gt(align1.optimal_alignment_score, align2.optimal_alignment_score) def _check_bit_flag_sets_properties_falsy_or_negative( self, query_sequences=None, target_sequences=None, arg_settings=[], properties_to_null=[]): kwarg = {} def falsy_or_negative(alignment, prop): if isinstance(alignment[prop], int): return alignment[prop] < 0 else: return not alignment[prop] for query_sequence in query_sequences: for 
target_sequence in target_sequences: for arg, setting in arg_settings: kwarg[arg] = setting query = StripedSmithWaterman(query_sequence, **kwarg) alignment = query(target_sequence) for prop in properties_to_null: self.assertTrue(falsy_or_negative(alignment, prop)) # Every property not in our null list for prop in [p for p in self.align_attributes if p not in properties_to_null]: self.assertFalse(falsy_or_negative(alignment, prop)) class TestStripedSmithWaterman(TestSSW): def test_object_is_reusable(self): q_seq = "AGGGTAATTAGGCGTGTTCACCTA" expected_alignments = [ { 'optimal_alignment_score': 10, 'suboptimal_alignment_score': 10, 'query_begin': 4, 'query_end': 8, 'target_begin': 3, 'target_end_optimal': 7, 'target_end_suboptimal': 34, 'cigar': '5M', 'query_sequence': q_seq, 'target_sequence': ('TTATAATTTTCTTATTATTATCAATATTTATAATTTGATTT' 'TGTTGTAAT') }, { 'optimal_alignment_score': 36, 'suboptimal_alignment_score': 16, 'query_begin': 0, 'query_end': 23, 'target_begin': 6, 'target_end_optimal': 29, 'target_end_suboptimal': 13, 'cigar': '8M1D8M1I7M', 'query_sequence': q_seq, 'target_sequence': 'AGTCGAAGGGTAATATAGGCGTGTCACCTA' }, { 'optimal_alignment_score': 16, 'suboptimal_alignment_score': 0, 'query_begin': 0, 'query_end': 7, 'target_begin': 6, 'target_end_optimal': 13, 'target_end_suboptimal': 0, 'cigar': '8M', 'query_sequence': q_seq, 'target_sequence': 'AGTCGAAGGGTAATA' }, { 'optimal_alignment_score': 8, 'suboptimal_alignment_score': 8, 'query_begin': 0, 'query_end': 3, 'target_begin': 7, 'target_end_optimal': 10, 'target_end_suboptimal': 42, 'cigar': '4M', 'query_sequence': q_seq, 'target_sequence': ('CTGCCTCAGGGGGAGGAAAGCGTCAGCGCGGCTGCCGTCGG' 'CGCAGGGGC') }, { 'optimal_alignment_score': 48, 'suboptimal_alignment_score': 16, 'query_begin': 0, 'query_end': 23, 'target_begin': 0, 'target_end_optimal': 23, 'target_end_suboptimal': 7, 'cigar': '24M', 'query_sequence': q_seq, 'target_sequence': q_seq } ] query = StripedSmithWaterman(q_seq) results = [] for expected in 
expected_alignments: alignment = query(expected['target_sequence']) results.append(alignment) for result, expected in zip(results, expected_alignments): self._check_alignment(result, expected) def test_regression_on_instantiation_arguments(self): expected = { 'optimal_alignment_score': 23, 'suboptimal_alignment_score': 10, 'query_begin': 0, 'query_end': 16, 'target_begin': 0, 'target_end_optimal': 20, 'target_end_suboptimal': 4, 'cigar': '6M4D11M', 'query_sequence': 'AAACGATAAATCCGCGTA', 'target_sequence': 'AAACGACTACTAAATCCGCGTGATAGGGGA' } query = StripedSmithWaterman(expected['query_sequence'], gap_open_penalty=5, gap_extend_penalty=2, score_size=2, mask_length=15, mask_auto=True, score_only=False, score_filter=None, distance_filter=None, override_skip_babp=False, protein=False, match_score=2, mismatch_score=-3, substitution_matrix=None, suppress_sequences=False, zero_index=True) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_protein_sequence_is_usable(self): expected = { 'optimal_alignment_score': 316, 'suboptimal_alignment_score': 95, 'query_begin': 0, 'query_end': 52, 'target_begin': 0, 'target_end_optimal': 52, 'target_end_suboptimal': 18, 'cigar': '15M1D15M1I22M', 'query_sequence': ('VHLTGEEKSAVAALWGKVNVDEVGGEALGRXLLVVYPWTQRFFESF' 'SDLSTPDABVMSNPKVKAHGK'), 'target_sequence': ('VHLTPEEKSAVTALWBGKVNVDEVGGEALGRLLVVYPWTQRFFES' 'FGDLSTPD*') } query = StripedSmithWaterman(expected['query_sequence'], protein=True, substitution_matrix=self.blosum50) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_lowercase_is_valid_sequence(self): expected = { 'optimal_alignment_score': 23, 'suboptimal_alignment_score': 10, 'query_begin': 0, 'query_end': 16, 'target_begin': 0, 'target_end_optimal': 20, 'target_end_suboptimal': 4, 'cigar': '6M4D11M', 'query_sequence': 'aaacgataaatccgcgta', 'target_sequence': 'aaacgactactaaatccgcgtgatagggga' } query = 
StripedSmithWaterman(expected['query_sequence']) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_align_with_N_in_nucleotide_sequence(self): expected = { 'optimal_alignment_score': 9, 'suboptimal_alignment_score': 0, 'query_begin': 0, 'query_end': 8, 'target_begin': 0, 'target_end_optimal': 9, 'target_end_suboptimal': 0, 'cigar': '4M1D5M', 'query_sequence': 'ACTCANNATCGANCTAGC', 'target_sequence': 'ACTCGAAAATGTNNGCA' } query = StripedSmithWaterman(expected['query_sequence']) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_arg_match_score(self): query_sequences = [ "TTTTTTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTCAATATAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "CTGCCTCAAGGGGGAGGAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC", "AGGGTAATTTTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_argument_with_inequality_on_optimal_align_score( query_sequences=query_sequences, target_sequences=target_sequences, arg='match_score', default=2, i_range=range(0, 5), compare_lt=self.assertLess, compare_gt=self.assertGreater ) # The above is a strict bound, so we don't need a expected align def test_arg_mismatch_score(self): query_sequences = [ "TTATAATTAATTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAAGGGGTATAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "CTGCCTCAGGGGCGAGGAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC", "AGGGTAATTAGCGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_argument_with_inequality_on_optimal_align_score( query_sequences=query_sequences, target_sequences=target_sequences, arg='mismatch_score', default=-3, i_range=range(-6, 1), # These are intentionally inverted compare_lt=self.assertLessEqual, compare_gt=self.assertGreaterEqual ) # The above is not a strict bound, so lets use an expected align # to plug the hole where every align is exactly equal to default expected = { 'optimal_alignment_score': 8, 'suboptimal_alignment_score': 0, 
'query_begin': 5, 'query_end': 8, 'target_begin': 10, 'target_end_optimal': 13, 'target_end_suboptimal': 0, 'cigar': '4M', 'query_sequence': 'AGAGGGTAATCAGCCGTGTCCACCGGAACACAACGCTATCGGGCGA', 'target_sequence': 'GTTCGCCCCAGTAAAGTTGCTACCAAATCCGCATG' } query = StripedSmithWaterman(expected['query_sequence'], mismatch_score=-8) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_arg_matrix_overrides_match_and_mismatch(self): query_sequences = [ "TTATAATTAATTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAAGGGGTATAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "CTGCCTCAGGGGCGAGGAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC", "AGGGTAATTAGCGCGTGTTCACCTA" ] target_sequences = query_sequences matrix = { # This is a biologically meaningless matrix "A": {"A": 4, "T": -1, "C": -2, "G": -3, "N": 4}, "T": {"A": -1, "T": 1, "C": -1, "G": -4, "N": 1}, "C": {"A": -2, "T": -1, "C": 10, "G": 1, "N": 1}, "G": {"A": -3, "T": -4, "C": 1, "G": 3, "N": 1}, "N": {"A": 4, "T": 1, "C": 1, "G": 1, "N": 0} } for query_sequence in query_sequences: for target_sequence in target_sequences: query1 = StripedSmithWaterman(query_sequence) align1 = query1(target_sequence) query2 = StripedSmithWaterman(query_sequence, substitution_matrix=matrix) align2 = query2(target_sequence) self.assertNotEqual(align1.optimal_alignment_score, align2.optimal_alignment_score) def test_arg_gap_open_penalty(self): query_sequences = [ "TTATAATTTTCTTAGTTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCCGAAGGGTAATATAGGCGTGTCACCTA", "AGTCGAAGGCGGTAATA", "CTGCCTCGGCAGGGGGAGGAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC", "AGGGTAATTAAAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_argument_with_inequality_on_optimal_align_score( query_sequences=query_sequences, target_sequences=target_sequences, arg='gap_open_penalty', default=5, i_range=range(1, 12), # These are intentionally inverted compare_lt=self.assertGreaterEqual, compare_gt=self.assertLessEqual ) # The above is not a strict bound, so 
lets use an expected align # to plug the hole where every align is exactly equal to default expected = { 'optimal_alignment_score': 51, 'suboptimal_alignment_score': 20, 'query_begin': 0, 'query_end': 37, 'target_begin': 0, 'target_end_optimal': 29, 'target_end_suboptimal': 9, 'cigar': '5M4I3M3I1M1I21M', 'query_sequence': 'TAGAGATTAATTGCCACATTGCCACTGCCAAAATTCTG', 'target_sequence': 'TAGAGATTAATTGCCACTGCCAAAATTCTG' } query = StripedSmithWaterman(expected['query_sequence'], gap_open_penalty=1) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_arg_gap_extend_penalty(self): query_sequences = [ "TTATAATTTTCTTATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAATACTAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "CTGCCTCAGGGGGAGGCAAAGCGTCAGCGCGGCTGCCGTCGGCGCAGGGGC", "AGGGTAATTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_argument_with_inequality_on_optimal_align_score( query_sequences=query_sequences, target_sequences=target_sequences, arg='gap_extend_penalty', default=2, i_range=range(1, 10), # These are intentionally inverted compare_lt=self.assertGreaterEqual, compare_gt=self.assertLessEqual ) # The above is not a strict bound, so lets use an expected align # to plug the hole where every align is exactly equal to default expected = { 'optimal_alignment_score': 9, 'suboptimal_alignment_score': 8, 'query_begin': 6, 'query_end': 12, 'target_begin': 7, 'target_end_optimal': 13, 'target_end_suboptimal': 38, 'cigar': '7M', 'query_sequence': 'TCTATAAGATTCCGCATGCGTTACTTATAAGATGTCTCAACGG', 'target_sequence': 'GCCCAGTAGCTTCCCAATATGAGAGCATCAATTGTAGATCGGGCC' } query = StripedSmithWaterman(expected['query_sequence'], gap_extend_penalty=10) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_arg_score_only(self): query_sequences = [ "TTATCGTGATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAATACTATAAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "AGGGTAATTAGGCGTGCGTGCGTGTTCACCTA", 
"AGGGTATTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_bit_flag_sets_properties_falsy_or_negative( query_sequences=query_sequences, target_sequences=target_sequences, arg_settings=[('score_only', True)], properties_to_null=['query_begin', 'target_begin', 'cigar'] ) def test_arg_score_filter_is_used(self): query_sequences = [ "TTATCGTGATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAATACTATAAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "AGGGTAATTAGGCGTGCGTGCGTGTTCACCTA", "AGGGTATTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_bit_flag_sets_properties_falsy_or_negative( query_sequences=query_sequences, target_sequences=target_sequences, # score_filter will force a BABP and cigar to be falsy arg_settings=[('score_filter', 9001)], properties_to_null=['query_begin', 'target_begin', 'cigar'] ) def test_arg_distance_filter_is_used(self): query_sequences = [ "TTATCGTGATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAATACTATAAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "AGGGTAATTAGGCGTGCGTGCGTGTTCACCTA", "AGGGTATTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_bit_flag_sets_properties_falsy_or_negative( query_sequences=query_sequences, target_sequences=target_sequences, # distance_filter will force cigar to be falsy only arg_settings=[('distance_filter', 1)], properties_to_null=['cigar'] ) def test_arg_override_skip_babp(self): query_sequences = [ "TTATCGTGATTATTATCAATATTTATAATTTGATTTTGTTGTAAT", "AGTCGAAGGGTAATACTATAAGGCGTGTCACCTA", "AGTCGAAGGGTAATA", "AGGGTAATTAGGCGTGCGTGCGTGTTCACCTA", "AGGGTATTAGGCGTGTTCACCTA" ] target_sequences = query_sequences self._check_bit_flag_sets_properties_falsy_or_negative( query_sequences=query_sequences, target_sequences=target_sequences, # score_filter will force a BABP and cigar to be falsy if not for # override_skip_babp preventing this for all but the cigar arg_settings=[('override_skip_babp', True), ('score_filter', 9001)], properties_to_null=['cigar'] ) def 
test_arg_zero_index_changes_base_of_index_to_0_or_1(self): expected_alignments = [ ({ 'optimal_alignment_score': 100, 'suboptimal_alignment_score': 44, 'query_begin': 5, 'query_end': 54, 'target_begin': 0, 'target_end_optimal': 49, 'target_end_suboptimal': 21, 'cigar': '50M', 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, True), ({ 'optimal_alignment_score': 100, 'suboptimal_alignment_score': 44, 'query_begin': 6, 'query_end': 55, 'target_begin': 1, 'target_end_optimal': 50, 'target_end_suboptimal': 22, 'cigar': '50M', 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, False) ] for expected, z in expected_alignments: query = StripedSmithWaterman(expected['query_sequence'], zero_index=z) alignment = query(expected['target_sequence']) self._check_alignment(alignment, expected) def test_arg_suppress_sequences(self): expected = { 'optimal_alignment_score': 100, 'suboptimal_alignment_score': 44, 'query_begin': 5, 'query_end': 54, 'target_begin': 0, 'target_end_optimal': 49, 'target_end_suboptimal': 21, 'cigar': '50M', 'query_sequence': '', 'target_sequence': '' } query = StripedSmithWaterman( "AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCCGGGCGGGGC", suppress_sequences=True) alignment = query("CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCCGGGCGGGGC") self._check_alignment(alignment, expected) class TestAlignStripedSmithWaterman(TestSSW): def _check_TabularMSA_to_AlignmentStructure(self, alignment, structure, expected_dtype): msa, score, start_end = alignment self.assertEqual(score, structure.optimal_alignment_score) self.assertEqual( msa, TabularMSA([expected_dtype(structure.aligned_query_sequence), expected_dtype(structure.aligned_target_sequence)])) if structure.query_begin == -1: self.assertEqual(start_end, None) else: for (start, end), 
(expected_start, expected_end) in \ zip(start_end, [(structure.query_begin, structure.query_end), (structure.target_begin, structure.target_end_optimal)]): self.assertEqual(start, expected_start) self.assertEqual(end, expected_end) def test_same_as_using_StripedSmithWaterman_object_DNA(self): query_sequence = 'ATGGAAGCTATAAGCGCGGGTGAG' target_sequence = 'AACTTATATAATAAAAATTATATATTCGTTGGGTTCTTTTGATATAAATC' query = StripedSmithWaterman(query_sequence) align1 = query(target_sequence) align2 = local_pairwise_align_ssw(DNA(query_sequence), DNA(target_sequence)) self._check_TabularMSA_to_AlignmentStructure(align2, align1, DNA) def test_same_as_using_StripedSmithWaterman_object_Protein(self): query_sequence = 'HEAGAWGHEE' target_sequence = 'PAWHEAE' query = StripedSmithWaterman(query_sequence, protein=True, substitution_matrix=self.blosum50) align1 = query(target_sequence) align2 = local_pairwise_align_ssw(Protein(query_sequence), Protein(target_sequence), substitution_matrix=self.blosum50) self._check_TabularMSA_to_AlignmentStructure(align2, align1, Protein) def test_kwargs_are_usable(self): kwargs = {} kwargs['mismatch_score'] = -2 kwargs['match_score'] = 5 query_sequence = 'AGGGTAATTAGGCGTGTTCACCTA' target_sequence = 'TACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTG' query = StripedSmithWaterman(query_sequence, **kwargs) align1 = query(target_sequence) align2 = local_pairwise_align_ssw(DNA(query_sequence), DNA(target_sequence), **kwargs) self._check_TabularMSA_to_AlignmentStructure(align2, align1, DNA) def test_invalid_type(self): with self.assertRaisesRegex(TypeError, r"not type 'Sequence'"): local_pairwise_align_ssw(DNA('ACGT'), Sequence('ACGT')) with self.assertRaisesRegex(TypeError, r"not type 'str'"): local_pairwise_align_ssw('ACGU', RNA('ACGU')) def test_type_mismatch(self): with self.assertRaisesRegex(TypeError, r"same type: 'DNA' != 'RNA'"): local_pairwise_align_ssw(DNA('ACGT'), RNA('ACGU')) class TestAlignmentStructure(TestSSW): def mock_object_factory(self, 
dictionary): class MockAlignmentStructure(AlignmentStructure): def __init__(self, _a, _b, _c): for key in dictionary: setattr(self, key, dictionary[key]) return MockAlignmentStructure(None, None, 0) def test_works_for_dot_and_square_bracket_access(self): q_seq = "AGGGTAATTAGGCGTGTTCACCTA" query = StripedSmithWaterman(q_seq) alignment = query("TACTTATAAGATGTCTCAACGGCATGCGCAACTTGTGAAGTG") for accessible in self.align_attributes: self.assertEqual(getattr(alignment, accessible), alignment[accessible]) def test_is_zero_based_returns_true_if_index_base_is_zero(self): expected_alignments = [ ({ 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, True), ({ 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, False) ] for expected, z in expected_alignments: query = StripedSmithWaterman(expected['query_sequence'], zero_index=z) alignment = query(expected['target_sequence']) self.assertEqual(z, alignment.is_zero_based()) def test_set_zero_based_changes_the_index_base(self): expected_alignments = [ ({ 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, True), ({ 'query_sequence': ('AGTCACGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCG' 'CCCCGGGCGGGGC'), 'target_sequence': ('CGCGCGCCGCCGGGGGGCCGGCCGGCGCCGGGGGGCGCCCC' 'GGGCGGGGC') }, False) ] for expected, z in expected_alignments: query = StripedSmithWaterman(expected['query_sequence'], zero_index=z) alignment = query(expected['target_sequence']) alignment.set_zero_based(not z) self.assertEqual(not z, alignment.is_zero_based()) def test__get_aligned_sequences(self): generic_sequence = "123456789abcdefghijklmnopqrstuvwxyz" tests = [ # `end_after_cigar` is how far end extends beyond the cigar. 
# Negative values on this should not be possible with SSW { 'cigar_tuples': [ (4, 'M'), (3, 'I'), (1, 'D'), (15, 'M') ], 'begin': 4, 'end_after_cigar': 2, 'gap_type': 'I', 'expected': "5678---9abcdefghijklmnopq" }, { 'cigar_tuples': [ (12, 'M') ], 'begin': 10, 'end_after_cigar': 0, 'gap_type': 'D', 'expected': "bcdefghijklm" }, { 'cigar_tuples': [ (10, 'D'), (1, 'M'), (3, 'I'), (2, 'M') ], 'begin': 0, 'end_after_cigar': 5, 'gap_type': 'I', 'expected': "123456789ab---cdefghi" }, { 'cigar_tuples': [ (10, 'D'), (1, 'M'), (3, 'I'), (2, 'M') ], 'begin': 3, 'end_after_cigar': 0, 'gap_type': 'D', 'expected': "----------456789" }, { 'cigar_tuples': [ (1, 'I'), (4, 'M'), (3, 'I'), (1, 'D'), (8, 'M'), (8, 'D'), (2, 'I'), (6, 'M'), (1, 'I') ], 'begin': 4, 'end_after_cigar': 3, 'gap_type': 'I', 'expected': "-5678---9abcdefghijklmnop--qrstuv-wxy" } ] for test in tests: mock_object = self.mock_object_factory({}) # Because SSW's output is [a, b] and Python's list ranges use # [a, b) a 1 is added in the calculation of aligned sequences. # We just have to subtract 1 while we are testing with the easy to # verify interface of `end_after_cigar` to cancel this range effect # out. 
end = test['end_after_cigar'] - 1 + test['begin'] + \ sum(le if t != test['gap_type'] else 0 for le, t in test['cigar_tuples']) self.assertEqual(test['expected'], AlignmentStructure._get_aligned_sequence( mock_object, generic_sequence, test['cigar_tuples'], test['begin'], end, test['gap_type'])) def test_aligned_query_target_sequence(self): query = StripedSmithWaterman("AGGGTAATTAGGCGTGTTCACCTA") alignment = query("AGTCGAAGGGTAATATAGGCGTGTCACCTA") self.assertEqual("AGGGTAATATAGGCGTG-TCACCTA", alignment.aligned_target_sequence) self.assertEqual("AGGGTAAT-TAGGCGTGTTCACCTA", alignment.aligned_query_sequence) def test_aligned_query_target_sequence_with_suppressed_sequences(self): query = StripedSmithWaterman("AGGGTAATTAGGCGTGTTCACCTA", suppress_sequences=True) alignment = query("AGTCGAAGGGTAATATAGGCGTGTCACCTA") self.assertEqual(None, alignment.aligned_target_sequence) self.assertEqual(None, alignment.aligned_query_sequence) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/alignment/tests/test_tabular_msa.py000066400000000000000000004374461464262511300237340ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import collections.abc import copy import unittest import functools import itertools import types import numpy as np import numpy.testing as npt import pandas as pd import scipy.stats from skbio import Sequence, DNA, RNA, Protein, TabularMSA from skbio.alignment import AlignPath from skbio.sequence import GrammaredSequence from skbio.util import classproperty from skbio.util._decorator import overrides from skbio.util._testing import ReallyEqualMixin from skbio.metadata._testing import (MetadataMixinTests, PositionalMetadataMixinTests) from skbio.util import assert_data_frame_almost_equal from skbio.util._testing import assert_index_equal class TabularMSASubclass(TabularMSA): """Used for testing purposes.""" pass class TestTabularMSAMetadata(unittest.TestCase, ReallyEqualMixin, MetadataMixinTests): def setUp(self): self._metadata_constructor_ = functools.partial(TabularMSA, []) class TestTabularMSAPositionalMetadata(unittest.TestCase, ReallyEqualMixin, PositionalMetadataMixinTests): def setUp(self): def factory(axis_len, positional_metadata=None): return TabularMSA([DNA('A' * axis_len)], positional_metadata=positional_metadata) self._positional_metadata_constructor_ = factory class TestTabularMSA(unittest.TestCase, ReallyEqualMixin): def test_from_dict_empty(self): self.assertEqual(TabularMSA.from_dict({}), TabularMSA([], index=[])) def test_from_dict_single_sequence(self): self.assertEqual(TabularMSA.from_dict({'foo': DNA('ACGT')}), TabularMSA([DNA('ACGT')], index=['foo'])) def test_from_dict_multiple_sequences(self): msa = TabularMSA.from_dict( {1: DNA('ACG'), 2: DNA('GGG'), 3: DNA('TAG')}) # Sort because order is arbitrary. msa.sort() self.assertEqual( msa, TabularMSA([DNA('ACG'), DNA('GGG'), DNA('TAG')], index=[1, 2, 3])) def test_from_dict_invalid_input(self): # Basic test to make sure error-checking in the TabularMSA constructor # is being invoked. 
with self.assertRaisesRegex( ValueError, r'must match the number of positions'): TabularMSA.from_dict({'a': DNA('ACG'), 'b': DNA('ACGT')}) def test_constructor_invalid_dtype(self): with self.assertRaisesRegex(TypeError, r'GrammaredSequence.*Sequence'): TabularMSA([Sequence('')]) with self.assertRaisesRegex(TypeError, r'GrammaredSequence.*int'): TabularMSA([42, DNA('')]) def test_constructor_not_monomorphic(self): with self.assertRaisesRegex(TypeError, r'matching type.*RNA.*DNA'): TabularMSA([DNA(''), RNA('')]) with self.assertRaisesRegex(TypeError, r'matching type.*float.*Protein'): TabularMSA([Protein(''), Protein(''), 42.0, Protein('')]) def test_constructor_unequal_length(self): with self.assertRaisesRegex( ValueError, r'must match the number of positions.*1 != 0'): TabularMSA([Protein(''), Protein('P')]) with self.assertRaisesRegex( ValueError, r'must match the number of positions.*1 != 3'): TabularMSA([Protein('PAW'), Protein('ABC'), Protein('A')]) def test_constructor_non_iterable(self): with self.assertRaises(TypeError): TabularMSA(42) def test_constructor_minter_and_index_both_provided(self): with self.assertRaisesRegex(ValueError, r'both.*minter.*index'): TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=str, index=['a', 'b']) def test_constructor_invalid_minter_callable(self): with self.assertRaises(TypeError): TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=float) def test_constructor_missing_minter_metadata_key(self): with self.assertRaises(KeyError): TabularMSA([DNA('ACGT', metadata={'foo': 'bar'}), DNA('TGCA')], minter='foo') def test_constructor_unhashable_minter_metadata_key(self): with self.assertRaises(TypeError): TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=[]) def test_constructor_index_length_mismatch_iterable(self): with self.assertRaisesRegex(ValueError, r'sequences.*2.*index length.*0'): TabularMSA([DNA('ACGT'), DNA('TGCA')], index=iter([])) def test_constructor_index_length_mismatch_index_object(self): with self.assertRaisesRegex(ValueError, 
r'sequences.*2.*index length.*0'): TabularMSA([DNA('ACGT'), DNA('TGCA')], index=pd.Index([])) def test_constructor_invalid_index_scalar(self): with self.assertRaises(TypeError): TabularMSA([DNA('ACGT'), DNA('TGCA')], index=42) def test_constructor_non_unique_labels(self): msa = TabularMSA([DNA('ACGT'), DNA('ACGT')], index=[1, 1]) assert_index_equal(msa.index, pd.Index([1, 1], dtype=np.int64)) def test_constructor_empty_no_index(self): # sequence empty msa = TabularMSA([]) self.assertIsNone(msa.dtype) self.assertEqual(msa.shape, (0, 0)) assert_index_equal(msa.index, pd.RangeIndex(0)) with self.assertRaises(StopIteration): next(iter(msa)) # position empty seqs = [DNA(''), DNA('')] msa = TabularMSA(seqs) self.assertIs(msa.dtype, DNA) self.assertEqual(msa.shape, (2, 0)) assert_index_equal(msa.index, pd.RangeIndex(2)) self.assertEqual(list(msa), seqs) def test_constructor_empty_with_labels(self): # sequence empty msa = TabularMSA([], minter=lambda x: x) assert_index_equal(msa.index, pd.Index([])) msa = TabularMSA([], index=iter([])) assert_index_equal(msa.index, pd.Index([])) # position empty msa = TabularMSA([DNA('', metadata={'id': 42}), DNA('', metadata={'id': 43})], minter='id') assert_index_equal(msa.index, pd.Index([42, 43])) msa = TabularMSA([DNA(''), DNA('')], index=iter([42, 43])) assert_index_equal(msa.index, pd.Index([42, 43])) def test_constructor_non_empty_no_labels_provided(self): # 1x3 seqs = [DNA('ACG')] msa = TabularMSA(seqs) self.assertIs(msa.dtype, DNA) self.assertEqual(msa.shape, (1, 3)) assert_index_equal(msa.index, pd.RangeIndex(1)) self.assertEqual(list(msa), seqs) # 3x1 seqs = [DNA('A'), DNA('C'), DNA('G')] msa = TabularMSA(seqs) self.assertIs(msa.dtype, DNA) self.assertEqual(msa.shape, (3, 1)) assert_index_equal(msa.index, pd.RangeIndex(3)) self.assertEqual(list(msa), seqs) def test_constructor_non_empty_with_labels_provided(self): seqs = [DNA('ACG'), DNA('CGA'), DNA('GTT')] msa = TabularMSA(seqs, minter=str) self.assertIs(msa.dtype, DNA) 
self.assertEqual(msa.shape, (3, 3)) assert_index_equal(msa.index, pd.Index(['ACG', 'CGA', 'GTT'])) self.assertEqual(list(msa), seqs) msa = TabularMSA(seqs, index=iter([42, 43, 44])) assert_index_equal(msa.index, pd.Index([42, 43, 44])) def test_constructor_works_with_iterator(self): seqs = [DNA('ACG'), DNA('CGA'), DNA('GTT')] msa = TabularMSA(iter(seqs), minter=str) self.assertIs(msa.dtype, DNA) self.assertEqual(msa.shape, (3, 3)) assert_index_equal(msa.index, pd.Index(['ACG', 'CGA', 'GTT'])) self.assertEqual(list(msa), seqs) def test_constructor_with_multiindex_index(self): msa = TabularMSA([DNA('AA'), DNA('GG')], index=[('foo', 42), ('bar', 43)]) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42), ('bar', 43)])) def test_constructor_with_multiindex_minter(self): def multiindex_minter(seq): if str(seq) == 'AC': return ('foo', 42) else: return ('bar', 43) msa = TabularMSA([DNA('AC'), DNA('GG')], minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42), ('bar', 43)])) def test_copy_constructor_respects_default_index(self): msa = TabularMSA([DNA('ACGT'), DNA('----'), DNA('AAAA')]) copy = TabularMSA(msa) self.assertEqual(msa, copy) self.assertIsNot(msa, copy) assert_index_equal(msa.index, pd.RangeIndex(3)) assert_index_equal(copy.index, pd.RangeIndex(3)) def test_copy_constructor_without_metadata(self): msa = TabularMSA([DNA('ACGT'), DNA('----')]) copy = TabularMSA(msa) self.assertEqual(msa, copy) self.assertIsNot(msa, copy) assert_index_equal(copy.index, pd.RangeIndex(2)) def test_copy_constructor_with_metadata(self): msa = TabularMSA([DNA('ACGT'), DNA('----')], metadata={'foo': 42}, positional_metadata={'bar': range(4)}, index=['idx1', 'idx2']) copy = TabularMSA(msa) self.assertEqual(msa, copy) self.assertIsNot(msa, copy) self.assertIsNot(msa.metadata, copy.metadata) self.assertIsNot(msa.positional_metadata, copy.positional_metadata) # pd.Index is 
immutable, no copy necessary. self.assertIs(msa.index, copy.index) def test_copy_constructor_state_override_with_minter(self): msa = TabularMSA([DNA('ACGT'), DNA('----')], metadata={'foo': 42}, positional_metadata={'bar': range(4)}, index=['idx1', 'idx2']) copy = TabularMSA(msa, metadata={'foo': 43}, positional_metadata={'bar': range(4, 8)}, minter=str) self.assertNotEqual(msa, copy) self.assertEqual( copy, TabularMSA([DNA('ACGT'), DNA('----')], metadata={'foo': 43}, positional_metadata={'bar': range(4, 8)}, minter=str)) def test_copy_constructor_state_override_with_index(self): msa = TabularMSA([DNA('ACGT'), DNA('----')], metadata={'foo': 42}, positional_metadata={'bar': range(4)}, index=['idx1', 'idx2']) copy = TabularMSA(msa, metadata={'foo': 43}, positional_metadata={'bar': range(4, 8)}, index=['a', 'b']) self.assertNotEqual(msa, copy) self.assertEqual( copy, TabularMSA([DNA('ACGT'), DNA('----')], metadata={'foo': 43}, positional_metadata={'bar': range(4, 8)}, index=['a', 'b'])) def test_copy_constructor_with_minter_and_index(self): msa = TabularMSA([DNA('ACGT'), DNA('----')], index=['idx1', 'idx2']) with self.assertRaisesRegex(ValueError, r'both.*minter.*index'): TabularMSA(msa, index=['a', 'b'], minter=str) def test_dtype(self): self.assertIsNone(TabularMSA([]).dtype) self.assertIs(TabularMSA([Protein('')]).dtype, Protein) with self.assertRaises(AttributeError): TabularMSA([]).dtype = DNA with self.assertRaises(AttributeError): del TabularMSA([]).dtype def test_shape(self): shape = TabularMSA([DNA('ACG'), DNA('GCA')]).shape self.assertEqual(shape, (2, 3)) self.assertEqual(shape.sequence, shape[0]) self.assertEqual(shape.position, shape[1]) with self.assertRaises(TypeError): shape[0] = 3 with self.assertRaises(AttributeError): TabularMSA([]).shape = (3, 3) with self.assertRaises(AttributeError): del TabularMSA([]).shape def test_index_getter_default_index(self): msa = TabularMSA([DNA('AC'), DNA('AG'), DNA('AT')]) assert_index_equal(msa.index, pd.RangeIndex(3)) 
# immutable with self.assertRaises(TypeError): msa.index[1] = 2 # original state is maintained assert_index_equal(msa.index, pd.RangeIndex(3)) def test_index_getter(self): index = TabularMSA([DNA('AC'), DNA('AG'), DNA('AT')], minter=str).index self.assertIsInstance(index, pd.Index) assert_index_equal(index, pd.Index(['AC', 'AG', 'AT'])) # immutable with self.assertRaises(TypeError): index[1] = 'AA' # original state is maintained assert_index_equal(index, pd.Index(['AC', 'AG', 'AT'])) def test_index_mixed_type(self): msa = TabularMSA([DNA('AC'), DNA('CA'), DNA('AA')], index=['abc', 'd', 42]) assert_index_equal(msa.index, pd.Index(['abc', 'd', 42])) def test_index_setter_empty(self): msa = TabularMSA([]) msa.index = iter([]) assert_index_equal(msa.index, pd.Index([])) def test_index_setter_non_empty(self): msa = TabularMSA([DNA('AC'), DNA('AG'), DNA('AT')]) msa.index = range(3) assert_index_equal(msa.index, pd.RangeIndex(3)) msa.index = range(3, 6) assert_index_equal(msa.index, pd.RangeIndex(3, 6)) def test_index_setter_length_mismatch(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=str) index = pd.Index(['ACGT', 'TGCA']) assert_index_equal(msa.index, index) with self.assertRaisesRegex(ValueError, r'Length mismatch.*2.*3'): msa.index = iter(['ab', 'cd', 'ef']) # original state is maintained assert_index_equal(msa.index, index) def test_index_setter_non_unique_index(self): msa = TabularMSA([RNA('UUU'), RNA('AAA')], minter=str) msa.index = ['1', '1'] self.assertEqual(msa, TabularMSA([RNA('UUU'), RNA('AAA')], index=['1', '1'])) def test_index_setter_tuples(self): msa = TabularMSA([RNA('UUU'), RNA('AAA')]) msa.index = [('foo', 42), ('bar', 43)] self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal( msa.index, pd.Index([('foo', 42), ('bar', 43)], tupleize_cols=True)) def test_index_setter_preserves_range_index(self): msa = TabularMSA([RNA('UUU'), RNA('AAA')], minter=str) msa.index = pd.RangeIndex(2) self.assertEqual(msa, TabularMSA([RNA('UUU'), 
RNA('AAA')])) assert_index_equal(msa.index, pd.RangeIndex(2)) def test_index_deleter(self): msa = TabularMSA([RNA('UUU'), RNA('AAA')], minter=str) assert_index_equal(msa.index, pd.Index(['UUU', 'AAA'])) del msa.index assert_index_equal(msa.index, pd.RangeIndex(2)) # Delete again. del msa.index assert_index_equal(msa.index, pd.RangeIndex(2)) def test_bool(self): self.assertFalse(TabularMSA([])) self.assertFalse(TabularMSA([RNA('')])) self.assertFalse( TabularMSA([RNA('', metadata={'id': 1}), RNA('', metadata={'id': 2})], minter='id')) self.assertTrue(TabularMSA([RNA('U')])) self.assertTrue(TabularMSA([RNA('--'), RNA('..')])) self.assertTrue(TabularMSA([RNA('AUC'), RNA('GCA')])) def test_len(self): self.assertEqual(len(TabularMSA([])), 0) self.assertEqual(len(TabularMSA([DNA('')])), 1) self.assertEqual(len(TabularMSA([DNA('AT'), DNA('AG'), DNA('AT')])), 3) def test_iter(self): with self.assertRaises(StopIteration): next(iter(TabularMSA([]))) seqs = [DNA(''), DNA('')] self.assertEqual(list(iter(TabularMSA(seqs))), seqs) seqs = [DNA('AAA'), DNA('GCT')] self.assertEqual(list(iter(TabularMSA(seqs))), seqs) def test_reversed(self): with self.assertRaises(StopIteration): next(reversed(TabularMSA([]))) seqs = [DNA(''), DNA('', metadata={'id': 42})] self.assertEqual(list(reversed(TabularMSA(seqs))), seqs[::-1]) seqs = [DNA('AAA'), DNA('GCT')] self.assertEqual(list(reversed(TabularMSA(seqs))), seqs[::-1]) def test_eq_and_ne(self): # Each element contains the components necessary to construct a # TabularMSA object: seqs and kwargs. None of these objects (once # constructed) should compare equal to one another. 
components = [ # empties ([], {}), ([RNA('')], {}), ([RNA('')], {'minter': str}), # 1x1 ([RNA('U')], {'minter': str}), # 2x3 ([RNA('AUG'), RNA('GUA')], {'minter': str}), ([RNA('AG'), RNA('GG')], {}), # has labels ([RNA('AG'), RNA('GG')], {'minter': str}), # different dtype ([DNA('AG'), DNA('GG')], {'minter': str}), # different labels ([RNA('AG'), RNA('GG')], {'minter': lambda x: str(x) + '42'}), # different sequence metadata ([RNA('AG', metadata={'id': 42}), RNA('GG')], {'minter': str}), # different sequence data, same labels ([RNA('AG'), RNA('GA')], {'minter': lambda x: 'AG' if 'AG' in x else 'GG'}), # different MSA metadata ([RNA('AG'), RNA('GG')], {'metadata': {'foo': 42}}), ([RNA('AG'), RNA('GG')], {'metadata': {'foo': 43}}), ([RNA('AG'), RNA('GG')], {'metadata': {'foo': 42, 'bar': 43}}), # different MSA positional metadata ([RNA('AG'), RNA('GG')], {'positional_metadata': {'foo': [42, 43]}}), ([RNA('AG'), RNA('GG')], {'positional_metadata': {'foo': [43, 44]}}), ([RNA('AG'), RNA('GG')], {'positional_metadata': {'foo': [42, 43], 'bar': [43, 44]}}), ] for seqs, kwargs in components: obj = TabularMSA(seqs, **kwargs) self.assertReallyEqual(obj, obj) self.assertReallyEqual(obj, TabularMSA(seqs, **kwargs)) self.assertReallyEqual(obj, TabularMSASubclass(seqs, **kwargs)) for (seqs1, kwargs1), (seqs2, kwargs2) in \ itertools.combinations(components, 2): obj1 = TabularMSA(seqs1, **kwargs1) obj2 = TabularMSA(seqs2, **kwargs2) self.assertReallyNotEqual(obj1, obj2) self.assertReallyNotEqual(obj1, TabularMSASubclass(seqs2, **kwargs2)) # completely different types msa = TabularMSA([]) self.assertReallyNotEqual(msa, 42) self.assertReallyNotEqual(msa, []) self.assertReallyNotEqual(msa, {}) self.assertReallyNotEqual(msa, '') def test_eq_constructed_from_different_iterables_compare_equal(self): msa1 = TabularMSA([DNA('ACGT')]) msa2 = TabularMSA((DNA('ACGT'),)) self.assertReallyEqual(msa1, msa2) def test_eq_ignores_minter_str_and_lambda(self): # as long as the labels generated by 
the minters are the same, it # doesn't matter whether the minters are equal. msa1 = TabularMSA([DNA('ACGT', metadata={'id': 'a'})], minter='id') msa2 = TabularMSA([DNA('ACGT', metadata={'id': 'a'})], minter=lambda x: x.metadata['id']) self.assertReallyEqual(msa1, msa2) def test_eq_minter_and_index(self): # as long as the labels generated by the minters are the same, it # doesn't matter whether the minters are equal. msa1 = TabularMSA([DNA('ACGT', metadata={'id': 'a'})], index=['a']) msa2 = TabularMSA([DNA('ACGT', metadata={'id': 'a'})], minter='id') self.assertReallyEqual(msa1, msa2) def test_eq_default_index_and_equivalent_provided_index(self): msa1 = TabularMSA([DNA('ACGT'), DNA('----'), DNA('....')]) msa2 = TabularMSA([DNA('ACGT'), DNA('----'), DNA('....')], index=[0, 1, 2]) self.assertReallyEqual(msa1, msa2) assert_index_equal(msa1.index, pd.RangeIndex(3)) assert_index_equal(msa2.index, pd.Index([0, 1, 2], dtype=np.int64)) def test_reassign_index_empty(self): # sequence empty msa = TabularMSA([]) msa.reassign_index() self.assertEqual(msa, TabularMSA([])) assert_index_equal(msa.index, pd.RangeIndex(0)) msa.reassign_index(minter=str) self.assertEqual(msa, TabularMSA([], minter=str)) assert_index_equal(msa.index, pd.Index([])) # position empty msa = TabularMSA([DNA('')]) msa.reassign_index() self.assertEqual(msa, TabularMSA([DNA('')])) assert_index_equal(msa.index, pd.RangeIndex(1)) msa.reassign_index(minter=str) self.assertEqual(msa, TabularMSA([DNA('')], minter=str)) assert_index_equal(msa.index, pd.Index([''])) def test_reassign_index_non_empty(self): msa = TabularMSA([DNA('ACG', metadata={'id': 1}), DNA('AAA', metadata={'id': 2})], minter=str) assert_index_equal(msa.index, pd.Index(['ACG', 'AAA'])) msa.reassign_index(minter='id') self.assertEqual( msa, TabularMSA([DNA('ACG', metadata={'id': 1}), DNA('AAA', metadata={'id': 2})], minter='id')) assert_index_equal(msa.index, pd.Index([1, 2])) msa.reassign_index(mapping={1: 5}) self.assertEqual( msa, 
TabularMSA([DNA('ACG', metadata={'id': 1}), DNA('AAA', metadata={'id': 2})], index=[5, 2])) assert_index_equal(msa.index, pd.Index([5, 2])) msa.reassign_index() assert_index_equal(msa.index, pd.RangeIndex(2)) def test_reassign_index_minter_and_mapping_both_provided(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=str) with self.assertRaisesRegex(ValueError, r'both.*mapping.*minter.*'): msa.reassign_index(minter=str, mapping={"ACGT": "fleventy"}) # original state is maintained assert_index_equal(msa.index, pd.Index(['ACGT', 'TGCA'])) def test_reassign_index_mapping_invalid_type(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')], minter=str) with self.assertRaisesRegex(TypeError, r'mapping.*dict.*callable.*list'): msa.reassign_index(mapping=['abc', 'def']) # original state is maintained assert_index_equal(msa.index, pd.Index(['ACGT', 'TGCA'])) def test_reassign_index_with_mapping_dict_empty(self): seqs = [DNA("A"), DNA("C"), DNA("G")] msa = TabularMSA(seqs, index=[0.5, 1.5, 2.5]) msa.reassign_index(mapping={}) self.assertEqual(msa, TabularMSA(seqs, index=[0.5, 1.5, 2.5])) def test_reassign_index_with_mapping_dict_subset(self): seqs = [DNA("A"), DNA("C"), DNA("G")] mapping = {0.5: "a", 2.5: "c"} msa = TabularMSA(seqs, index=[0.5, 1.5, 2.5]) msa.reassign_index(mapping=mapping) self.assertEqual(msa, TabularMSA(seqs, index=['a', 1.5, 'c'])) def test_reassign_index_with_mapping_dict_superset(self): seqs = [DNA("A"), DNA("C"), DNA("G")] mapping = {0.5: "a", 1.5: "b", 2.5: "c", 3.5: "d"} msa = TabularMSA(seqs, index=[0.5, 1.5, 2.5]) msa.reassign_index(mapping=mapping) self.assertEqual(msa, TabularMSA(seqs, index=['a', 'b', 'c'])) def test_reassign_index_with_mapping_callable(self): seqs = [DNA("A"), DNA("C"), DNA("G")] msa = TabularMSA(seqs, index=[0, 1, 2]) msa.reassign_index(mapping=str) self.assertEqual(msa, TabularMSA(seqs, index=['0', '1', '2'])) msa.reassign_index(mapping=lambda e: int(e) + 42) self.assertEqual(msa, TabularMSA(seqs, index=[42, 43, 44])) def 
test_reassign_index_non_unique_existing_index(self): seqs = [DNA("A"), DNA("C"), DNA("G")] mapping = {0.5: "a", 1.5: "b", 2.5: "c", 3.5: "d"} msa = TabularMSA(seqs, index=[0.5, 0.5, 0.5]) msa.reassign_index(mapping=mapping) self.assertEqual(msa, TabularMSA(seqs, index=['a', 'a', 'a'])) def test_reassign_index_non_unique_new_index(self): seqs = [DNA("A"), DNA("C"), DNA("G")] mapping = {0.5: "a", 1.5: "a", 2.5: "a"} msa = TabularMSA(seqs, index=[0.5, 1.5, 2.5]) msa.reassign_index(mapping=mapping) self.assertEqual(msa, TabularMSA(seqs, index=['a', 'a', 'a'])) def test_reassign_index_to_multiindex_with_minter(self): msa = TabularMSA([DNA('AC'), DNA('.G')]) def multiindex_minter(seq): if str(seq) == 'AC': return ('foo', 42) else: return ('bar', 43) msa.reassign_index(minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) self.assertEqual( msa, TabularMSA([DNA('AC'), DNA('.G')], index=[('foo', 42), ('bar', 43)])) def test_reassign_index_to_multiindex_with_mapping(self): msa = TabularMSA([DNA('AC'), DNA('.G')]) mapping = {0: ('foo', 42), 1: ('bar', 43)} msa.reassign_index(mapping=mapping) self.assertIsInstance(msa.index, pd.MultiIndex) self.assertEqual( msa, TabularMSA([DNA('AC'), DNA('.G')], index=[('foo', 42), ('bar', 43)])) def test_sort_on_unorderable_msa_index(self): msa = TabularMSA([DNA('AAA'), DNA('ACG'), DNA('---')], index=[42, 41, 'foo']) with self.assertRaises(TypeError): msa.sort() self.assertEqual( msa, TabularMSA([DNA('AAA'), DNA('ACG'), DNA('---')], index=[42, 41, 'foo'])) def test_sort_empty_on_msa_index(self): msa = TabularMSA([], index=[]) msa.sort() self.assertEqual(msa, TabularMSA([], index=[])) msa = TabularMSA([], index=[]) msa.sort(ascending=False) self.assertEqual(msa, TabularMSA([], index=[])) def test_sort_single_sequence_on_msa_index(self): msa = TabularMSA([DNA('ACGT')], index=[42]) msa.sort() self.assertEqual(msa, TabularMSA([DNA('ACGT')], index=[42])) msa = TabularMSA([DNA('ACGT')], index=[42]) msa.sort(ascending=False) 
self.assertEqual(msa, TabularMSA([DNA('ACGT')], index=[42])) def test_sort_multiple_sequences_on_msa_index(self): msa = TabularMSA([ DNA('TC'), DNA('GG'), DNA('CC')], index=['z', 'a', 'b']) msa.sort(ascending=True) self.assertEqual( msa, TabularMSA([ DNA('GG'), DNA('CC'), DNA('TC')], index=['a', 'b', 'z'])) msa = TabularMSA([ DNA('TC'), DNA('GG'), DNA('CC')], index=['z', 'a', 'b']) msa.sort(ascending=False) self.assertEqual( msa, TabularMSA([ DNA('TC'), DNA('CC'), DNA('GG')], index=['z', 'b', 'a'])) def test_sort_on_labels_with_some_repeats(self): msa = TabularMSA([ DNA('TCCG', metadata={'id': 10}), DNA('TAGG', metadata={'id': 10}), DNA('GGGG', metadata={'id': 8}), DNA('TGGG', metadata={'id': 10}), DNA('ACGT', metadata={'id': 0}), DNA('TAGA', metadata={'id': 10})], minter='id') msa.sort() self.assertEqual(msa._seqs.index.to_list(), [0, 8, 10, 10, 10, 10]) vals = list(msa._seqs.values) self.assertEqual(vals[0], DNA('ACGT', metadata={'id': 0})) self.assertEqual(vals[1], DNA('GGGG', metadata={'id': 8})) self.assertIn(DNA('TCCG', metadata={'id': 10}), vals) self.assertIn(DNA('TAGG', metadata={'id': 10}), vals[2:]) self.assertIn(DNA('TGGG', metadata={'id': 10}), vals[2:]) self.assertIn(DNA('TAGA', metadata={'id': 10}), vals[2:]) def test_sort_on_key_with_all_repeats(self): msa = TabularMSA([ DNA('TTT', metadata={'id': 'a'}), DNA('TTT', metadata={'id': 'b'}), DNA('TTT', metadata={'id': 'c'})], minter=str) msa.sort() self.assertEqual( msa, TabularMSA([ DNA('TTT', metadata={'id': 'a'}), DNA('TTT', metadata={'id': 'b'}), DNA('TTT', metadata={'id': 'c'})], minter=str)) def test_sort_default_index(self): msa = TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')]) msa.sort() self.assertEqual( msa, TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')])) def test_sort_default_index_descending(self): msa = TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')]) msa.sort(ascending=False) self.assertEqual( msa, TabularMSA([DNA('CC'), DNA('GG'), DNA('TC')], index=[2, 1, 0])) def 
test_sort_already_sorted(self): msa = TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')], index=[1, 2, 3]) msa.sort() self.assertEqual( msa, TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')], index=[1, 2, 3])) msa = TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')], index=[3, 2, 1]) msa.sort(ascending=False) self.assertEqual( msa, TabularMSA([DNA('TC'), DNA('GG'), DNA('CC')], index=[3, 2, 1])) def test_sort_reverse_sorted(self): msa = TabularMSA([DNA('T'), DNA('G'), DNA('A')], index=[3, 2, 1]) msa.sort() self.assertEqual( msa, TabularMSA([DNA('A'), DNA('G'), DNA('T')], index=[1, 2, 3])) msa = TabularMSA([DNA('T'), DNA('G'), DNA('A')], index=[1, 2, 3]) msa.sort(ascending=False) self.assertEqual( msa, TabularMSA([DNA('A'), DNA('G'), DNA('T')], index=[3, 2, 1])) def test_sort_multiindex(self): multiindex = [(2, 'a'), (1, 'c'), (3, 'b')] sortedindex = [(1, 'c'), (2, 'a'), (3, 'b')] msa = TabularMSA([DNA('A'), DNA('C'), DNA('G')], index=multiindex) msa.sort() self.assertEqual(msa, TabularMSA([DNA('C'), DNA('A'), DNA('G')], index=sortedindex)) def test_sort_multiindex_with_level(self): multiindex = [(2, 'a'), (1, 'c'), (3, 'b')] first_sorted = [(1, 'c'), (2, 'a'), (3, 'b')] second_sorted = [(2, 'a'), (3, 'b'), (1, 'c')] msa = TabularMSA([DNA('A'), DNA('C'), DNA('G')], index=multiindex) self.assertIsInstance(msa.index, pd.MultiIndex) msa.sort(level=0) self.assertEqual(msa, TabularMSA([DNA('C'), DNA('A'), DNA('G')], index=first_sorted)) msa.sort(level=1) self.assertEqual(msa, TabularMSA([DNA('A'), DNA('G'), DNA('C')], index=second_sorted)) def test_to_dict_falsey_msa(self): self.assertEqual(TabularMSA([]).to_dict(), {}) self.assertEqual(TabularMSA([RNA('')], index=['foo']).to_dict(), {'foo': RNA('')}) def test_to_dict_non_empty(self): seqs = [Protein('PAW', metadata={'id': 42}), Protein('WAP', metadata={'id': -999})] msa = TabularMSA(seqs, minter='id') self.assertEqual(msa.to_dict(), {42: seqs[0], -999: seqs[1]}) def test_to_dict_default_index(self): msa = TabularMSA([RNA('UUA'), 
RNA('-C-'), RNA('AAA')]) d = msa.to_dict() self.assertEqual(d, {0: RNA('UUA'), 1: RNA('-C-'), 2: RNA('AAA')}) def test_to_dict_duplicate_labels(self): msa = TabularMSA([DNA("A"), DNA("G")], index=[0, 0]) with self.assertRaises(ValueError) as cm: msa.to_dict() self.assertIn("unique", str(cm.exception)) def test_from_dict_to_dict_roundtrip(self): d = {} self.assertEqual(TabularMSA.from_dict(d).to_dict(), d) # can roundtrip even with mixed key types d1 = {'a': DNA('CAT'), 42: DNA('TAG')} d2 = TabularMSA.from_dict(d1).to_dict() self.assertEqual(d2, d1) self.assertIs(d1['a'], d2['a']) self.assertIs(d1[42], d2[42]) def test_from_path_seqs(self): path = AlignPath(lengths=[3, 2, 5, 1, 4, 3, 2], states=[0, 2, 0, 6, 0, 1, 0], starts=[0, 0, 0]) seqs = [DNA("CGGTCGTAACGCGTACA"), DNA("CAGGTAAGCATACCTCA"), DNA("CGGTCGTCACTGTACACTA")] obj = TabularMSA.from_path_seqs(path=path, seqs=seqs) self.assertEqual(str(obj[0]), "CGGTCGTAACGCGTA---CA") self.assertEqual(str(obj[1]), "CAG--GTAAG-CATACCTCA") self.assertEqual(str(obj[2]), "CGGTCGTCAC-TGTACACTA") class TestContains(unittest.TestCase): def test_no_sequences(self): msa = TabularMSA([], index=[]) self.assertFalse('' in msa) self.assertFalse('foo' in msa) def test_with_str_labels(self): msa = TabularMSA([RNA('AU'), RNA('A.')], index=['foo', 'bar']) self.assertTrue('foo' in msa) self.assertTrue('bar' in msa) self.assertFalse('baz' in msa) self.assertFalse(0 in msa) def test_with_int_labels(self): msa = TabularMSA([RNA('AU'), RNA('A.')], index=[42, -1]) self.assertTrue(42 in msa) self.assertTrue(-1 in msa) self.assertFalse(0 in msa) self.assertFalse('foo' in msa) class TestCopy(unittest.TestCase): # Note: tests for metadata/positional_metadata are in mixin tests above. 
def test_no_sequences(self): msa = TabularMSA([]) msa_copy = copy.copy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) self.assertIsNot(msa._seqs, msa_copy._seqs) def test_with_sequences(self): msa = TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')]) msa_copy = copy.copy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) self.assertIsNot(msa._seqs, msa_copy._seqs) self.assertIsNot(msa[0], msa_copy[0]) self.assertIsNot(msa[1], msa_copy[1]) msa_copy.append(DNA('AAAA'), reset_index=True) self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')])) msa_copy._seqs[0].metadata['bar'] = 42 self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')])) msa_copy._seqs[0].metadata['foo'].append(2) self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1, 2]}), DNA('TGCA')])) def test_with_index(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')], index=['foo', 'bar']) msa_copy = copy.copy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) # pd.Index is immutable, no copy necessary. self.assertIs(msa.index, msa_copy.index) msa_copy.index = [1, 2] assert_index_equal(msa_copy.index, pd.Index([1, 2])) assert_index_equal(msa.index, pd.Index(['foo', 'bar'])) class TestDeepCopy(unittest.TestCase): # Note: tests for metadata/positional_metadata are in mixin tests above. 
def test_no_sequences(self): msa = TabularMSA([]) msa_copy = copy.deepcopy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) self.assertIsNot(msa._seqs, msa_copy._seqs) def test_with_sequences(self): msa = TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')]) msa_copy = copy.deepcopy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) self.assertIsNot(msa._seqs, msa_copy._seqs) self.assertIsNot(msa[0], msa_copy[0]) self.assertIsNot(msa[1], msa_copy[1]) msa_copy.append(DNA('AAAA'), reset_index=True) self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')])) msa_copy._seqs[0].metadata['bar'] = 42 self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')])) msa_copy._seqs[0].metadata['foo'].append(2) self.assertEqual( msa, TabularMSA([DNA('ACGT', metadata={'foo': [1]}), DNA('TGCA')])) def test_with_index(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')], index=['foo', 'bar']) msa_copy = copy.deepcopy(msa) self.assertEqual(msa, msa_copy) self.assertIsNot(msa, msa_copy) # pd.Index is immutable, no copy necessary. 
self.assertIs(msa.index, msa_copy.index) msa_copy.index = [1, 2] assert_index_equal(msa_copy.index, pd.Index([1, 2])) assert_index_equal(msa.index, pd.Index(['foo', 'bar'])) class SharedIndexTests: def get(self, obj, indexable): raise NotImplementedError() def test_tuple_too_big(self): with self.assertRaises(ValueError): self.get(TabularMSA([]), (None, None, None)) def test_empty_msa_slice(self): msa = TabularMSA([]) new = self.get(msa, slice(None, None)) self.assertIsNot(msa, new) self.assertEqual(msa, new) def test_msa_slice_all_first_axis(self): msa = TabularMSA([RNA("AAA", metadata={1: 1}), RNA("AAU", positional_metadata={0: [1, 2, 3]})], metadata={0: 0}, positional_metadata={1: [3, 2, 1]}) new_slice = self.get(msa, slice(None)) new_ellipsis = self.get(msa, Ellipsis) self.assertIsNot(msa, new_slice) for s1, s2 in zip(msa, new_slice): self.assertIsNot(s1, s2) self.assertEqual(msa, new_slice) self.assertIsNot(msa, new_ellipsis) for s1, s2 in zip(msa, new_ellipsis): self.assertIsNot(s1, s2) self.assertEqual(msa, new_ellipsis) def test_msa_slice_all_both_axes(self): msa = TabularMSA([RNA("AAA", metadata={1: 1}), RNA("AAU", positional_metadata={0: [1, 2, 3]})], metadata={0: 0}, positional_metadata={1: [3, 2, 1]}) new_slice = self.get(msa, (slice(None), slice(None))) new_ellipsis = self.get(msa, (Ellipsis, Ellipsis)) self.assertIsNot(msa, new_slice) for s1, s2 in zip(msa, new_slice): self.assertIsNot(s1, s2) self.assertEqual(msa, new_slice) self.assertIsNot(msa, new_ellipsis) for s1, s2 in zip(msa, new_ellipsis): self.assertIsNot(s1, s2) self.assertEqual(msa, new_ellipsis) def test_bool_index_first_axis(self): a = DNA("AAA", metadata={1: 1}) b = DNA("NNN", positional_metadata={1: ['x', 'y', 'z']}) c = DNA("AAC") msa = TabularMSA([a, b, c], metadata={0: 'x'}, positional_metadata={0: [1, 2, 3]}, index=[True, False, True]) new = self.get(msa, [True, True, False]) self.assertEqual(new, TabularMSA([a, b], metadata={0: 'x'}, positional_metadata={0: [1, 2, 3]}, index=[True, 
False])) def test_bool_index_second_axis(self): a = DNA("AAA", metadata={1: 1}) b = DNA("NNN", positional_metadata={1: ['x', 'y', 'z']}) c = DNA("AAC") msa = TabularMSA([a, b, c], metadata={0: 'x'}, positional_metadata={0: [1, 2, 3]}, index=[True, False, True]) new = self.get(msa, (Ellipsis, [True, True, False])) self.assertEqual(new, TabularMSA([a[0, 1], b[0, 1], c[0, 1]], metadata={0: 'x'}, positional_metadata={0: [1, 2]}, index=[True, False, True])) def test_bool_index_both_axes(self): a = DNA("AAA", metadata={1: 1}) b = DNA("NNN", positional_metadata={1: ['x', 'y', 'z']}) c = DNA("AAC") msa = TabularMSA([a, b, c], metadata={0: 'x'}, positional_metadata={0: [1, 2, 3]}, index=[True, False, True]) new = self.get(msa, ([False, True, True], [True, True, False])) self.assertEqual(new, TabularMSA([b[0, 1], c[0, 1]], metadata={0: 'x'}, positional_metadata={0: [1, 2]}, index=[False, True])) def test_bool_index_too_big(self): msa = TabularMSA([DNA("ABCD"), DNA("GHKM"), DNA("NRST")], index=[False, True, False]) with self.assertRaises(IndexError): self.get(msa, [False, False, False, False]) with self.assertRaises(IndexError): self.get(msa, [True, True, True, True]) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, [True, False, True, False, True])) with self.assertRaises(IndexError): self.get(msa, ([True, False, True, False], [True, False, True, False, False])) def test_bool_index_too_small(self): msa = TabularMSA([DNA("ABCD"), DNA("GHKM"), DNA("NRST")], index=[False, True, False]) with self.assertRaises(IndexError): self.get(msa, [False]) with self.assertRaises(IndexError): self.get(msa, [True]) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, [True])) with self.assertRaises(IndexError): self.get(msa, ([True, False], [True, False, True, False])) def test_bad_scalar(self): msa = TabularMSA([DNA("ABCD"), DNA("GHKM"), DNA("NRST")]) with self.assertRaises((KeyError, TypeError)): self.get(msa, "foo") with self.assertRaises(IndexError): self.get(msa, 
(Ellipsis, "foo")) def test_bad_fancy_index(self): msa = TabularMSA([DNA("ABCD"), DNA("GHKM"), DNA("NRST")]) with self.assertRaises((KeyError, TypeError, ValueError)): self.get(msa, [0, "foo"]) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, [0, "foo"])) def test_asburd_slice(self): msa = TabularMSA([DNA("ABCD"), DNA("GHKM"), DNA("NRST")]) with self.assertRaises(TypeError): self.get(msa, {set(1): 0}) class SharedPropertyIndexTests(SharedIndexTests): def setUp(self): self.combo_msa = TabularMSA([ DNA('ACGTA', metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4, 5]}), DNA('CGTAC', metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4, 5]}), DNA('GTACG', metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4, 5]}), DNA('TACGT', metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4, 5]}), DNA('ACGTT', metadata={4: 4}, positional_metadata={4: [1, 2, 3, 4, 5]}) ], index=list('ABCDE'), metadata={'x': 'x'}, positional_metadata={'y': [5, 4, 3, 2, 1]}) """First off, sorry to the next person who has to deal with this. The next few tests will try and slice by a bunch of stuff, with all combinations. Each element in the two lists is a tuple where the first element is the thing to slice with, and the second is the equivalent fancy index which describes the same range. This lets us describe the results a little more declaratively without setting up a thousand tests for each possible combination. This does mean the iloc via a fancy index and simple scalar must work correctly. 
""" # This will be overriden for TestLoc because the first axis are labels self.combo_first_axis = [ ([], []), (slice(0, 0), []), (Ellipsis, [0, 1, 2, 3, 4]), (slice(None), [0, 1, 2, 3, 4]), (slice(0, 10000), [0, 1, 2, 3, 4]), (3, 3), (-4, 1), ([0], [0]), ([2], [2]), (slice(1, 3), [1, 2]), (slice(3, 0, -1), [3, 2, 1]), ([-3, 2, 1], [2, 2, 1]), ([-4, -3, -2, -1], [1, 2, 3, 4]), (np.array([-3, 2, 1]), [2, 2, 1]), ([True, True, False, False, True], [0, 1, 4]), (np.array([True, True, False, True, False]), [0, 1, 3]), (range(3), [0, 1, 2]), ([slice(0, 2), slice(3, 4), 4], [0, 1, 3, 4]) ] # Same in both TestLoc and TestILoc self.combo_second_axis = self.combo_first_axis def test_combo_single_axis_natural(self): for idx, exp in self.combo_first_axis: self.assertEqual(self.get(self.combo_msa, idx), self.combo_msa.iloc[exp], msg="%r did not match iloc[%r]" % (idx, exp)) def test_combo_first_axis_only(self): for idx, exp in self.combo_first_axis: self.assertEqual(self.get(self.combo_msa, idx, axis=0), self.combo_msa.iloc[exp, ...], msg="%r did not match iloc[%r, ...]" % (idx, exp)) def test_combo_second_axis_only(self): for idx, exp in self.combo_second_axis: self.assertEqual(self.get(self.combo_msa, idx, axis=1), self.combo_msa.iloc[..., exp], msg="%r did not match iloc[..., %r]" % (idx, exp)) def test_combo_both_axes(self): for idx1, exp1 in self.combo_first_axis: for idx2, exp2 in self.combo_second_axis: self.assertEqual(self.get(self.combo_msa, (idx1, idx2)), self.combo_msa.iloc[exp1, exp2], msg=("%r did not match iloc[%r, %r]" % ((idx1, idx2), exp1, exp2))) class TestLoc(SharedPropertyIndexTests, unittest.TestCase): def setUp(self): SharedPropertyIndexTests.setUp(self) self.combo_first_axis = [ ([], []), (slice('X', "Z"), []), ('A', 0), ('E', 4), (['B'], [1]), (np.asarray(['B']), [1]), (slice('A', 'C', 2), [0, 2]), (slice('C', 'A', -2), [2, 0]), (slice('A', 'B'), [0, 1]), (slice(None), [0, 1, 2, 3, 4]), (slice('A', None), [0, 1, 2, 3, 4]), (slice(None, 'C'), [0, 1, 2]), 
(Ellipsis, [0, 1, 2, 3, 4]), (self.combo_msa.index, [0, 1, 2, 3, 4]), (['B', 'A', 'A', 'C'], [1, 0, 0, 2]), (np.asarray(['B', 'A', 'A', 'C']), [1, 0, 0, 2]), ([True, False, True, True, False], [0, 2, 3]), (np.asarray([True, False, True, True, False]), [0, 2, 3]), ] def test_forced_axis_returns_copy(self): msa = TabularMSA([Protein("EVANTHQMVS"), Protein("EVANTH*MVS")]) self.assertIsNot(msa.loc(axis=1), msa.loc) def test_forced_axis_no_mutate(self): msa = TabularMSA([Protein("EVANTHQMVS"), Protein("EVANTH*MVS")]) self.assertEqual(msa.loc(axis=1)[0], Sequence("EE")) self.assertEqual(msa.loc[0], Protein("EVANTHQMVS")) self.assertIsNone(msa.loc._axis) def get(self, obj, indexable, axis=None): if axis is None: return obj.loc[indexable] else: return obj.loc(axis=axis)[indexable] def test_complex_single_label(self): a = DNA("ACG") b = DNA("ACT") c = DNA("ACA") msa = TabularMSA([a, b, c], index=[('a', 0), ('a', 1), ('b', 0)]) self.assertIs(a, self.get(msa, (('a', 0),))) self.assertIs(b, self.get(msa, (('a', 1),))) self.assertIs(c, self.get(msa, (('b', 0),))) def test_partial_label(self): a = DNA("ACG") b = DNA("ACT") c = DNA("ACA") msa = TabularMSA([a, b, c], index=[('a', 0), ('a', 1), ('b', 0)]) exp_a = TabularMSA([a, b], index=[0, 1]) exp_b = TabularMSA([c], index=[0]) self.assertEqual(self.get(msa, 'a'), exp_a) self.assertEqual(self.get(msa, 'b'), exp_b) def test_label_not_exists(self): msa = TabularMSA([DNA("ACG")], index=['foo']) with self.assertRaises(KeyError): self.get(msa, 'bar') def test_duplicate_index_nonscalar_label(self): a = DNA("ACGA", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("A-GA", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("AAGA", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) d = DNA("ACCA", metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c, d], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[0, 0, 1, 2]) self.assertEqual(self.get(msa, 0), 
TabularMSA([a, b], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[0, 0])) def test_duplicate_index_scalar_label(self): a = DNA("ACGA", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("A-GA", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("AAGA", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) d = DNA("ACCA", metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c, d], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[0, 0, 1, 2]) self.assertEqual(self.get(msa, 1), c) def test_multiindex_complex(self): a = DNA("ACG") b = DNA("ACT") c = DNA("ACA") msa = TabularMSA([a, b, c], index=[('a', 0), ('a', 1), ('b', 0)]) exp = TabularMSA([a, c], index=[('a', 0), ('b', 0)]) self.assertEqual(self.get(msa, [('a', 0), ('b', 0)]), exp) def test_fancy_index_missing_label(self): msa = TabularMSA([DNA("ACG")], index=['foo']) with self.assertRaises(KeyError): self.get(msa, ['foo', 'bar']) with self.assertRaises(KeyError): self.get(msa, ['bar']) def test_multiindex_fancy_indexing_incomplete_label(self): a = RNA("UUAG", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = RNA("UAAG", metadata={1: 0}, positional_metadata={1: [1, 2, 3, 4]}) c = RNA("UAA-", metadata={2: 0}, positional_metadata={2: [1, 2, 3, 4]}) d = RNA("UA-G", metadata={3: 0}, positional_metadata={3: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c, d], metadata={'x': 'y'}, positional_metadata={'c': ['a', 'b', 'c', 'd']}, index=[('a', 'x', 0), ('a', 'x', 1), ('a', 'y', 2), ('b', 'x', 0)]) self.assertEqual(self.get(msa, (('a', 'x'), Ellipsis)), TabularMSA([a, b], metadata={'x': 'y'}, positional_metadata={'c': ['a', 'b', 'c', 'd']}, index=[0, 1])) def test_bool_index_scalar_bool_label(self): a = DNA("ACGA", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("A-GA", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("AAGA", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) d = DNA("ACCA", 
metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c, d], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[False, True, False, False]) self.assertEqual(self.get(msa, True), b) def test_bool_index_nonscalar_bool_label(self): a = DNA("ACGA", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("A-GA", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("AAGA", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) d = DNA("ACCA", metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c, d], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[False, True, False, True]) self.assertEqual(self.get(msa, True), TabularMSA([b, d], metadata={'x': 'y'}, positional_metadata={'z': [1, 2, 3, 4]}, index=[True, True])) def test_categorical_index_scalar_label(self): msa = TabularMSA([RNA("ACUG"), RNA("ACUA"), RNA("AAUG"), RNA("AC-G")], index=pd.CategoricalIndex(['a', 'b', 'b', 'c'])) self.assertEqual(self.get(msa, 'a'), RNA("ACUG")) def test_categorical_index_nonscalar_label(self): msa = TabularMSA([RNA("ACUG"), RNA("ACUA"), RNA("AAUG"), RNA("AC-G")], index=pd.CategoricalIndex(['a', 'b', 'b', 'c'])) self.assertEqual(self.get(msa, 'b'), TabularMSA([RNA("ACUA"), RNA("AAUG")], index=pd.CategoricalIndex( ['b', 'b'], categories=['a', 'b', 'c']) )) def test_float_index_out_of_order_slice(self): msa = TabularMSA([DNA("ACGG"), DNA("AAGC"), DNA("AAAA"), DNA("ACTC")], index=[0.1, 2.4, 5.1, 2.6]) with self.assertRaises(KeyError): self.get(msa, slice(0.1, 2.7)) msa.sort() result = self.get(msa, slice(0.1, 2.7)) self.assertEqual(result, TabularMSA([DNA("ACGG"), DNA("AAGC"), DNA("ACTC")], index=[0.1, 2.4, 2.6])) def test_nonscalar_fancy_index(self): msa = TabularMSA([DNA('ACGT'), DNA('ACGT'), DNA('ACGT')], index=[('a', 0, 1), ('a', 1, 1), ('b', 0, 1)]) with self.assertRaisesRegex(TypeError, r'tuple.*independent.*MultiIndex'): self.get(msa, ['a', 'b']) def 
test_missing_first_nonscalar_fancy_index(self): msa = TabularMSA([DNA('ACGT'), DNA('ACGT'), DNA('ACGT')], index=[('a', 0, 1), ('a', 1, 1), ('b', 0, 1)]) with self.assertRaises(KeyError): self.get(msa, ['x', 'a', 'b']) def test_tuple_fancy_index(self): msa = TabularMSA([DNA('ACGT'), DNA('ACGT'), DNA('ACGT')], index=[('a', 0, 1), ('a', 1, 1), ('b', 0, 1)]) with self.assertRaisesRegex(TypeError, r'tuple.*pd.MultiIndex.*label'): self.get(msa, ((('a', 0, 1), ('b', 0, 1)), Ellipsis)) def test_non_multiindex_tuple(self): msa = TabularMSA([DNA('ACGT'), DNA('ACGT'), DNA('ACGT')]) with self.assertRaisesRegex(TypeError, r'tuple.*first axis'): self.get(msa, ((0, 1), Ellipsis)) def test_assertion_exists_for_future_failure_of_get_sequence_loc(self): # Ideally we wouldn't need this test or the branch, but the most common # failure for pandas would be returning a series instead of the value. # We should make sure that the user get's an error should this ever # happen again. Getting a series of DNA looks pretty weird... 
msa = TabularMSA([DNA('ACGT'), DNA('ACGT'), DNA('ACGT')]) with self.assertRaises(AssertionError): msa._get_sequence_loc_([1, 2]) class TestILoc(SharedPropertyIndexTests, unittest.TestCase): def setUp(self): SharedPropertyIndexTests.setUp(self) self.combo_first_axis = self.combo_second_axis def test_forced_axis_returns_copy(self): msa = TabularMSA([Protein("EVANTHQMVS"), Protein("EVANTH*MVS")]) self.assertIsNot(msa.iloc(axis=1), msa.iloc) def test_forced_axis_no_mutate(self): msa = TabularMSA([Protein("EVANTHQMVS"), Protein("EVANTH*MVS")]) self.assertEqual(msa.iloc(axis=1)[0], Sequence("EE")) self.assertEqual(msa.iloc[0], Protein("EVANTHQMVS")) self.assertIsNone(msa.iloc._axis) def get(self, obj, indexable, axis=None): if axis is None: return obj.iloc[indexable] else: return obj.iloc(axis=axis)[indexable] def test_entire_fancy_first_axis(self): msa = TabularMSA([ DNA("ACCA", metadata={'a': 'foo'}, positional_metadata={'a': [7, 6, 5, 4]}), DNA("GGAA", metadata={'b': 'bar'}, positional_metadata={'b': [3, 4, 5, 6]}) ], metadata={'c': 'baz'}, positional_metadata={'foo': [1, 2, 3, 4]}) new_np_simple = self.get(msa, np.arange(2)) new_list_simple = self.get(msa, [0, 1]) new_list_backwards = self.get(msa, [-2, -1]) self.assertIsNot(msa, new_np_simple) self.assertEqual(msa, new_np_simple) self.assertIsNot(msa, new_list_simple) self.assertEqual(msa, new_list_simple) self.assertIsNot(msa, new_list_backwards) self.assertEqual(msa, new_list_backwards) def test_fancy_entire_second_axis(self): msa = TabularMSA([ DNA("ACCA", metadata={'a': 'foo'}, positional_metadata={'a': [7, 6, 5, 4]}), DNA("GGAA", metadata={'b': 'bar'}, positional_metadata={'b': [3, 4, 5, 6]}) ], metadata={'c': 'baz'}, positional_metadata={'foo': [1, 2, 3, 4]}) new_np_simple = self.get(msa, (Ellipsis, np.arange(4))) new_list_simple = self.get(msa, (Ellipsis, [0, 1, 2, 3])) new_list_backwards = self.get(msa, (Ellipsis, [-4, -3, -2, -1])) self.assertIsNot(msa, new_np_simple) self.assertEqual(msa, new_np_simple) 
self.assertIsNot(msa, new_list_simple) self.assertEqual(msa, new_list_simple) self.assertIsNot(msa, new_list_backwards) self.assertEqual(msa, new_list_backwards) def test_fancy_entire_both_axes(self): msa = TabularMSA([ DNA("ACCA", metadata={'a': 'foo'}, positional_metadata={'a': [7, 6, 5, 4]}), DNA("GGAA", metadata={'b': 'bar'}, positional_metadata={'b': [3, 4, 5, 6]}) ], metadata={'c': 'baz'}, positional_metadata={'foo': [1, 2, 3, 4]}) new_np_simple = self.get(msa, (np.arange(2), np.arange(4))) new_list_simple = self.get(msa, ([0, 1], [0, 1, 2, 3])) new_list_backwards = self.get(msa, ([-2, -1], [-4, -3, -2, -1])) self.assertIsNot(msa, new_np_simple) self.assertEqual(msa, new_np_simple) self.assertIsNot(msa, new_list_simple) self.assertEqual(msa, new_list_simple) self.assertIsNot(msa, new_list_backwards) self.assertEqual(msa, new_list_backwards) def test_fancy_out_of_bound(self): with self.assertRaises(IndexError): self.get(TabularMSA([DNA('AC')]), [0, 1, 2]) with self.assertRaises(IndexError): self.get(TabularMSA([DNA('AC')]), (Ellipsis, [0, 1, 2])) def test_fancy_empty_both_axis(self): msa = TabularMSA([DNA("ACGT", metadata={'x': 1}), DNA("TGCA", metadata={'y': 2})], index=list("AB")) new_np_simple = self.get(msa, (np.arange(0), np.arange(0))) new_list_simple = self.get(msa, ([], [])) self.assertEqual(TabularMSA([]), new_np_simple) self.assertEqual(TabularMSA([]), new_list_simple) def test_fancy_standard_first_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, [0, 2]), TabularMSA([a, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}, index=[0, 2])) def test_fancy_standard_second_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = 
DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, (Ellipsis, [0, 2])), TabularMSA([a[0, 2], b[0, 2], c[0, 2]], metadata={3: 3}, positional_metadata={3: [1, 3]}, index=[0, 1, 2])) def test_fancy_standard_both_axes(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, ([0, 2], [0, 2])), TabularMSA([a[0, 2], c[0, 2]], metadata={3: 3}, positional_metadata={3: [1, 3]}, index=[0, 2])) def test_fancy_empty_first_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) # TODO: Change for #1198 self.assertEqual(self.get(msa, []), TabularMSA([], metadata={3: 3})) def test_fancy_empty_second_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, (Ellipsis, [])), TabularMSA([a[0:0], b[0:0], c[0:0]], metadata={3: 3}, positional_metadata={3: np.array( [], dtype=np.int64)})) def test_fancy_empty_both_axes(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = 
DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) # TODO: Change for #1198 self.assertEqual(self.get(msa, ([], [])), TabularMSA([], metadata={3: 3})) def test_fancy_out_of_bounds_first_axis(self): msa = TabularMSA([DNA("ACGT"), DNA("GCAT")]) with self.assertRaises(IndexError): self.get(msa, [10]) with self.assertRaises(IndexError): self.get(msa, [0, 1, 10]) def test_fancy_out_of_bounds_second_axis(self): msa = TabularMSA([DNA("ACGT"), DNA("GCAT")]) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, [10])) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, [1, 2, 4])) def test_get_scalar_first_axis(self): a = DNA("AA", metadata={'a': 'foo'}, positional_metadata={'x': [1, 2]}) b = DNA("GG", metadata={'b': 'bar'}, positional_metadata={'y': [3, 4]}) msa = TabularMSA([a, b]) new0 = self.get(msa, 0) new1 = self.get(msa, 1) self.assertEqual(new0, a) self.assertEqual(new1, b) def test_get_scalar_second_axis(self): a = DNA("AA", metadata={'a': 'foo'}, positional_metadata={'x': [1, 2]}) b = DNA("GC", metadata={'b': 'bar'}, positional_metadata={'y': [3, 4]}) msa = TabularMSA([a, b], positional_metadata={'z': [5, 6]}) new0 = self.get(msa, (Ellipsis, 0)) new1 = self.get(msa, (Ellipsis, 1)) self.assertEqual(new0, Sequence("AG", metadata={'z': 5}, positional_metadata={'x': [1, np.nan], 'y': [np.nan, 3]})) self.assertEqual(new1, Sequence("AC", metadata={'z': 6}, positional_metadata={'x': [2, np.nan], 'y': [np.nan, 4]})) def test_scalar_sliced_first_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGT", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, (1, [1, 3])), DNA("CT", metadata={1: 1}, positional_metadata={1: [2, 4]})) 
def test_scalar_sliced_second_axis(self): a = DNA("ACGT", metadata={0: 0}, positional_metadata={0: [1, 2, 3, 4]}) b = DNA("ACGA", metadata={1: 1}, positional_metadata={1: [1, 2, 3, 4]}) c = DNA("ACGT", metadata={2: 2}, positional_metadata={2: [1, 2, 3, 4]}) msa = TabularMSA([a, b, c], metadata={3: 3}, positional_metadata={3: [1, 2, 3, 4]}) self.assertEqual(self.get(msa, ([1, 2], 3)), Sequence("AT", metadata={3: 4}, positional_metadata={1: [4, np.nan], 2: [np.nan, 4]})) def test_get_scalar_out_of_bound_first_axis(self): a = DNA("AA", metadata={'a': 'foo'}, positional_metadata={'x': [1, 2]}) b = DNA("GC", metadata={'b': 'bar'}, positional_metadata={'y': [3, 4]}) msa = TabularMSA([a, b], positional_metadata={'z': [5, 6]}) with self.assertRaises(IndexError): self.get(msa, 3) def test_get_scalar_out_of_bound_second_axis(self): a = DNA("AA", metadata={'a': 'foo'}, positional_metadata={'x': [1, 2]}) b = DNA("GC", metadata={'b': 'bar'}, positional_metadata={'y': [3, 4]}) msa = TabularMSA([a, b], positional_metadata={'z': [5, 6]}) with self.assertRaises(IndexError): self.get(msa, (Ellipsis, 3)) class TestGetItem(SharedIndexTests, unittest.TestCase): def get(self, obj, indexable): return obj[indexable] def test_uses_iloc_not_loc(self): a = DNA("ACGA") b = DNA("ACGT") msa = TabularMSA([a, b], index=[1, 0]) self.assertIs(msa[0], a) self.assertIs(msa[1], b) class TestConstructor(unittest.TestCase): def setUp(self): self.seqs = [DNA("ACGT"), DNA("GCTA")] self.m = {'x': 'y', 0: 1} self.pm = pd.DataFrame({'foo': [1, 2, 3, 4]}) self.index = pd.Index(['a', 'b']) self.msa = TabularMSA(self.seqs, metadata=self.m, positional_metadata=self.pm, index=self.index) def test_no_override(self): result = self.msa._constructor_() self.assertEqual(self.msa, result) for seq1, seq2 in zip(result, self.msa): self.assertIsNot(seq1, seq2) self.assertIsNot(result.metadata, self.msa.metadata) self.assertIsNot(result.positional_metadata, self.msa.positional_metadata) def 
test_sequence_override_same_seqs(self): result = self.msa._constructor_(sequences=self.seqs) self.assertEqual(self.msa, result) for seq1, seq2 in zip(result, self.msa): self.assertIsNot(seq1, seq2) self.assertIsNot(result.metadata, self.msa.metadata) self.assertIsNot(result.positional_metadata, self.msa.positional_metadata) def test_sequence_override(self): seqs = [RNA("ACGU"), RNA("GCUA")] result = self.msa._constructor_(sequences=seqs) self.assertNotEqual(result, self.msa) self.assertEqual(list(result), seqs) assert_index_equal(result.index, self.index) self.assertEqual(result.metadata, self.m) assert_data_frame_almost_equal(result.positional_metadata, self.pm) def test_no_override_no_md(self): msa = TabularMSA(self.seqs, index=self.index) self.assertEqual(msa, msa._constructor_()) def test_metadata_override(self): new_md = {'foo': {'x': 0}} result = self.msa._constructor_(metadata=new_md) self.assertNotEqual(result, self.msa) self.assertEqual(list(result), self.seqs) assert_index_equal(result.index, self.index) self.assertEqual(result.metadata, new_md) assert_data_frame_almost_equal(result.positional_metadata, self.pm) def test_positional_metadata_override(self): new_pm = pd.DataFrame({'x': [1, 2, 3, 4]}) result = self.msa._constructor_(positional_metadata=new_pm) self.assertNotEqual(result, self.msa) self.assertEqual(list(result), self.seqs) assert_index_equal(result.index, self.index) self.assertEqual(result.metadata, self.m) assert_data_frame_almost_equal(result.positional_metadata, new_pm) def test_index_override(self): new_index = pd.Index([('a', 0), ('b', 1)]) result = self.msa._constructor_(index=new_index) self.assertNotEqual(result, self.msa) self.assertEqual(list(result), self.seqs) assert_index_equal(result.index, new_index) self.assertEqual(result.metadata, self.m) assert_data_frame_almost_equal(result.positional_metadata, self.pm) class TestAppend(unittest.TestCase): # Error cases def test_invalid_minter_index_reset_index_parameter_combos(self): msa 
= TabularMSA([]) param_combos = ( {}, {'minter': str, 'index': 'foo', 'reset_index': True}, {'minter': str, 'index': 'foo'}, {'minter': str, 'reset_index': True}, {'index': 'foo', 'reset_index': True} ) for params in param_combos: with self.assertRaisesRegex(ValueError, r"one of.*minter.*index.*reset_index"): msa.append(DNA('ACGT'), **params) self.assertEqual(msa, TabularMSA([])) def test_invalid_dtype(self): msa = TabularMSA([]) with self.assertRaisesRegex(TypeError, r'GrammaredSequence.*Sequence'): msa.append(Sequence(''), reset_index=True) self.assertEqual(msa, TabularMSA([])) def test_dtype_mismatch_rna(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex(TypeError, r'matching type.*RNA.*DNA'): msa.append(RNA('UUUU'), reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_dtype_mismatch_float(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex(TypeError, r'matching type.*float.*DNA'): msa.append(42.0, reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_length_mismatch(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex( ValueError, r'must match the number of positions.*5 != 4'): msa.append(DNA('ACGTA'), reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_invalid_minter(self): msa = TabularMSA([DNA('ACGT')], index=['foo']) with self.assertRaises(KeyError): msa.append(DNA('AAAA'), minter='id') self.assertEqual(msa, TabularMSA([DNA('ACGT')], index=['foo'])) # Valid cases: `minter` def test_minter_empty_msa(self): msa = TabularMSA([]) msa.append(DNA('ACGT'), minter=str) self.assertEqual(msa, TabularMSA([DNA('ACGT')], minter=str)) def test_minter_metadata_key(self): msa = TabularMSA([DNA('', metadata={'id': 'a'}), DNA('', metadata={'id': 'b'})], minter='id') msa.append(DNA('', metadata={'id': 'c'}), minter='id') self.assertEqual( msa, TabularMSA([ DNA('', metadata={'id': 'a'}), 
DNA('', metadata={'id': 'b'}), DNA('', metadata={'id': 'c'})], minter='id')) def test_minter_callable(self): msa = TabularMSA([DNA('', metadata={'id': 'a'}), DNA('', metadata={'id': 'b'})], minter='id') msa.append(DNA(''), minter=str) self.assertEqual( msa, TabularMSA([ DNA('', metadata={'id': 'a'}), DNA('', metadata={'id': 'b'}), DNA('')], index=['a', 'b', ''])) def test_multiindex_minter_empty_msa(self): def multiindex_minter(seq): return ('foo', 42) msa = TabularMSA([]) msa.append(DNA('AC'), minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42)])) def test_multiindex_minter_non_empty_msa(self): def multiindex_minter(seq): return ('baz', 44) msa = TabularMSA([RNA('UU'), RNA('CA')], index=[('foo', 42), ('bar', 43)]) msa.append(RNA('AC'), minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42), ('bar', 43), ('baz', 44)])) # Valid cases: `index` def test_index_empty_msa(self): msa = TabularMSA([]) msa.append(DNA('ACGT'), index='a') self.assertEqual( msa, TabularMSA([DNA('ACGT')], index=['a'])) def test_index_non_empty_msa(self): msa = TabularMSA([DNA('AC'), DNA('GT')], index=['a', 'b']) msa.append(DNA('--'), index='foo') self.assertEqual( msa, TabularMSA([DNA('AC'), DNA('GT'), DNA('--')], index=['a', 'b', 'foo'])) def test_multiindex_index_empty_msa(self): msa = TabularMSA([]) msa.append(DNA('AA'), index=('foo', 42)) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42)])) def test_multiindex_index_non_empty_msa(self): msa = TabularMSA([RNA('A'), RNA('C')], index=[('foo', 42), ('bar', 43)]) msa.append(RNA('U'), index=('baz', 44)) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42), ('bar', 43), ('baz', 44)])) # Valid cases: `reset_index` def test_reset_index_empty_msa(self): msa = TabularMSA([]) msa.append(DNA('ACGT'), 
reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT')])) assert_index_equal(msa.index, pd.RangeIndex(1)) def test_reset_index_default_index(self): msa = TabularMSA([DNA('ACGT'), DNA('CCCC')]) msa.append(DNA('ACGT'), reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('CCCC'), DNA('ACGT')])) assert_index_equal(msa.index, pd.RangeIndex(3)) def test_reset_index_non_default_index(self): msa = TabularMSA([DNA('ACGT'), DNA('CCCC')], index=['foo', 'bar']) msa.append(DNA('ACGT'), reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('CCCC'), DNA('ACGT')])) assert_index_equal(msa.index, pd.RangeIndex(3)) def test_reset_index_bool_cast(self): msa = TabularMSA([RNA('AC'), RNA('UU')], index=[42, 43]) msa.append(RNA('..'), reset_index='abc') self.assertEqual(msa, TabularMSA([RNA('AC'), RNA('UU'), RNA('..')])) assert_index_equal(msa.index, pd.RangeIndex(3)) # Valid cases (misc) def test_index_type_change(self): msa = TabularMSA([DNA('A'), DNA('.')]) msa.append(DNA('C'), index='foo') self.assertEqual( msa, TabularMSA([DNA('A'), DNA('.'), DNA('C')], index=[0, 1, 'foo'])) def test_duplicate_index(self): msa = TabularMSA([DNA('A'), DNA('.')], index=['foo', 'bar']) msa.append(DNA('C'), index='foo') self.assertEqual( msa, TabularMSA([DNA('A'), DNA('.'), DNA('C')], index=['foo', 'bar', 'foo'])) def test_empty_msa_with_positional_metadata_no_new_positions(self): msa = TabularMSA([], positional_metadata={'foo': []}) msa.append(DNA(''), reset_index=True) self.assertEqual( msa, TabularMSA([DNA('')], positional_metadata={'foo': []})) def test_empty_msa_with_positional_metadata_add_new_positions(self): # bug in 0.4.2 msa = TabularMSA([], positional_metadata={'foo': []}) msa.append(DNA('AA'), reset_index=True) self.assertEqual( msa, TabularMSA([DNA('AA')])) class TestExtend(unittest.TestCase): # Error cases # # Note: these tests check that the MSA isn't mutated when an error is # raised. 
Where applicable, the "invalid" sequence is preceded by valid # sequence(s) to test one possible (buggy) implementation of `extend`: # looping over `sequences` and calling `append`. These tests ensure that # valid sequences aren't appended to the MSA before the error is raised. def test_invalid_minter_index_reset_index_parameter_combos(self): msa = TabularMSA([]) param_combos = ( {}, {'minter': str, 'index': 'foo', 'reset_index': True}, {'minter': str, 'index': 'foo'}, {'minter': str, 'reset_index': True}, {'index': 'foo', 'reset_index': True} ) for params in param_combos: with self.assertRaisesRegex(ValueError, r"one of.*minter.*index.*reset_index"): msa.extend([DNA('ACGT')], **params) self.assertEqual(msa, TabularMSA([])) def test_from_tabular_msa_index_param_still_required(self): msa = TabularMSA([DNA('AC'), DNA('TG')]) with self.assertRaisesRegex(ValueError, r"one of.*minter.*index.*reset_index"): msa.extend(TabularMSA([DNA('GG'), DNA('CC')])) self.assertEqual(msa, TabularMSA([DNA('AC'), DNA('TG')])) def test_invalid_dtype(self): msa = TabularMSA([]) with self.assertRaisesRegex(TypeError, r'GrammaredSequence.*Sequence'): msa.extend([Sequence('')], reset_index=True) self.assertEqual(msa, TabularMSA([])) def test_dtype_mismatch_rna(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex(TypeError, r'matching type.*RNA.*DNA'): msa.extend([DNA('----'), RNA('UUUU')], reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_dtype_mismatch_float(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex(TypeError, r'matching type.*float.*DNA'): msa.extend([DNA('GGGG'), 42.0], reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_length_mismatch(self): msa = TabularMSA([DNA('ACGT'), DNA('TGCA')]) with self.assertRaisesRegex( ValueError, r'must match the number of positions.*5 != 4'): msa.extend([DNA('TTTT'), DNA('ACGTA')], reset_index=True) 
self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_invalid_minter(self): msa = TabularMSA([DNA('ACGT')], index=['foo']) with self.assertRaises(KeyError): msa.extend([DNA('AAAA', metadata={'id': 'foo'}), DNA('----')], minter='id') self.assertEqual(msa, TabularMSA([DNA('ACGT')], index=['foo'])) def test_invalid_index(self): msa = TabularMSA([DNA('ACGT')], index=['foo']) with self.assertRaises(TypeError): msa.extend([DNA('----')], index=42) self.assertEqual(msa, TabularMSA([DNA('ACGT')], index=['foo'])) def test_sequences_index_length_mismatch(self): msa = TabularMSA([]) with self.assertRaisesRegex(ValueError, r'sequences.*2.*index length.*3'): msa.extend([DNA('TTTT'), DNA('ACGT')], index=['a', 'b', 'c']) self.assertEqual(msa, TabularMSA([])) # Valid cases: `minter` def test_minter_empty_msa(self): msa = TabularMSA([]) msa.extend([RNA('UU'), RNA('--')], minter=str) self.assertEqual(msa, TabularMSA([RNA('UU'), RNA('--')], minter=str)) def test_minter_metadata_key(self): msa = TabularMSA([DNA('', metadata={'id': 'a'}), DNA('', metadata={'id': 'b'})], minter='id') msa.extend([DNA('', metadata={'id': 'c'}), DNA('', metadata={'id': 'd'})], minter='id') self.assertEqual( msa, TabularMSA([ DNA('', metadata={'id': 'a'}), DNA('', metadata={'id': 'b'}), DNA('', metadata={'id': 'c'}), DNA('', metadata={'id': 'd'})], minter='id')) def test_minter_callable(self): msa = TabularMSA([DNA('A', metadata={'id': 'a'}), DNA('C', metadata={'id': 'b'})], minter='id') msa.extend([DNA('G'), DNA('T')], minter=str) self.assertEqual( msa, TabularMSA([ DNA('A', metadata={'id': 'a'}), DNA('C', metadata={'id': 'b'}), DNA('G'), DNA('T')], index=['a', 'b', 'G', 'T'])) def test_multiindex_minter_empty_msa(self): def multiindex_minter(seq): if str(seq) == 'AC': return ('foo', 42) else: return ('bar', 43) msa = TabularMSA([]) msa.extend([DNA('AC'), DNA('GG')], minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 
42), ('bar', 43)])) def test_multiindex_minter_non_empty_msa(self): def multiindex_minter(seq): if str(seq) == 'C': return ('baz', 44) else: return ('baz', 45) msa = TabularMSA([DNA('A'), DNA('G')], index=[('foo', 42), ('bar', 43)]) msa.extend([DNA('C'), DNA('T')], minter=multiindex_minter) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal( msa.index, pd.Index([('foo', 42), ('bar', 43), ('baz', 44), ('baz', 45)])) # Valid cases: `index` def test_index_empty_msa(self): msa = TabularMSA([]) msa.extend([RNA('UAC'), RNA('AAU')], index=['foo', 'bar']) self.assertEqual(msa, TabularMSA([RNA('UAC'), RNA('AAU')], index=['foo', 'bar'])) def test_index_non_empty_msa(self): msa = TabularMSA([DNA('AC'), DNA('GT')], index=['a', 'b']) msa.extend([DNA('--'), DNA('..')], index=['foo', 'bar']) self.assertEqual( msa, TabularMSA([DNA('AC'), DNA('GT'), DNA('--'), DNA('..')], index=['a', 'b', 'foo', 'bar'])) def test_multiindex_index_empty_msa(self): msa = TabularMSA([]) msa.extend([DNA('AA'), DNA('GG')], index=[('foo', 42), ('bar', 43)]) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal(msa.index, pd.Index([('foo', 42), ('bar', 43)])) def test_multiindex_index_non_empty_msa(self): msa = TabularMSA([DNA('.'), DNA('-')], index=[('foo', 42), ('bar', 43)]) msa.extend([DNA('A'), DNA('G')], index=[('baz', 44), ('baz', 45)]) self.assertIsInstance(msa.index, pd.MultiIndex) assert_index_equal( msa.index, pd.Index([('foo', 42), ('bar', 43), ('baz', 44), ('baz', 45)])) def test_index_object_empty_msa(self): msa = TabularMSA([]) msa.extend([DNA('AA'), DNA('GG')], index=pd.RangeIndex(2)) self.assertEqual(msa, TabularMSA([DNA('AA'), DNA('GG')])) assert_index_equal(msa.index, pd.RangeIndex(2)) def test_index_object_non_empty_msa(self): msa = TabularMSA([DNA('CT'), DNA('GG')]) msa.extend([DNA('AA'), DNA('GG')], index=pd.RangeIndex(2)) self.assertEqual( msa, TabularMSA([DNA('CT'), DNA('GG'), DNA('AA'), DNA('GG')], index=[0, 1, 0, 1])) # Valid cases: `reset_index` def 
test_reset_index_empty_msa(self): msa = TabularMSA([]) msa.extend([DNA('ACGT'), DNA('----')], reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('----')])) assert_index_equal(msa.index, pd.RangeIndex(2)) def test_reset_index_empty_msa_empty_iterable(self): msa = TabularMSA([]) msa.extend([], reset_index=True) self.assertEqual(msa, TabularMSA([])) assert_index_equal(msa.index, pd.RangeIndex(0)) def test_reset_index_non_empty_msa_empty_iterable(self): msa = TabularMSA([RNA('UU'), RNA('CC')], index=['a', 'b']) msa.extend([], reset_index=True) self.assertEqual(msa, TabularMSA([RNA('UU'), RNA('CC')])) assert_index_equal(msa.index, pd.RangeIndex(2)) def test_reset_index_default_index(self): msa = TabularMSA([DNA('A'), DNA('G')]) msa.extend([DNA('.'), DNA('-')], reset_index=True) self.assertEqual(msa, TabularMSA([DNA('A'), DNA('G'), DNA('.'), DNA('-')])) assert_index_equal(msa.index, pd.RangeIndex(4)) def test_reset_index_non_default_index(self): msa = TabularMSA([DNA('A'), DNA('G')], index=['a', 'b']) msa.extend([DNA('.'), DNA('-')], reset_index=True) self.assertEqual(msa, TabularMSA([DNA('A'), DNA('G'), DNA('.'), DNA('-')])) assert_index_equal(msa.index, pd.RangeIndex(4)) def test_reset_index_from_tabular_msa(self): msa = TabularMSA([DNA('AC'), DNA('TG')], index=[42, 43]) msa.extend(TabularMSA([DNA('GG'), DNA('CC'), DNA('AA')], index=['a', 'b', 'c']), reset_index=True) self.assertEqual( msa, TabularMSA([DNA('AC'), DNA('TG'), DNA('GG'), DNA('CC'), DNA('AA')])) assert_index_equal(msa.index, pd.RangeIndex(5)) def test_reset_index_bool_cast(self): msa = TabularMSA([RNA('AC'), RNA('UU')], index=[42, 43]) msa.extend([RNA('..')], reset_index='abc') self.assertEqual(msa, TabularMSA([RNA('AC'), RNA('UU'), RNA('..')])) assert_index_equal(msa.index, pd.RangeIndex(3)) # Valid cases (misc) def test_index_type_change(self): msa = TabularMSA([DNA('A'), DNA('.')]) msa.extend([DNA('C')], index=['foo']) self.assertEqual( msa, TabularMSA([DNA('A'), DNA('.'), DNA('C')], 
index=[0, 1, 'foo'])) def test_duplicate_index(self): msa = TabularMSA([DNA('A'), DNA('.')], index=['foo', 'bar']) msa.extend([DNA('C'), DNA('.')], index=['foo', 'baz']) self.assertEqual( msa, TabularMSA([DNA('A'), DNA('.'), DNA('C'), DNA('.')], index=['foo', 'bar', 'foo', 'baz'])) def test_empty_msa_with_positional_metadata_no_new_positions(self): msa = TabularMSA([], positional_metadata={'foo': []}) msa.extend([DNA(''), DNA('')], reset_index=True) self.assertEqual( msa, TabularMSA([DNA(''), DNA('')], positional_metadata={'foo': []})) def test_empty_msa_with_positional_metadata_add_new_positions(self): # bug in 0.4.2 msa = TabularMSA([], positional_metadata={'foo': []}) msa.extend([DNA('AA'), DNA('GG')], reset_index=True) self.assertEqual( msa, TabularMSA([DNA('AA'), DNA('GG')])) def test_empty_msa_empty_iterable(self): msa = TabularMSA([]) msa.extend([], minter=str) self.assertEqual(msa, TabularMSA([])) def test_non_empty_msa_empty_iterable(self): msa = TabularMSA([DNA('AC')], index=['foo']) msa.extend([], index=[]) self.assertEqual(msa, TabularMSA([DNA('AC')], index=['foo'])) def test_single_sequence(self): msa = TabularMSA([DNA('AC')]) msa.extend([DNA('-C')], minter=str) self.assertEqual(msa, TabularMSA([DNA('AC'), DNA('-C')], index=[0, '-C'])) def test_multiple_sequences(self): msa = TabularMSA([DNA('AC')]) msa.extend([DNA('-C'), DNA('AG')], minter=str) self.assertEqual(msa, TabularMSA([DNA('AC'), DNA('-C'), DNA('AG')], index=[0, '-C', 'AG'])) def test_from_iterable(self): msa = TabularMSA([]) msa.extend(iter([DNA('ACGT'), DNA('TGCA')]), reset_index=True) self.assertEqual(msa, TabularMSA([DNA('ACGT'), DNA('TGCA')])) def test_from_tabular_msa_with_index(self): msa1 = TabularMSA([DNA('AC'), DNA('TG')]) msa2 = TabularMSA([DNA('GG'), DNA('CC'), DNA('AA')]) msa1.extend(msa2, index=msa2.index) self.assertEqual( msa1, TabularMSA([DNA('AC'), DNA('TG'), DNA('GG'), DNA('CC'), DNA('AA')], index=[0, 1, 0, 1, 2])) class TestJoin(unittest.TestCase): def 
assertEqualJoinedMSA(self, msa1, msa2): # `TabularMSA.join` doesn't guarantee index order in the joined MSA. # The order differs across pandas versions, so sort each MSA before # comparing for equality. # copy because `TabularMSA.sort` is in-place. msa1 = copy.copy(msa1) msa2 = copy.copy(msa2) msa1.sort() msa2.sort() self.assertEqual(msa1, msa2) def test_invalid_how(self): with self.assertRaisesRegex(ValueError, r'`how`'): TabularMSA([]).join(TabularMSA([]), how='really') def test_invalid_other_type(self): with self.assertRaisesRegex(TypeError, r'TabularMSA.*DNA'): TabularMSA([]).join(DNA('ACGT')) def test_dtype_mismatch(self): with self.assertRaisesRegex(TypeError, r'dtype.*RNA.*DNA'): TabularMSA([DNA('AC')]).join(TabularMSA([RNA('UG')])) with self.assertRaisesRegex(TypeError, r'dtype.*None.*DNA'): TabularMSA([DNA('AC')]).join(TabularMSA([])) with self.assertRaisesRegex(TypeError, r'dtype.*DNA.*None'): TabularMSA([]).join(TabularMSA([DNA('AC')])) def test_duplicate_index_labels(self): with self.assertRaisesRegex(ValueError, r"This MSA's index labels.*unique"): TabularMSA([DNA('AC'), DNA('--')], index=[0, 0]).join( TabularMSA([DNA('GT'), DNA('..')])) with self.assertRaisesRegex(ValueError, r"`other`'s index labels.*unique"): TabularMSA([DNA('AC'), DNA('--')]).join( TabularMSA([DNA('GT'), DNA('..')], index=[0, 0])) def test_no_metadata(self): msa1 = TabularMSA([DNA('AC'), DNA('G.')]) msa2 = TabularMSA([DNA('-C'), DNA('.G')]) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([DNA('AC-C'), DNA('G..G')])) def test_ignores_metadata(self): msa1 = TabularMSA([DNA('AC', metadata={'id': 'a'}), DNA('G.', metadata={'id': 'b'}), DNA('C-', metadata={'id': 'c'})], metadata={'id': 'msa1'}) msa2 = TabularMSA([DNA('-C', metadata={'id': 'd'}), DNA('.G', metadata={'id': 'e'}), DNA('CA', metadata={'id': 'f'})], index=[2, 1, 0], metadata={'id': 'msa2'}) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([DNA('ACCA'), DNA('G..G'), DNA('C--C')])) 
def test_outer_join_on_per_sequence_positional_metadata(self): msa1 = TabularMSA([ DNA('AC', positional_metadata={'1': [1, 2], 'foo': ['a', 'b']}), DNA('GT', positional_metadata={'2': [3, 4], 'foo': ['c', 'd']})]) msa2 = TabularMSA([ DNA('CA', positional_metadata={'3': [5, 6], 'foo': ['e', 'f']}), DNA('TG', positional_metadata={'4': [7, 8], 'foo': ['g', 'h']})]) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([ DNA('ACCA', positional_metadata={'1': [1, 2, np.nan, np.nan], '3': [np.nan, np.nan, 5, 6], 'foo': ['a', 'b', 'e', 'f']}), DNA('GTTG', positional_metadata={'2': [3, 4, np.nan, np.nan], '4': [np.nan, np.nan, 7, 8], 'foo': ['c', 'd', 'g', 'h']})])) def test_no_sequences(self): msa1 = TabularMSA([], positional_metadata={'foo': []}) msa2 = TabularMSA([], positional_metadata={'foo': []}) joined = msa1.join(msa2) self.assertEqualJoinedMSA(joined, TabularMSA([])) def test_no_positions(self): msa1 = TabularMSA([DNA('', positional_metadata={'1': []}), DNA('', positional_metadata={'2': []})], positional_metadata={'foo': []}) msa2 = TabularMSA([DNA('', positional_metadata={'3': []}), DNA('', positional_metadata={'4': []})], positional_metadata={'foo': []}) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([DNA('', positional_metadata={'1': [], '3': []}), DNA('', positional_metadata={'2': [], '4': []})], positional_metadata={'foo': []})) def test_one_with_positions_one_without_positions(self): msa1 = TabularMSA([DNA('A', positional_metadata={'1': ['a']}), DNA('C', positional_metadata={'2': ['b']})], positional_metadata={'foo': ['bar']}) msa2 = TabularMSA([DNA('', positional_metadata={'3': []}), DNA('', positional_metadata={'4': []})], positional_metadata={'foo': []}) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([DNA('A', positional_metadata={'1': ['a'], '3': [np.nan]}), DNA('C', positional_metadata={'2': ['b'], '4': [np.nan]})], positional_metadata={'foo': ['bar']})) def test_how_strict(self): msa1 = 
TabularMSA([DNA('AC'), DNA('G.'), DNA('C-')], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-C'), DNA('.G'), DNA('CA')], index=[2, 1, 0], positional_metadata={'foo': [3, 4], 'bar': ['c', 'd']}) joined = msa1.join(msa2) self.assertEqualJoinedMSA( joined, TabularMSA([DNA('ACCA'), DNA('G..G'), DNA('C--C')], positional_metadata={'bar': ['a', 'b', 'c', 'd'], 'foo': [1, 2, 3, 4]})) def test_how_strict_failure_index_mismatch(self): msa1 = TabularMSA([DNA('AC'), DNA('G.'), DNA('C-')]) msa2 = TabularMSA([DNA('-C'), DNA('.G'), DNA('CA'), DNA('--')]) with self.assertRaisesRegex(ValueError, r'Index labels must all ' 'match'): msa1.join(msa2) def test_how_strict_failure_positional_metadata_mismatch(self): msa1 = TabularMSA([DNA('AC'), DNA('G.')], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-C'), DNA('.G')], positional_metadata={'foo': [3, 4]}) with self.assertRaisesRegex(ValueError, r'Positional metadata columns.*match'): msa1.join(msa2) def test_how_inner(self): msa1 = TabularMSA([DNA('AC'), DNA('G.'), DNA('C-'), DNA('--')], index=[0, 1, 2, 3], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-C'), DNA('.G'), DNA('CA'), DNA('..')], index=[2, 1, 0, -1], positional_metadata={'foo': [3, 4], 'baz': ['c', 'd']}) joined = msa1.join(msa2, how='inner') self.assertEqualJoinedMSA( joined, TabularMSA([DNA('C--C'), DNA('G..G'), DNA('ACCA')], index=[2, 1, 0], positional_metadata={'foo': [1, 2, 3, 4]})) def test_how_inner_no_positional_metadata_overlap(self): msa1 = TabularMSA([DNA('AC'), DNA('G.')], index=['b', 'a'], positional_metadata={'foo': [1, 2]}) msa2 = TabularMSA([DNA('-C'), DNA('.G')], index=['a', 'b'], positional_metadata={'bar': ['c', 'd']}) joined = msa1.join(msa2, how='inner') self.assertEqualJoinedMSA( joined, TabularMSA([DNA('G.-C'), DNA('AC.G')], index=['a', 'b'])) def test_how_inner_no_index_overlap_with_positional_metadata_overlap(self): msa1 = TabularMSA([DNA('AC'), 
DNA('G.')], positional_metadata={'foo': [1, 2]}) msa2 = TabularMSA([DNA('-C'), DNA('.G')], index=['a', 'b'], positional_metadata={'foo': [3, 4]}) joined = msa1.join(msa2, how='inner') self.assertEqualJoinedMSA(joined, TabularMSA([])) def test_how_outer(self): msa1 = TabularMSA([DNA('AC'), DNA('G.'), DNA('C-'), DNA('--')], index=[0, 1, 2, 3], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-CC'), DNA('.GG'), DNA('CAA'), DNA('...')], index=[2, 1, 0, -1], positional_metadata={'foo': [3, 4, 5], 'baz': ['c', 'd', 'e']}) joined = msa1.join(msa2, how='outer') self.assertEqualJoinedMSA( joined, TabularMSA([DNA('--...'), DNA('ACCAA'), DNA('G..GG'), DNA('C--CC'), DNA('-----')], index=range(-1, 4), positional_metadata={ 'bar': ['a', 'b', np.nan, np.nan, np.nan], 'baz': [np.nan, np.nan, 'c', 'd', 'e'], 'foo': [1, 2, 3, 4, 5]})) def test_how_left(self): msa1 = TabularMSA([DNA('AC'), DNA('G.'), DNA('C-'), DNA('--')], index=[0, 1, 2, 3], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-CC'), DNA('.GG'), DNA('CAA'), DNA('...')], index=[2, 1, 0, -1], positional_metadata={'foo': [3, 4, 5], 'baz': ['c', 'd', 'e']}) joined = msa1.join(msa2, how='left') self.assertEqualJoinedMSA( joined, TabularMSA([DNA('ACCAA'), DNA('G..GG'), DNA('C--CC'), DNA('-----')], positional_metadata={ 'foo': [1, 2, 3, 4, 5], 'bar': ['a', 'b', np.nan, np.nan, np.nan]})) def test_how_right(self): msa1 = TabularMSA([DNA('AC'), DNA('G.'), DNA('C-'), DNA('--')], index=[0, 1, 2, 3], positional_metadata={'foo': [1, 2], 'bar': ['a', 'b']}) msa2 = TabularMSA([DNA('-CC'), DNA('.GG'), DNA('CAA'), DNA('...')], index=[2, 1, 0, -1], positional_metadata={'foo': [3, 4, 5], 'baz': ['c', 'd', 'e']}) joined = msa1.join(msa2, how='right') self.assertEqualJoinedMSA( joined, TabularMSA([DNA('C--CC'), DNA('G..GG'), DNA('ACCAA'), DNA('--...')], index=[2, 1, 0, -1], positional_metadata={ 'foo': [1, 2, 3, 4, 5], 'baz': [np.nan, np.nan, 'c', 'd', 'e']})) class 
TestIterPositions(unittest.TestCase): def test_method_return_type(self): msa = TabularMSA([DNA('AC'), DNA('GT')]) obs = msa.iter_positions() self.assertIsInstance(obs, types.GeneratorType) def test_position_type(self): msa = TabularMSA([DNA('AC'), DNA('GT')]) first_position = next(msa.iter_positions()) # Type should be *exactly* Sequence. self.assertIs(type(first_position), Sequence) def test_no_sequences(self): msa = TabularMSA([]) obs = list(msa.iter_positions()) self.assertEqual(obs, []) def test_no_sequences_ignore_metadata(self): msa = TabularMSA([]) obs = list(msa.iter_positions(ignore_metadata=True)) self.assertEqual(obs, []) def test_no_sequences_reverse(self): msa = TabularMSA([]) obs = list(msa.iter_positions(reverse=True)) self.assertEqual(obs, []) def test_no_sequences_reverse_ignore_metadata(self): msa = TabularMSA([]) obs = list(msa.iter_positions(reverse=True, ignore_metadata=True)) self.assertEqual(obs, []) def test_no_positions(self): msa = TabularMSA([DNA(''), DNA('')]) obs = list(msa.iter_positions()) self.assertEqual(obs, []) def test_no_positions_ignore_metadata(self): msa = TabularMSA([DNA(''), DNA('')]) obs = list(msa.iter_positions(ignore_metadata=True)) self.assertEqual(obs, []) def test_no_positions_reverse(self): msa = TabularMSA([DNA(''), DNA('')]) obs = list(msa.iter_positions(reverse=True)) self.assertEqual(obs, []) def test_no_positions_reverse_ignore_metadata(self): msa = TabularMSA([DNA(''), DNA('')]) obs = list(msa.iter_positions(reverse=True, ignore_metadata=True)) self.assertEqual(obs, []) def test_single_position(self): msa = TabularMSA([DNA('A')]) obs = list(msa.iter_positions()) self.assertEqual(obs, [Sequence('A')]) def test_single_position_reverse(self): msa = TabularMSA([DNA('A'), DNA('T')]) obs = list(msa.iter_positions(reverse=True)) self.assertEqual(obs, [Sequence('AT')]) def test_multiple_positions(self): msa = TabularMSA([DNA('ACGT'), DNA('A-G.'), DNA('----')]) obs = list(msa.iter_positions()) self.assertEqual(obs, 
[Sequence('AA-'), Sequence('C--'), Sequence('GG-'), Sequence('T.-')]) def test_multiple_positions_reverse(self): msa = TabularMSA([DNA('AC'), DNA('A-'), DNA('--')]) obs = list(msa.iter_positions(reverse=True)) self.assertEqual(obs, [Sequence('C--'), Sequence('AA-')]) def test_with_positional_metadata(self): # MSA *and* sequence positional metadata. msa_positional_metadata = {'pm1': [0.5, 1.5], 'foo': [9, 99]} seqs = [ DNA('AC', positional_metadata={'foo': [42, 43]}), DNA('A-'), DNA('--', positional_metadata={'foo': [-1, -2], 'bar': ['baz', 'bazz']})] msa = TabularMSA(seqs, positional_metadata=msa_positional_metadata) obs = list(msa.iter_positions()) self.assertEqual( obs, [Sequence('AA-', metadata={'pm1': 0.5, 'foo': 9}, positional_metadata={'bar': [np.nan, np.nan, 'baz'], 'foo': [42, np.nan, -1]}), Sequence('C--', metadata={'pm1': 1.5, 'foo': 99}, positional_metadata={'bar': [np.nan, np.nan, 'bazz'], 'foo': [43, np.nan, -2]})]) def test_with_positional_metadata_reverse(self): # MSA *and* sequence positional metadata. msa_positional_metadata = {'pm1': [0.5, 1.5], 'foo': [9, 99]} seqs = [ DNA('AC', positional_metadata={'foo': [42, 43]}), DNA('A-'), DNA('--', positional_metadata={'foo': [-1, -2], 'bar': ['baz', 'bazz']})] msa = TabularMSA(seqs, positional_metadata=msa_positional_metadata) obs = list(msa.iter_positions(reverse=True)) self.assertEqual( obs, [Sequence('C--', metadata={'pm1': 1.5, 'foo': 99}, positional_metadata={'bar': [np.nan, np.nan, 'bazz'], 'foo': [43, np.nan, -2]}), Sequence('AA-', metadata={'pm1': 0.5, 'foo': 9}, positional_metadata={'bar': [np.nan, np.nan, 'baz'], 'foo': [42, np.nan, -1]})]) def test_with_positional_metadata_ignore_metadata(self): # MSA *and* sequence positional metadata. 
msa_positional_metadata = {'pm1': [0.5, 1.5], 'foo': [9, 99]} seqs = [ DNA('AC', positional_metadata={'foo': [42, 43]}), DNA('A-'), DNA('--', positional_metadata={'foo': [-1, -2], 'bar': ['baz', 'bazz']})] msa = TabularMSA(seqs, positional_metadata=msa_positional_metadata) obs = list(msa.iter_positions(ignore_metadata=True)) self.assertEqual(obs, [Sequence('AA-'), Sequence('C--')]) class TestConsensus(unittest.TestCase): def test_no_sequences(self): msa = TabularMSA([]) cons = msa.consensus() self.assertEqual(cons, Sequence('')) def test_no_positions(self): msa = TabularMSA([DNA(''), DNA('')]) cons = msa.consensus() self.assertEqual(cons, DNA('')) def test_single_sequence(self): msa = TabularMSA([DNA('ACGT-.')]) cons = msa.consensus() self.assertEqual(cons, DNA('ACGT--')) def test_multiple_sequences(self): msa = TabularMSA([DNA('ACGT'), DNA('AG-.'), DNA('AC-.')]) cons = msa.consensus() self.assertEqual(cons, DNA('AC--')) def test_ties(self): msa = TabularMSA([DNA('A-'), DNA('C-'), DNA('G-')]) cons = msa.consensus() self.assertTrue(cons in [DNA('A-'), DNA('C-'), DNA('G-')]) def test_ties_with_gaps(self): msa = TabularMSA([DNA('-'), DNA('.'), DNA('T'), DNA('T')]) cons = msa.consensus() self.assertTrue(cons in [DNA('T'), DNA('-')]) def test_default_gap_char(self): msa = TabularMSA([DNA('.'), DNA('.'), DNA('.')]) cons = msa.consensus() self.assertEqual(cons, DNA('-')) def test_different_dtype(self): msa = TabularMSA([RNA('---'), RNA('AG-'), RNA('AGG')]) cons = msa.consensus() self.assertEqual(cons, RNA('AG-')) def test_with_positional_metadata(self): # Defining *all* types of metadata to ensure correct metadata is # propagated to majority consensus sequence. 
seqs = [ DNA('-.-', metadata={'id': 'seq1'}, positional_metadata={'qual': range(0, 3)}), DNA('A.T', metadata={'id': 'seq2'}, positional_metadata={'qual': range(3, 6)}), DNA('ACT', metadata={'id': 'seq3'}, positional_metadata={'qual': range(6, 9)}) ] msa = TabularMSA(seqs, metadata={'pubmed': 123456}, positional_metadata={'foo': [42, 43, 42], 'bar': ['a', 'b', 'c']}) cons = msa.consensus() self.assertEqual( cons, DNA('A-T', positional_metadata={'foo': [42, 43, 42], 'bar': ['a', 'b', 'c']})) def test_mixed_gap_characters_as_majority(self): seqs = [ DNA('A'), DNA('A'), DNA('A'), DNA('A'), DNA('.'), DNA('.'), DNA('.'), DNA('-'), DNA('-') ] msa = TabularMSA(seqs) cons = msa.consensus() self.assertEqual(cons, DNA('-')) class TestConservation(unittest.TestCase): def test_no_sequences(self): msa = TabularMSA([]) cons = msa.conservation() npt.assert_array_equal(cons, np.array([])) def test_shannon_entropy_dna(self): msa = TabularMSA([DNA('A'), DNA('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([DNA('A'), DNA('G'), DNA('C'), DNA('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.25, 0.25], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([DNA('AAC'), DNA('GAC')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([1.0], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([DNA('AACT'), DNA('GACA')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. 
- scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) def test_shannon_entropy_rna(self): msa = TabularMSA([RNA('A'), RNA('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([RNA('A'), RNA('G'), RNA('C'), RNA('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.25, 0.25], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([RNA('AAC'), RNA('GAC')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([1.0], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([RNA('AACU'), RNA('GACA')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) def test_shannon_entropy_protein(self): msa = TabularMSA([Protein('A'), Protein('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=22)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([Protein('A'), Protein('G'), Protein('C'), Protein('G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.25, 0.25], base=22)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([Protein('AAC'), Protein('GAC')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=22), 1. - scipy.stats.entropy([1.0], base=22), 1. 
- scipy.stats.entropy([1.0], base=22)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([Protein('AACT'), Protein('GACA')]) actual = msa.conservation(metric='inverse_shannon_uncertainty') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=22), 1. - scipy.stats.entropy([1.0], base=22), 1. - scipy.stats.entropy([1.0], base=22), 1. - scipy.stats.entropy([0.5, 0.5], base=22)]) npt.assert_array_equal(actual, expected) def test_degenerate_mode_nan(self): msa = TabularMSA([DNA('NAC'), DNA('NNC')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', degenerate_mode='nan') expected = np.array([np.nan, np.nan, 1. - scipy.stats.entropy([1.0], base=4)]) npt.assert_array_equal(actual, expected) def test_degenerate_mode_error(self): msa = TabularMSA([DNA('NACN'), DNA('NNCA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error') msa = TabularMSA([DNA('AACA'), DNA('ANCA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error') def test_error_on_degenerate_w_nan_on_gap(self): msa = TabularMSA([DNA('-ACA'), DNA('-NCA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error', gap_mode='nan') def test_column_with_degen_and_gap(self): msa = TabularMSA([DNA('N'), DNA('-')]) # test all eight combinations of gap_mode and degenerate_mode actual = msa.conservation(metric='inverse_shannon_uncertainty', degenerate_mode='nan', gap_mode='nan') npt.assert_array_equal(actual, np.array([np.nan])) actual = msa.conservation(metric='inverse_shannon_uncertainty', degenerate_mode='nan', gap_mode='ignore') npt.assert_array_equal(actual, np.array([np.nan])) actual = msa.conservation(metric='inverse_shannon_uncertainty', degenerate_mode='nan', gap_mode='include') npt.assert_array_equal(actual, np.array([np.nan])) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', 
degenerate_mode='nan', gap_mode='error') self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error', gap_mode='nan') self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error', gap_mode='error') self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error', gap_mode='include') self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', degenerate_mode='error', gap_mode='ignore') def test_gap_mode_nan(self): msa = TabularMSA([DNA('-AC.'), DNA('--CA')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='nan') expected = np.array([np.nan, np.nan, 1. - scipy.stats.entropy([1.0], base=4), np.nan]) npt.assert_array_equal(actual, expected) def test_gap_mode_include(self): msa = TabularMSA([DNA('AC'), DNA('-G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='include') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=5), 1. - scipy.stats.entropy([0.5, 0.5], base=5)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([DNA('AC'), DNA('.G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='include') expected = np.array([1. - scipy.stats.entropy([0.5, 0.5], base=5), 1. - scipy.stats.entropy([0.5, 0.5], base=5)]) npt.assert_array_equal(actual, expected) def test_gap_mode_include_gaps_treated_as_single_char(self): msa = TabularMSA([DNA('.'), DNA('-')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='include') expected = np.array([1. - scipy.stats.entropy([1.0], base=5)]) npt.assert_array_equal(actual, expected) def test_gap_mode_ignore(self): msa = TabularMSA([DNA('AC'), DNA('-G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='ignore') expected = np.array([1. - scipy.stats.entropy([1.0], base=4), 1. 
- scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) msa = TabularMSA([DNA('AC'), DNA('.G')]) actual = msa.conservation(metric='inverse_shannon_uncertainty', gap_mode='ignore') expected = np.array([1. - scipy.stats.entropy([1.0], base=4), 1. - scipy.stats.entropy([0.5, 0.5], base=4)]) npt.assert_array_equal(actual, expected) def test_gap_mode_error(self): msa = TabularMSA([DNA('-AC-'), DNA('--CA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', gap_mode="error") msa = TabularMSA([DNA('AACA'), DNA('A-CA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', gap_mode="error") msa = TabularMSA([DNA('AACA'), DNA('A.CA')]) self.assertRaises(ValueError, msa.conservation, metric='inverse_shannon_uncertainty', gap_mode="error") def test_bad_metric(self): msa = TabularMSA([DNA('AA'), DNA('A-')]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(metric='xyz') msa = TabularMSA([]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(metric='xyz') def test_bad_gap_mode(self): msa = TabularMSA([DNA('AA'), DNA('A-')]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(gap_mode='xyz') msa = TabularMSA([]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(gap_mode='xyz') def test_bad_degenerate_mode(self): msa = TabularMSA([DNA('AA'), DNA('A-')]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(degenerate_mode='xyz') msa = TabularMSA([]) with self.assertRaisesRegex(ValueError, r'xyz'): msa.conservation(degenerate_mode='xyz') class TestGapFrequencies(unittest.TestCase): def test_default_behavior(self): msa = TabularMSA([DNA('AA.'), DNA('-A-')]) freqs = msa.gap_frequencies() npt.assert_array_equal(np.array([1, 0, 2]), freqs) def test_invalid_axis_str(self): with self.assertRaisesRegex(ValueError, r"axis.*'foo'"): TabularMSA([]).gap_frequencies(axis='foo') def test_invalid_axis_int(self): with 
self.assertRaisesRegex(ValueError, r"axis.*2"): TabularMSA([]).gap_frequencies(axis=2) def test_position_axis_str_and_int_equivalent(self): msa = TabularMSA([DNA('ACGT'), DNA('A.G-'), DNA('----')]) str_freqs = msa.gap_frequencies(axis='position') int_freqs = msa.gap_frequencies(axis=1) npt.assert_array_equal(str_freqs, int_freqs) npt.assert_array_equal(np.array([0, 2, 4]), str_freqs) def test_sequence_axis_str_and_int_equivalent(self): msa = TabularMSA([DNA('ACGT'), DNA('A.G-'), DNA('----')]) str_freqs = msa.gap_frequencies(axis='sequence') int_freqs = msa.gap_frequencies(axis=0) npt.assert_array_equal(str_freqs, int_freqs) npt.assert_array_equal(np.array([1, 2, 1, 2]), str_freqs) def test_correct_dtype_absolute_empty(self): msa = TabularMSA([]) freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([]), freqs) self.assertEqual(int, freqs.dtype) def test_correct_dtype_relative_empty(self): msa = TabularMSA([]) freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([]), freqs) self.assertEqual(float, freqs.dtype) def test_correct_dtype_absolute_non_empty(self): msa = TabularMSA([DNA('AC'), DNA('-.')]) freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([0, 2]), freqs) self.assertEqual(int, freqs.dtype) def test_correct_dtype_relative_non_empty(self): msa = TabularMSA([DNA('AC'), DNA('-.')]) freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([0.0, 1.0]), freqs) self.assertEqual(float, freqs.dtype) def test_no_sequences_absolute(self): msa = TabularMSA([]) seq_freqs = msa.gap_frequencies(axis='sequence') pos_freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([]), seq_freqs) npt.assert_array_equal(np.array([]), pos_freqs) def test_no_sequences_relative(self): msa = TabularMSA([]) seq_freqs = msa.gap_frequencies(axis='sequence', relative=True) pos_freqs = msa.gap_frequencies(axis='position', relative=True) 
npt.assert_array_equal(np.array([]), seq_freqs) npt.assert_array_equal(np.array([]), pos_freqs) def test_no_positions_absolute(self): msa = TabularMSA([DNA('')]) seq_freqs = msa.gap_frequencies(axis='sequence') pos_freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([]), seq_freqs) npt.assert_array_equal(np.array([0]), pos_freqs) def test_no_positions_relative(self): msa = TabularMSA([DNA('')]) seq_freqs = msa.gap_frequencies(axis='sequence', relative=True) pos_freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([]), seq_freqs) npt.assert_array_equal(np.array([np.nan]), pos_freqs) def test_single_sequence_absolute(self): msa = TabularMSA([DNA('.T')]) seq_freqs = msa.gap_frequencies(axis='sequence') pos_freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([1, 0]), seq_freqs) npt.assert_array_equal(np.array([1]), pos_freqs) def test_single_sequence_relative(self): msa = TabularMSA([DNA('.T')]) seq_freqs = msa.gap_frequencies(axis='sequence', relative=True) pos_freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([1.0, 0.0]), seq_freqs) npt.assert_array_equal(np.array([0.5]), pos_freqs) def test_single_position_absolute(self): msa = TabularMSA([DNA('.'), DNA('T')]) seq_freqs = msa.gap_frequencies(axis='sequence') pos_freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([1]), seq_freqs) npt.assert_array_equal(np.array([1, 0]), pos_freqs) def test_single_position_relative(self): msa = TabularMSA([DNA('.'), DNA('T')]) seq_freqs = msa.gap_frequencies(axis='sequence', relative=True) pos_freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([0.5]), seq_freqs) npt.assert_array_equal(np.array([1.0, 0.0]), pos_freqs) def test_position_axis_absolute(self): msa = TabularMSA([ DNA('ACGT'), # no gaps DNA('A.G-'), # some gaps (mixed gap chars) DNA('----'), # all gaps DNA('....')]) # all gaps 
freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([0, 2, 4, 4]), freqs) def test_position_axis_relative(self): msa = TabularMSA([DNA('ACGT'), DNA('A.G-'), DNA('CCC.'), DNA('----'), DNA('....')]) freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([0.0, 0.5, 0.25, 1.0, 1.0]), freqs) def test_sequence_axis_absolute(self): msa = TabularMSA([DNA('AC-.'), DNA('A.-.'), DNA('G--.')]) freqs = msa.gap_frequencies(axis='sequence') npt.assert_array_equal(np.array([0, 2, 3, 3]), freqs) def test_sequence_axis_relative(self): msa = TabularMSA([DNA('AC--.'), DNA('A.A-.'), DNA('G-A-.')]) freqs = msa.gap_frequencies(axis='sequence', relative=True) npt.assert_array_equal(np.array([0.0, 2/3, 1/3, 1.0, 1.0]), freqs) def test_relative_frequencies_precise(self): class CustomSequence(GrammaredSequence): @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set('0123456789') @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return '0' @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set('') @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return {} msa = TabularMSA([CustomSequence('0123456789')]) freqs = msa.gap_frequencies(axis='position', relative=True) npt.assert_array_equal(np.array([1.0]), freqs) def test_custom_gap_characters(self): class CustomSequence(GrammaredSequence): @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set('#$*') @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return '#' @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set('ABC-.') @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return {'D': 'ABC-.'} msa = TabularMSA([CustomSequence('ABCD'), CustomSequence('-.-.'), CustomSequence('A#C*'), CustomSequence('####'), CustomSequence('$$$$')]) freqs = msa.gap_frequencies(axis='position') npt.assert_array_equal(np.array([0, 
0, 2, 4, 4]), freqs) class TestGetPosition(unittest.TestCase): def test_without_positional_metadata(self): msa = TabularMSA([DNA('ACG'), DNA('A-G')]) position = msa._get_position_(1) self.assertEqual(position, Sequence('C-')) def test_with_positional_metadata(self): msa = TabularMSA([DNA('ACG'), DNA('A-G')], positional_metadata={'foo': [42, 43, 44], 'bar': ['abc', 'def', 'ghi']}) position = msa._get_position_(1) self.assertEqual(position, Sequence('C-', metadata={'foo': 43, 'bar': 'def'})) class TestIsSequenceAxis(unittest.TestCase): def setUp(self): self.msa = TabularMSA([]) def test_invalid_str(self): with self.assertRaisesRegex(ValueError, r"axis.*'foo'"): self.msa._is_sequence_axis('foo') def test_invalid_int(self): with self.assertRaisesRegex(ValueError, r"axis.*2"): self.msa._is_sequence_axis(2) def test_positive_str(self): self.assertTrue(self.msa._is_sequence_axis('sequence')) def test_positive_int(self): self.assertTrue(self.msa._is_sequence_axis(0)) def test_negative_str(self): self.assertFalse(self.msa._is_sequence_axis('position')) def test_negative_int(self): self.assertFalse(self.msa._is_sequence_axis(1)) class TestHashable(unittest.TestCase): def test_unhashable_type(self): self.assertNotIsInstance(TabularMSA([]), collections.abc.Hashable) def test_unhashable_object(self): with self.assertRaisesRegex(TypeError, r'unhashable'): hash(TabularMSA([])) class TestRepr(unittest.TestCase): def test_repr(self): # basic sanity checks -- more extensive testing of formatting and # special cases is performed in TabularMSAReprDoctests below. here we # only test that pieces of the repr are present. 
these tests also # exercise coverage in case doctests stop counting towards coverage in # the future # str calls repr self.assertEqual(repr(TabularMSA([])), str(TabularMSA([]))) self.assertEqual(repr(TabularMSA([DNA('')])), str(TabularMSA([DNA('')]))) self.assertEqual(repr(TabularMSA([DNA('ACGT')])), str(TabularMSA([DNA('ACGT')]))) self.assertEqual(repr(TabularMSA([DNA('ACGT'*25) for x in range(10)])), str(TabularMSA([DNA('ACGT'*25) for x in range(10)]))) # empty obs = repr(TabularMSA([])) self.assertEqual(obs.count('\n'), 5) self.assertTrue(obs.startswith('TabularMSA')) self.assertIn('sequence count: 0', obs) self.assertIn('position count: 0', obs) # minimal obs = repr(TabularMSA([DNA('')])) self.assertEqual(obs.count('\n'), 5) self.assertTrue(obs.startswith('TabularMSA')) self.assertIn('sequence count: 1', obs) self.assertIn('position count: 0', obs) self.assertIn('[DNA]', obs) # no metadata obs = repr(TabularMSA([DNA('ACGT')])) self.assertEqual(obs.count('\n'), 6) self.assertTrue(obs.startswith('TabularMSA')) self.assertIn('sequence count: 1', obs) self.assertIn('position count: 4', obs) self.assertIn('[DNA]', obs) self.assertTrue(obs.endswith('ACGT')) # sequence spanning > 5 lines obs = repr(TabularMSA([DNA('A' * 71) for x in range(6)])) self.assertEqual(obs.count('\n'), 10) self.assertTrue(obs.startswith('TabularMSA')) self.assertIn('sequence count: 6', obs) self.assertIn('position count: 71', obs) self.assertIn('\n...\n', obs) self.assertIn('[DNA]', obs) self.assertTrue(obs.endswith('AAAA')) # sequences overflowing obs = repr(TabularMSA([DNA('A' * 72)])) self.assertEqual(obs.count('\n'), 6) self.assertTrue(obs.startswith('TabularMSA')) self.assertIn('sequence count: 1', obs) self.assertIn('position count: 72', obs) self.assertIn('[DNA]', obs) self.assertTrue(obs.endswith(' ... ' + 'A'*33)) # NOTE: this must be a *separate* class for doctests only (no unit tests). 
nose # will not run the unit tests otherwise # TODO: check if this is still the case since nose is no longer used # # these doctests exercise the correct formatting of TabularMSA's repr in a # variety of situations. they are more extensive than the unit tests above # (TestRepr.test_repr) but cannot be relied upon for coverage (the unit tests # take care of this) class TabularMSAReprDoctests: r""" >>> from skbio import DNA, TabularMSA Empty (minimal) MSA: >>> TabularMSA([]) TabularMSA --------------------- Stats: sequence count: 0 position count: 0 --------------------- MSA with single empty sequence: >>> TabularMSA([DNA('')]) TabularMSA[DNA] --------------------- Stats: sequence count: 1 position count: 0 --------------------- MSA with single sequence with single character: >>> TabularMSA([DNA('G')]) TabularMSA[DNA] --------------------- Stats: sequence count: 1 position count: 1 --------------------- G MSA with multicharacter sequence: >>> TabularMSA([DNA('ACGT')]) TabularMSA[DNA] --------------------- Stats: sequence count: 1 position count: 4 --------------------- ACGT Full single line: >>> TabularMSA([DNA('A' * 71)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 1 position count: 71 ----------------------------------------------------------------------- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA Full single line with 1 character overflow: >>> TabularMSA([DNA('A' * 72)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 1 position count: 72 ----------------------------------------------------------------------- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ... 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA Two sequences with full lines: >>> TabularMSA([DNA('T' * 71), DNA('T' * 71)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 2 position count: 71 ----------------------------------------------------------------------- TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT Two sequences with full lines with 1 character overflow: >>> TabularMSA([DNA('T' * 72), DNA('T' * 72)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 2 position count: 72 ----------------------------------------------------------------------- TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT ... TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT ... TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT Five full lines (maximum amount of information): >>> TabularMSA([DNA('A' * 71) for x in range(5)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 5 position count: 71 ----------------------------------------------------------------------- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA Six lines starts "summarized" output: >>> TabularMSA([DNA('A' * 71) for x in range(6)]) TabularMSA[DNA] ----------------------------------------------------------------------- Stats: sequence count: 6 position count: 71 ----------------------------------------------------------------------- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ... AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA Supply horrendous metadata and positional metadata to exercise a variety of metadata formatting cases and rules. Sorting should be by type, then by value within each type (Python 3 doesn't allow sorting of mixed types): >>> metadata = { ... # str key, str value ... 'abc': 'some description', ... # int value ... 'foo': 42, ... # unsupported type (dict) value ... 'bar': {}, ... # int key, wrapped str (single line) ... 42: 'some words to test text wrapping and such... yada yada yada ' ... 'yada yada yada yada yada.', ... # bool key, wrapped str (multi-line) ... True: 'abc ' * 34, ... # float key, truncated str (too long) ... 42.5: 'abc ' * 200, ... # unsupported type (tuple) key, unsupported type (list) value ... ('foo', 'bar'): [1, 2, 3], ... # bytes key, single long word that wraps ... b'long word': 'abc' * 30, ... # truncated key (too long), None value ... 'too long of a key name to display in repr': None, ... # wrapped bytes value (has b'' prefix) ... 'bytes wrapped value': b'abcd' * 25, ... # float value ... 0.1: 99.9999, ... # bool value ... 43: False, ... # None key, complex value ... None: complex(-1.0, 0.0), ... # nested quotes ... 10: '"\'' ... } >>> positional_metadata = pd.DataFrame({ ... # str key, int list value ... 'foo': [1, 2, 3, 4], ... # float key, float list value ... 42.5: [2.5, 3.0, 4.2, -0.00001], ... # int key, object list value ... 42: [[], 4, 5, {}], ... # truncated key (too long), bool list value ... 'abc' * 90: [True, False, False, True], ... # None key ... None: range(4)}) >>> positional_metadata = positional_metadata.reindex( ... columns=['foo', 42.5, 42, 'abc' * 90, None]) >>> TabularMSA([DNA('ACGT')], metadata=metadata, ... 
positional_metadata=positional_metadata) TabularMSA[DNA] ----------------------------------------------------------------------- Metadata: None: (-1+0j) True: 'abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc ' b'long word': 'abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabca bcabcabcabcabcabcabcabcabcabcabcabcabc' 0.1: 99.9999 42.5: 10: '"\'' 42: 'some words to test text wrapping and such... yada yada yada yada yada yada yada yada.' 43: False 'abc': 'some description' 'bar': 'bytes wrapped value': b'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab cdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd abcdabcdabcdabcd' 'foo': 42 : None : Positional metadata: 'foo': 42.5: 42: : None: Stats: sequence count: 1 position count: 4 ----------------------------------------------------------------------- ACGT """ pass if __name__ == "__main__": unittest.main() scikit-bio-0.6.2/skbio/diversity/000077500000000000000000000000001464262511300167115ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/__init__.py000066400000000000000000000431121464262511300210230ustar00rootroot00000000000000r"""Community Diversity (:mod:`skbio.diversity`) ============================================ .. currentmodule:: skbio.diversity This module provides functionality for analyzing biodiversity of communities -- groups of organisms living in the same area. It implements various metrics of alpha (within-community) and beta (between-community) diversity, and provides "driver functions" for computing alpha and beta diversity for an entire data table. Additional utilities are provided to support discovery of available diversity metrics. While diversity metrics were originally designed to study biological communities, they can be generalized to the analysis of various biological data types. Alpha diversity --------------- .. rubric:: Alpha diversity metrics .. 
autosummary:: :toctree: generated/ alpha get_alpha_diversity_metrics .. rubric:: Driver function .. autosummary:: :toctree: generated/ alpha_diversity Beta diversity -------------- .. rubric:: Beta diversity metrics .. autosummary:: :toctree: generated/ beta get_beta_diversity_metrics .. rubric:: Driver functions .. autosummary:: :toctree: generated/ beta_diversity partial_beta_diversity block_beta_diversity Introduction ------------ A community (i.e., sample) is represented by a vector of frequencies of taxa within the sample. The term "taxon" (plural: "taxa") describes a group of biologically related organisms that constitute a unit in the community. Taxa are usually defined at a uniform taxonomic rank, such as species, genus or family. In community ecology, taxon is usually referred to as "species" (singular = plural), but its definition is not limited to species as a taxonomic rank. The term "taxonomic group" is a synonym of taxon in many situations. In scikit-bio, the term "taxon/taxa" is used very loosely, as these can in practice represent diverse feature types including organisms, genes, and metabolites. The term "sample" is also loosely defined for these purposes. These are intended to represent a single unit of sampling, and as such what a single sample represents can vary widely. For example, in a microbiome survey, these could represent all 16S rRNA gene sequences from a single oral swab. In a comparative genomics study on the other hand, a sample could represent an individual organism's genome. .. note:: Previous versions of scikit-bio referred to taxon as operational taxonomic unit (OTU), a historically important term in microbiome research. However, as the field advances and the research targets diverge (e.g., amplicon sequence variant, or ASV), a more generic term such as "taxon" becomes more appropriate. Therefore, the term OTU was replaced by taxon in scikit-bio 0.6.0. 
Each frequency in a given vector represents the number of individuals observed for a particular taxon. We will refer to the frequencies associated with a single sample as a *counts vector* or ``counts`` throughout the documentation. Counts vectors are `array_like`: anything that can be converted into a 1-D numpy array is acceptable input. For example, you can provide a numpy array or a native Python list and the results will be identical. The driver functions :func:`alpha_diversity` and :func:`beta_diversity` are designed to compute alpha diversity for one or more samples, or beta diversity for one or more pairs of samples. The driver functions accept a matrix containing vectors of frequencies of taxa within each sample. Each row in the matrix represents a single sample's count vector, so that rows represent samples and columns represent taxa. Some diversity metrics incorporate relationships between the taxa in their computation through reference to a phylogenetic tree. These metrics additionally take a :class:`skbio.TreeNode` object and a list of taxa mapping the values in the counts vector to tips in the tree. The driver functions are optimized so that computing a diversity metric more than one time (i.e., for more than one sample for alpha diversity metrics, or more than one pair of samples for beta diversity metrics) is often much faster than repeated calls to the metric. For this reason, the driver functions take matrices of counts vectors rather than a single counts vector for alpha diversity metrics or two counts vectors for beta diversity metrics. The ``alpha_diversity`` driver function will thus compute alpha diversity for all counts vectors in the matrix, and the ``beta_diversity`` driver function will compute beta diversity for all pairs of counts vectors in the matrix. Input validation ^^^^^^^^^^^^^^^^ The driver functions perform validation of input by default. Validation can be slow so it is possible to disable this step by passing ``validate=False``. 
This can be dangerous however. If invalid input is encountered when validation is disabled it can result in difficult-to-interpret error messages or incorrect results. We therefore recommend that users are careful to ensure that their input data is valid before disabling validation. The conditions that the driver functions validate follow. If disabling validation, users should be confident that these conditions are met. * the data in the counts vectors can be safely cast to integers * there are no negative values in the counts vectors * each counts vector is one dimensional * the counts matrix is two dimensional * all counts vectors are of equal length Additionally, if a phylogenetic diversity metric is being computed, the following conditions are also confirmed: * the provided taxa are all unique * the length of each counts vector is equal to the number of taxa * the provided tree is rooted * the tree has more than one node * all nodes in the provided tree except for the root node have branch lengths * all tip names in the provided tree are unique * all provided taxa correspond to tip names in the provided tree Count vectors ^^^^^^^^^^^^^ There are different ways that count vectors are represented in the ecological literature and in related software. The diversity measures provided here *always* assume that the input contains abundance data: each count represents the number of individuals observed for a particular taxon in the sample. For example, if you have two taxa, where three individuals were observed from the first taxon and only a single individual was observed from the second taxon, you could represent this data in the following forms (among others). As a vector of counts. This is the expected type of input for the diversity measures in this module. There are 3 individuals from the taxon at index 0, and 1 individual from the taxon at index 1: >>> counts = [3, 1] As a vector of indices. 
The taxon at index 0 is observed 3 times, while the taxon at index 1 is observed 1 time: >>> indices = [0, 0, 0, 1] As a vector of frequencies. We have 1 taxon that is a singleton and 1 taxon that is a tripleton. We do not have any 0-tons or doubletons: >>> frequencies = [0, 1, 0, 1] Always use the first representation (a counts vector) with this module. Specifying a diversity metric ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The driver functions take a parameter, ``metric``, that specifies which diversity metric should be applied. The value that you provide for ``metric`` can be either a string (e.g., ``"faith_pd"``) or a function (e.g., ``skbio.diversity.alpha.faith_pd``). The metric should generally be passed as a string, as this often uses an optimized version of the metric. For example, passing ``metric="faith_pd"`` (a string) to ``alpha_diversity`` will be tens of times faster than passing ``metric=skbio.diversity.alpha.faith_pd`` (a function) when computing Faith's PD on about 100 samples. Similarly, passing ``metric="unweighted_unifrac"`` (a string) will be hundreds of times faster than passing ``metric=skbio.diversity.beta.unweighted_unifrac`` (a function) when computing unweighted UniFrac on about 100 samples. The latter may be faster if computing only one alpha or beta diversity value, but in these cases the run times will likely be so small that the difference will be negligible. **We therefore recommend that you always pass the metric as a string when possible.** Passing a metric as a string will not be possible if the metric you'd like to run is not one that scikit-bio knows about. This might be the case, for example, if you're applying a custom metric that you've developed. To discover the metric names that scikit-bio knows about as strings that can be passed as ``metric`` to ``alpha_diversity`` or ``beta_diversity``, you can call ``get_alpha_diversity_metrics`` or ``get_beta_diversity_metrics``, respectively. 
These functions return lists of alpha and beta diversity metrics that are implemented in scikit-bio. There may be additional metrics that can be passed as strings which won't be listed here, such as those implemented in ``scipy.spatial.distance.pdist``. Tutorial -------- Create a matrix containing 6 samples (rows) and 7 taxa (columns): .. plot:: :context: >>> data = [[23, 64, 14, 0, 0, 3, 1], ... [0, 3, 35, 42, 0, 12, 1], ... [0, 5, 5, 0, 40, 40, 0], ... [44, 35, 9, 0, 1, 0, 0], ... [0, 2, 8, 0, 35, 45, 1], ... [0, 0, 25, 35, 0, 19, 0]] >>> ids = list('ABCDEF') First, we'll compute :math:`S_{obs}`, an alpha diversity metric, for each sample using the ``alpha_diversity`` driver function: >>> from skbio.diversity import alpha_diversity >>> adiv_sobs = alpha_diversity('sobs', data, ids) >>> adiv_sobs # doctest: +ELLIPSIS A 5 B 5 C 4 D 4 E 5 F 3 dtype: ... Next we'll compute Faith's PD on the same samples. Since this is a phylogenetic diversity metric, we'll first create a tree and an ordered list of taxa. >>> from skbio import TreeNode >>> from io import StringIO >>> tree = TreeNode.read(StringIO( ... '(((((U1:0.5,U2:0.5):0.5,U3:1.0):1.0):0.0,' ... '(U4:0.75,(U5:0.5,(U6:0.5,U7:0.5):0.5):' ... '0.5):1.25):0.0)root;')) >>> taxa = ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7'] >>> adiv_faith_pd = alpha_diversity('faith_pd', data, ids=ids, ... taxa=taxa, tree=tree) >>> adiv_faith_pd A 6.75 B 7.00 C 6.25 D 5.75 E 6.75 F 5.50 dtype: float64 Now we'll compute Bray-Curtis distances, a beta diversity metric, between all pairs of samples. Notice that the ``data`` and ``ids`` parameters provided to ``beta_diversity`` are the same as those provided to ``alpha_diversity``. >>> from skbio.diversity import beta_diversity >>> bc_dm = beta_diversity("braycurtis", data, ids) >>> print(bc_dm) 6x6 distance matrix IDs: 'A', 'B', 'C', 'D', 'E', 'F' Data: [[ 0. 0.78787879 0.86666667 0.30927835 0.85714286 0.81521739] [ 0.78787879 0. 
0.78142077 0.86813187 0.75 0.1627907 ] [ 0.86666667 0.78142077 0. 0.87709497 0.09392265 0.71597633] [ 0.30927835 0.86813187 0.87709497 0. 0.87777778 0.89285714] [ 0.85714286 0.75 0.09392265 0.87777778 0. 0.68235294] [ 0.81521739 0.1627907 0.71597633 0.89285714 0.68235294 0. ]] Next, we'll compute weighted UniFrac distances between all pairs of samples. Because weighted UniFrac is a phylogenetic beta diversity metric, we'll need to pass the ``skbio.TreeNode`` and list of taxa that we created above. Again, these are the same values that were provided to ``alpha_diversity``. >>> wu_dm = beta_diversity("weighted_unifrac", data, ids, tree=tree, ... taxa=taxa) >>> print(wu_dm) 6x6 distance matrix IDs: 'A', 'B', 'C', 'D', 'E', 'F' Data: [[ 0. 2.77549923 3.82857143 0.42512039 3.8547619 3.10937312] [ 2.77549923 0. 2.26433692 2.98435423 2.24270353 0.46774194] [ 3.82857143 2.26433692 0. 3.95224719 0.16025641 1.86111111] [ 0.42512039 2.98435423 3.95224719 0. 3.98796148 3.30870431] [ 3.8547619 2.24270353 0.16025641 3.98796148 0. 1.82967033] [ 3.10937312 0.46774194 1.86111111 3.30870431 1.82967033 0. ]] Next we'll do some work with these beta diversity distance matrices. First, we'll determine if the UniFrac and Bray-Curtis distance matrices are significantly correlated by computing the Mantel correlation between them. Then we'll determine if the p-value is significant based on an alpha of 0.05. >>> from skbio.stats.distance import mantel >>> r, p_value, n = mantel(wu_dm, bc_dm) >>> print(r) 0.922404392093 >>> alpha = 0.05 >>> print(p_value < alpha) True Next, we'll perform principal coordinates analysis (PCoA) on our weighted UniFrac distance matrix. >>> from skbio.stats.ordination import pcoa >>> wu_pc = pcoa(wu_dm) PCoA plots are only really interesting in the context of sample metadata, so let's define some before we visualize these results. >>> import pandas as pd >>> sample_md = pd.DataFrame([ ... ['gut', 's1'], ... ['skin', 's1'], ... ['tongue', 's1'], ... 
['gut', 's2'], ... ['tongue', 's2'], ... ['skin', 's2']], ... index=['A', 'B', 'C', 'D', 'E', 'F'], ... columns=['body_site', 'subject']) >>> sample_md body_site subject A gut s1 B skin s1 C tongue s1 D gut s2 E tongue s2 F skin s2 Now let's plot our PCoA results, coloring each sample by the subject it was taken from: >>> fig = wu_pc.plot( ... sample_md, 'subject', ... axis_labels=('PC 1', 'PC 2', 'PC 3'), ... title='Samples colored by subject', ... cmap='jet', s=50 ... ) # doctest: +SKIP .. plot:: :context: We don't see any clustering/grouping of samples. If we were to instead color the samples by the body site they were taken from, we see that the samples from the same body site (those that are colored the same) appear to be closer to one another in the 3-D space then they are to samples from other body sites. >>> fig = wu_pc.plot( ... sample_md, 'body_site', ... axis_labels=('PC 1', 'PC 2', 'PC 3'), ... title='Samples colored by body site', ... cmap='jet', s=50 ... ) # doctest: +SKIP .. plot:: :context: Ordination techniques, such as PCoA, are useful for exploratory analysis. The next step is to quantify the strength of the grouping/clustering that we see in ordination plots. There are many statistical methods available to accomplish this; many operate on distance matrices. Let's use ANOSIM to quantify the strength of the clustering we see in the ordination plots above, using our weighted UniFrac distance matrix and sample metadata. First test the grouping of samples by subject: >>> from skbio.stats.distance import anosim >>> results = anosim(wu_dm, sample_md, column='subject', permutations=999) >>> results['test statistic'] -0.33333333333333331 >>> results['p-value'] < 0.1 False The negative value of ANOSIM's R statistic indicates anti-clustering and the p-value is insignificant at an alpha of 0.1. 
Now let's test the grouping of samples by body site: >>> results = anosim(wu_dm, sample_md, column='body_site', permutations=999) >>> results['test statistic'] 1.0 >>> results['p-value'] < 0.1 True The R statistic indicates strong separation of samples based on body site. The p-value is significant at an alpha of 0.1. We can also explore the alpha diversity in the context of sample metadata. To do this, let's add the observed richness and Faith's PD metrics to our sample metadata. This is straight-forward because ``alpha_diversity`` returns a Pandas ``Series`` object, and we're representing our sample metadata in a Pandas ``DataFrame`` object. >>> sample_md['Obs. richness'] = adiv_sobs >>> sample_md['Faith PD'] = adiv_faith_pd >>> sample_md body_site subject Obs. richness Faith PD A gut s1 5 6.75 B skin s1 5 7.00 C tongue s1 4 6.25 D gut s2 4 5.75 E tongue s2 5 6.75 F skin s2 3 5.50 We can investigate these alpha diversity data in the context of our metadata categories. For example, we can generate boxplots showing Faith PD by body site. >>> fig = sample_md.boxplot(column='Faith PD', by='body_site') # doctest: +SKIP We can also compute Spearman correlations between all pairs of columns in this ``DataFrame``. Since our alpha diversity metrics are the only two numeric columns (and thus the only columns for which Spearman correlation is relevant), this will give us a symmetric 2x2 correlation matrix. >>> sample_md.corr(method="spearman", numeric_only=True) Obs. richness Faith PD Obs. richness 1.000000 0.939336 Faith PD 0.939336 1.000000 """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._driver import ( alpha_diversity, beta_diversity, partial_beta_diversity, get_alpha_diversity_metrics, get_beta_diversity_metrics, ) from ._block import block_beta_diversity __all__ = [ "alpha_diversity", "beta_diversity", "get_alpha_diversity_metrics", "get_beta_diversity_metrics", "partial_beta_diversity", "block_beta_diversity", ] scikit-bio-0.6.2/skbio/diversity/_block.py000066400000000000000000000245471464262511300205300ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.diversity._driver import partial_beta_diversity from skbio.stats.distance import DistanceMatrix from skbio.diversity._util import _validate_counts_matrix def _generate_id_blocks(ids, k=64): """Generate blocks of IDs that map into a DistanceMatrix. Parameters ---------- ids : Iterable object An iterable of IDs of whatever type. k : int, optional The size of a block to generate IDs for, defaults to 64. Notes ----- This method is intended to facilitate partial beta diversity calculations. Blocks of IDs are generated from the upper triangle of the subsequent distance matrix. For instance, given the following distance matrix with IDs {A, B, C, D, E}: A B C D E A 0 # # # # B # 0 # # # C # # 0 # # D # # # 0 # E # # # # 0 The goal of this method is to generate tuples of IDs of at most size k over the upper triangle which correspond to blocks of the matrix to compute. IDs are remapped as well into integers to facilitate downstream indexing. 
Given k=3, the following ID tuples would be generated: ((0, 1, 2), (0, 1, 2)) ((0, 1, 2), (3, 4)) ((3, 4), (3, 4)) This method is not responsible for describing which specific pairs of IDs are to be computed, only the subset of the matrix of interest. Returns ------- tuple of 1D np.array Index 0 contains the row IDs, and index 1 contains the column IDs """ n = len(ids) ids_idx = np.arange(n) for row_start in range(0, n, k): for col_start in range(row_start, n, k): row_ids = ids_idx[row_start : row_start + k] col_ids = ids_idx[col_start : col_start + k] yield (row_ids, col_ids) def _block_party(counts=None, row_ids=None, col_ids=None, **kwargs): """Subset counts to relevant rows and columns. Parameters ---------- counts : 2D array_like of ints or floats Matrix containing count/abundance data where each row contains counts of taxa in a given sample. row_ids : 1D np.ndarray of int Block row IDs to keep in the counts matrix. col_ids : 1D np.ndarray of int Block column IDs to keep in the counts matrix. Note, these correspond to rows in the counts matrix, but columns in a subsequent distance matrix. kwargs : dict Keyword arguments containing information about the block to compute. Returns ------- dict kwargs that describe the block to compute. A filtered ``counts`` matrix is stored in kwargs. If applicable, a filtered ``tree`` and ``taxa`` are also stored. 
""" ids_to_keep = np.unique(np.hstack([row_ids, col_ids])) # create a view of the relevant samples counts_block = counts[ids_to_keep] # remove from the block any empty observations # NOTE: this will perform an implicit copy nonzero_cols = (counts_block != 0).any(axis=0) counts_block = counts_block[:, nonzero_cols] kwargs["counts"] = counts_block kwargs["ids"] = ids_to_keep if "tree" in kwargs and "taxa" in kwargs: kwargs["taxa"] = np.asarray(kwargs["taxa"])[nonzero_cols] kwargs["tree"] = kwargs["tree"].shear(kwargs["taxa"]) return kwargs def _pairs_to_compute(rids, cids): """Determine the pairs of samples to compute distances between. Parameters ---------- rids : Iterable The row IDs in the partial pairwise computation. cids : Iterable The column IDs in the partial pairwise computation. Raises ------ ValueError When determining ID pairs for blocks that fall outside of the diagonal of the resulting distance matrix, if a pair corresponds to the lower triangle, complain loudly. Returns ------- list of tuple The ID pairs to compute distances between. """ # if identical, gather the upper triangle if len(rids) == len(cids) and (rids == cids).all(): return [(i, j) for idx, i in enumerate(rids) for j in rids[idx + 1 :]] # otherwise, grab pairwise combinations disregarding the diagonal else: if set(rids).intersection(set(cids)): raise ValueError("Attempting to compute a lower triangle") return [(i, j) for i in rids for j in cids if i != j] def _block_kwargs(**kwargs): """Construct arguments describing a block to compute. Returns ------- dict The parameters for the block of the distance matrix to compute. 
""" valid_block_keys = { "counts", "ids", "tree", "taxa", "metric", "id_pairs", "validate", "otu_ids", } for row_ids, col_ids in _generate_id_blocks(kwargs["ids"], kwargs["k"]): id_pairs = _pairs_to_compute(row_ids, col_ids) if id_pairs: kw = {k: v for k, v in kwargs.items() if k in valid_block_keys} kw["id_pairs"] = id_pairs kw["row_ids"] = row_ids kw["col_ids"] = col_ids yield kw def _block_compute(**kwargs): """Compute a block within the resulting distance matrix. Notes ----- This method encapsulates the two expensive operations to perform for each block, namely, the "shearing" of the phylogenetic tree to correspond to only the taxa of interest, and the actual beta diversity calculations. Returns ------- DistanceMatrix """ block_kw = _block_party(**kwargs) return partial_beta_diversity(**block_kw) def _map(func, kw_gen): """Map a function over arguments. Notes ----- builtin map does not allow for mapping with kwargs. Parallel uses of block decomposition will likely replace this method with one which can distribute compute. """ for kwargs in kw_gen: yield func(**kwargs) def _reduce(blocks): """Reduce an iterable of partial distance matrices into a full matrix. Note, the reduce doesn't actually care about what pairs are computed so if a distance between pairs exists multiple times, it'll get added. as such, this reduction is only safe to perform if by the block_beta_diversity method which assures that distances are not computed multiple times. """ all_blocks = list(blocks) # Determine the maximum integer ID observed in the blocks. There exists a # 1-1 mapping between the integer ID and a sample ID. We increment by 1 # as the integer ID space begins with zero, and we'll be using this value # to determine the size of the resulting full distance matrix. n_ids = max(map(lambda x: max(x.ids), all_blocks)) + 1 mat = np.zeros((n_ids, n_ids), dtype=float) # TODO: something smarter. 
for block in all_blocks: n_blk_ids = len(block.ids) # get the corresponding coordinates in the master matrix master_idx = [ (i, j) for row, i in enumerate(block.ids) for j in block.ids[row + 1 :] ] # get the corresponding coordinates within the current block block_idx = [ (i, j) for row, i in enumerate(range(n_blk_ids)) for j in range(row + 1, n_blk_ids) ] for (m_i, m_j), (b_i, b_j) in zip(master_idx, block_idx): mat[m_i, m_j] += block.data[b_i, b_j] return DistanceMatrix(mat + mat.T, list(range(n_ids))) def block_beta_diversity( metric, counts, ids, validate=True, k=64, reduce_f=None, map_f=None, **kwargs ): """Perform a block-decomposition beta diversity calculation. Parameters ---------- metric : str or callable The pairwise distance function to apply. If ``metric`` is a string, it must be resolvable by scikit-bio (e.g., UniFrac methods), or must be callable. counts : 2D array_like of ints or floats Matrix containing count/abundance data where each row contains counts of taxa in a given sample. ids : iterable of strs Identifiers for each sample in ``counts``. validate : bool, optional See ``skbio.diversity.beta_diversity`` for details. reduce_f : function, optional A method to reduce `PartialDistanceMatrix` objects into a single `DistanceMatrix`. The expected signature is: `f(Iterable of DistanceMatrix) -> DistanceMatrix` Note, this is the reduce within a map/reduce. map_f: function, optional A method that accepts a `_block_compute`. The expected signature is: `f(**kwargs) -> DistanceMatrix` NOTE: ipyparallel's `map_async` will not work here as we need to be able to pass around `**kwargs``. k : int, optional The blocksize used when computing distances kwargs : kwargs, optional Metric-specific parameters. Returns ------- DistanceMatrix A distance matrix relating all samples represented by counts to each other. Notes ----- This method is designed to facilitate computing beta diversity in parallel. 
In general, if you are processing a few hundred samples or less, then it is likely the case that `skbio.diversity.beta_diversity` will be faster. The original need which motivated the development of this method was processing the Earth Microbiome Project [1]_ dataset which at the time spanned over 25,000 samples and 7.5 million open reference taxa. See Also -------- skbio.diversity.beta_diversity skbio.diversity.partial_beta_diversity References ---------- .. [1] http://www.earthmicrobiome.org/ """ if validate: counts = _validate_counts_matrix(counts, ids=ids) if reduce_f is None: reduce_f = _reduce if map_f is None: map_f = _map # The block method uses numeric IDs to take advantage of fancy indexing # with numpy. tmp_ids = np.arange(len(counts)) kwargs["ids"] = tmp_ids kwargs["metric"] = metric kwargs["counts"] = counts kwargs["k"] = k kwargs["validate"] = False # we've already validated if necessary dm = reduce_f(map_f(_block_compute, _block_kwargs(**kwargs))) dm.ids = ids return dm scikit-bio-0.6.2/skbio/diversity/_driver.py000066400000000000000000000436531464262511300207300ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import functools import itertools from warnings import warn import numpy as np import scipy.spatial.distance import pandas as pd import skbio from skbio.diversity.alpha._pd import _faith_pd, _phydiv, _setup_pd from skbio.diversity.beta._unifrac import ( _setup_multiple_unweighted_unifrac, _setup_multiple_weighted_unifrac, _normalize_weighted_unifrac_by_default, ) from skbio.stats.distance import DistanceMatrix from skbio.diversity._util import ( _validate_counts_matrix, _get_phylogenetic_kwargs, _quantitative_to_qualitative_counts, _table_to_numpy, _validate_table, ) from skbio.util._warning import _warn_deprecated def _get_alpha_diversity_metric_map(): return { "ace": skbio.diversity.alpha.ace, "chao1": skbio.diversity.alpha.chao1, "chao1_ci": skbio.diversity.alpha.chao1_ci, "berger_parker_d": skbio.diversity.alpha.berger_parker_d, "brillouin_d": skbio.diversity.alpha.brillouin_d, "dominance": skbio.diversity.alpha.dominance, "doubles": skbio.diversity.alpha.doubles, "enspie": skbio.diversity.alpha.enspie, "esty_ci": skbio.diversity.alpha.esty_ci, "faith_pd": skbio.diversity.alpha.faith_pd, "fisher_alpha": skbio.diversity.alpha.fisher_alpha, "gini_index": skbio.diversity.alpha.gini_index, "goods_coverage": skbio.diversity.alpha.goods_coverage, "inv_simpson": skbio.diversity.alpha.inv_simpson, "hill": skbio.diversity.alpha.hill, "heip_e": skbio.diversity.alpha.heip_e, "kempton_taylor_q": skbio.diversity.alpha.kempton_taylor_q, "lladser_ci": skbio.diversity.alpha.lladser_ci, "lladser_pe": skbio.diversity.alpha.lladser_pe, "margalef": skbio.diversity.alpha.margalef, "mcintosh_d": skbio.diversity.alpha.mcintosh_d, "mcintosh_e": skbio.diversity.alpha.mcintosh_e, "menhinick": skbio.diversity.alpha.menhinick, "michaelis_menten_fit": skbio.diversity.alpha.michaelis_menten_fit, "observed_features": skbio.diversity.alpha.observed_features, "observed_otus": skbio.diversity.alpha.observed_otus, 
"osd": skbio.diversity.alpha.osd, "phydiv": skbio.diversity.alpha.phydiv, "pielou_e": skbio.diversity.alpha.pielou_e, "renyi": skbio.diversity.alpha.renyi, "robbins": skbio.diversity.alpha.robbins, "shannon": skbio.diversity.alpha.shannon, "simpson": skbio.diversity.alpha.simpson, "simpson_d": skbio.diversity.alpha.simpson_d, "simpson_e": skbio.diversity.alpha.simpson_e, "singles": skbio.diversity.alpha.singles, "sobs": skbio.diversity.alpha.sobs, "strong": skbio.diversity.alpha.strong, "tsallis": skbio.diversity.alpha.tsallis, } def get_alpha_diversity_metrics(): """List scikit-bio's alpha diversity metrics. The alpha diversity metrics listed here can be passed as metrics to ``skbio.diversity.alpha_diversity``. Returns ------- list of str Alphabetically sorted list of alpha diversity metrics implemented in scikit-bio. See Also -------- alpha_diversity get_beta_diversity_metrics """ metrics = _get_alpha_diversity_metric_map() return sorted(metrics.keys()) def get_beta_diversity_metrics(): """List scikit-bio's beta diversity metrics. The beta diversity metrics listed here can be passed as metrics to ``skbio.diversity.beta_diversity``. Returns ------- list of str Alphabetically sorted list of beta diversity metrics implemented in scikit-bio. See Also -------- beta_diversity get_alpha_diversity_metrics scipy.spatial.distance.pdist Notes ----- SciPy implements many additional beta diversity metrics that are not included in this list. See documentation for ``scipy.spatial.distance.pdist`` for more details. """ return sorted(["unweighted_unifrac", "weighted_unifrac"]) def alpha_diversity(metric, counts, ids=None, validate=True, **kwargs): """Compute alpha diversity for one or more samples. Parameters ---------- metric : str, callable The alpha diversity metric to apply to the sample(s). Passing metric as a string is preferable as this often results in an optimized version of the metric being used. 
counts : 1D or 2D array_like of ints or floats, Table Vector or matrix containing count/abundance data. If a matrix, each row should contain counts of taxa in a given sample. ids : iterable of strs, optional Identifiers for each sample in ``counts``. By default, samples will be assigned integer identifiers in the order that they were provided. validate: bool, optional If ``False``, validation of the input won't be performed. This step can be slow, so if validation is run elsewhere it can be disabled here. However, invalid input data can lead to invalid results or error messages that are hard to interpret, so this step should not be bypassed if you're not certain that your input data are valid. See :mod:`skbio.diversity` for the description of what validation entails so you can determine if you can safely disable validation. kwargs : kwargs, optional Metric-specific parameters. Returns ------- pd.Series Values of ``metric`` for all vectors provided in ``counts``. The index will be ``ids``, if provided. Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. Exact error will depend on what was invalid. TypeError If invalid method-specific parameters are provided. 
See Also -------- skbio.diversity skbio.diversity.alpha skbio.diversity.get_alpha_diversity_metrics skbio.diversity.beta_diversity """ if isinstance(counts, skbio.Table): counts, ids = _validate_table(counts, ids, kwargs) metric_map = _get_alpha_diversity_metric_map() if validate: counts = _validate_counts_matrix(counts, ids=ids) if metric == "faith_pd": taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) counts_by_node, branch_lengths = _setup_pd( counts, taxa, tree, validate, rooted=True, single_sample=False ) counts = counts_by_node metric = functools.partial(_faith_pd, branch_lengths=branch_lengths, **kwargs) elif metric == "phydiv": taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) counts_by_node, branch_lengths = _setup_pd( counts, taxa, tree, validate, rooted=False, single_sample=False ) counts = counts_by_node if "rooted" not in kwargs: kwargs["rooted"] = len(tree.root().children) == 2 if "weight" not in kwargs: kwargs["weight"] = False metric = functools.partial(_phydiv, branch_lengths=branch_lengths, **kwargs) elif callable(metric): metric = functools.partial(metric, **kwargs) elif metric in metric_map: metric = functools.partial(metric_map[metric], **kwargs) else: raise ValueError("Unknown metric provided: %r." % metric) # kwargs is provided here so an error is raised on extra kwargs results = [metric(c, **kwargs) for c in counts] return pd.Series(results, index=ids) def partial_beta_diversity(metric, counts, ids, id_pairs, validate=True, **kwargs): """Compute distances only between specified ID pairs. Parameters ---------- metric : str or callable The pairwise distance function to apply. If ``metric`` is a string, it must be resolvable by scikit-bio (e.g., UniFrac methods), or must be callable. counts : 2D array_like of ints or floats Matrix containing count/abundance data where each row contains counts of taxa in a given sample. ids : iterable of strs Identifiers for each sample in ``counts``. 
id_pairs : iterable of tuple An iterable of tuples of IDs to compare (e.g., ``[('a', 'b'), ('a', 'c'), ...])``. If specified, the set of IDs described must be a subset of ``ids``. validate : bool, optional See ``skbio.diversity.beta_diversity`` for details. kwargs : kwargs, optional Metric-specific parameters. Returns ------- skbio.DistanceMatrix Distances between pairs of samples indicated by id_pairs. Pairwise distances not defined by id_pairs will be 0.0. Use this resulting DistanceMatrix with caution as 0.0 is a valid distance. Warnings -------- ``partial_beta_diversity`` is deprecated as of ``0.5.0``. The return type is unstable. Developer caution is advised. The resulting DistanceMatrix object will include zeros when distance has not been calculated, and therefore can be misleading. Raises ------ ValueError If ``ids`` are not specified. If ``id_pairs`` are not a subset of ``ids``. If ``metric`` is not a callable or is unresolvable string by scikit-bio. If duplicates are observed in ``id_pairs``. See Also -------- skbio.diversity.beta_diversity skbio.diversity.get_beta_diversity_metrics """ # @deprecated _warn_deprecated( partial_beta_diversity, "0.5.0", msg="The return type is unstable. Developer caution is advised. 
The resulting " "DistanceMatrix object will include zeros when distance has not been " "calculated, and therefore can be misleading.", ) if validate: counts = _validate_counts_matrix(counts, ids=ids) id_pairs = list(id_pairs) all_ids_in_pairs = set(itertools.chain.from_iterable(id_pairs)) if not all_ids_in_pairs.issubset(ids): raise ValueError("`id_pairs` are not a subset of `ids`") hashes = {i for i in id_pairs}.union({i[::-1] for i in id_pairs}) if len(hashes) != len(id_pairs) * 2: raise ValueError("A duplicate or a self-self pair was observed.") if metric == "unweighted_unifrac": counts = _quantitative_to_qualitative_counts(counts) taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) metric, counts_by_node = _setup_multiple_unweighted_unifrac( counts, taxa=taxa, tree=tree, validate=validate ) counts = counts_by_node elif metric == "weighted_unifrac": # get the value for normalized. if it was not provided, it will fall # back to the default value inside of _weighted_unifrac_pdist_f normalized = kwargs.pop("normalized", _normalize_weighted_unifrac_by_default) taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) metric, counts_by_node = _setup_multiple_weighted_unifrac( counts, taxa=taxa, tree=tree, normalized=normalized, validate=validate ) counts = counts_by_node elif callable(metric): metric = functools.partial(metric, **kwargs) # remove all values from kwargs, since they have already been provided # through the partial kwargs = {} else: raise ValueError( "partial_beta_diversity is only compatible with " "optimized unifrac methods and callable functions." ) dm = np.zeros((len(ids), len(ids)), dtype=float) id_index = {id_: idx for idx, id_ in enumerate(ids)} id_pairs_indexed = ((id_index[u], id_index[v]) for u, v in id_pairs) for u, v in id_pairs_indexed: dm[u, v] = metric(counts[u], counts[v], **kwargs) return DistanceMatrix(dm + dm.T, ids) # The following two lists are adapted from sklearn.metrics.pairwise. 
Metrics # that are not available in SciPy (only in sklearn) have been removed from # the list of _valid_beta_metrics here (those are: manhatten, wminkowski, # nan_euclidean, and haversine) _valid_beta_metrics = [ "euclidean", "cityblock", "braycurtis", "canberra", "chebyshev", "correlation", "cosine", "dice", "hamming", "jaccard", "mahalanobis", "manhattan", # aliases to "cityblock" in beta_diversity "matching", "minkowski", "rogerstanimoto", "russellrao", "seuclidean", "sokalmichener", "sokalsneath", "sqeuclidean", "yule", ] _qualitative_beta_metrics = [ "dice", "jaccard", "matching", "rogerstanimoto", "russellrao", "sokalmichener", "sokalsneath", "yule", ] def beta_diversity( metric, counts, ids=None, validate=True, pairwise_func=None, **kwargs ): """Compute distances between all pairs of samples. Parameters ---------- metric : str, callable The pairwise distance function to apply. See the scipy ``pdist`` docs and the scikit-bio functions linked under *See Also* for available metrics. Passing metrics as a strings is preferable as this often results in an optimized version of the metric being used. counts : 2D array_like of ints or floats, 2D pandas DataFrame, Table Matrix containing count/abundance data where each row contains counts of taxa in a given sample. ids : iterable of strs, optional Identifiers for each sample in ``counts``. By default, samples will be assigned integer identifiers in the order that they were provided (where the type of the identifiers will be ``str``). validate : bool, optional If ``False``, validation of the input won't be performed. This step can be slow, so if validation is run elsewhere it can be disabled here. However, invalid input data can lead to invalid results or error messages that are hard to interpret, so this step should not be bypassed if you're not certain that your input data are valid. See :mod:`skbio.diversity` for the description of what validation entails so you can determine if you can safely disable validation. 
pairwise_func : callable, optional The function to use for computing pairwise distances. This function must take ``counts`` and ``metric`` and return a square, hollow, 2-D ``numpy.ndarray`` of dissimilarities (floats). Examples of functions that can be provided are ``scipy.spatial.distance.pdist`` and ``sklearn.metrics.pairwise_distances``. By default, ``scipy.spatial.distance.pdist`` will be used. kwargs : kwargs, optional Metric-specific parameters. Returns ------- skbio.DistanceMatrix Distances between all pairs of samples (i.e., rows). The number of rows and columns will be equal to the number of rows in ``counts``. Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. Exact error will depend on what was invalid. iTypeError If invalid method-specific parameters are provided. See Also -------- skbio.diversity skbio.diversity.beta skbio.diversity.get_beta_diversity_metrics skbio.diversity.alpha_diversity scipy.spatial.distance.pdist sklearn.metrics.pairwise_distances """ if isinstance(counts, skbio.Table): counts, ids = _validate_table(counts, ids, kwargs) if isinstance(counts, pd.DataFrame) and ids is None: ids = list(counts.index) if validate: counts = _validate_counts_matrix(counts, ids=ids) if 0 in counts.shape: # if the input counts are empty, return an empty DistanceMatrix. # this check is not necessary for scipy.spatial.distance.pdist but # it is necessary for sklearn.metrics.pairwise_distances where the # latter raises an exception over empty data. return DistanceMatrix(np.zeros((len(ids), len(ids))), ids) if metric == "unweighted_unifrac": counts = _quantitative_to_qualitative_counts(counts) taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) metric, counts_by_node = _setup_multiple_unweighted_unifrac( counts, taxa=taxa, tree=tree, validate=validate ) counts = counts_by_node elif metric == "weighted_unifrac": # get the value for normalized. 
if it was not provided, it will fall # back to the default value inside of _weighted_unifrac_pdist_f normalized = kwargs.pop("normalized", _normalize_weighted_unifrac_by_default) taxa, tree, kwargs = _get_phylogenetic_kwargs(counts, **kwargs) metric, counts_by_node = _setup_multiple_weighted_unifrac( counts, taxa=taxa, tree=tree, normalized=normalized, validate=validate ) counts = counts_by_node elif metric == "manhattan": metric = "cityblock" elif callable(metric): metric = functools.partial(metric, **kwargs) # remove all values from kwargs, since they have already been provided # through the partial kwargs = {} elif metric in _qualitative_beta_metrics: counts = _quantitative_to_qualitative_counts(counts) elif metric not in _valid_beta_metrics: raise ValueError( "Metric %s is not available. " "Only the following metrics can be passed as strings to " "beta_diversity as we know whether each of these should be " "treated as a qualitative or quantitative metric. Other metrics " "can be provided as functions.\n Available metrics are: %s" % (metric, ", ".join(_valid_beta_metrics)) ) else: # metric is a string that scikit-bio doesn't know about, for # example one of the SciPy metrics pass if pairwise_func is None: pairwise_func = scipy.spatial.distance.pdist distances = pairwise_func(counts, metric=metric, **kwargs) return DistanceMatrix(distances, ids) scikit-bio-0.6.2/skbio/diversity/_phylogenetic.pyx000066400000000000000000000155411464262511300223120ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np cimport numpy as np cimport cython DTYPE = np.int64 ctypedef np.npy_intp intp_t @cython.boundscheck(False) @cython.wraparound(False) def _tip_distances(np.ndarray[np.double_t, ndim=1] a, object t, np.ndarray[intp_t, ndim=1] tip_indices): """Sets each tip to its distance from the root Parameters ---------- a : np.ndarray of double A matrix in which each row corresponds to a node in ``t``. t : skbio.tree.TreeNode The tree that corresponds to the rows in ``a``. tip_indices : np.ndarray of int The index positions in ``a`` of the tips in ``t``. Returns ------- np.ndarray of double A matrix in which each row corresponds to a node in ``t``, Only the rows that correspond to tips are nonzero, and the values in these rows are the distance from that tip to the root of the tree. """ cdef: object n Py_ssize_t i, p_i, n_rows np.ndarray[np.double_t, ndim=1] mask np.ndarray[np.double_t, ndim=1] tip_ds = a.copy() # preorder reduction over the tree to gather distances at the tips n_rows = tip_ds.shape[0] for n in t.preorder(include_self=False): i = n.id p_i = n.parent.id tip_ds[i] += tip_ds[p_i] # construct a mask that represents the locations of the tips mask = np.zeros(n_rows, dtype=np.double) for i in range(tip_indices.shape[0]): mask[tip_indices[i]] = 1.0 # apply the mask such that tip_ds only includes values which correspond to # the tips of the tree. for i in range(n_rows): tip_ds[i] *= mask[i] return tip_ds @cython.boundscheck(False) @cython.wraparound(False) cdef _traverse_reduce(np.ndarray[intp_t, ndim=2] child_index, np.ndarray[intp_t, ndim=2] a): """Apply a[k] = sum[i:j] Parameters ---------- child_index: np.array of int A matrix in which the first column corresponds to an index position in ``a``, which represents a node in a tree. 
The second column is the starting index in ``a`` for the node's children, and the third column is the ending index in ``a`` for the node's children. a : np.ndarray of int A matrix of the environment data. Each row corresponds to a node in a tree, and each column corresponds to an environment. On input, it is assumed that only tips have counts. Notes ----- This is effectively a postorder reduction over the tree. For example, given the following tree: /-A /E-------| | \-B -root----| | /-C \F-------| \-D And assuming counts for [A, B, C, D] in environment FOO of [1, 1, 1, 0] and counts for environment BAR of [0, 1, 1, 1], the input counts matrix ``a`` would be: [1 0 -> A 1 1 -> B 1 1 -> C 0 1 -> D 0 0 -> E 0 0 -> F 0 0] -> root The method will perform the following reduction: [1 0 [1 0 [1 0 [1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 -> 0 1 -> 0 1 -> 0 1 0 0 2 1 2 1 2 1 0 0 0 0 1 2 1 2 0 0] 0 0] 0 0] 3 3] The index positions of the above are encoded in ``child_index`` which describes the node to aggregate into, and the start and stop index positions of the nodes immediate descendents. This method operates inplace on ``a`` """ cdef: Py_ssize_t i, j, k intp_t node, start, end intp_t n_envs = a.shape[1] # possible GPGPU target for i in range(child_index.shape[0]): node = child_index[i, 0] start = child_index[i, 1] end = child_index[i, 2] for j in range(start, end + 1): for k in range(n_envs): a[node, k] += a[j, k] @cython.boundscheck(False) @cython.wraparound(False) def _nodes_by_counts(np.ndarray counts, np.ndarray tip_ids, dict indexed): """Construct the count array, and the counts up the tree Parameters ---------- counts : np.array of int A 1D or 2D vector in which each row corresponds to the observed counts in an environment. The rows are expected to be in order with respect to `tip_ids`. tip_ids : np.array of str A vector of tip names that correspond to the columns in the `counts` matrix. indexed : dict The result of `index_tree`. 
Returns ------- np.array of int The observed counts of every node and the counts if its descendents. """ cdef: np.ndarray nodes, observed_ids np.ndarray[intp_t, ndim=2] count_array, counts_t np.ndarray[intp_t, ndim=1] observed_indices, taxa_in_nodes Py_ssize_t i, j set observed_ids_set object n dict node_lookup intp_t n_count_vectors, n_count_taxa nodes = indexed['name'] # allow counts to be a vector counts = np.atleast_2d(counts) counts = counts.astype(DTYPE, copy=False) # determine observed IDs. It may be possible to unroll these calls to # squeeze a little more performance observed_indices = counts.sum(0).nonzero()[0] observed_ids = tip_ids[observed_indices] observed_ids_set = set(observed_ids) # construct mappings of the observed to their positions in the node array node_lookup = {} for i in range(nodes.shape[0]): n = nodes[i] if n in observed_ids_set: node_lookup[n] = i # determine the positions of the observed IDs in nodes taxa_in_nodes = np.zeros(observed_ids.shape[0], dtype=DTYPE) for i in range(observed_ids.shape[0]): n = observed_ids[i] taxa_in_nodes[i] = node_lookup[n] # count_array has a row per node (not tip) and a column per env. n_count_vectors = counts.shape[0] count_array = np.zeros((nodes.shape[0], n_count_vectors), dtype=DTYPE) # populate the counts array with the counts of each observation in each # env counts_t = counts.transpose() n_count_taxa = taxa_in_nodes.shape[0] for i in range(n_count_taxa): for j in range(n_count_vectors): count_array[taxa_in_nodes[i], j] = counts_t[observed_indices[i], j] child_index = indexed['child_index'].astype(DTYPE, copy=False) _traverse_reduce(child_index, count_array) return count_array scikit-bio-0.6.2/skbio/diversity/_util.py000066400000000000000000000222371464262511300204050ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import collections.abc import numpy as np import pandas as pd from skbio.tree import DuplicateNodeError, MissingNodeError from skbio.diversity._phylogenetic import _nodes_by_counts def _validate_counts_vector(counts, cast_int=False): """Validate and convert input to an acceptable counts vector type. Parameters ---------- counts : array_like of int or float of shape (n_taxa,) Vector of counts. cast_int : bool, optional Cast values into integers, if not already. ``False`` by default. Returns ------- ndarray of int or float of shape (n_taxa,) Valid counts vector. Raises ------ ValueError If input array has an invalid data type. ValueError If input array is not 1-D. ValueError If there are negative values. Notes ----- This function will return the original ``counts`` if it is already a valid counts vector. Otherwise it will return an edited copy that is valid. The data type of counts must be any subtype of ``np.integer`` (integers) or ``np.floating`` (floating-point numbers; excluding complex numbers) [1]_. See Also -------- _validate_counts_matrix References ---------- .. [1] https://numpy.org/doc/stable/reference/arrays.scalars.html """ counts = np.asarray(counts) # counts must be int or float if np.issubdtype(dtype := counts.dtype, np.floating): # cast values into integers if cast_int: counts = counts.astype(int) elif not np.issubdtype(dtype, np.integer) and dtype is not np.dtype("bool"): raise ValueError("Counts must be integers or floating-point numbers.") if counts.ndim != 1: raise ValueError("Only 1-D vectors are supported.") if (counts < 0).any(): raise ValueError("Counts vector cannot contain negative values.") return counts def _validate_counts_matrix(counts, ids=None, cast_int=False): """Validate and convert input to an acceptable counts matrix type. 
Parameters ---------- counts : array_like of shape (n_samples, n_taxa) Matrix of counts. ids : array_like of shape (n_samples,), optional Sample IDs to check against counts dimensions. cast_int : bool, optional Cast values into integers, if not already. ``False`` by default. Returns ------- ndarray of shape (n_samples, n_taxa) Valid counts matrix. See Also -------- _validate_counts_vector """ lenerr = "Number of rows in `counts` must be equal to number of provided `ids`." # handle pandas data frame if isinstance(counts, pd.DataFrame): if ids is not None and counts.shape[0] != len(ids): raise ValueError(lenerr) counts = counts.to_numpy() else: # convert counts into a 2-D array # will raise ValueError if row lengths are unequal counts = np.atleast_2d(counts) if counts.ndim > 2: raise ValueError( "Only 1-D and 2-D array-like objects can be provided as input. " f"Provided object has {counts.ndim} dimensions." ) if ids is not None and counts.shape[0] != len(ids): raise ValueError(lenerr) # counts must be int or float if np.issubdtype(dtype := counts.dtype, np.floating): # cast values into integers if cast_int: counts = counts.astype(int) elif not np.issubdtype(dtype, np.integer) and dtype is not np.dtype("bool"): raise ValueError("Counts must be integers or floating-point numbers.") # negative values are not allowed # TODO: `counts < 0` creates a Boolean array of the same shape, which could be # memory-inefficient if the input array is very large. Should optimize. 
# See: https://stackoverflow.com/questions/75553212/ if (counts < 0).any(): raise ValueError("Counts cannot contain negative values.") return counts def _validate_taxa_and_tree(counts, taxa, tree, rooted=True): """Validate taxa and tree prior to calculating phylogenetic diversity metrics.""" len_taxa = len(taxa) set_taxa = set(taxa) if len_taxa != len(set_taxa): raise ValueError("``taxa`` cannot contain duplicated ids.") if len(counts) != len_taxa: raise ValueError("``taxa`` must be the same length as ``counts`` " "vector(s).") if len(tree.root().children) == 0: raise ValueError("``tree`` must contain more than just a root node.") if rooted is True and len(tree.root().children) > 2: # this is an imperfect check for whether the tree is rooted or not. # can this be improved? raise ValueError("``tree`` must be rooted.") # all nodes (except the root node) have corresponding branch lengths # all tip names in tree are unique # all taxa correspond to tip names in tree branch_lengths = [] tip_names = [] for e in tree.traverse(): if not e.is_root(): branch_lengths.append(e.length) if e.is_tip(): tip_names.append(e.name) set_tip_names = set(tip_names) if len(tip_names) != len(set_tip_names): raise DuplicateNodeError("All tip names must be unique.") if np.array([branch is None for branch in branch_lengths]).any(): raise ValueError("All non-root nodes in ``tree`` must have a branch " "length.") missing_tip_names = set_taxa - set_tip_names if missing_tip_names != set(): n_missing_tip_names = len(missing_tip_names) raise MissingNodeError( "All ``taxa`` must be present as tip names " "in ``tree``. ``taxa`` not corresponding to " "tip names (n=%d): %s" % (n_missing_tip_names, " ".join(missing_tip_names)) ) def _vectorize_counts_and_tree(counts, taxa, tree): """Index tree and convert counts to np.array in corresponding order. Parameters ---------- counts : array_like of shape (n_samples, n_taxa) or (n_taxa,) Counts/abundances of taxa in one or multiple samples. 
taxa : array_like of shape (n_taxa,) Taxon IDs corresponding to tip names in `tree`. tree : skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of `taxa`, but not a subset. Returns ------- ndarray of shape (n_samples, n_nodes) Total counts/abundances of taxa descending from individual nodes of the tree. dict of array Indexed tree. See `to_array`. ndarray of shape (n_nodes,) Branch lengths of corresponding nodes of the tree. See Also -------- skbio.tree.TreeNode.to_array """ tree_index = tree.to_array(nan_length_value=0.0) taxa = np.asarray(taxa) counts = np.atleast_2d(counts) counts_by_node = _nodes_by_counts(counts, taxa, tree_index) branch_lengths = tree_index["length"] # branch_lengths is just a reference to the array inside of tree_index, # but it's used so much that it's convenient to just pull it out here. return counts_by_node.T, tree_index, branch_lengths def _get_phylogenetic_kwargs(counts, **kwargs): try: taxa = kwargs.pop("taxa") except KeyError: raise ValueError("``taxa`` is required for phylogenetic diversity " "metrics.") try: tree = kwargs.pop("tree") except KeyError: raise ValueError("``tree`` is required for phylogenetic diversity " "metrics.") return taxa, tree, kwargs def _quantitative_to_qualitative_counts(counts): return counts > 0.0 def _check_taxa_alias(taxa, tree, otu_ids): # make `taxa` an alias of `taxa`; for backward compatibility if taxa is None: if otu_ids is None: raise ValueError("A list of taxon IDs must be provided.") taxa = otu_ids if tree is None: raise ValueError("A phylogenetic tree must be provided.") return taxa def _table_to_numpy(table): """Convert a skbio.table.Table to a dense representation. This is a stop-gap solution to allow current Table objects to interoperate with existing driver methods, until they transition to be "sparse" aware. 
""" sample_ids = list(table.ids()) obs_ids = list(table.ids(axis="observation")) if table.is_empty(): counts = np.array([[]] * len(sample_ids)) else: counts = table.matrix_data.T.toarray() return counts, sample_ids, obs_ids def _validate_table(counts, ids, kwargs): """Disallow overriding of sample and feature IDs. WARNING: this implicitly adds an entry to kwargs IF `tree` is present. """ if ids is not None: raise ValueError("Cannot provide a `Table` as `counts` and `ids`") if "taxa" in kwargs: raise ValueError("Cannot provide a `Table` as `counts` and `taxa`") dense_counts, sample_ids, feature_ids = _table_to_numpy(counts) if "tree" in kwargs: kwargs["taxa"] = feature_ids return dense_counts, sample_ids scikit-bio-0.6.2/skbio/diversity/alpha/000077500000000000000000000000001464262511300177765ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/alpha/__init__.py000066400000000000000000000112131464262511300221050ustar00rootroot00000000000000r"""Alpha diversity measures (:mod:`skbio.diversity.alpha`) ======================================================= .. currentmodule:: skbio.diversity.alpha This package provides implementations of various alpha diversity [1]_ metrics, including measures of richness, diversity, evenness, dominance, and coverage. Some functions generate confidence intervals (CIs). These functions have the suffix ``_ci``. Richness metrics ---------------- **Richness** [2]_ measures the number of species (taxa) in a community. Due to incomplete sampling, the number of observed species (``sobs``) in a sample is usually lower than the true number of species in the community. Metrics have been proposed to estimate the latter based on the distribution of observed species in the sample. .. 
autosummary:: :toctree: ace chao1 chao1_ci doubles faith_pd margalef menhinick michaelis_menten_fit observed_features observed_otus osd singles sobs Diversity metrics ----------------- **Diversity** [3]_ measures the number and relative abundances of species (taxa) in a community. It combines richness and evenness. Some diversity metrics describe the effective number of species (a.k.a., true diversity) -- the number of equally-abundant species that produce the same diversity measurement. .. autosummary:: :toctree: brillouin_d enspie fisher_alpha hill inv_simpson kempton_taylor_q phydiv renyi shannon simpson tsallis Evenness metrics ---------------- **Evenness** [4]_ (or equitability) measures the closeness of species (taxa) in a community in terms of abundance (number of individuals within the species). The calculation of evenness involves the relative abundances of species. .. autosummary:: :toctree: heip_e mcintosh_e pielou_e simpson_e Dominance metrics ----------------- **Dominance** [5]_ (or concentration) measures the degree that one or a few most abundant species (taxa) represent the great majority of a community. It can be considered as a measure of community unevenness. It should be noted that higher dominance corresponds to lower biodiversity. .. autosummary:: :toctree: berger_parker_d dominance gini_index mcintosh_d simpson_d strong Coverage metrics ---------------- **Coverage** [6]_ measures the proportion of individuals of a community that have been observed (or unobserved) in a sample. It describes the completeness of sampling. .. autosummary:: :toctree: esty_ci goods_coverage lladser_ci lladser_pe robbins References ---------- .. [1] https://en.wikipedia.org/wiki/Alpha_diversity .. [2] https://en.wikipedia.org/wiki/Species_richness .. [3] https://en.wikipedia.org/wiki/Species_diversity .. [4] https://en.wikipedia.org/wiki/Species_evenness .. [5] https://en.wikipedia.org/wiki/Dominance_%28ecology%29 .. [6] Good, I. J. (1953). 
The population frequencies of species and the estimation of population parameters. Biometrika, 40(3-4), 237-264. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from ._base import ( berger_parker_d, brillouin_d, dominance, doubles, enspie, esty_ci, fisher_alpha, goods_coverage, heip_e, hill, inv_simpson, kempton_taylor_q, margalef, mcintosh_d, mcintosh_e, menhinick, michaelis_menten_fit, observed_features, observed_otus, osd, pielou_e, renyi, robbins, shannon, simpson, simpson_d, simpson_e, singles, sobs, strong, tsallis, ) from ._ace import ace from ._chao1 import chao1, chao1_ci from ._gini import gini_index from ._lladser import lladser_pe, lladser_ci from ._pd import faith_pd, phydiv __all__ = [ "ace", "chao1", "chao1_ci", "berger_parker_d", "brillouin_d", "dominance", "doubles", "enspie", "esty_ci", "faith_pd", "fisher_alpha", "gini_index", "goods_coverage", "heip_e", "hill", "inv_simpson", "kempton_taylor_q", "lladser_pe", "lladser_ci", "margalef", "mcintosh_d", "mcintosh_e", "menhinick", "michaelis_menten_fit", "observed_features", "observed_otus", "osd", "phydiv", "pielou_e", "renyi", "robbins", "shannon", "simpson", "simpson_d", "simpson_e", "singles", "sobs", "strong", "tsallis", ] scikit-bio-0.6.2/skbio/diversity/alpha/_ace.py000066400000000000000000000107271464262511300212460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
import numpy as np

from skbio.diversity._util import _validate_counts_vector


def ace(counts, rare_threshold=10):
    r"""Calculate the ACE metric (Abundance-based Coverage Estimator).

    The ACE metric is defined as:

    .. math::

       S_{ace}=S_{abund}+\frac{S_{rare}}{C_{ace}}+
       \frac{F_1}{C_{ace}}\gamma^2_{ace}

    where :math:`S_{abund}` is the number of abundant taxa (with more than
    ``rare_threshold`` individuals), :math:`S_{rare}` is the number of rare
    taxa (with at most ``rare_threshold`` individuals), :math:`C_{ace}` is
    the sample abundance coverage estimator, :math:`F_1` is the frequency of
    singletons, and :math:`\gamma^2_{ace}` is the estimated coefficient of
    variation for rare taxa.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    rare_threshold : int, optional
        Threshold at which a taxon containing as many or fewer individuals
        will be considered rare.

    Returns
    -------
    double
        Computed ACE metric.

    Raises
    ------
    ValueError
        If every rare taxon is a singleton.

    Notes
    -----
    ACE was first introduced by Chao & Lee (1992) and Chao, Ma & Yang
    (1993). The implementation here follows the description in the
    EstimateS manual (http://viceroy.eeb.uconn.edu/estimates/), and the
    default ``rare_threshold`` of 10 follows Chao et al. (2000).

    If no rare taxa exist, the number of abundant taxa is returned. Zero
    counts (taxa known to exist but absent from the sample) are ignored
    when counting rare taxa.

    """
    counts = _validate_counts_vector(counts)
    freq_counts = np.bincount(counts)

    # Guard against degenerate samples where no taxon was observed at
    # least once, in which case ``freq_counts`` has no singleton bin.
    singles = freq_counts[1] if freq_counts.size > 1 else 0

    s_rare = _taxa_rare(freq_counts, rare_threshold)
    if singles > 0 and singles == s_rare:
        raise ValueError(
            "The only rare taxa are singletons, so the ACE "
            "metric is undefined. EstimateS suggests using "
            "bias-corrected Chao1 instead."
        )

    s_abun = _taxa_abundant(freq_counts, rare_threshold)
    if s_rare == 0:
        return s_abun

    n_rare = _number_rare(freq_counts, rare_threshold)
    c_ace = 1 - singles / n_rare

    top = s_rare * _number_rare(freq_counts, rare_threshold, gamma=True)
    bottom = c_ace * n_rare * (n_rare - 1)
    gamma_ace = (top / bottom) - 1
    if gamma_ace < 0:
        gamma_ace = 0

    return s_abun + (s_rare / c_ace) + ((singles / c_ace) * gamma_ace)


def _taxa_rare(freq_counts, rare_threshold):
    """Count the number of rare taxa (frequency <= ``rare_threshold``)."""
    return freq_counts[1 : rare_threshold + 1].sum()


def _taxa_abundant(freq_counts, rare_threshold):
    """Count the number of abundant taxa (frequency > ``rare_threshold``)."""
    return freq_counts[rare_threshold + 1 :].sum()


def _number_rare(freq_counts, rare_threshold, gamma=False):
    """Return the number of individuals in rare taxa.

    With ``gamma=True``, each term is additionally weighted by ``i - 1``,
    yielding the sum used in the coefficient-of-variation estimate.
    """
    freqs = freq_counts[: rare_threshold + 1]
    # Vectorized replacement of the original accumulation loop:
    # sum(i * f_i) or, for gamma, sum(i * (i - 1) * f_i).
    i = np.arange(freqs.size)
    weights = i * (i - 1) if gamma else i
    return (weights * freqs).sum()
def _validate_alpha(empty=None, cast_int=False):
    """Build a decorator that validates an alpha metric's counts vector.

    The decorated function's first positional argument must be a 1-D
    vector of taxon counts. The vector is validated and zero counts
    (taxa absent from the community) are removed before the metric runs.

    Parameters
    ----------
    empty : any, optional
        If set, return this value instead of calling the metric when the
        community is empty (no taxa, or all counts zero).
    cast_int : bool, optional
        Cast values into integers, if not already. ``False`` by default.

    Returns
    -------
    callable
        Decorator to apply to an alpha diversity function.

    """

    def decorator(func):
        @functools.wraps(func)
        def wrapper(counts, *args, **kwargs):
            counts = _validate_counts_vector(counts, cast_int)

            # Absent taxa (zero counts) carry no information for alpha
            # diversity; strip them before computing the metric.
            present = counts != 0
            if not present.all():
                counts = counts[present]

            # Short-circuit for an empty community when a default is set.
            if empty is not None and counts.size == 0:
                return empty

            return func(counts, *args, **kwargs)

        return wrapper

    return decorator


@_validate_alpha(empty=np.nan)
def berger_parker_d(counts):
    r"""Calculate Berger-Parker dominance index.

    The index is the fraction of the sample contributed by the single most
    abundant taxon (Berger & Parker, 1970):

    .. math:: d = \frac{n_{max}}{N}

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Berger-Parker dominance index.

    """
    total = counts.sum()
    return counts.max() / total


@_validate_alpha(empty=np.nan)
def brillouin_d(counts):
    r"""Calculate Brillouin's diversity index.

    Defined as (Brillouin, 1956):

    .. math:: H_B = \frac{\ln N!-\sum_{i=1}^S{\ln n_i!}}{N}

    where :math:`N` is the total number of individuals and :math:`n_i` is
    the count of the :math:`i`-th taxon.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Brillouin's diversity index.

    """
    total = counts.sum()
    # gammaln(x + 1) == ln(x!), computed without overflow
    return (gammaln(total + 1) - gammaln(counts + 1).sum()) / total


@_validate_alpha(empty=np.nan)
def dominance(counts, finite=False):
    r"""Calculate Simpson's dominance index.

    Simpson's :math:`D` is the probability that two randomly selected
    individuals belong to the same taxon (Simpson, 1949):

    .. math:: D = \sum_{i=1}^S{p_i^2}

    With ``finite=True`` a finite-sample correction is applied, modelling
    sampling without replacement:

    .. math:: D = \frac{\sum_{i=1}^s{n_i(n_i - 1))}}{N(N - 1)}

    :math:`D` ranges from 0 (no dominance) to 1 (complete dominance). Note
    that :math:`D` measures dominance, not diversity; see ``simpson``
    (:math:`1 - D`) and ``inv_simpson`` (:math:`1 / D`) for diversity.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    finite : bool, optional
        If ``True``, correct for finite sampling.

    Returns
    -------
    float
        Simpson's dominance index.

    See Also
    --------
    simpson

    """
    total = counts.sum()
    if finite:
        return (counts * (counts - 1)).sum() / (total * (total - 1))
    return ((counts / total) ** 2).sum()


@_validate_alpha()
def doubles(counts):
    """Calculate number of double-occurrence taxa (doubletons).

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Doubleton count.

    """
    return (counts == 2).sum()


def enspie(counts, finite=False):
    r"""Calculate ENS_pie alpha diversity measure.

    The effective number of species (ENS) derived from Hurlbert's
    probability of interspecific encounter (PIE):

    .. math:: ENS_{pie} = \frac{1}{\sum_{i=1}^S{p_i^2}}

    This is identical to the inverse Simpson index; ``enspie`` is an alias
    for ``inv_simpson``.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    finite : bool, optional
        If ``True``, correct for finite sampling.

    Returns
    -------
    float
        ENS_pie alpha diversity measure.

    See Also
    --------
    inv_simpson
    dominance

    """
    return inv_simpson(counts, finite=finite)
@_validate_alpha(empty=np.nan)
def esty_ci(counts):
    r"""Calculate Esty's confidence interval of Good's coverage estimator.

    The 95% interval (Esty, 1983) is:

    .. math:: F_1/N \pm z\sqrt{W}

    with

    .. math:: W = \frac{F_1(N-F_1)+2NF_2}{N^3}

    where :math:`F_1` and :math:`F_2` are the numbers of singleton and
    doubleton taxa, :math:`N` is the total number of individuals, and
    :math:`z` is the normal quantile for 95% confidence.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    tuple
        Esty's confidence interval as ``(lower_bound, upper_bound)``.

    See Also
    --------
    goods_coverage

    """
    total = counts.sum()
    singletons = (counts == 1).sum()
    doubletons = (counts == 2).sum()

    z = 1.959963985  # hardcoded normal quantile for a 95% CI
    variance = (singletons * (total - singletons) + 2 * total * doubletons) / (
        total**3
    )

    center = singletons / total
    margin = z * np.sqrt(variance)
    return center - margin, center + margin


@_validate_alpha(empty=np.nan)
def fisher_alpha(counts):
    r"""Calculate Fisher's alpha, a metric of diversity.

    Fisher's alpha (Fisher, Corbet & Williams, 1943) is the solution
    :math:`\alpha` of:

    .. math:: S=\alpha\ln(1+\frac{N}{\alpha})

    where :math:`S` is the number of taxa and :math:`N` the total number
    of individuals. There is no analytical solution; this implementation
    minimizes the squared residual with SciPy's ``minimize_scalar`` and is
    deterministic.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Fisher's alpha. ``+inf`` when every taxon is a singleton.

    Raises
    ------
    RuntimeError
        If the optimizer fails to solve for Fisher's alpha.

    """
    total = counts.sum()
    n_taxa = counts.size

    # alpha diverges when every taxon is observed exactly once
    if total == n_taxa:
        return np.inf

    # squared residual of S = alpha * ln(1 + N / alpha), alpha > 0
    def objective(x):
        if x <= 0:
            return np.inf
        return (x * np.log(1 + (total / x)) - n_taxa) ** 2

    # Brent's algorithm (the default method)
    with np.errstate(invalid="ignore"):
        result = minimize_scalar(objective)

    if not result.success:
        raise RuntimeError("Optimizer failed to solve for Fisher's alpha.")

    return result.x


@_validate_alpha(empty=np.nan)
def goods_coverage(counts):
    r"""Calculate Good's coverage estimator.

    Good's coverage (Good, 1953), a.k.a. the Good-Turing estimator,
    estimates the proportion of the population represented in the sample:

    .. math:: C = 1 - \frac{F_1}{N}

    where :math:`F_1` is the number of singleton taxa and :math:`N` is the
    total number of individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Good's coverage estimator.

    See Also
    --------
    esty_ci
    robbins

    """
    singletons = (counts == 1).sum()
    return 1 - (singletons / counts.sum())


@_validate_alpha()
def heip_e(counts):
    r"""Calculate Heip's evenness measure.

    Defined as (Heip, 1974):

    .. math:: \frac{(e^H-1)}{(S-1)}

    where :math:`H` is Shannon's diversity index and :math:`S` is the
    number of taxa. Returns 1.0 when there is exactly one taxon, and NaN
    for an empty community.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Heip's evenness measure.

    See Also
    --------
    shannon
    pielou_e

    """
    n_taxa = counts.size
    if n_taxa == 0:
        return np.nan
    if n_taxa == 1:
        return 1.0
    return (shannon(counts, exp=True) - 1) / (n_taxa - 1)
@_validate_alpha(empty=np.nan)
def hill(counts, order=2):
    r"""Calculate Hill number.

    The Hill number (Hill, 1973) is a generalized measure of the effective
    number of species:

    .. math:: ^qD = (\sum_{i=1}^S p_i^q)^{\frac{1}{1-q}}

    Special cases by order :math:`q`: observed richness (:math:`q=0`),
    perplexity, i.e. exp(Shannon index) (:math:`q \to 1`), inverse Simpson
    index (:math:`q=2`), and the inverse of the Berger-Parker dominance
    index (:math:`q \to \infty`). Larger :math:`q` weights abundant taxa
    more heavily. Hill number equals the exponential of Renyi entropy.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    order : int or float, optional
        Order (:math:`q`). Ranges between 0 and infinity. Default is 2.

    Returns
    -------
    float
        Hill number.

    See Also
    --------
    inv_simpson
    renyi
    shannon
    sobs

    """
    fracs = counts / counts.sum()
    # q -> 1: exponential of Shannon entropy (perplexity)
    if order == 1:
        return _perplexity(fracs)
    # q -> infinity: inverse of the maximum proportion
    if np.isposinf(order):
        return 1 / fracs.max()
    return (fracs**order).sum() ** (1 / (1 - order))


@_validate_alpha(empty=np.nan)
def kempton_taylor_q(counts, lower_quantile=0.25, upper_quantile=0.75):
    r"""Calculate Kempton-Taylor Q index of alpha diversity.

    Estimates the slope of the cumulative abundance curve within the
    interquantile range (Kempton & Taylor, 1976):

    .. math:: Q = \frac{S_{lower..upper}}{\ln n_{lower} - \ln n_{upper}}

    By default the lower and upper quartiles are used. Quantiles are
    rounded inwards in this implementation, which follows the SDR-IV
    online manual and differs slightly from Magurran (1998); Magurran
    counts half of the taxa tied at each quantile boundary, but the
    justification for that is unclear, so the simpler form is kept here.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    lower_quantile : float, optional
        Lower bound of the interquantile range. Defaults to lower quartile.
    upper_quantile : float, optional
        Upper bound of the interquantile range. Defaults to upper quartile.

    Returns
    -------
    float
        Kempton-Taylor Q index of alpha diversity.

    """
    n_taxa = counts.size
    lo = int(np.ceil(n_taxa * lower_quantile))
    hi = int(n_taxa * upper_quantile)
    ranked = np.sort(counts)
    return (hi - lo) / np.log(ranked[hi] / ranked[lo])


def inv_simpson(counts, finite=False):
    r"""Calculate inverse Simpson index.

    The inverse Simpson index (Simpson, 1949), a.k.a. Simpson's reciprocal
    index:

    .. math:: 1 / D = \frac{1}{\sum_{i=1}^S{p_i^2}}

    It measures the effective number of species and is equivalent to the
    Hill number of order 2.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    finite : bool, optional
        If ``True``, correct for finite sampling when calculating :math:`D`.

    Returns
    -------
    float
        Inverse Simpson index.

    See Also
    --------
    dominance

    """
    return 1 / dominance(counts, finite=finite)


@_validate_alpha(empty=np.nan)
def margalef(counts):
    r"""Calculate Margalef's richness index.

    Defined as (Margalef, 1958):

    .. math:: D = \frac{(S - 1)}{\ln N}

    where :math:`S` is the number of taxa and :math:`N` the total number
    of individuals. Assumes log accumulation.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Margalef's richness index.

    See Also
    --------
    menhinick

    """
    total = counts.sum()
    # undefined for a single individual (ln 1 == 0)
    if total == 1:
        return np.nan
    return (counts.size - 1) / np.log(total)


@_validate_alpha(empty=np.nan)
def mcintosh_d(counts):
    r"""Calculate McIntosh dominance index.

    Defined as (McIntosh, 1967):

    .. math:: D = \frac{N - U}{N - \sqrt{N}}

    with :math:`U = \sqrt{\sum{{n_i}^2}}`, where :math:`n_i` is the count
    of the :math:`i`-th taxon and :math:`N` the total number of
    individuals.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        McIntosh dominance index.

    See Also
    --------
    mcintosh_e

    """
    total = counts.sum()
    # denominator vanishes for a single individual
    if total == 1:
        return np.nan
    u = np.sqrt((counts**2).sum())
    return (total - u) / (total - np.sqrt(total))
@_validate_alpha(empty=np.nan)
def mcintosh_e(counts):
    r"""Calculate McIntosh's evenness measure.

    Defined as (Heip & Engels, 1974):

    .. math:: E = \frac{\sqrt{\sum{n_i^2}}}{\sqrt{((N-S+1)^2 + S -1}}

    where :math:`n_i` is the count of the :math:`i`-th taxon, :math:`N`
    the total number of individuals, and :math:`S` the number of taxa.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        McIntosh evenness measure.

    See Also
    --------
    mcintosh_d

    """
    n_taxa = counts.size
    total = counts.sum()
    observed = np.sqrt((counts * counts).sum())
    expected = np.sqrt((total - n_taxa + 1) ** 2 + n_taxa - 1)
    return observed / expected


@_validate_alpha(empty=np.nan)
def menhinick(counts):
    r"""Calculate Menhinick's richness index.

    Defined as:

    .. math:: D_{Mn} = \frac{S}{\sqrt{N}}

    where :math:`S` is the number of taxa and :math:`N` the total number
    of individuals. Assumes square-root accumulation. Based on the
    description in Magurran (2004).

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    float
        Menhinick's richness index.

    See Also
    --------
    margalef

    """
    n_taxa = counts.size
    return n_taxa / np.sqrt(counts.sum())


@_validate_alpha(empty=np.nan)
def michaelis_menten_fit(counts, num_repeats=1, params_guess=None):
    r"""Calculate Michaelis-Menten fit to rarefaction curve of observed taxa.

    Estimates the asymptote :math:`S_{max}` of the rarefaction curve by
    fitting:

    .. math:: S = \frac{nS_{max}}{n+B}

    to datapoints for :math:`n=1,2,...,N`, where :math:`N` is the total
    number of individuals and :math:`S` is the richness observed in a
    random subsample of :math:`n` individuals. A nonlinear least-squares
    fit is used rather than the ML model of Raaijmakers (1987), whose
    error assumptions suit enzyme kinetics but not rarefaction data.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    num_repeats : int, optional
        The number of times to perform rarefaction (subsampling without
        replacement) at each value of :math:`n`.
    params_guess : tuple, optional
        Initial guess of :math:`S_{max}` and :math:`B`. Defaults to the
        observed richness (a lower bound on :math:`S_{max}`) and half the
        total number of individuals.

    Returns
    -------
    float
        Estimate of the :math:`S_{max}` parameter in the Michaelis-Menten
        equation.

    See Also
    --------
    skbio.stats.subsample_counts

    """
    total = counts.sum()

    if params_guess is None:
        # S_max must be at least the observed richness
        params_guess = (sobs(counts), int(round(total / 2)))

    # rarefaction curve: observed richness at each sampling depth,
    # averaged over ``num_repeats`` random subsamples
    xvals = np.arange(1, total + 1)
    ymtx = np.empty((num_repeats, len(xvals)), dtype=int)
    for rep in range(num_repeats):
        ymtx[rep] = np.asarray(
            [sobs(subsample_counts(counts, n)) for n in xvals], dtype=int
        )
    yvals = ymtx.mean(0)

    # squared residual of the Michaelis-Menten curve against the data
    def errfn(p, n, y):
        return (((p[0] * n / (p[1] + n)) - y) ** 2).sum()

    # minimize and return the fitted S_max
    return fmin_powell(
        errfn, params_guess, ftol=1e-5, args=(xvals, yvals), disp=False
    )[0]


def observed_features(counts):
    """Calculate the number of distinct features.

    ``observed_features`` is an alias for ``sobs``.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Distinct feature count.

    See Also
    --------
    sobs

    """
    return sobs(counts)


def observed_otus(counts):
    """Calculate the number of distinct OTUs.

    ``observed_otus`` is an alias for ``sobs``.

    .. deprecated:: 0.6.0
        Deprecated due to its usage of the historical term "OTU".

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    int
        Distinct OTU count.

    See Also
    --------
    sobs

    """
    # @deprecated
    _warn_deprecated(observed_otus, "0.6.0")
    return sobs(counts)


@_validate_alpha()
def osd(counts):
    """Calculate observed taxa, singletons, and doubletons.

    A convenience function used by other measures that rely on these
    three values.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.

    Returns
    -------
    osd : tuple
        Numbers of observed taxa, singletons, and doubletons.

    See Also
    --------
    sobs
    singles
    doubles

    """
    return counts.size, (counts == 1).sum(), (counts == 2).sum()


@_validate_alpha()
def pielou_e(counts, base=None):
    r"""Calculate Pielou's evenness index.

    Pielou's evenness (Pielou, 1966), a.k.a. Shannon's equitability:

    .. math:: J' = \frac{(H)}{\log(S)}

    i.e., the ratio of the observed Shannon index to its maximum possible
    value for :math:`S` taxa. Ranges between 0 and 1; returns 1.0 when
    there is exactly one taxon and NaN for an empty community.

    Parameters
    ----------
    counts : 1-D array_like, int
        Vector of counts.
    base : int or float, optional
        Logarithm base to use in the calculation. Default is ``e``.

    Returns
    -------
    float
        Pielou's evenness index.

    See Also
    --------
    shannon
    heip_e

    """
    n_taxa = counts.size
    if n_taxa == 0:
        return np.nan
    if n_taxa == 1:
        return 1.0
    observed = shannon(counts, base=base)
    maximum = np.log(n_taxa)
    if base is not None:
        maximum /= np.log(base)
    return observed / maximum
""" if (S := counts.size) == 0: return np.nan elif S == 1: return 0.0 probs = counts / counts.sum() # max-entropy if order == 0: qH = np.log(S) # Shannon entropy elif order == 1: qH = _entropy(probs) # min-entropy elif np.isposinf(order): qH = -np.log(probs.max()) else: qH = np.log((probs**order).sum()) / (1 - order) if base is not None: qH /= np.log(base) return qH @_validate_alpha(empty=np.nan) def robbins(counts): r"""Calculate Robbins' estimator for probability of unobserved outcomes. Robbins' estimator is defined as: .. math:: \frac{F_1}{N} where :math:`F_1` is the number of singleton taxa and and :math:`N` is the total number of individuals in the sample. The result can be interpreted as the probability of discovering a new taxon at the :math:`N`-th individual given the current :math:`N - 1` individuals. Parameters ---------- counts : 1-D array_like, int Vector of counts. Returns ------- float Robbins' estimator. See Also -------- goods_coverage Notes ----- Robbins' estimator is defined in [1]_. References ---------- .. [1] Robbins, H. E. (1968). Estimating the total probability of the unobserved outcomes of an experiment. Ann. Math. Statist., 39(6), 256-257. """ return (counts == 1).sum() / counts.sum() def _entropy(probs): """Calculate entropy.""" return (-probs * np.log(probs)).sum() def _perplexity(probs): """Calculate perplexity.""" return (probs**-probs).prod() @_validate_alpha(empty=np.nan) def shannon(counts, base=None, exp=False): r"""Calculate Shannon's diversity index. Shannon's diversity index, :math:`H'`, a.k.a., Shannon index, or Shannon- Wiener index, is equivalent to entropy in information theory. It is defined as: .. math:: H' = -\sum_{i=1}^S\left(p_i\log_b(p_i)\right) where :math:`S` is the number of taxa and :math:`p_i` is the proportion of the sample represented by taxon :math:`i`. The logarithm base :math:`b` defaults to ``e``, but may be 2, 10 or other custom values. 
The exponential of Shannon index, :math:`exp(H')`, measures the effective number of species (a.k.a., true diversity). It is equivalent to perplexity in information theory, or Hill number with order 1 (:math:`^1D`). The value is independent from the base: .. math:: exp(H') = b ^ {-\sum_{i=1}^S\left(p_i\log_b(p_i)\right)} = \prod_{i=1} ^{S}p_i^{-p_i} Parameters ---------- counts : 1-D array_like, int Vector of counts. base : int or float, optional Logarithm base to use in the calculation. Default is ``e``. .. versionchanged:: 0.6.1 The default logarithm base was changed from 2 to :math:`e` for consistency with the majority of literature. exp : bool, optional If ``True``, return the exponential of Shannon index. Returns ------- float Shannon's diversity index. Notes ----- Shannon index (i.e., entropy) was originally proposed in [1]_. The exponential of Shannon index (i.e., perplexity) was discussed in [2]_ in the context of community diversity. References ---------- .. [1] Shannon, C. E. (1948). A mathematical theory of communication. The Bell system technical journal, 27(3), 379-423. .. [2] Jost, L. (2006). Entropy and diversity. Oikos, 113(2), 363-375. """ probs = counts / counts.sum() # perplexity if exp is True: return _perplexity(probs) # entropy else: H = _entropy(probs) if base is not None: H /= np.log(base) return H def simpson(counts, finite=False): r"""Calculate Simpson's diversity index. Simpson's diversity index, a.k.a., Gini-Simpson index, or Gini impurity, is defined as: .. math:: 1 - \sum_{i=1}^S{p_i^2} where :math:`S` is the number of taxa and :math:`p_i` is the proportion of the sample represented by taxon :math:`i`. Therefore, Simpson's diversity index is also denoted as :math:`1 - D`, in which :math:`D` is the Simpson's dominance index. Simpson's diversity index can be interpreted as the probability that two randomly selected individuals belong to different taxa. It is also known as Hurlbert's probability of interspecific encounter (PIE). 
Parameters ---------- counts : 1-D array_like, int Vector of counts. finite : bool, optional If ``True``, correct for finite sampling when calculating :math:`D`. Returns ------- float Simpson's diversity index. See Also -------- dominance Notes ----- Simpson's diversity index was originally described in [1]_. Hurlbert's probability of interspecific encounter was described in [2]_. References ---------- .. [1] Simpson, E. H. (1949). Measurement of diversity. Nature, 163(4148), 688-688. .. [2] Hurlbert, S. H. (1971). The nonconcept of species diversity: a critique and alternative parameters. Ecology, 52(4), 577-586. """ return 1 - dominance(counts, finite=finite) def simpson_d(counts, finite=False): """Calculate Simpson's dominance index, a.k.a. Simpson's D. Parameters ---------- counts : 1-D array_like, int Vector of counts. finite : bool, optional If ``True``, correct for finite sampling. Returns ------- int Simpson's dominance index. See Also -------- dominance simpson simpson_e Notes ----- ``simpson_d`` is an alias for ``dominance``. """ return dominance(counts, finite=finite) @_validate_alpha(empty=np.nan) def simpson_e(counts): r"""Calculate Simpson's evenness index. Simpson's evenness (a.k.a., equitability) index :math:`E_D` is defined as: .. math:: E_D = \frac{1}{D \times S} where :math:`D` is the Simpson's dominance index and :math:`S` is the number of taxa in the sample. That is, :math:`E_D` is the ratio of the minimum-possible Simpson's dominance index when all taxa have the same number of individuals: :math:`D_{min} = 1 / S`, versus the actual Simpson's dominance index of the sample. Parameters ---------- counts : 1-D array_like, int Vector of counts. Returns ------- float Simpson's evenness index. See Also -------- dominance simpson Notes ----- The implementation here is based on the description given in [1]_ and [2]_. References ---------- .. [1] Simpson, E. H. (1949). Measurement of diversity. nature, 163(4148), 688-688. .. [2] Pielou, E. C. (1966). 
The measurement of diversity in different types of biological collections. Journal of theoretical biology, 13, 131-144. """ # Note: the finite version of simpson_e might be: 1 / (D(S + 1)), because # S + 1 is the maximum possible finite D given S. Otherwise, the result can # be greater than 1 for small samples. However, I didn't find literature # stating this. Therefore, the `finite` parameter is not used here. return 1 / (counts.size * dominance(counts)) @_validate_alpha() def singles(counts): """Calculate number of single-occurrence taxa (singletons). Parameters ---------- counts : 1-D array_like, int Vector of counts. Returns ------- int Singleton count. """ return (counts == 1).sum() @_validate_alpha() def sobs(counts): """Calculate the observed species richness of a sample. Observed species richness, usually denoted as :math:`S_{obs}` or simply :math:`S`, is the number of distinct species (i.e., taxa), or any discrete groups of biological entities found in a sample. It should be noted that observed species richness is smaller than or equal to the true species richness of a population from which the sample is collected. Parameters ---------- counts : 1-D array_like, int Vector of counts. Returns ------- int Observed species richness. See Also -------- observed_features """ return counts.size @_validate_alpha(empty=np.nan) def strong(counts): r"""Calculate Strong's dominance index. Strong's dominance index (:math:`D_w`) is defined as .. math:: D_w = max_i[(\frac{b_i}{N})-\frac{i}{S}] where :math:`b_i` is the sequential cumulative totaling of the :math:`i^{\text{th}}` taxon abundance values ranked from largest to smallest, :math:`N` is the total number of individuals in the sample, and :math:`S` is the number of taxa in the sample. The expression in brackets is computed for all taxa, and :math:`max_i` denotes the maximum value in brackets for any taxa. Parameters ---------- counts : 1-D array_like, int Vector of counts. 
Returns ------- float Strong's dominance index. Notes ----- Strong's dominance index is defined in [1]_. References ---------- .. [1] Strong, W. L., 2002 Assessing species abundance unevenness within and between plant communities. Community Ecology, 3, 237-246. """ S = counts.size sorted_sum = np.sort(counts)[::-1].cumsum() i = np.arange(1, S + 1) return (sorted_sum / counts.sum() - (i / S)).max() @_validate_alpha() def tsallis(counts, order=2): r"""Calculate Tsallis entropy. Tsallis entropy (:math:`^qH`), a.k.a. HCDT entropy, is a generalization of Boltzmann-Gibbs entropy with an exponent (order) :math:`q`. It is defined as: .. math:: ^qH = \frac{1}{q - 1}(1 - \sum_{i=1}^S p_i^q) where :math:`S` is the number of taxa and :math:`p_i` is the proportion of the sample represented by taxon :math:`i`. Parameters ---------- counts : 1-D array_like, int Vector of counts. order : int or float, optional Order (:math:`q`). Ranges between 0 and infinity. Default is 2. Returns ------- float Tsallis entropy. See Also -------- renyi shannon simpson sobs Notes ----- Tsallis entropy was originally defined in [1]_. Special cases of Tsallis entropy given order :math:`q` include: - :math:`q=0`: Observed species richness (:math:`S_{obs}`) minus 1. - :math:`q \to 1`: Shannon index :math:`H'`. - :math:`q=2`: Simpson diversity index (:math:`1 - D`). - :math:`q \to \infty`: 0. References ---------- .. [1] Tsallis, C. (1988). Possible generalization of Boltzmann-Gibbs statistics. Journal of statistical physics, 52, 479-487. 
""" if (S := counts.size) == 0: return np.nan elif S == 1: return 0.0 probs = counts / counts.sum() if order == 1: return _entropy(probs) elif np.isposinf(order): return 0.0 else: return (1 - (probs**order).sum()) / (order - 1) scikit-bio-0.6.2/skbio/diversity/alpha/_chao1.py000066400000000000000000000143421464262511300215060ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from ._base import osd from skbio.diversity._util import _validate_counts_vector def chao1(counts, bias_corrected=True): r"""Calculate Chao1 richness estimator. Uses the bias-corrected version unless ``bias_corrected`` is ``False`` *and* there are both singletons and doubletons. Parameters ---------- counts : 1-D array_like, int Vector of counts. bias_corrected : bool, optional Indicates whether or not to use the bias-corrected version of the equation. If ``False`` *and* there are both singletons and doubletons, the uncorrected version will be used. The biased-corrected version will be used otherwise. Returns ------- double Computed Chao1 richness estimator. See Also -------- chao1_ci Notes ----- The uncorrected version is based on Equation 6 in [1]_: .. math:: chao1=S_{obs}+\frac{F_1^2}{2F_2} where :math:`F_1` and :math:`F_2` are the count of singletons and doubletons, respectively. The bias-corrected version is defined as: .. math:: chao1=S_{obs}+\frac{F_1(F_1-1)}{2(F_2+1)} References ---------- .. [1] Chao, A. 1984. Non-parametric estimation of the number of classes in a population. Scandinavian Journal of Statistics 11, 265-270. 
""" counts = _validate_counts_vector(counts) o, s, d = osd(counts) if not bias_corrected and s and d: return o + s**2 / (d * 2) else: return o + s * (s - 1) / (2 * (d + 1)) def chao1_ci(counts, bias_corrected=True, zscore=1.96): """Calculate Chao1 confidence interval. Parameters ---------- counts : 1-D array_like, int Vector of counts. bias_corrected : bool, optional Indicates whether or not to use the bias-corrected version of the equation. If ``False`` *and* there are both singletons and doubletons, the uncorrected version will be used. The biased-corrected version will be used otherwise. zscore : scalar, optional Score to use for confidence. Default of 1.96 is for a 95% confidence interval. Returns ------- tuple Chao1 confidence interval as ``(lower_bound, upper_bound)``. See Also -------- chao1 Notes ----- The implementation here is based on the equations in the EstimateS manual [1]_. Different equations are employed to calculate the Chao1 variance and confidence interval depending on `bias_corrected` and the presence/absence of singletons and/or doubletons. Specifically, the following EstimateS equations are used: 1. No singletons, Equation 14. 2. Singletons but no doubletons, Equations 7, 13. 3. Singletons and doubletons, ``bias_corrected=True``, Equations 6, 13. 4. Singletons and doubletons, ``bias_corrected=False``, Equations 5, 13. References ---------- .. 
[1] http://viceroy.eeb.uconn.edu/estimates/ """ counts = _validate_counts_vector(counts) o, s, d = osd(counts) if s: chao = chao1(counts, bias_corrected) chaovar = _chao1_var(counts, bias_corrected) return _chao_confidence_with_singletons(chao, o, chaovar, zscore) else: n = counts.sum() return _chao_confidence_no_singletons(n, o, zscore) def _chao1_var(counts, bias_corrected=True): """Calculate Chao1 variance using decision rules in EstimateS.""" o, s, d = osd(counts) if not d: c = chao1(counts, bias_corrected) return _chao1_var_no_doubletons(s, c) if not s: n = counts.sum() return _chao1_var_no_singletons(n, o) if bias_corrected: return _chao1_var_bias_corrected(s, d) else: return _chao1_var_uncorrected(s, d) def _chao1_var_uncorrected(singles, doubles): """Calculate Chao1 variance without correction. From EstimateS manual, equation 5. """ r = singles / doubles return doubles * (0.5 * r**2 + r**3 + 0.24 * r**4) def _chao1_var_bias_corrected(s, d): """Calculate Chao1 variance with bias correction. `s` is the number of singletons and `d` is the number of doubletons. From EstimateS manual, equation 6. """ return ( s * (s - 1) / (2 * (d + 1)) + (s * (2 * s - 1) ** 2) / (4 * (d + 1) ** 2) + (s**2 * d * (s - 1) ** 2) / (4 * (d + 1) ** 4) ) def _chao1_var_no_doubletons(s, chao1): """Calculate Chao1 variance in absence of doubletons. From EstimateS manual, equation 7. `s` is the number of singletons, and `chao1` is the estimate of the mean of Chao1 from the same dataset. """ return s * (s - 1) / 2 + s * (2 * s - 1) ** 2 / 4 - s**4 / (4 * chao1) def _chao1_var_no_singletons(n, o): """Calculate Chao1 variance in absence of singletons. `n` is the number of individuals and `o` is the number of observed species. From EstimateS manual, equation 8. """ return o * np.exp(-n / o) * (1 - np.exp(-n / o)) def _chao_confidence_with_singletons(chao, observed, var_chao, zscore=1.96): """Calculate confidence bounds for Chao1 or Chao2. Uses Eq. 13 of EstimateS manual. 
`zscore` is the score to use for confidence. The default of 1.96 is for 95% confidence. """ T = chao - observed # if no diff between chao and observed, CI is just point estimate of # observed if T == 0: return observed, observed K = np.exp(abs(zscore) * np.sqrt(np.log(1 + (var_chao / T**2)))) return observed + T / K, observed + T * K def _chao_confidence_no_singletons(n, s, zscore=1.96): """Calculate confidence bounds for Chao1/Chao2 in absence of singletons. Uses Eq. 14 of EstimateS manual. `n` is the number of individuals and `s` is the number of observed taxa. """ P = np.exp(-n / s) return ( max(s, s / (1 - P) - zscore * np.sqrt((s * P / (1 - P)))), s / (1 - P) + zscore * np.sqrt(s * P / (1 - P)), ) scikit-bio-0.6.2/skbio/diversity/alpha/_gini.py000066400000000000000000000070361464262511300214430ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.diversity._util import _validate_counts_vector def gini_index(data, method="rectangles"): r"""Calculate the Gini index. The Gini index is defined as: .. math:: G=\frac{A}{A+B} where :math:`A` is the area between :math:`y=x` and the Lorenz curve and :math:`B` is the area under the Lorenz curve. Simplifies to :math:`1-2B` since :math:`A+B=0.5`. Parameters ---------- data : 1-D array_like Vector of counts, abundances, proportions, etc. All entries must be non-negative. method : {'rectangles', 'trapezoids'} Method for calculating the area under the Lorenz curve. If ``'rectangles'``, connects the Lorenz curve points by lines parallel to the x axis. This is the correct method (in our opinion) though ``'trapezoids'`` might be desirable in some circumstances. 
If ``'trapezoids'``, connects the Lorenz curve points by linear segments between them. Basically assumes that the given sampling is accurate and that more features of given data would fall on linear gradients between the values of this data. Returns ------- float Gini index. Raises ------ ValueError If ``method`` isn't one of the supported methods for calculating the area under the curve. Notes ----- The Gini index was introduced in [1]_. The formula for ``method='rectangles'`` is: .. math:: dx\sum_{i=1}^n h_i The formula for ``method='trapezoids'`` is: .. math:: dx(\frac{h_0+h_n}{2}+\sum_{i=1}^{n-1} h_i) References ---------- .. [1] Gini, C. (1912). "Variability and Mutability", C. Cuppini, Bologna, 156 pages. Reprinted in Memorie di metodologica statistica (Ed. Pizetti E, Salvemini, T). Rome: Libreria Eredi Virgilio Veschi (1955). """ # Suppress casting to int because this metric supports ints and floats. data = _validate_counts_vector(data, cast_int=False) lorenz_points = _lorenz_curve(data) B = _lorenz_curve_integrator(lorenz_points, method) return max(0.0, 1 - 2 * B) def _lorenz_curve(data): """Calculate the Lorenz curve for input data. Notes ----- Formula available on wikipedia. """ sorted_data = np.sort(data) Sn = sorted_data.sum() n = sorted_data.shape[0] return np.arange(1, n + 1) / n, sorted_data.cumsum() / Sn def _lorenz_curve_integrator(lc_pts, method): """Calculate the area under a Lorenz curve. Notes ----- Could be utilized for integrating other simple, non-pathological "functions" where width of the trapezoids is constant. """ x, y = lc_pts # each point differs by 1/n dx = 1 / x.shape[0] if method == "trapezoids": # 0 percent of the population has zero percent of the goods h_0 = 0.0 h_n = y[-1] # the 0th entry is at x=1/n sum_hs = y[:-1].sum() return dx * ((h_0 + h_n) / 2 + sum_hs) elif method == "rectangles": return dx * y.sum() else: raise ValueError( "Method '%s' not implemented. Available methods: " "'rectangles', 'trapezoids'." 
% method ) scikit-bio-0.6.2/skbio/diversity/alpha/_lladser.py000066400000000000000000000415451464262511300221460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.diversity._util import _validate_counts_vector def lladser_pe(counts, r=10): """Calculate single point estimate of conditional uncovered probability. Parameters ---------- counts : 1-D array_like, int Vector of counts. r : int, optional Number of new colors that are required for the next prediction. Returns ------- double Single point estimate of the conditional uncovered probability. May be ``np.nan`` if a point estimate could not be computed. See Also -------- lladser_ci Notes ----- This function is just a wrapper around the full point estimator described in Theorem 2 (i) in [1]_, intended to be called for a single best estimate on a complete sample. This function is not guaranteed to return estimated uncovered probabilities less than 1 if the coverage is too low. References ---------- .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via Poissonization: Accurate Measurements of the Microbial Unknown" PLoS 2011. """ counts = _validate_counts_vector(counts) sample = _expand_counts(counts) np.random.shuffle(sample) try: pe = list(_lladser_point_estimates(sample, r))[-1][0] except IndexError: pe = np.nan return pe def lladser_ci(counts, r, alpha=0.95, f=10, ci_type="ULCL"): """Calculate single CI of the conditional uncovered probability. Parameters ---------- counts : 1-D array_like, int Vector of counts. r : int Number of new colors that are required for the next prediction. alpha : float, optional Desired confidence level. 
f : float, optional Ratio between upper and lower bound. ci_type : {'ULCL', 'ULCU', 'U', 'L'} Type of confidence interval. If ``'ULCL'``, upper and lower bounds with conservative lower bound. If ``'ULCU'``, upper and lower bounds with conservative upper bound. If ``'U'``, upper bound only, lower bound fixed to 0.0. If ``'L'``, lower bound only, upper bound fixed to 1.0. Returns ------- tuple Confidence interval as ``(lower_bound, upper_bound)``. See Also -------- lladser_pe Notes ----- This function is just a wrapper around the full CI estimator described in Theorem 2 (iii) in [1]_, intended to be called for a single best CI estimate on a complete sample. References ---------- .. [1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via Poissonization: Accurate Measurements of the Microbial Unknown" PLoS 2011. """ counts = _validate_counts_vector(counts) sample = _expand_counts(counts) np.random.shuffle(sample) try: ci = list(_lladser_ci_series(sample, r, alpha, f, ci_type))[-1] except IndexError: ci = (np.nan, np.nan) return ci def _expand_counts(counts): """Convert vector of counts at each index to vector of indices.""" # From http://stackoverflow.com/a/22671394 return np.repeat(np.arange(counts.size), counts) def _lladser_point_estimates(sample, r=10): """Series of point estimates of the conditional uncovered probability. Parameters ---------- sample : 1-D array_like, int Series of random observations. r : int, optional Number of new colors that are required for the next prediction. Returns ------- generator Each new color yields a tuple of three elements: the point estimate, position in sample of prediction, and random variable from Poisson process (mostly to make testing easier). Raises ------ ValueError If `r` is less than or equal to 2. Notes ----- This is the point estimator described in Theorem 2 (i) in [1]_. References ---------- .. 
[1] Lladser, Gouet, and Reeder, "Extrapolation of Urn Models via Poissonization: Accurate Measurements of the Microbial Unknown" PLoS 2011. """ if r <= 2: raise ValueError("r must be greater than or equal to 3.") for count, seen, cost, i in _get_interval_for_r_new_taxa(sample, r): t = np.random.gamma(count, 1) point_est = (r - 1) / t yield point_est, i, t def _get_interval_for_r_new_taxa(seq, r): """Compute interval between r new taxa for seq of samples. Imagine an urn with colored balls. Given a drawing of balls from the urn, compute how many balls need to be looked at to discover r new colors. Colors can be repeated. Parameters ---------- seq : sequence Series of observations (the actual sample, not the frequencies). r : int Number of new colors that need to be observed for a new interval. Returns ------- generator For each new color seen for the first time, yields a tuple of four elements: the length of interval (i.e. number of observations looked at), the set of seen colors, position in seq after seeing last new color (end of interval), and position in seq where interval is started. """ seen = set() seq_len = len(seq) # note: first iteration is after looking at first char for i, curr in enumerate(seq): # bail out if there's nothing new if curr in seen: continue else: seen.add(curr) # otherwise, need to see distance to get k colors unseen = 0 j = i + 1 while unseen < r and j < seq_len: if seq[j] not in seen: unseen += 1 # note: increments after termination condition j += 1 # the interval to see r new colors count = j - i - 1 # the position in seq after seeing r new ones cost = j # bail out if not enough unseen if not count or (unseen < r): return # make a copy of seen before yielding, as we'll continue to add to the # set in subsequent iterations yield count, set(seen), cost, i def _lladser_ci_series(seq, r, alpha=0.95, f=10, ci_type="ULCL"): """Construct r-color confidence intervals for uncovered conditional prob. 
Parameters ---------- seq : sequence Sequence of colors (the actual sample, not the counts). r : int Number of new colors that are required for the next prediction. alpha : float, optional Desired confidence level. f : float, optional Ratio between upper and lower bound. ci_type : {'ULCL', 'ULCU', 'U', 'L'} Type of confidence interval. If ``'ULCL'``, upper and lower bounds with conservative lower bound. If ``'ULCU'``, upper and lower bounds with conservative upper bound. If ``'U'``, upper bound only, lower bound fixed to 0.0. If ``'L'``, lower bound only, upper bound fixed to 1.0. Returns ------- generator Yields one CI prediction for each new color that is detected and where. """ for count, seen, cost, i in _get_interval_for_r_new_taxa(seq, r): t = np.random.gamma(count, 1) yield _lladser_ci_from_r(r, t, alpha, f, ci_type) def _lladser_ci_from_r(r, t, alpha=0.95, f=10, ci_type="ULCL"): """Construct r-color confidence interval for uncovered conditional prob. Returns ------- tuple Confidence interval that contains the true conditional uncovered probability with a probability of 100% * `alpha`. Raises ------ ValueError For combinations of `r`, `f`, and `alpha` that do not have precomputed results. """ alpha = round(alpha, 2) if ci_type == "U": if alpha != 0.95: raise ValueError("alpha must be 0.95 if ci_type is 'U'.") if r not in _UPPER_CONFIDENCE_BOUND: raise ValueError("r must be between 1-25 or 50 if ci_type is 'U'.") return 0.0, _UPPER_CONFIDENCE_BOUND[r] / t elif ci_type == "L": if alpha != 0.95: raise ValueError("alpha must be 0.95 if ci_type is 'L'.") if r not in _LOWER_CONFIDENCE_BOUND: raise ValueError("r must be between 1-25 if ci_type is 'L'.") return _LOWER_CONFIDENCE_BOUND[r] / t, 1.0 bound_params = _ul_confidence_bounds(f, r, alpha) if ci_type == "ULCL": bound_param = bound_params[0] elif ci_type == "ULCU": bound_param = bound_params[1] else: raise ValueError("Unknown ci_type '%s'." 
% ci_type) upper_bound = bound_param * f / t lower_bound = bound_param / t # make sure upper bound is at most 1 if upper_bound > 1: upper_bound = 1.0 return lower_bound, upper_bound def _ul_confidence_bounds(f, r, alpha): """Return confidence bounds based on ratio and alpha. This function is just a lookup of some precomputed values. Parameters ---------- f : float Desired ratio of upper to lower bound. r : int Number of new colors. alpha : float Confidence interval (for 95% confidence use 0.95). Returns ------- tuple Constants ``(c_1, c_2)`` such that the confidence interval is ``[c_1/T_r, c_1*f/T_r]`` for conservative lower bound intervals and ``[c_2/T_r, c_2*f/T_r]`` for conservative upper bound intervals. """ a = None b = None if (f, r, alpha) in _PRECOMPUTED_TABLE: return _PRECOMPUTED_TABLE[(f, r, alpha)] # all others combination are only computed for f=10 # and alpha = 0.90, 0.95 and 0.99 if f == 10 and r <= 50: if alpha in _CBS and r < len(_CBS[alpha]): a, b = _CBS[alpha][r] if a is None or b is None: raise ValueError( "No constants are precomputed for the combination of " "f=%f, r=%d, and alpha=%.2f" % (f, r, alpha) ) return a, b # Maps r to a constant c such that the 95% confidence interval with lower bound # fixed at 0 is [0, c/T_r]. This constant is constant b according to # Theorem 2 (iii) in the paper with a=0, aka c_0 from Table 3. _UPPER_CONFIDENCE_BOUND = { 1: 2.995732274, 2: 4.743864518, 3: 6.295793622, 4: 7.753656528, 5: 9.153519027, 6: 10.51303491, 7: 11.84239565, 8: 13.14811380, 9: 14.43464972, 10: 15.70521642, 11: 16.96221924, 12: 18.20751425, 13: 19.44256933, 14: 20.66856908, 15: 21.88648591, 16: 23.09712976, 17: 24.30118368, 18: 25.49923008, 19: 26.69177031, 20: 27.87923964, 21: 29.06201884, 22: 30.24044329, 23: 31.41481021, 24: 32.58538445, 25: 33.75240327, 50: 62.17105670, } # Maps r to a constant c such that the 95% confidence interval with upper bound # fixed at 1 is [c/T_r, 1]. 
This constant is constant b according to # Theorem 2 (iii) in the paper with b=1, aka c_3 from Table 3. _LOWER_CONFIDENCE_BOUND = { 1: 0.051293294, 2: 0.355361510, 3: 0.817691447, 4: 1.366318397, 5: 1.970149568, 6: 2.613014744, 7: 3.285315692, 8: 3.980822786, 9: 4.695227540, 10: 5.425405697, 11: 6.169007289, 12: 6.924212514, 13: 7.689578292, 14: 8.463937522, 15: 9.246330491, 16: 10.03595673, 17: 10.83214036, 18: 11.63430451, 19: 12.44195219, 20: 13.25465160, 21: 14.07202475, 22: 14.89373854, 23: 15.71949763, 24: 16.54903871, 25: 17.38212584, } # Hack in some special values we used for the paper. # Since Manuel needs to compute those semi-automatically # using Maple, we pre-calculate only a few common ones # precomputed table is {(f, r, alpha):(c_1, c_2)} _PRECOMPUTED_TABLE = { (2, 50, 0.95): (31.13026306, 38.94718565), (2, 33, 0.95): (22.3203508, 23.4487304), (1.5, 100, 0.95): (79.0424349, 83.22790086), (1.5, 94, 0.95): (75.9077267, 76.5492088), (2.5, 19, 0.95): (11.26109001, 11.96814857), # In the next block for each f, we report the smallest possible value # of r from table 4 in the paper (80, 2, 0.95): (0.0598276655, 0.355361510), (48, 2, 0.95): (0.1013728884, 0.355358676), (40, 2, 0.95): (0.1231379857, 0.355320458), (24, 2, 0.95): (0.226833483, 0.346045204), (20, 3, 0.95): (0.320984257, 0.817610455), (12, 3, 0.95): (0.590243030, 0.787721610), (10, 4, 0.95): (0.806026244, 1.360288674), (6, 6, 0.95): (1.8207383, 2.58658608), (5, 7, 0.95): (2.48303930, 3.22806682), (3, 14, 0.95): (7.17185045, 8.27008349), (1.25, 309, 0.95): (275.661191, 275.949782), } # Below are the values used for Theorem 3 iii # Values hand computed by Manuel Lladser using Maple. For each alpha (0.90, # 0.95, and 0.99), there is a list mapping r to (c_1, c_2), where r is used as # an index into the list. 
_CB_90 = [ (None, None), # 0, makes indexing easier (None, None), # no feasible solution (None, None), # no feasible solution (0.5635941995, 1.095834700), (0.6764656264, 1.744588615), (0.8018565594, 2.432587343), (0.9282215025, 3.151897973), (1.053433716, 3.894766804), (1.177158858, 4.656118177), (1.299491033, 5.432468058), (1.420604842, 6.221304605), # 10 (1.540665805, 7.020746595), (1.659812701, 7.829342026), (1.778158703, 8.645942495), (1.895796167, 9.469621185), (2.012801198, 10.29961731), (2.129237257, 11.13529724), (2.245157877, 11.97612664), (2.360608695, 12.82164994), (2.475628991, 13.67147502), (2.590252861, 14.52526147), # 20 (2.704510123, 15.38271151), (2.818427036, 16.24356290), (2.932026869, 17.10758326), (3.045330351, 17.97456551), (3.158356050, 18.84432420), (None, None), # not computed (None, None), (None, None), (None, None), (3.719850286, 23.22944415), # 30 (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (4.828910181, 32.13892224), # 40 (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (5.924900191, 41.17906791), # 50 ] _CB_95 = [ (None, None), # 0 (None, None), (None, None), (None, None), (0.8060262438, 1.360288674), # 4 (0.9240311584, 1.969902537), (1.053998892, 2.613007253), (1.185086998, 3.285315518), (1.315076337, 3.980822783), (4.695227540, 4.695227541), (1.570546801, 5.425405698), # 10 (1.696229569, 6.169007289), (1.820753729, 6.924212513), (1.944257622, 7.689578291), (2.066857113, 8.463937522), (2.188648652, 9.246330491), (2.309712994, 10.03595673), (2.430118373, 10.83214036), (2.549923010, 11.63430451), (2.669177032, 12.44195219), (2.787923964, 13.25465160), # 20 (2.906201884, 14.07202475), (3.024044329, 14.89373854), (3.141481020, 15.71949763), (3.258538445, 16.54903871), (3.375240327, 17.38212584), (None, None), (None, None), (None, None), (None, None), (3.954097220, 21.59397923), # 30 
(None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (5.093973695, 30.19573919), # 40 (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (6.217105673, 38.96473258), # 50 ] _CB_99 = [ (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (1.360316290, 1.768978323), (1.470856924, 2.329171347), (1.604478487, 2.906049304), (1.741759456, 3.507452949), (1.878809285, 4.130199076), # 10 (2.014632329, 4.771246173), (2.149044735, 5.428180734), (2.282101533, 6.099073460), (2.413917374, 6.782354878), (2.544610844, 7.476728267), (2.674289153, 8.181107778), (2.803045614, 8.894573463), (2.930960779, 9.616337916), (3.058104355, 10.34572103), (3.184536992, 11.08213063), # 20 (3.310311816, 11.82504734), (3.435475649, 12.57401269), (3.560070013, 13.32861956), (3.684131925, 14.08850448), (3.807694563, 14.85334135), (None, None), (None, None), (None, None), (None, None), (4.41897094, 18.7424258), # 30 (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (5.61643962, 26.7700386), # 40 (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (None, None), (6.79033616, 35.0324474), # 50 ] _CBS = {0.90: _CB_90, 0.95: _CB_95, 0.99: _CB_99} scikit-bio-0.6.2/skbio/diversity/alpha/_pd.py000066400000000000000000000420501464262511300211130ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np from skbio.diversity._util import ( _validate_counts_vector, _validate_taxa_and_tree, _vectorize_counts_and_tree, _check_taxa_alias, ) def _setup_pd(counts, taxa, tree, validate, rooted, single_sample): if validate: if single_sample: # only validate count if operating in single sample mode, they # will have already been validated otherwise counts = _validate_counts_vector(counts) _validate_taxa_and_tree(counts, taxa, tree, rooted) else: _validate_taxa_and_tree(counts[0], taxa, tree, rooted) counts_by_node, _, branch_lengths = _vectorize_counts_and_tree(counts, taxa, tree) return counts_by_node, branch_lengths def _faith_pd(counts_by_node, branch_lengths): """Calculate Faith's phylogenetic diversity (Faith's PD) metric. Parameters ---------- counts_by_node : ndarray of shape (n_samples, n_nodes) Total counts/abundances of taxa descending from individual nodes of the tree. branch_lengths : ndarray of shape (n_nodes,) Branch lengths of corresponding nodes of the tree. Returns ------- float Faith's phylogenetic diversity (PD). """ return (branch_lengths * (counts_by_node > 0)).sum() def faith_pd(counts, taxa=None, tree=None, validate=True, otu_ids=None): r"""Calculate Faith's phylogenetic diversity (Faith's PD) metric. The Faith's PD metric is defined as: .. math:: PD = \sum_{b \in T \sqcup R} l(b) where :math:`T` is a minimum set of branches (:math:`b`) in a rooted tree that connect all taxa in a community. :math:`R` is a set of branches from the lowest common ancestor (LCA) of the taxa to the root of the tree. :math:`PD` is the sum of lengths (:math:`l`) of branches in both sets. Parameters ---------- counts : 1-D array_like, int Vectors of counts/abundances of taxa for one sample. taxa : list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``counts``. Required. tree : skbio.TreeNode Tree relating taxa. 
The set of tip names in the tree can be a superset of ``taxa``, but not a subset. Required. validate : bool, optional If ``False``, validation of the input won't be performed. This step can be slow, so if validation is run elsewhere it can be disabled here. However, invalid input data can lead to invalid results or error messages that are hard to interpret, so this step should not be bypassed if you're not certain that your input data are valid. See :mod:`skbio.diversity` for the description of what validation entails so you can determine if you can safely disable validation. otu_ids : list, np.array Alias of ``taxa`` for backward compatibility. Deprecated and to be removed in a future release. Returns ------- float Faith's phylogenetic diversity (PD). Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. Exact error will depend on what was invalid. See Also -------- phydiv skbio.diversity skbio.diversity.alpha_diversity Notes ----- Faith's phylogenetic diversity, often referred to as PD, was originally described in [1]_. It is the total phylogenetic branch length spanning all taxa of a community. It was clarified that the calculation should extend to the root of the tree [2]_, such that a single-taxon community will not have PD = 0. The root should be ancestral to all taxa being considered in the study, but does not need to be the origin of life. One should choose the root according to the scope of the study. Unrooted and abundance-weighted variants of PD are implemented in ``phydiv``. Several other metrics, such as evolutionary history (EH) [3]_ and functional diversity (FD) [4]_, are equivalent to PD in calculation. If computing Faith's PD for multiple samples, using ``skbio.diversity.alpha_diversity`` will be much faster than calling this function individually on each sample. This implementation of Faith's PD is based on the array-based implementation of UniFrac described in [5]_. 
This implementation differs from that in PyCogent (and therefore QIIME versions less than 2.0.0) by imposing a few additional restrictions on the inputs. First, the input tree must be rooted. In PyCogent, if an unrooted tree was provided that had a single trifurcating node (a newick convention for unrooted trees) that node was considered the root of the tree. Next, all taxa must be tips in the tree. PyCogent would silently ignore taxa that were not present the tree. To reproduce Faith PD results from PyCogent with scikit-bio, ensure that your PyCogent Faith PD calculations are performed on a rooted tree and that all taxa are present in the tree. References ---------- .. [1] Faith, D. P. Conservation evaluation and phylogenetic diversity. Biol. Conserv. (1992). .. [2] Faith, D. P., & Baker, A. M. (2006). Phylogenetic diversity (PD) and biodiversity conservation: some bioinformatics challenges. Evolutionary bioinformatics, 2, 117693430600200007. .. [3] Nee, S., & May, R. M. (1997). Extinction and the loss of evolutionary history. Science, 278(5338), 692-694. .. [4] Petchey OL, Gaston KJ. Functional diversity (FD), species richness and community composition. Ecology letters. 2002 May;5(3):402-11. .. [5] Hamady M, Lozupone C, Knight R. Fast UniFrac: facilitating high- throughput phylogenetic analyses of microbial communities including analysis of pyrosequencing and PhyloChip data. ISME J. 4(1):17-27 (2010). Examples -------- Assume we have the following abundance data for a sample ``u``, represented as a counts vector. These counts represent the number of times specific taxa were observed in the sample. >>> u_counts = [1, 0, 0, 4, 1, 2, 3, 0] Because Faith PD is a phylogenetic diversity metric, we need to know which taxon each count corresponds to, which we'll provide as ``taxa``. >>> taxa = ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7', 'U8'] We also need a phylogenetic tree that relates the taxa to one another. 
>>> from io import StringIO >>> from skbio import TreeNode >>> tree = TreeNode.read(StringIO( ... '(((((U1:0.5,U2:0.5):0.5,U3:1.0):1.0):0.0,' ... '(U4:0.75,(U5:0.5,((U6:0.33,U7:0.62):0.5' ... ',U8:0.5):0.5):0.5):1.25):0.0)root;')) We can then compute the Faith PD of the sample. >>> from skbio.diversity.alpha import faith_pd >>> pd = faith_pd(u_counts, taxa, tree) >>> print(round(pd, 2)) 6.95 """ taxa = _check_taxa_alias(taxa, tree, otu_ids) counts_by_node, branch_lengths = _setup_pd( counts, taxa, tree, validate, rooted=True, single_sample=True ) return _faith_pd(counts_by_node, branch_lengths) def _phydiv(counts_by_node, branch_lengths, rooted, weight): """Calculate generalized phylogenetic diversity (PD) metrics. Parameters ---------- counts_by_node : ndarray of shape (n_samples, n_nodes) Total counts/abundances of taxa descending from individual nodes of the tree. branch_lengths : ndarray of shape (n_nodes,) Branch lengths of corresponding nodes of the tree. rooted : bool Whether the metric is calculated considering the root of the tree. weight : bool or float Whether and to what degree branch lengths should be weighted by the relative abundance of taxa descending from the branch. Returns ------- float Phylogenetic diversity (PD). 
""" # select branches connecting taxa included = counts_by_node > 0 # get total counts counts_sum = counts_by_node.max() if counts_sum == 0.0: return 0.0 # in unrooted mode, remove branches to root if rooted is False: included &= counts_by_node < counts_sum # in unweighted mode, simply sum branch lengths if not weight: return (branch_lengths * included).sum() # get relative abundances fracs_by_node = counts_by_node / counts_sum # calculate balances in unrooted mode if rooted is False: fracs_by_node = 2 * np.minimum(fracs_by_node, 1 - fracs_by_node) # raise relative abundances to the power of theta if isinstance(weight, float) and weight < 1.0: fracs_by_node **= weight return (branch_lengths * fracs_by_node).sum() def phydiv( counts, taxa=None, tree=None, rooted=None, weight=False, validate=True, otu_ids=None ): r"""Calculate generalized phylogenetic diversity (PD) metrics. Parameters ---------- counts : 1-D array_like, int Vectors of counts/abundances of taxa for one sample. taxa : list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``counts``. Required. tree : skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of ``taxa``, but not a subset. Required. rooted : bool, optional Whether the metric is calculated considering the root of the tree. By default, this will be determined based on whether the input tree is rooted. However, one can override it by explicitly specifying ``True`` (rooted) or ``False`` (unrooted). weight : bool or float, optional Whether branch lengths should be weighted by the relative abundance of taxa descending from the branch (default: ``False``). A float within [0, 1] indicates the degree of partial-weighting (0: unweighted, 1: fully-weighted). validate: bool, optional Whether validate the input data. See ``faith_pd`` for details. otu_ids : list, np.array Alias of ``taxa`` for backward compatibility. Deprecated and to be removed in a future release. 
Returns ------- float Phylogenetic diversity (PD). Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. Exact error will depend on what was invalid. See Also -------- faith_pd skbio.diversity skbio.diversity.alpha_diversity Notes ----- Phylogenetic diversity (PD) metrics measure the diversity of a community with consideration of the phylogenetic relationships among taxa. In general, PD is the sum of branch lengths spanning across taxa, optionally weighted by their abundance. The most widely-adopted PD metric, Faith's PD [1]_, is defined as: .. math:: PD = \sum_{b \in T \sqcup R} l(b) where :math:`T` is a minimum set of branches (:math:`b`) in a rooted tree that connect all taxa in a community. :math:`R` is a set of branches from the lowest common ancestor (LCA) of the taxa to the root of the tree. :math:`PD` is the sum of lengths (:math:`l`) of branches in both sets. It is equivalent to ``pd(..., rooted=True, weight=False)``. A variant of PD, which does not include the root in the calculation, was referred to by some authors as unrooted phylogenetic diversity (uPD) [2]_, as in contrast to rooted phylogenetic diversity (rPD, i.e., Faith's PD). uPD is defined as: .. math:: PD = \sum_{b \in T} l(b) It is equivalent to ``pd(..., rooted=False, weight=False)``. See ``faith_pd`` for a discussion of the root. PD (rooted or unrooted) considers only the presence of taxa. Therefore, it can be considered as the phylogenetic generalization of species richness. However, there are advantages of incorporating abundance information in the measurement [3]_. A generalized framework of abundance-weighted PD is provided in [4]_. Abundance-weighted rooted PD (equivalent to :math:`RBWPD_{1}` described in [4]_) is analogous to the :math:`PD_{aw}` metric originally described in [5]_ with a multiplier. It is defined as: .. 
math:: PD = \sum_{b \in T \sqcup R} l(b) p(b) where :math:`p` is the sum of relative (proportional) abundances of taxa descending from branch (:math:`b`). It is equivalent to ``pd(..., rooted=True, weight=True)``. Abundance-weighted unrooted PD (equivalent to :math:`BWPD_{1}` described in [4]_) is analogous to the :math:`\delta nPD` metric originally described in [6]_ with a multiplier. It is defined as: .. math:: PD = 2 \sum_{b \in T} l(b) \min(p(b),1-p(b)) In which the term :math:`2\min(p(b),1-p(b))` is the lesser of the relative abundance of descending taxa on either side of a branch, multiplied by two. It is referred to as the "balance" of taxon abundance in [4]_. It is equivalent to ``pd(..., rooted=False, weight=True)``. The contribution of taxon abundance to the metric can be adjusted using the ``weighted`` parameter when it is a float within [0, 1]. This factor was referred to as :math:`\theta` in [4]_. The metric, :math:`BWPD_{\theta}`, referred to as the balance-weighted phylogenetic diversity in [4]_, is defined as: .. math:: PD = \sum_{b \in T} l(b) (2\min(p(b),1-p(b)))^\theta It is equivalent to ``pd(..., rooted=False, weight=theta)``. This metric falls back to unweighted PD when :math:`\theta=0` or fully- weighted PD when :math:`\theta=1`. The original publication tested :math:`\theta=0.25` or :math:`0.5` [4]_. The parameter :math:`\theta` is analogous to the parameter :math:`\alpha` in the generalized UniFrac metric [7]_. Likewise, the rooted version of balance-weighted phylogenetic diversity, :math:`RBWPD_{\theta}` [4]_ (although "balance" is not involved), is defined as: .. math:: PD = \sum_{b \in T \sqcup R} l(b) p(b)^\theta It is equivalent to ``pd(..., rooted=True, weight=theta)``. It is important to report which metric is used. For practical perspective, we recommend the following denotions: - :math:`rPD`: rooted, unweighted PD (Faith's PD [1]_). - :math:`uPD`: unrooted, unweighted PD (uPD [2]_). 
- :math:`rPD_{w}`: rooted, weighted PD (analogous to :math:`PD_{aw}` [3]_). - :math:`uPD_{w}`: unrooted, weighted PD (analogous to :math:`\delta nPD` [4]_). - :math:`rPD_{w\theta}`: rooted, weighted PD with parameter :math:`\theta` (:math:`RBWPD_{\theta}` [5]_). - :math:`uPD_{w\theta}`: unrooted, weighted PD with parameter :math:`\theta` (:math:`BWPD_{\theta}` [5]_). References ---------- .. [1] Faith, D. P. Conservation evaluation and phylogenetic diversity. Biol. Conserv. (1992). .. [2] Pardi, F., & Goldman, N. (2007). Resource-aware taxon selection for maximizing phylogenetic diversity. Systematic biology, 56(3), 431-444. .. [3] Chao, A., Chiu, C. H., & Jost, L. (2016). Phylogenetic diversity measures and their decomposition: a framework based on Hill numbers. Biodiversity Conservation and Phylogenetic Systematics, 14, 141-72. .. [4] McCoy, C. O., & Matsen IV, F. A. (2013). Abundance-weighted phylogenetic diversity measures distinguish microbial community states and are robust to sampling depth. PeerJ, 1, e157. .. [5] Vellend, M., Cornwell, W. K., Magnuson-Ford, K., & Mooers, A. Ø. (2011). Measuring phylogenetic biodiversity. Biological diversity: frontiers in measurement and assessment, 194-207. .. [6] Barker, G. M. (2002). Phylogenetic diversity: a quantitative framework for measurement of priority and achievement in biodiversity conservation. Biological Journal of the Linnean Society, 76(2), 165-194. .. [7] Chen, J., Bittinger, K., Charlson, E. S., Hoffmann, C., Lewis, J., Wu, G. D., ... & Li, H. (2012). Associating microbiome composition with environmental covariates using generalized UniFrac distances. Bioinformatics, 28(16), 2106-2113. 
""" taxa = _check_taxa_alias(taxa, tree, otu_ids) # whether tree is rooted should not affect whether metric can be calculated # ; it is common unrooted PD is calculated on a rooted tree counts_by_node, branch_lengths = _setup_pd( counts, taxa, tree, validate, rooted=False, single_sample=True ) # if not specified, determine whether metric should be calculated in rooted # mode according to the tree if rooted is None: rooted = len(tree.root().children) == 2 # validate weight parameter if ( not isinstance(weight, (bool, int, float)) or (w_ := float(weight)) < 0.0 or w_ > 1.0 ): raise ValueError("Weight parameter must be boolean or within [0, 1].") return _phydiv(counts_by_node, branch_lengths, rooted, weight) scikit-bio-0.6.2/skbio/diversity/alpha/tests/000077500000000000000000000000001464262511300211405ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/alpha/tests/__init__.py000066400000000000000000000005411464262511300232510ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/000077500000000000000000000000001464262511300220515ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/qiime-191-tt/000077500000000000000000000000001464262511300241125ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/qiime-191-tt/README.md000066400000000000000000000002411464262511300253660ustar00rootroot00000000000000Files in this directory are the QIIME 1.9.1 "tiny test" files. For detail on how these were created, see skbio/diversity/beta/tests/data/qiime-191-tt/README.md. 
scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/qiime-191-tt/faith-pd.txt000066400000000000000000000001461464262511300263500ustar00rootroot00000000000000 PD_whole_tree f2 0.64607 f1 0.47803 f3 0.47803 f4 0.306525 p2 1.4239 p1 0.604145 t1 0.6574 t2 0.6574 scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/qiime-191-tt/otu-table.tsv000066400000000000000000000004431464262511300265450ustar00rootroot00000000000000# Constructed from biom file #OTU ID f2 f1 f3 f4 p2 p1 t1 t2 295053 20 18 18 22 4 0 0 0 42684 0 0 0 0 1 0 0 0 None11 1 0 0 0 1 1 0 0 None7 0 0 0 0 1 0 0 0 None5 0 0 0 0 1 0 0 0 None4 0 0 0 0 1 1 0 0 None3 0 0 0 0 1 0 2 3 879972 0 0 0 0 9 20 1 4 None9 0 0 0 0 3 0 19 15 None8 1 4 4 0 0 0 0 0 scikit-bio-0.6.2/skbio/diversity/alpha/tests/data/qiime-191-tt/tree.nwk000066400000000000000000000004121464262511300255670ustar00rootroot00000000000000(((879972:0.05039,None3:0.00778)0.980:0.15948,((None11:0.07161,None4:0.06965)0.917:0.09643,(295053:0.06096,42684:0.15599)0.910:0.08898)0.899:0.09227)0.958:0.064315,(None8:0.09606,(None7:0.10435,(None5:0.02626,None9:0.00014)1.000:0.25335)0.753:0.0465):0.075445)root; scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_ace.py000066400000000000000000000027301464262511300233030ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import numpy as np import numpy.testing as npt from skbio.diversity.alpha import ace class AceTests(unittest.TestCase): def test_ace(self): npt.assert_almost_equal(ace(np.array([2, 0])), 1.0) npt.assert_almost_equal(ace(np.array([12, 0, 9])), 2.0) npt.assert_almost_equal(ace(np.array([12, 2, 8])), 3.0) npt.assert_almost_equal(ace(np.array([12, 2, 1])), 4.0) npt.assert_almost_equal(ace(np.array([12, 1, 2, 1])), 7.0) npt.assert_almost_equal(ace(np.array([12, 3, 2, 1])), 4.6) npt.assert_almost_equal(ace(np.array([12, 3, 6, 1, 10])), 5.62749672) # Just return the number of taxa when all are abundant. npt.assert_almost_equal(ace(np.array([12, 12, 13, 14])), 4.0) # Border case: only singletons and 10-tons, no abundant taxa. npt.assert_almost_equal(ace([0, 1, 1, 0, 0, 10, 10, 1, 0, 0]), 9.35681818182) def test_ace_only_rare_singletons(self): with self.assertRaises(ValueError): ace([0, 0, 43, 0, 1, 0, 1, 42, 1, 43]) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_base.py000066400000000000000000000426751464262511300235010ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main from io import StringIO import warnings import numpy as np import numpy.testing as npt from skbio import TreeNode from skbio.diversity.alpha import ( berger_parker_d, brillouin_d, dominance, doubles, enspie, esty_ci, fisher_alpha, goods_coverage, heip_e, hill, inv_simpson, kempton_taylor_q, margalef, mcintosh_d, mcintosh_e, menhinick, michaelis_menten_fit, observed_features, observed_otus, osd, pielou_e, renyi, robbins, shannon, simpson, simpson_d, simpson_e, singles, sobs, strong, tsallis) class BaseTests(TestCase): def setUp(self): self.counts = np.array([0, 1, 1, 4, 2, 5, 2, 4, 1, 2]) self.sids1 = list('ABCD') self.oids1 = ['OTU%d' % i for i in range(1, 6)] self.t1 = TreeNode.read(StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):' '0.0,(OTU4:0.75,OTU5:0.75):1.25):0.0)root;')) self.t1_w_extra_tips = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0' ')root;')) def test_berger_parker_d(self): self.assertEqual(berger_parker_d(np.array([5, 5])), 0.5) self.assertEqual(berger_parker_d(np.array([1, 1, 1, 1, 0])), 0.25) self.assertEqual(berger_parker_d(self.counts), 5 / 22) self.assertEqual(berger_parker_d(np.array([5])), 1) self.assertTrue(np.isnan(berger_parker_d([0, 0]))) def test_brillouin_d(self): self.assertAlmostEqual(brillouin_d(np.array([1, 2, 0, 0, 3, 1])), 0.86289353018248782) self.assertTrue(np.isnan(brillouin_d([0, 0]))) def test_dominance(self): self.assertEqual(dominance(np.array([5])), 1) self.assertAlmostEqual(dominance(np.array([1, 0, 2, 5, 2])), 0.34) self.assertTrue(np.isnan(dominance([0, 0]))) # finite sample correction self.assertEqual(dominance(np.array([5]), finite=True), 1) self.assertAlmostEqual(dominance( np.array([1, 0, 2, 5, 2]), finite=True), 0.8 / 3) self.assertTrue(np.isnan(dominance([0, 0], finite=True))) def test_doubles(self): 
self.assertEqual(doubles(self.counts), 3) self.assertEqual(doubles(np.array([0, 3, 4])), 0) self.assertEqual(doubles(np.array([2])), 1) self.assertEqual(doubles([0, 0]), 0) def test_enspie(self): for vec in ( np.array([1, 1, 1, 1, 1, 1]), np.array([1, 41, 0, 0, 12, 13]), np.array([1, 0, 2, 5, 2]) ): self.assertEqual(enspie(vec), inv_simpson(vec)) vec = np.array([1, 2, 3, 4]) self.assertEqual(enspie(vec, finite=True), inv_simpson(vec, finite=True)) def test_esty_ci(self): def _diversity(indices, f): """Calculate diversity index for each window of size 1. indices: vector of indices of taxa f: f(counts) -> diversity measure """ result = [] max_size = max(indices) + 1 freqs = np.zeros(max_size, dtype=int) for i in range(len(indices)): freqs += np.bincount(indices[i:i + 1], minlength=max_size) try: curr = f(freqs) except (ZeroDivisionError, FloatingPointError): curr = 0 result.append(curr) return np.array(result) data = [1, 1, 2, 1, 1, 3, 2, 1, 3, 4] observed_lower, observed_upper = zip(*_diversity(data, esty_ci)) expected_lower = np.array([1, -1.38590382, -0.73353593, -0.17434465, -0.15060902, -0.04386191, -0.33042054, -0.29041008, -0.43554755, -0.33385652]) expected_upper = np.array([1, 1.38590382, 1.40020259, 0.67434465, 0.55060902, 0.71052858, 0.61613483, 0.54041008, 0.43554755, 0.53385652]) npt.assert_array_almost_equal(observed_lower, expected_lower) npt.assert_array_almost_equal(observed_upper, expected_upper) self.assertTrue(np.isnan(esty_ci([0, 0]))) def test_fisher_alpha(self): exp = 2.7823796 arr = np.array([4, 3, 4, 0, 1, 0, 2]) obs = fisher_alpha(arr) self.assertAlmostEqual(obs, exp, places=6) # Should depend only on S and N (number of taxa, number of # individuals / seqs), so we should obtain the same output as above. 
obs = fisher_alpha([1, 6, 1, 0, 1, 0, 5]) self.assertAlmostEqual(obs, exp, places=6) # Should match another by hand: # 2 taxa, 62 seqs, alpha is 0.39509 obs = fisher_alpha([61, 0, 0, 1]) self.assertAlmostEqual(obs, 0.3950909, places=6) # Test case where we have >1000 individuals (SDR-IV makes note of this # case). Verified against R's vegan::fisher.alpha. obs = fisher_alpha([999, 0, 10]) self.assertAlmostEqual(obs, 0.2396492, places=6) # Should be infinite when all species are singletons obs = fisher_alpha([1, 1, 1, 1, 1]) self.assertEqual(obs, np.inf) # Should be large when most species are singletons obs = fisher_alpha([1] * 99 + [2]) self.assertAlmostEqual(obs, 5033.278, places=3) # Similar but even larger obs = fisher_alpha([1] * 999 + [2]) TestCase().assertAlmostEqual(obs, 500333.3, places=1) self.assertTrue(np.isnan(fisher_alpha([0, 0]))) def test_goods_coverage(self): counts = [1] * 75 + [2, 2, 2, 2, 2, 2, 3, 4, 4] obs = goods_coverage(counts) self.assertAlmostEqual(obs, 0.23469387755) self.assertTrue(np.isnan(goods_coverage([0, 0]))) def test_heip_e(self): # Calculate "by hand". arr = np.array([1, 2, 3, 1]) H = shannon(arr) expected = (np.exp(H) - 1) / (arr.size - 1) self.assertEqual(heip_e(arr), expected) # From Statistical Ecology: A Primer in Methods and Computing, page 94, # table 8.1. 
self.assertAlmostEqual(heip_e([500, 300, 200]), 0.90, places=2) self.assertAlmostEqual(heip_e([500, 299, 200, 1]), 0.61, places=2) # Edge cases self.assertEqual(heip_e([5]), 1) self.assertTrue(np.isnan(heip_e([0, 0]))) def test_hill(self): orders = [0, 0.5, 1, 2, 10, np.inf] # a regular case arr = np.array([1, 2, 3, 4, 5]) obs = [hill(arr, order=x) for x in orders] exp = [5, 4.68423304, 4.43598780, 4.09090909, 3.34923645, 3] npt.assert_almost_equal(obs, exp) # equivalent to observed species richness when q = 0 self.assertAlmostEqual(hill(arr, order=0), sobs(arr)) # equivalent to the exponential of Shannon index when q = 1 self.assertAlmostEqual(hill(arr, order=1), shannon(arr, exp=True)) # equivalent to inverse Simpson index when q = 2 (default) self.assertAlmostEqual(hill(arr), inv_simpson(arr)) # equivalent to the inverse of Berger-Parker dominance index when q = inf self.assertAlmostEqual(hill(arr, order=np.inf), 1 / berger_parker_d(arr)) # equally abundant taxa: qD = S arr = np.array([5, 5, 5]) obs = [hill(arr, order=x) for x in orders] exp = [arr.size] * 6 npt.assert_almost_equal(obs, exp) # single taxon: qD = 1 self.assertEqual(hill([1]), 1) # empty community self.assertTrue(np.isnan(hill([0, 0]))) def test_inv_simpson(self): # Totally even community should have 1 / D = number of taxa. self.assertAlmostEqual(inv_simpson(np.array([1, 1, 1, 1, 1, 1])), 6) self.assertAlmostEqual(inv_simpson(np.array([13, 13, 13, 13])), 4) # Hand calculated. arr = np.array([1, 41, 0, 0, 12, 13]) exp = 1 / ((arr / arr.sum()) ** 2).sum() self.assertAlmostEqual(inv_simpson(arr), exp) # Using dominance. exp = 1 / dominance(arr) self.assertAlmostEqual(inv_simpson(arr), exp) arr = np.array([1, 0, 2, 5, 2]) exp = 1 / dominance(arr) self.assertAlmostEqual(inv_simpson(arr), exp) # Finite sample correction. 
self.assertEqual(inv_simpson( np.array([1, 0, 2, 5, 2]), finite=True), 3.75) self.assertEqual(inv_simpson(np.array([3, 3, 3]), finite=True), 4) self.assertTrue(np.isnan(inv_simpson([0, 0]))) def test_kempton_taylor_q(self): # Approximate Magurran 1998 calculation p143. arr = np.array([2, 3, 3, 3, 3, 3, 4, 4, 4, 6, 6, 7, 7, 9, 9, 11, 14, 15, 15, 20, 29, 33, 34, 36, 37, 53, 57, 138, 146, 170]) exp = 14 / np.log(34 / 4) self.assertAlmostEqual(kempton_taylor_q(arr), exp) # Should get same answer regardless of input order. np.random.shuffle(arr) self.assertAlmostEqual(kempton_taylor_q(arr), exp) self.assertTrue(np.isnan(kempton_taylor_q([0, 0]))) def test_margalef(self): self.assertEqual(margalef(self.counts), 8 / np.log(22)) self.assertTrue(np.isnan(margalef([1]))) self.assertTrue(np.isnan(margalef([0, 0]))) def test_mcintosh_d(self): self.assertAlmostEqual(mcintosh_d(np.array([1, 2, 3])), 0.636061424871458) self.assertTrue(np.isnan(mcintosh_d([1]))) self.assertTrue(np.isnan(mcintosh_d([0, 0]))) def test_mcintosh_e(self): num = np.sqrt(15) den = np.sqrt(19) exp = num / den self.assertEqual(mcintosh_e(np.array([1, 2, 3, 1])), exp) self.assertTrue(np.isnan(mcintosh_e([0, 0]))) def test_menhinick(self): # observed species richness = 9, total # of individuals = 22 self.assertEqual(menhinick(self.counts), 9 / np.sqrt(22)) self.assertTrue(np.isnan(menhinick([0, 0]))) def test_michaelis_menten_fit(self): obs = michaelis_menten_fit([22]) self.assertAlmostEqual(obs, 1.0) obs = michaelis_menten_fit([42]) self.assertAlmostEqual(obs, 1.0) obs = michaelis_menten_fit([34], num_repeats=3, params_guess=(13, 13)) self.assertAlmostEqual(obs, 1.0) obs = michaelis_menten_fit([70, 70], num_repeats=5) self.assertAlmostEqual(obs, 2.0, places=1) obs_few = michaelis_menten_fit(np.arange(4) * 2, num_repeats=10) obs_many = michaelis_menten_fit(np.arange(4) * 100, num_repeats=10) # [0,100,200,300] looks like only 3 taxa. 
self.assertAlmostEqual(obs_many, 3.0, places=1) # [0,2,4,6] looks like 3 taxa with maybe more to be found. self.assertTrue(obs_few > obs_many) self.assertTrue(np.isnan(michaelis_menten_fit([0, 0]))) def test_observed_features(self): for vec in (np.array([4, 3, 4, 0, 1, 0, 2]), self.counts): self.assertEqual(observed_features(vec), sobs(vec)) def test_observed_otus(self): with warnings.catch_warnings(): warnings.simplefilter("ignore") for vec in (np.array([4, 3, 4, 0, 1, 0, 2]), self.counts): self.assertEqual(observed_otus(vec), sobs(vec)) def test_osd(self): self.assertEqual(osd(self.counts), (9, 3, 3)) def test_pielou_e(self): # Calculate "by hand". arr = np.array([1, 2, 3, 1]) H = shannon(arr) S = arr.size expected = H / np.log(S) self.assertAlmostEqual(pielou_e(arr), expected) # alternative logarithm base expected = shannon(arr, base=2) / np.log2(S) self.assertAlmostEqual(pielou_e(arr, base=2), expected) self.assertAlmostEqual(pielou_e(self.counts), 0.92485490560) self.assertAlmostEqual(pielou_e([1, 1]), 1.0) self.assertAlmostEqual(pielou_e([1, 1, 1, 1]), 1.0) self.assertAlmostEqual(pielou_e([1, 1, 1, 1, 0, 0]), 1.0) # Examples from # http://ww2.mdsg.umd.edu/interactive_lessons/biofilm/diverse.htm#3 self.assertAlmostEqual(pielou_e([1, 1, 196, 1, 1]), 0.078, 3) # Edge cases self.assertEqual(pielou_e([5]), 1) self.assertTrue(np.isnan(pielou_e([0, 0]))) def test_renyi(self): orders = [0, 0.5, 1, 2, 10, np.inf] # a regular case arr = np.array([1, 2, 3, 4, 5]) obs = [renyi(arr, order=x) for x in orders] exp = [1.60943791, 1.54420220, 1.48975032, 1.40876722, 1.20873239, 1.09861229] npt.assert_almost_equal(obs, exp) # equivalent to Shannon index when q = 1 self.assertAlmostEqual(renyi(arr, order=1), shannon(arr)) # equivalent to log(inverse Simpson index) when q = 2 (default) self.assertAlmostEqual(renyi(arr), np.log(inv_simpson(arr))) # default q, custom logarithm base self.assertAlmostEqual(renyi(arr, base=2), 2.03242148) # equally abundant taxa: qH = log(S) arr = 
np.array([5, 5, 5]) obs = [renyi(arr, order=x) for x in orders] exp = [np.log(arr.size)] * 6 npt.assert_almost_equal(obs, exp) # single taxon: qH = 0 self.assertEqual(renyi([1]), 0) # empty community self.assertTrue(np.isnan(renyi([0, 0]))) def test_robbins(self): self.assertEqual(robbins(np.array([1, 2, 3, 0, 1])), 2 / 7) self.assertTrue(np.isnan(robbins([0, 0]))) def test_shannon(self): self.assertAlmostEqual(shannon([5, 5]), 0.693147181) self.assertEqual(shannon([5, 5], base=2), 1) self.assertAlmostEqual(shannon([5, 5], base=10), 0.301029996) # taxa with 0 counts are excluded from calculation self.assertAlmostEqual(shannon([1, 2, 3, 4]), 1.279854226) self.assertAlmostEqual(shannon([0, 1, 2, 3, 4]), 1.279854226) # Shannon index of a single-taxon community is always 0 self.assertEqual(shannon(np.array([5])), 0) # Shannon index cannot be calculated for an empty community self.assertTrue(np.isnan(shannon([0, 0]))) # NaN still holds if input is empty (instead of 0's), this behavior is # different from scipy.stats.entropy, which would return 0.0. self.assertTrue(np.isnan(shannon([]))) # Exponential of Shannon index self.assertAlmostEqual(shannon([1, 2, 3, 4], exp=True), 3.596115467) # Equally abundant taxa, exp(H) = # taxa self.assertAlmostEqual(shannon([5, 5, 5], exp=True), 3.0) def test_simpson(self): self.assertAlmostEqual(simpson(np.array([1, 0, 2, 5, 2])), 0.66) self.assertEqual(simpson(np.array([5])), 0) self.assertEqual(simpson(np.array([5]), finite=True), 0) self.assertTrue(np.isnan(simpson([0, 0]))) def test_simpson_d(self): for vec in (np.array([5]), np.array([1, 0, 2, 5, 2])): self.assertEqual(dominance(vec), simpson_d(vec)) self.assertEqual(dominance(vec, finite=True), simpson_d(vec, finite=True)) def test_simpson_e(self): # A totally even community should have simpson_e = 1. 
self.assertEqual(simpson_e(np.array([1, 1, 1, 1, 1, 1, 1])), 1) arr = np.array([0, 30, 25, 40, 0, 0, 5]) freq_arr = arr / arr.sum() D = (freq_arr ** 2).sum() exp = 1 / (D * 4) obs = simpson_e(arr) self.assertEqual(obs, exp) # From: # https://groups.nceas.ucsb.edu/sun/meetings/calculating-evenness- # of-habitat-distributions arr = np.array([500, 400, 600, 500]) D = 0.0625 + 0.04 + 0.09 + 0.0625 exp = 1 / (D * 4) self.assertEqual(simpson_e(arr), exp) self.assertTrue(np.isnan(simpson_e([0, 0]))) def test_singles(self): self.assertEqual(singles(self.counts), 3) self.assertEqual(singles(np.array([0, 3, 4])), 0) self.assertEqual(singles(np.array([1])), 1) self.assertEqual(singles([0, 0]), 0) def test_sobs(self): obs = sobs(np.array([4, 3, 4, 0, 1, 0, 2])) self.assertEqual(obs, 5) obs = sobs(np.array([0, 0, 0])) self.assertEqual(obs, 0) obs = sobs(self.counts) self.assertEqual(obs, 9) def test_strong(self): self.assertAlmostEqual(strong(np.array([1, 2, 3, 1])), 0.214285714) self.assertTrue(np.isnan(strong([0, 0]))) def test_tsallis(self): orders = [0, 0.5, 1, 2, 10, np.inf] # a regular case arr = np.array([1, 2, 3, 4, 5]) obs = [tsallis(arr, order=x) for x in orders] exp = [4, 2.32861781, 1.48975032, 0.75555556, 0.11110902, 0] npt.assert_almost_equal(obs, exp) # equivalent to richess - 1 when q = 0 self.assertAlmostEqual(tsallis(arr, order=0), sobs(arr) - 1) # equivalent to Shannon index when q = 1 self.assertAlmostEqual(tsallis(arr, order=1), shannon(arr)) # equivalent to Simpson's diversity index) when q = 2 (default) self.assertAlmostEqual(tsallis(arr), simpson(arr)) # 0 when order is infinity self.assertAlmostEqual(tsallis(arr, order=np.inf), 0) # 0 when there is a single taxon self.assertEqual(tsallis([1]), 0) # empty community self.assertTrue(np.isnan(tsallis([0, 0]))) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_chao1.py000066400000000000000000000054611464262511300235520ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt from skbio.diversity.alpha import chao1, chao1_ci from skbio.diversity.alpha._chao1 import _chao1_var class Chao1Tests(TestCase): def setUp(self): self.counts = np.array([0, 1, 1, 4, 2, 5, 2, 4, 1, 2]) self.no_singles = np.array([0, 2, 2, 4, 5, 0, 0, 0, 0, 0]) self.no_doubles = np.array([0, 1, 1, 4, 5, 0, 0, 0, 0, 0]) def test_chao1(self): self.assertEqual(chao1(self.counts), 9.75) self.assertEqual(chao1(self.counts, bias_corrected=False), 10.5) self.assertEqual(chao1(self.no_singles), 4) self.assertEqual(chao1(self.no_singles, bias_corrected=False), 4) self.assertEqual(chao1(self.no_doubles), 5) self.assertEqual(chao1(self.no_doubles, bias_corrected=False), 5) def test_chao1_ci(self): # Should match observed results from EstimateS. NOTE: EstimateS rounds # to 2 dp. obs = chao1_ci(self.counts) npt.assert_allclose(obs, (9.07, 17.45), rtol=0.01) obs = chao1_ci(self.counts, bias_corrected=False) npt.assert_allclose(obs, (9.17, 21.89), rtol=0.01) obs = chao1_ci(self.no_singles) npt.assert_array_almost_equal(obs, (4, 4.95), decimal=2) obs = chao1_ci(self.no_singles, bias_corrected=False) npt.assert_array_almost_equal(obs, (4, 4.95), decimal=2) obs = chao1_ci(self.no_doubles) npt.assert_array_almost_equal(obs, (4.08, 17.27), decimal=2) obs = chao1_ci(self.no_doubles, bias_corrected=False) npt.assert_array_almost_equal(obs, (4.08, 17.27), decimal=2) def test_chao1_var(self): # Should match observed results from EstimateS.NOTE: EstimateS reports # sd, not var, and rounds to 2 dp. 
obs = _chao1_var(self.counts) npt.assert_allclose(obs, 1.42 ** 2, rtol=0.01) obs = _chao1_var(self.counts, bias_corrected=False) npt.assert_allclose(obs, 2.29 ** 2, rtol=0.01) obs = _chao1_var(self.no_singles) self.assertAlmostEqual(obs, 0.39 ** 2, delta=0.01) obs = _chao1_var(self.no_singles, bias_corrected=False) self.assertAlmostEqual(obs, 0.39 ** 2, delta=0.01) obs = _chao1_var(self.no_doubles) self.assertAlmostEqual(obs, 2.17 ** 2, delta=0.01) obs = _chao1_var(self.no_doubles, bias_corrected=False) self.assertAlmostEqual(obs, 2.17 ** 2, delta=0.01) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_gini.py000066400000000000000000000044121464262511300235000ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt from skbio.diversity.alpha import gini_index from skbio.diversity.alpha._gini import ( _lorenz_curve, _lorenz_curve_integrator) class GiniTests(TestCase): def setUp(self): self.data = np.array([4.5, 6.7, 3.4, 15., 18., 3.5, 6.7, 14.1]) self.lorenz_curve_points = ( np.array([0.125, 0.25, 0.375, 0.5, 0.625, 0.75, 0.875, 1.0]), np.array([0.047287899860917935, 0.095966620305980521, 0.15855354659248957, 0.2517385257301808, 0.34492350486787204, 0.541029207232267, 0.74965229485396379, 1.0])) def test_gini_index_bug_1844(self): exp = 0.0 obs = gini_index([2, 2, 2, 2, 2]) self.assertAlmostEqual(obs, exp) def test_gini_index(self): exp = 0.32771210013908214 obs = gini_index(self.data, 'trapezoids') self.assertAlmostEqual(obs, exp) exp = 0.20271210013908214 obs = gini_index(self.data, 'rectangles') self.assertAlmostEqual(obs, exp) # Raises error on negative data. with self.assertRaises(ValueError): gini_index([1.0, -3.1, 4.5]) def test_lorenz_curve(self): npt.assert_array_almost_equal(_lorenz_curve(self.data), self.lorenz_curve_points) def test_lorenz_curve_integrator(self): exp = 0.33614394993045893 obs = _lorenz_curve_integrator(self.lorenz_curve_points, 'trapezoids') self.assertAlmostEqual(obs, exp) exp = 0.39864394993045893 obs = _lorenz_curve_integrator(self.lorenz_curve_points, 'rectangles') self.assertAlmostEqual(obs, exp) # Raises error on invalid method. with self.assertRaises(ValueError): _lorenz_curve_integrator(self.lorenz_curve_points, 'brofist') if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_lladser.py000066400000000000000000000203451464262511300242030ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. 
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import numpy as np import numpy.testing as npt from skbio.stats import subsample_counts from skbio.diversity.alpha import lladser_pe, lladser_ci from skbio.diversity.alpha._lladser import ( _expand_counts, _lladser_point_estimates, _get_interval_for_r_new_taxa, _lladser_ci_series, _lladser_ci_from_r) def create_fake_observation(): """Create a subsample with defined property""" # Create a subsample of a larger sample such that we can compute # the expected probability of the unseen portion. # This is used in the tests of lladser_pe and lladser_ci counts = np.ones(1001, dtype='int64') counts[0] = 9000 total = counts.sum() fake_obs = subsample_counts(counts, 1000, replace=False, seed=123456789) exp_p = 1 - sum([x/total for (x, y) in zip(counts, fake_obs) if y > 0]) return fake_obs, exp_p class LladserTests(unittest.TestCase): def test_lladser_pe(self): """lladser_pe returns point estimates within the expected variance""" obs = lladser_pe([3], r=4) self.assertTrue(np.isnan(obs)) fake_obs, exp_p = create_fake_observation() reps = 100 sum = 0 for i in range(reps): sum += lladser_pe(fake_obs, r=30) obs = sum / reps # Estimator has variance of (1-p)^2/(r-2), # which for r=30 and p~=0.9 is 0.0289 self.assertTrue(abs(obs - exp_p) < 0.0289) def test_lladser_ci_nan(self): """lladser_ci returns nan if sample is too short to make an estimate""" obs = lladser_ci([3], r=4) self.assertTrue(len(obs) == 2 and np.isnan(obs[0]) and np.isnan(obs[1])) def test_lladser_ci(self): """lladser_ci estimate using defaults contains p with 95% prob""" np.random.seed(12345678) reps = 100 sum = 0 for i in range(reps): fake_obs, exp_p = create_fake_observation() (low, high) = lladser_ci(fake_obs, r=10) if (low <= exp_p <= high): sum += 1 self.assertTrue(sum/reps >= 
0.95) def test_lladser_ci_f3(self): """lladser_ci estimate using f=3 contains p with 95% prob""" # Test different values of f=3 and r=14, which lie exactly on the # 95% interval line. For 100 reps using simple cumulative binomial # probs we expect to have more than 5 misses of the interval in 38% # of all test runs. To make this test pass reliable we thus have to # set a defined seed np.random.seed(12345678) reps = 1000 sum = 0 for i in range(reps): # re-create the obs for every estimate, such that they are truly # independent events fake_obs, exp_p = create_fake_observation() (low, high) = lladser_ci(fake_obs, r=14, f=3) if (low <= exp_p <= high): sum += 1 self.assertTrue(sum/reps >= 0.95, msg=f"{sum}, {reps}, {sum/reps}") def test_expand_counts(self): arr = np.array([2, 0, 1, 2]) npt.assert_array_equal(_expand_counts(arr), np.array([0, 0, 2, 3, 3])) def test_lladser_point_estimates(self): s = [5, 1, 5, 1, 2, 3, 1, 5, 3, 2, 5, 3] r = 3 observed = list(_lladser_point_estimates(s, r)) self.assertEqual(len(observed), 3) for k in range(3): x = observed[k] t = x[2] self.assertEqual(x[0], (r - 1) / t) # Estimator has variance of (1-p)^2/(r-2), # which for r=7 and p=0.5 is 0.05 seq = "WBWBWBWBWBWBWBWBWBWBWBWBWBWBWBWBWBW" reps = 1000 sum = 0 for i in range(reps): p, _, _ = list(_lladser_point_estimates(seq, r=7))[0] sum += p self.assertTrue(0.45 < sum / reps and sum / reps < 0.55) def test_lladser_point_estimates_invalid_r(self): with self.assertRaises(ValueError): list(_lladser_point_estimates([5, 1, 5, 1, 2, 3, 1, 5, 3, 2, 5, 3], 2)) def test_get_interval_for_r_new_taxa(self): s = [5, 1, 5, 1, 2, 3, 1, 5, 3, 2, 5] expected = [(3, set([5]), 4, 0), (4, set([5, 1]), 6, 1), (4, set([5, 1, 2]), 9, 4)] for x, y in zip(_get_interval_for_r_new_taxa(s, 2), expected): self.assertEqual(x, y) s = [5, 5, 5, 5, 5] # never saw new one self.assertEqual(list(_get_interval_for_r_new_taxa(s, 2)), []) def test_lladser_ci_series_exact(self): # have seen RWB urn_1 = 'RWBWWBWRRWRYWRPPZ' 
results = list(_lladser_ci_series(urn_1, r=4)) self.assertEqual(len(results), 3) def test_lladser_ci_series_random(self): seq = "WBWBWBWBWBWB" observations = [] alpha = 0.95 reps = 1000 for i in range(reps): obs = list(_lladser_ci_series(seq, r=4, alpha=alpha))[0] observations.append(obs) tps = list(filter(lambda a_b: a_b[0] < 0.5 and 0.5 < a_b[1], observations)) self.assertTrue(len(tps) >= alpha * reps) # 100%-95% def test_lladser_ci_from_r(self): f = 10 t = 10 r = 4 obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f) npt.assert_almost_equal(obs_low, 0.0806026244) npt.assert_almost_equal(obs_high, 0.806026244) r = 20 t = 100 obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f) npt.assert_almost_equal(obs_low, 0.02787923964) npt.assert_almost_equal(obs_high, 0.2787923964) # make sure we test with each possible alpha alpha = 0.99 obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha) npt.assert_almost_equal(obs_low, 0.03184536992) npt.assert_almost_equal(obs_high, 0.3184536992) alpha = 0.9 r = 3 obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha) npt.assert_almost_equal(obs_low, 0.005635941995) npt.assert_almost_equal(obs_high, 0.05635941995) # test other ci_types ci_type = 'ULCU' obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type) npt.assert_almost_equal(obs_low, 0.01095834700) npt.assert_almost_equal(obs_high, 0.1095834700) alpha = 0.95 t = 10 ci_type = 'U' obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type) npt.assert_almost_equal(obs_low, 0) npt.assert_almost_equal(obs_high, 0.6295793622) ci_type = 'L' obs_low, obs_high = _lladser_ci_from_r(r=r, t=t, f=f, alpha=alpha, ci_type=ci_type) npt.assert_almost_equal(obs_low, 0.0817691447) npt.assert_almost_equal(obs_high, 1) def test_lladser_ci_from_r_invalid_input(self): # unsupported alpha for ci_type='U' with self.assertRaises(ValueError): _lladser_ci_from_r(r=3, t=10, f=10, alpha=0.90, ci_type='U') # unsupported r for 
ci_type='U' with self.assertRaises(ValueError): _lladser_ci_from_r(r=42, t=10, f=10, alpha=0.95, ci_type='U') # unsupported alpha for ci_type='L' with self.assertRaises(ValueError): _lladser_ci_from_r(r=3, t=10, f=10, alpha=0.90, ci_type='L') # unsupported r for ci_type='L' with self.assertRaises(ValueError): _lladser_ci_from_r(r=50, t=10, f=10, alpha=0.95, ci_type='L') # unknown ci_type with self.assertRaises(ValueError): _lladser_ci_from_r(r=4, t=10, f=10, alpha=0.95, ci_type='brofist') # requesting CI for not precomputed values with self.assertRaises(ValueError): _lladser_ci_from_r(r=500, t=10, f=10) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/diversity/alpha/tests/test_pd.py000066400000000000000000000375471464262511300231740ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main from io import StringIO import os import numpy as np import pandas as pd from skbio import TreeNode from skbio.util import get_data_path from skbio.tree import DuplicateNodeError, MissingNodeError from skbio.diversity.alpha import faith_pd, phydiv class FaithPDTests(TestCase): def setUp(self): self.counts = np.array([0, 1, 1, 4, 2, 5, 2, 4, 1, 2]) self.b1 = np.array([[1, 3, 0, 1, 0], [0, 2, 0, 4, 4], [0, 0, 6, 2, 1], [0, 0, 1, 1, 1], [2, 0, 3, 0, 0]]) self.sids1 = list('ABCDE') self.oids1 = ['OTU%d' % i for i in range(1, 6)] self.t1 = TreeNode.read(StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):' '0.0,(OTU4:0.75,OTU5:0.75):1.25):0.0)root;')) self.t1_w_extra_tips = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0' ')root;')) def test_faith_pd(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # phylogenetic diversity implementation actual = faith_pd(self.b1[0], self.oids1, self.t1) expected = 4.5 self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[1], self.oids1, self.t1) expected = 4.75 self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[2], self.oids1, self.t1) expected = 4.75 self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[3], self.oids1, self.t1) expected = 4.75 self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[4], self.oids1, self.t1) expected = 3.0 self.assertAlmostEqual(actual, expected) def test_faith_pd_extra_tips(self): # results are the same despite presences of unobserved tips in tree actual = faith_pd(self.b1[0], self.oids1, self.t1_w_extra_tips) expected = faith_pd(self.b1[0], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[1], self.oids1, self.t1_w_extra_tips) expected = 
faith_pd(self.b1[1], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[2], self.oids1, self.t1_w_extra_tips) expected = faith_pd(self.b1[2], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[3], self.oids1, self.t1_w_extra_tips) expected = faith_pd(self.b1[3], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) actual = faith_pd(self.b1[4], self.oids1, self.t1_w_extra_tips) expected = 3.0 self.assertAlmostEqual(actual, expected) def test_faith_pd_none_observed(self): actual = faith_pd(np.array([], dtype=int), np.array([], dtype=int), self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) actual = faith_pd([0, 0, 0, 0, 0], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_faith_pd_all_observed(self): actual = faith_pd([1, 1, 1, 1, 1], self.oids1, self.t1) expected = sum(n.length for n in self.t1.traverse() if n.length is not None) self.assertAlmostEqual(actual, expected) actual = faith_pd([1, 2, 3, 4, 5], self.oids1, self.t1) expected = sum(n.length for n in self.t1.traverse() if n.length is not None) self.assertAlmostEqual(actual, expected) def test_faith_pd_one_observed(self): actual = faith_pd([1, 0, 0, 0, 0], self.oids1, self.t1) expected = 2.0 self.assertAlmostEqual(actual, expected) def test_faith_pd_minimal(self): # two tips tree = TreeNode.read(StringIO('(OTU1:0.25, OTU2:0.25)root;')) actual = faith_pd([1, 0], ['OTU1', 'OTU2'], tree) expected = 0.25 self.assertEqual(actual, expected) def test_faith_pd_qiime_tiny_test(self): # the following table and tree are derived from the QIIME 1.9.1 # "tiny-test" data tt_table_fp = get_data_path( os.path.join('qiime-191-tt', 'otu-table.tsv'), 'data') tt_tree_fp = get_data_path( os.path.join('qiime-191-tt', 'tree.nwk'), 'data') self.q_table = pd.read_csv(tt_table_fp, sep='\t', skiprows=1, index_col=0) self.q_tree = TreeNode.read(tt_tree_fp) expected_fp = get_data_path( 
os.path.join('qiime-191-tt', 'faith-pd.txt'), 'data') expected = pd.read_csv(expected_fp, sep='\t', index_col=0) for sid in self.q_table.columns: actual = faith_pd(self.q_table[sid], taxa=self.q_table.index, tree=self.q_tree) self.assertAlmostEqual(actual, expected['PD_whole_tree'][sid]) def test_faith_pd_root_not_observed(self): # expected values computed by hand tree = TreeNode.read( StringIO('((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)' 'root;')) taxa = ['OTU%d' % i for i in range(1, 5)] # root node not observed, but branch between (OTU1, OTU2) and root # is considered observed actual = faith_pd([1, 1, 0, 0], taxa, tree) expected = 0.6 self.assertAlmostEqual(actual, expected) # root node not observed, but branch between (OTU3, OTU4) and root # is considered observed actual = faith_pd([0, 0, 1, 1], taxa, tree) expected = 2.3 self.assertAlmostEqual(actual, expected) def test_faith_pd_invalid_input(self): # tree has duplicated tip ids t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(DuplicateNodeError, faith_pd, counts, taxa, t) # unrooted tree as input t = TreeNode.read(StringIO('((OTU1:0.1, OTU2:0.2):0.3, OTU3:0.5,' 'OTU4:0.7);')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) # taxa has duplicated ids t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU2'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) # len of vectors not equal t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2'] self.assertRaises(ValueError, faith_pd, 
counts, taxa, t) # negative counts t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, -3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) # tree with no branch lengths t = TreeNode.read( StringIO('((((OTU1,OTU2),OTU3)),(OTU4,OTU5));')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) # tree missing some branch lengths t = TreeNode.read( StringIO('(((((OTU1,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, faith_pd, counts, taxa, t) # taxa not present in tree t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU42'] self.assertRaises(MissingNodeError, faith_pd, counts, taxa, t) class PhyDivTests(TestCase): def setUp(self): self.tree = TreeNode.read(StringIO( '(((a:0.4,b:0.5):0.7,((c:0.1,d:0.2):0.6,(e:0.2,f:0.3):0.4):0.2)' ':0.1,g:1.2):0.2;')) # each dash (-) represents branch length = 0.05 # # /--------a # /--------------| # | \----------b # | # /--| /--c # | | /------------| # | | | \----d # | \----| # ----| | /----e # | \--------| # | \------f # | # \------------------------g self.taxa = list('abcdef') self.data = np.array([ [1, 2, 0, 0, 0, 0], # clade (a, b) [0, 0, 3, 4, 0, 0], # clade (c, d) [0, 0, 0, 0, 5, 6], # clade (e, f) [0, 0, 3, 4, 5, 6], # non-basal, monophyletic group (c, d, e, f) [1, 2, 3, 4, 0, 0]]) # basal, non-monophyletic group (a, b, c, d) def test_phydiv_rooted_unweighted(self): # equivalent to faith_pd for datum in self.data: obs = phydiv(datum, self.taxa, self.tree) exp = faith_pd(datum, self.taxa, self.tree) self.assertAlmostEqual(obs, exp) def test_phydiv_unrooted_unweighted(self): # equivalent to faith_pd without path to root exps = [0.9, 
0.3, 0.5, 1.8, 2.7] for datum, exp in zip(self.data, exps): obs = phydiv(datum, self.taxa, self.tree, rooted=False) self.assertAlmostEqual(obs, exp) # edge case: one taxon obs = phydiv([1], ['a'], self.tree, rooted=False) self.assertEqual(obs, 0) # edge case: zero taxon obs = phydiv([0], ['a'], self.tree, rooted=False) self.assertEqual(obs, 0) # edge case: no taxon obs = phydiv([], [], self.tree, rooted=False) self.assertEqual(obs, 0) def test_phydiv_rooted_weighted(self): # group (a, b) # = (0.4 * 1 + 0.5 * 2 + (0.7 + 0.1 + 0.2) * (1 + 2)) / (1 + 2) obs = phydiv(self.data[0], self.taxa, self.tree, weight=True) exp = 1.46666667 self.assertAlmostEqual(obs, exp) # group (c, d) # = (0.1 * 3 + 0.2 * 4 + (0.6 + 0.2 + 0.1 + 0.2) * (3 + 4)) / (3 + 4) obs = phydiv(self.data[1], self.taxa, self.tree, weight=True) exp = 1.25714286 self.assertAlmostEqual(obs, exp) # group (c, d, e, f) # = (0.1 * 3 + 0.2 * 4 + 0.2 * 5 + 0.3 * 6 + 0.6 * (3 + 4) + 0.4 * # (5 + 6) + (0.2 + 0.1 + 0.2) * (3 + 4 + 5 + 6)) / (3 + 4 + 5 + 6) obs = phydiv(self.data[3], self.taxa, self.tree, weight=True) exp = 1.19444444 self.assertAlmostEqual(obs, exp) # group (a, b, c, d) # = (0.4 * 1 + 0.5 * 2 + 0.1 * 3 + 0.2 * 4 + 0.7 * (1 + 2) + (0.6 + # 0.2) * (3 + 4) + (0.1 + 0.2) * (1 + 2 + 3 + 4)) / (1 + 2 + 3 + 4) obs = phydiv(self.data[4], self.taxa, self.tree, weight=True) exp = 1.32 self.assertAlmostEqual(obs, exp) def test_phydiv_unrooted_weighted(self): # a.k.a., balance-weighted PD # group (a, b) # = (0.4 + 0.5) * 2 * min(1, 2) / (1 + 2) obs = phydiv(self.data[0], self.taxa, self.tree, rooted=False, weight=True) exp = 0.6 self.assertAlmostEqual(obs, exp) # group (c, d) # = (0.1 + 0.2) * 2 * min(3, 4) / (3 + 4) obs = phydiv(self.data[1], self.taxa, self.tree, rooted=False, weight=True) exp = 0.25714286 self.assertAlmostEqual(obs, exp) # group (c, d, e, f) # = 2 * (0.1 * min(3, 4 + 5 + 6) + 0.2 * min(4, 3 + 5 + 6) + 0.2 * # min(5, 3 + 4 + 6) + 0.3 * min(6, 3 + 4 + 5) + (0.6 + 0.4) * # min(3 + 4, 5 + 6)) / 
(3 + 4 + 5 + 6) obs = phydiv(self.data[3], self.taxa, self.tree, rooted=False, weight=True) exp = 1.21111111 self.assertAlmostEqual(obs, exp) # group (a, b, c, d) # = 2 * (0.4 * min(1, 2 + 3 + 4) + 0.5 * min(2, 1 + 3 + 4) + (0.7 + # 0.6 + 0.2) * min(1 + 2, 3 + 4) + 0.1 * min(3, 1 + 2 + 4) + 0.2 * # min(4, 1 + 2 + 3)) / (1 + 2 + 3 + 4) obs = phydiv(self.data[4], self.taxa, self.tree, rooted=False, weight=True) exp = 1.4 self.assertAlmostEqual(obs, exp) # edge cases self.assertEqual(phydiv([1], ['a'], self.tree, False, True), 0) self.assertEqual(phydiv([0], ['a'], self.tree, False, True), 0) self.assertEqual(phydiv([], [], self.tree, False, True), 0) def test_phydiv_weight_param(self): # group (a, b), unrooted # = (0.4 + 0.5) * (2 * min(1, 2) / (1 + 2)) ** theta obs = phydiv(self.data[0], self.taxa, self.tree, False, 0.5) exp = 0.73484692 self.assertAlmostEqual(obs, exp) obs = phydiv(self.data[0], self.taxa, self.tree, False, 0.25) exp = 0.81324180 self.assertAlmostEqual(obs, exp) # fall back to unweighted obs = phydiv(self.data[0], self.taxa, self.tree, False, 0) exp = 0.9 # fall back to fully-weighted self.assertAlmostEqual(obs, exp) obs = phydiv(self.data[0], self.taxa, self.tree, False, 1) exp = 0.6 self.assertAlmostEqual(obs, exp) # rooted # = (0.4 * 1 ** theta + 0.5 * 2 ** theta + (0.7 + 0.1 + 0.2) * # (1 + 2) ** theta) / (1 + 2) ** theta obs = phydiv(self.data[0], self.taxa, self.tree, True, 0.5) exp = 1.63918840 self.assertAlmostEqual(obs, exp) obs = phydiv(self.data[0], self.taxa, self.tree, True, 0.25) exp = 1.75573528 self.assertAlmostEqual(obs, exp) # edge cases self.assertEqual(phydiv([1], ['a'], self.tree, False, 0.5), 0) self.assertEqual(phydiv([0], ['a'], self.tree, False, 0.5), 0) self.assertEqual(phydiv([], [], self.tree, False, 0.5), 0) def test_phydiv_tree_unrooted(self): # convert tree to unrooted outgroup = self.tree.find('g') ingroup = outgroup.siblings()[0] unrooted = ingroup.copy() unrooted.extend([outgroup.copy()]) unrooted.length += 
self.tree.length or 0.0 # auto-enter unrooted mode obs = phydiv(self.data[0], self.taxa, unrooted) exp = phydiv(self.data[0], self.taxa, self.tree, rooted=False) self.assertEqual(obs, exp) # force rooted mode obs = phydiv(self.data[0], self.taxa, unrooted, rooted=True) exp = phydiv(self.data[0], self.taxa, self.tree) self.assertEqual(obs, exp) def test_phydiv_invalid_weight(self): params = (self.data[0], self.taxa, self.tree) self.assertRaises(ValueError, phydiv, *params, weight='hello') self.assertRaises(ValueError, phydiv, *params, weight=-0.5) self.assertRaises(ValueError, phydiv, *params, weight=2.0) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/diversity/beta/000077500000000000000000000000001464262511300176245ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/beta/__init__.py000066400000000000000000000020141464262511300217320ustar00rootroot00000000000000"""Beta diversity measures (:mod:`skbio.diversity.beta`) ===================================================== .. currentmodule:: skbio.diversity.beta This package provides implementations of beta diversity measures for computing sample dissimilarity. Users of this package should also explore ``scipy.spatial.distance.pdist``, as it contains implementations of additional beta diversity metrics with interfaces similar to those provided here. Functions --------- .. autosummary:: :toctree: unweighted_unifrac weighted_unifrac """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._unifrac import unweighted_unifrac, weighted_unifrac __all__ = ["unweighted_unifrac", "weighted_unifrac"] scikit-bio-0.6.2/skbio/diversity/beta/_unifrac.py000066400000000000000000000563161464262511300217770ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import functools import numpy as np from skbio.diversity._util import ( _validate_counts_matrix, _validate_taxa_and_tree, _vectorize_counts_and_tree, _check_taxa_alias, ) from skbio.diversity._phylogenetic import _tip_distances # The default value indicating whether normalization should be applied # for weighted UniFrac. This is used in two locations, so set in a single # variable to avoid the code base becoming out of sync in the event of a # change in this default value. _normalize_weighted_unifrac_by_default = False def unweighted_unifrac( u_counts, v_counts, taxa=None, tree=None, validate=True, otu_ids=None ): """Compute unweighted UniFrac. Parameters ---------- u_counts, v_counts: list, np.array Vectors of counts/abundances of taxa for two samples. Must be equal length. taxa : list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``u_counts`` and ``v_counts``. Required. tree : skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of ``taxa``, but not a subset. Required. validate: bool, optional If ``False``, validation of the input won't be performed. This step can be slow, so if validation is run elsewhere it can be disabled here. 
However, invalid input data can lead to invalid results or error messages that are hard to interpret, so this step should not be bypassed if you're not certain that your input data are valid. See :mod:`skbio.diversity` for the description of what validation entails so you can determine if you can safely disable validation. otu_ids : list, np.array Alias of ``taxa`` for backward compatibility. Deprecated and to be removed in a future release. Returns ------- float The unweighted UniFrac distance between the two samples. Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. Exact error will depend on what was invalid. See Also -------- weighted_unifrac skbio.diversity skbio.diversity.beta_diversity Notes ----- Unweighted UniFrac was originally described in [1]_. A discussion of unweighted (qualitative) versus weighted (quantitative) diversity metrics is presented in [2]_. Deeper mathematical discussions of this metric is presented in [3]_. If computing unweighted UniFrac for multiple pairs of samples, using ``skbio.diversity.beta_diversity`` will be much faster than calling this function individually on each sample. This implementation differs from that in PyCogent (and therefore QIIME versions less than 2.0.0) by imposing a few additional restrictions on the inputs. First, the input tree must be rooted. In PyCogent, if an unrooted tree was provided that had a single trifurcating node (a newick convention for unrooted trees) that node was considered the root of the tree. Next, all taxa must be tips in the tree. PyCogent would silently ignore taxa that were not present the tree. To reproduce UniFrac results from PyCogent with scikit-bio, ensure that your PyCogent UniFrac calculations are performed on a rooted tree and that all taxa are present in the tree. This implementation of unweighted UniFrac is the array-based implementation described in [4]_. 
If using large number of samples or a large tree, we advise using the optimized UniFrac library [5]_. References ---------- .. [1] Lozupone, C. & Knight, R. UniFrac: a new phylogenetic method for comparing microbial communities. Appl. Environ. Microbiol. 71, 8228-8235 (2005). .. [2] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. Quantitative and qualitative beta diversity measures lead to different insights into factors that structure microbial communities. Appl. Environ. Microbiol. 73, 1576-1585 (2007). .. [3] Lozupone, C., Lladser, M. E., Knights, D., Stombaugh, J. & Knight, R. UniFrac: an effective distance metric for microbial community comparison. ISME J. 5, 169-172 (2011). .. [4] Hamady M, Lozupone C, Knight R. Fast UniFrac: facilitating high- throughput phylogenetic analyses of microbial communities including analysis of pyrosequencing and PhyloChip data. ISME J. 4(1):17-27 (2010). .. [5] https://github.com/biocore/unifrac Examples -------- Assume we have the following abundance data for two samples, ``u`` and ``v``, represented as a pair of counts vectors. These counts represent the number of times specific Operational Taxonomic Units, or taxa, were observed in each of the samples. >>> u_counts = [1, 0, 0, 4, 1, 2, 3, 0] >>> v_counts = [0, 1, 1, 6, 0, 1, 0, 0] Because UniFrac is a phylogenetic diversity metric, we need to know which taxon each count corresponds to, which we'll provide as ``taxa``. >>> taxa = ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7', 'U8'] We also need a phylogenetic tree that relates the taxa to one another. >>> from io import StringIO >>> from skbio import TreeNode >>> tree = TreeNode.read(StringIO( ... '(((((U1:0.5,U2:0.5):0.5,U3:1.0):1.0):0.0,' ... '(U4:0.75,(U5:0.5,((U6:0.33,U7:0.62):0.5' ... ',U8:0.5):0.5):0.5):1.25):0.0)root;')) We can then compute the unweighted UniFrac distance between the samples. 
>>> from skbio.diversity.beta import unweighted_unifrac >>> uu = unweighted_unifrac(u_counts, v_counts, taxa, tree) >>> print(round(uu, 2)) 0.37 """ taxa = _check_taxa_alias(taxa, tree, otu_ids) u_node_counts, v_node_counts, _, _, tree_index = _setup_pairwise_unifrac( u_counts, v_counts, taxa, tree, validate, normalized=False, unweighted=True ) return _unweighted_unifrac(u_node_counts, v_node_counts, tree_index["length"]) def weighted_unifrac( u_counts, v_counts, taxa=None, tree=None, normalized=_normalize_weighted_unifrac_by_default, validate=True, otu_ids=None, ): """Compute weighted UniFrac with or without branch length normalization. Parameters ---------- u_counts, v_counts: list, np.array Vectors of counts/abundances of taxa for two samples. Must be equal length. taxa : list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``u_counts`` and ``v_counts``. Required. tree : skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of ``taxa``, but not a subset. Required. normalized: boolean, optional If ``True``, apply branch length normalization, which is described in [1]_. Resulting distances will then be in the range ``[0, 1]``. validate: bool, optional If ``False``, validation of the input won't be performed. This step can be slow, so if validation is run elsewhere it can be disabled here. However, invalid input data can lead to invalid results or error messages that are hard to interpret, so this step should not be bypassed if you're not certain that your input data are valid. See :mod:`skbio.diversity` for the description of what validation entails so you can determine if you can safely disable validation. otu_ids : list, np.array Alias of ``taxa`` for backward compatibility. Deprecated and to be removed in a future release. Returns ------- float The weighted UniFrac distance between the two samples. Raises ------ ValueError, MissingNodeError, DuplicateNodeError If validation fails. 
Exact error will depend on what was invalid. See Also -------- unweighted_unifrac skbio.diversity skbio.diversity.beta_diversity Notes ----- Weighted UniFrac was originally described in [1]_, which includes a discussion of unweighted (qualitative) versus weighted (quantitiative) diversity metrics. Deeper mathemtical discussions of this metric is presented in [2]_. If computing weighted UniFrac for multiple pairs of samples, using ``skbio.diversity.beta_diversity`` will be much faster than calling this function individually on each sample. This implementation differs from that in PyCogent (and therefore QIIME versions less than 2.0.0) by imposing a few additional restrictions on the inputs. First, the input tree must be rooted. In PyCogent, if an unrooted tree was provided that had a single trifurcating node (a newick convention for unrooted trees) that node was considered the root of the tree. Next, all taxa must be tips in the tree. PyCogent would silently ignore taxa that were not present the tree. To reproduce UniFrac results from PyCogent with scikit-bio, ensure that your PyCogent UniFrac calculations are performed on a rooted tree and that all taxa are present in the tree. This implementation of weighted UniFrac is the array-based implementation described in [3]_. If using large number of samples or a large tree, we advise using the optimized UniFrac library [4]_. References ---------- .. [1] Lozupone, C. A., Hamady, M., Kelley, S. T. & Knight, R. Quantitative and qualitative beta diversity measures lead to different insights into factors that structure microbial communities. Appl. Environ. Microbiol. 73, 1576-1585 (2007). .. [2] Lozupone, C., Lladser, M. E., Knights, D., Stombaugh, J. & Knight, R. UniFrac: an effective distance metric for microbial community comparison. ISME J. 5, 169-172 (2011). .. [3] Hamady M, Lozupone C, Knight R. 
Fast UniFrac: facilitating high- throughput phylogenetic analyses of microbial communities including analysis of pyrosequencing and PhyloChip data. ISME J. 4(1):17-27 (2010). .. [4] https://github.com/biocore/unifrac Examples -------- Assume we have the following abundance data for two samples, ``u`` and ``v``, represented as a pair of counts vectors. These counts represent the number of times specific taxa were observed in each of the samples. >>> u_counts = [1, 0, 0, 4, 1, 2, 3, 0] >>> v_counts = [0, 1, 1, 6, 0, 1, 0, 0] Because UniFrac is a phylogenetic diversity metric, we need to know which taxon each count corresponds to, which we'll provide as ``taxa``. >>> taxa = ['U1', 'U2', 'U3', 'U4', 'U5', 'U6', 'U7', 'U8'] We also need a phylogenetic tree that relates the taxa to one another. >>> from io import StringIO >>> from skbio import TreeNode >>> tree = TreeNode.read(StringIO( ... '(((((U1:0.5,U2:0.5):0.5,U3:1.0):1.0):0.0,' ... '(U4:0.75,(U5:0.5,((U6:0.33,U7:0.62):0.5' ... ',U8:0.5):0.5):0.5):1.25):0.0)root;')) Compute the weighted UniFrac distance between the samples. >>> from skbio.diversity.beta import weighted_unifrac >>> wu = weighted_unifrac(u_counts, v_counts, taxa, tree) >>> print(round(wu, 2)) 1.54 Compute the weighted UniFrac distance between the samples including branch length normalization so the value falls in the range ``[0.0, 1.0]``. 
>>> wu = weighted_unifrac(u_counts, v_counts, taxa, tree, normalized=True) >>> print(round(wu, 2)) 0.33 """ taxa = _check_taxa_alias(taxa, tree, otu_ids) ( u_node_counts, v_node_counts, u_total_count, v_total_count, tree_index, ) = _setup_pairwise_unifrac( u_counts, v_counts, taxa, tree, validate, normalized=normalized, unweighted=False, ) branch_lengths = tree_index["length"] if normalized: tip_indices = _get_tip_indices(tree_index) node_to_root_distances = _tip_distances(branch_lengths, tree, tip_indices) return _weighted_unifrac_normalized( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths, node_to_root_distances, ) else: return _weighted_unifrac( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths )[0] def _validate(u_counts, v_counts, taxa, tree): _validate_counts_matrix([u_counts, v_counts], cast_int=False) _validate_taxa_and_tree(counts=u_counts, taxa=taxa, tree=tree) def _setup_pairwise_unifrac( u_counts, v_counts, taxa, tree, validate, normalized, unweighted ): if validate: _validate(u_counts, v_counts, taxa, tree) # temporarily store u_counts and v_counts in a 2-D array as that's what # _vectorize_counts_and_tree takes u_counts = np.asarray(u_counts) v_counts = np.asarray(v_counts) counts = np.vstack([u_counts, v_counts]) counts_by_node, tree_index, branch_lengths = _vectorize_counts_and_tree( counts, taxa, tree ) # unpack counts vectors for single pairwise UniFrac calculation u_node_counts = counts_by_node[0] v_node_counts = counts_by_node[1] u_total_count = u_counts.sum() v_total_count = v_counts.sum() return (u_node_counts, v_node_counts, u_total_count, v_total_count, tree_index) def _unweighted_unifrac(u_node_counts, v_node_counts, branch_lengths): """Calculate unweighted UniFrac distance between samples. Parameters ---------- u_node_counts, v_node_counts : np.array Vectors indicating presence (value greater than zero) and absence (value equal to zero) of nodes in two samples, `u` and `v`. 
Order is assumed to be the same as in `branch_lengths`. branch_lengths : np.array Vector of branch lengths of all nodes (tips and internal nodes) in postorder representation of their tree. Returns ------- float Unweighted UniFrac distance between samples. Notes ----- The count vectors passed here correspond to all nodes in the tree, not just the tips. """ unique_nodes = np.logical_xor(u_node_counts, v_node_counts) observed_nodes = np.logical_or(u_node_counts, v_node_counts) unique_branch_length = (branch_lengths * unique_nodes).sum() observed_branch_length = (branch_lengths * observed_nodes).sum() if observed_branch_length == 0.0: # handle special case to avoid division by zero return 0.0 return unique_branch_length / observed_branch_length def _weighted_unifrac( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths ): """Calculate weighted Unifrac distance between samples. Parameters ---------- u_node_counts, v_node_counts : np.array Vectors indicating presence (value greater than zero) and absence (value equal to zero) of nodes in two samples, `u` and `v`. Order is assumed to be the same as in `branch_lengths`. u_total_count, v_total_count : int The sum of ``u_node_counts`` and ``v_node_counts`` vectors, respectively. This could be computed internally, but since this is a private method and the calling function has already generated these values, this saves an iteration over each of these vectors. branch_lengths : np.array Vector of branch lengths of all nodes (tips and internal nodes) in postorder representation of their tree. Returns ------- float Weighted UniFrac distance between samples. 
np.array of float Proportional abundance of each node in tree in sample `u` np.array of float Proportional abundance of each node in tree in sample `v` """ if u_total_count > 0: # convert to relative abundances if there are any counts u_node_proportions = u_node_counts / u_total_count else: # otherwise, we'll just do the computation with u_node_counts, which # is necessarily all zeros u_node_proportions = u_node_counts if v_total_count > 0: v_node_proportions = v_node_counts / v_total_count else: v_node_proportions = v_node_counts wu = (branch_lengths * np.absolute(u_node_proportions - v_node_proportions)).sum() return wu, u_node_proportions, v_node_proportions def _weighted_unifrac_normalized( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths, node_to_root_distances, ): """Calculate weighted normalized UniFrac distance between samples. Parameters ---------- u_node_counts, v_node_counts : np.array Vectors indicating presence (value greater than zero) and absence (value equal to zero) of nodes in two samples, `u` and `v`. Order is assumed to be the same as in `branch_lengths`. u_total_count, v_total_count : int The sum of ``u_node_counts`` and ``v_node_counts`` vectors, respectively. This could be computed internally, but since this is a private method and the calling function has already generated these values, this saves an iteration over each of these vectors. branch_lengths : np.array Vector of branch lengths of all nodes (tips and internal nodes) in postorder representation of their tree. node_to_root_distances : np.ndarray 1D column vector of branch lengths in post order form. There should be positions in this vector for all nodes in the tree, but only tips should be non-zero. Returns ------- float Normalized weighted UniFrac distance between samples. Notes ----- The count vectors passed here correspond to all nodes in the tree, not just the tips. 
""" if u_total_count == 0.0 and v_total_count == 0.0: # handle special case to avoid division by zero return 0.0 u, u_node_proportions, v_node_proportions = _weighted_unifrac( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths ) c = _weighted_unifrac_branch_correction( node_to_root_distances, u_node_proportions, v_node_proportions ) return u / c def _setup_multiple_unifrac(counts, taxa, tree, validate): if validate: _validate_taxa_and_tree(counts[0], taxa, tree) counts_by_node, tree_index, branch_lengths = _vectorize_counts_and_tree( counts, taxa, tree ) return counts_by_node, tree_index, branch_lengths def _setup_multiple_unweighted_unifrac(counts, taxa, tree, validate): r"""Create optimized pdist-compatible unweighted UniFrac function. Parameters ---------- counts : 2D array_like of ints or floats Matrix containing count/abundance data where each row contains counts of observations in a given sample. taxa: list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``u_counts`` and ``v_counts``. These IDs do not need to be in tip order with respect to the tree. tree: skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of ``taxa``, but not a subset. validate: bool, optional If ``False``, validation of the input won't be performed. Returns ------- function Optimized pairwise unweighted UniFrac calculator that can be passed to ``scipy.spatial.distance.pdist``. 2D np.array of ints, floats Counts of all nodes in ``tree``. """ counts_by_node, _, branch_lengths = _setup_multiple_unifrac( counts, taxa, tree, validate ) f = functools.partial(_unweighted_unifrac, branch_lengths=branch_lengths) return f, counts_by_node def _setup_multiple_weighted_unifrac(counts, taxa, tree, normalized, validate): r"""Create optimized pdist-compatible weighted UniFrac function. 
Parameters ---------- counts : 2D array_like of ints or floats Matrix containing count/abundance data where each row contains counts of observations in a given sample. taxa : list, np.array Vector of taxon IDs corresponding to tip names in ``tree``. Must be the same length as ``u_counts`` and ``v_counts``. These IDs do not need to be in tip order with respect to the tree. tree : skbio.TreeNode Tree relating taxa. The set of tip names in the tree can be a superset of ``taxa``, but not a subset. normalized : bool If ``True``, output will be normalized. validate: bool, optional If ``False``, validation of the input won't be performed. Returns ------- function Optimized pairwise unweighted UniFrac calculator that can be passed to ``scipy.spatial.distance.pdist``. 2D np.array of ints, floats Counts of all nodes in ``tree``. """ counts_by_node, tree_index, branch_lengths = _setup_multiple_unifrac( counts, taxa, tree, validate ) tip_indices = _get_tip_indices(tree_index) if normalized: node_to_root_distances = _tip_distances(branch_lengths, tree, tip_indices) def f(u_node_counts, v_node_counts): u_total_count = np.take(u_node_counts, tip_indices).sum() v_total_count = np.take(v_node_counts, tip_indices).sum() u = _weighted_unifrac_normalized( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths, node_to_root_distances, ) return u else: def f(u_node_counts, v_node_counts): u_total_count = np.take(u_node_counts, tip_indices).sum() v_total_count = np.take(v_node_counts, tip_indices).sum() u, _, _ = _weighted_unifrac( u_node_counts, v_node_counts, u_total_count, v_total_count, branch_lengths, ) return u return f, counts_by_node def _get_tip_indices(tree_index): tip_indices = np.array( [n.id for n in tree_index["id_index"].values() if n.is_tip()], dtype=np.intp ) return tip_indices def _weighted_unifrac_branch_correction( node_to_root_distances, u_node_proportions, v_node_proportions ): """Calculate weighted unifrac branch length correction. 
Parameters ---------- node_to_root_distances : np.ndarray 1D column vector of branch lengths in post order form. There should be positions in this vector for all nodes in the tree, but only tips should be non-zero. u_node_proportions, v_node_proportions : np.ndarray Proportional abundace of observations of all nodes in the tree in samples ``u`` and ``v``, respectively. u_total_count, v_total_count : float The sum of the observations in samples ``u`` and ``v``, respectively. Returns ------- np.ndarray The corrected branch lengths """ return ( node_to_root_distances.ravel() * (u_node_proportions + v_node_proportions) ).sum() scikit-bio-0.6.2/skbio/diversity/beta/tests/000077500000000000000000000000001464262511300207665ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/beta/tests/__init__.py000066400000000000000000000005411464262511300230770ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/diversity/beta/tests/data/000077500000000000000000000000001464262511300216775ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/000077500000000000000000000000001464262511300237405ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/README.md000066400000000000000000000025701464262511300252230ustar00rootroot00000000000000Files in this directory are the QIIME 1.9.1 "tiny test" files. These data were developed by @gregcaporaso, who gave permission to reproduce them in scikit-bio. 
If you have a [QIIME 1.9.1 base installation](http://install.qiime.org), the raw input files in this directory can be obtained by running: ```bash python -c "from qiime.test import write_test_data; write_test_data('.')" biom convert -i biom --to-tsv -o otu-table.tsv ``` After converting to tsv, the following OTUs are removed because they are not present in the tree (they're not 16S sequences, so can't be aligned with PyNAST): ``None1``, ``None10``, ``None6``, and ``None2``. The ``not16S.1`` sample is also removed because, after removing those OTUs, it has a total count of 0. This boundary case is tested directly in the ``unifrac_*`` and ``faith_pd`` tests. Then, in the python interpreter, we midpoint root the tree (since this is a QIIME 1.9.1 installation, this step is performed with scikit-bio 0.2.3): ```python from skbio import TreeNode t = TreeNode.read('./tree') t = t.root_at_midpoint() t.write('tree', format='newick') ``` The output files (alpha diversity values and beta diversity distance matrices) can then be obtained by running: ```bash alpha_diversity.py -i biom -t tree -m PD_whole_tree -o pd.txt beta_diversity.py -m weighted_unifrac,unweighted_unifrac,weighted_normalized_unifrac -i biom -t tree -o o ``` scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/otu-table.tsv000066400000000000000000000004431464262511300263730ustar00rootroot00000000000000# Constructed from biom file #OTU ID f2 f1 f3 f4 p2 p1 t1 t2 295053 20 18 18 22 4 0 0 0 42684 0 0 0 0 1 0 0 0 None11 1 0 0 0 1 1 0 0 None7 0 0 0 0 1 0 0 0 None5 0 0 0 0 1 0 0 0 None4 0 0 0 0 1 1 0 0 None3 0 0 0 0 1 0 2 3 879972 0 0 0 0 9 20 1 4 None9 0 0 0 0 3 0 19 15 None8 1 4 4 0 0 0 0 0 
scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/tree.nwk000066400000000000000000000004121464262511300254150ustar00rootroot00000000000000(((879972:0.05039,None3:0.00778)0.980:0.15948,((None11:0.07161,None4:0.06965)0.917:0.09643,(295053:0.06096,42684:0.15599)0.910:0.08898)0.899:0.09227)0.958:0.064315,(None8:0.09606,(None7:0.10435,(None5:0.02626,None9:0.00014)1.000:0.25335)0.753:0.0465):0.075445)root; scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/unweighted_unifrac_dm.txt000066400000000000000000000016661464262511300310440ustar00rootroot00000000000000 f2 f1 f3 f4 p2 p1 t1 not16S.1 t2 f2 0.0 0.26009565527 0.26009565527 0.525554506478 0.638141793205 0.649277757971 0.879901349993 1.0 0.879901349993 f1 0.26009565527 0.0 0.0 0.358774553898 0.74869733414 0.830826823972 0.859632207458 1.0 0.859632207458 f3 0.26009565527 0.0 0.0 0.358774553898 0.74869733414 0.830826823972 0.859632207458 1.0 0.859632207458 f4 0.525554506478 0.358774553898 0.358774553898 0.0 0.784728562399 0.792350994914 0.928507908983 1.0 0.928507908983 p2 0.638141793205 0.74869733414 0.74869733414 0.784728562399 0.0 0.575711075216 0.538310274598 1.0 0.538310274598 p1 0.649277757971 0.830826823972 0.830826823972 0.792350994914 0.575711075216 0.0 0.72230493437 1.0 0.72230493437 t1 0.879901349993 0.859632207458 0.859632207458 0.928507908983 0.538310274598 0.72230493437 0.0 1.0 0.0 not16S.1 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0 t2 0.879901349993 0.859632207458 0.859632207458 0.928507908983 0.538310274598 0.72230493437 0.0 1.0 0.0scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/weighted_normalized_unifrac_dm.txt000066400000000000000000000017261464262511300327220ustar00rootroot00000000000000 f2 f1 f3 f4 p2 p1 t1 not16S.1 t2 f2 0.0 0.113186179375 0.113186179375 0.0595362216515 0.599928715677 0.732991733568 0.962973616339 1.0 0.925172353416 f1 0.113186179375 0.0 0.0 0.14768803941 0.577821666109 0.782364205648 0.929696953333 1.0 0.889628707676 f3 0.113186179375 0.0 0.0 
0.14768803941 0.577821666109 0.782364205648 0.929696953333 1.0 0.889628707676 f4 0.0595362216515 0.14768803941 0.14768803941 0.0 0.639547977307 0.751529763393 0.973594769453 1.0 0.936441078767 p2 0.599928715677 0.577821666109 0.577821666109 0.639547977307 0.0 0.430712953163 0.677634129601 1.0 0.519557525688 p1 0.732991733568 0.782364205648 0.782364205648 0.751529763393 0.430712953163 0.0 0.896900994816 1.0 0.739098773939 t1 0.962973616339 0.929696953333 0.929696953333 0.973594769453 0.677634129601 0.896900994816 0.0 1.0 0.167120342575 not16S.1 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0 t2 0.925172353416 0.889628707676 0.889628707676 0.936441078767 0.519557525688 0.739098773939 0.167120342575 1.0 0.0scikit-bio-0.6.2/skbio/diversity/beta/tests/data/qiime-191-tt/weighted_unifrac_dm.txt000066400000000000000000000016661464262511300305010ustar00rootroot00000000000000 f2 f1 f3 f4 p2 p1 t1 not16S.1 t2 f2 0.0 0.0660086363636 0.0660086363636 0.0361822727273 0.364183181818 0.425055909091 0.634565909091 1.0 0.590832727273 f1 0.0660086363636 0.0 0.0 0.0869145454545 0.339649090909 0.438638181818 0.594755454545 1.0 0.551022272727 f3 0.0660086363636 0.0 0.0 0.0869145454545 0.339649090909 0.438638181818 0.594755454545 1.0 0.551022272727 f4 0.0361822727273 0.0869145454545 0.0869145454545 0.0 0.391632727273 0.4398 0.646739090909 1.0 0.603005909091 p2 0.364183181818 0.339649090909 0.339649090909 0.391632727273 0.0 0.251758181818 0.44967 1.0 0.334201363636 p1 0.425055909091 0.438638181818 0.438638181818 0.4398 0.251758181818 0.0 0.57082 1.0 0.455351363636 t1 0.634565909091 0.594755454545 0.594755454545 0.646739090909 0.44967 0.57082 0.0 1.0 0.116175909091 not16S.1 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.0 1.0 t2 0.590832727273 0.551022272727 0.551022272727 0.603005909091 0.334201363636 0.455351363636 0.116175909091 1.0 0.0scikit-bio-0.6.2/skbio/diversity/beta/tests/test_unifrac.py000066400000000000000000000721171464262511300240360ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from io import StringIO from unittest import main, TestCase import numpy as np from skbio import TreeNode from skbio.tree import DuplicateNodeError, MissingNodeError from skbio.diversity.beta import unweighted_unifrac, weighted_unifrac from skbio.diversity.beta._unifrac import (_unweighted_unifrac, _weighted_unifrac, _weighted_unifrac_branch_correction) class UnifracTests(TestCase): def setUp(self): self.b1 = np.array( [[1, 3, 0, 1, 0], [0, 2, 0, 4, 4], [0, 0, 6, 2, 1], [0, 0, 1, 1, 1], [5, 3, 5, 0, 0], [0, 0, 0, 3, 5]]) self.sids1 = list('ABCDEF') self.oids1 = ['OTU%d' % i for i in range(1, 6)] self.t1 = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) self.t1_w_extra_tips = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,(OTU5:0.25,(OTU6:0.5,OTU7:0.5):0.5):0.5):1.25):0.0' ')root;')) self.t2 = TreeNode.read( StringIO('((OTU1:0.1, OTU2:0.2):0.3, (OTU3:0.5, OTU4:0.7):1.1)' 'root;')) self.oids2 = ['OTU%d' % i for i in range(1, 5)] def test_unweighted_taxa_out_of_order(self): # UniFrac API does not assert the observations are in tip order of the # input tree shuffled_ids = self.oids1[:] shuffled_b1 = self.b1.copy() shuffled_ids[0], shuffled_ids[-1] = shuffled_ids[-1], shuffled_ids[0] shuffled_b1[:, [0, -1]] = shuffled_b1[:, [-1, 0]] for i in range(len(self.b1)): for j in range(len(self.b1)): actual = unweighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) expected = unweighted_unifrac( shuffled_b1[i], shuffled_b1[j], shuffled_ids, self.t1) self.assertAlmostEqual(actual, expected) def 
test_weighted_taxa_out_of_order(self): # UniFrac API does not assert the observations are in tip order of the # input tree shuffled_ids = self.oids1[:] shuffled_b1 = self.b1.copy() shuffled_ids[0], shuffled_ids[-1] = shuffled_ids[-1], shuffled_ids[0] shuffled_b1[:, [0, -1]] = shuffled_b1[:, [-1, 0]] for i in range(len(self.b1)): for j in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) expected = weighted_unifrac( shuffled_b1[i], shuffled_b1[j], shuffled_ids, self.t1) self.assertAlmostEqual(actual, expected) def test_unweighted_extra_tips(self): # UniFrac values are the same despite unobserved tips in the tree for i in range(len(self.b1)): for j in range(len(self.b1)): actual = unweighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1_w_extra_tips) expected = unweighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) def test_weighted_extra_tips(self): # UniFrac values are the same despite unobserved tips in the tree for i in range(len(self.b1)): for j in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1_w_extra_tips) expected = weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) def test_unweighted_minimal_trees(self): # two tips tree = TreeNode.read(StringIO('(OTU1:0.25, OTU2:0.25)root;')) actual = unweighted_unifrac([1, 0], [0, 0], ['OTU1', 'OTU2'], tree) expected = 1.0 self.assertEqual(actual, expected) def test_weighted_minimal_trees(self): # two tips tree = TreeNode.read(StringIO('(OTU1:0.25, OTU2:0.25)root;')) actual = weighted_unifrac([1, 0], [0, 0], ['OTU1', 'OTU2'], tree) expected = 0.25 self.assertEqual(actual, expected) def test_unweighted_root_not_observed(self): # expected values computed with QIIME 1.9.1 and by hand # root node not observed, but branch between (OTU1, OTU2) and root # is considered shared actual = unweighted_unifrac([1, 1, 0, 0], [1, 0, 0, 0], 
self.oids2, self.t2) # for clarity of what I'm testing, compute expected as it would # based on the branch lengths. the values that compose shared was # a point of confusion for me here, so leaving these in for # future reference expected = 0.2 / (0.1 + 0.2 + 0.3) # 0.3333333333 self.assertAlmostEqual(actual, expected) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared actual = unweighted_unifrac([0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2) # for clarity of what I'm testing, compute expected as it would # based on the branch lengths. the values that compose shared was # a point of confusion for me here, so leaving these in for # future reference expected = 0.7 / (1.1 + 0.5 + 0.7) # 0.3043478261 self.assertAlmostEqual(actual, expected) def test_weighted_root_not_observed(self): # expected values computed by hand, these disagree with QIIME 1.9.1 # root node not observed, but branch between (OTU1, OTU2) and root # is considered shared actual = weighted_unifrac([1, 0, 0, 0], [1, 1, 0, 0], self.oids2, self.t2) expected = 0.15 self.assertAlmostEqual(actual, expected) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared actual = weighted_unifrac([0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2) expected = 0.6 self.assertAlmostEqual(actual, expected) def test_weighted_normalized_root_not_observed(self): # expected values computed by hand, these disagree with QIIME 1.9.1 # root node not observed, but branch between (OTU1, OTU2) and root # is considered shared actual = weighted_unifrac([1, 0, 0, 0], [1, 1, 0, 0], self.oids2, self.t2, normalized=True) expected = 0.1764705882 self.assertAlmostEqual(actual, expected) # root node not observed, but branch between (OTU3, OTU4) and root # is considered shared actual = weighted_unifrac([0, 0, 1, 1], [0, 0, 1, 0], self.oids2, self.t2, normalized=True) expected = 0.1818181818 self.assertAlmostEqual(actual, expected) def 
test_unweighted_unifrac_identity(self): for i in range(len(self.b1)): actual = unweighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_unweighted_unifrac_symmetry(self): for i in range(len(self.b1)): for j in range(len(self.b1)): actual = unweighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) expected = unweighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) def test_invalid_input(self): # Many of these tests are duplicated from # skbio.diversity.tests.test_base, but I think it's important to # confirm that they are being run when *unifrac is called. # tree has duplicated tip ids t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(DuplicateNodeError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(DuplicateNodeError, weighted_unifrac, u_counts, v_counts, taxa, t) # unrooted tree as input t = TreeNode.read(StringIO('((OTU1:0.1, OTU2:0.2):0.3, OTU3:0.5,' 'OTU4:0.7);')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # taxa has duplicated ids t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU2'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # len of vectors not equal t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) u_counts = [1, 2] v_counts = [1, 1, 1] taxa = 
['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) u_counts = [1, 2, 3] v_counts = [1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # negative counts t = TreeNode.read( StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) u_counts = [1, 2, -3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) u_counts = [1, 2, 3] v_counts = [1, 1, -1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # tree with no branch lengths t = TreeNode.read( StringIO('((((OTU1,OTU2),OTU3)),(OTU4,OTU5));')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # tree missing some branch lengths t = TreeNode.read( StringIO('(((((OTU1,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(ValueError, weighted_unifrac, u_counts, v_counts, taxa, t) # taxa not present in tree t = TreeNode.read( 
StringIO('(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) u_counts = [1, 2, 3] v_counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU42'] self.assertRaises(MissingNodeError, unweighted_unifrac, u_counts, v_counts, taxa, t) self.assertRaises(MissingNodeError, weighted_unifrac, u_counts, v_counts, taxa, t) def test_unweighted_unifrac_non_overlapping(self): # these communities only share the root node actual = unweighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 1.0 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1) expected = 1.0 self.assertAlmostEqual(actual, expected) def test_unweighted_unifrac_zero_counts(self): actual = unweighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1) expected = 1.0 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( [], [], [], self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_unweighted_unifrac(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # unweighted unifrac implementation # sample A versus all actual = unweighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1) expected = 0.238095238095 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1) expected = 0.52 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1) expected = 0.52 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1) expected = 0.545454545455 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[0], self.b1[5], self.oids1, self.t1) expected = 0.619047619048 
self.assertAlmostEqual(actual, expected) # sample B versus remaining actual = unweighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1) expected = 0.347826086957 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[1], self.b1[3], self.oids1, self.t1) expected = 0.347826086957 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[1], self.b1[4], self.oids1, self.t1) expected = 0.68 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 self.assertAlmostEqual(actual, expected) # sample C versus remaining actual = unweighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1) expected = 0.68 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 self.assertAlmostEqual(actual, expected) # sample D versus remaining actual = unweighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1) expected = 0.68 self.assertAlmostEqual(actual, expected) actual = unweighted_unifrac( self.b1[3], self.b1[5], self.oids1, self.t1) expected = 0.421052631579 self.assertAlmostEqual(actual, expected) # sample E versus remaining actual = unweighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 1.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_identity(self): for i in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_symmetry(self): for i in range(len(self.b1)): for j in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1) expected = weighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1) self.assertAlmostEqual(actual, expected) 
def test_weighted_unifrac_non_overlapping(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # weighted unifrac implementation # these communities only share the root node actual = weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 4.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_zero_counts(self): actual = weighted_unifrac( [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) # calculated the following by hand, as QIIME 1.9.1 tells the user # that values involving empty vectors will be uninformative, and # returns 1.0 actual = weighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1) expected = 2.0 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( [], [], [], self.t1) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # weighted unifrac implementation actual = weighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1) expected = 2.4 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1) expected = 1.86666666667 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1) expected = 2.53333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1) expected = 1.35384615385 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[5], self.oids1, self.t1) expected = 3.2 self.assertAlmostEqual(actual, expected) # sample B versus remaining actual = weighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1) expected = 2.26666666667 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[1], self.b1[3], 
self.oids1, self.t1) expected = 0.933333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[1], self.b1[4], self.oids1, self.t1) expected = 3.2 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1) expected = 0.8375 self.assertAlmostEqual(actual, expected) # sample C versus remaining actual = weighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1) expected = 1.33333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1) expected = 1.89743589744 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1) expected = 2.66666666667 self.assertAlmostEqual(actual, expected) # sample D versus remaining actual = weighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1) expected = 2.66666666667 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[3], self.b1[5], self.oids1, self.t1) expected = 1.33333333333 self.assertAlmostEqual(actual, expected) # sample E versus remaining actual = weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1) expected = 4.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_identity_normalized(self): for i in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[i], self.oids1, self.t1, normalized=True) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_symmetry_normalized(self): for i in range(len(self.b1)): for j in range(len(self.b1)): actual = weighted_unifrac( self.b1[i], self.b1[j], self.oids1, self.t1, normalized=True) expected = weighted_unifrac( self.b1[j], self.b1[i], self.oids1, self.t1, normalized=True) self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_non_overlapping_normalized(self): # these communities only share the root node actual = weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1, 
normalized=True) expected = 1.0 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 1, 1], self.oids1, self.t1, normalized=True) expected = 1.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_zero_counts_normalized(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # weighted unifrac implementation actual = weighted_unifrac( [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1, normalized=True) expected = 0.0 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( [1, 1, 1, 0, 0], [0, 0, 0, 0, 0], self.oids1, self.t1, normalized=True) expected = 1.0 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( [], [], [], self.t1, normalized=True) expected = 0.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_normalized(self): # expected results derived from QIIME 1.9.1, which # is a completely different implementation skbio's initial # weighted unifrac implementation actual = weighted_unifrac( self.b1[0], self.b1[1], self.oids1, self.t1, normalized=True) expected = 0.6 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[2], self.oids1, self.t1, normalized=True) expected = 0.466666666667 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.633333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.338461538462 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[0], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.8 self.assertAlmostEqual(actual, expected) # sample B versus remaining actual = weighted_unifrac( self.b1[1], self.b1[2], self.oids1, self.t1, normalized=True) expected = 0.566666666667 self.assertAlmostEqual(actual, expected) actual = 
weighted_unifrac( self.b1[1], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.233333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[1], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.8 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[1], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.209375 self.assertAlmostEqual(actual, expected) # sample C versus remaining actual = weighted_unifrac( self.b1[2], self.b1[3], self.oids1, self.t1, normalized=True) expected = 0.333333333333 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[2], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.474358974359 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[2], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.666666666667 self.assertAlmostEqual(actual, expected) # sample D versus remaining actual = weighted_unifrac( self.b1[3], self.b1[4], self.oids1, self.t1, normalized=True) expected = 0.666666666667 self.assertAlmostEqual(actual, expected) actual = weighted_unifrac( self.b1[3], self.b1[5], self.oids1, self.t1, normalized=True) expected = 0.333333333333 self.assertAlmostEqual(actual, expected) # sample E versus remaining actual = weighted_unifrac( self.b1[4], self.b1[5], self.oids1, self.t1, normalized=True) expected = 1.0 self.assertAlmostEqual(actual, expected) def test_weighted_unifrac_branch_correction(self): # for ((a:1, b:2)c:3,(d:4,e:5)f:6)root;" tip_ds = np.array([4, 5, 10, 11, 0, 0, 0])[:, np.newaxis] u_counts = np.array([1, 1, 0, 0, 2, 0, 2]) v_counts = np.array([0, 2, 1, 0, 2, 1, 3]) u_sum = 2 # counts at the tips v_sum = 3 exp = np.array([2.0, 5.0 * (.5 + (2.0/3.0)), 10.0 * (1.0 / 3.0), 0.0]).sum() obs = _weighted_unifrac_branch_correction( tip_ds, u_counts/u_sum, v_counts/v_sum) self.assertEqual(obs, exp) def test_unweighted_unifrac_pycogent_adapted(self): # adapted from PyCogent unit 
tests m = np.array([[1, 0, 1], [1, 1, 0], [0, 1, 0], [0, 0, 1], [0, 1, 0], [0, 1, 1], [1, 1, 1], [0, 1, 1], [1, 1, 1]]) # lengths from ((a:1,b:2):4,(c:3,(d:1,e:1):2):3) bl = np.array([1, 2, 1, 1, 3, 2, 4, 3, 0], dtype=float) self.assertEqual(_unweighted_unifrac(m[:, 0], m[:, 1], bl), 10/16.0) self.assertEqual(_unweighted_unifrac(m[:, 0], m[:, 2], bl), 8/13.0) self.assertEqual(_unweighted_unifrac(m[:, 1], m[:, 2], bl), 8/17.0) def test_weighted_unifrac_pycogent_adapted(self): # lengths from ((a:1,b:2):4,(c:3,(d:1,e:1):2):3) bl = np.array([1, 2, 1, 1, 3, 2, 4, 3, 0], dtype=float) # adapted from PyCogent unit tests m = np.array([[1, 0, 1], # a [1, 1, 0], # b [0, 1, 0], # d [0, 0, 1], # e [0, 1, 0], # c [0, 1, 1], # parent of (d, e) [2, 1, 1], # parent of a, b [0, 2, 1], # parent of c (d, e) [2, 3, 2]]) # root # sum just the counts at the tips m0s = m[:5, 0].sum() m1s = m[:5, 1].sum() m2s = m[:5, 2].sum() # scores computed by educational implementation self.assertAlmostEqual( _weighted_unifrac(m[:, 0], m[:, 1], m0s, m1s, bl)[0], 7.5) self.assertAlmostEqual( _weighted_unifrac(m[:, 0], m[:, 2], m0s, m2s, bl)[0], 6.0) self.assertAlmostEqual( _weighted_unifrac(m[:, 1], m[:, 2], m1s, m2s, bl)[0], 4.5) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/diversity/tests/000077500000000000000000000000001464262511300200535ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/diversity/tests/__init__.py000066400000000000000000000005411464262511300221640ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/diversity/tests/test_block.py000066400000000000000000000202131464262511300225540ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt from skbio import TreeNode, DistanceMatrix from skbio.diversity import beta_diversity, block_beta_diversity from skbio.diversity._block import (_block_party, _generate_id_blocks, _pairs_to_compute, _block_compute, _block_kwargs, _map, _reduce) class ParallelBetaDiversity(TestCase): def setUp(self): self.table1 = [[1, 5], [2, 3], [0, 1]] self.sids1 = list('ABC') self.tree1 = TreeNode.read([ '((O1:0.25, O2:0.50):0.25, O3:0.75)root;']) self.oids1 = ['O1', 'O2'] def test_block_kwargs(self): kws = {'ids': [1, 2, 3, 4, 5], 'foo': 'bar', 'k': 2} exp = [{'row_ids': np.array((0, 1)), 'col_ids': np.array((0, 1)), 'id_pairs': [(0, 1)], 'ids': [1, 2, 3, 4, 5]}, {'row_ids': np.array((0, 1)), 'col_ids': np.array((2, 3)), 'id_pairs': [(0, 2), (0, 3), (1, 2), (1, 3)], 'ids': [1, 2, 3, 4, 5]}, {'row_ids': np.array((0, 1)), 'col_ids': np.array((4,)), 'id_pairs': [(0, 4), (1, 4)], 'ids': [1, 2, 3, 4, 5]}, {'row_ids': np.array((2, 3)), 'col_ids': np.array((2, 3)), 'id_pairs': [(2, 3), ], 'ids': [1, 2, 3, 4, 5]}, {'row_ids': np.array((2, 3)), 'col_ids': np.array((4,)), 'id_pairs': [(2, 4), (3, 4)], 'ids': [1, 2, 3, 4, 5]}] obs = list(_block_kwargs(**kws)) npt.assert_equal(obs, exp) def test_block_compute(self): def mock_metric(u, v): return (u + v).sum() counts = np.array([[0, 1, 2, 3, 4, 5], [1, 2, 3, 4, 5, 0], [2, 3, 4, 5, 0, 1], [10, 2, 3, 6, 8, 
2], [9, 9, 2, 2, 3, 4]]) kwargs = {'metric': mock_metric, 'counts': counts, 'row_ids': np.array((2, 3)), 'col_ids': np.array((4, )), 'id_pairs': [(2, 4), (3, 4)], 'ids': [1, 2, 3, 4, 5]} exp = DistanceMatrix(np.array([[0, 0, 44], [0, 0, 60], [44, 60, 0]]), (2, 3, 4)) obs = _block_compute(**kwargs) npt.assert_equal(obs.data, exp.data) self.assertEqual(obs.ids, exp.ids) def test_map(self): def func(a, b, c=5): return a + b + c kwargs = [{'a': 0, 'b': 1, 'c': 0}, {'a': 2, 'b': 3}] exp = [1, 10] obs = list(_map(func, kwargs)) self.assertEqual(obs, exp) def test_reduce(self): dm1 = DistanceMatrix(np.array([[0, 0, 44], [0, 0, 60], [44, 60, 0]]), (2, 3, 4)) dm2 = DistanceMatrix(np.array([[0, 123], [123, 0]]), (1, 5)) dm3 = DistanceMatrix(np.array([[0, 1, 2, 3], [1, 0, 4, 5], [2, 4, 0, 6], [3, 5, 6, 0]]), (0, 3, 4, 5)) exp = DistanceMatrix(np.array([[0, 0, 0, 1, 2, 3], [0, 0, 0, 0, 0, 123], [0, 0, 0, 0, 44, 0], [1, 0, 0, 0, 64, 5], [2, 0, 44, 64, 0, 6], [3, 123, 0, 5, 6, 0]]), list(range(6))) obs = _reduce([dm1, dm2, dm3]) npt.assert_equal(obs.data, exp.data) self.assertEqual(obs.ids, exp.ids) def test_block_beta_diversity(self): exp = beta_diversity('unweighted_unifrac', self.table1, self.sids1, tree=self.tree1, taxa=self.oids1) obs = block_beta_diversity('unweighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, k=2) npt.assert_equal(obs.data, exp.data) self.assertEqual(obs.ids, exp.ids) def test_generate_id_blocks(self): ids = [1, 2, 3, 4, 5] exp = [(np.array((0, 1)), np.array((0, 1))), (np.array((0, 1)), np.array((2, 3))), (np.array((0, 1)), np.array((4,))), (np.array((2, 3)), np.array((2, 3))), (np.array((2, 3)), np.array((4,))), (np.array((4,)), np.array((4,)))] obs = list(_generate_id_blocks(ids, 2)) npt.assert_equal(obs, exp) def test_block_party_notree(self): counts = np.arange(15).reshape(5, 3) exp = [{'counts': np.array([[0, 1, 2], [3, 4, 5]]), 'ids': np.array([0, 1])}, {'counts': np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, 10, 11]]), 
'ids': np.array([0, 1, 2, 3])}, {'counts': np.array([[0, 1, 2], [3, 4, 5], [12, 13, 14]]), 'ids': np.array([0, 1, 4])}, {'counts': np.array([[6, 7, 8], [9, 10, 11]]), 'ids': np.array([2, 3])}, {'counts': np.array([[6, 7, 8], [9, 10, 11], [12, 13, 14]]), 'ids': np.array([2, 3, 4])}, {'counts': np.array([[12, 13, 14]]), 'ids': np.array([4])}] obs = [_block_party(counts, rids, cids) for rids, cids in _generate_id_blocks(list(range(5)), 2)] npt.assert_equal(obs, exp) def test_block_party_tree(self): counts = np.array([[1, 1, 1], [1, 0, 1], [1, 0, 1], [0, 0, 1], [0, 1, 1]]) tree = TreeNode.read(['(a:1,b:2,c:3);']) taxa = ['a', 'b', 'c'] kw = {'tree': tree, 'taxa': taxa} kw_no_a = {'tree': tree.shear(['b', 'c']), 'taxa': ['b', 'c']} kw_no_b = {'tree': tree.shear(['a', 'c']), 'taxa': ['a', 'c']} # python >= 3.5 supports {foo: bar, **baz} exp = [dict(counts=np.array([[1, 1, 1], [1, 0, 1]]), **kw), dict(counts=np.array([[1, 1, 1], [1, 0, 1], [1, 0, 1], [0, 0, 1]]), **kw), dict(counts=np.array([[1, 1, 1], [1, 0, 1], [0, 1, 1]]), **kw), dict(counts=np.array([[1, 1], [0, 1]]), **kw_no_b), dict(counts=np.array([[1, 0, 1], [0, 0, 1], [0, 1, 1]]), **kw), dict(counts=np.array([[1, 1]]), **kw_no_a)] obs = [_block_party(counts, rids, cids, **kw) for rids, cids in _generate_id_blocks(list(range(5)), 2)] for okw, ekw in zip(obs, exp): npt.assert_equal(okw['counts'], ekw['counts']) npt.assert_equal(okw['taxa'], ekw['taxa']) self.assertEqual(str(okw['tree']), str(ekw['tree'])) def test_pairs_to_compute_rids_are_cids(self): rids = np.array([0, 1, 2, 10]) cids = rids exp = [(0, 1), (0, 2), (0, 10), (1, 2), (1, 10), (2, 10)] self.assertEqual(_pairs_to_compute(rids, cids), exp) def test_pairs_to_compute_rids_are_not_cids(self): rids = np.array([0, 1, 2]) cids = np.array([3, 4, 5]) exp = [(0, 3), (0, 4), (0, 5), (1, 3), (1, 4), (1, 5), (2, 3), (2, 4), (2, 5)] self.assertEqual(_pairs_to_compute(rids, cids), exp) def test_pairs_to_compute_rids_overlap_cids(self): rids = np.array([0, 1, 2]) 
cids = np.array([0, 10, 20]) with self.assertRaises(ValueError): _pairs_to_compute(rids, cids) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/diversity/tests/test_driver.py000066400000000000000000001200531464262511300227600ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main import pandas as pd import numpy as np import numpy.testing as npt from skbio import DistanceMatrix, TreeNode from skbio.table import Table, example_table from skbio.util._testing import assert_series_almost_equal from skbio.diversity import (alpha_diversity, beta_diversity, partial_beta_diversity, get_alpha_diversity_metrics, get_beta_diversity_metrics) from skbio.diversity.alpha import faith_pd, phydiv, sobs from skbio.diversity.beta import unweighted_unifrac, weighted_unifrac from skbio.tree import DuplicateNodeError, MissingNodeError from skbio.diversity._driver import (_qualitative_beta_metrics, _valid_beta_metrics) class AlphaDiversityTests(TestCase): def setUp(self): self.table1 = np.array([[1, 3, 0, 1, 0], [0, 2, 0, 4, 4], [0, 0, 6, 2, 1], [0, 0, 1, 1, 1]]) self.sids1 = list('ABCD') self.oids1 = ['OTU%d' % i for i in range(1, 6)] self.tree1 = TreeNode.read(io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):' '0.0,(OTU4:0.75,OTU5:0.75):1.25):0.0)root;')) self.table2 = np.array([[1, 3], [0, 2], [0, 0]]) self.sids2 = list('xyz') self.oids2 = ['OTU1', 'OTU5'] self.tree2 = TreeNode.read(io.StringIO( '(((((OTU1:42.5,OTU2:0.5):0.5,OTU3:1.0):1.0):' '0.0,(OTU4:0.75,OTU5:0.0001):1.25):0.0)root;')) def test_invalid_input(self): # number of ids doesn't match the number of samples self.assertRaises(ValueError, 
alpha_diversity, 'sobs', self.table1, list('ABC')) # unknown metric provided self.assertRaises(ValueError, alpha_diversity, 'not-a-metric', self.table1) # 3-D list provided as input self.assertRaises(ValueError, alpha_diversity, 'sobs', [[[43]]]) # negative counts self.assertRaises(ValueError, alpha_diversity, 'sobs', [0, 3, -12, 42]) # additional kwargs self.assertRaises(TypeError, alpha_diversity, 'sobs', [0, 1], not_a_real_kwarg=42.0) self.assertRaises(TypeError, alpha_diversity, 'faith_pd', [0, 1], tree=self.tree1, taxa=['OTU1', 'OTU2'], not_a_real_kwarg=42.0) self.assertRaises(TypeError, alpha_diversity, faith_pd, [0, 1], tree=self.tree1, taxa=['OTU1', 'OTU2'], not_a_real_kwarg=42.0) self.assertRaises(ValueError, alpha_diversity, 'sobs', example_table, ids=['A', 'B', 'C']) def test_invalid_input_phylogenetic(self): # taxa not provided self.assertRaises(ValueError, alpha_diversity, 'faith_pd', self.table1, list('ABC'), tree=self.tree1) # tree not provided self.assertRaises(ValueError, alpha_diversity, 'faith_pd', self.table1, list('ABC'), taxa=self.oids1) # tree has duplicated tip ids t = TreeNode.read( io.StringIO( '(((((OTU2:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(DuplicateNodeError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # unrooted tree as input t = TreeNode.read(io.StringIO( '((OTU1:0.1, OTU2:0.2):0.3, OTU3:0.5,OTU4:0.7);')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # taxa has duplicated ids t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU2'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # count and OTU vectors are not equal length t = TreeNode.read( io.StringIO( 
'(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # tree with no branch lengths t = TreeNode.read( io.StringIO('((((OTU1,OTU2),OTU3)),(OTU4,OTU5));')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # tree missing some branch lengths t = TreeNode.read( io.StringIO('(((((OTU1,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # some taxa not present in tree t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU42'] self.assertRaises(MissingNodeError, alpha_diversity, 'faith_pd', counts, taxa=taxa, tree=t) # table and taxa are provided test_table = Table(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), ['O1', 'O2', 'O3'], ['S1', 'S2', 'S3']) self.assertRaises(ValueError, alpha_diversity, 'faith_pd', test_table, taxa=taxa, tree=t) def test_empty(self): # empty vector actual = alpha_diversity('sobs', np.array([], dtype=np.int64)) expected = pd.Series([0], dtype=np.int64) assert_series_almost_equal(actual, expected) # array of empty vector actual = alpha_diversity('sobs', np.array([[]], dtype=np.int64)) expected = pd.Series([0], dtype=np.int64) assert_series_almost_equal(actual, expected) # array of empty vectors actual = alpha_diversity('sobs', np.array([[], []], dtype=np.int64)) expected = 
pd.Series([0, 0], dtype=np.int64) assert_series_almost_equal(actual, expected) # empty vector actual = alpha_diversity('faith_pd', np.array([], dtype=np.int64), tree=self.tree1, taxa=[]) expected = pd.Series([0.]) assert_series_almost_equal(actual, expected) # array of empty vector actual = alpha_diversity('faith_pd', np.array([[]], dtype=np.int64), tree=self.tree1, taxa=[]) expected = pd.Series([0.]) assert_series_almost_equal(actual, expected) # array of empty vectors actual = alpha_diversity('faith_pd', np.array([[], []], dtype=np.int64), tree=self.tree1, taxa=[]) expected = pd.Series([0., 0.]) assert_series_almost_equal(actual, expected) # empty Table actual = alpha_diversity('sobs', Table(np.array([[]]), [], ['S1', ])) actual.index = pd.RangeIndex(len(actual)) expected = pd.Series([0], dtype=np.int64) assert_series_almost_equal(actual, expected) def test_single_count_vector(self): actual = alpha_diversity('sobs', np.array([1, 0, 2])) expected = pd.Series([2], dtype=np.int64) assert_series_almost_equal(actual, expected) actual = alpha_diversity('faith_pd', np.array([1, 3, 0, 1, 0]), tree=self.tree1, taxa=self.oids1) self.assertAlmostEqual(actual[0], 4.5) def test_input_types(self): list_result = alpha_diversity('sobs', [1, 3, 0, 1, 0]) array_result = alpha_diversity('sobs', np.array([1, 3, 0, 1, 0])) table_result = alpha_diversity('sobs', Table(np.array([[1, 3, 0, 1, 0], ]).T, list('ABCDE'), ['S1', ])) # using a table we get sample IDs for free, drop them for the test table_result.index = pd.RangeIndex(len(table_result)) self.assertAlmostEqual(list_result[0], 3) assert_series_almost_equal(list_result, array_result) assert_series_almost_equal(table_result, array_result) list_result = alpha_diversity('faith_pd', [1, 3, 0, 1, 0], tree=self.tree1, taxa=self.oids1) array_result = alpha_diversity('faith_pd', np.array([1, 3, 0, 1, 0]), tree=self.tree1, taxa=self.oids1) table_result = alpha_diversity('faith_pd', Table(np.array([[1, 3, 0, 1, 0], ]).T, self.oids1, ['S1', 
]), tree=self.tree1) # using a table we get sample IDs for free, drop them for the test table_result.index = pd.RangeIndex(len(table_result)) self.assertAlmostEqual(list_result[0], 4.5) assert_series_almost_equal(list_result, array_result) assert_series_almost_equal(table_result, array_result) def test_sobs(self): # expected values hand-calculated expected = pd.Series([3, 3, 3, 3], index=self.sids1, dtype=np.int64) actual = alpha_diversity('sobs', self.table1, self.sids1) assert_series_almost_equal(actual, expected) # function passed instead of string actual = alpha_diversity(sobs, self.table1, self.sids1) assert_series_almost_equal(actual, expected) # alt input table expected = pd.Series([2, 1, 0], index=self.sids2, dtype=np.int64) actual = alpha_diversity('sobs', self.table2, self.sids2) assert_series_almost_equal(actual, expected) def test_faith_pd(self): # calling faith_pd through alpha_diversity gives same results as # calling it directly expected = [] for e in self.table1: expected.append(faith_pd(e, tree=self.tree1, taxa=self.oids1)) expected = pd.Series(expected) actual = alpha_diversity('faith_pd', self.table1, tree=self.tree1, taxa=self.oids1) assert_series_almost_equal(actual, expected) # alt input table and tree expected = [] for e in self.table2: expected.append(faith_pd(e, tree=self.tree2, taxa=self.oids2)) expected = pd.Series(expected) actual = alpha_diversity('faith_pd', self.table2, tree=self.tree2, taxa=self.oids2) assert_series_almost_equal(actual, expected) def test_phydiv(self): expected = [] for e in self.table1: expected.append(phydiv(e, tree=self.tree1, taxa=self.oids1)) expected = pd.Series(expected) actual = alpha_diversity('phydiv', self.table1, tree=self.tree1, taxa=self.oids1) assert_series_almost_equal(actual, expected) expected = [] for e in self.table1: expected.append(phydiv(e, tree=self.tree1, taxa=self.oids1, rooted=False)) expected = pd.Series(expected) actual = alpha_diversity('phydiv', self.table1, tree=self.tree1, 
taxa=self.oids1, rooted=False) assert_series_almost_equal(actual, expected) expected = [] for e in self.table1: expected.append(phydiv(e, tree=self.tree1, taxa=self.oids1, weight=True)) expected = pd.Series(expected) actual = alpha_diversity('phydiv', self.table1, tree=self.tree1, taxa=self.oids1, weight=True) assert_series_almost_equal(actual, expected) def test_no_ids(self): # expected values hand-calculated expected = pd.Series([3, 3, 3, 3], dtype=np.int64) actual = alpha_diversity('sobs', self.table1) assert_series_almost_equal(actual, expected) def test_optimized(self): # calling optimized faith_pd gives same results as calling unoptimized # version optimized = alpha_diversity('faith_pd', self.table1, tree=self.tree1, taxa=self.oids1) unoptimized = alpha_diversity(faith_pd, self.table1, tree=self.tree1, taxa=self.oids1) assert_series_almost_equal(optimized, unoptimized) class BetaDiversityTests(TestCase): def setUp(self): self.table1 = [[1, 5], [2, 3], [0, 1]] self.sids1 = list('ABC') self.tree1 = TreeNode.read(io.StringIO( '((O1:0.25, O2:0.50):0.25, O3:0.75)root;')) self.oids1 = ['O1', 'O2'] self.table2 = [[23, 64, 14, 0, 0, 3, 1], [0, 3, 35, 42, 0, 12, 1], [0, 5, 5, 0, 40, 40, 0], [44, 35, 9, 0, 1, 0, 0], [0, 2, 8, 0, 35, 45, 1], [0, 0, 25, 35, 0, 19, 0]] self.sids2 = list('ABCDEF') self.table3 = [[23, 64, 14, 0, 0, 3, 1], [0, 3, 35, 42, 0, 12, 1], [0, 5, 5, 0, 40, 40, 0], [44, 35, 9, 0, 1, 0, 0], [0, 2, 8, 0, 35, 45, 1], [0, 0, 25, 35, 0, 19, 0], [88, 31, 0, 5, 5, 5, 5], [44, 39, 0, 0, 0, 0, 0]] def test_available_metrics(self): for metric in _valid_beta_metrics: try: beta_diversity(metric, self.table3) except Exception as exc: raise ValueError( f'Metric {metric} failed with exception:\n {exc}') def test_use_of_dataframe_index(self): '''reference to issue 1808''' df1 = pd.DataFrame(self.table1, index=self.sids1) df2 = pd.DataFrame(self.table2, index=self.sids2) matrix1 = beta_diversity('jaccard', df1) matrix2 = beta_diversity('jaccard', df2) 
self.assertEqual(self.sids1, list(matrix1.to_data_frame().index)) self.assertEqual(self.sids2, list(matrix2.to_data_frame().index)) def test_qualitative_bug_issue_1549(self): as_presence_absence = np.asarray(self.table3) > 0 for metric in _valid_beta_metrics: obs_mat = beta_diversity(metric, self.table3) obs_presence_absence = beta_diversity(metric, as_presence_absence) if metric in _qualitative_beta_metrics: self.assertEqual(obs_mat, obs_presence_absence) else: self.assertNotEqual(obs_mat, obs_presence_absence) def test_invalid_input(self): # number of ids doesn't match the number of samples error_msg = (r"Number of rows") with self.assertRaisesRegex(ValueError, error_msg): beta_diversity(self.table1, list('AB'), 'euclidean') # unknown metric provided error_msg = r"not-a-metric" with self.assertRaisesRegex(ValueError, error_msg): beta_diversity('not-a-metric', self.table1) # 3-D list provided as input error_msg = (r"Only 1-D and 2-D") with self.assertRaisesRegex(ValueError, error_msg): beta_diversity('euclidean', [[[43]]]) # negative counts error_msg = r"negative values." 
with self.assertRaisesRegex(ValueError, error_msg): beta_diversity('euclidean', [[0, 1, 3, 4], [0, 3, -12, 42]]) with self.assertRaisesRegex(ValueError, error_msg): beta_diversity('euclidean', [[0, 1, 3, -4], [0, 3, 12, 42]]) # additional kwargs error_msg = r"argument" with self.assertRaisesRegex(TypeError, error_msg): beta_diversity('euclidean', [[0, 1, 3], [0, 3, 12]], not_a_real_kwarg=42.0) with self.assertRaisesRegex(TypeError, error_msg): beta_diversity('unweighted_unifrac', [[0, 1, 3], [0, 3, 12]], not_a_real_kwarg=42.0, tree=self.tree1, taxa=['O1', 'O2', 'O3']) with self.assertRaisesRegex(TypeError, error_msg): beta_diversity('weighted_unifrac', [[0, 1, 3], [0, 3, 12]], not_a_real_kwarg=42.0, tree=self.tree1, taxa=['O1', 'O2', 'O3']) with self.assertRaisesRegex(TypeError, error_msg): beta_diversity(weighted_unifrac, [[0, 1, 3], [0, 3, 12]], not_a_real_kwarg=42.0, tree=self.tree1, taxa=['O1', 'O2', 'O3']) error_msg = r"`counts` and `ids`" with self.assertRaisesRegex(ValueError, error_msg): beta_diversity('euclidean', example_table, ids=['foo', 'bar']) error_msg = r"`counts` and `taxa`" with self.assertRaisesRegex(ValueError, error_msg): beta_diversity(weighted_unifrac, example_table, taxa=['foo', 'bar'], tree=self.tree1) def test_invalid_input_phylogenetic(self): # taxa not provided self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', self.table1, list('ABC'), tree=self.tree1) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', self.table1, list('ABC'), tree=self.tree1) # tree not provided self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', self.table1, list('ABC'), taxa=self.oids1) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', self.table1, list('ABC'), taxa=self.oids1) # tree has duplicated tip ids t = TreeNode.read( io.StringIO( '(((((OTU2:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] 
self.assertRaises(DuplicateNodeError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(DuplicateNodeError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) # unrooted tree as input t = TreeNode.read(io.StringIO('((OTU1:0.1, OTU2:0.2):0.3, OTU3:0.5,' 'OTU4:0.7);')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) # taxa has duplicated ids t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU2'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) # count and OTU vectors are not equal length t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 2] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) # tree with no branch lengths t = TreeNode.read( io.StringIO('((((OTU1,OTU2),OTU3)),(OTU4,OTU5));')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 
'unweighted_unifrac', counts, taxa=taxa, tree=t) # tree missing some branch lengths t = TreeNode.read( io.StringIO('(((((OTU1,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(ValueError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) # some taxa not present in tree t = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU42'] self.assertRaises(MissingNodeError, beta_diversity, 'weighted_unifrac', counts, taxa=taxa, tree=t) self.assertRaises(MissingNodeError, beta_diversity, 'unweighted_unifrac', counts, taxa=taxa, tree=t) def test_empty(self): # array of empty vectors actual = beta_diversity('euclidean', np.array([[], []], dtype=np.int64), ids=['a', 'b']) expected_dm = DistanceMatrix([[0.0, 0.0], [0.0, 0.0]], ['a', 'b']) npt.assert_array_equal(actual, expected_dm) actual = beta_diversity('unweighted_unifrac', np.array([[], []], dtype=np.int64), ids=['a', 'b'], tree=self.tree1, taxa=[]) expected_dm = DistanceMatrix([[0.0, 0.0], [0.0, 0.0]], ['a', 'b']) self.assertEqual(actual, expected_dm) actual = beta_diversity('unweighted_unifrac', Table(np.array([[], []]).T, [], ['a', 'b']), tree=self.tree1) expected_dm = DistanceMatrix([[0.0, 0.0], [0.0, 0.0]], ['a', 'b']) self.assertEqual(actual, expected_dm) def test_input_types(self): actual_array = beta_diversity('euclidean', np.array([[1, 5], [2, 3]]), ids=['a', 'b']) actual_list = beta_diversity('euclidean', [[1, 5], [2, 3]], ids=['a', 'b']) actual_table = beta_diversity('euclidean', Table(np.array([[1, 5], [2, 3]]).T, ['O1', 'O2'], ['a', 'b'])) self.assertEqual(actual_array, actual_list) self.assertEqual(actual_array, actual_table) def test_euclidean(self): # TODO: update npt.assert_almost_equal calls to 
use DistanceMatrix # near-equality testing when that support is available actual_dm = beta_diversity('euclidean', self.table1, self.sids1) self.assertEqual(actual_dm.shape, (3, 3)) npt.assert_almost_equal(actual_dm['A', 'A'], 0.0) npt.assert_almost_equal(actual_dm['B', 'B'], 0.0) npt.assert_almost_equal(actual_dm['C', 'C'], 0.0) npt.assert_almost_equal(actual_dm['A', 'B'], 2.23606798) npt.assert_almost_equal(actual_dm['B', 'A'], 2.23606798) npt.assert_almost_equal(actual_dm['A', 'C'], 4.12310563) npt.assert_almost_equal(actual_dm['C', 'A'], 4.12310563) npt.assert_almost_equal(actual_dm['B', 'C'], 2.82842712) npt.assert_almost_equal(actual_dm['C', 'B'], 2.82842712) actual_dm = beta_diversity('euclidean', self.table2, self.sids2) expected_data = [ [0., 80.8455317, 84.0297566, 36.3042697, 86.0116271, 78.9176786], [80.8455317, 0., 71.0844568, 74.4714710, 69.3397433, 14.422205], [84.0297566, 71.0844568, 0., 77.2851861, 8.3066238, 60.7536007], [36.3042697, 74.4714710, 77.2851861, 0., 78.7908624, 70.7389567], [86.0116271, 69.3397433, 8.3066238, 78.7908624, 0., 58.4807660], [78.9176786, 14.422205, 60.7536007, 70.7389567, 58.4807660, 0.]] expected_dm = DistanceMatrix(expected_data, self.sids2) for id1 in self.sids2: for id2 in self.sids2: npt.assert_almost_equal(actual_dm[id1, id2], expected_dm[id1, id2], 6) def test_braycurtis(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available actual_dm = beta_diversity('braycurtis', self.table1, self.sids1) self.assertEqual(actual_dm.shape, (3, 3)) npt.assert_almost_equal(actual_dm['A', 'A'], 0.0) npt.assert_almost_equal(actual_dm['B', 'B'], 0.0) npt.assert_almost_equal(actual_dm['C', 'C'], 0.0) npt.assert_almost_equal(actual_dm['A', 'B'], 0.27272727) npt.assert_almost_equal(actual_dm['B', 'A'], 0.27272727) npt.assert_almost_equal(actual_dm['A', 'C'], 0.71428571) npt.assert_almost_equal(actual_dm['C', 'A'], 0.71428571) npt.assert_almost_equal(actual_dm['B', 
'C'], 0.66666667) npt.assert_almost_equal(actual_dm['C', 'B'], 0.66666667) actual_dm = beta_diversity('braycurtis', self.table2, self.sids2) expected_data = [ [0., 0.78787879, 0.86666667, 0.30927835, 0.85714286, 0.81521739], [0.78787879, 0., 0.78142077, 0.86813187, 0.75, 0.1627907], [0.86666667, 0.78142077, 0., 0.87709497, 0.09392265, 0.71597633], [0.30927835, 0.86813187, 0.87709497, 0., 0.87777778, 0.89285714], [0.85714286, 0.75, 0.09392265, 0.87777778, 0., 0.68235294], [0.81521739, 0.1627907, 0.71597633, 0.89285714, 0.68235294, 0.]] expected_dm = DistanceMatrix(expected_data, self.sids2) for id1 in self.sids2: for id2 in self.sids2: npt.assert_almost_equal(actual_dm[id1, id2], expected_dm[id1, id2], 6) def test_unweighted_unifrac(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available # expected values calculated by hand dm1 = beta_diversity('unweighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1) dm2 = beta_diversity(unweighted_unifrac, self.table1, self.sids1, taxa=self.oids1, tree=self.tree1) self.assertEqual(dm1.shape, (3, 3)) self.assertEqual(dm1, dm2) expected_data = [[0.0, 0.0, 0.25], [0.0, 0.0, 0.25], [0.25, 0.25, 0.0]] expected_dm = DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm1[id1, id2], expected_dm[id1, id2], 6) def test_weighted_unifrac(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available # expected values calculated by hand dm1 = beta_diversity('weighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1) dm2 = beta_diversity(weighted_unifrac, self.table1, self.sids1, taxa=self.oids1, tree=self.tree1) self.assertEqual(dm1.shape, (3, 3)) self.assertEqual(dm1, dm2) expected_data = [ [0.0, 0.1750000, 0.12499999], [0.1750000, 0.0, 0.3000000], [0.12499999, 0.3000000, 0.0]] expected_dm = 
DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm1[id1, id2], expected_dm[id1, id2], 6) def test_weighted_unifrac_normalized(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available # expected values calculated by hand dm1 = beta_diversity('weighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, normalized=True) dm2 = beta_diversity(weighted_unifrac, self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, normalized=True) self.assertEqual(dm1.shape, (3, 3)) self.assertEqual(dm1, dm2) expected_data = [ [0.0, 0.128834, 0.085714], [0.128834, 0.0, 0.2142857], [0.085714, 0.2142857, 0.0]] expected_dm = DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm1[id1, id2], expected_dm[id1, id2], 6) def test_scipy_kwargs(self): # confirm that p can be passed to SciPy's minkowski, and that it # gives a different result than not passing it (the off-diagonal # entries are not equal). 
dm1 = beta_diversity('minkowski', self.table1, self.sids1) dm2 = beta_diversity('minkowski', self.table1, self.sids1, p=42.0) for id1 in self.sids1: for id2 in self.sids1: if id1 != id2: self.assertNotEqual(dm1[id1, id2], dm2[id1, id2]) def test_alt_pairwise_func(self): # confirm that pairwise_func is actually being used def not_a_real_pdist(counts, metric): return [[0.0, 42.0], [42.0, 0.0]] dm1 = beta_diversity('unweighted_unifrac', self.table1, taxa=self.oids1, tree=self.tree1, pairwise_func=not_a_real_pdist) expected = DistanceMatrix([[0.0, 42.0], [42.0, 0.0]]) self.assertEqual(dm1, expected) dm1 = beta_diversity('weighted_unifrac', self.table1, taxa=self.oids1, tree=self.tree1, pairwise_func=not_a_real_pdist) expected = DistanceMatrix([[0.0, 42.0], [42.0, 0.0]]) self.assertEqual(dm1, expected) dm1 = beta_diversity(unweighted_unifrac, self.table1, taxa=self.oids1, tree=self.tree1, pairwise_func=not_a_real_pdist) expected = DistanceMatrix([[0.0, 42.0], [42.0, 0.0]]) self.assertEqual(dm1, expected) dm1 = beta_diversity("euclidean", self.table1, pairwise_func=not_a_real_pdist) expected = DistanceMatrix([[0.0, 42.0], [42.0, 0.0]]) self.assertEqual(dm1, expected) class MetricGetters(TestCase): def test_get_alpha_diversity_metrics(self): m = get_alpha_diversity_metrics() # basic sanity checks self.assertTrue('faith_pd' in m) self.assertTrue('chao1' in m) def test_get_alpha_diversity_metrics_sorted(self): m = get_alpha_diversity_metrics() n = sorted(list(m)) self.assertEqual(m, n) def test_get_beta_diversity_metrics(self): m = get_beta_diversity_metrics() # basic sanity checks self.assertTrue('unweighted_unifrac' in m) self.assertTrue('weighted_unifrac' in m) def test_get_beta_diversity_metrics_sorted(self): m = get_beta_diversity_metrics() n = sorted(list(m)) self.assertEqual(m, n) class TestPartialBetaDiversity(TestCase): def setUp(self): self.table1 = [[1, 5], [2, 3], [0, 1]] self.sids1 = list('ABC') self.tree1 = TreeNode.read(io.StringIO( '((O1:0.25, O2:0.50):0.25, 
O3:0.75)root;')) self.oids1 = ['O1', 'O2'] self.table2 = [[23, 64, 14, 0, 0, 3, 1], [0, 3, 35, 42, 0, 12, 1], [0, 5, 5, 0, 40, 40, 0], [44, 35, 9, 0, 1, 0, 0], [0, 2, 8, 0, 35, 45, 1], [0, 0, 25, 35, 0, 19, 0]] self.sids2 = list('ABCDEF') def test_id_pairs_as_iterable(self): id_pairs = iter([('B', 'C'), ]) dm = partial_beta_diversity('unweighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, id_pairs=id_pairs) self.assertEqual(dm.shape, (3, 3)) expected_data = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.25], [0.0, 0.25, 0.0]] expected_dm = DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm[id1, id2], expected_dm[id1, id2], 6) # pass in iter(foo) def test_unweighted_unifrac_partial(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available # expected values calculated by hand dm = partial_beta_diversity('unweighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, id_pairs=[('B', 'C'), ]) self.assertEqual(dm.shape, (3, 3)) expected_data = [[0.0, 0.0, 0.0], [0.0, 0.0, 0.25], [0.0, 0.25, 0.0]] expected_dm = DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm[id1, id2], expected_dm[id1, id2], 6) def test_weighted_unifrac_partial_full(self): # TODO: update npt.assert_almost_equal calls to use DistanceMatrix # near-equality testing when that support is available # expected values calculated by hand dm1 = partial_beta_diversity('weighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1, id_pairs=[('A', 'B'), ('A', 'C'), ('B', 'C')]) dm2 = beta_diversity('weighted_unifrac', self.table1, self.sids1, taxa=self.oids1, tree=self.tree1) self.assertEqual(dm1.shape, (3, 3)) self.assertEqual(dm1, dm2) expected_data = [ [0.0, 0.1750000, 0.12499999], [0.1750000, 0.0, 0.3000000], [0.12499999, 0.3000000, 0.0]] expected_dm = 
DistanceMatrix(expected_data, ids=self.sids1) for id1 in self.sids1: for id2 in self.sids1: npt.assert_almost_equal(dm1[id1, id2], expected_dm[id1, id2], 6) def test_self_self_pair(self): error_msg = (r"A duplicate or a self-self pair was observed.") with self.assertRaisesRegex(ValueError, error_msg): partial_beta_diversity((lambda x, y: x + y), self.table1, self.sids1, id_pairs=[('A', 'B'), ('A', 'A')]) def test_duplicate_pairs(self): # confirm that partial pairwise execution fails if duplicate pairs are # observed error_msg = (r"A duplicate or a self-self pair was observed.") with self.assertRaisesRegex(ValueError, error_msg): partial_beta_diversity((lambda x, y: x + y), self.table1, self.sids1, id_pairs=[('A', 'B'), ('A', 'B')]) def test_duplicate_transpose_pairs(self): # confirm that partial pairwise execution fails if a transpose # duplicate is observed error_msg = (r"A duplicate or a self-self pair was observed.") with self.assertRaisesRegex(ValueError, error_msg): partial_beta_diversity((lambda x, y: x + y), self.table1, self.sids1, id_pairs=[('A', 'B'), ('A', 'B')]) def test_pairs_not_subset(self): # confirm raise when pairs are not a subset of IDs error_msg = (r"`id_pairs` are not a subset of `ids`") with self.assertRaisesRegex(ValueError, error_msg): partial_beta_diversity((lambda x, y: x + y), self.table1, self.sids1, id_pairs=[('x', 'b'), ]) def test_euclidean(self): # confirm that pw execution through partial is identical def euclidean(u, v, **kwargs): return np.sqrt(((u - v)**2).sum()) id_pairs = [('A', 'B'), ('B', 'F'), ('D', 'E')] actual_dm = partial_beta_diversity(euclidean, self.table2, self.sids2, id_pairs=id_pairs) actual_dm = DistanceMatrix(actual_dm, self.sids2) expected_data = [ [0., 80.8455317, 0., 0., 0., 0.], [80.8455317, 0., 0., 0., 0., 14.422205], [0., 0., 0., 0., 0., 0.], [0., 0., 0., 0., 78.7908624, 0.], [0., 0., 0., 78.7908624, 0., 0.], [0., 14.422205, 0., 0., 0., 0.]] expected_dm = DistanceMatrix(expected_data, self.sids2) for id1 in 
self.sids2: for id2 in self.sids2: npt.assert_almost_equal(actual_dm[id1, id2], expected_dm[id1, id2], 6) def test_unusable_metric(self): id_pairs = [('A', 'B'), ('B', 'F'), ('D', 'E')] error_msg = r"partial_beta_diversity is only compatible" with self.assertRaisesRegex(ValueError, error_msg): partial_beta_diversity('hamming', self.table2, self.sids2, id_pairs=id_pairs) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/diversity/tests/test_util.py000066400000000000000000000322011464262511300224370ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main import numpy as np import pandas as pd import numpy.testing as npt from skbio import TreeNode from skbio.table import example_table from skbio.diversity._util import (_validate_counts_vector, _validate_counts_matrix, _validate_taxa_and_tree, _vectorize_counts_and_tree, _quantitative_to_qualitative_counts, _check_taxa_alias, _table_to_numpy, _validate_table) from skbio.tree import DuplicateNodeError, MissingNodeError class ValidationTests(TestCase): def test_validate_counts_vector(self): # python list obs = _validate_counts_vector([0, 2, 1, 3]) npt.assert_array_equal(obs, np.array([0, 2, 1, 3])) self.assertEqual(obs.dtype, int) # numpy array (no copy made) data = np.array([0, 2, 1, 3]) obs = _validate_counts_vector(data) npt.assert_array_equal(obs, data) self.assertEqual(obs.dtype, int) self.assertTrue(obs is data) # single element obs = _validate_counts_vector([42]) npt.assert_array_equal(obs, np.array([42])) self.assertEqual(obs.dtype, int) self.assertEqual(obs.shape, (1,)) # keep float obs = _validate_counts_vector([42.2, 42.7, 0]) 
npt.assert_array_equal(obs, np.array([42.2, 42.7, 0])) self.assertEqual(obs.dtype, float) # cast into int obs = _validate_counts_vector([42.2, 42.7, 0], cast_int=True) npt.assert_array_equal(obs, np.array([42, 42, 0])) self.assertEqual(obs.dtype, int) # all zeros obs = _validate_counts_vector([0, 0, 0]) npt.assert_array_equal(obs, np.array([0, 0, 0])) self.assertEqual(obs.dtype, int) # all zeros (single value) obs = _validate_counts_vector([0]) npt.assert_array_equal(obs, np.array([0])) self.assertEqual(obs.dtype, int) def test_validate_counts_vector_invalid_input(self): # wrong data type (strings) with self.assertRaises(ValueError): _validate_counts_vector(['a', 'b', 'c']) # wrong data type (complex numbers) with self.assertRaises(ValueError): _validate_counts_vector([1 + 2j, 3 + 4j]) # wrong number of dimensions (2-D) with self.assertRaises(ValueError): _validate_counts_vector([[0, 2, 1, 3], [4, 5, 6, 7]]) # wrong number of dimensions (scalar) with self.assertRaises(ValueError): _validate_counts_vector(1) # negative values with self.assertRaises(ValueError): _validate_counts_vector([0, 0, 2, -1, 3]) # strings with self.assertRaises(ValueError): _validate_counts_vector([0, 0, 'a', -1, 3]) def test_validate_counts_matrix(self): # basic valid input (n=2) obs = _validate_counts_matrix([[0, 1, 1, 0, 2], [0, 0, 2, 1, 3]]) npt.assert_array_equal(obs[0], np.array([0, 1, 1, 0, 2])) npt.assert_array_equal(obs[1], np.array([0, 0, 2, 1, 3])) # basic valid input (n=3) obs = _validate_counts_matrix([[0, 1, 1, 0, 2], [0, 0, 2, 1, 3], [1, 1, 1, 1, 1]]) npt.assert_array_equal(obs[0], np.array([0, 1, 1, 0, 2])) npt.assert_array_equal(obs[1], np.array([0, 0, 2, 1, 3])) npt.assert_array_equal(obs[2], np.array([1, 1, 1, 1, 1])) # empty counts vectors obs = _validate_counts_matrix(np.array([[], []], dtype=int)) npt.assert_array_equal(obs[0], np.array([])) npt.assert_array_equal(obs[1], np.array([])) def test_validate_counts_matrix_pandas(self): obs = 
_validate_counts_matrix(pd.DataFrame([[0, 1, 1, 0, 2], [0, 0, 2, 1, 3], [1, 1, 1, 1, 1]])) npt.assert_array_equal(obs[0], np.array([0, 1, 1, 0, 2])) npt.assert_array_equal(obs[1], np.array([0, 0, 2, 1, 3])) npt.assert_array_equal(obs[2], np.array([1, 1, 1, 1, 1])) def test_validate_counts_matrix_cast_int(self): obs = _validate_counts_matrix( [[42.2, 42.1, 0], [42.2, 42.1, 1.0]], cast_int=True) npt.assert_array_equal(obs[0], np.array([42, 42, 0])) npt.assert_array_equal(obs[1], np.array([42, 42, 1])) self.assertEqual(obs[0].dtype, int) self.assertEqual(obs[1].dtype, int) def test_validate_counts_matrix_negative_counts(self): with self.assertRaises(ValueError): _validate_counts_matrix([[0, 1, 1, 0, 2], [0, 0, 2, -1, 3]]) with self.assertRaises(ValueError): _validate_counts_matrix([[0, 0, 2, -1, 3], [0, 1, 1, 0, 2]]) def test_validate_counts_matrix_unmatching_ids(self): with self.assertRaises(ValueError): _validate_counts_matrix([[0, 1, 1, 0, 2], [0, 0, 2, 1, 3], [1, 1, 1, 1, 1]], ids=['a', 'b']) with self.assertRaises(ValueError): obs = _validate_counts_matrix(pd.DataFrame( [[0, 1, 1, 0, 2], [0, 0, 2, 1, 3], [1, 1, 1, 1, 1]]), ids=['a', 'b']) def test_validate_counts_matrix_unequal_lengths(self): # len of vectors not equal with self.assertRaises(ValueError): _validate_counts_matrix([[0], [0, 0], [9, 8]]) with self.assertRaises(ValueError): _validate_counts_matrix([[0, 0], [0, 0, 8], [9, 8]]) with self.assertRaises(ValueError): _validate_counts_matrix([[0, 0, 75], [0, 0, 3], [9, 8, 22, 44]]) def test_validate_counts_matrix_invalid_input(self): with self.assertRaises(ValueError): _validate_counts_matrix([['a', 'b', 'c']]) with self.assertRaises(ValueError): _validate_counts_matrix([[1 + 2j, 3 + 4j]]) def test_validate_taxa_and_tree(self): # basic valid input tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] 
self.assertTrue(_validate_taxa_and_tree(counts, taxa, tree) is None) # all tips observed tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 1, 1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3', 'OTU4', 'OTU5'] self.assertTrue(_validate_taxa_and_tree(counts, taxa, tree) is None) # no tips observed tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [] taxa = [] self.assertTrue(_validate_taxa_and_tree(counts, taxa, tree) is None) # all counts zero tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [0, 0, 0, 0, 0] taxa = ['OTU1', 'OTU2', 'OTU3', 'OTU4', 'OTU5'] self.assertTrue(_validate_taxa_and_tree(counts, taxa, tree) is None) def test_validate_taxa_and_tree_invalid_input(self): # tree has duplicated tip ids tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU2:0.75):1.25):0.0)root;')) counts = [1, 1, 1] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(DuplicateNodeError, _validate_taxa_and_tree, counts, taxa, tree) # unrooted tree as input tree = TreeNode.read(io.StringIO('((OTU1:0.1, OTU2:0.2):0.3, OTU3:0.5,' 'OTU4:0.7);')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) # taxa has duplicated ids tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU2'] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) # len of vectors not equal tree = TreeNode.read( io.StringIO( '(((((OTU1:0.5,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, _validate_taxa_and_tree, 
counts, taxa, tree) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2'] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) # tree with no branch lengths tree = TreeNode.read(io.StringIO('((((OTU1,OTU2),OTU3)),(OTU4,OTU5));')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) # tree missing some branch lengths tree = TreeNode.read( io.StringIO( '(((((OTU1,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU3'] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) # taxa not present in tree tree = TreeNode.read( io.StringIO( '(((((OTU1:0.25,OTU2:0.5):0.5,OTU3:1.0):1.0):0.0,(OTU4:' '0.75,OTU5:0.75):1.25):0.0)root;')) counts = [1, 2, 3] taxa = ['OTU1', 'OTU2', 'OTU32'] self.assertRaises(MissingNodeError, _validate_taxa_and_tree, counts, taxa, tree) # single node tree tree = TreeNode.read(io.StringIO('root;')) counts = [] taxa = [] self.assertRaises(ValueError, _validate_taxa_and_tree, counts, taxa, tree) def test_vectorize_counts_and_tree(self): tree = TreeNode.read(io.StringIO("((a:1, b:2)c:3)root;")) counts = np.array([[0, 1], [1, 5], [10, 1]]) count_array, indexed, branch_lengths = \ _vectorize_counts_and_tree(counts, np.array(['a', 'b']), tree) exp_counts = np.array([[0, 1, 10], [1, 5, 1], [1, 6, 11], [1, 6, 11]]) npt.assert_equal(count_array, exp_counts.T) def test_quantitative_to_qualitative_counts(self): counts = np.array([[0, 1], [1, 5], [10, 1]]) exp = np.array([[False, True], [True, True], [True, True]]) obs = _quantitative_to_qualitative_counts(counts) npt.assert_equal(obs, exp) counts = np.array([[0, 0, 0], [1, 0, 42]]) exp = np.array([[False, False, False], [True, False, True]]) obs = _quantitative_to_qualitative_counts(counts) npt.assert_equal(obs, exp) def test_check_taxa_alias(self): # for backward compatibility; will be removed in the future msg = "A list of taxon IDs 
must be provided." with self.assertRaises(ValueError) as cm: _check_taxa_alias(None, None, None) self.assertEqual(str(cm.exception), msg) msg = "A phylogenetic tree must be provided." with self.assertRaises(ValueError) as cm: _check_taxa_alias([1], None, None) self.assertEqual(str(cm.exception), msg) obs = _check_taxa_alias([1], '1', None) self.assertListEqual(obs, [1]) obs = _check_taxa_alias(None, '1', [1]) self.assertListEqual(obs, [1]) class TableConversionTests(TestCase): def test_table_to_numpy(self): exp_data = np.array([[0, 1, 2], [3, 4, 5]]).T exp_ids = ['S1', 'S2', 'S3'] exp_feat_ids = ['O1', 'O2'] obs_data, obs_ids, obs_feat_ids = _table_to_numpy(example_table) npt.assert_equal(obs_data, exp_data) self.assertEqual(obs_ids, exp_ids) self.assertEqual(obs_feat_ids, exp_feat_ids) def test_validate_table(self): self.assertRaises(ValueError, _validate_table, example_table, ['foo', 'bar'], {}) self.assertRaises(ValueError, _validate_table, example_table, None, {'taxa': 'foo'}) obs_data, obs_ids = _validate_table(example_table, None, {}) exp_data = np.array([[0, 1, 2], [3, 4, 5]]).T exp_ids = ['S1', 'S2', 'S3'] npt.assert_equal(obs_data, exp_data) self.assertEqual(obs_ids, exp_ids) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/embedding/000077500000000000000000000000001464262511300166055ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/embedding/__init__.py000066400000000000000000000032611464262511300207200ustar00rootroot00000000000000r"""Biological Embeddings (:mod:`skbio.embedding`) ================================================= .. currentmodule:: skbio.embedding This module provides support for storing embeddings for biological objects, such as protein embeddings outputted from protein language models (pLMs). Embedding types --------------- .. autosummary:: :toctree: generated/ Embedding SequenceEmbedding ProteinEmbedding Embedding vector types ---------------------- .. 
autosummary:: :toctree: generated/ SequenceVector ProteinVector Embedding vector utilities -------------------------- .. autosummary:: :toctree: generated/ embed_vec_to_numpy embed_vec_to_distances embed_vec_to_ordination embed_vec_to_dataframe """ # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from ._embedding import ( Embedding, SequenceEmbedding, EmbeddingVector, SequenceVector, embed_vec_to_numpy, embed_vec_to_distances, embed_vec_to_ordination, embed_vec_to_dataframe, ) from ._protein import ProteinEmbedding, ProteinVector, example_protein_embedding __all__ = [ "Embedding", "SequenceEmbedding", "EmbeddingVector", "SequenceVector", "embed_vec_to_numpy", "embed_vec_to_distances", "embed_vec_to_ordination", "embed_vec_to_dataframe", "ProteinEmbedding", "ProteinVector", "example_protein_embedding", ] scikit-bio-0.6.2/skbio/embedding/_embedding.py000066400000000000000000000252141464262511300212400ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import pandas as pd from scipy.linalg import svd from skbio.sequence import Sequence from skbio._base import SkbioObject from skbio.stats.ordination import OrdinationResults from skbio.diversity import beta_diversity def _repr_helper(rstr, org_name, new_name, dim_name, regex_match, shape): rstr = rstr.replace(org_name, new_name) n_indent = 4 # see Sequence.__repr__ indent = " " * n_indent rstr = rstr.replace( regex_match, f"{dim_name} dimension: {shape}\n{indent}has gaps", ) return rstr class Embedding(SkbioObject): r"""Embedding for a biological object. Parameters ---------- embedding : array_like Embedding matrix where the first axis is indexed by `ids`. ids : array_like IDs of biological objects. """ @property def embedding(self): r"""The embedding tensor.""" return self._embedding @property def ids(self): r"""IDs corresponding to each row of the embedding.""" # each embedding row corresponds to an id return self._ids def __init__(self, embedding, ids, **kwargs): # make sure that the embedding has the same length as the sequence ids_len = len(ids) if embedding.shape[0] != ids_len: raise ValueError( f"The embedding ({embedding.shape[0]}) must have the " f"same length as the ids ({ids_len})." ) self._embedding = np.asarray(embedding) self._ids = np.asarray(ids) def __str__(self): raise NotImplementedError("This method should be implemented by subclasses.") def bytes(self): r"""Bytes representation of string encoding.""" seq = np.frombuffer(str(self).encode("ascii"), dtype=np.uint8) return seq class SequenceEmbedding(Embedding): r"""Embedding for a biological sequence. Parameters ---------- embedding : array_like The embedding of the sequence. Row vectors correspond to the latent character coordinates. sequence : str, Sequence, or 1D ndarray Characters representing the sequence itself. 
See Also -------- Embedding skbio.sequence.Sequence """ def __init__(self, embedding, sequence, **kwargs): if isinstance(sequence, Sequence): sequence = str(sequence) if isinstance(sequence, str): sequence = sequence.encode("ascii") seq = np.frombuffer(sequence, dtype=np.uint8) super(SequenceEmbedding, self).__init__(embedding, seq, **kwargs) def __str__(self): r"""String representation of the underlying sequence.""" return str(self._ids.tobytes().decode("ascii")) @property def sequence(self): r"""String representation of the underlying sequence.""" return str(self) def __repr__(self): r"""Return a string representation of the SequenceEmbedding object. Returns ------- str String representation of the SequenceEmbedding object. See Also -------- skbio.sequence.Protein """ seq = Sequence(self.sequence) rstr = _repr_helper( repr(seq), "Sequence", "SequenceEmbedding", "embedding", regex_match="length", shape=self.embedding.shape[1], ) return rstr class EmbeddingVector(Embedding): r"""Vector representation for a biological entity. Parameters ---------- vector : 1D or 2D array_like The vector representation of the sequence. Typically a 1D array. Can also be a 2D array with only one row. sequence : str, Sequence, or 1D ndarray Characters representing the sequence itself. See Also -------- Embedding """ def __init__(self, vector, obj, **kwargs): super(EmbeddingVector, self).__init__(vector, obj, **kwargs) def __str__(self): return self._ids[0].decode("ascii") @property def vector(self): r"""Vector representation for the biological entity.""" return self._embedding.squeeze() @property def embedding(self): r"""The embedding tensor.""" return self._embedding.reshape(1, -1) class SequenceVector(EmbeddingVector): r"""Vector representation for a biological sequence. Parameters ---------- vector : 1D or 2D array_like The vector representation of the sequence. Typically a 1D array. Can also be a 2D array with only one row. 
sequence : str, Sequence, or 1D ndarray Characters representing the sequence itself. See Also -------- EmbeddingVector skbio.sequence.Sequence """ def __init__(self, vector, sequence, **kwargs): vector = np.atleast_2d(vector) if vector.shape[0] != 1: raise ValueError("Only one vector per sequence is allowed.") if isinstance(sequence, Sequence): sequence = str(sequence) if isinstance(sequence, str): sequence = sequence.encode("ascii") sequence = np.array([sequence], dtype="O") super(SequenceVector, self).__init__(vector, sequence, **kwargs) @property def sequence(self): r"""String representation of the underlying sequence.""" return str(self) def __repr__(self): r"""Return a string representation of the SequenceVector object. Returns ------- str A string representation of the SequenceVector object. See Also -------- skbio.sequence.Sequence """ seq = Sequence(str(self)) rstr = _repr_helper( repr(seq), "Sequence", "SequenceVector", "vector", regex_match="length", shape=self.embedding.shape[1], ) return rstr def embed_vec_to_numpy(vectors, validate=True): r"""Convert an iterable of EmbeddingVector objects to a NumPy array. Parameters ---------- vectors : iterable of EmbeddingVector objects An iterable of EmbeddingVector objects, or objects that subclass EmbeddingVector. validate : bool, optional If ``True``, validate that all vectors have the same length and are valid types. Returns ------- ndarray of shape (n_objects, n_features) A NumPy array where n_features corresponds to the dimensionality of the latent space. Raises ------ ValueError If the vectors do not have the same length. """ if validate: subcls = [issubclass(type(ev), EmbeddingVector) for ev in vectors] if not all(subcls): raise ValueError( "Input iterable contains objects that " "do not subclass EmbeddingVector." 
) types = [type(ev) for ev in vectors] if not all(t == types[0] for t in types): raise ValueError("All objects must be of the same type.") lens = [len(ev.vector) for ev in vectors] if not all(ln == lens[0] for ln in lens): raise ValueError("All vectors must have the same length.") data = np.vstack([ev.vector for ev in vectors]) return data def embed_vec_to_distances(vectors, metric="euclidean", validate=True): r"""Convert EmbeddingVector objects to a DistanceMatrix object. Parameters ---------- vectors : iterable of EmbeddingVector objects An iterable of EmbeddingVector objects, or objects that subclass EmbeddingVector. metric : str or callable, optional The distance metric to use. Must be a valid metric for ``scipy.spatial.distance.pdist``. validate : bool, optional If ``True``, validate that all vectors have the same length and are valid types. Returns ------- DistanceMatrix A distance matrix representing pairwise distances among objects calculated by the given metric. See Also -------- skbio.stats.distance.DistanceMatrix """ data = embed_vec_to_numpy(vectors, validate=validate) ids = [str(ev) for ev in vectors] return beta_diversity(metric, data, ids) def embed_vec_to_ordination(vectors, validate=True): r"""Convert EmbeddingVector objects to an Ordination object. A singular value decomposition (SVD) is performed on the data. Parameters ---------- vectors : iterable of EmbeddingVector objects An iterable of EmbeddingVector objects, or objects that subclass EmbeddingVector. validate : bool, optional If ``True``, validate that all vectors have the same length and are valid types. Returns ------- OrdinationResults Ordination results with objects as samples and latent variables as features. 
See Also -------- skbio.stats.ordination.OrdinationResults """ data = embed_vec_to_numpy(vectors, validate=validate) u, s, vh = svd(data, full_matrices=False) eigvals = s**2 short_name = "SVD" long_name = "Singular Value Decomposition" # note that we are moving half of the singular values # in the eigvals to the samples and the other half to the features # this is to help with the interpretation of the ordination # if visualizing with biplots ordr = OrdinationResults( short_method_name=short_name, long_method_name=long_name, eigvals=eigvals, proportion_explained=eigvals / eigvals.sum(), samples=pd.DataFrame(u @ np.diag(s), index=[str(ev) for ev in vectors]), features=pd.DataFrame(vh.T, index=range(data.shape[1])), ) return ordr def embed_vec_to_dataframe(vectors, validate=True): r"""Convert a list of SequenceVector objects to a pandas DataFrame. Parameters ---------- vectors : iterable of EmbeddingVector objects An iterable of EmbeddingVector objects, or objects that subclass EmbeddingVector. validate : bool, optional If ``True``, validate that all vectors have the same length and are valid types. Returns ------- pd.DataFrame Data frame containing the embedding vectors as rows (index) and object IDs as columns. See Also -------- pd.DataFrame """ data = embed_vec_to_numpy(vectors, validate=validate) return pd.DataFrame(data, index=[str(ev) for ev in vectors]) scikit-bio-0.6.2/skbio/embedding/_protein.py000066400000000000000000000123221464262511300207760ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import pandas as pd from skbio.sequence import Protein from skbio.stats.ordination import OrdinationResults from skbio.embedding._embedding import SequenceEmbedding, SequenceVector, _repr_helper def _validate_protein(sequence): if isinstance(sequence, Protein): sequence = str(sequence) elif isinstance(sequence, str): if " " in sequence: sequence = sequence.replace(" ", "") # perform a check to make sure the sequence is a valid protein sequence _ = Protein(sequence) return sequence class ProteinEmbedding(SequenceEmbedding): r"""Embedding of a protein sequence. Parameters ---------- embedding : array_like The embedding of the protein sequence. Row vectors correspond to the latent residues coordinates. sequence : str, Protein, or 1D ndarray Characters representing the protein sequence itself. clip_head : bool, optional If ``True``, then the first row of the embedding will be removed. Some language models specify start tokens, and this parameter can be used to account for this. clip_tail : bool, optional If ``True``, then the last row of the embedding will be removed. Some language models specify end tokens, and this parameter can be used to account for this. 
See Also -------- skbio.sequence.Protein Examples -------- >>> from skbio.embedding import ProteinEmbedding >>> import numpy as np >>> embedding = np.random.rand(10, 3) >>> sequence = "ACDEFGHIKL" >>> ProteinEmbedding(embedding, sequence) ProteinEmbedding -------------------------- Stats: length: 10 embedding dimension: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 ACDEFGHIKL """ default_write_format = "embed" def __init__(self, embedding, sequence, clip_head=False, clip_tail=False, **kwargs): embedding = np.asarray(embedding) if clip_head: embedding = embedding[1:] if clip_tail: embedding = embedding[:-1] sequence = _validate_protein(sequence) super(ProteinEmbedding, self).__init__( embedding=embedding, sequence=sequence, **kwargs ) @property def residues(self): r"""Array containing underlying residue characters.""" return self._ids.view("|S1") def __repr__(self): r"""Return a string representation of the ProteinEmbedding object. Returns ------- str String representation of the ProteinEmbedding object. See Also -------- skbio.sequence.Protein """ seq = Protein(str(self)) rstr = _repr_helper( repr(seq), "Protein", "ProteinEmbedding", "embedding", regex_match="has gaps", shape=self.embedding.shape[1], ) return rstr example_protein_embedding = ProteinEmbedding( np.random.default_rng(0).normal(size=(62, 1024)), "IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASED", ) class ProteinVector(SequenceVector): r"""Vector representation for a protein sequence. Parameters ---------- vector : 1D or 2D array_like The vector representation of the protein sequence. Typically a 1D array. Can also be a 2D array with only one row. sequence : str, Sequence, or 1D ndarray Characters representing the protein sequence itself. 
See Also -------- SequenceVector skbio.sequence.Protein Examples -------- >>> from skbio.embedding import ProteinVector >>> import numpy as np >>> vector = np.random.rand(10) >>> sequence = "ACDEFGHIKL" >>> ProteinVector(vector, sequence) ProteinVector -------------------------- Stats: length: 10 vector dimension: 10 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 ACDEFGHIKL """ default_write_format = "embed" def __init__(self, vector, sequence: str, **kwargs): sequence = _validate_protein(sequence) super(ProteinVector, self).__init__(vector, sequence=sequence, **kwargs) def __repr__(self): r"""Return a string representation of the ProteinVector object. Returns ------- str String representation of the ProteinEmbedding object. See Also -------- skbio.sequence.Protein """ seq = Protein(str(self)) rstr = _repr_helper( repr(seq), "Protein", "ProteinVector", "vector", regex_match="has gaps", shape=self.embedding.shape[1], ) return rstr scikit-bio-0.6.2/skbio/embedding/tests/000077500000000000000000000000001464262511300177475ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/embedding/tests/__init__.py000066400000000000000000000005411464262511300220600ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/embedding/tests/data/000077500000000000000000000000001464262511300206605ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/embedding/tests/data/embed1.txt.npy000066400000000000000000007602001464262511300233700ustar00rootroot00000000000000NUMPYv{'descr': ':J/W0*t@8w>#>Bb>M= Ѿ~>ș^>륽\UNZL%بR>Mڽ&߽`߽ #2g>P=ݣ>8G[=ػf>E߱iľɃ @==~=><>ja}M> >Ž7`q2=vkV>+a==W>T2= aݦ-/(>Xo$K=@N@>]Y8uu=8xF %=)==A>R>"JP>Lz=>MaP>[HE>c[=J2:Rt;ۤ]=o>~ξU 0^ >/;=r1S>M>t=Ұ=\>9枾=> tʰ́ˉ=o =-Ⲥ>E>:<U=4&>|{>H~ri><>Ct=6ݻP=XF=9>ASu>P;`>w>*ھ 9ؼ-;Ÿ: )E>y==#>Ja w >Sh?&Y!7Ifʽ(v>s=>{/9 >/ >^>'=!&?gbͽ<¢=Ccȋ  kye>jN:p|>< (=h=h _x>MH<wų=,r=@>>%AlwO=iVf =1ǽc>L\>LW>픦=Ƽ>_mM𡬾ƾ:ܻ>nGRmNb G>m>4">HOr͢=uǽ>< m_=>>6>fj*nkp>> >ٽA>}>>8{n > ؜Evn>b=(>x`zm2{]1iWL*aB `=Ƚ+<{==,̾Bp>ۛM>tDCm=h>S>;ocM~ kr)=8>͚)l>u?F>^$>Cؾu1>H<=K=iI>G{=P𒼳%+,=d== \ Ŋ> Z=-o[6>kɜ2Ґ>c=;S>;ʺ<ުWGŒ>=[-=|d *@O< ܖ= )>ս1y*=<">LM7R(>65>ە+>6ܴ">N0$ =B==a1]X>>M$`=WI;ڶ&[>*y= 8=T>q=a\c 5r9z)>g>>`<$>'>ͽ{=." 6><>oVs˿(=FLՂ׾ =ѭ=a@>{t=3Cp<ͷJ>]T<>SwC%->? =I>7>!E/p>8>WL>՘E> ^A{T>C=d8f=Iq-=Q.w>] )Wj>x?PfԽO~b>y:uu0>Ce>M> ƾoZg>Ї>)>'8>#=˼>D}>څ>7,>!=pNþEqo< a\=v4>*Ao!&=JIYvqre#8>*<%߽-q=w䓾kz]==.;Bq)>U">WvP*= )>* мܧ8sgﳾVB2,뽮=#>@>;#>ʽ Ev= H 禼Κ=Y>40=xu=ة=8?L=W>>j|m">l=7'>= 6>x<)ۉ왋>=T>6>Fͽ:y]J 2=;W=!$?9FH>>z=3 >3=XVN,F>k m2sĠ_:=*W>2>b֗>\>d`U:'=r->նC,Z+"->ॾf ;0<*>=X?$uc=0ѽ$I>tt">j$=_F>3l6=<ݖ9zκ=;vᵢȖC>vx1>X;a>{ƙs=h=<>`GW=7]!A>a S? *tQ=Ӿ 6 >aJq,o =WG( _^P=iIQ i:=(N>)e>]ԄpH"G߽JY[=J9ۼEˀBf9 q= 5%>i%=>P,F==CXTPCfϼhAk=]~<|U>>=,Ӆ=Y 1C=x='=ļ'O==I%R@1>cDcw >G̊>~=nD> >73=@>O 7ek辽V==8>;:>y=fýc]Q= g>/=>mw>=2>y=h=?d?oF={ez>R>iG$z>b<=>0@=`= >Tڽ'CIӽwEsʽ9=S;>:=BK>@>H>ɲP\=8 >~t!h=>AN>闽=z=i һq=s`=>T=h>F==ʶ<:q);b=p>a¥;=©=M'@uÜ>Xt=<;. 
&'y4ڌ;S!Խ >(7>>ʊxA>5 Ew>c/=2?>w>&A=@[ >B>35<>Z=껾`3<;t>/ڼb>sὋZꚀ̬Y>*˾ׯwC='>2@潗YӽQ> >ye>52>2>QG>Fx/>iSh>,{H=>Sm4#e >x`> ٜv/>ɾ@DV?I>'S<'Sdžq=a>n>ً>z=C&>V>]|,<$^:7h>nх ;>k3N)c>D) >Wb>2A>O4<`>P]q,H=[HCZ=K>,a,"opy(>lR jg=Ѿ2WQƄ'LUc>+e=TŻjX>>1FNC>j<}P/>.h;ཕ˽=7Y=6?<'*9=/>O>L>h4u=!?q R=C4A^>f`1>"a =I=].Ջ>8o p>{L=HD+ճW;/9<[ڽ֩co>XDI>F2>һbIo慼=۽o\k=V>>xVҾzOY>Cu%>+>w=*{}>1=$:L>G?kI=j[H;ܠO> 8=bA} ~w<\=9> /=*=T>t?~>F=Ҩ=̼94>{"Ups>Wd;>T'@=5=>BW>_0=CҼ=߯O>J>PK>zߏ=$>T>&7~Y-_b;ڽS=F0޽l>V>d#>"B:=>*<);=fc,5=>>sZ>-G=/fJ(o5=v^> w); IF$=Cb==^Hоa%qjva⍽?A>}č^h/>e%>= `~=ۣick??Y"^/>u/M0 I=X;:rb?{y=ai=Y= ƾKؽk^=Feڡ]/>H>d>U?>i79ʽ#N.Db*>)P>#>=E79NTAzn=|ۻq@=>4=:7P#-tdǽ<$Y=I_A<>5=*R! ygC:={T< =Vw(Vk.>С=ņZiɅ7 >gۨ$콭6=V=m[v>}>W<ۺ=|= >.>=(=[YZV>=^wW< pI0>۽Ҵ>R *C?<">2.bS>Ju=j˽R/> >_=(Es==ԃ;`N=l@>.ؾ3=>+a=>&d>h=&=K/mK>T5>!pI=B[>/ٕ,aҽ>E; R<7=Ro+>@gv9j>l>ش-o6ʲ!$ھAf =T-t >J=<߸>t=z@f>xC=F>>?E<8E>8}ݲ ƽ=A83w;a>.sSm>=jWfuԽ5H)>sOb޽c!k þȽe޽ʾi=>@F>ggվn&j$ݠ><>'i83>΋=Ώ>=m-RtuEfbt>7>-jr=? > |b>>iu> +Rȑ=(:1 >iݳ>3>L>n ?z[> ?wx=lUJ>U%Sfm=>$|&W=>"l=Gi .<:#N'=, m<e2>[ 5Ě >GdC >><'(jPy}Yq> ~<=[>1=g*O=G <,YR=k=-2>'->6ٻB 8>~J־S"kN>c}ug=Ks|x8=;>/ 6|-P>v>t-!=Z9?*,pNJ9>A!X'H>ض<+?(K"րy>>G=b>=k>E==_=:'𘽈9nMXEH\><>8>SW m3=*$M=ټP3YQ3<$> `=s%侷E%>DQ5%|=ߨ=: ?횼di<뺼ċp4=c=&n[)XreK=#R=@wk>LzѼk;4;|q>HÚ=B֗=@>,Q>\gIӵ5<.=NCH2='菉G->$=}G9 ݽ>>(>:3EUhٽ3=0=N>C8==>pľZm>g=$<>Ko>$ս:}ɽC&Hx!> ='i>Z/>"Fdu A<>EL>Q_U>>/=E> >B>=$>T>>D侩 <z!>>7>#">S=űu< P/>"d͆8ElW^N>~ u[t=>01è=- ]<1:QtЇ>zx=p ?~M_"Z>c6P >~;^>hTo>g>J=a>㣼q=)>@>v>~zFw"/CM򞾬w~ fι!>Z=IO`-&>JP>be5M=~@:+>Ck=oƽzyݽ05pn .Ձ<|><*нКZ=K>AK>վ=& /=E=0T>6nŭ=r=dY_>89>T)>_&>*+ex>g< AB=e>Ғ=>)>l^b>/:3>Խԣ> <+5>%<7ļ =">UH>Y}ܾW>>(QU"?~q->> >B>>N>d>ez9BAC{w,><*>E=Hj8#%3sҾ;mC>J>Tx&8b>$#Xg@=и>̊>A> >X\:W=;ͽYyhE $S\ !1( X%`-iN,=wl-m==<]ۀW: ԽJPN?>Z?*h>ϐM>$>: >Q<>/GWs>)=Un}i6>K=mrZ߽A>$?vYF;=M򇽣N>=q > n>3럽 ǎ>x=ęZԐ>M0Z%_=#)==hJ+B<>vm=9>74>2n5㢦 qν=vĽ$> >pO?TI=><& z=>%⽕*y=a{,nN>iԱ3Ka>>}?/L>c" o};&rF],">eɾ|>>3Wz=>>>a=VV(=Kzj=&Jd=4ݱWO>c=y3'z>@0*>[>ⳑ0[=/>)愘<=A׺>>V~j>!3Z[UM^f*>h?%sPӽ@?>J~}>{>E>@>^>{>>=_"1ý=X%%,"Qu 4#=M=Xn >*6nȄ>EѽV<0:> F轼H 
>|X<ý*1= (>\<>=,E=q#@>ܾx_[9Vkv =.립C<>%=^>s>=~=bZ=g>b8bf>l1>3>ѓ>>:9 >MY=e&ܾ>%l>bX =0>BB S>;𙾆k)4Wu=##]<OA?>^=D>>zafX۾]=P>HO>PR >0=5p.8>ޕ>>~>8P,=f>Ït>nl>nX.=>6`>ͪ>ZF}^y7=ׄ w>?3"=Ftl>ʦ>\1ɒq6y>)=> b>\ 0k;=ٲ>S>Ι>E>n">pc=s~ ʾc$=]=н<ݽ> #k>pP>Z]=>>b*?ѽMRսG֫>%Y>IC"ļijDX="W>&>;6<׻ڀ>$=9u>7NF>S/>+2==(u|;9/ۯ!~}B0>E{=~`>-d>>R&l6q:=S\z}a Ȕ>oBD=?J=Ţ4a :^ k[J>=4b6>L=9B>m*=.>ߖѽs"?5)=F'v<>f;&>m>ɜ[C<$>.>l"l:>sHt>xܼSm=w=;>Tr=%>T˾)=$8a==Ҧ0T.>y&='/>@#r>֠&<>J4:Y =υ;(T>k>1> ,sMgzw$>"D2.=5XW=@a>mIe>$tM>>e=oC>e;K<|*LI>!o=)v.ȏ>Kebн'/7QŮ=R1>آvPAd= 51վ>[=P8=>A=-%l>=R[/=J՟Ȣ% =姥=cg>=gbˈ=tb>T(>"K=_ɬ=Ĵ>y㠽fSyZǔ>˟2=Q>c>΄>>8>=JgV)\L.=̎1*;>>r=;TCafe>G2=->< l창>@/'>~$>>ؤ]7"K=M=CS=*c >D5>1i Fw5=bs=nC>>pgB>dp >@@Ӽ==2c< >)#i $FؾkFU [ d;@<ެ|D>6r>0#p=돾%=P-=>Q=>ƒX>Ա3k6>"O>l8<̾LL>S]O=w=;o'?Tv?=̽>F½,>GKC=RR!=v>yΛ?bvͽ?>G>kݼJ{=i걎Ntd$>+[4>«h; ߾i'>/L*]D.}Ge{¯>\>r,>!Ѿ* >{:>bv,y=<yoqcY[>K=:=xAyY>= E|>i>Igљ=>=G#<~)ht>9>A~z?=7=SZ~[=6<6>"d>U >Y1Y>̾ =1>=<)=\>=}Vavk=.F:>9˽(>4ZzS.\J=o=c }>泛i!3'k;&"'i>)=Pk>O{ =r><=Qtk\==>9J>>>RQzٽE=X7=u@>.>8=m?lYfL=ν=V=><Kݰ9>) I6>Y1 >S8>*>,kX%>*M>w9>Q̫> ,AN?ؒk=h>*nn>0 :>5}ʼn;pݖ;>:E=!y>=ٯ]9G?=X;>j< >E>>7F>.k :=9Hz=X=y>> =i="h; 1>Iw>nP< р=T>%,rMhc?Q20===-%61=.>#=Po˂>N8>[[>p8N:G] Km=EOq>뼬QqX=yMy);c=v>>KU>d2U%@Wx< >1=;>% ,>nf>'@p>Rp2S>h=\ <L%IXN>@i$?y$xp==-{==Ã=U>˟U`m>^>˴>f>]+oQ== ,6=i >NN>krxJ y6Z=S>~laZ*鋉Wꤧ=WG>~,{beܑ\X3M>;<ycr;H<?MP3>Ct;>ܸ=͊] @>uoxD)>>(m̾>ό:VM<2$ ==)_T"ZɏcX=":>$u =2Z <@/K =vP= IekD>ci/F>|=Xl>]=_ =;E!QGI>]$ZL>M:An=?I=J"Ǿ=) Tݸ9ͮ> A=>o>C&Q?p= > <6~@5]/x+!>_xA=Vo,KUy6!>LJ2+׼P=ur񭈾 >SoX6? 
=#~ѡK>3VWq>!}<.!C8H(>xȾGC+O=ꋂ"J]>a>e<" M =0)M=lv= >>,[,u)yJ*:>>">d=~<(=fp><>PbN 7 _Ce̽ؽԼ>>>&_>;$=6=8.=rjQm>JA=¨==6<+jVA>t=ҽ&o遾n=M1hDV=Cv_=߾R*'=$žV$1>韈> >'<> C= l=}=^> 3{Ĵ>__>ju>juu>_"F罻v>_=~u4|X>;`=k>>Y=+B[OCA=Ľ>G&%=!9p d>:I=߼'#>>8t35<;Ѿ;7be:u>ag>+J})>T1*>Y2滺8;1b>hg禎>&O)>s>MA X?0=p@={=ݨ)l>}A<+;Y*>/H7l>k.h(>A*>]z[u><>hm<=P="K49>ϝP=e=DV>x{1=h>侘>>>M ᓽ@H=;&s>*y6,I >3` mmrXM%q-=l=yl>=uŽV K>l(Ǿl >q>e3>&Oڬn J>3>u>nP9kR`su?>/^?,Hc/x7J>f5 >[<<0d[$,Jy>xiC!ܴt> <>:t&=X-u-|Ĺ$>*> G W|=kz;~3>L|΅=ӻX%-|u>vn>+@q?>N>>Պ>Q2=~s=u<=>/xĜ=z.1=;[LFQu=ս2f5 dL0>9=g`6=>Ti^>X<H>=8l?P=*m7>U>x`M>ʴ>烾$ b>y|m9>oX7ƛ>s>N==r > v:5c \we>n"Yaʾ_3H(>=8g1x=R=掾H>,Q^>q>1*X>w>=:ˏېraM־roUn~P> guyq[T=>A*mT>qZ侟=2 = Ep>L'`6܂y=bR=>=PDOpc¾ؓTO>B>J=wؽO=>d<==Wѽɪ>r^ii?/=RWT=x2>̼V=~=?<a=ae<e=7>ut=yf2}8<ȇ,;gC>Vʸ:^= G 0> >>o:=_>̽=2|3>;<Y;ȵ.>^ѽߏ>z4>rZNm>="q, -rk><>NVsE<6齃=R‰>x[>'P+wHQ(>=jks󤿽.{>L>d1>P->i>_=@$<vQ>yx>=Y(~=LQ`0>=i4>(;f 5(V^4>wټa>>>š;L4>G4b=| h![Y[>@=p1õQ=Eþ">&=o.=o-1>x>=>Q _ļtNQ6t>.7z>=1>db>3>.MP= ?C=.>&=*ݾ{Vۛa+C=kH4->}qȑ>X}D9f<>64L̾@U>})!ٽ>YG==;#> /= .+> ;#G=Cd/="9="z(>/9ڼ׾N% >=>[=3{= ]>vqcʾS=ƼAP>+?0߻prdv[ŵ ?Ȣ=v=SԽ=.M5=> J=*νpX2>u`K>8z1=&= ='2Ly>8K<¾r<^x<%<>̼wQGT=k>%pӽu>n`_S˽j>[t>/:U=@=PV>*<բ>9d=mA;[ ͹wn=݉=Y>r= 3;лt>f'm7)'d>tw$ʱL> e_;S=j_>8ڱ=oe>)9<ھ[c>".+Yf>>y=TN0ܟRQ=wU9>= >$YW>z!OU=?>b(%D/i t>zҽ}s<$5X= ?>==f,>=Ł=>{JE>>KYo,,<)W[=ՠ>HLS^Y=6>>'ܙ1ٽ ; =z?Y\}4=?=DUؼvmÑDs~&@:>ԛ>=IN>/E=Xu>V =O;>Ҏ ,S>R rB# ?}>3="`~=>R4~>y<潠Խ=&=+T> >Lm@"Kގ~SbPj;=#D>0?j>\*B|ݼ0#G>_%??>{}>ψս>M== y9>A[=?\=*G>`[>Ы=>4x=`1Ab3^=|n>S׾ZL>8le 5>Pts8R5=njxW <8>N$]=:'m-8>Tۼ ?V'? a=>6=Ӿm|=Ai<ò=q<;^轈=o%==7a>H'! 
>CF.?;O:V={=M=M<=!#f ?ǞTM>T-1(rR.mI=Ĕz6>m<#{,}Hl>;n=秽J>5x><=Zj־+M2=/= >"~>Ǩ=LgJR9*,J>BU=!/6=E4t>*!><8<þn==0=i3dZ=+$<>mX81'8>>B۽ @ =&hMt"9O)34ؽU<^"=T<=b u>p>A>Rfp=c->L콽̌˾/q=>>k=f/b>-a5>֓~2= % |Y+=W)>־X}ýżA=I=뽵-5>"h=@カ_>u mFMf> v">E>悔p~>>#Yxxua^stɩ1ώ =ܦ#=mY`p"=฾9=zgǴQ[>i?0B=p =K촼kQvHYg=^V7==C>4lн>>w>{q<=w$>Q3=ub^,l=M<b:Nԕ<҂> > &f>+r>kI>B# cTKrk,`p> =>j=k>?.y>Z=Rk 5S>'jiz>PE)6E>qeO>>t ޾Jq>>u͖=+>i"+='ND ==Њ=bK>!=H=O:8?}8Ƽ>ma>e>>~2= $h<_ڼU>t1J&Q y*/0<4ػ{>=Ydyr#W+>zE>k#>9) 2=7eOJ<0Z&=C[nf5?LpkN=S2&>mJ֟ƽp>JX=d>2=a׾"에%8=>je>$ }>\S>*l<ɽc$ @-EN׼<>b=!Ҁ tv>Ծ=ɽ8:elM;T=!۽+l]>2<h=6eI>[bԽ {=ʜ;>^ûl1|>[1>C=#>"9ǵ<挾=>` pd64%D>D2ؽ#=2^>rnP(x Z=Qa2֘> oY=’+> =&=&n=>ii)8>f>r&)>aa;>Tb>=sT=!S*> ">P:>r>t~O=μj>!>>0<ೀcN>}[λ#=n4Rr!cP> ;i>  =Sau> v (~>pY>ྍ1Zޕ>]¼뉾N0hfQU>XM>0OBD׼>;4F^a:ٰz>,"Y4?mp<v[r>T-u0>I=Y!-ǾMN>܎#&>`Y>>p=6ܛyQD=q>P"P8re=ENُd1n>S{ռiнm\<A=֤_ߏR,\4b=jew=P>1g1G?2? ='V=mq=^=џ 'ú>h <4ct>7e㙾. =X<Ӿt }!E>ѲIP;=盽51VyG>1 =d-U<Kt^=nV>=YS9.=ۥ>ge>>>?Ma>L= 3=;2Ff>=@X=2O ix;/?2>Φ(= ǎ\@ŽNcԎMy.ujp>'=X(=>=>' j=p>RA??r#=-B ^3¾2=r"}QJ`\"43$X8H>7>>T=Ł>?%=.;#=<쾐m=u\\jst<] >.= wn.=kƦ  s=Za; >,_F=̙V(>P=/o=t'V?3­D>ҷ=(ξ0D3ݰM>jR>"^.`=s>=Ì>"=>Ih;1H%݀>>'?=ry p@l\Nj=,M `,>>IhRw*>Rʾ;#?e=W=mlO3u-O>r >n6^ ~8>~8 ?٢ oC j4>_{ WW>|j=,̥X<㽔s>2:Ev>ࠅX]8-3b>t/Xw>bξýH=q$>9==ޖ<"g:/>X?۽?{==>' dV*ϽÙpF=-:>3ۮ>|>B}>B>Ѿ[%>qݽ ={(=rzkǾ~>_6`u*>̾>>R?6S>NK<zv=qM^>0>t[ս E=x==hZ^>M=B0޽ ,=^a>u> >H=W_=aFM pW>n=Rvv|(>f=wfva<4 0>IoE8>D5g.P>1 =>G=>>%/:{>@@;"Z&~F=׭ov =>Q?= 1.=V== [{>󛽊=G>UȾ>>d>)g*>&J>a$> ?jF> n}[>>spǿ>])=O>d<>Cd>1=ˉI{h<>ֽjj徳 ־ %V I>>㥐I>>==%y>MIzn=ؽ=xc>>6>NF"О=>w=?;6!4<քvc>>iD>݊=݊> o1gd=ODD =I?H="T,P=Fg ױt=l}z=3>Nh=@,/>i> =qq`:Н;\!ᠾKIE%=H 4> <>T+D=]>O= >t9ͼp]=>{C==ԇ=Ap?σ⽿@>C=qi>CN>Xr߯x/ Ҿ,wl򽣤=қc>#7=>$2)?w=`4j.>Mjs>2/?>*<#-D>=ZjTB3>>S"-=xнH|ܾv#l=C{>l`=q;C2= N=BLL$`>4>0">T{u2 ->Ѿpf=r!=-f=?̐>>C R>> [սIn<{>撆>SUG1+#=['=b=T徝Hw`>=#1>5>J)߻ !'(>{>Y>4>4}=[&y>ܢػ%99g=:>. 
[>G =9 >/>.>A:5>9<= >T>aCoh=a /A=T*>.,=GYϾ===IHYA iUC0>r>Bm|>,=>N|qn^K0>kd9/޾k>D4'ui>oDщnH>懾~>$$J :kvоYXРDznS+=>"B >@0=Xe{޾ OnKlt3>>>==X*~>< =?>>|O.>g< pn>F H}[ê!=l<ς>g;4e<=1]P|GTR>CJl=V>>Q=ڕV>>K>9pN8=>8;54)IT<ǎ-0>H>iq>c-->D=>Q+>u,5>Q>F,=yib >'<@@?=':>*G0>&=J>=>F_Л={ =}>+U+Ʃ>P>>Z>>?I=>S=:>>Ҷp5>%>̙=}=bi>y%t>X>Ѽo>L?W<+˽ =Nt>V]M>fbhy>+C=D|Lk~V̀> 9Za:>=,{>,nΓ jI̽,=>;>dJ>?>q%>Zս F>=W;(ek> G UN>(?gy>/Xɫ>==@ |>>=@  ݈=9= mT/=5G?LɼYľB>uN̽0=N>h/P0>"$>%>vm&t[+a< r*>1K>yN&=g=d1>-½<)1p<16>!>>.1A=[2余cfV>ɿ6>o=ʡ2>Pޅ>f5ýeU>4{F5k={S>K>UԾ*_>6~>y>z(> U7=򼶾ɾG>=#|%>lW>x=FoNX"=&S>_?^tgRt ?XO=B<0an=!䵽^>ދ?a=yFѽtz$au=Mb$6 =[=ɴ O>7D]=*h>}LɽZiLhj묻O =>n@ _Xo=Ej2IqPѤ= 2b=Хޱ>>>ɺ=3U%7o РCTYȣqIM.>F%>򷅻V>>oqľ/-νG0n>>-a=;-i>=zf= I>U0OyAUWFCCd=BW!3=L ?b >>`=2Q>n֧E->+=G >q1˾ڽԔMԖT&=ձ> hM >{1t=>)=w> >>m־PW>&:=}$9?S=>G70>Gj-w==uoe\>or>=>=sQ>Kڽ<¿>δ<zڶL2=Y""6=<=g^WJ=:=ɍUn;ٖit>=H?>><0>$꽯対n>>WGI^=35<<7<RVNMy>>=`&9><="}ܽ}m&ﭾSS6=RԺU>h>g>h#8\Xd=>NG>s뤽j◼[=%hW*nf>U>/Do>6;>1'

a^tw=H๽ R¾>HR =6>I@> *+Ŵ¶+y>U>˪E>w>sľд%>5>^raŧsL< Dq>w7oip>?z>Fɽzc=܍>4>Rz?>$jC7B;?j >[x a{>u߽>x=>AZ==a=:=>=j;A<ak=wD>Ҽ=>3 X b!-}ҙU>A=?i9>oO4>H>о긾"\="<7= 13Ӥ>G)势>>=ýK>c5>Ɓ>#><̄>^JR}%}f_ w?TOk=ه=KNb=RսrL>6#>\>X=ȻgO>9h>a=A>Gi> *=Q|>e߭v=> ?`=w=Il㾣}=Ǭr>n<;{?>Ⱦ$">zo >Og>:=%>j2|U-a>s>ü'~>@<]*PN>PM>P>Ճ=lBž`gnnZ>; z떢5a /ry#=N̾f;=B=f{=v>(l<"=^=/]==`=҃?̝8S<=k=YߚϽr9> ?U$=>){V >'=;>i_=;=4>n; S '>8H=AO=X]>;Y"=LK@>>U=d-U!0>=;>A^D>Q^=+=֘QtDr> q>v\>Z;D=,]>M=!UE=+iݾ+==BR?=!9=+4:4K=nAj>!=>f?/he־T=f`þ#N>1P=, >ˈ|=V>褾kF=lpѼ` >5f9|;{괾׽ZIl>>C5G=PA*>ޕO=젾ȥK>,[Q#2.>C>0Ⱦ[>kw+J=؟=>ۘ>X==Kױ6>߶z7=SN=>+5ĩ?\oX޽vⱱ,3>J<ʛ>; q=;mĆ =>p>%cg>9>' $>y=''Vj==h gWi<  ;x˾?8\)[y>8;Ǽ\=Ir%=F=wlR=?'q׎+=#몾Ǹ\=)>o+=u>ͽ/9>F><Y+pG󻷏>seV*<>߽Z=`߄C=<= =a y>><^|C8> ˨A=A>Al07=Ww k ־>e>hʽ~<"s=!?ӯwl&o>gPv>w.>=q[ߍ>̽Ӿ=踆>?<5߸w>Cپ;%uDM ?1@ؽcR>=k>7>f,Ƣ =sQ1Hc*=t %>6Md!>[}@K>y>JZ!"[=@`(=?k?h,( >n<#=?r3>J֪>#!>IKtij<.$>ȪP>ۙC=r1>Q<ܽF?=[>>ts'sz,> =+>.>+_>W>.C ս=❾==J>a>==khJS;$>=|< /> >&о}E7Us>g>5>9->q7>>͞>75gq>>T5>{m=<̩=}k> @f>$>̂6Oq9y#S>A$Ƚ@"oi/~4uw9>_%D">)Q=\_(=>~;>C\VZ&>eޤ>H|=a)<$=K>@=yb#>tN>r= =S#н :>H>`.s>L=Pfe1l=7<3?"=ŸA>>1;E+>=<H=n$>U߽X$?UD>,E==#VGźz%&>FN0>t=;>)>&v>N>1;t>ﲽm{MDL!>h7޿pFAw>(x9=aa>>*刟t>F6>:F_R1=ؑAL-2༟e=> >|;=BDpd7=Σ=GnƽU'甬82=J%Ԏ}>sTBA=;Vg>B<<}0=U>*@>X3-)9==Q}MV21>@#M4M)i>ھam9W=2=H>e='K򞾁Vͽ>t>^fE>I>q>=/J j5#>=u1=D?! ?P$=Z>8<3> ѻ==̻ 湾VK y ߽&r.=7/3< o>2F':= =>$^供9=o>DrsR>?a<> jx>og=kOֽ<]>l߽&FJzvq:獡>o:?^jS>[A> y>"r>x? =2LT]^1>^ݨ>/J<ч?*m=#~S$;b=j>2h>>C<-pW>0$>uT=t'>aD߾:>9BVr8>m>=$=%>Sh>Mg2>īqFW>1&GS<==b 䜺Ɗ<>Z =d=6Xsjm>Npՠ=֣ >nq=>]>X-kiP=L{>fMn}< s >R<<}K= >L ]V'=d1'>=T>=xa؜ɽ>'= >d>,=eJ7^=r^C >0>붾1>ˊ<=GyH5lU>=V}>=Fa5pD= >><>=D,;S\x>AZɧ=H'>[><oܼ0O40 C.>*>ҮPL >A83M">Af>UO<|h>q<x>nn*6/q>EX?QQ=I>pMX,YľK=ܾCY= b=a.>Z=y^>hj=0W<>qԭf>w潉m==ž/[?vy=︩:W nϾ޽T >nN0Fɽ+eRg6=ZFܔ=B>J=S>VUgcd+=$>%=gs >==P=C >2$]>Vep0`>W1?>V>395)3< >[m]d޾F=5>DY ˎ>o;NJ=6?=h['=گsX>,=>0B =4K> >}w<& 17W5 =E\JʽxH>b#=pu%>8۽&ƾ:d>>fH>fU=>8!>='>kmh]G>'=E8}=˛p5>xf< >m=>>yc>x4>X$:߽ע A=wI>R>7> . 
J>~&#+wX֗<]٤Q<'>p<2wt>UAA=s 2>uf{#>`&{>c}>u~=l6g,s>>=ȾJ=J?;<^(==)Z3g>ߛ=+><_=!R>>!==Z0=.U=;x< =}~5>dþbA< <'%ʐ>\q=޿>!=ZB^t2>k*>ӂɽA1`=9>= 4*>-=>ky5>9畾8>cD>]#>c=}>֯>]wA?>\$>m=,/><]뒾>\>3nI_>T>y %CCB=-D>-kAfV=OHLH ?tO=2I}=ꍥ>!M>^3s> i ;q;>[m =N芾=՛j=ؙ;) pXѾN= )9zt̙=ҾR=ͮUҾ'|qY r>N>P==ӽ!KU <@u>͐=%չ?Œ=W">=)7σ=3޽z|=W~>\ #H=轤G@=i<=iXrP>=>'*>D0=0p=wp=4c>B\n%-+::S=x(>4>6/>U>;>l"> :;&d=q}žv=C>dwv-#<>>\sO3><v].E}=l Ӿ5g轷=/G>)>B~^o~pt丼E>9KP>O댽*o=m=2ga=6ÄE6>>V#u-`0 VkFf< =q>Ӳ>$<%>(;QA>t 08&Mb֊:ľν913ֽ1v1>t73[!|`5>_3=Z>aj@>=Nr=`ʟ6>R=+⥾={>#>ͷ4"U>,.qB]A^i`>tL%uTʽCY=|+=g ܽaq ?1>o)w>%{=E]>$4z\ -=\=>tg+>=l>ޛny`>y>>>'>ߠ')e^T>ke9>Ľ+<9n  *v>VA>!=|> T?,>J9=F2| r>=듏=߈>\;>4cT>ľG-+SI'=t $9B放\(pۼ>gϻT|پ}>%K1t>t>=y>l V>jD>޽w9=րӾ}U=s]>4߾>Ɇ>|Y˾L> =Ŕ"ʾT><6e=q=*˫uU/=;S>diTf>⭁B{<8|>=!>Җ5d3=V]^>>xZ>=>T\><>A=u< $v龲>lv=L> > ZG>.>h$=( N=f=_Ɖ= 9rc)>`UJ>$*u=Kݾ对ɪ=A0>G6= G=|l>a>g==>j=>==&9ӗ>#.>Q!>><7̽=d>uy>ڬ=ck9.>>}= ⨽=o½'=GY<U>_=w> =S[B@>F=ޏ>XT=/>zݽX\=T=w|>34N<<2II0)Axe>8>ǍH>>|2>=+U=R!>95݁>4k=7:"j~jZWa=풽uZ*=<=fN/ jPA1?>xTC=N<:>`=>=V8Y=҄=R|=m=罺|UK>Nwi= >|uR3e>>>ڈ "<F;>ɟ4"6;>b>>JV)=ai1r>K=e;a3M>9={:=S>>耔=UD<=!K&sghs"=>Z=>??>…֖#>f:;=ͩ>k&il'ͳ=@>,> E>uffWgK=ަ>ռHW<r7B>={4^I9_>+`:>n>6,|=@>i!B>|Ҽh > (>P>S&=[>UQ>= =+޽3Kd>pA9>fT>=_}dY&݊> BQpq>(=Aἱ >kؽu>k9>* ->ν>"'Fsv >s =;&0DC>=>UYɼ z60>½=]_>؁5>' xv*ž>= =p3>;7>T>><ֽ>*$}=IxZ=aý>s *=: />3~IhNa>9=q/>z>=<85=L󻿕=\V'UWj>={o}>"ʁ> ѽZJL$ג;ގ[>;Lg7$>+>2-;-=}ϾF)=+!v=ù=n;=2$==>6>6}1CϾA7*~>%6>=35=HȄž Q >В5cվF>K~Խ%3:>d_I">+OB43Y<؈]3> h>\@Ƚo*>-J=(N>i=Xsܽ=y_>}*==j/2>\=dI$=y6B:t4>:=̉>@Z>͎>D*]==n=k/5E񰢽YZ=9k=>A2\?vI#>.A:叾j=>==Oq=+QahFB>P>fa$>ڤ>~뽹սh>>3=L+(>Hž =нP:â=ID>M7S> V,׽!0>{=ۼ8>H=5i׽?_2x=:>_ae>ڗ0? X>V>R;"=H;<7S=^+@?@[V18= =Ŏ>+f>}v!8>]sI`}Ғid<:F>_!}&=G J|@9>;f>[ѽ;>MHx> >Wk=>S?*[>. 
0.=<#}FN=L%旾>w<';'jv<[ݽ=&i=7K>0>;͉>> H>l%t>ӟœr>=[リ<~>(4~7R=y󻖎'=ϐjLh; PwiF<)єB >ґWOS̽==L :;8>9yjXK}yF='>" :!%=!4̼p=U};>՘'B½_0H=H>]y=U=8 }=tdz>(E>`f褾.=%>&d0=F4Z>I蟾M\b؁=D[=64>i2T==?hU>0>N=-n>M = F^=뤾.W>7y>"=j_a*T >@ T>$>?a>xl>l >Us=ӈEd\T2J>eS=Ej>ҽ"=m|`=戰]S=2Ce+>9ш q>S,<݉m'QdP{>[I=$w>=Ƚk>+p=Ae<7X;r >p꾊Gͯ> ;D>8>)>½k =ڒ=Ѿ->-+>9ahk eW>ؗ=s"=Dm׳<¸=3<fҽ}=/a=>?CU =k2p>wf9;=O}<ؗB6:ӵ >>>7%>s 0;b>5> >-Ic!AwvQ>ր輂肽m=_oƽ>L5t}>>KBf>H @Z侾>>7#=X47c2oǼ}F==>Ճ>&Y6qx=k='5>>hQO>L;<BU>>bWZսByʽSc= ?zߵ>>̊$<>,>pi>H; =rci >lK=PU=3V>#A>QWtDN=>6:Z R=>!&=jju>N~=~: <_ЃʽŽ 8=|۽ q

C?U)r@>->g{a|i==N >>z=&1>&>=ʵTnY<a>J>=>?oLJ> ,"7"\=qJ=ix&׼|C# >#rf=K;eI>xQh>wjev=CL>} >š>QZZ=5>G=~`>.ʽ"$ >6 Se=e={>Aqf>CֽK'B> ݽU?c=&#=C>N; E8)>#L>D>=@.y>C `=>Fg=.=&^]۞= >ٿ<> qYF>u>I;>u.YpZ=Y0;wi=v=b<{i>s==cDs4$bMP7n[L~ ;!:Ṻ>˵k>f꼽["Z_W>zs>$=>Vek>ͯ< vM=G7m@2峨?>8b>z=p=4f>:TSm=m\ZH==>E;R7V>Ζ>!*쓾7.ۧ=GT=i>#=Zt>fo;d(>KrɽD =6&^QEӽ=6>_T7>o,G>r>)]ͽ#vvg!>]>A]N۸U="x[|>Aˡ=mk>Ē04N>$n>mG>$Y>\%+>80u j==u>C@;nxϺs=: =) >ZvI=3O>Oz?i־qD횾$t>߽R>=p^< >b= G>e'E=Z=^>=9>ҽN1>=K>ėh~=)=/ =7ɭ~1=T>Fp{>t=8erL 9<@>ļat{=sh}>o=֟ h5$<*> Uv>=mJg>>Ͼ\>H->ƺ#b>I[e2.~=d$t>YD=oB@tȝtc ;,+<ѾY+ ՉT>ƀ>B1h=iN>G0=2 }>/cHτ>mG|3 f Xe~ؾ>1ɾ>>=&ٕ.=P=5>.q> j7T_=Yf>0]'<(f]1>78>=> |a)jgQێA X>p> /=;P.Is >t轶>8>9=aڗ>= 5nVO=b1F=R>qG$-W=?V>W;/ܼg>|>r2R_k轸V=冞>ZekSսx>KY=m>>';f`0+5=$vWz<:f(>e`G>j`G;f(gӽ >w>;A=$ս`0ҽ>E=3==_E=t=>>h=-O^VY=ż>>=4>ͱN<:>GC/>@@\X%?7E>a=c=(G=?=c= <=B>jDݙ>Br>m*>1>T=g)>Alm;2>7=>4-V AW>o "2="j =P>/>*m=;0s<iھg<`'l=E=>{ >ӽ"}<$6x_,L=->)b=+;;>轏>˼W=\p=<ؽ=^=7*>jFjϽn>U5zʻ&=R>r2N">w,5==kj< >N.=h=%>x;I>>]оI(>&L=W=ҏߺh=ڏ >` ?=ܷ l=ȷ=*>v> ާ<>ѽ=>hJ>") =?>e>Y <=[ >Wի>Л?.>s g=q;!1>_K) C> 0>!+H1>b=Dg<>ͦ= k= P5H=x á!>=@F = F$6/ j>(>f]>K2 S]>#L> >ڝsX,I ?Y=3νnի=!2QfgP>K^>C[<,=]㽵y>i,>2==wkB>!D&l> ȼ Q>E>'>==]0=ݼC=]>.Jм>b<Q=/=FG>2ZR>G7x_i>Vu=BI+g潒>>e.2>#{><8=U C>9֖1Aƞ>4YE|^Ⱦ?<>@5ۭLc>)ɺ7=r)Ὤ7P>{==DҸP>,ٽC==1ΟpžSg>v YA=}tg=F.yPl%~,=>WH>>!ޫ==v>3-7>% )=gUo>_ȼ><˲!iLs?">ٽ ڽñk> =fn<%=(K>R>!ýb齂r> '>,>;02=k >e>-˽&>=$qƍh>sAm!=c ~ 5>Ccro= >K{a<>q@۾IY>4 h=N dآ=="2>Z j>>>/^=v@>6lC>cCRD=I=u >yR㥾*=(U=U=I2̾>=}:M=֋D澆 h<>2%>plQ>W<>⼠ <=ڏ$9)>:}zzɧG=aJмc޼)t> >>N*2?f 9Hヽz(½.X; =Y>`oO>oa6}nܽ콇D><yͼU. 
*>aIknzU(]}->OI>,wZSsč6>)p}=։j=>)E E` ,>yyP>=Q㾂{lu{>?0=՞D>e1<XLg>S6v=#Ա'վ*c=FJT>iQ>V73>.=kSH>H=\>>s>/*x=?D =X;LY>:<q>=.7P>3ն=7iȽgh=W>N9>=2S"<=TK=ڔ%2L=V2z>O>/4=rƽ\+þƕ>}E[u>QF>JZ ;i>>n;Vr3=E=u:=b?= 5=oļ>7 ni=iw<A>S)>&>E>:i=+ӼL|F=h>O>l#)C)xL>ؗ=W¾Jx㭽_>fJǾ}N>^>==>_ڜ>ɳ>nVZVQ %S>ǜ2h9 =d;z>*=5EaǢ=|3*>>UǽzUB<0T3=]Z F>>Ԁ=gK#_ɾ=*|=v}=0=@=\^aY-TL߽C= >nb=kjY/t>m>d<_BE-'Ƽ>1==pj{k2>G=r>>Z< =x~|a%۽áW<<[]=MqVz>9̓>HNFzZ=;T :ݼu=V>r 6h&<~>"`9==kc=q8 ʃ>=A:>rQ=g=`=Bխ=o7VƇ(>>> ,> >y>ٜ;= >y={bo>;6ٺi͏y%x>{m_[Yř=R=۴Y=d#<ϑ>'Ƨq{|V0_v=q]͘8>ځ?l{#_=p=ǽ} jheVƎKXհ$=k~=`>k<|bX> =߀=8>NsD@<-dJP>y3>E|= =⁾=἟T >6=~C5>׎ت>~J^=ʅp=v=.X9aǽ =:>@=0>7>zG< > =sL=/Fo*>Z=oƹ1>>N,=Fu6_=n=5=ڊ>O=b=E">a>P=%>bK>UZ>r=D=¼== )O>'v=Sh<#< ==gk6>}86Fެ<1@r(>/=;BeLR;)>[=!KvwH׼@>f>e@d=rT>kSki=Y> =yK=0>;>}# ,?>H8P=߽>;PFd;μ\>5>B=,v>!Ѽȕ==M.b<<`r>673Z>έw">xx Yg>36<94x>à>H>":=&=46f>,=idg=˰L7WH>N/0BQ<Ɉ>S2=r=4q>D<8>0'{=H#4=sy.>WTʾT9È/ >>1A=6=/=NB>e}HI*>bJ=v]@>!>GjGEͽVh=j>/^>>ڹ=fYWE>-+>;%Ӽy<;=p#Tw=C@= O}=b#Y>pC<_zB‹+H=Gcj%>\ }墠=W=~3HE<)Oo>Y==.=>'>Ko DmA>\rQ>нa.h>aʳ5b:Ro:Ҿ Dƻ 6>HϼL=> +7ݣ3NjTR>>V=Zҽv>_+<>>y= ޠ> 3=~*>nK#$I>R>s ^)1q=;$L=l\=_HIzb= <>'QBVݽ>'=5y>7 >E-b>E#>e=Ӭֽ=SD7\Nb=5> dc=c6 ƽ>tw=v>=@J=K=Ǚb>e9> Z')UR>s ҽIs^O"=!Y>'6(>6T=6ʵ=3V;\> Bg>˽=ּJ>V< `=W=q>3f@?>_5<>-s^B<=H>T'y>ү>>VZgľI>>1s>L/d>|_PúElm>ry9q,= Ľ,蝾m=40=#>6=!s2O*Eom=ߌ>W =T>:ۀ\>f=i!߽գ=6%ؼ4M5 rkǾ2k?u=4Y3>־%)b[L#'$w=׼!ھ#r>J4ѽa2.  g້*?3{8kՇTk[2OmW>}=JKGþBH(>"a=ϊ/>UJ3iR*>Хu١R,. "!ӽսNI>(>uu0>1A =?=j&>(f>:`=:нsF4=#(N>>dN#>.\=PnV>G#Cǃ =V^(iW>n`>\Z>R!m=v=>Gؐb )=O/н9>JiK>F= 6EWo@g=/! 
=w>[P>6>Ӣ19<9=)0{1!&;==3>3>烉>Py> !C>z>$=@= :Xt>09"IFdB>/<, ;>W<`轄،=3)z=:`X>hY>>:h!Q=Ý>UeZ]jq>Bv=,P\XU>6>^\lԑ J>s= ㊾V%;%U>l>՞z7@ɽJ= b=8?;<>==+˖>2P>Ơ#u(=;+>a>|޾4>cn>I({<=-و>>}:>H>ξ2=C= =z>\>H>j#8JƇ=^=iL={(# U>FΝ;+E=qE=BgBqQL>#3䕽ABL]ic)IS>i>ý <^Y> > ]"R+R*>r4a>=*z-=>,>/3>Q>0=8f>(=>i>/q= N>y=v6¾[v})v0>" >^h>\s>tE2>4N}+&,=6x=蹽<,>@ ~4wfu>PB>QrKľI >l@3 >0?hvK7޽KY<>a̫>:>Lڞb7 ==F=->;U=RH>]X>(7dw==:a>'>0=S- <">{p^=|/ec9uz>X>Mfkؠ=h!=Ž{ι>nTUU>qSݾi=wYξ,j½j#AZ>-s>Ä=Wd^,5qK^nӡ|=vm> ?f>з<&Ļ3ZE>^t({6F EW>h`]=ѣ>- p>|c|R',>9d=}Ļ= d=ZEQ>fޑ=[> G;T0m+AJ;$%>l6=. <*6>(F=|>+=1qڑ>ZtI=<>=B[4=j;轓qμ[Y=BO>줕0cҫE d6>,6>>켺x>aK>>74K=W*=h췾ؽ9>B> 7>UqA>M>6?E6>J>?u克>4<^=(F5& >)6b'{ >W6<.A^>}DY cGT=;L ع8>ڽR>[^F_V=,=*:p\ãD=~2伃a<XT>r>^VQbbIa/фȽ=4<&^2e%(ܼe =ѱ~>@O'>:h˿*!}RG>TGw>h#>d/(=ȩh=1=wE>=88R>J;>6Gף=l#=@J=^>(=>=s)q,.$=a(>Q=;ZS;P6>,|]?<,>.=?=,d/z=="5=v=r=J=c=2">ǟ>]<)@W= >ʍ>3Rx>7>^@>;V&\6^U3/t#*b0> M|pX>(sf>2=]=> \+2e:ۉ> != >=U>H_> fd󢽊63(2M7Ӌ{G=T}>86e٘,)>?7*>L ku}?Ž ~@RB_vp>f=;A^hgW=AU9>R:-ܲàx<>;s4'= t=Qȷ;])* 89ϼ~Խ7>ᢽa\= uzfp>UpŽm>&^ڼ=?>+>dj >>U>^x>]f>)k~#>WV>p-/>a;'4{U̽.ݮt]sU> V?\= Ɏ>>b^x; ;=Lڛ2j0=໐F>=OK>=Mq=❃1>jU·>7̃1wg=Бt>g "gG=%u⽺K>y=A>F01=j=N짾5=ḱMw=ټ c8L9b3ZO=XJ>F/>H]ڼԃ>D>>7=!Aߥ=z=_>_=a;D轗s+ٽI~ =żtX6>> q-HE>,=>*%> >z=W<(B=/= 껾:>g< -(=#);g;DH>t>tиr'=2>$5oB\>8yS>c%>.0V>= &=5>e->>n=MQ h*>c5>>=B~\=O>( >m=.>HS-[le ̈́>$u >Yk>W>><>|>',H>Ȍdl>a$=O=st>bHf=F+>tHX=>i ξ̽p>x= WQrk=, ==H/=F>>*=>C=l=k0p>9ܜ,'Aٽǽ<ü0==>F=wo>x̾Z== ˢ>`׾U=w=<r%>nM)sK3> ,%!4-< Ot`CVY>.>qx=&4>m;Pu=bAB=z=Y[=z/=o:==:!<ĠZx=Ymv>e>:³ؠ==%> 0>N=t_?>:>甜;&W=gN?ˌ>PTz:>V(>񻾮L>u=7B<н -t<>2N.z&SlҨJ9=<#?O =hvX>˟=J> =Yeq=w> =ߚ8R ;>$ ;rr_=񀽫=>">JKdW-==tmd<\0>YŮ>;=:Kk>iQ`e>A*emXVT>6# S=c̽\>̶>R4!<>K#9&LvS/>-y=G<=Rƾiٌ9ܽ|b4Y=p=@=y\>6=W=t>=Rs 0ƽ݀=c,w?ޝtYJ.>^>ǽ (/yW"0<:=El=L '3 qc57<>5h$;ޖb>'M=0ǃ"I >"d='UCñU>=XGB>t0;*Mz_zᙢG5>p쵽> >= Ms>;+P=g>SXM?f=I;=?5>r==ri_F'T.䊸<=I=wb˖:*M=m>>6|pݽ&ľcI>D>6=ۑ;-B!j+>FN~jx@3w`ý>㢾%>ErþT﬽pc!u7=v/\Lr=VM>`>g7 zݥ>&1=Co>?>e>4}>>Q(1?Z7=XʾFh>Q&G>7=gG4^ɾW8T>_=P"OE>^>=1z^S  <]oJՕù1|Cü*=uk>p@_:='=V!!df ۟9>ˬe=W=H;\;v+9= 彾7=ȷ=BBѽr>4=$*=> 
/>[vb%Lc{f=A,_8>GO>j6ZG\a>6ƽm`AvN=m>3>2=f>uj t >*I=Z=]>x>y; ʽ>3)=|^#<~U>Y'k8=d7Z%x=gɨ=p>4(57;^>%< ===9)!>ąe7M><5>O=nõཕ\>^>7ޞ>C:hywJ>q>!r$>T3"ɾ8>$1n=j}+<ڈh߃E>0`8=k>:P^zg>>gU>L N<;=?¹x >FЕ>y>^<Q>Ծŗ7+VU>/x=9si.e>`=юw?1?>D9=t<h>#> >佾,fqVB>N<[!5m薾oF=^>3a?>u6>2=>>c<́=X>+ES{m$>>熏>9/u>Nlr=|a2lr5>20>=R餾֙>>ӼJ=$}F=R$Y= >/>Qʻ;5>Һ1>1PO>U%'ԭ=>|S=P=˺=J1cL0i>:=>Ǯfԟ=>*i*;>s.>䡐'^>=%>`W>8G=]`E>n>Ph>Dok>S$N=\St> ;ܽLw-)GU>Vr>o>+;/=>gʀ]d:>>Z>G==]*aX=b[j*qW[v^/=C>.8ܸS=L=鞭?=&>Iֿ>q===L >f>:.n>W&g>,)>OھAI<[=Ht=s=7E@>VD=y֥>F>ZK>/>J*>+뾤2G>>Íg<rl@=_,xx>JBq>@>=3D~=:/>-ĞW=u0=">+%?bh7a;ZԮ՝>3=<=q\^{!v9=b>3X=#G>P=q=zIKQ_C> >R$ᚽ*>-;\`=>uZ>g$=|>9>_.MD+ H >Ɗ'>Ck*=)=Bqy'$>>YJ>==ྒ>14=F$>A>^Bv7K Y*o>H,?=j=c:<^hl>DƾlА=A|=Ҿu>z=(=l"=x(>8K6oq5>=> >M?e `>x:=`>LG<䋽>&;С=Pvӽ5K.>q>>8=v,>j~b=՝LC=No=E>GYDE>%mz>E7<3>}= ]>̩=%U>Z=e",R<Ź=w>|=d/>Ӝ>Bvy>=u>~(q?ℶf)<><_$>(=-IY>=K>p$=:ķ_`=4mБ̾>/>mN=OnK/=2<#(>\½L>Tq7n?i~ۂ4 *>>;G=pFUC.9.>>>=濽%.d>w>NjD=2d>ҹ" 7=e>TOKI!=C?>ǽb|<q>L*>#c;>󳬾lC>ֽօ=̛{>=@>3cѝ #E>?0J3颎wc8.>W,=А> pH>홽 uĉ>|dL>w@==4=)>SW쵻ޏ=+=ª>=/jj=ャkɾw>dZ=܄>"=@l>N=La5^&>*Ͼ=8BK!o> q<7>>? 
>m \<>2=bz5>8EG=rWa">}=/O7$%>S>н=">T׋=9|}K=g>݃>?,=W;>]`=ǕAuڽa+ξZ=-n>N>>L4TH}-I=}$ {< ^1>_=c>BzoҰmH>L4>u9>>Z=23>8=%La/K>f=e>[< tX=Pav<1=fY=S$>,m;8 0s=]ZL={!V>ԝ)mh>uʂ&F>_H+n>]}>nZL>hxbq䮽>ɟ=lC>-)sr>s>I#ݦ=+·rҽ%ͮ>?و>%qIRI>dwAؾ~~rLCS>mL>L>2ms08>=\2ƾNޕ G;=.>ILT&%.OM364t>:?潐آ=ő>L;(f,>s&>wfq=C>l>mԀ=n> R>yݧ)=H'*l>&!&>iԾPbj>o+>YS4 :<#3=>>', a>5=< Յ>D><;< |>kK1=|A=>X{>j=#=>ZSʾ@Sʻ}=A9=־':g>>=x>>ֽ&xc/>Ô>q=7j=1>:>OJ"K= ~vpF#;>P`=_>z=X9=~>,ļs+ُli$=iU]^l=^?T̡o>E:H&X̣3)U @>8ý{o>H>ZD=2)߾ҁ=R>Db>ΓzKRT>A L.>;zEp3>}'>r><*L@c;UA+~l.=E4p<~{2G>w/'m:%sQ=D=#Q|(u > Z6[D=>,>|B۾׵>O6#8r>/ĺ=FSd G=<=5==Z>%D8T>OP=gI8 ==Cpi>S^Oٝ~>D>=UB>=?[~pd>;c=cCgA >0;=Z"=A=]M=V =>9=Q=Խ$=V1=hJg d=Wr;2=^ڊ>όȽN>cLM )X~[RU,Ƚ)>v( >5:l>O'<+j4>Z\>>k=`W>شڕG=|h[ ?9=%=Aۻ٦gU^go<->蛾h̽^6>3]Oq>#,U=m?>n#M=b>>3,E81-)E=8Y>+N`>K>m̾yl>W%>] >B>R=C܏~>V>|ΆqM+!V @Eg=)=na>& t=Db;'/jh=?)=-;[ g|B^>xiy'μ6c=d=hD>2=5C>tPn ;IE=[<>\rjx>@e>&X=巜<<-H==>*'S/Gtɽ/L1ZmĽ\9W/a>\>=a='t`p 3g;{7妒[/%>H?J=m?i<=]=/k<'Z=y>*=(9G[>>M94ʽg=@~n=0Qv!= 9=Q-=jߒأ=W'j>ƒ<7.>_I: ,=(`-=<>Y`= x>ݼA+ > 䌾%=u= cw/>آ?tP[Ҏ=eOj]>؜= J=>_<yT;T> u>=*˽}о{=A*j$(=5BBT>f9k>}>Ʉn+XR>t >=d=>a=> =U4T>KD)>Hޡge+RDQ>&?>|>Me [Ҽp m=!>M=c1=^^\z=6<%ܸNw>|>s<0y>P{)|P7庽+V<-ql!T=,U}/m3Ǽse7^զ!>d#f[CeP/L>[[=Vw:P>a=J>=#I(>.$tha!1=+f>`=3k&o)>߽s":=3R>=,�S>>E)>K==x=>y>4=Ҁ>ɼr7>Za=N>d>]>S;N=f=J>=D=Vsl> ;ּ'=!3<^a>^>< ^\0T׎➽X,0>TΟ>X`nB>< o#0y>!>۴I>:s>T9W0=d=8G[l~OA>i=lrJmCvj^!>f*z &*'1> >av> (>=c=/Ak>PX>XS݌=߿h= *D5rhKf]$ha>fQ~0=C?@׽0#r=)[~M&8D&`g>NE <D>ƒS.=X慬==߀l>տ[@~Z[r>÷o={2̼3H<ċ=:@= 4>f)(I>+^>GJT>>|GoD8=d;>:<֪?oCU=<&><Ž2>Se<=U ppXUW_l>:˾ý?>ܼ ܽ~P8>h]>=xc==YYǽg<>*Y=aϽow:sF6 >Ŭ辉Ve> S>@>tY=+>EHGY;[5>2='L>h%O>O͵;˾ʣ^R{>E#=2O<@v> >ĕ5R!!U>ϫ>sPARy< <$=t`>aa=ڸ>>jдm((>6\V>.=߃=kHzK/>o&=,ʰQ<#>ɦ1>0V>ѹ`>k=~Ro:=R>l[j$4c>I>ɽOCg=B >.3=y<>]>.1=s=/G>S93ӽKv>EHz7&z>pK>Iɽ&Е=>Q!@>Q_?О=X>۽=\}݀=ƽSJ?ܾ݄_7<>㐌 = lf8p=2>"> =cg>]= , :=N=7۽e>v>ekUC8 >:5Ox>(oY>;d P>mk>V>aW>CE9AohY>iL>:>xVgy+aO:s;> ?>E->6F.}=^ <8z>!yY[&,$={>>->=ͽf>F/>"̳5=*v2K>{<,,>v)p4R>i@>X>?նԽ0;WA>u0>({Ͻd>o<#$>g=Ľ8^K˾xc=G9>k&&=2+_> <ߎ>Z?>  QGĢ PYtAL>}h>N>%> ܑ>O 
R>>6w=iBhp½–/I>V+ް=5>;-ie>f>W.>ګ<4K=Xf>楽T&"*XR >:ًR+hQ#=~u==>0L >Rt(-4,L#-> >kqJ%>&T=ɽ=$>tsɽzU> /Xs>v(>y?b SK(@->阊T=9k=$a=ZYP>P^ >ɑ>X7=OZ$3|ăgX@6P>^Q=*>X>Y>)yk~HS>X:x 0ϓ>+5=>>">4ߝm`k==T=U= Y=Owz[i< 5X.='nV.ڔZ=uSbD=(վs>\|gPC>>q > >@)e<=,j>7j=[ρ<U(=q>M=>N=̋==><>㺥>E=>2A<&]TIA="=|`>!k 4{{=\=!>=`H^U&=b[=꺴w轼* ;ԝQ+>Α= >B[<i &L>/">Z)=?>(:>Q>K>XQd2>xi^=0돾k>-ٽvĻ=R==3Ѩ=t>7U׽Si!>m =U<Ŵ=>* /V<=j>趾=6gվj<\+=`cfOE>an>d_ H>=!>b=;>L>=hW%9irkp>}JH:=q0>!>qh>γ.n>!px؛i02Z#!󩽝8Q<ژX;< W&ִ2^=1&D&>-=lXfQ;F!>ْH<3> Q½oB=E=e3=!v=bF-_ۋxmjzW"ߴk 釾0.{ѧ={t1c>>6")}$><>‰Ĕ>A=Ӷh>9j= \ٛd¤;>P9=+)gP>@{>= =Ly9=]㲾A۽wCn>2|>>d=Z{o ><5=Y=V>j>H=0FoR>2 =L!i>T>m޽>s=MerVęSG}4HUlR>Me=)W WjW>8m=Y&>|ν?f(>Sl~\K]$ؽ/=l==gxQ `$2>8:l='>jq>a=]Wӽ`8>D=L4>m=t!n<+==>U=>o>j>WBP/f>\>,?> =mδ!=}%!>̲`==:NK1=g>n> >> 4>V=zgR瘽Q!żE{ ɽ =^=>9np=6<^ ۾d)>eн9 좼=B_^xK>bG5Jx3lo#=!>z3>0w< ({ǣ=90^ 4EϽJ=v<3;7Jƨ>^ j{=7ɔ=n#!>t`ʾSe=()>* ׽]?u&H>x_i¾ v>DRڀbq==گ=0=)X=r>WNG&N<.ƽ= <>]4P">͘=5*>==>R{:ܧ=iž-o#0eAR a2=Bý2"=R7>x=)5>qW=Rr޽6vVM>~= <_SO>y3̽G-<2=gS=y0˗=>BƎ=H>6'b<`l>r`>t]>.$'>* 67Q9a>u=ӏ=æ@@Žڢ=.=QgNjZ=-=ր>91Ei!>av=x>o>gk>_i߀9 ʆv)?/>WSp(ܽN>'S)t׽Z>iU{=*=tu)>JW=:Fs N^ TU,5)(2=*>ksǐ<=SH=vL >PA=l4<r<>q;Ͼ2_;=6e>x=S1>6YN=Et=uyā= 8R<>3!핈=[sN=IUZxnMؼ[LPn>W<TCi6z=Ȁ>R=(= >x>:=NMXNaV׼V>o./%>I۾_U; =Q>^!=+ J>fB[_L5*w I>y*0>]R^>ϡ<8>c>@ؽtNM[OGѸ=]s'>>Qi>~7>! ;E.x>'>CJ<=|a=1^pr>{gf=E=*=w>'L,=H@= =ug>s`>z\}>v=BX(ֽֽTF )=eXսzFE=>ɢ>ɳj^d? 
>[>AY6=!iN[Ń>>'A=Jɏ>X8=*ɼd$s=2/>lھ%S=>w =ɮCZA!=>z^>a&=N>^s4n4>uQ<#>MUK >F׮*>t)>񞂾#r]=g>5c<[r\=b=8==ԕj6!"zbX&.nF;1:Rm< >k>u̽m3T9Ws<:`=<=A ܢ^*=_Խn48>ydL nQ;j==cZ?v>< N۽>X,,wY]>Ҫ>= >H<}*l; w=s>Dc>B.<~ǽp=1>6>rڙ8R6=.н ==Lk=!$S >7ɻ<>e<=#ȾhnC^Y>B>φ>߾uܽƢ'ƽ> /E鼸oه;%Ri$HR=rY=>T> (>{긂洽VZ=s" >5=;;> >d&#<=>>,8bK$^((b=(n=k|>=p>|St>.dh>d<l=ي==>>ry>u1>4DD]*:Iɲ-(c>0"W齅S|Ǹ&g>Ê4=*t+>%==,dh>V=%˽=UC;:v=?q>a# &='úYI!'ܿ=:>pU=.4}c=>&."8Ã>yx>/!>摞=b/y=I>#_>( j>?0v|MY ?>#>P=,=C< $<+>@w />>S>`] ?=-A==W䂽F+Q>f&3=|ff;;H>*᳾}׻=d>ܦH=p>,>d=ld$=G>/=t=ZFf\ _=Ѝш;X>d>Bd#>xǽ>>>5L>L>I-P >6۸==T\=}wmEI;>W>R=>=p+zTB:=q>Xi;=0>"w>vTi>%̀ E=_ulu;G&=;+=\1*ֹͧ>&>_DI>̀=@ݒ>MBf:ְ>۪R;hZb)8|=A\>C C;!sVOo<>j>&יT}#=Lt> @ȾLOx=z ='ș1">ڲkJ~(3v+۰>I=r >(ǽ k{ЍL<8=&1g=% ;h ?<ژ>#H#>%>_t(%9]1=(a>p]4 =tr>-쓾uY=b:=% >@>d>h?1Ҳ,>,}PT>=^t>L=Ƚ-7o࿽ a>څ>=M==;~?'<7>gܾ!Pt>H̒ <6=@>5 w5<>Y\9=i~ ==׼>2 nHi= >҇ ?ZW\e>}(t=6=ђ=^=N}=`Xm(6=(ty_>͠<[C,펪=g-=@=>4=ڜFrMCz=>()>L8@vw>E>d>5V= > n=>QI#>*>;/f>\սk =FL_<v?*>R8>5R{=S<'(0X>>$ZcP{haž<^e@ܾ bnhڽ8<͒> >בd><,MTD4>==hE>8=i{2t;> mQ=[({_> `1k?"/ >=z=3Q>U>>=H> Rps>ﱾO=N[x>G='z ;+=읠;I;= 9p>ڼo>X⯾a>H=HrĴ= xaaR=]f@~6Cվ>ں=Аj C=R=E-ll>շ>c=C*>=;l>־ʾ>>T#3.۾z>>Q>>Y3N > P[ >?ܽ'нv>> fv=b>=>&f==<?j$;L=gjl&g>==;ϫ=P>MI'6<->:IB>}o=nApW ,<\G# 佔 v:| ޲=?< yxk+3myy (l\_kC>??/>iA.2վ ̾&ɻ>S >aH9QϾW-%l b6Ea=^2>(o==Te=<bx=%>(sW7B.s>K=X=[j=9=6cD=G9=#4N>нF> OIr}HG>>Aya=<=yEg3> >(>>f=bv==Ɯg"wx=3-~ >^b~>~=@>>%8<i x>w> >^T8>{}cɼ>5>;n> >܅>zw½ȳK;mnsx|<<#uS=p=fùs> b7=G?Q,==>>Z=o>, =P[>FA;={=JNV)<ܾP>ǁ=%Cp>p0𼮐`X>]= @  <<]>8r>5{.";S=ZUm><ʒ/g>|=u-I>pGH>c}0MN=$?N!?]!>d> =i Sv7>^۽>?j*~=j='St ?#^Z>'N? 
=>"=G=%0;?N><薽ܧ{!=gl=FQ#>eI>?a?@;F=uh>4FtP->@J>3$fD> p>]>@=GW1 ?G*M-F>|ֽW˕4=Ll?=ǽu D\{>^_L=a!j> > j;>IF>T?<w8;>Ѿlg5zyp">ס>=)<$kvq=8dY>nD =>9vn=>`i"!|MͽfT>\=nl> V>&ymhv/== AC~'>&v>">^/>i=F_K&= w=G5>U>ݭ=h?i\־><|>2S'hwS[>:F>P>T=HeB>/DpMJW\<43 v#(I>޽L=eUȓ=>P"=8[{0_:";Z:>neh>S=8>={/.= =n#xS>6SUv;<߽-7<D=2^1zC 転 @>Uf)Dp*>tiG=Q3&>G4>a=<蒶>x>d⽘C|0 Կly䱰=ܳ9(=$B7><>֖><*'ɽ(?b->of|[=j((X.>.;(=Hdi"ӾU;|+?.i->ͽZ=>*>>;ij>Hu={>yBe>E>rib;w_+=t>LI?>ܼKS>0{rCxxK>jCȾA=naײ>c>Cs8rK>6><>OAc>E>F=O>2 =6N== Ƚ2=Ko>\=~>ϻp >vş>T󄾌 A>8=!-">bŽ5"8>$> *>m%=m=z@P=J.> 5i?d>TT]>M,yj9Q,q^;mS<ִ=\=:1>jR=G>}>ZR2`?p/m>鐾¾C39 ;O)E~%58o>eFrv=d>A>ϕ>NJq'>xUY> hfEq>=n>}.^.;pӁ=-1ٹRi==[.[((y=> =6e==y_3?>>ֽ!>=f&W n<)\I>0 $;P=t~=5 ;>=X.==@罵5)>>y%Rz/)=jРJ=L=Z>8=Dd>^x=yWyժ3>=}^'t==k"=>*>Uͽ=?/>Pvu :=d_>JG>=9=OW=ߧĎ>{?D>kھn5[J͒>; >đv=%=u=tL5>y>Qd=ͽձ=€E.P2Ľ{>C>>UdK=")eFS>~>ٚ د;zS=a>0= >lD>=Sb=]>j=o9D62TZ/>y ,>)=^=1R>ͼV=E'B>zy˽u^ml3,_+>>z&= >͛Nf=랾:a ܾ:>>4}><~Dqmkc>=W=du>E>+ߪ A>a@>uWA=?e>Fm>l^>!W`y>3?>,z=!6{=ZeT<`H\?L=ǘv}P޾P<&=(IލU=\gʾI=y><>~%8=ܾȻZʽ:<'O>c>>\ZNʽRnnQ˨>\%ݏb>S<>=Kx>h Rqbv>+> >I 4>.7vN >t.><)>ê >iͽ=DGݤ=-/Vf=g%m>m>Yٽ=|_C>Kx=n=OȽsW9d=I=q=c <,>5=0Hܽca5EW*>H"Z=Zx!R>½>a>=$TcW>=uhm >&>=gv7*>>>c>y|1V<輾~b>>Ǿ}=F<{F=+=WNԼ>O=h>_.qLE.q> ӽhuwɾ>(!W8ýP>qbm=>Iħu׼t<=!]>L}BU=_Y=x> DS̼g=J5`jz=13=WN}GN>_9džؽ>&u><3>H>+<]㥍=N=fP7==B:e<^ =I =@h=mF~8 b\/c EPm;>:P=KGFL4ZQ>~Z `>ʅT=nI=>߽^''<> ̆Tp=8{)ޑ>">Y3GɼL>[=fw'Z=@=mC>BI=>|6=^g+i>fI?H$ȳ7nl[1='!Ø>剨<O辡-(㽜(>lR=>tv6^G;>'=>G>>=FཏN=F%n=x>f1 ގ=۾ޏ>X>ꗒ>l[)>=->S%X2*>h}\<K={|t>x>N>HSӉ>=ĉ>"׽hPaJkskR>P>#=bb=L>vM^=<@1>['>h >άsO>N4-BZ=>=}G>9AΞlb 0>=6*>|MU<<ˑcPc=w>h޾;P$Ύ)Y>=+`bĨ>\z=?V-1dش9~>\P=_;M>%n=0>#'< Ϡ>'/>>=ذ>U=HNq>ߨ3 Ͼ]o>4ܩ>d>Ɔ =j>{i>/c>(H>;>9&m+~7r=3L>stqvm0 >> >_st>P>Խ?)<[jآ>>LFbd.s<*={0#G=|u>:>ѠK;>\'V [DwĽS^|8> :=꼦 ;=$>kFk[[Z\>] >=:=23=c>Y>UJ>2Þ==<9Ž|DG>x5>)g==>;N6>3<!>)?rL=>2YI=@+>)Z=S(Yԏ>>r?:=#-w='B q3;_\=ƣ < ; =}=6#%>wF]o`VU۾2>֖wO=AHa<[J>n]½ 3( =V=Z<=ի>kb|]˼P>Ž?ų=S >*>Eڅ>G} >ƖH==bZF7<}coz>q<ݻUƓ>&I[>O`=į >=!>7=g>Bl>*=+`=2-d>^`:uD ⧚>g>zBaL7>uS)<ǽ 
=x=d!>dbŽt)Ľ=6<UΪ>>^>ko=V=hL=!">;<^̼lžux蕰h6>h=>D9>Zd>+Bn>e=@ھ;1)>{>D}<5`ո1D<ǧT=b=o=)>+>r^>o=^Dh=It=ʧwN>o?qW>b=#>Я '/{={=A>S;ͽ>,G8= >$`>81سe>ks>ŷb;>Q=WW>Q&:>$==K݀>=D>ٓeגe*=~yգ4= #Zu*%1;6>.; c Ӱ ҽ>>b>/ƫn>y%3K=5> BBlD=Aρ=>=c>^;[?->B>q7>>==[>n< fj,>DU>0>;}͸l(/='fֽW?*>=+c}Ծy <˖>9>L7X%7xf=> 4Ž/e ŧR<& h*=r,>ق=*N>xer{|U;';I>gAҾ=3-0">5 tDK=\oh; eֶI:>?=?jJ >`:f˜>s;ؽz=k6>Ӕ> ;]9K>I tʘ<тH> =>c=60>}>=&=J>=:=&Ƽj&B>j>J =)nטN&> >.Y>C=.I!=g=ԴS-b=>¢ԓ; {'=Y%y&$<j> 1,JC>,g#=q >tz]l> >A={>Az>a0 >=ѼV]> {>a @=:>QP3BLV>>BK1>qP=U\=M>(Az2>P0 @6Wp̃=ܜ>R>t =QT=o(؃MѽF>[n[󽶸>ɭwVu>[ *0it~  G -=V>Ã>AD>|c(=l=3"Z+>D=!:= >ms  =2= =zV<>ܩ=Ƿ<}=tI+ظZ=S= A>D=9Q=~Y =^TZ+,C=SݼAH>>K=dL#Z>KU+=^*>q'p>ȹz>PXX>">0tຽjU>_+=Q^D=},Η+6:=R>D>9C>| J;T+L<>|>l)~>2)&>Y>r>2.<7\Ԑ">=ѩ#>uY=tT-1;>"kmW#> $Fi =:+沷y|=Q?>uPIt=սo>M\ ޴=G=7iξ>WZa;M33> >t8>=x5=>{Ͼ$>O>=OK>6.>>gHT&ԽL>='7b *=e'K>[>>0=<R9Z3څ>i'҉}»>zH=Pe>oXR=d=9> l>e"6=Zj;0<ۨ=7~Ldh>=xf[QB>)=o}">Mty>G>-0I>ؤ=sA_"=!diYaپ^y:ҕ>T.~<7>f> ->B߽I;u=/PX>F=y=qMl>3=o;N>4\ZR"dWb== =ǽ" ^0 G~>zQ >UuB-=,6<N< >*;5KؼDp>z6M}嗽$&Lc>͛2E5>Gm;=b%P <彣;ô>ך>$>.[=d^>=P=`$މ>[>u=%+>bDM>yk=T߁>Â>& W6]C>=#>-N^VHO>`>3#>X薾̅MJXJ=>_>W<>B;>wkIwTn㊼ =>f^>:x +>0=Y='>-Q>`CJ=0=l{%o>%þDI>ǽtࡼƍ:!ӽE}>=.0X ;K=iռK=rR n<,K˻a>"=Z,E}<Їw(O>w=B=:݅o>5 >\=F3B/⽧{>]s="-X߽4FJ=x9>* csF<>-d&Dν3?#=4\=d@%m`"߾>*>K=D7>0 w kSV> WaM;Y>vr]'K]> =-m=/Ot> 7Y={Ý C>==QR<@MU>2X^[䒽ӭ>youG=iېՄ>(՞2< q=~T s-=jY4:O꒾U>V=ڮ==7 [>ξ=M܁>t=x>N˱>}=j}\G2>6*><1>MOȄ>g [9pv|n[;8g=-̆>཯ >L>m>=Oq9=!R=>F>ay >I"W>ik\>vy>A<^?.>= >aTѽK>W%ɽ2/>=7.>>O>@a@\Ծ>'`;0ʍ=q1=Om ~<֓>/i Z:='䖾==+G><=%=4Z6coT> 8>Qt>S=QI>rœAN[. 
=V>҃ս\ < >XB޽U}>Ot]59JĽ&: >k>5:|RDl]= Ϲ="/Jt-=Eg>L.>u+㽈=x .=Aq@u=œ> jkγNx=6B=P Ёo]œ"c#0=r0w\F>b=k[S=YX#<.=L>D XϽM}vk>Dz<<3@-i.=OUKfK=}w >=դ8ֽs9n;Zm;͓>Zj "`]A=: v64nսsN>>ӽR&$=a< GE$d$ƍ>{ +>хpAٽD7> >sc >^>z?=0]>>5K=,>,OGj;>ob>ͽ T9ǣ<>=:>T=W8D>%>`v=f>,o>/:C˾>>Xz=>U 6,")lп<+>r>hB=}':&+%HXW`&$>`n$!>?Ig=J>=;> *>7 2= )mm>&=<+.= >4W< =8b>U> _̼>*o>j=g[ck3>h=ysH==D>ѓ>d^ >}=N6CpG>8>5"tYрu|PH4/sz>b>[ >e<c>=-$>c;P#> >a >n>6rvh5)=]'>;i!>]BD==D>=Nd=}=@u xƽjrq>Y<>1>I\9>š.L>hվ>C~Ib> >8Et+V>;ZK>r ">TI>!d(c">=A΂Z$S>v>}bf=Oi>M > }>Tᆽ羼<]޽=1_=V>Õ>Fm=;r#><,>=X= >H7C>Nܽ=k==ʷ=Wcv>=ci >o~/zn@#>* >+[R5>{> z(CA>0C=T:=<x q&L '6 =RAGu:6k?!>,/.پnJ>B$W~q{w<7XC1`4;L+]hм==G@=h!>* ?=yM+>Q?=>8>01<=]=<[B#>l^C===*',= P-N>E8>Ƒ[ /޷P?ʹ>hbվ=_>l;fVG>OJ$=<‹r%X2D!jqv>~;=k>dz|>0=%WA=Ғ>ҽg<-ʎ>і2ϵ>2 > ͠=5o>+?>\&> =N>=<" >ꐻ[n>!̝=; ^ >>jE3Ս^>bĽC>H8>C.׽&\=a=YyI< >y6=Y|>ғ= ?9=HO:ܻ?86ɮԍc=Ž$ >>>/A>_|>M>.>^}c?#Y>VgZ>UX0;Խ>T= 3(a)>HbFT=.>t=`>+D=̭^N=n=j>%!ԪS*==>䰳sG,>\=(<r><7[˾Ec}L=9e==W)^];k=FJI> ~Q=5=zD"'JǾ >fLT+=(>n_ >'k0q=zm>=¤+~=0=j>I="==ײҽluZ1=*?=QN;;<~߉>BŤBu\=Q^|^iL>P\>Sc>=>۽6Y=> 郾@>D$>Bor>Q>s6(>.X>H>hy]>===*$l=c#9x:Z"ꣾꂾ#WFƼϽ X>Y=+>m>x)=Hn EА2=X>`_Ϳ3>"? ;fV^H˭Y5=dgǽ oɴ!Mt=~gfzP>o<7?>dҧj:0=fm >;Qn&K%>^p>V:齧1)6dKgU>D,>Fvy~fnB;>:>Ra>qSIA6<>3޷#>>~-~3ln:'!=n<2<t>,B̾Uׯ;>,L>q=f>収> >^=TIvJ>U<>=N\=8?c>5#>Ը M= @ls==`&;=p=7<,ݐmѽOke]>\2>L=e7+~<=]\=`=>͇<_?>س(ba>)k>.U>.="=]>;fS=򓌽s=g>=~=t >~h>V%R`z4<w̽a8

ȼ =L>%LnپN=CA=v =;WCܬ=S]=<:мo;>V=;>*> =&$Dl=9 =uSx$ B=>)w0=F~=6>"&[Ѯ꾭+:T>P=e6=ͽԘ=U>L>~쑼6|>m>rr<&=~uŽ?[ƻYu>};=GDM&vs{O=C= EQ)8>P ^>=l'=02b=uBeɽLl<-C%=)=D=^;- 𿽊PX=XԼ˄+Pِ>xI=棼Qgb;Ԯ=[R>,9f>UCr>BZT&>,>T >>e=!H=AcId>o2LYtWS@sN=t?==D㉾iiy>NU)½ > &ؾG>% >`=> =|>=& ->||F;>f)gDi;C '$Sߕ>Jn=>v?!9n>}绾kF=t¾A>>ν^=6=o>==>(vὑr>8NͽלXܤ=s>5]>xt5>=>w> X}> NE7ݬB>Ÿ>F><`i%<3Y3<>c=6@Rn))y>Gv>`o=)\+;FU=fu>k<,K] &Z>"=h >k8U>"Yɋ<[`bme/[idL=׾ =>rz>я>< MX>:ɽrH=^3<ɽdxsҨ>=t=ز!<'U輓8\N=G˽S<#>Z>=y<=#x%>>>*A#Őm));S6=. >RoÞ-=!F<=p">si>=evp{>Rs>8C潺e{˶ >3/tvIRԻTX> >*Ͻ^E>T>-/4U>>>xTZKG7=T$᫽)S'>{½H=x@?k@*>i(>>t(>Tc6W־;=lb=><>M?%=% Q=u>L=ZǾAka> >T8>/&pfO=x ޽$t>hھ1M="<>'*;>(>=L=~)½k`n$靾V>߆I}>  <=5>>=<[=F<Բ>Z>={/λq=m>]`h={J+E>>oȽ>>ۧ`,=H9<~>WL_`=^Um>> >1$\?xNq>ekxAPK)Tc>ټ-;V0Q=rNa>p=@r[L>8>?|0=wн<|=d{ W}XSf=# $轪8QUz(9>u=> >Ԅ>Xw=ԁ&=@@qU>tA>eA;2>Vm]>Ʒݽ>&<>Q;>w;q&=߱=7~;<=V%>หL#|J/4>/ؽ=F2>\ڽC=f=͖/z8>H>"4>mLx[[bvxX>=U߾ھTP>,R ?2rE>7-;H=kƝ;bZ 0<=;>=`>ciν>C@{C>=I=wr]>' =ȹ85Bb>ȇg f_a=?u=Vu >/)>%.>BI>v>ss׭>6>ŽK8n;%wuv>ڻ)񼆱t>ӻ=4fX<|1g[/Oݽ}|=a>u=>Xnړރ>T,>z =̾wvbr1j ?=U>>2?J!ߙJ$Hxx>= H>C>3>-rP> 1=P=,1L5r>>~;W>"C> ͽ)s@ >4.>Aח=e]=sJKbq= T|<=M=剾 Y;Y6=Dݠ=<>_ց>EZ=wW_>`2w\t>%ؾ+9>5Y[ǒ#eP=0m-|j}N((=w~=en"1$N>Ik>${)<~>\cʽNg&1K >\}dl>i>Ut >g>>P_j&K]==Oh<|=F\Q g>Hr>=7=+>s=w˾4>__]P Jz> >h><=Wf\ ܺb:>˸=%)Ͻ=ٔ >n<]$>dџ=e=Ck,ٜ!oJk(>74>1w<|>|>M>i7C[>>Sc>}>EE>==H> z;)->0>J񼜾_nI[e` >z>@U=s&!>K>F=Ϫn '6-P>wXBEUU^XΎp'J-=>^趾:Ƚ/>Av)r*S( 7>!= >R>$蛾Ni>$=SH>>>{>rKFt>fDs=T0f]= q>W0>(7>J(F.>3!<>$= e ELM>Y>315\> ˽lx> 9=Q>G>=>0NJS>O'>>ƽ촽Y\>䦾5dֻ>t ͐>fݽdo}RlRl&_6Z=p><ξdh=,<ý_n>Y#>x>#@>+ p0 H >ּR^G>Q>?k`ݽ䥾  +,<ʼ2g\RK>;y=3tf=/>yb<081>f>!=O :>xӱ<7F>>x>j%'D>`˽>z > S0G;j>9M=-X=3o>=χS>=iea>*=8{HT= '>0=ӟ)Q=_|k پD`o:w>8==Ki==at=.q>I>_9⾼' vH=3,i۵ý"f*kz>ƒ$|>41g#>wx=OQ἗G߽D/#۽;>}7[<䖾m>&j#w>UT>8>]֌ P>wDgDx&32>{=ؤ>*¬>>۶=Z= =ܫ_>&]ԥe>t< lm|o[ >3u9>̮>j<`Żj,>?rw=LսR:]4==ξ.>rSmp=o>̳λ;<>&i!nma:==KN?go<=MK<ض'=SZR>ҕ=[9ʫ=Ma>8h\ >)ys>ꉵ=/A>Fi=->R{*> y=T3w=>> JT><>6/<2̐7>R{19I=I>Kཷ^=IK9BS=>)=8 >Y W*>0=- ᅾ >=dI>V4> clj_^+t1L6>&O>=ю<濮>>l>ʗ$>g B1>wξһ#̆,> _,3=M>͓d.ؽdD=t=->)v=K=3 
=XPK^־-iwD== W2Mj>}894>y>*!>t=65>={>+==e{0Z~%> =5>t<>ܞpgw<>;ҋ=~mo=WW>WvL@;e~>>ܴ>46g=$>qJ >-3罓c>R=x%K)><0e>k=lm >>̽`:m>; >=S6F,>uW;Z[͘b>U6̽===P >Z@ND>"hl7e>`B?oCnj#yf=ڠ:I@+ֽ彖b=8 >&|ז=b^j\ =z'=𦡾>SgY=(M>=b=`+dW==sm>A _bm>N>۽|B"=$Ψ=P6HMJ 8=Q.>j8(>Yy>P>u>֔(yC>}q>Hy',:VԢ= 31>r>(Pf>w*%;_=5>GuR=L=擾>>j%<6k*Y"MĪJ=o=Qz3{=W#=7[qݓ=L= 7:v^U=U{½>7) >Kƾ/eQ>/<)">1,=!pkἛ1i==ܪ*N%9>=?# MeDs= Ⱦ-q>8+>S== >Ô>jս?46A=> ?χ!m4z>W>[ '8ܼ=e=Wg>@=6JT =<5C..=[09"A:c>aX>U|56>nl4>R=瓜.Dh=rR^U~1⭙W>çl~'̫=dPM>~v'{ݼg>,Q=N^XO>0̻:-E]F=O[$=Jt >]>ܦ>J>U>'c;[s>>vQj>Ez>X =>C ӌQH%> ov2#9M<7b;/`:Nf >u->뮸,aOjh0>置x>{>"R˔A>p=L'dپ!/(P>L"=F=!k;>h=EKu*=j><j90>=f!2>վ)c95^B=<د;,zF>C:=h7< !C=wi=oR<=8sD6>:>4c[Zョv¿a.;V>n=I.<6žEZW>B>C';g>Yh=7i˾by5Y>ޤa;<݋>0l=yϽ ہ+9>i:@]͒=>&jH>IE=ŒG6^˳̾a=n=/>==={>>o#>>Q>}}>˽, >"=->%=puGàAϟ>pTP>-C֑&N=yI  YU_*ԘE==>[=(=Gh=O:=e5A>B>T!>v=AQ>Gj>Y.>sBD =>Er=O<yJ6=c>b胾q>lh [Y;&== V<~>݃>GK =gg[5l= %=¾c>,'=:8i>|/>=7Ǩ8)1>D$d>8{GQ>>as?=󸫽<"=DUʽX!R;=> +=3V%,==r`>ʑd$ie/>; S8_ڽKR A >=4q8`>X8qW3=*(z=g=y>c-?P8B=$>tR<~>I=`4>?ܼ@ň=/$=c^pn9\򷴽MP Q>0FM'p;=46=xq=L44>,Q>k>_=;&7:^73ѽ;@G>5=sq==hY=Hr>T=>J^=1=/<=ʽpK}`A> ټ 'Q ;= tJ0ɝxM=,>>@:1n=>P>UϽ9Їݦ=\=ƽG:=8:{M>= 0 =Op >'Y>+ Hy־$=L)GB=5(>[)s>=xܾ>H{DuٿEbn8>>==k{K=_t<m>O>!W=:t>(= y>=|d.WC=W=' ѽe=5{gQl>Hx̽8>*=o)[>Q#>R<Óc>ޢ=L 9>?<Θ>p{>ɥJ>̟o"O KW>I6\>J>ob=CT>g?k>[%>*ᚾ`=i==e:=~:>==(̽ɾlDi8~~>=3>PbM;>L3>n=J>cg(X9= +=rw>t`b[89o=Z岼^ Vc= d=E0&Ax>I)z׷9K>9ݽ<>$U3v;=Q%>1W$'9|t=<_@,L.>>’=J4}g'{O:=5g>^Ӥ)D>i>Fy>Nn40=P57;;n>aK=׿=p$:ah;W=<">־eg<=2=Z=eC:)AEH>;>r:7=>䘽S=?>&Zž =oO;m>=̽ 2s>u<#{ؘ>y>/ `=K m>y $b>+뼻 S=q>fOu8h+>䗽Ūju=P=>oHvoZ ˺F &|>Y2>'ŽG,> >VoNF=J=]w'򟽳`>c|HԽO<<{[Ma=Q%x|>K]>h =?=07m=(6>ΣB>ONH{¾s>/h{HNc>9RJ >9BT>mq42 >xS<:}ۥý0>%:S,>=u?=>Ľ_>D>$(>Ew> 6!Q>y>- u=/-S=ރG> S=t>vYJ>W>\:>=69h4=;=];+A\ =;u`C]5>2=JRzR>;5|>Rv;#[6MC>_ c>jh]ЛNZ::>A.꺝<=O>>t=}K>Ayt1+`ǽQc>܇1=c=[Al*>ߞ=;<EIƾ'zu=:=m>K<"zpB>s<~=g=]$4'4<$^>(Wo"ܽ'B%;)>2'uP>6tJ9>X=:XC=#>-N u->qm>~=AԢܾﯾO;:8^ =9*H=p9>½@I"=;ѬS Cgí=F!ug#=|?>%#38 >'== >Q,-c>KM>\E9{>d >/ݫ>󇾫=ObݼYZM>+>>J4=z\:U;=w<֝``Zj2z>ֻnN>f{Ⱦ&1a^X; ͖=?;=KgR=mֽ2>/ c* 
Ӈ>}>mI>C>V>><}=8oƼd=j:X==>L_r>W<ԟ7׸ume\PB''>>$GP^> ƾGA'@>$nǽCɷ>7R3нZބ=$,Z>G >c=z>֚>s=~>.RQ%=^MW>98=r>{=ʭ>Py=/ע=.1zšW&#W=>=ł=J>ʾ$@<~>57#_V(>'Nꞽv>E|%=ßyj>4<KC`l>*/>8rq~>yģ;Jx>&f>>_Ă=0x>>n>b>cW?l#^g/=†R<ڗ_>[> >÷=l4:\6~ނ>$clގ>?R>H=>#U>ƽ>@u=!=G- ]>ڏx ub=R~K, S>'>ƒ>.=e= =АN=E >=[>Js +>7=ˀt=|˽=檾`=1!<9f=~>B"i!b=0eY<}=>I>M^$bϼ=>b=暴i ٲ>= 8\!Kξ{ 8(5>߽St!=_<P<yd>&=dR=}{>acd>[.>VQ ;!)>E;$f=[c=:v>ؾ]>#z7R;?=<>_X1F<9r6c=:{8 k=(=-,k5>Z=`'?=> (>k" --Q>> >#2>n?Ƥ>=稾rɽVy>K>GV=/>,/9W 4xޣU4>loZzsbRUͻQ/>>ݾ]➽&>=H>>خ u>S=j->< d>b̓< y>-=Eg>68>76k=7BMp>0>6p=\ҡ <x>caHБ!><ʫ^>=bV{^>u|DV>EF>lh]Py>Ër=mܼJb>lTn>S <Żk D<>K:=P<ýp 0=&>}m= =),*p=f8i>=PҼA<@<ľCn=7sR~=.=sm=S=>l%>ã>֢B=6[>=1yF><=F~i0`=5=qrM/u>ĸ= f_>j<>>LRa=+>ѻZ :E=ܽ =J6=a/Ӿ[>8$֟=7>A& P:>N=,D>r;I=HC>>=O>߇=,t`%i(P=Ō=#ܤ58) !>Rټ þ!>G>`=") =A=Q{c(>gpF>՘=i=g8_׾ =MUwӾa9=g= ߾qJ23 >Hg>Yh=!y<X=;ߡU>lh==QY=e=apb>\j]1>I>b.8w[ fC>W=@V(=A>Ӿ֔> Y>½iW=Լ =*d>>JaK0=>37!7={

:>L >vD>ٙ'>5)=M=3 >=\:Ͻ>٭6iþ(>>T>׀O>&&>DO>[^=> .=謁G$>AN=H>=Y +5X.C>>gkB<.2< > L=>W_>—+u.>D>}>;sb=JQ=>[4}?<Br=7w<{;\=Hm.̽U0E9=fq>0>iK\>=A<̾ &>\>-T>r)UZ 1= =E.>R=̺= ==)>u>g%XV>椬ѷ }1p=ʽ">g>Y{+^uD6":cېh-=tw=3K=A-gok>8>mK c=v#=HK%{= kV>B+̽!2b=;+v:.=K;OpKq ~>caE>G@䐽Co;b>qf;>kZA)_Efd^3gȴgѾC>D>ےDӉֽk>qGy=a>ËV$>┾ >d7M<Ї>8>f=e>8@<:=R>PK~C*=&޽{1:;+*>݁=K>iHҋ=H5>F gm=ߙۑ>]r>O >ҾN0:="epM!=Ŀ>7>7ھUU3;QԀ>F>#ZM>:=V>w$X)}=>YF=qy(=nj==><s>`Yh ;=ףT8n=e> ?=>ӽ AK˽lN`v?>&0+xٽ\KZ >.%1Ȑ>Z$>lL=3S<>^=I6.> )>WLc>Ar1<!;Ͷw>&-}>X >IځI=s=$> O=f>ӌxN>¡y=;~(yh`֪CV/>eagž>s"_=Dt;R;?@kg>hー;gs>kKɄ@>-= ^.c% ʽ =:iu>>=CҲE<K=Y]O=WC>*ֺ =Nb=h־ >V>}< R>-̼p)>ͅ>=˾amN ol?a(=u*ѽ7=.>C> \:Ҡ>j> 5>9U yȾ h=P=f;>?O>=0V>:<S<䠽FA ߒ]=!?}>s7=<=jJ=> w>=`=S,> #I1&=@(Ǯt>kļ;ļt77m=ƺIh-6>+7"?>H8?}#$mge>Ra[>X[ a> k>Z{O=S*@轤>``=q6V='`c>2?i4-@VӅd@W=]=f*=`m;H=ZQ=J=l>^=)F>lp5>$N`8=$-j{>{>"Q9_&M̙ܽ>ٽ)>FU&=k>."$=v>p.>ѽO'=A8=K<'N>%L@H≾; 6/jgq2?4;9>5>'T>|>YߔjMf= ѽm<+ C~ >=Pn=<@=m=޴5W >,=g6>ZƼ^Gq%f=q=22!AZ,?s,ƻqҴ=K!T 02DZ=2E=7=#>39Râ>ٽ3Qo>>+jPgƽB>=?Dkb>}S(W^>p/3>}E<V"=/>"BĽBs>"='1<(d=bE>DvL?72^Ⱦ~ƚ>l>=ݽv &ˆ׻s>%=O(>>)Nh/u]uAd >Yf!q->s齩>@>(%f>}E>L;;%ѶE>" jLcŝᡡ? "V>av=Xn<m;=ri>>p=/>u5v=O= >#TƽK>F1Y,>xB>=\>Eὓ n>!ֽ>C{q>Ga4 T=>->=v/;ݽO >>~bӽ=~=7MY>= 9>$>wXay?M4e$Q]=^1C F=>=o>BmLF6>4$NNC>s(=ɹ\>+;q X͊)ʱ)\T>"*=J>a?QGW>#K;x>{)+UJɉڧX=Nvؑt>%Qw>?>7={^姱gC҇b==W.>ۼa(>d(<ߌ ;`@:>A1&R꼞C8>VI>1?>;}TH!ǽ9h>ˌ>>໢d;d>jŢ;>=c!>M[ƫ>m@]ls=5v>;^=7@W=>z>>O;< >lW,Nv1˽=l;>_> >5ÍC׽IJ^\<͖>{;7J̾/컽=]=q >ᠾ<#=)x ⯜={-=rUֽkPȽ۾,A>fd=f>DX`>9O>'Ϟ>=E|8=="=.5=[s<օ1{udﺸu.>=ҋ%&| V =U>p Ɵ=ۣ>׼>;;<=Ea=ہ=g%n&c~<23Q>˟=Rz>f=8='T?>&;R-"K=U > se"=)x>l K>(HNm>(>>M>D>>ߔ=D>GyUB/K> Fn}Sb>=>)]<>?<>s">K8u Z;Ϗ]m$>j<=^'G<Ͻmz1c >ȳ=Or>‹>C6>4C2yX==uF@68=K;Y;[ߪ=}!@Iǽui=&I1>}{¾{P?}> 4>+9fPj>nW=G=Cwِc;/%er>P?<%G>pܼ%(>#1;I;_>%=?Ϙ><> >3jR>ӱ@!=.T=̲-D>>=TKٽ](>><=%=P+q$;K߾==#>j=]4>Ɓ>>$>4K=V=mE= ==P{~>i,>$<}W[:==>P@T?===ڽHt\=û(ʱ=HrB(̊:F5(ǰ% ͙> f~=Cb=[L% )*)W=>[ eož A=j<W>>==!c=྅gVb)#::ш'% >}>d, r`>Iv>tR׻r7>c ?=0'=Ms>qXVO=YF> Vɂ _>>D<Ȅ='O >)>5=ؔ=k#^=4S>.iM>0>'=*/=X! 
><9=>>tZ>n(=K>>cNQ%X>lӅ>1>>uҼ=7,F=پ>u!cP> Sc>ֻ>(9/ќ:n%5 :>>@U=?Ҳ;HN]ҍ>=<= >64>5=ŽJGJ}>U[gE6u[-瘢yS:XT>lu >gg>'B :; X,Bu<->kװb\~>6BtO=fϽ ׭0T[="0%2B><{;`c":V>=5LǽAՅ>x&¶r>]Гѽ*\pK=>;>5Uy=>}%>0È5<,y-@^HI>4MԽCy=g">qqDs>k>!&穽^T> =>̝L'꫽]C>b(~=*>V==zu(>·=y>(>ATw>lμp==U=;ZO=;v>=?w_>A>d⎽q];~U> =~AC>,V(=ſN>jTg>G><m#95>߼ƽk!G>q>G+>">u=h>=ǽsc>>\=^ƽ?߽G?xJf<7PK= 1c=@s.bt=4 >},8>OW=g*==`@?m/=3[=Z?>8Y>A29 >\;0>d=V=8d)=zH>=|q&Ya>e>Oj;8y>d?>,C>ݽ:=\= )oBսƻ W=_>~<`cU>l7R(lé8=0=b=f=>=4:MZ4=CU>E>U=hg0I/=m(>IVt,=Ȉ=2>x#G=q%2w־4=`=s>o=ZE;9u[ ><=} )>m%=ϝ> %SRr]c=>ұw===B=\{\}>8>ٽ@$>LW>IJ=+ :=k= o>Rf=HR> k>x%뾸 r+dC=aN>yor?I>!?=B3u=SC>jG>QC>7o=m z>>%zq8}ѽ.D>a>W*>~qA,׾?= H>+1>ˑU'ygBva=Zx=\mP*>Bhn=M}=| :!`E='[L>ԥWn>y{>%'>,a=Sdܫ٧Ky>= =;=K[E =Io=YSʾ,hϽ kM(>@p>kK;9=hd>6>#Z>JV* >%=5>V W#=c>d<>?0=y۽Q׽ >\j<>顠G>ۂ>Vg=>=P1A> A=Ļg>0+ݼh>n>S;¼ =׷^=_> >>AK>=ta>t JEݾ>T@>۾#>ͦ1X+뽴=>~]dx> ,ʽ=4ڜ=ue=Pq<Wx?$T><9BT>=2>lN:}Lu!=Ŷl>C>> >T@Ǎg>c>fE>ؼ˵+>9=B#zNR_>iə[>m=jf>j< X>v>?f=$hTվʊ@&/>?/=_]^;>ͽi,`>c)>7=Ľ 4>PQ>Z'p+ =Vu2u~=W=Ss=ou<s,>]">{>p*> =>ax&i=Jl9= =(2=HJ4 >'6=e.= =8>cn=Ey?6\!=+=liF=*>>84=\=e-;@=O>>u>A> z#}Lʼa=z ;>T~Ž=Jr=\,Գ= #>W=-)%x<@t,W;s<'fiLnٽ5%_=)t>*=9`A;<HN b=8 ==iB>x(6F(> ي<)蕽S孽m>5 &>IC>>(u#>)=>9=}ɲ=/Y8U <>:=bJ=w>=D=h=>Dmee^=ϥ=vUiKz=z+V^<n)nn>&-_$>tɽӼr =:>9 <>z ޲><u=lW>#>ى>Q@qz>Ė~>0B: 5>o‹c9N>}ɽO\=&S{$p >L?42׼..`>L>>=y5=2YJ0V< t:L3!=Y˂?T[>i=?A6>_;ФA>[* > 3>k=t>+7Dy>C4|>n=i<3'>FCo>F<=d>gL{սݯ->2q=$`>>你/|"='(k8ѻZ%=0B>wC>a>>)==#=K$g淾-lڻD=1o/U>4~=PR=LqU=w.iƤ>As> x >Y{=wtѽ$(>>)Q>ƌ>w=Ÿ<4+=|29YI=,/F 0z,>c=!w=wV>rY<_;A>JB>a>>w >3s,j᯽'P<|>>m9=<1>y2:);،r=>;>ZýO_>_>܉<^g=iH<тfyP;V>Pe>>?=+M=O0YF 9i7>S=)=2a =XaLV}Y%;v==}Id F>2+^DxZ ۽F>Eh=p=]e<.> >P?aVIpXýLp!<=R]9F=.J-(= ;>P{>M=Z><ɠ_=>Fr̟= ]P׽l .6=u;(=>6MA;7)-Y`>iŽxd>/j'"> y=ʗ->@.=#w.z> =XRIz8 @=q%+/>*;V]=^=xD~*-=Y==]7FQ~ >-Ľ=: T"=>; <(?j>::˔qi=a^>C('>gX ξ>? 
uʉ>`,==80>cW=>V =g><>ݙ>W3>䶤>)&>ɤh1=*#ǽөu=-R< Q^:#6 >jm<ܶ>3A@=PD=O=լ=f >='SK>/} W8ܛ<7>5=Cޝ>½=Gȼ}='>"> -:ͷ>d;f=U=ثM>B=j>ۀi+dHM>;ko>^>> =N=$ >$>H!N~=@==Ji<5 f{h>^j:A־Œ=ԄQ>>Sy=lJ>ee>>Y r>Ǘ<=4==>ٗ=# 'Na5G>D#=bK>VC}%1>Ļ8>' o> >Y=EKJ:ͽj@=Ɩ;>ы;㽝׼6Kܽs?9=Tp>ʂJn^H$7>9ݽr=؛gn6>C'>I싾BK>jvc=3=-1>*@=s><~х>f[><`l>-V=Q7>/>7J=%ཽ'^>w!Cg~<=6\tԾ4%>'Ж<2;8 x>ȸ>0r=2%` =(=S>ۢ=ys>` ==oPO=A"W>31;J eh>ϾӂtqK:6>tI=_u;rq=ap=(j>z>)JOtf=ֽhZ>=;>VTz4瑽L 8>硽;?;]z(,n>05o->n;k==n*>=c>j><Ϋ3Z> 唾F=|/^no=àvz=T2=U=<"V=L (_7>>ab=_c =Ec;/E_ڙ=;W>$v]c;xGἹX&2)}*NBoսIqڽ*AHK>>b #>>2X>֌==/v ( >n`It>zG2=ܘ=T#> =H)z> FFxGZ( q.d>?ὦ> K6$g'eV>i:'z>eN>څ=C"V,v4=0>~=ف>,:ֳ=/ZҐA**uW>֌= ;9=OSAD=&/>w='e='Z;w=)=ĵ=ów~\>|z95%>>?\w >P>޽_lb=X >U>H>4$7>| Na>K>成N$==쇽;PC8ҽVK8s>===,ѽ @>.>g oN > <̽>~%= >1>=A=fӪ=s!@>=r,>.>CG>`=Lۉ==l|9zV>U3&*Ӿ Qy>ԯ^>R/=F=i*z9mv)> q9=>o=P//>=}%ª]W>:=MT1O>M>=?= u=Qۜ>[]=d>h>X<=#>Y ={>ZA>/=Q_="]ӽ>*ٯ8>$J >c>=Tb=d >)A=sn:x>ꖽ,& ==-Ͻ ۽.<=T>sP4;,#>>>c~>J>|t=03ft>vJ>A=ϥt$F>;s]>>"6%pM1e>']>+v=X>>FY1wfH>X>gȠ>l=;JX >s=;>71dic; ~#{>=CgY >#=>`>0F>0l=7MBdCǑ>++S)=~:D=%?h>(=謢&iYU=5=S'8i>ዛ>Ӎ=ذsH<èPD<";;eSRpy;We>}x<#Z=F==k==&>CC\L\94e*>%~==o| >r>6#iľ佺<(9=|F(Qy>o=>e.=.c>=,>>(Bq>.t=h=m>Udp > >`{)b5z+> 3'*c%@L!uS*+>f>t== p>%n={ e>s^=Gz>l=M=ӷ2>TIk=Ծ9>ڡS!0=(\t=t==2Ux=4>p=Cy=ݺ.a>a >5Z,D>=vMO=n09?,5>Yj> =1 =I=c. 
ʸ*X=*+)>[h>@I>=jeݽǾh>w /n>4_uMR><׼Z=_=ݫ>0m; Ѽ{<6>u=C5=;.@=;i̼=>ɾo>J BUʽ="d=j==h0e96>|=4O>=Yiw><˽w>п 0=&I>ޕÝ޽===>c@=⺠u;=x=|z"ϣSֽNR_>>@G>D=P} R>dp+g>@#>c;=WS>>PGtF=#>‰>gFl}ٔ=D">Vn:>S>Մ$>AL>_þӶ=H=V92ܹiNՄ?=?Ω=Jkƽ Q啾w݁=] ۽@>sS*vkJ{>j:b?yK= ivW5x>':PO>婾ك>WWPU>(a<>:mE;Gr=r>-=d>-Z=q=7y>8ࣽ/0> >%gw+>vSҫj>>)>5־3></=|=''#<~;z^x1#>6=?=J6 >7]`۽|/>ü VRs%?m+<⽉+=L=(뱽6XU>$?-C7T41Q2Ғ=䂻>F>=v4{drVEs!eW>=ֽ W=>>hX=#>xC,==1=,=aB==߽=D_]=u>M@Q{>E>p$>F=Wz=k=bFP4SYJ}^ 0 >YϽz;=_=OO==H@g3r>X3= G>Ҙ>3ٔ>jѽ# >l֩8>uS\=>‚>(Њٶ=e;򹯽J>?NCi$b>÷>i~>Gx=zl=zE:K_)\=>J F=}>Ah><TT 0>Q˼iUK=X >6٠p(<^x½m=RY a7P >!bW\==Q>)_>庼py>`B>)>L`q:(=$w>9&>罻>5RNPrŒU.2~>sýў==Q_>ڿ=8\W氾V'= =~fP>}>@q ,=n(ly=O6>~_=Ct>s>ž6C/z?>̾;6+Æ=צ=T¾%>"n=>'b?b_>S5>KP=+==6w~'>8ŧ>|:VC=k8>W=->U=Eɲn(ʧ=M=>Ό>LTp>C=8>v.\O>wX;>1VZ>Rzx>׽:rz>A=>rUC=78Rȼf<h=}C> z=PG`k,&=9>!u>$>5g=q>m=:5&7ov)g#ݰ=Ӹ=&8,>n=ݐc9p;)>q>=ވS0>S:=d.ʼxA60ϫ= ]<5=Y>Z>WX=9K(>b>(ICK>pө>!>(A=_^=R5νѓ=n=VTK\=>?/>Jb>ڽ1>ȓ>=p^=[>X=s=yYlPDQ^_.*S>*߀=~7 >S +齈揽:>ZM<ľI)kl2h>a=_&I{ށ>=LutT>=wJ>`Q3PF>c=*_>= =v5r胾Le$3>N[X׽r>Ң<2=5f> >x$@> XDINоe=R#=})>n;<=g?!<F{Dk]<н>>\@.>$L>҈{̫>>">KZ=:;<Un| EOh\d?fپ5>}4O> 󦽑Ž&}=W>>ڔ>*k(M?H$>==mw=⟾N)Cȏ=f-4[}>N.=^N>F^U< =y޷l[>U& W?Q=Ks>dߐ=g;}>~NX=1[=)]Lok=f.zF>Z >FB> >|(>$᪼ )W{A@>8=ýt32a=<A>D)>Ծ>N>?/ =-Drٰ>Ṅv>4soM&>N|==7>xl!>,[c>> 7 >;zx3%$>u^>n% 9=v>Ff]?>i=B4u=}>{M˾;MA0U,>s^>-:w/ag=9> =0d,=$%=4y>vL =dpo>?.>n=[7||>ߜ=~wн-> >"=3,>:*><_=$=͸E=={+=l=} =G*[~a->x=1#>D>>󼿾%M=L=(> r=&`y > d4>U V>u4@4>d>ʜ-u/?;;=i+> %=>4 >U>=J=(=K6 ,$^l>쁽 pFk ;ŽTi>='ƾԨ˽\ ?KA>gZ=Ɂj>">8c=~t=nf>Üс>{=;=1'=&%>+G/HʣvӼeXr=a(>^R=Tɾb;=9E-ϓg>bgZ?>쐠b>>u0Љ0[v:(a>\ = ɼ.ž}>={G .Dg9=9LK>΄۽>U>x>_ =@>,bT= >~=FI>-K¾4~ s0sl6B=w-ׄWy=K=<ӽ{>0;BHe!=G>^yi м89]eI꽶Ʀ>zU>3Ig~X>]=rF=>p7(<$>G.aIhĸE!0<H>О䤍CeN>-"G=VKP^< >> >h Y'3=*>Io|!Z>^> ѼP&P V=u֒3>e:Mh)>\H=<*=I=4J?4x.= ׼5~+=׾{y>iS<>;ؑ>.P>œkH-,N>_= Zȳ[# +>ͺ>̼H>͛J=plܽ`#8>Ⱦ4 k{=8>Eg9>=ʂw>̨uX񄽧=>>ɪ?Y%#>0Q6mD>uNc3>t=7?¾ +DW>n="~B޼Q>yw3f>>ЎG<aW[qM=s===?>:,>8"f< >bC>>;>=硨<~]Vk=P"S& ,j<>' FzLd=[Ӿ&;[K\l%=섰k=.>U7U'=_ 8>(=ܘw>=W\>=VV=ʒ>Lb<_mCkYJ=;?=k>>=I*}=d=;/F=1=νŮ>gY>u>hr=AD>]G_=J 
6)Ξ?G=cPF<*(>{W,ss=9]y]>o> 2-ǫB@><ႻT>W>>rj,7<?+=HI ":[;>3>O./ro=xZ#'>;S1Q=S:=++o>h>s {L Υ /@<(9f/QJ >@>È,^>.ɬ;>fqѾC>G(zI =8=^ӏ>R> ?N-=Z$?>X>4<,}=x= VڽŇ=|1>n>u3j&?HH 2>:>/Hc>KN>Z=kaa=,᧾5=h7>^.N {=|g¬ݲCv M=UR>]=Kξ->bl+hO>b>߽`=S>t;X>6>g>|=D>ID=|>ٟkm!@< h>9ie<:> z UG>:&= >2D= P=t>>Mݣ>/u'6!=&>=/>콝µ=>꨽lt]>R^=ݗڕU͍8>Иh>^ ?, >R>V˻O ?tK6?k>R=æH6C=s|թT?.>_=U$W>Bv>b=w"=~d>74<>=Ӿ >f"l\==>`Av=s=LC>~73Mb==1 Ug>Iլ+^eѠ.>y=Q>vҽŒ>崊<[=>Mh>9|b=ƴ=;eG>cb=!i>@a?P>w?7=G&d;-{e:׽{=~DC`0=6G뭽jԤ> mvCj=%?h<Á ze#Is=lM<>G[> Kl+>r=!>`>#='{>|w4>ѾĎQ>=ٓ *~&iEim==CŻ>Ba=R >u+0=Jr=v%肔$><:<ޛ>-K}%>sz'ܽ<俽X[j&=25|=Z>^> rO>CϼS=t,>;>V\h=c> };o?eA>;m=ډ=۽>o27=s?=>@hM;c׎J5=_ؽ^B>V K'>4tȽ=>{W>>==c>$ɭ=վ$پF=۽>YXƽ=7~c>K!=(;&>xp?D=]]=<[<}vV= 4 '0>n**DO~3Fo#>>H>B>7"?JUX=,d\br>C>3@=୾]O!=nN5=̙>\ubZz9>K=* =Tx/>| e>=R>GDP̾=|&(=a<bjƾ~C>=MwȼF >@N½f绾-׾D<40> f<*߽>JU> i=z?>] =R>aHݿ=,3n-`ӾY a8r>O?>5M+> += >ak0@< >tǾr5=@d>6U>kP>< >&Mr>~h&?kL>,&S=;c\|C=Ψl}=̾$5>ݽBm=$->X7a/F>Ľ=c3>=n>OJ=Ӕ>$g>sx>lvr=P=k5S(B<7$<Bܓf>>ݔÉ>oX> !x!=&r>F.,>/3>Fjd>ъ7:&Ѿۀ{=UR>>AU>QKU=>>=]S>eJ1޾R=@u=I]{t>Q>'=}TZE>[=sR>O>N`>u&o9=,+=;V F>0?>j:roǾk?GU>F>Rn>&N=9D3k'>н >C=5>0a>G|!>20z=X*\Z׽O=P*A>p.ͽS =c>7)C=J:>A@=$ngӾ5j=ڍ=hk%Ͼ>AӚ>f}=>[y>6@dN=0W=_>N;>x wl=۸7f=jaܽ? 
>jy>B79 3bfc8>aw=˘`y>tu0>L>۟>&=Xٷ"fbRxQ\>Pg>>+V>S"{=.(/d>gur>h|>t@٠B v<|>1z!=R==6cX;?y ">`l,8e >re> >=qbVͼ=j1+.B r1KB>/̽w#}vSU=P!>X=>Y-P=|#=h> kX=n4T^e>cx71vա<ĝg>2==|L>z >R==G˽J =j$:$gW߀ow><}>dkWn:xb-.p=vL+^l)>PԤ>2Y>F|>a@=1ן|3ýz>9;>06=CƽBy߾*l>_ >۽t(e{5E=U Tc0=pk9>P8B|lLdP3><0 >Eh4Z> >F5>@ؽ4m>ǒ=I >Ӌ=Wr>p>km<,!>{@ "$Ľ̽v>=s=+>'>\r>'K|>=+=%[a<퐽7轷=~/k8=1d>~=2 ͼH :1y<7i>>PN轨ml8O>BsQ>kﯽ1Q?>Oh'_C>G>ݽ+=0~>ah>EL@ֽNpP;̽"H=,>@5m֡;y,<>۽}i>׾s >y0=J=vy?8>p>%.Hov0+!йnT>,=t>o(>$Ľ><4$>}>>>k=ᚯ>4h>>uK>L>ma>2{=9=X@G>I _*R``f!e$>j6>G>P=_k=>tO>1kAG> >'A>9 EZx`W>Ma<,xPJ">f=:K>1Ei>{ֻ릚4>"s=MA=PƼzq@ݼj=2>Vj{=F> >EБ>mm佾p=p={lԽk>I}.VR#i꼡#=[Q> O]O Ҕ=ؙ= 戼9dDbF=fU½]ؤ>~׾o=l>kDx=Eýf*=< >C=*]>wM}_>y۫Ƚ=o>婼=>=C-V>LM>O=S*;j=)Z:!x5:Q>;cC7>">gY/-j^<좘Y9<ʾNU>>it&ZeSJ>J'^]Nq=Q>{/=e^>8W$ 2JAiA: U")< Q|,W׾ώ)ۜ=P>*>+>jTDt">yA>>j\$= >d=d>e>)-=^aw=Y\(O>>Z=\AŽ>F=e X$>{Xm>1"<2=j"T= +Q>,>[H@>>y:G>0L>_>_,>rp41k5o<>!>E@9PP?e=Hb&.ؼV]a=Tayx>_L>t˷7:>`ƾEsc>>='=[揆=8T`>C=aw="EA<T >y>C K7,j_t=_>A^=9U=^>#>݌>|=۽6[z>&4'n>g N>/8>dP=Ec>'=Gv&>'>-@e<*s4=ŗ>j ?L8&=2>>>> ]4;=ؼ>=;ؽ84Gm=JĞ4#3˽nf>DWn>\">L>'K@=}<d{ZӾ= L>H=*N>}=>>*>}( >ĵ>U">1>.QLۥ=἖>]hQd }uwy> Խ*> y /D=.@=uOƒ/^1>* l;>t>F'Ǧg>x9>oGj=;M[SEg>M@WQڧ>U$&=IL>8Ht>ڇ>>8 !>O ޒ&>!!>uz >&'>˯P>Yt >ْ=T<ɲ#oH w7Z>H# t90@=;S>Z̈J>L>$+;>`Z<5oB{d\O"ͽp>ѾAӽ=M<[4[.>ܭ͌>>iBg=p>Y1M~=:>X+x>.4,>(a(sLC">/UDeN>~<}>->>:'=>UPAgXSQAJI>l`叼8Ƚ޽/;7R>vh<ҽU?Mq <K>G==>K8ɶ L>n덽^  u ~fu>q?\LM̞|>_т =3"=T׋sl=1=HRVaA =YԓD>k=c@4>U >;>.̝=4ޘ> `7j==TϾ5=rWz[>M>gE>?=r=ZI=77/#'> >Y΋> >?PX>>Rk;ѽ݋$=/F>l=ŷ>Zn>Scq"=;<|}=u\>p=I=lBrɹ=lA =o'Ϩzv>8<(V/>1f=>?>:=V>{|=p#>,l{>QwՔq>5Bh$%>YǾR$'%؍>>=p@>`vf=.{*>U =a Lw_=s]=v=; w>8=f=P%QὊV=>T1=P>>y \>=]>>|> V >M;<~kc]>r.>D=I>od>ղ۽ >MXGΦ vJ Z8>2jG=-1Y>N$=J>n U'>rz=W^o=97r>FЌe>~-=Mmt_>L_ȾzH#< Y=ߚ>p>B=wv%1l=G,=>$>q>1Ǭ>bx*=`KV=jܽg>85(~=lv`<><=t@ jgfIT=vf=0 T0j3 >=}Z>vT$>>j>=ѽ;uc><"===q=1>."9<ԏUa?>;=ő>"&>*=v n R>^u<1 q>S^L"b"V7ǟ~F=j>7>ǃ'>vO8$oz=z>'4_ms=="a@A>X> oHGs<>r>X#=ʦ=ZPֽGQܽzUI&U3>+VsP3>bUrl[.S>ż&mxs,E;ٽ=}K=;>K̽nk5L,|%bw=ɆB>, ?a=>J&>k*I;0>Ss&' PºI)=>hN=hO=ʺ=H%= >= 
Q{>Dl?̾=JH=iݽ̷;<>N=k>DWFN;'>dƇ8C)=NvK>$N_%ǽ>{U=1>2KA C >c>y=׮ۻ<8>I~ϼ Z>&=K^>pm7K&=jR;~~>w=[ُ; m=oQ=#&>VYFYl$>J i>_ A >zFK>][( >=oP=Z(-L;GӼꖻw#LvT>ʥ`C>'6xq^e7Dpt= >xB>6Zp==bk5s'6>=A+ >@ĽE1%>y=P\ f3醾%>\>>箒ppz=kG>G>$=O=H&=$c>)~<>o= wY==m>AbRo<^<½pZ>->.W;8>),>6># >84v9=n=_?Z<[jZ==7C=8k?ֻ&>yJz==Q= >^sk+-=L>Og>=i+˂m=g=\5>cPb>WM+1@ܽ == Ӿd=\:*>;<&>6;DL䝽#== =Y&O>+>&dǾڽIv=qO,=/:w*=#C F᤿|[->[I&;-&>朽3Q<"d>蜾B;>LC>O=>>xfiMa?>HQQ.< 9_ 3v<3d>s>R=FREq>G \>i.f>Ӓ`>>Vb`8&Ɣn>";vpξP>@>o 1O0S/=s21\b)"A=&rA|>4;>Q>޶*> *>=,㽷$<: PM>l9==Pe>nE>Eth"4>c=*mN"I==[;<'=tE =>>⪽;_e<B==fL6=đؽ`=f5=^>Qp>x>8L>>f Ysj>^9ۼ푽D(=ګ<`lkH=1NJqqݒaJM#E} >ν+ ..>F/>&M6{>H=?X>ICo>/>V.==2>>6k[.\ѽ.<P޽;= J㽝[Pj5j=,XнH'=vFHL&ZLK> ۟d(>[T(=[vSV4`ξyC>] ;E=HCc=b;|ct9=nཿ=q=|~<< >ES=M26< $H>> =}2i>X ޾b%ٽ4w3>*=!>낾u-=%̽.%>{:>?[>g2==-@/>lh >@=T=u;>\>[4<2U>f=*f=ԁ>Xͽq NJ04=_5=S>`>s>1d=,ʽB+>>_Za>jp=dp.ӏ,<* =x>tfׅ>{>6$}ѽ9ɹ=%=L0 }@\Lg>+_u>=a.>#YId&7p>xݽkdlRDڗIF>h>͚]>ٝ>5UuUͽ,o/=7:>=pР+ֽP>yZ>[>@8L=9I2H =:ýPUu>;zx(G>[5>r[UU>Ndf=x(<!⼉e՜=y>=u=-r>3 $P =ἶ#ZU=̲>=e =Z >x=G"c=D#v>m>I`:>آ7=0>^0->kj>=J-=b = >>ڹl>b>I>^߽퍾~K==@c7a:>X>ܛ䮜=KZ>W3ԅ=>"e>= =⣲=^-ɪX==rb>wQ=>J`lkx.a-=Q=>2`WQڽӾo: >/> .=Qʿ> >!yUO<=01=Yt>K\1>2x>5u<2DT2Jp=ѩ_> =U`n齃v;"5O> > d]]սtȊ>L=+0E5>P#=6B= *o)<2e=ǽ>0J=9PC//tx`>|';LjFbf&>1{=G>,o&*>>Nu)}:^>T>Z'U>>r=9C>u< Sҽ7;`[ء~ycž]X0K>3=03>Yf>>ÿ-B<&U,z ?(>u=Uf>j]=USe $>*>a>vh=*JOf=<.l7?-<=.i\W>Ȭ>kV=!'^A>ʽ4C >lyƾ>>|̾;U8eR=.= 6>/,x>Ռdu>Vμ%>!E>G<3>9Vښ=>?f>E?Ry<>;>Q>EH>l˽nKv,q>@A==L==)&>f=Ƀ?U=>ߔv> >mSXf FX< =e;:h(34>_>[žpF>\B t@1>3ǚ=,>y >/?c>a<>>ͩ=6K>\>=N|:V)̻ؽ0==P<9:c>ajJf=ܿƾhȾS>V2>i>&_:L41>r>Lc><->a>M5=h=J>oz>$T>e5v]O˼շ>VUPѽ崇Iڽ~C>F`^5P=v?`>>\ HB8'UV)\>tL^1hɽ"i_D l>zBTCnm>`ξr>@6=uν-~$GR==ʀR?Y+>\r>c>(Ҽ>+ϓ8>=>U=f;J](d>=->U>o+8q>>H?>"g=.=i=qҽy+R=;-0<=x>^>]>!> =W$ qxS>$P>>=hƻ }e<<&d"?!ݼS%G>=a=>]!:>>>n^Yq=۽W>>fs?>=u,;>c< C$mS=r>١>>+==>dHRO>}Stľb&gU=*.`b6,4$,\q(>d>n53r>|#pzN>r ?: hŭ>2z>+==?>y 5ؾ]-?=ڼ*H>>OgX=@Z>9`;=_ k=aw=Co>#&F<+=Uw5Ȋ>>>c>3 ? 
>$W=dE;=׽6i/5==Lo~ٽb V>0\`>uH%=>-kA>>ŇU|<;*=L\=K4=3 > ?>DHڐ,>=g!=cfK#= ̤K>=is >H >xUmϻc3>e<;>>>ZI:=Խ|=y<.줽/ľs4<^j<]°= ;<)e3=>Q=y<:/+gB\Cײ.=kL=Rls&t>>Fg=)>E>(HO/=dսb=.w>喼>3a>o[C='">,.>~?THq&-Z>!iM?W>/$lR G>s >cwC">=¾F/߽rfgZپs=s+9s=B>>ŻH; 6I>EK¾d>>v=ٞb>hw!>أ=>PD ;R= =\=LN2_>uۆ>d[~s-==* rhT>hZC<: I>u=] ^>NM@=PD>X= P=c>,f,F><@S>hU㽢ӀK=o=>O< !>:=0Y>[ >bI>&K>jB >=ʾ >Me>$T}=5'﫻~gֽS޽r1P۞/@>L=}=R?! '>C>)C=TC$XedD>۽>L8>7{r=m5Bs> ٬ũ=]=^= &\>G_FW=oD=`n=RAs=p>P=S(`=D6>> >j>=*4>\bȽ&=>u=+=l>p=9>Af>4=.>U㽦=M>Z>?>=vX!=Q >oϾ,<ソ==t$>>B+>:ý3ѕ##♾*= ھz(g:wѾbBto'B_=>׭R |ֽ[%|>I=h=Y0>k=ҋ=^$=0?U72hإ==$o =3ž<CϾ,>h>_S>8 %cV>M>aj=7u>>Ҿ<[ >q= =t">= 2٭=«w=>Z>t>u>[t>G=0J >‘><,<0@ n>gl; JQϣ)@>8>Z= =Д>W!$ >Q>B>|` >E}>s脻Yǽ />Id=֫S>{ľrN=*R>M\>z>/bPH >x5>p;]q>cp> =G"b> S6P=FRǼL>8=k =FP`.~ p̭>oC8 vK=ͽLټk? p.>1|>4N׆=H۽"=Kf.>8?&=K>dK !OP=ꃾoŽkM>B2=VU>9н^->{=t> k> Z=LVLi;t>>%v <@>ȣ9jc?"󭽽"=24<{o̩4=8/>O< >k>q0>5V=u:>Ld:>^7?<9 v>K)>4>6>~6>+kV˾ Vٽز;YԼ{tN>>=`&> P=>~$4=΢U&Y- MfztRǽ}=|=ʏ`>V=h8F=E.MY=Q=[^>P@ɼsC>}ؗ>kay߽WpI=P"=g>=q|=Mz>{l+>Ž{3 >x U+ǛF>-Ƚգe==O>}Խ ) ޾>d;+m=g[=}y[>3<@2<\'M>XMuȼ|Th=O]yP%mVk>3&"8Q[>侇W==΍>=M>=&>&z>\Y&=qadCs>56=$Լ._t=E>W ;=`=fݽͣ7>C=lE0-sqx2> prxwƽ-={;F5^?6g>g;}>ý=×>s=Ӊ'+sF>H<(+9>;Ϟ>PDt1 +=&Q,׼8$1K:/<|@w=t=k=M=>C9Ռ>0>~ Eҽb=B7P=d=/X=,5$ɦ̞=Ų`8>U@@><\t>ny5~ =_ -=[G~9Dн0 ;Lꄾ8>9V>~5% >PFA;hr徰PV>>=,8diߍ2DU<,1=ٻ֔=p4> >&|4$=u<`=N.J=d:=( W=`؛=b;}cm߽mR5>p>=[йPD>0CN= ?l =5>,=,B|>ͱ@Qε>:4>uнW@tq=b=X]x>vj׽=)|Äܯ6='>~Wm!=(cV.=Yp;FEڽƾ,PH~`&ZEUʼ S5n#=]V> ;>}=4=3 &>羢Wd=5 >=q->u2<=uH"B=3>m6<="7>ZM>Ѽ>}s >IF*4>=ľ"\&c>OvR>`cL>Ә3q:! 
>y 0<]U>k~==gp=#^ i(I"&<<@ZcM>V>M,+=J}X=!c=B=[;=)=a>08>:y3>^gƃއE>e>T< 5<*>>Ng+ܩ=N)>=Bнד> =G@U>$*8䓽)^qq(>=>X=wV(>t)><(B=|3 >Hqa{B=P3>ݙ=Qu;LK=Tk>U{==%G>.C= wA =/;Ȏb :e8ξ⃾4Ոev>?Ⱦѽl=̛; =>tF>&!>f">|>=˽Zx>a#"=hA@D<Ť>j&> S=Ľ="=Ǻ=Y~M%5=qmRC@=R軫XTG>>'Ab>2Yj<#ϽÕ=>">V6z  k=i;F< %n|<|>suK=?=>9=d=f0et;"n5PȽYD$>UO'(i:>H=G½gu\5=Uj=6=Ki=<ܽ6=L*ui=/l8>J]=qN>4ip>GԨݼ4(#ȟW2>E[>au7>\RK9=QV\(>Q[4q<%<ȕI>h`7'#7[5*M/|>1sF=L>=0?<$=o=;B >$ٽ_ܽ=[½*c2>~}>̧콱=ǽ{=>=(F,X?bбӚ`x>U{Z= f=e3>,Tֻ{Y4>լP혁>1,=]><{=:P=k5SI=X=ٷ2u>=e{"=%rF>A-D= >z:><7>;{q3罠 i)>>!p={>꽷==q>\6>=S`>'xۚ=U#A= R\综e<!: -Kb=j>̽q WȾP->Q}xr=[=0L> >e>њu[<{nH>ԇ>dyH><֢==q= ~=> I >|r<;>z<>.:=_r>֤>a>UǼN>&}R<J:O=Ci=0c>nv%>vKּQz$}Sݐ=H=E>H =]sżR=ND=٧>n> Vx--~>L>\0`u>=>DP>x= )>n&=jm8MM s=&h3Pм>o\GL>LԽ7H=;.?;ٳ>SA;D; >"qҺpRĒ=`z5>?<нu>U>x=a!%4Q=><9=Q<::=+=m>wQ -QC=#W-8==z= b> u=gx=>>>g |>=]>>&V>'` Y>\帽Yܽ_v8yq= o>P=F>[=l>O,$U\>;FS>[>!i= %Y A`>yYy=J=9>3^="~Q>>nk=dkν/~='>_(1~=<ѵ==У=Zfd>>[,>T=Ƭ+X/6/TT4>T<Ν > >/=UQ= U>}ؽu>>81ʙB=p2m?8 Xƣe=,,>&tm_)s p={<ؽ <!#=λ ,*<ٻl,x;ՑLd8=+W>=)7u=Ũ>C>: 7>=>_=i>D@4S> c=x3%g`i>\7M= p>A/ı=ŹJ;Om oe&=!Ѽ%:Ͼ;>BKSiO>C[}a#9>p7>>>Y#ER :=KTx־M=Hl^<>'T -A==%V <=0 q^>>.v>D]h<=eɽ˪=g:∏%auٲ =p<<>:2h@~=¾P>{{s=>Tk>lоt>>Ž!=j9p)>Ωx0=xl>nO=>t=4>">2>LK=zBZ>s? _؃=6=>Ѝg꘾=U/>[V>Pfk>ٕн>J;_=`>ϽWjpw=j=*=/Ϯ׾}>-=D=^ͽְ=<^',w¾)錽ښq0=U>=P>+P;`7:>=;_p@\y`ǽ>&PP>|,=˙=g'=go>Gcq'+>$hh >>u]/=@zCW > ? ;ԧa=ɸ= <0W>؃2=N>==cA^ s=doz=f=N>'T#>J=G=!>fDؽLu.%}qu\<̏*g<N>3cv4<=NmO}=M=<)hv.=C=h>g=kk>g>>ip/>=e1\>ǽ=յ<]Q>sIDw;>~>rH:ou)|Q=0=>OcҘϽ޽aL=$M=L9Қj >w/GK9S҃=>~wԽB>.>,=>˼>_>dk +>z5Gp`ϭ>],>m=@lB@"pe>z68B>a>> ;@k=͜Ͼ\wtx>[$pZ>;c6ǽ;>u\]:8>\q=3>wz>|>+OS>݉=Ԕ=><8-pA>轾?N7>ž ng2?2j@/=<`>0=7_=5^>ҽK><̼2=CwqM?>&< _ b[6Z==Bk<>04<"}0B=<6_>uE*>т>ft,ؾbj>o>cm >Z?<4Z=]y.x=L$>Wk=as>b~^<@y=Jؽ m#><[>C=G <<ڶ;5>'ugؘJ3r#>f;-v> >08>8y9;=⦅ l퟽=PJ3\>r=\~>"x>Yf>Td!={ǯy<,=J߫=@2M8,>5>~=M<2+H>Xq'>X=漘оBK>Ƿ6?cat<#>2vNjl=b>>M>yC L &![$a>=4 ܍=#@,>S,ὓo]=;S;f#B>f~=7;CzzӇ>g=1-Z:,[3Ύ>4OؽJk<б+L<S@>; A=u>hV>,C= <<$=>OVU&@bNn>&i<H$==>N>@! 
=n>!J:w= L0f3DieyN>u{=d >Z̳>I>1M>MT=~>>ǽW=!>pA>Q$Jü=gr>)Է= >S=M#нդ[6=N>2%=MFR>bJ>7T"#>1Yo[>v='߽i=};D>l[%=Q(>@==/Ԟ>~=ݚ~.=Gm19[(i;^hH_>uN> =Tb>7;> C<=p>F6>PT;>Hg23=5=uwX=葾٦=<ʅidWz<>d=t{%R>+KхW̅W۽,HAǾ}7>鶓>,F֚Ѽ{t=A2=ɾR>8 (٥=`A¾]L#]lvڽ;>C Fs[=[!vD>oj>n5L>NWx,=V=f׭=O |=dP=%aST=叜w(>@=5~>M=i=yy꽪x3>!;qGX#$2hpx>J žŀ0>2[/Lْ㿽$;Rľ|Z&:DWF>a(.ϽGZ!>nԽ>XӾA>;=%`ћ =[<2ľ =pͤ><_5%=3:=q"=?=+=G,>;d>%z=P]q >l2;=?̥=i>=ts{=ma?>={(=~(խ'=\1~=*.G>.$>,ӽQ;u*x$>E] >p >>Pz= ?D *t>M,6L : M׿Sؽ>lߝ<]h>"c9>4Q>`>?>=4=>}Y@>He&Oh2Er>ػ4fY`>>Y(>^>)>N>pYO>Nf=<:1>TE=U]뻴KWj>2r=֡כ=NϽ_P>>=>ͫ #'<f. ~ʽd=TsrmRh⽳bP<d.(WJy=U>(=cu>v>ּ<.d>彞Z>*G<ؽ>'>)D!>qN&=&1 #A}=:׽>=;">)/=>"= ! ?*==ܭ\kk>x>4a>^h<I>W>=D:>{=Tq-=Vo>Rgw>>f=$=k~`=)=㮾,}-jļB+>;A< >*>P=T<< vR>໓]\Cwý=_4=dP>>{/"b𳄽=^>pw~oSM=c+ȍ:)b ?@">q='f=Zc턫,>3h>0>,á;xe'>ς⍏>W=$d4 ]C^Jžp$>,˾y|3 Œk'>J\<5w/Y8ů4;=)3>JAJ=$qa@T>VԀ=,>X9>(~GD=aрj\=C>>>>c>>%"虛p >S&JV>dx=RS==L><>E!n=8>BZ\;=6=S$>lҾ@w><9gl=)>zj#>z鸑XO<r V+=<Fu<0>ci>>)=B=fI=E߲1=3= >T%&?C=ы=a=載:ľz=o >>6)>.>j~>>_َLŽ+=Xێ>|BMC>G7nSv=.&>#$Qvnͦ> >=0@->64>jq<3=. -6?>y$ d&>]<8̽͹=~`"D`HB=@N=b麍I6Myp>U>d]m>JH1fX6q>k~=eD<$vR>q{=>ހ><Ғ,lL>H>k-h=vI>2>D Ǹ>G>\>=V\=dꭾ9{6vi>Y>!]оx>ͽQ)>㋽>&Q=.;8[kͼ6ݝ>̂+>i](,엽@A>5S>L3=#|=%;>B=8A=(3]TzPa>=A<<>Jʘ`͘Ǽ-+<>Y>!>Mx->w0>>tՕy =8'>5=s=PU>r>喽(U>@,>v=>O==&"#(>}@=[b;>U8Kq>Mx<ҽI>Dž=w=V>=a>\!q6½ֻK<ؾ(=HG>k>֮l= ,~:w$})=m=^wO 8>ځ>3ZMA>4kKԗ8Yn=>;g=|)>-Q> ?qc<=C=x%>=u>>t>?=(,cq9M½O_7>/="} 3>+<9}㾧s<,pCO?z>Γ>^=S>},<ƃ>Փ=ϖ*B5 ?M=#Ⱦ#.9s>jO]oscLؔR==߷O>&dE `I5ꈽ4i==]1C= ? >Tܽ@|=qM>LY=F71AtI>']/>8*>_ >>6TT>dT>!Ȳ2e=Oo>=44ɽO>>=(ۼa=$;5we >'>;gZ>=K==>G>¾>;>i =>>9>R=>ҥ.]= ߳<6=dS>h>Igg>$=K.z<>}<]p;n= mZ3C>ySν,ǃ%=XY>ѻܱ=H|=7D;o;Q"~ =!n<`U>ҁ&]ӑӾ7⳽-I>ݺZ=`=ܞ>B~>>1ჼ˽,vӼkRG<k=E= '* %H>퍾j_'==>˯5g=K>>4b0 R=v>[(<tM=| rU)>;>ľ޾Sgjo<,>A=0=  3sYJ0!>,>}KY >Z=N <(2>YG׃=8*>bI<So3)># >,ͼԽ㵗=>RIH=J =9yw@]D>ϻ[G=j=M>k>=8#=z=5 5>V>R[(^b>\>Yl>)>=2YV}%=< >(3C=!2 E> >beA>* =1GWg=[ < <'̰=ලQ>SZs=hDy =WUMP=!U゚a/@t>z?)%R>M=R:2nR

MA=B>Xp>3>>l$>@=,>8>>3ތ>߄(=>.==C;>K=+(=jGn2L:=-<\从(>Ï ϽlP6 x6>:>el<=ڏX'<̽6>=H>Qa>P>444|@=?n>de=D8;ʸǼ_[>}P>g9> +}=>*x|=.B ۾g>s ]h>p=K=>o=Y]# ^>/wW">I==>^b->*#>=h {7tMgMU&qt>vKiB>d+t8t==mN)>L `m]qjA |<(=?ǾG>_? m=&}`?L.WP^N>1<\a>>;ҋ='= =)p5>7ì =A?>>3y >Bf>=mR=qe>*?e=]V>mҒ>EP־>=@T+>>b9= >= a%7Nv=D=4=Q=g'>/ؚ>0>]|>>>ȍ%|>pSz<+>1%><< >8*Y<><$/ F<W>Я>8=Ƽ6>v17=mّI>ѽ'A>>iyp>9,uL= l,z,j=ӑ=P>16p'󌒾 ?=oK=~=U2EG>Jgi}7w<[>>s<o=T\۝ݽ51y> +=fE=|>%==н#T=Ui㾦Z>tm#>+"(XGqwٽwX C=^0?7μ W>kF>~=;Z搽ꭽk>? >+ɾ̽ c>vaA>\D>UjL=p=~9K>>ċ<Ͻw@n#=`-=^X W>͝;>Л>Y;s<=Et>OGD*=wr=?I>s[w]<@%>==k>}%Y=+vCaml=x>9G?>|= B3>yCQ@>-׌/>y>6e/ 5=iY+PgcK3j4>\OEp{=]ttp=g!T=.7->؏>>N >>$=o$ >N>0MX4>vz >>Q@>I>=AȾK+ZG>=(ID'';)>7슽a <4>Ph>ߍ<>hb>L?<̡>"s I>2؀2 >͸<"6v?/b;ke:=#uS<\BDmtEsjj><>ʼpZ>V= cT->&>mO=6 > f +kO`h+Ǽ6j=Eg>f>vf>#I>(MݽX=_̌!+>n8>C>=<&{f =u:5=!AȽ;z!=N=U:`fie?•=52>a٬==WE>z>ievKT=1A:>̟/SR=ϧgžR=2zr>k&^b2=D<1;u=ksxڣc=1;n=OPQ>?>\ErV< f#=]:0U>ay >惋ۿ=36>)=D>y>_=\p ֽτཏ4>O$.>}u>g>t`k>>?5r[QS>ҽa=u0ܘ>;[Sg;!7f>zXq>` >K%齗>k霽c> h>V?=f6:콿==x%>>!|LQ>?9> gA=6j>@<>S>: x>( D>:[@>>gǹ >Pb=~jsuw=>ͽeNn=-=$=U5>ƱFE>Z>#i_=Kb`=,?[>)l=`G>,o>>I)%'>hƣ=Bpl`>J{>U>Cي>8J=üy컽#;o=v@F&u5>z>`O3k6ǽс<>w=W>r9gGMŽ>u/=Ik&ݾ+IԼ-sGjmP;>YK$>2}=;>{,и:>6xK=tn~rFA>S.I=Q34( <>'=n=><>I>1>Q֢Jn!~=M=h>qb>-*> %D$v td.> ݏ>`?Ў=im:>fh,>S=F=i> ݽ< >>G=ke6=)ýϥ1>Z n->-~W A׽= :>ӾoF=Xe@s=f=HTЉ0>Gvz#!>`/ýTb=XՄݽ:ݾ(ŵsi>Z֫=j>>ʞ>tu>k1$.>OǾі+>f n#[>e>ه:k(g>??N>PbƝ>xQJ<[9>m>gĞE>񌾿 8gͽMDt0?>7<+=|<=">i_<&:>лN>1^l>|{n6C=>_jN>SwN>̾*`)*B>S">7Ž侧ES>>[=۳>ژ޼)>rE޽1 -Cu{==>d kΥ>V>Đ (zӼ0Zu><>~T>־o=RĽa>|:;Ao>_>^>~װI굾ƶkCY޼Uμ!jiNs S$GR= k>k&>||=*:i=G=>>N> o>>jT> 4>ϗ7>sD>͘= >B<սcϽފ|>N{w>?Y$Ӽ_ŋ*=^m>ua*=-<}=%/<=h-=ApN>}>o=-Zv>MZJ>RTBA>.>>= J$.=g>q9Dd|=ܼ1>θe>|t.>> *>2嫾}>Xi=iLBEn@=b؁ý >\>S/'>z=;᩾o{5܁N3>K>nx>=h=7!:>F>7Y3o >#j>89]>(n=@V>v>Cľd #(>lBMt[J;T Mhz b=⿮>X8>'齵`=>a=>=b_Ww>@=d=;XG>2뽰x>]>ԟA>%>G> u>҄1s>W>tUdRirKA;a>_&ĽFJ> .>Ar6of>>KPZ>սEcо(bU Vr>4>v> =J6=A>X;=`X=>m!x>Y=,!L[;Np&>W@t7->o-31|=Z{^h\>c=>Y>I>W>c)YTK&ZU-,Ns.=@)=NU< S4>]=Z>!߽5>Nh̒==:D5* i&fl=BNtv'/ H%n{ս 
>#R=P=)I"G=>,=S>@?=Lkо7XV>>Aq>`?uGb#@s=׽+\. j+>Ε<=a)=N޼?%=;(L4x<D\>b>^˷=#p>.>="e}>Yy="]6Fž!ɛG=A;ECw=6=Z>:=};=_c='@l *=v즽=Ħy=>N D.D`K<bdy2>֕in >LC=I{cO=1̕=)e:W>==V>q==V㐄>$B5>,=A:9q=(Ryt)F===>,i >8?K<0(2P>6X=N>ji]Ľi=;̾=na=Ҷ>>>o92< %B>3>ֽ0ۂU>ؾ$C%CM>=L l`-!-=: -<㌓>=VFά<,͜=㺩<*gzc>LZ>M>K>Q=ZY=>=սݑ>=O40^>Z=̀9>V>>j>=J{DHhc=> >k{=e,=PfO[5kaӽ@7>J-PZo"dUiۼ.{JK=g]o#ͤ>uluFΙ;D>xy >H&֌>=[D>=G sT>a4LB>̝ =07>ŐrⅾN3>Ti>OPP =xH=c)9>[U=}2J ۡ>L tIԽ ~>8>d_:>.% 澠Zvɽ=`w<>Q> -罈b9~vF>j0o½Nw:/saJ@>w>-\ݻWc@be{ľwC=R̽FL< K6]>>=.%yhJ<S>s!/6BGTF\C>Ec-|ھbs_>7Y>t>#<ͺ= [{=S=>)3=6X#>=$=<{m>g>Pa>bž&3=]uz)S˻=9 "=&b=)4;ͻ½1Xv S=<>GX"@IKMV=:Rн>]>8BWܥ= O<0=<>9Xm:46BּΕX->p>=B7L>rY>V>^>>vlt`u>"u>c=q75ʾ-k7o=2G5AEY\=r=ߖ>dq>8^T>&M@Ʈfmn'݁=B5$>H P w=C>&Ž<"BFlϊ>毙=>ML>ܽ>K8Q<ؽ?4=z]= f>3{D2upQ=HJ==$͂\z*[! >VtB>Fҽ) =&;>A=>l:Q>:gm)ľDo-QXF=;ɽ8=>R>=l>⼏ȾYL=bd;=bι>zEdQcԋc<]#=~\' ']oE=%>وV<<>@S>rG>h=Sr>Ⱦ8;x>Shy=bO@9>\V=M?<;7wվ6l==6q>>+z>N%>*>j20Mp냾|̾5$Cn=sê=E(ؽU>6<9.=$Jg>3;o۠<)>R=ܱ%>T>M6>=#{  FmJS}=d''`u>nS?=N%XR0˕>r_[>=7W>1Y@DTov@=@>x)8>=J=Z>>>2l@QC>!t>ҿD>=Z*=F0p">+>$w ~=-སZ=f\=A>=s"ʽ^{8]=ѧ+><лyٷ=/bN㵽W o>>e<(̽[`>$9[Q=`N=F t#>w2>F"]8mY>R =>Y>22>=CʽRz>ю>,>=Ҿ߼7w>EQC^^M>ϑ؀Ͼ%>=4>,+*Ⱦn=`>DG;=L=Pϼ؏F=>|쁽ÉC=y:>.[wnHh=T>_t̜_l:=S= Q\==+i>R=?hA:7ɣ B>U= >=Qq=i3:>@ē=jȶ==_>>Ёtf=blE<(> nRe/>=? 
=qB>@TG>ӟ>T>%Ň>ˉ1ɂzpƎ>zG))>=)lS>1ȿ'K;f=7=|1cN>Z>>-] m>̎-W>tǡ=Tp=VOQSXe0"=m>U>t'R>>C١{.y£z JM[p2=TN2 >@9AWИZJ<BW 7]> o>䆛0ˆ~=Ƥ>@SL=i=K>vPN>&2>@*5½ >ُ=]7>:;Z21>"=g=>{>n>>k>-x>n߆X>3_Hxξ±6 6(UJ!U?)=; Pl,>Y>I>r>=rυCc~z>s=>><7ݞ>~l>TԽ*mE;>b34uZ *)Ӽi<W=sDK>û lL >IX>&W?<<ͼ!&>f14 hF>c>:u"<7U;=8`;>t>;)ý".> =uш,@D^\:Z^D>Ov<#$ߚH>*=Y>=-f>-<&Q=a?'|>#>a=n#>>Q;8=>k?>`Cf>fjtHN;r=i> ] >^oA!w= ̄&B>O A=M$p==13->E>wH>k;M==0<䨑v=t82Bq=wHz8I9%`CۼPOoBlbr >S3>0=_>>=:Vo~`D>?>J"gĽ>N>Ҍ`<,%>=(^> >ӣ==(>G>v o=F>Y<׽ ={$=㑽2{<&=t=8ZA)==!6Xaۨi(>?><>=0 >> h=0>l&>KP> Λ> :8>AM5>o>NA><ѼJ>Pm >yQWp|j=$=Ɏ>5Y,>pr=th=@>nD0=>9>bn>y>}>)x=^uO(> >W=u1 F T > @c[>V1=>f6/BN>@71=sa=պHʢۼ9>b_6?;Mߜ=<ѽg>ʹ=g/P*&n /l>Nl$G=f=)=/;X=?QZ>'Z>ij>G4D#9>>w6" *>^^>pjDvg=0">4b >ׁ<7SоŽǼ25+a>?>2=<>⻸Pm8k=Rʪy|ZDT>D>;{88>qýYO=;j7=:>kʹ="=p( >y=7>k>:>z=2L<;->b0)xh=ۏ.>Ul*j>Ib=]3G>.Gw>`(8G=cW=N=3?-w=I>㒽|A=-2e >TL]=0>e@X2+os >>Wt>+yA{>:{=Cl=/;;e=O ԍ%=15>K >Cv Lf>q="`O">UA#ɝ>pR?^]S{hi1$翼!52>Ӿ]<~<ظ$U?=3=<>iU;OtZFQxb<=t<0꾷=d;4i==.="=R4={vԽHpV>XT>0GZR颀;:>;ʆ>5yԽ&>@=A<3~>MU=Z>8>0s=Mze>q>t>Wm[9kFP=4>;#>Ⱦ=Z>w,=>t*=X'>_O>L=q>6=ZL==4p"cp=Xֽ* 5td>Z>~?_I>S=(*OU>"R2=K$ >7ɽa1>5=|{mIݾ]=<=6=w=܌M"m>tseŽb=X=r=X<ƛF>2J̾e>R> 6:X==.f>#;q`ƨQ=8G =?a><,½ TG}'>G>>J9ﻞՈ䚽>Ec> =0?w>J_>)xJon>>_>[!9u;2P=mwT/>=*3=$t> Z5i+E[=Jq=&9ǃ @" =>>At=VR#>ΣG=tϽb06J5>p5>d>Tj>s>Ͼ~=v=T>/8>Bڏ>GHj>sI>̩=JZC<^/&]C>RX-mD}W<:h@=-罸׽mr=X=׾L?=szIL>TЪ+q>^j/>T>3c>rνu\0==>-7< ڽ 4>;95>(o>z={aPy>cj=yw9Vg\> 3T= DƼƆ=ǔ>iJ> r<)>ûoOS=埾 ^>䐒><?=eWYj>+U>ok%Ȋ!> Ӽp"=tq1>m=0d!>= =3>oiO;j^=>} 4==P()<-=O=NS=.>ܮ>q=Y2.==["&3>+Z=>1 ;^ۍZ 6 >Ԕ-j4q>~>>=B=]&gli> >vVP{d=H >ͣO=ri>#<)ͼ]ϐ$ټ;2g>=&iž,>><.yu=VO҇>>k>7ǭ<9]-c>N[*>7Éa=VԽ>6>}>B3=J=H[ҝ=FD!ZԼ=?<*"<'@^V@,?=v˼5E>5D>>=Lҽ/믾[}w09|M>k3>w1A>Ӽ>s>k >=٭=!MP>[0=8p>.;Suy|>>Lܽ$=$f>;\>Cm:YN;=cJib&}Yٗ>˫[8ֽS=>_>E,<>:M8*>Τ=@>+* A>s;>}4ٞ$ G\$b'.>H.W1;Ks˽>x!> >=#>+ҽbmMZ9#>Mz=྽K(XH>TNU)>j<+z4ł͂=H=G==327W=<1>Q>2`Y>]A:2eb>5)9,#>>;h&6=P~\<Ɋޡ[&>c=x>ożW?_;>;+>X3>1QԽ ̭72܊׺Kps(<_=U>R>ʗ(0>@Ӯ. 
&=.--=M>jqDyd=|}彳 =P#?LJ>B1>tL*>:==߾5 >^L>G0=vf!-o >Y1̾>R ?]+j >V=9>/dͽRH-vY>a>jv>_?VľF9=,߽0jK=Bxe$$ʞ b> ;9*l==q'=֑>V4}>\wY]S>ളM5>>i>8=i\ꣾa=QL>!SN>>>A f>].K\Q!G_=uDվN>>yo>w>l7Mžpuɗ>`b=maR >"Y/>>/Y>^>,p>Ьw=>dpdK%M>'N=oW2=^$>9_>_R=ɑ>w->W̽W>= =6p->#=8W=& =D36? >oo> m<^=;m^=Ph'M>t>>Z=}V<>ɅK.xB>N'`>o/??ԽEz>O=%>> S)=.+?|>߾urI5?>WƀƎټs>:yls>%Ǚ=MLM>̾ (x>|>E=b>3ߣ>wƾ6r>95(>p>2>6)>8>>>>;!ѕ>2?x>l>.>_>n=rսCE=ߊz(=6(ƚ蕎>Ѩ >䐸=>(&aJ=%'Ѿ4>w?"> 6:#n1ʔAzha>)ԁ>5s m9=V}?;Z>&('4 ``Mb>g!>=d\<˼)=>; X?4=dȽD">==>Yq={>U*>iO=>*Ӫ<8?7 t>x0Ha>VK1>o>{4>P;9нpt==x7=} 辕0\> =>eA孱>Y>=Vl>ī aDqC>63x===5=$=8ܯ"Zr=&_E9iTx vD6ԽHV>?2S=Yu>Ӭ>=!=L"JP˰>JS>8>"W> >1מּ=jg@x 3:=nH>뽱޾:b>~3=߾2ܷkezӽ½N>x/K>=k>>c>]Xt# D}H>jǾZOh(u񙑾C2>d=z<@4۽{ >q>M>iiЉH!<$=N^¥>(QT><>iu8_>=ƾ:+۽Q;>7ng3=\W>r%>:t}8 ?und=/=Yi⾱ɔ>C$>fwM> _>\++t1I:KA>D>{$6Xn/j-=D>-X쾯=_W=P>%¦񽐉X>>0\>ě>=9U ?R'>;=>D >|J=%>uW=ްٽI,V>s>KrDt=|뙽Į(: >_>ߒ=+R9>٩> }+ق yW:FEXʾ^>> G>=z@Uʾ?Ɵ;P= x 6m=%F߾D= >}V=HP>!fN>׭)<>Eb;>U }'=N=7p=7>yK1󏾎)=G=?x[> i^]/>.">E#E1>kFvG^a>l2D>#u=-,0>6> ?>휽lAF=EK{ 1 ?SB>dt=$== ->Z>oоo>16 =3{?] >>m>9*=?=ˋ< ]>K);S>hB=">E> <(3<*;!>>1>?#>m>=99V ྅?=~?dB?/=ٽZjR>m=̽hiWw;>sI>t˽ ]V>>j҉>]R@:=~T=tɐ=c<&>3>nv`#4=1>C=iF>/ >a{fLG=q>e)>Eھ3>DH<2=1>P>8{;%9{+ ?2d>Q}>qOjKٝ>Pծ4>3r=J=D%={ =Ƥa>]YLvB b?轣Y1/$< >{=!>~Z9> B>kx=a>%c<>]E=E=ҵ>iq>y>U:ℽDf= Ԡ>DU>42߷č=K>:>NɃ>N>?># Q>^O8_/ؾÄW0ɽ9>)c=Q]=>nR=>At6T(,0>$1퐜<1wS/>*@>>jʽWF=>O$;VH>͙>%Ct2<>r>#>xZ]X><=_=[}=5Z> !=7>.k2?ٻ^}=<(=>S= o 'u.4ELEX>>,%*&þ>D=S\7G=3f==>n>񭷽OBˆ>=qwf/M>W h>\31\P_< =1>MRQ>tP=-{H>>3ZB =>=e[>i (>K>6>◾wPveb+?<+p,=AW]=☰={lN=XKo ?ȧ3=ԿH>z>§=W>Q ?/G>!> =O= N`8=o);g<" +<+i>\>;7V>[>J?(Ψ=O;[_s1jVH;į\:!Ѿ7i>4!> >ν=siw>)~=`>XΓ=޽`> >QU=ZTŬ=B>x>q8>Cƃ=]<{>; 軽"!Y<[< p t<5ᄉ2>bc=¾Xpy$#>`a>0E9'G>j> 1= Ȁ>L= x>D_>Q;>=M*PgȠr>=Ev羡{2>l~>=0= ǽ>_=&%>s> =G=>=p>vMc8? 
*?>j>,a?T=෈0> (D>(@>֧#O<> |;n@>Z0d ?87m0>@}ƾzM8>>Mf>|}GZ>I=W18=fu]=^U>``>?>u=@[?A>QXY>0>=>>O?$>R$K]xG$ܽiB?>ءA=p>=haK >b> >*=<%) 9[j>0v]= >Dש;p:>uyS>?>R>LP%Ό ?t> >>">L=B/:>3jZ g&>IgH>E#ݭ> >5a>m=>T=l`=xd߾ʍEG~Ž%4UU>niڣ=9,1>,>DtK*?-W Bg!]>sI>@%;>1ԧ=-=zC,1f.W3=ZQrmV?l>o<#%C` =/T>#&60>K)>X ?t?۔*>e܂>)>$="==*o>?=ui:8:Ol厾%>0BHW=%ޭh> >e>fc7>1值0>Ǽ!bs<0)=Ϸ=Wő>>;3(H^$Ⱦ\d?;O>'>Ft>oZ'*=)>}GO#==)>LAƼP=sb>Pɽv<J>{~>8;4Re>`Uh=#C[þ4%=\?PX>==jv>y>a8ϊX=y|y='U c t=V߰u'><$~>t>/= > >|ƾ5>TP>Y>O6=j#Fr=Ua=q=̄~{e->ŰtrYw>}L==}?S>:`c><ΰ">(f=(>P>>c]׈*Ƽ= ưܽ?{Y>Z< =/~<>}HmjUҽ>:`=>F=='dɽ`>GkՑ>&]Uً>(=>L=v>|`1Om=@-< 2Mƽ>Z־z֋> 3= <{_ >*L@TW;<==^k=d >ᅾjm>o>)=Bh> "r3<ȇ=C=?FQ@n>K='g?ԧ3[V.,a$?bȑ^,=W`>럽)9>4;N=ý9&>ΰ;>m=z<[@w@=tCJ->!D>+>j`V5^p"NF=k@>qR7g1>yM=7)j=x=L==d=D>`>ݏ<*o>b=2K,>=3C>>=?L/R=[>s@=Mjf>.Mkx5|Ң=[>̇5j>rrܽLVT,>J/>]="[>- F=Z F>K]>؄*=V =+I>Ͻ>M=yO=[>oifj>$Xz=ۍ=>dBV@=:>=韾Aa|BM>>vXvX>[8=KU&Ÿ_>͊>Kǽ&NҾ==}=A:OP?ɽ_O =fYF43Sk3==t<<5u##i>~K,>xڤoU=8!ϡ>}罸=q\=T>~=M>H l꽯>z<5 ׾J>Kd=!" -B6>IKqp>tټ >*Y6>*<^C<,Ζ޽ұ>LO_<㽒>>!R:>z >lӽ>2i<;Y=-FsKŽM#& tq}?׽> ><A`>a~;/H;+=\3k_9\T">wHQ=1=YJL6Ͼ>Ԯ;:Cl>>n=>1#K?`S0ʼ&8>ؾ W>V=뚏>X=z=ز2. 
?%=ӽ>=[>ڽPZ?ś bBAm{3=(l)>pSE/k>= > >t/&ԽG?h>X(z={;P>>: =Yн8\hm|"s1>^x=s= ϩ 䳎>{=/O=0k>= =q= &6T>袒=j D fkv9m>u>:>xIei0[M=&MJ>Hd W \>Rƒ>>M3,>ҽDV>+|<ݦ==G;=zDk=헆=$2QX+>v>W>k#>kCih=Y#V>7'`BgC>:==>O=|=*>쎽oBQR3h]=kT$>:>+@=H>"[E׽2=^̳l؀=DoM>:Zu>A ΍u᝾mWk/O7ȏ> =Ba-J>X ¹=aY>a S=EU>d$>,m'>6}>4#144=4J<=hi=J=d=N =v8Pݾ/=%>==5=Ƞ >+-%B>fU>Xǫ=B/y=v6q<žEdetk>ե7<R>.\n4ig<y>+>аּ&<;VϾb<>`=J82>ᐦ=܆X>7c>V=eQ= ==y ޻rd4k=aN>1>i=ld>i>&aN<9;:>s>=2]ʽ< SMX>ν:~ O<> J=3h,ۧ@=c5Kl> =۝g>lo᜽kyf&c;!>v>5Խy>ؙE>->'ip+7QԼp>K=Y={0>tz=<<˸/>A潋 >a>L=_>X>(9o@==fMU=8+3>>{>?kk> 'a>߇AϽǽU=8y`>ոw=@㾗nP)K =W=zi<9V%l>n-=׻>&T>gD=?^=E?ޏ<̾C˾=A>i>㉌\=ghy>Z==!d3> ^>rE> j38Յ=`^=>W\>)b;XEO(- =@ k=ٌ[>:W'=g>r̼ྺ<3W=J=5]=>7{>>|=^>LmJ⼴==}@%K>t-_4=="]ƾT=>Ԯ>\>Z]{H>Rž쌾#Ё>.t>C>\u"}=z6m_>aR>^>>*8> Y<^ri}< >n>Q=&KV>swR%`ս]=.N+>r=,݁=z>͇3>Mm>ȞŇ>X޽E,Z:=ݽ>߄>v*>Z>Я>?VM>ZK=]lHS:>qGqoD=3/>F wף>HCs>_¼cfνh[01 D=\w>CA>L"Z >>~M3;U]<7yfU=b8s>1j\>=xBIW=_D>B= ;=%=c==+=g*CP> uRw=>սxIn>7:M_{'L=|>r=H3>_<*.=_= 1z޽1KE>K*A<ʁ.=@=E9<0v>^l>qo>J=3=޶p:>wM>k5ѽ>9=Кw>ҽ->>=5~f YOsh=8=;'*8>~"w.π=V!P -=?>Y==_^>5do>{>u1X>Ѹ%ͼp>+d=(դOs>17>caDoٽ^=<;f;=\Y ͔>|^=&>Z<X&h~=:w<=C> X Tf>Q=Ȃ=N۔>o>ؾ۽+=R>再<᜽>maO+=Уj= ==cSIGhT7r<>U8a>$> s&޲ݼ4ȫ>]q߾ =>?=voO=kvs>J <>L>g[> 8=>=݆T>\lB׷>!#>9=~4= ?<>{/BN."М=JfVg=T:;{,>Ƚm\> u#>1SU>jF>Z>1>{E=iM>T6RV=/&HǾDAݽ/fkM:w=;=Q >%>mV>IDr'? 
= P=nTBm>WRE= /k IZ=軈8=@nKc* (Ag>ķ:߾@ O`|=ߥ>\*씙g >DP\>SPym0A->-->6=>>orF<{zC/<$==(>ȓY>RM0x2=T= =cAS=`==>Yuxl#3=- 1;}ȽMڽ{حr] =xb>C:=߲=B'nn> ߛB=zy #U>'q=6/=4ý =>苣=;=c>O̘>gнs^Y=k=a^>C_&==s*=(=JE; >Z+/ʲ>= I>;=T='YHw>H6vz>t>=QC>=AVNrg==FeX>PV<~Ԫ²5{>-$"뚲<^=j>fÜ4x7T >Q==8<=<,,e=NICc:+=H|>B<=wѽTٽaAaּ=߱7J1%>z.=Ͻs>w;ݘ>cqѝ.a=JĮ=:>j=[6kV31d?=ۖ=:+p<u>#GQ\OϽƝav2QR'س<ϓd> ; M>G;p=qMG~GD=#-q&>P>?A?=¼ vڽl>ڽc~Kn!RѽǬ={Q< >>9=EO>I=+PΑ<P X < ĭy!@_=ܞK*̼xAuq7@ >f<"9>< <+XR)l]hc>  >,>)'±i;= >U|'>c %]F+>@T>pck1T8<7> ,9P>fӽ(L+㕯>?,<4i>8>ҍ@9Go[6!?_э=ж[T>)=1=y=>>kƽFI,>]֪o=.=Tk=:f1='<=R]1ڽ~ijTx> <k^>==S>ż̽nw{ >sG> >̀(>wٽ=>ѽ>@=Di8|}[Ľw!tOܼ4>Jɺ\>Sν=L_iB녕>hA׾(fɇ<T%>J>޶>;V>b彽pҼ{$>S\P4=">* K>->/r>* xL[/>L<&7=4 X]>_>L;1 u=ޖA==ḾhԽ{-q=Ҽ=u>z=][>A | >B-=ܯ=<I. j<%>=E=i&;x]>ٗQν >>?M}=|౽-O>rr=i>]Jes%58A?MӾ #ҥ>;9=b4>i>>N$p=Xr=?w> ̼VɾU#J3q=K>3% > >R=> z>~s>K(eڽ=1^g==A=64>$~0N,N=혻5>ç(<(V<9<clu>+=">H cJ>$ýjU>+>y=h] ҽ5=Aj=>>(A>-i>t>W>Lqzu=Od,׾y/o$#>/\MKY=퀫vQ ,>a>G>V>z>]OkMP8U>>;=9PҼT>kkAq** 7>˽~Sk;Vě>^jƅƽm>*˼V=;c&=!Cb<=8еP>7vd.>h4/>P2Te=(=@LA>>N 5廴U88<={9 .>C>`{>x>K =Swg== c>O>`#>=:=Y?>=>>k1<賨)?tpC.>=g>~=|>I>ms>{ьȽ]& 0<]2>M̵=<0s>RF=S0E:Е6f>a=GK}=LB>D1>;f>%X\zn>8/F=ā >A>u<6Od2s~n=й >Fܽw> [ R0>6)=.>ӻm=-幽<9>_c>rѽ*;>qSuOKu`t۽>޽*>>ۦ<%kRS<c[>,3=+v ָ;n>#V>Ȥ^9V>B=mxT>NEhHd6=ソ}>TPh=5 >fNܼ̿=߻=j>vCV5v> |>(=(#׼E<:vb=f$+>[ZG%[& G>HC<>Nx>>S!/~Þ>۴ Oʽ8ŽV>0}=#>-59>=Gaa}RP|(.H8輸,U+#q9(>M> >9=>*S>.>o=e=2>;1> > w! ۼYqI=V' U= tUh ={4=`[<=1>=󗀾>.M>Q;>Bq=Ծg+<qU>3=03ýr>H=8 >%kH=;ۭ=ўj0dNJ>=f=mGJ۽~^>b\=> &56=h#_=-܍,= -ߎ>H>^̽=q =_=Kv>׃>U<^=c<=>\>=9=a=4U=1j=-} ==9٩">|i=>;F (KbaK=>J_=3P>\>t tw=ca=i%c  +|>WӾ>#<{qS=DA<5H>j>uX<l:`%ʾ2Ɨ>Rm>6`> ˽=n =ԹlB> /> ryC">Uj>s&?F+6>^/=@վz^=*M*GƽGzqL>[| ,>dg*|4<5҃>ܽGė==?dc?\S>'~2>E5ct.yҭm.?)=?ȅþSҽDzM=>>Ʌ:?3dz3>?~;>+B>CNýP=NOV>>>ODI q!ʣ=Iɾ(=鞞>)A>ݜ>8x%B&n?5>o(jq}q=TO?@>">->۾>;23 Ī=pY=O?|~L?l6C=".>\ >MpK F=\e>~=Ѓ>&ֽC>O =>@(>G w>bC=.>w@U>„?q`Tq> C->}>dm;J=\^>Dc>4ə> >a柁@X>܂ᙟ>:? 
D=jv< D_U0݉L@c?6>`7H9= vI>[ݯ>Ne=bc=K1>MoeuO>>D>=)1kU5#=sz=LI>o>aWjǽUޚM;ĽTI뾿<4ྕ_< dFo>hR=6>~= >R==fZ<:.?Y:/i Uo <_\>9=\EHa>uȒsTCJ/ؾ>t٪=8Ű=8A><|Z>82=\V>A> ?ƪ=>=K>#> >+>fӽ᜽ZVSZ>Ӂ=U`H2 p%cę>9=> N3@>n6Uy5>R=B=H-;c>W>RYaI>ݽ,> L!=ƾlr K>-Bs:?X>#d+D=V/׽XMC>z3=p P>qo<>Rf/x>ƞ>ڻRSw=):=jF$>8"l=>=!>:1뾑N>b&:dmE >%}J9m>GŐ>=<t 9=i]U >TþA]Zþv9 W-l疁˘ĉľ¶q>\=>&սS/bd=/>&=FL;M@>% ; K?f׉>i%8<'О>ߕ=S۽>ns> >X>+>|= Oւ>ژ=~=>WhGp>#=mGO>;>I;pD.5g]<\ٿ>>QT)9=#a>F>1n7>4e><,;0Ľn>=3=9\˾3, >MLJs=P=gykr>]=Y*?ysL>hsҽg>f5oQ>w\~,?[>N=aRGν?>KO>/ӺgEd&>͚>4h<N=A> rNi=ѾCW>B>,Ĩ;> VsнC*(=T==l>*>> =4cT=nɽ!>jnM~>W>O>Y_P?>S~ji>>>g!yG=^ĥ>G>;=>r_xeYA2=% =>?9b>Ͻl><9#g> >a-H=Ya=\><>/tN>H>ؾBȾ>>6eI>d=<=Ʉ>G>۾7}B> >!jS줼$f>4> (=,>QbN=fϽ>i< +?ӞҾs$>k>=>C >A>>& >&/F=0>ɂ=]B)?}=M 6ƾeY2>Y5 ޒ=<=v>K= ;?7=8y>6΍)h>C~ 6=#>*8^ i=Ņ>|>l`.=5¾ =`? D>->D!=B=F>66լ=C(y>sB :>U+@W捬 =3:>=g2OU.?|E=:=!>[דý_>x>>.u>9>?>:x╾0Լ cQ ?SL</5>tKA>W%>;,!wK|CT.?2J=m<;l>5ы=̟)`>=>Qx=i/=>X >\𽵗0?jn<-= > >dýW">/3>y>3S}c2>iSt=؏ :P>:=ӣ^;E>F'>*)>>?>DJ=XξvQ>,C>L ><=x>")> !>AϾj_>S݈>3=.Xf3>x={>M P>F>]>>2= @-> K Cs{3B?Mý̧=۾Y>j>&=|>SiWľꍷ=I7>bT>I@>E=V9 h{=5M= m>E[>ӽ2==djsqW{a>hʾ=Ͼ³0+b=%i+>>>(k>l_>RU=ZQW:W3M>>`P;>=D+F=T?+>,[Q>yjj<`>;<=>)̲> t=C>->L$h=ғBH}"Z>w\fξF9 {SB0G>ba{Jd>R=C>k*>X=U >9ֿⶶ='>,q"f ">ݮP]\bST>">B0>x==&uƽto:¾===*n->I>>=_7=#j*=(L=Ŋ=#(Ծ }}=Bh>>q>L2Z<>Hp={ ~=4z=ǻX=>.>}Խl>1?k= "?Yz˾=4>?=B2x> n9>:Ù>R1(>]K>>.r=ȼd=IN> мKyԆ>@>K܉?=|©> M;( ʙ=&g왚=ݭ<==)<6=s|d>K=Pq >qFF?z <> 1;>m>Ƚ] 1E>W¾D=>j?sxg>$>S=H=6>1=GĿ=lr>T->νnA>fWbC%'>P=dֽ >~i9=ճu>R[X=L?j=Sm>}L=]V`ؾ##>F/1$ >im*֠0b(1%JȆƐ > ??|=)7e##=ӽ\>R}:>>M'>>|#5>/>(G>>PE9>r>ע=f>hE[g3>&(=P=پ>-MT>"=:AB>˽H>͘稈 헽Ek=uRCr6SXY|E=Rr =_F>k!*>45>d=kh(㽣ì5R5j>泟=!}> IP>~Q>=7{6>`=!o =ᕽUIuuW<=VE{<ǾMW>n)D>e>M^=l>ܾ>ABӥ=}>ySc>b9>dg/sIPl'.S#n 3 $#>-U&gqtѽ=b>3:$e=i !D->91{mȾ{ǾE>=eNP=e>>YK>4?8==nޝ=e"mUx>v5B>)=G~ s=n􃾤Jý[?_]櫽>=>> >D'T>==+Ƞ"Lg>D>p狾\3ٝ2s!$

)B@~A>3W=*>dVX=׬ ]DߛY:u%:fK>MBY>>;HIA>󣔽L7h=?Ľbh!#=߼-=㓽=(RP0\<!>fT;e>.=+w]==~D>@)">=)D>˾2C=uJ>%3Y}\9T|WR<>xaW=gD%=,%t u">6\=q>i'd#b3>Ux=qc<ҕ;/HNEqv=>޽CÍ>1G <7X*(JO=< ds՟O>y>hڼ@gX= >o>`\8˟< X=̓:M=\$K2vƘn_>ʾh=K>I.>Z}-Vj)1弙ֶ=T+=u/(>Ԇ> T<5>_8>-e>ʲ99a=e̽i c>D>a>+.>ڸg=~G>f@>s`=e ҽ!<)ެ>->I0>>ORþS{OQ>>uF=o=$_e;E)> ü \=@T#=Mk٭d>Iu={Gk@bg\>k1;}qp>FPT9C(;$N~aw>=菠=q=<n=SNO;Q;B>_^Ͻŵ=h0=O,]0>E=|> <(ԾJf]dl>!/J?/kνe˽.b+>8<=:Y LʽK>?>漝<^/R>ゲ彇i+=' ׽.ǯp>!>먼1A =pD#= s> >Qۋ>!?*ýI>-,Lb^u= =8ݢ?>xOtK<;->o/>ӛ<|={=eݗ= MLֽR<_u-漂>!;ڽ(cӽ>^>ډ*>Ӵ >bM?q/>=,@>S=?$?fEнN&R4;=yr=x2=Z>ƕ;l!>;1>=m=^>So'k>T=_.>w>25>Wk=*} *u>%p= =&=!:3\"vE͔>m6=뱁<4M|;״h8>ƽJs"3L>q>X>FU =[սS=/8=%8>%r>,ڣdMAV:/~k> =S?S>q$4(oV>H&qƛz=]=v?ʔ=.3= ލ>'>>yp >½=6Q>R=>&a]ܽs>/N.M> X=0e=/E,/:#>=.<9ݼE9=">}>Hdž>3>cm>=eq=A1+q=ubz>?8=& 6σ<>)>JCZXj<u&q>¾TQ>>cke>ػnbCF>\<60`yu?6U".>ljo>f=̂=fP >um`>;>#@`X>h>'=6='3p=HB=$>޾]<%h{>m봼Ӈ::=HsFXk=^Jýjʳ=,>2=Rg})LB=!>ԓ=x>J >>K=n=Oh>t%>}1>l=p~=P4>&վDZVBK^t>O>V=8dQ O.G>q >J2,2_=\Կ=J&>= [>+2{;>x{;v:8=v(9U پQn>JTº<}zӪ=\=фԽs9p[>-" T}=I!,G4(r >^H=#1 Bono=7">ͼԣ>Q]I'% ;FN>fX K=;7>=B>H>x\SU=^湽!= dB=A>R|>?)_d=Y=6$>o=˻M>ɽ' v>7[< >r峽lN-fcٽ߼>3;l+>dT>bW=rҾ;ɽy =u>ב{> 'a>h={hr>iݲ=% EJ[-I/W=V =/1>'>̽{S>4;>hbPBm>>=ҙ>g<݅iA=nGI>]>jZs> m7>#>M==A_E0A>33p>i=4=<>feJ>[|o1=\==r>1>е>K>CE ޻7dD>>>cRd7}z J*+_>>$w= $.3{ V>!=um"h>}=>z!댾}=$w佃=ӽt?ZA>%ٽFu>Оܴl`>wW8=W >Y4n>#=R!Y=>c>ዼȇ96=ю>e{]=Gl= >ÿ+$! 
[>=A)>=V=l-;*r=FA3==?r>u>]r;ͽ><|J>^7:gm==ݫ}A?=bQ'-h^I >X־2ey>0Mn={<"((r=6.Ů>"7>|J[d=A>e<(h >DL;=2O>  =P>t0> ϼV==üK->zr8>%`p5>WۦFAi>=ڴ9>¤==|D=0om=:xK>ik򕧾x hm:n>P9y /ɽEC>a(>!<`SRHD;ݙ>מG+fսE9#n^3\@= =/;Me>=_ zɸL= ZR=2>vR> ">|=R>|b*=P4=W޺=@g>>>;9TG>lw6]>@`ǾI=C2=61>=a)n+~->>cKjH =ag־^5<\=2 o=ɘ>>dF<:>jQg=Q.a> {p=6=9_+]N1~>gZlݽ>G>:>1R"#-kb=ƽ\|>i==[(=f 8ŽS;9̽LO>"ΑD->!뻲ф=o=~ƍS-߾fI<Žvj>D;sR¼7=,B4㙁!V>+)Yw<F>K>7>u ?F=qfP>/?{z>d3v3(,D>sq d&=>٘=G>ݽnjrbzNB2>߫=$E}i=뵽3T|̌%S >F>W:[=>F-=׫X=^= >˵ꧾ򨽡>=ka=ü/%>SKxiCapLɆ=5>D>@׻c9<>VҼ>N>jVT4s=Z~T=]q<:U;B>%~=GS>R>$νe7*=de=d>ˤ/f׽*>">@*=Ӄ>Z$G>R_p: O!:B>hua===D>Ԥ<" a ><_&'vD6zR,E=Rw==FP<x!>T=ݚ </>u_kRY Y!,kþ=}X>?rQн>vR@U==e>;jYD ,÷ cW=MC>-'=> !#)<n>x>=TG)D=.XT{i8aL >6=,>c=8='>X%M>fDN=鼉IVL=d ><1;urf>[g>%&u

^񼢡*;=btW>,ܿͰ8>m=>ν >9=UiA>>uz>T =ǕJ>JнZk ^(>=⁅=j=Mn3^Wj(3>X9<%=aRǾ=}5Li>q; >ƙ=ችe8ǰ<~U<3T>w7\ ?2;\>V9u4<罭N>5A=Jz^)ǽ&.5!AνxU=>je YF B*>c 5[ϑ==9Y~>5@WW}=Jz=a[">AՒ$@>Zs>j6>_{U>x{=R>+O_)8>S<(x>O@6[Y=lO=DAA>Phw|ǎS>=" = > m>D>/ż]Ƿ=4="=nw>S<2>>=4=ߐkȼT4vJ'V=%;>½4>WZ= =v>Ӛn=Sxvm = 佀[<:>m'>DVRýl q>I=-=Ǯ>>.h >}7= >>;2Y߾|<;>^=uz=2ԼE]Pr:Q{[qZ= 2=%4뽊 =.B=lDWZWd$>j=̓Ok#>bg<,`:ػ*<-(>پX{1d(>=)>N=?;%C4Φ;pI==P=R=ҷ :ORY;ah=K>='=ZU,I>=v[5E54| = =5eJ;\>jR,>ZE>67=ElgpE>:.>p%=e+l7>\q=8> u (Wqm >N;pʽD=ߴb#>=Xo=r}>>>>=ܔнI|=?'>׮>,p0>y+}<Lm>8>BQ9.<?>z|>_c=}!)>%==ao8ֽS"=/>`{RN>5>ؽ1='\|6">>3 >g>~vZ> Ua=?2ѽ^<>x!ZG>9c-S;X"C>{+^=w=A6ʷFCc>(]>낄ͭa;ұPڽθU=9^_Ts >] >I5S>>Ctm=N.z_%=нZ>t=_= ra>I> Y9zX @>.n0=WB- OȽw{"=V5q=:X3û=d-<>"=>N>@2[=k>i==d>;vW>q=)>;@dpH8H>4e==>n8e=<HӼ'L=4|b9I>)4n%ЯB;=dnm=qK=Ȍ=`l*H4^X>#=TAv=h},x4!M=s>E>x>E=)m;9e=>5WP׼Q_I= ۻUxN4>;vC=7> |="|'=jY.(f<^gɖ>=z>y=>UL>>Km=;=$>hʝ4=c۬A>sr<V>JwbKx Os9->\>-=EE@3D+L>=Mņ=>]O:Fo<>.=E[={}x0>j>xe>bF>>ܽf=2MBF=' &A>܃>k=>?=vM\JT==VK%Tg>G=9={7> =osV=OnᲽA'Em=M:B=O01={3ʽ@Eh\>p [SӾRl=@ Y*>P =✄>GB=Aq%ڽ5r; 3>?. 
=!Wv>>s=w>>Ž]̾= >1آ> ysJ6V& >=owҽN >Rn=q>A'=j>np>&5>*Nf>4*ݾFuKa@K/mƆ>Cޢ_E=<#>>ts S4W\>~?}>@>"r<=aGԽg>ͽ/=F, (>E>D?q =V=82l={x >ӡ>}><Ͻ+Ђ=8C&=(QiB9V<-[>ſ=mQ̔ 4;>dtI:>UNfSޅ7k=55= ==>tMK=>GD>s)/WYƄ8>ޥ>Kܽ3Q2<pLR9YW=(77z >r =mK=0ս&^MK=?*=-r>qq,k6>/U>m=8F> g>>|>uJ>>3B=+D>t}u# K=;ӗ=n=Z>Sҽ;>vN=2>Nct =rؿJ>1鍾>>D>x_=+8iVUQ>Mrq>>1J>u7оA!|>ڰͼ7?]hA>\&=T=ٰk>i=iw>M0b=T=[ =-s>\³=׮ռ 4C> =~]=<=<Q=ʌݽj==>>=)ӗ>轟|$lE=A>=l=Fa>̍I>Tþ;,iw>7.>hT=_>ML>`Esʾ:>>vN9I)>;~i=\===@<&>|ݽ6> ȼ;DTƾB(> +UqT><=ݘ>y py>ބ=> -ov^>o3 P >V =NPK>9; = v=N7QX= ,+>OY>hu=Z>7K>|ܽ_P=~=3뽫 =MV(>㸪߶!mfv T=\#ҽj?;Tlтqq)*=BP=>찾a<:|<)>b~FggM= ='=Z/+鰾ccmr>=>i}_E=eb>!f= Yn>=E/7>\[<&O@vZz>lm>^Ӿǹ8=%N>Q>\@1>Ly=WMŽd=&>=Ŧs7 ]>=O齩<> N=@< U='h3t[JO`)>u 4RK>f>E >h~[P=t$=:\a {~X>2>R=lm>UN˽Vþ;qD+<:߽w0-= ?M.|=p>l׽ݬ7> >l.Q20I< >6?D֌þP=oy>߿>jxq=G==?8뻉h)~5ʱ>x>#`i=/ޫsELG7,gh}:-.!Cn>Ul=m߼>Fկ>6>==-jJ>0Y>fǾ*c%><{>N47鿽n>8<<<>~T=Tu=8W=l$>1()=w<c>y>4=mY*=eh= >>=>F>Mϼ_\>HD >`>=~ޚr/h[nC >C>3s9%;zJ=ʈ>jуY'\vL>') D;O-⾫n<9Ǚ=#>Bz>PN=[j%X= >OL_>#S=g!Y ľM>8>E>e_< f<=<~=_L > V>#H+`=lEHEa>9>^1m>n>A>v<ھ>"Nyǘ>s?6>%>ե>Hi[z= W4Z=+.>8ؾ]}{>4o[>>!=bJ=Y!=YNڑ<~CS5>R>xNy쌼M >tt SL>]9nt> &=xѻ\>t= uuQt\x>ʽBp=Q>ڽ 挾*j>ha9 >=F> >Ѿ._=sP½J>%<݃>{;-9<=< >4W+2=^ͽXp>}b=峾٭=(;b>JĴ>y>\X^.>>O!= *>:!|>н=6>x_>Ɉ{rp>c=Od==xj=#=P m>p=aVY='=)ioV>A+=KU>Zoֽ\HhdI>9rR;lv=都t!C1Ļ>s]=oT> >Ϯ=#>茱>X |ꮒ_Z'Tsq+/Y!"8n=OhHLB<Խ=]Zj>ߞY>Yn=kþ>v>{<>A}>Z>6W>`E K>si>Sei5ƈ>N1>  >1<>*2=f>#ɦwӧi8> PI=}\5>L=ؼn'C S#$y!<(rDѽ=+\=9">޼s~e>>>>"?T>cۭ=ix:`.x3=*轘gK'9>B = >捾m=av>>@W>R\hԽ]>*c>|$ {ے>'E: ;O֍!al ST]<8 ]HdJ< #M><4>;|>DCl >Y*=~*`>(>)Y>+=Q׹*?{>=YY>!ݾ4N̽ 6>V>V;)ѾUlL;JN$E>I>$i>]:>>=(#ٽ!U)=2>l=-OdR=֏ټ@@>{[Qh؆ebp@$en `f==f>r=t2wǾn=&>`=Lj>64B k;3ZۋE> e >>>=>%?S>g=ȼ2>|j==߼ld1[>L!<<L>xݾL=q1;X`)O䆾񽓈<ӽ[>>…$=E=ȅ{o =cB }=fȔx ZٽSj =nr9&A=9L> 0>>`d(>{+>|رd>ȅ>H>A>q{~C>~=$B>X2%|>=r޾[!?ǗQ]֮Vo>e̒>=@;Yy+;Λd]=~ Uo*=`<=s>HuIeپOr,o=P19 =[jN0>~Xrk,;.|r>nME<=U;H Iu>zyщ=̾ĕJt jb$`='WK><;zG'3>:`=H__{⽇];S%>=%`꾃CAU=N==9=lH㚾?=y>*>>%҆>p d<<]{=pUIy;ս'j>Ok ۽S}N>3=MF>>Nc>[wWZŅ2?ߐT=?SL2>뽽6>~|k;_촾FRٽ>n=gF>`" 7>^ٷc>Jf >>u,G<⼝=䥠۾=c=fB/>)u`j!h 
?:+=2VfVI>35>ٱ>r>ХS1<$>:<:i> ׽bN>u~=Ǣ2߽ޞ>Nc=>=۷=g>:^><&5Ŀ^=pZ>);tμ%f_Խ㝨MuU[>C;M{>`.L*=O)>b>?=J\<=+Oqٚ>g]5>PM>A  >nH>>XZ|=i%ž=c>ƓE99 >2Zн>>*~=PĽE{&Q> !<;4>t>doF_>==6c=1>{">uP=1>("(mC& >jآ'оR a0t4Adɽ7 NAz=Ђ>`q=>>Щ>Tq<`h}җhĻrZ*=d8>=<ݾH> Q,>4۽Ur]O>`(>ĽS6d=af\=м>>m<=C d䨽2 >!r9򡰾;@x>% b ? 8AOnBU==>Ƅnma ==n><>姽L~#_o>@;= ҽ{=9!LC=v>뽗W)ǽ-񵊼"L?$ \'=YV=z9=l/=&,U =/Ӛ&ɾb<1>ɆZ>$5a!z=>-uݼZvf& 㾑!z>/=id>,x/>(X`>eR=<>dŀ=?=n"!W>h< ;xm%>K >3=#>gV>;>|$>t>C>w#&=̊Q>j.A뽅>RT=:恽1+ٽ<5l=>1 =Н< >}=VHo|= l9>Z)Q>4E=>]@? WFڇ;)=->>WbX$սƶA= t=M+V>M <6=o6=='}@D>e<\>;=W?>66D>=t<=>A`3=>{&e<T>{>A>-o=p>AV>k~>n; DhΓ=}=I>(I=hm=Zlr>gYۼI j9=>!_& C>4#+ļBE>ݾߎO6e=x)=5>/V&5A>1X/yI.?`=-.P[.a(9aw&>>żŢ>e=B=rL ?1T>>!q!ξU߽EQma >=,)>/>tߔ=>>w=+>B>dA8 >B w^ay6=af>fJx=;sO>2(]1>h >Ox>v2>H#>z=;>!Vܲ=V5>  .;MK^>> l$ /4d<[_,==2a/>'|5lp=р-O ><:7a=i.L<׽ xF+>[>^G8> +9 ˽V]݀>[=Fo=>˼s]>w<=!>r6>XW>1Ĩ e=\Ľ%=PW=W§=ιhډ>q`Z>=;><Wbz6Q_=8n=!=fle>g>Ynx=n>gb>㶌ivA=φ=s Q>M>8xͩe|>ͻR?="w=Ց>7%<2>< >H&G>w=_瘾 5f= =>$=#PM<q>Z=g> ? >.N=-qa0>&=ӽ˻Ȧ#ûi>ZܾUi>轡)cE>6ʺ=F>H=ψ>V?n%)>w!1/>-K̽D\=tEH>mKzD>Y]˽3NCGw; >ּY=<]/SChO=0>î>t=i!>::y>wMdK,:>&ը =ā=ad>;@y9>*Q#]z >nCc+yA>O>Ļ=..B>1*l V>o=w>K>->{<(o.,r1>$=I=E=_<"O=`=76>?do־˺Dg)d\=g;O=L] JQ=Ի->l, >C8=lp =A:վF<>>"I=E83W.%v i=K .>>>z<=lo1=f>>+s>T ==`>4@o>>{Z w~B>rP>ԖX=B.>돟û2{ ^`*U=yq>Vtq"0iw'=yL=,>C~=2`ݽ*=>ݷ>rSp= _>Ɛc>Rrݓ>s({>\ڄ<+-_=VGĻXj!5>"15^=c0=Z< s=7cKJ>؄Ӽ;sNJn==u<٥ۃ>D<9d>f1?=j><k!& Ծg菉Gqa>y=>>>r#>Y7<w$㙽:f= y= `?;I=D.rG{>?IػW\>ɨM(>#>*HB=*ax<>WܽKv=?=jýQz*=ۧYi>Zj>d>v!9:>3 =M\=.=H==0> f<^ۂg?J>5=g=Ƌ#p=g=TFG=LvCp>Ч\=>>=>10c >G˾Z=>Dt>!r7[q!>eD[ U>଼=A'MQH9=t3yEz3`>,}= =F)=NO =&ɾk ԼLf齀z=P\=ij>=pN컀"F}=`>==P<>s`=ֽMDR>m=R@p2>e<DҼW>Mی5>/|8&=3(ݱ e=+=R o=Ιk`->w S>X}eD=l|DD<=7}S )K=i_'qU<#d$<1>>xhG>۲ <=5W?Vǭ=~];; <^=X0uȟc=&&&д!;zh.#==PASqc=}=3]Ss$!\ڊw> fQ:WP;7=T> &> >`v< :<ý5-VL<Sڽ ?=н.8=(ӽk3%NF2>GsGrږV=>X>=y\=[ _>(>=;=< =;S<|,=+/=8v=>Ȁ_v>Gd>-޽.?V6Zp=h>8:{!=ݾ0=6ESgS>G>3ML`=/Q=83Bc/4փ-<)ڽnfɽ>=?< S>]>>=>T=<*ܬn 6HS>9@׽3>J=?=> M=O>3>𭧾|>n=Da>ʋƽX=v> #@>(`=`DӴ>G澞:ki>W='>Ϡ*>j:>}s>R=L㉽" 
1><x>:U=8b>*>o"Dx4=z;<9t8=L);`,t<2#2Fj>L=n.ƽa@En=ސ>/=D>.LT=a4s/=A5 >˅̼cF<Sn97 i>%>%M=VZ> >:[Z=6a>B> 0>V^>a= ?=XQ:=t?_>r :>=S5>(+ɥ=]Ƚ">(v{ӽe=W>}5`N',#>D=>*;>U=Q8= !*ol;@Y >Z|z耾SN=c*>DG=zu=b=x=G Z82>==;.=S>Y.%>X=Yk>l]VB/Pp>aUy_>F=>錨h=U(Ʉ=|>DR=!>7<.}=n#ZP>lgLfrsg>p>x/>"!*RU1~)0; =t1hE>R!;Ŧ'>&5==9fxE<)̥cϽPb^>ӽɅ9j5>!sv>)G6p>֩ܽ6*)>XŽ>1>WMD>b<ۡ*==-V=Ez=E=m=vH8< S=D[9>izO =>jW<ǝO>'=>>G> H>0f9>1W ȋо6ܻ=%QY>->6kF<'D* y1>]a>߆&!N[>U >>Y>#H4r>轤:m< P={騺v4>lY5>c0>kɒ>=T1Ge>Q=7R'=Q(>lvѐ`,c=H)<,>o=\<»D>A^i t[ν< ';3=c^>=k}=M`,=`=>X=t{">q!H>=w >0etyQ<9< =Z+轏pi>ys>"=i=6Ұ>¥>J^=%>~><)$=mЛA~i_=d=?>.O<뢾^3]\S=l s>> %[ǽ >7$=>"+e2o쎾'Ӽycp>=X埽=)=78PF>'>4Bȭ=#.=ێ>囑RDr=P 򽿙O>J>㽸>=sK_y=D5=_RJ0?6>Õ/ԽaX>~)=T=W=%r=+ $<&D]ڟ>=0*>@|= h#>˧=jXq齂*=|d=̹E|><*+:>-!>Ÿ=`Խ;#Aʈ>j%G>a*>9=!9=Mk`>R;3B=J ?ݽP:>ۖ*gc=0F#a>==E >@9e>ͱ=(5&Z$>Mnaܑ>=1`W3>=}ڻƬO=">Ƚպ .>/>=}D> =ϗ=)frI0>Xx>=iP=7Sa>xP>>SB>ލ>/>͂<>M_[>>Nn>!7wR].=#=d:򽉱xG!X+ 9u=&R*>B=8bwnCC[=H,VG&5оs-X}Ʊb=͋bb2HҾ+=҅=>A=˽\=>Л= |꾰=>tHn| ҤC>׹>ٔ >jg=I>=1cx>0DS%>ǓBޚ=RmW=?)==j>a>LK=B\zj=ڽ~jTa=hսd}VP=!X>R?v"j>Ri۽1\7H =ٽZ2=-E=P` {M>=V].-=.>`H>= ּ:=k<3&[g>&^ S}]S!>a<5< Eq/b&N"=:q"GG>v=A4;@>6b.>X=_ѓ=r;%=>=l "ͽ%55[羬8<ì>HM2;<[m3 >20UW=)/<ι-u:\>$ =ѻ\wJB?=?2= А j=vɾ :=v>=A3E> K">W=>׷+<0 =Q$>)p>f=_E5>?ƽ,>++N(M!=*¼=㙗< :!>>ds5Ǿw㽴kQNC=m>:k=B y=DǤ< :A۾ekU:=ѻU̽F>v+>;`<=Vy=eR=G$>H뼜O[hA>>X "\>s>?VFV<\Z};><1_`<ŷ)u֔==<9/i=&YX=0=|*%Dn<3ֽcJHEx >;jTx=>=H>%\<>6:=nBZS_=eg\}>[}+>_>9>( =ĒϽK|U==۾@>S<*v>(p;P=|o6.h=h(ܴ=`hC臓>L_= ==l9=n|-D > +w=(#m4-! 
>_',V ۽,=9<!=zCá2_;/>D>6~#>j;|>5,R;o>=6n@>=r=x˽' {l=E=<]; ܽ=59=K=qQn-Xu*=D=<6Wq>q2>WC\=`>`ͻ\aߖ<)C=q<燾 <==aoFh&e;b0>p=B=u=T%S=_W.7U>Pq=lk(ӆ<>>vV+HG=; +>!M>{ܽw=y>u=Y܅o<==`76>Q]:Yǽߐ&\<|½X=Cj>%au?sN^==J>]=ɽyd>2O>K5>>Do>Wʊ9.>ȼ\M==k=";J9˽$d>誽j>v=X >,N>t-=x>xyl>¾Mپ֢H='a.7>!>Vf=Ϋ/ =N%|h؁=$l [{%N.=DNdG>'5?+Dr< f>~bnؽs8Y| U5 鶒`X۽>|[=bݖ=} dʫ=Fx"=G$\5>W= l6G= >i< >Nj=sv\O=`K6G=86ɾ)>T0ơ5Tȼ,>qK,Cy| =݀dn;kC;BͽgOcph>pa==\LE7Wx?ڽo:Y=MV=<>սX>םaŻ=I=㍼,PO!B`Y?ǽ />k$`^fe>bu<;,;v>z<=bѽx:,=Q,=X= >p=]5z7K;GC>=q=.]F=#<3>ݦ=R]O=Q>GOn((<>pdnPdQ=csMtzk=Q=ue `>yd#sw_g>e>cVKn=f=A(F)>,<31>L=5ͅ>*>_5q=7>;Y"> (<=z"=EV9>Z='0> ;:/<ݡ0=&c Vc=_1f0Pb;< &s>CW<&\=>>!ZK>|>r=I=b,//>OG*>=ʌ=T{֫ >=/>e9>JճI<1&r. >'o㭾mܽ ɾFC>H>{.tȾTD<׾QAR>X=fS]I>^'C<(gBRK=q5Soچe`n;=ҖKN==g5|N*(Z`v>O=gq=(ǽ`=)>{ڧ}Rq>']4 a>s>Ҕ/> >:>=Ѓ̽>Z==u!1+>%Ͻ1=+>~2H<߽[=,,=[>X >؃>a=yK6,*>B1z=o=/Ê<>Wd= wH0)->d\>>@> 2ɛ>>xk= 7Jǽc>s5:ބ>\[ZS=44aQ'<>k=V?_=ּ<iӮP ik=q¼y~=.=ACU>< ==>V²>[c v.þiaI=Ixk>f!=Iiq=9H>\KN>@$;=| >iƫ=Dϒy=C?;r;o+Z<8׶㽼S)=]7">̣3>U; O>u+ǽVqn=0)}~<:>ˊO} 0=شK>=BP=t=(O> OEB>'V;=\>J>=oTK.>l =/<8=E<>z=Q=F5K=>=Jێ:OgOaS={=?=@6Y:нQ|3=10>> JYz/G!3Y>W!X>>f=X=V>/5<=VRUֽ=:=o= &a>Xj<3_=W >cҵ>H굽KA=C=yXW[>r >e)B_B>3ە==cg>Yw= m9ձ>U5< >k=$Nq">>g1e=b>:_=u><[Hw>W!=ZӞ= ==p= 1V*+>Wv.:׽Ն&ksT>Pü=@;D=#ς;a]VGn>0<r3Ͻ1h >>.߽ p=.>~9O>~þ`4'=!N= 6>^y1,=4w=>=Ƽb>dM=GM=q,M׍s`i>\2J> 0>JԽ7c}I==*>T U=щ<:bϪ>;ɋ%Ͻ'U>SQظ$>GF3>M>>VFZIʡZ>= N<-)z=>a;ekj>[*=*xT=bk=n\9!ЃnM=;|>>vH1>陽@TNVч>9>@">QLp7=3>L!<D>ECbhn>c=;>4[>w3>Mƶ )/PE92q >JR=oN2=iDf^>><>ҙn+ % <CF=<]j>5=.uJ=E~C+9f1<<>$ >tDàڼC= =ɥ<7}==n!8o/>},2(#>0;]c<->{۽ _+ؽqSh<2X<rvN>= >x=2>10b>k => 2 c=Y=>]>S)!(==m>y=[>*⽽=Vɽk ؼ잽ۘ=MӐ= ɒ=z%;U;=?=0ҼOU= z6ݖKPo>n<=F>6꯼d˳Yͽu==ij>w=}=U&<>=j7= Ar=⮱=eHA>:>㹾\мʈ!>f!D=yJMd>!4= >y=rՆ3o>l do+3T=>km(2=w<D۽.ْ=p=k҆Y޽C04M>ܐ= î=^wŽ0FS>۬=T4Da$U==Ƴ5m#=ݼ}O[k켃p:=%>j=t)>h?~_֣OK;! 
=%=l=6m=f1""v0chf~ý!=8>]>K>9~>t-=k5J>ڲ>dӽ==[/>(~" x\=s@?W=dr~l<<.@'PI.dD>L=wkv2>i2=w O4]q1W=OCݴ>kUQBDMk={t%)CcPu= /j<Ό>>V=y=;_>r*_ֽx5>&:>ڌ,x4>5P;%%߽h> =AֽY#=@>& x=Q-V>rV %=eH=UȽ8 \n=诪0R(QH=脉>O5Oդ=/k=֠>H7/.>Ľ}=?>A;2v x=ڼvRBļJ% >A[<7_=~=->6E"y>Y=0'݀=;N'DGX> =Q>1}n>褠=l`bR>H>H:Ҷ=1͇>|=ֽ`uQ\=gh=l<=Cy> =I}9L⺉37[R=bY6;T>e[; %>O==őʾ >Tw>Z%>2bƭ= ==22޼S<#1J=>V+>ҷ=<=">;E=s=M7<Ӌf>6̔=*>9>8vB}=I/1R>yJ$ý>a-> >(<'^sRSMuܘm˽,r!טgaFT=R=BN=>Y|;KfܽԼۃk>% <8=8=5ZY>>HZa;O`z|?=( p=.>[9>En>PpW><0<_>y7RJ| 14n򽍽1>= 0>P[>=T ='O_>m>-p C/ `66L b;=B¾ښ>D>"&>yq=(<U>l>>Ľ0X; = x>λxν Q=G|[9JH_=c JVS>>ݽ0=/I='%zp]%4v>.>}w=ex=/x=D] տ>4qű=#'>b];T@{ Vcu>.>f=д=z=d{9==_ =Zs> N=>_**$@= x=!폽Op=fUԽ@,,;~.=7>1~+>ֻV=w=*>!=/[B<3EmE='u>==`\1D>==~>h-=4=jrjuVd> =vJ=%)-g>d 4Jw=Ƽƺ{(~=|cJ>>V%7)Q= }>2w %.>Т>w>K.>fg={=2=dUw>n<[=~&=P-^1+ؼp&;ֽ8<[ }2>ELg%=';Ag!>ߐg>-+>K$<=C= ~>b=E됽\=3 >3>r&<)-<8=.y>r[=9=U=ھ0V02U= /=e";mH3>)>=n9 *t&=/,Xs= ߑss곱(>0'> =; qή=l=X=3Yf]-nfrƽYcFԺ=܂Ƚb)=L/7?h>S ==ն>;:2Y>S=O3h>VDT >$z>3_SL6=9ؽ >ST>n(#p[=s< >$wf|^پ^gSսKv$ O*%D>%>ݽKD.>|=k+X3(;h >ː=^=Թ>Kmq11 W?b>=>SPip>>\lc׽mIeٽUF=kvi=њ&\>ƕI>R"<&=;?Obsdnwڸ= b='>n>Ộ=oJC>t>8<圥n= I>Xb==R=>pp1-> YI <_}q=;߼Fm =>LK=q>9eNz$HVf Y<;Q+=v.>S=פA=f".=*$a нƂ="=Q-{->\|s=*>dS>.N>;+=2hQR=ڽ >cR=#L=(J=/~>7^=)d :r==>>==&"->{q%_=j>z/JL(>yH>>)CmRpP>;|xb:;+52InY={,>z{c>+>t >Æn=-p>fד5l,=Y \Q_|0>0|74\,> "O=(5Q>쯽t뒼JJՕ=o8H轪M<:<=?>m>z|Z4="X=곧==gSE<䫽qc,=Q9>3<>_?=|}>H i>Y8> 5X=?=M﮽Z᧾m;̽߰o=$^ =?Xt<|>r>6=薾ڠx?Vtٻu>:6w$>#)iN1=T5dE>"=?;b===.:%N5Ծ"<6!=x0f=漅U 9;><5>;r=daľ!N>@I| =`J>~ >.kdsB=_P)>wJT=7 +mK(K(='>Io=> 0 i:ҽ!>F/>K<>d>a>s :c`<FT>y2>0=/`>F=!}9>k=+_p>ʮ|=|yy; >:>=>w>I>I=kDJw{=e=K\>gfcI>6/>J=`=d\>,=S}ȋ=е=43 ?<3E].=ZuQթ>#y;0p=0=|>Rq㽐< rd!Ȗ==K{buԻ>*=ѮCrs=я1>⼠CK{>7=܅=8>]>>2ޫͰ=0Ǽ=\_߽YDos={>|@#G> @><D= PF>5Q.>s4=rˤ= n;: }Cw;ȊŽ!4=4=[r=)I>5=+>'D2>Y[1b0ξJpz>Ѿ =i~ l=b=j=4"3f<),ٺNw >ŢYqmqY= 諾Ѹ=y=& {>> >JX=Hr=1伖i=]xͬ>n/=u<*> UʼbS>^_nYOD+@}ս5|=ԏ=ז-:3>=_½.N`>q-9=p&>=֍>@ d޼ɷ_Y~!>HK*=?~ُ>}ɬ >.9><6衺=-I) $>ѱ)pB>-:08o=._zr Ue>A=]==g7.>;TY1A|c=fߊFH_,˜<>>Ifu=% >Bu>]>];=`<AFˤ=>Q)GT08>P> 
~>ٶ{>N;U~Ɵ4^8әe>f=1~>>pg>'ՅzٱP{x^z{O=#Up\#׃r==[;-/>t^>+QS>/;.Yq;#< Ͻ=V=[>&fy#,=[wn^>^0vG,нA~ >t=>Y3xӾ",c=?<9=%WM |m==pu=leG>z=һ=SNC5=ɦ[>! ?>E=JSѾ(9u%>>eO{<\<佀=i [P1ް|E>fOaG6Zؽȕ XFu(=wKս>I>j>Ӱ<9=ܦ>ľ?&=mQ̽0<'>S~=j=:ޯ>fo&r=<4T}=틁r$>;='>ޟ=W5 >nj*>"Y=->B>5("OA= X=X =4M=4,9:#;1=⽢x>" >=!*=o-^1=J= ;>hϚfǮ=q=+=M>ޑ>4µ(>3 >EmURнla>Rt뽌Ca%*M쑽b_43g+=v>>==G=}üUJ<$n@x>=|T=(1=?z=ݴ=lE.?1>@k]>=0=_$JpW=f˽ѐ==pXNW=E>k=n%륯Jߙ< mW)>P==3"D>MX+#^.}<>=N=-g>]=ހýXIK>2=` ㊽@=}m.<=K6>r)_JḬ̈h@6<>tE7*DG=[=Ɵ-=`[vAk4 > VSkIi=<.igr2Li>C1> Tp*$>m=UǾ5 =0;=jI>pCsO>`>cS>>g>'RS(vr@><\t=w=PW=fۥ-L>*phR<1Ȁ0=Z2 15 ?=xyY=bx=z*>I2> }Q<҉|#A;.>n6@>/;P<k==*·<0=!d< yY<}<>H3HDa=?IW(|W><&p=MJ5>޻Cx=>\|嵽==Cn=zz>]<.bEk>;>罨\C`;!o>RF>T==P< <=J#i{S7HB8ӝ3:=c=Vv*;2b(<<5o>)H0О>)=E3=G=桽w=$1K>5yܱ;>="cA=7}R=FZ>Rꍽ(R+cȽ=Bjm>i5h|)2=\ =\nK*HV (=]»<4#>,d<< ><9r=R[+=F½97 ?=гl8$;>=.=o޽p@>X =_]=kнN=z=o>q1=3>Iy4!`F>` >`D=؞,=%>EŽيjh ka%a!>X>q>>q~CY=bBM=W=$<4.>t>?X`FY$E= Y<Ҳ$SE=_ {a=>CG> (E޽ƈ<=>l5B="Ԟ;Qb=qk5=Gt==@Ce6>G *#6Z)U\>1==07;Vct>D>N.Z>a=+WI>К[Dz𽈼==^@>g༥bLGJy( _z&\>_)f$=[3>\eAuٷd¼ट=G>;#h=˙= M-;Gn?>I ;=7<=</<>p 3=ԽE+=#>oJ$>Zy>6%^>Z=t5j&L>f-P=Z=ˆ2.=7)[<90=$;fJzY@>NX-=򺢽t^=زY=-=[$11 ֽl=vYIxS >Qj5m>Wk<|~&>騽'>׹=:L==<=;(>WG>vq<<7B()==YS=>8 ;3IN=\, (g)s=V=Us>Bj5z>= ýW>=>]>CM+=t:A=bI4y*>KX~=5@=I =}>/BWZHm서<+U=u=|{> =G>V= >4u>C뽧=݄fdi>R.=>㽕^=$Ià EE{)sXA/=A=2O߉<:ۭBxg嫈Jr>T/ =4Z>[=<=hiQw5=jvjq>d>h0R7Ǽ-2=6[Oc>=B{>ƴ=a>ώ>;7o=ꚧC>Ag =蹦>v'T_>6<06YarD ýG;hA9>FI$"뮖=I< =yd0==k^\پ@n֐+>1~/X>5>nZ=ܣ>r=z.j4 >lq/=Yf> >=Gþ&>=M1#e.t>0 =51ݕl"@>-(y=>nq+ׁ >~wA=09yM>a+<=w !>J<:;<(?KR7w>!lX=C="=|C$Ca=f㽴>93>=q=R I>O~>Hr>_y <5="^>a_=6>늽}_ͽxkdl=t8F;n= >=R ]l=G`>G쁾ݽ5,>7>;->FO==(==*`ݮ"5=?bŽ;`爇=4%Q>7n=W/ cxxb> >*_><]SG/V"R=b>O#>T5qӽ==x>=dQ>5\ c=*>޽8<@{ Bl=1pq>Cs=>4?D=R>Yz>eϽj#Z#=-;-}Z0콵]\ ]/.0~>>=r9»"L5>H<=/M>*f>=Y>Ru=W۽_ӽv(<6<2=>G%>! =d %FK=>n3>}mD+<# vr>=r6=+<7MfV=Jf9z>= '=>g=GH =(>Y=-c%,=mx=S= oZ y=r>sۏ;h+=ak/>>Hۄ>R$>::'ur>#@-D&W*=4>p=7Aڽ]%>g >H>f==t==!ѽ! 0xN:c=yb>StEfW=t> @>hS-Ԗ6t>>>=4zϾJo+=}. 
=a9>|*=Eu=]#>u=Mm;rbBŅ>Z?; Dx)J#=J>* \>$膾 x=b1>ǧwӽp~[*>Y{l{>3/7=Y>N@(C[<`(]ѻ =_H=tqYI+E<>=eėi=ؾ޼Xjr1:R*=tޏ<,C|z=Roa{04==k8 2>V@ $,=3=]o+=ñz >KI>Խ3==ȇ{=\=ȀIbd9>,1U<> '>mKL5L==(dx=L0dB#Z>S >Ǡ=k"=v|56>&>K 8ftE=u\TFb>U\hE>)g8iC?XF:X1[a[:!'s=H<,=.8=댽=f<}Vx(=<?>>6C=۰|.+<*B==J,w>u<>W=%Z>X=xaT<>Z F9 .b#r=VlR>/>"]=8><ǽ@@>`<^=/>ƼVX=Q>a 7=H=ԽsHI$;Z<%a=/=K;L3'@Ѽg! >,lcE<4&͉I>+j=Mi<ߓ0?O44>a=k=ܭC=Ƚu'R=b <`~=^*"`08ռѽ08=I̻2(<*=~@e<h!="=|J >hQK½O}Y=t#+¼2=AK,.;6sܘ=5/d>I><`eA*=ZœDCy :$<

bWaν<~c4S˷( >&g0OuPP=}=8>r0=T;>z+ja߽=MF=ٞ;K췽ʻ=u>=ѽ=\;uh<3N<{=QNNy]>ԽU; <44q?蕇==`Ӿ;0 >AHcL'NgG2>8_=92rc>ٯ{4>gg=L>= <ƶ==Qv=a< ߟi>^>eL>v-Z|AC<ó>m1f=,>><%G>N<ɖ=nL>%]d<#.=)>ңڼ>>>dH=_W=jKWG=>)jtrC>.=>,$>J>s s<ĵK<=';-~ý=c=H>z'=8i)=vw>¡(|齀==΁ Q)gF)>m]׉hj>?;IL]ZeMػ=om[N=Cw=AJ}$>> F:ӛ;Er<8F;lnş_|="==߭< ?W=:ns/==B=r=;)>XI=mTz>Ej>Feú}a>=?5=n .$`_>>սDp<xbY)>HM۾'=DP2/I>![E=G.ɼ8>F=Z>[[tT>$<{^1[+>gM =E5=ӽQt>0>ON=i[DO^~>5T|\Yf>5>ּӼX>p[=2d>X>5s<4}d<.w2=}P>V'>NUPD UÆpD>1>9=hτ>r=5.e=M&i=DvH>_=M[<ጼj&>Ty=/6N!lL<'>>"LQ><ʽ$C>*=qI\78ۼ(;Q$6O7_=nu ^=ȅ>h>6J;>p*!4 >=j=6=Dك ͼ'p!DB}\>P׽P4><\=Wxϊ4J>{HtJ7;Ex3dNdT8>"<>=\=C%w=ェ/>Rν >`S=č =],}= >Yy>5༆V=f >Xg=OB>q>; %;i b8>\>b"==X;0u;C(cO<̽>1{B>3 Y >3פ>7v=lͼj_= G<]LS; wD>blUm!>ܰ+> 7# >OJX߽qi>1<^1==):8;D> >=s==rP%ɨ=#!Z Ɨ>OAF>xf@=!.=,Ɛ<=>F׀=Ղ=*;z->lܬ]{=(L3 3G>ٵ(Ep=Δ=h>}a>l=/f9 R@>T%\랼Y>Q׽`>C>O=(׼'>g8Js}=*>A=\ X>°=џ3>.<ݦ=k1;e >%"=BIO=FRݾm$^=˪=djuAӹ̐ݽ{=3Q> tpG01='dGeᾒ^ xN:s<ʕ}H-'5>Ζ"mr|aTS0>=8ؼ+I=O˾uG'>%>|1 <2-|$yn fA#skD>Sn;r>gĽR@B>0E24>0(w>̫/6l>ZEn=W= ql===~@7 zn>T=WDh|=Ͻui@ =3 *c=W%yx|`)=:<=>ӿ:3Լϛ5=,/;.<x=/= 8,=b)>'=dLr->,=|1^|>q=+(G>LP;C{AXȼ2[:Fȗ=,>X>b=f(f >fp-># = <o|>BMM<}>pv>r>a=nC)B~<=BCS>>=iv[M:=a85=_]>^0=K<:__>;9۬- >Զ+K>1UUқr!>(쟾)>`>u7A:>I.׸\o<~=F =5\f`>sN= <;{ҽGj=͕z$=.==χ=oK`h^Q>R;=$7O<>=JP{>lX羫d>>-Н=ҽRsEiO+H>OfAU\El=Eif<(=§]=y>9Պ=h0>$>UP>|>!MZ,>v<=>o콘2A>4艸V湷=\i>V]`eܱŻ*<8<><μg>e=;ݩ=q=S(9Cj2/ƽ=3>4/>m>T3=hNSM7+<>n߾8.%=G>֕Ue=Ne#VX6>Bȵ(=0uUNǾd=-ރ1<0a[>iđ\=i1i=Yս~ H…4a>D1 =$GҞ=|*~ =ǟ>ܙ;j=E0;k SFP"={@>A=C>G1V>\}tǽ=h1=# [>n ;=s"%X=u P݉EW>TY5;.!v=޿/<췛ê>>g>%f]aU'=o2\>B>mϽh==|Q<6r~LW=kҼv>=Ўǻj>tO4>d=AN= 5)f>KE_>NL̽U<ѝ0p'[}<^tGh>5=Z >喥[=4F>> L>|Q=9N<ờ+>a>i%W=/96>) >z= 2k^S=UUFe;Jnxv{s G>=jý7x>;C=,><=;w>(cl5 >9o>M;>ײUA]>ҽEoU?W=#>鰒>i:'{7g^ν%K;  Jn ݽ=AK=L=W=~}ݥ>7?=f>e$61!">g.10ʶ9= =}t=3>ˬ=~,>a20=@z&>=Pdi=/x=&=O=9K}[e$>3<߾q >=~8H=Z=aq=Lࢽ^`=HUuZ;Ʒm,2$/֯=| =U$[>n&=ɾ>4Y >*OY>=בr=o>)hq=,e>(W=v_iP> ?{#XejH=(.2QB=4oq=K=WWcu>@8=&ԼK0>ߘ H>I>&=lt>Sٶq'=F]=Ʒ$>:ɻtz8R>UZ~MύqʼĶLS<6P./,>g[czX>'= >!jܼ2YJ0xƼ3yLP0,=Dtqw3>ߑ;ؾF;>c$&=FD>Mζ=n%[>#؝_H v>~-ew=qU,' 
=>bl6f&>͝ټDH>L=.9oA>=;+>QԾ=<u<[N=Azϼ->7%r>-9i<$쓚>T"#>齗Zν?av>\=~־?DqUwdx=p>=.>@ a>=<, '~b=,>+<>Ɛehcؽjz~K*!Y=&;M9M=-V=2>I>aGӰ=P[8>\=F= +Խ_J= >/ 0>⤿;4;9&=ˑ 6M >Ӄ=!=[.7>Y';=s8pȚ;_ љ4 (̵KVwm2<]>74:`<ept=lp;q>@%غl8=c2="kc潾>);Y7¾<ő==鬘RjA>=-6ּC=>Τݼ{d==yT>>= mlOԾȇm=6]<)k 66.==r>D>ŽVe<=_˽ D>NCa>D=%Ƚi=;3ؚ##m >>Q>+>e=FbA=0}=wOu,>\qK=~>JИ%O>=]41>Sҽ [>X@a>)2=qν>r=ILk*0>ܾ\=譾$c*:,b ک=>Mm1o9eG$>,p=p"f ^R>k =U>Q˽'Z>2 ==cn=^=>"O龩<;ofܾ]<6³>r,ċ> [`B <@l=\=>t=ѲYi<=>ۑo'x>J>ȅ=}&>k>g!=4>a;u;X;)KA(e=b'O9@;Q`7+'=Z >^m&>0^ې>:vb~r<{>ώ >V "3HL=u<#n>@=#=ՌT@5=]=>ֲ ('=:V2=JZ>'$Jpi=LCl><$CTU;ٽw>I.ͽoƽ5˽}>ݼ4>.>񠍾Y<>oJr2:j@Q@\T=<_4M8Ob> >1%y>|==0=~?X=0uʼ4)Y>O:0>Xx=۽ Ujݽ:>شN:j=X>~=l^E)Y>2]]=Ċh>'(]0==<,>^zns> p˝?1ؽ >Lټ<>toHM==2>==[=ɽ =k>]>= /=!ko> 2ԄJ^>>"G6R=8a]y$=! Ec=F`>޼ >O>t6>ynT>%E=,>=~=J G*>' ae=$=>)>g+x>'@x=puh EPP> >kbnԸ<ݚ3>Vʾ=D2PS>?!>M^ x~>TbZ2D]]9㢽HоN;Ҿ x۲D%8>)>R>HՇ>>>)&A꽲`mC]ݽG$"!{T#:0%!,>΁뽓11>\~H>#>>4c՟> M==H뮾>l=P>"U>ڽX _Nv:`=9 E 07= >@>곱>>7!=q$>:F`=޾<,qļQ>,>"8= >^b>>J=[j=ȍ>U> >@J>~/C:ѾY>#<#>Cּ3wWC>:J=-@>\=>tF=|3>-.=I>"ѿ=>v>+>?(>>eK=F<{g>ZTK ֵGpU&琾0Խ>_2= >j=4=S=ON.!ӽB=1K 0>pB>׽ .>[=4>=ؘ'Y㽉;>2>6>!˽#=s{ܫU<*s>᡾(H>ihýW󞾏%=c>(d5>GV=(ǽm˺ ӹCgb=ܳH7.=}V=P T=jc}>9=@x=] Y̾E)A>hT;KAV>ѽ(g(e:of)=9>x>8=%x[z0=8zw./1>Id=Q*2> 3>+BX nFLAS<>|~>XPmid== !>4[9G+>>H=n>bg%>gѽ" =9;7Xh;E<3Ͷi>퀸t>S0C->NͽUH(6Y40=>ds>`,q=O=/>d D3>a81>Wi=%j>>2h>#B +Dc>{h>!MiD=H&>c]N>>[>nN>J>#(>s5=B>>$>]#BOA>3Wu\=1"<{_[ )=2'>8qp?>Ekϣ*m} 2x< i=7A=&%ʽ>=[<_ݪT>W-=s r3=R=5<(i>&;Q->0a>+>Ai>*2lJI&X0= >y<{]2>뽐~s}2 >#¾!CApQ=x3>Ĺ=l>4>sPnK>T> ^R==u'˾P>q= V>p>3e>&U X>C*y>MvT=~w>'/\>dEB0->x(',׾|:a>=U=rwB =pٮxQ>1>qa6 M߹=ʤ?=nq=Ƅc?Ban.p6<>;;`N14H=\JͽdU=="hؽ=b=А= )%>=۟=/2+9h<r,><>yЍv>?fu>>y>t=?>ץ=ֆS)<3>>Y u>|;>0=+S>y#Ka>8r3JF5>*Jve\=ՇRxR>[=!L0>H7V>A<)=E4>,>[.ߺ쾌<="$=4&e<̾ǯ<Y+rK䊵:Y 'qβ>[?믩<銒=AA>#>9>ι=rY; Clr>I&3>kB >z> յw/>FpW=TzZ=<>0S9>r>i2>7=.;^ړ >?Op zx:>!=a436=HvdN=fTA裀=F= gbӽ>Ӂ=>~>=X=?=>ބ R=P>ټݮ@>K=:=4`n>}e>M }tmW?`L<>m>KD6@!.{ tdϟ=`y>C>_=U>v޽=>̶<ձo">9w=(> =`׼Ė>#˚㱽 
=N=f=NX>F@_=^.eeS>xT>W><8~мSIA>YC>@BGN>4{>bE5>ѪW,ž!<\>;>ii>#Ez;>X>3_>MA>$,aY=>6^,稽YиtLC>P{w;>%?l i ==Bgp퐼|d>ة=7< L>c>n}>UB뵽 J>< =Aw;]x=W=>~"e= A&4>P=>vhܱ=L=ؿ*'ƽyg>彎P&!o#<h<5=Y=(U=ې=FpV=#/ >iQ<j&׽ >|<? 콊8="31>}$GS>,>y6>~J=i>=䫱6M36>lo>=="=sbT=N|>ǫ<=8I>}ɽ+vBj=Y@*ZP=d;=e+e<(>Q=fXV>y_kw> `BA==~Ne@>Wڷ=k>'>)=kR>ᕊ:|=scikit-bio-0.6.2/skbio/embedding/tests/test_embedding.py000066400000000000000000000230651464262511300233040ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd from scipy.spatial.distance import euclidean from skbio.sequence import DNA, Protein from skbio import DistanceMatrix, OrdinationResults from skbio.embedding._protein import ProteinVector from skbio.embedding._embedding import ( Embedding, SequenceEmbedding, SequenceVector, embed_vec_to_numpy, embed_vec_to_dataframe, embed_vec_to_distances, embed_vec_to_ordination ) class EmbeddingTests(TestCase): def setUp(self): self.emb = np.random.randn(62, 10) self.seq = "IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASED" def test_id(self): emb, s = self.emb, self.seq p_emb = Embedding(emb, list(s)) npt.assert_array_equal(p_emb.ids, np.array(list(s))) def test_embedding(self): emb, s = self.emb, self.seq p_emb = Embedding(emb, s) self.assertTupleEqual(p_emb.embedding.shape, (62, 10)) def test_str(self): with self.assertRaises(NotImplementedError): Embedding(self.emb, self.seq).__str__() def test_assert_length(self): msg = "The embedding (62) must have the same length as the ids (63)." 
with self.assertRaises(ValueError) as cm: Embedding(self.emb, self.seq + "A") self.assertEqual(str(cm.exception), msg) class SequenceEmbeddingTests(TestCase): def setUp(self): self.emb = np.random.randn(62, 10) self.seq = "IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASED" def test_repr(self): emb, s = self.emb, self.seq p_emb = SequenceEmbedding(emb, s) rstr = repr(p_emb) self.assertIn("SequenceEmbedding", rstr) self.assertIn("62", rstr) self.assertIn("10", rstr) self.assertIn("IGKEEIQQRL", rstr) def test_str(self): emb, s = self.emb, self.seq p_emb = SequenceEmbedding(emb, s) self.assertEqual(p_emb.__str__(), s) self.assertEqual(p_emb.sequence, s) self.assertEqual(str(p_emb.ids.tobytes().decode("ascii")), s) def test_bytes(self): emb, s = self.emb, self.seq p_emb = SequenceEmbedding(emb, s) res = p_emb.bytes() res_str = str(res.tobytes().decode("ascii")) self.assertEqual(res_str, s) def test_init(self): emb, s = self.emb, self.seq # sequence as string p_emb = SequenceEmbedding(emb, s) self.assertTupleEqual(p_emb.embedding.shape, (62, 10)) # sequence as bytes p_emb = SequenceEmbedding(emb, s.encode("ascii")) self.assertTupleEqual(p_emb.embedding.shape, (62, 10)) # sequence as skbio.Sequence p_emb = SequenceEmbedding(emb, Protein(s)) self.assertTupleEqual(p_emb.embedding.shape, (62, 10)) def test_assert_length(self): msg = "The embedding (62) must have the same length as the ids (63)." 
with self.assertRaises(ValueError) as cm: SequenceEmbedding(self.emb, self.seq + "A") self.assertEqual(str(cm.exception), msg) class SequenceVectorTests(TestCase): def setUp(self): # Create some sample SequenceVector objects for testing self.vector1 = np.array([1, 2, 3]) self.vector2 = np.array([4, 5, 6]) self.vector3 = np.array([7, 8, 9]) self.bad_vector = np.array([7, 8]) self.seq_vectors = [ SequenceVector(self.vector1, "ACGT"), SequenceVector(self.vector2, "GCTA"), SequenceVector(self.vector3, "TTAG") ] def test_init(self): vec = np.array([1, 2, 3]) seq = "ACGT" # sequence as string obs = SequenceVector(vec, seq) npt.assert_array_equal(obs.vector, vec) npt.assert_array_equal(obs.embedding, vec.reshape(1, -1)) npt.assert_array_equal(obs.ids, np.array([b"ACGT"])) # sequence as bytes obs = SequenceVector(vec, seq.encode("ascii")) npt.assert_array_equal(obs.vector, vec) # sequence as skbio.Sequence obs = SequenceVector(vec, DNA(seq)) npt.assert_array_equal(obs.vector, vec) # input is a matrix, not a vector vec2d = np.vstack([vec, vec]) msg = "Only one vector per sequence is allowed." 
with self.assertRaisesRegex(ValueError, msg): SequenceVector(vec2d, seq) def test_vector(self): # Test if the vector attribute is set correctly for i, vector in enumerate([self.vector1, self.vector2, self.vector3]): npt.assert_array_equal(self.seq_vectors[i].vector, vector) def test_sequence(self): # Test if the sequence attribute is set correctly for i, sequence in enumerate(["ACGT", "GCTA", "TTAG"]): self.assertEqual(self.seq_vectors[i].sequence, sequence) def test_repr(self): # Test if the __repr__ method returns the correct string for seq_vector in self.seq_vectors: self.assertTrue(seq_vector.__repr__().startswith("SequenceVector")) self.assertIn("vector", seq_vector.__repr__()) # check latent dimension self.assertIn("4", seq_vector.__repr__()) def test_str(self): # Test if the __str__ method returns the correct string for seq_vector in self.seq_vectors: self.assertEqual(str(seq_vector), seq_vector.sequence) class EmbedVecUtilityTests(TestCase): def setUp(self): self.vector1 = np.array([1, 2, 3]) self.vector2 = np.array([4, 5, 6]) self.vector3 = np.array([7, 8, 9]) self.bad_vector = np.array([7, 8]) self.seq_vectors = [ SequenceVector(self.vector1, "ACGT"), SequenceVector(self.vector2, "GCTA"), SequenceVector(self.vector3, "TTAG") ] def test_embed_vec_to_numpy(self): # Test if to_numpy returns the correct numpy array exp = np.array([self.vector1, self.vector2, self.vector3]) obs = embed_vec_to_numpy(self.seq_vectors) npt.assert_array_equal(obs, exp) # skip validation obs = embed_vec_to_numpy(self.seq_vectors, validate=False) npt.assert_array_equal(obs, exp) def test_embed_vec_to_numpy_raises(self): # input contains non-vector lst = [SequenceVector(self.vector1, "ACGT"), SequenceEmbedding(np.vstack([self.vector2, self.vector3]), "AT")] msg = "Input iterable contains objects that do not subclass EmbeddingVector." 
with self.assertRaisesRegex(ValueError, msg): embed_vec_to_numpy(lst) # mixed sequence types lst = [SequenceVector(self.vector1, "ACGT"), ProteinVector(self.vector2, "MKRPL")] msg = "All objects must be of the same type." with self.assertRaisesRegex(ValueError, msg): embed_vec_to_numpy(lst) # lengths are not equal lst = [SequenceVector(self.vector1, "ACGT"), SequenceVector(self.vector2, "GCTA"), SequenceVector(self.bad_vector, "TTAG")] msg = "All vectors must have the same length." with self.assertRaisesRegex(ValueError, msg): embed_vec_to_numpy(lst) def test_embed_vec_to_distances(self): # Test if to_distances returns a DistanceMatrix object obs = embed_vec_to_distances(self.seq_vectors) self.assertIsInstance(obs, DistanceMatrix) self.assertTupleEqual(obs.shape, (3, 3)) self.assertTrue(all(isinstance(d, float) for d in obs.condensed_form())) d12 = euclidean(self.vector1, self.vector2) d13 = euclidean(self.vector1, self.vector3) d23 = euclidean(self.vector2, self.vector3) exp = DistanceMatrix([[0, d12, d13], [d12, 0, d23], [d13, d23, 0]], ids=["ACGT", "GCTA", "TTAG"]) npt.assert_allclose(obs.data, exp.data) self.assertEqual(obs.ids, exp.ids) obs = embed_vec_to_distances(self.seq_vectors, validate=False) self.assertIsInstance(obs, DistanceMatrix) def test_embed_vec_to_ordination(self): # Test if to_ordination returns an OrdinationResults object obs = embed_vec_to_ordination(self.seq_vectors) self.assertIsInstance(obs, OrdinationResults) self.assertEqual(obs.samples.shape, (3, 3)) self.assertEqual(obs.features.shape, (3, 3)) reconstructed = (obs.samples.values @ obs.features.values.T) npt.assert_allclose( reconstructed, embed_vec_to_numpy(self.seq_vectors) ) obs = embed_vec_to_ordination(self.seq_vectors, validate=False) self.assertIsInstance(obs, OrdinationResults) def test_embed_vec_to_dataframe(self): # Test if to_dataframe returns a pandas DataFrame object obs = embed_vec_to_dataframe(self.seq_vectors) self.assertIsInstance(obs, pd.DataFrame) 
self.assertTupleEqual(obs.shape, (3, 3)) exp = pd.DataFrame([self.vector1, self.vector2, self.vector3], index=["ACGT", "GCTA", "TTAG"]) pd.testing.assert_frame_equal(obs, exp) obs = embed_vec_to_dataframe(self.seq_vectors, validate=False) self.assertIsInstance(obs, pd.DataFrame) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/embedding/tests/test_protein.py000066400000000000000000000134321464262511300230430ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt from skbio.util import get_data_path from skbio import Protein from skbio.embedding._embedding import SequenceVector, embed_vec_to_numpy from skbio.embedding._protein import ( ProteinEmbedding, ProteinVector ) class ProteinEmbeddingTests(TestCase): def setUp(self): self.emb = np.load(get_data_path("embed1.txt.npy")) self.seq = ("IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQ" "QFVANVEEEEAWINEKMTLVASED") self.invalid_seq = ( "$GKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQ" "QFVANVEEEEAWINEKMTLVASED") def test_clipping(self): emb, s = self.emb, self.seq nemb = np.zeros((emb.shape[0] + 2, emb.shape[1])) nemb[1:-1] = emb p2_emb = ProteinEmbedding(nemb, s, clip_head=True, clip_tail=True) npt.assert_array_equal(p2_emb.embedding, emb) self.assertEqual(p2_emb.sequence, s) def test_str(self): emb, s = self.emb, self.seq p_emb = ProteinEmbedding(emb, s) self.assertEqual(str(p_emb), s) self.assertEqual(p_emb.sequence, s) byte_s = np.array([b"I", b"G", b"K", b"E", b"E", b"I", b"Q", b"Q", b"R", b"L", b"A", b"Q", b"F", b"V", b"D", b"H", b"W", b"K", b"E", b"L", b"K", b"Q", b"L", b"A", b"A", b"A", b"R", b"G", b"Q", b"R", b"L", 
b"E", b"E", b"S", b"L", b"E", b"Y", b"Q", b"Q", b"F", b"V", b"A", b"N", b"V", b"E", b"E", b"E", b"E", b"A", b"W", b"I", b"N", b"E", b"K", b"M", b"T", b"L", b"V", b"A", b"S", b"E", b"D"], dtype="|S1") npt.assert_array_equal(p_emb.residues, byte_s) self.assertEqual(str(p_emb.ids.tobytes().decode("ascii")), s) def test_skbio_protein(self): emb, s = self.emb, self.seq p_emb = ProteinEmbedding(emb, Protein(s)) self.assertEqual(str(p_emb), s) self.assertEqual(p_emb.sequence, s) def test_str_spaces(self): seq = ("I G K E E I Q Q R L A Q F V D H W K E L K Q L A " "A A R G Q R L E E S L E Y Q Q F V A N V E E E E " "A W I N E K M T L V A S E D") p_emb = ProteinEmbedding(self.emb, seq) self.assertEqual(str(p_emb), self.seq) self.assertEqual(p_emb.sequence, self.seq) def test_embedding(self): emb, s = self.emb, self.seq p_emb = ProteinEmbedding(emb, s) self.assertTupleEqual(p_emb.embedding.shape, (62, 1024)) def test_assert_length(self): with self.assertRaises(ValueError): ProteinEmbedding(self.emb, self.seq + "A") def test_invalid_sequence(self): emb, s = self.emb, self.invalid_seq with self.assertRaises(ValueError): ProteinEmbedding(emb, s) def test_repr(self): emb, s = self.emb, self.seq p_emb = ProteinEmbedding(emb, s) self.assertIn("ProteinEmbedding", repr(p_emb)) class ProteinVectorTests(TestCase): def setUp(self): rk = 10 self.emb = np.random.randn(rk) self.seq = Protein( "IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASED", metadata={"id": "seq1"} ) self.vector1 = np.array([1, 2, 3]) self.vector2 = np.array([4, 5, 6]) self.vector3 = np.array([7, 8, 9]) self.bad_vector = np.array([7, 8]) self.bad_vector2 = np.array([[7, 8], [7, 9]]) self.protein_vectors = [ProteinVector(self.vector1, "IGKE"), ProteinVector(self.vector2, "EIQQ"), ProteinVector(self.vector3, "RLAQ")] def test_valid_protein_vector(self): ProteinVector(self.emb, self.seq) ProteinVector(self.emb, str(self.seq)) ProteinVector(self.emb, str(self.seq).encode("ascii")) def 
test_invalid_protein_vector(self): seq = "$GKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASED^^" with self.assertRaises(ValueError): ProteinVector(self.emb, seq) with self.assertRaises(ValueError): ProteinVector(self.bad_vector2, seq) def test_invalid_vector_shape(self): msg = "Only one vector per sequence is allowed." with self.assertRaisesRegex(ValueError, msg): ProteinVector(np.vstack([self.emb, self.emb]), self.seq) def test_repr(self): pv = ProteinVector(self.emb, self.seq) self.assertIn("ProteinVector", repr(pv)) self.assertIn("vector dimension", repr(pv)) def test_to_numpy(self): # confirm that Protein objects can be casted to numpy exp = np.array([self.vector1, self.vector2, self.vector3]) obs = embed_vec_to_numpy(self.protein_vectors) npt.assert_array_equal(obs, exp) def test_to_numpy_raises(self): # assert that all types are the same lst = [ProteinVector(self.vector1, "IGKE"), SequenceVector(self.vector2, "EIQQ"), SequenceVector(self.bad_vector, "RLAQ")] with self.assertRaises(ValueError): embed_vec_to_numpy(lst) # assert that all objects subclass EmbeddingVector lst = [Protein("IGKE"), Protein("EIQQ"), Protein("RLAQ")] with self.assertRaises(ValueError): embed_vec_to_numpy(lst) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/io/000077500000000000000000000000001464262511300152765ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/__init__.py000066400000000000000000000206271464262511300174160ustar00rootroot00000000000000r"""Input and Output (:mod:`skbio.io`) ================================== .. currentmodule:: skbio.io This module provides input/output (I/O) functionality for scikit-bio. Supported file formats ---------------------- scikit-bio provides parsers for the following file formats. For details on what objects are supported by each format, see the associated documentation. .. currentmodule:: skbio.io.format .. 
autosummary:: :toctree: generated/ binary_dm biom blast6 blast7 clustal embl embed fasta fastq genbank gff3 lsmat newick ordination phylip qseq stockholm taxdump sample_metadata Read/write files ---------------- .. rubric:: Generic I/O functions .. currentmodule:: skbio.io.registry .. autosummary:: :toctree: generated/ write read sniff .. rubric:: Additional I/O utilities .. currentmodule:: skbio.io .. autosummary:: :toctree: generated/ util Develop custom formats ---------------------- .. rubric:: Developer documentation on extending I/O .. autosummary:: :toctree: generated/ registry Exceptions and warnings ^^^^^^^^^^^^^^^^^^^^^^^ .. currentmodule:: skbio.io .. rubric:: General exceptions and warnings .. autosummary:: FormatIdentificationWarning ArgumentOverrideWarning UnrecognizedFormatError IOSourceError FileFormatError .. rubric:: Format-specific exceptions and warnings .. autosummary:: BLAST7FormatError ClustalFormatError EMBLFormatError FASTAFormatError FASTQFormatError GenBankFormatError GFF3FormatError LSMatFormatError NewickFormatError OrdinationFormatError PhylipFormatError QSeqFormatError QUALFormatError StockholmFormatError Tutorial -------- Reading and writing files (I/O) can be a complicated task: * A file format can sometimes be read into more than one in-memory representation (i.e., object). For example, a FASTA file can be read into an :class:`skbio.alignment.TabularMSA` or :class:`skbio.sequence.DNA` depending on what operations you'd like to perform on your data. * A single object might be writeable to more than one file format. For example, an :class:`skbio.alignment.TabularMSA` object could be written to FASTA, FASTQ, CLUSTAL, or PHYLIP formats, just to name a few. * You might not know the exact file format of your file, but you want to read it into an appropriate object. * You might want to read multiple files into a single object, or write an object to multiple files. 
* Instead of reading a file into an object, you might want to stream the file using a generator (e.g., if the file cannot be fully loaded into memory). To address these issues (and others), scikit-bio provides a simple, powerful interface for dealing with I/O. We accomplish this by using a single I/O registry. What kinds of files scikit-bio can use ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ To see a complete list of file-like inputs that can be used for reading, writing, and sniffing, see the documentation for :func:`skbio.io.util.open`. Reading files into scikit-bio ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ There are two ways to read files. The first way is to use the procedural interface: .. code-block:: python my_obj = skbio.io.read(file, format='someformat', into=SomeSkbioClass) The second is to use the object-oriented (OO) interface which is automatically constructed from the procedural interface: .. code-block:: python my_obj = SomeSkbioClass.read(file, format='someformat') For example, to read a ``newick`` file using both interfaces you would type: >>> from skbio import read >>> from skbio import TreeNode >>> from io import StringIO >>> open_filehandle = StringIO('(a, b);') >>> tree = read(open_filehandle, format='newick', into=TreeNode) >>> tree For the OO interface: >>> open_filehandle = StringIO('(a, b);') >>> tree = TreeNode.read(open_filehandle, format='newick') >>> tree In the case of :func:`skbio.io.registry.read` if ``into`` is not provided, then a generator will be returned. What the generator yields will depend on what format is being read. When ``into`` is provided, format may be omitted and the registry will use its knowledge of the available formats for the requested class to infer the correct format. This format inference is also available in the OO interface, meaning that ``format`` may be omitted there as well. 
As an example: >>> open_filehandle = StringIO('(a, b);') >>> tree = TreeNode.read(open_filehandle) >>> tree We call format inference **sniffing**, much like the :class:`csv.Sniffer` class of Python's standard library. The goal of a ``sniffer`` is two-fold: to identify if a file is a specific format, and if it is, to provide ``**kwargs`` which can be used to better parse the file. .. note:: There is a built-in ``sniffer`` which results in a useful error message if an empty file is provided as input and the format was omitted. Writing files from scikit-bio ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Just as when reading files, there are two ways to write files. Procedural Interface: .. code-block:: python skbio.io.write(my_obj, format='someformat', into=file) OO Interface: .. code-block:: python my_obj.write(file, format='someformat') In the procedural interface, ``format`` is required. Without it, scikit-bio does not know how you want to serialize an object. OO interfaces define a default ``format``, so it may not be necessary to include it. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from importlib import import_module from ._warning import FormatIdentificationWarning, ArgumentOverrideWarning from ._exception import ( UnrecognizedFormatError, FileFormatError, BLAST7FormatError, ClustalFormatError, FASTAFormatError, GenBankFormatError, IOSourceError, FASTQFormatError, LSMatFormatError, NewickFormatError, OrdinationFormatError, PhylipFormatError, QSeqFormatError, QUALFormatError, StockholmFormatError, GFF3FormatError, EMBLFormatError, BIOMFormatError, EmbedFormatError, ) from .registry import write, read, sniff, create_format, io_registry from .util import open __all__ = [ "write", "read", "sniff", "open", "io_registry", "create_format", "FormatIdentificationWarning", "ArgumentOverrideWarning", "UnrecognizedFormatError", "IOSourceError", "FileFormatError", "BLAST7FormatError", "ClustalFormatError", "EMBLFormatError", "FASTAFormatError", "FASTQFormatError", "GenBankFormatError", "GFF3FormatError", "LSMatFormatError", "NewickFormatError", "OrdinationFormatError", "PhylipFormatError", "QSeqFormatError", "QUALFormatError", "StockholmFormatError", "BIOMFormatError", "EmbedFormatError", ] # Necessary to import each file format module to have them added to the I/O # registry. We use import_module instead of a typical import to avoid flake8 # unused import errors. 
import_module("skbio.io.format.blast6") import_module("skbio.io.format.blast7") import_module("skbio.io.format.clustal") import_module("skbio.io.format.embl") import_module("skbio.io.format.fasta") import_module("skbio.io.format.fastq") import_module("skbio.io.format.lsmat") import_module("skbio.io.format.newick") import_module("skbio.io.format.ordination") import_module("skbio.io.format.phylip") import_module("skbio.io.format.qseq") import_module("skbio.io.format.genbank") import_module("skbio.io.format.gff3") import_module("skbio.io.format.stockholm") import_module("skbio.io.format.binary_dm") import_module("skbio.io.format.taxdump") import_module("skbio.io.format.sample_metadata") import_module("skbio.io.format.biom") import_module("skbio.io.format.embed") # This is meant to be a handy indicator to the user that they have done # something wrong. import_module("skbio.io.format.emptyfile") # Now that all of our I/O has loaded, we can add the object oriented methods # (read and write) to each class which has registered I/O operations. io_registry.monkey_patch() scikit-bio-0.6.2/skbio/io/_exception.py000066400000000000000000000056631464262511300200170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- class IOSourceError(Exception): """Raised when a file source cannot be resolved.""" pass class FileFormatError(Exception): """Raised when a file cannot be parsed.""" pass class UnrecognizedFormatError(FileFormatError): """Raised when a file's format is unknown, ambiguous, or unidentifiable.""" pass class GenBankFormatError(FileFormatError): """Raised when a ``genbank`` formatted file cannot be parsed.""" pass class EMBLFormatError(FileFormatError): """Raised when a ``EMBL`` formatted file cannot be parsed.""" pass class GFF3FormatError(FileFormatError): """Raised when a ``GFF3`` formatted file cannot be parsed.""" pass class BLAST7FormatError(FileFormatError): """Raised when a ``blast7`` formatted file cannot be parsed.""" pass class ClustalFormatError(FileFormatError): """Raised when a ``clustal`` formatted file cannot be parsed.""" pass class FASTAFormatError(FileFormatError): """Raised when a ``fasta`` formatted file cannot be parsed.""" pass class QUALFormatError(FASTAFormatError): """Raised when a ``qual`` formatted file cannot be parsed.""" pass class LSMatFormatError(FileFormatError): """Raised when a ``lsmat`` formatted file cannot be parsed.""" pass class OrdinationFormatError(FileFormatError): """Raised when an ``ordination`` formatted file cannot be parsed.""" pass class NewickFormatError(FileFormatError): """Raised when a ``newick`` formatted file cannot be parsed.""" pass class FASTQFormatError(FileFormatError): """Raised when a ``fastq`` formatted file cannot be parsed.""" pass class PhylipFormatError(FileFormatError): """Raised when a ``phylip`` formatted file cannot be parsed. May also be raised when an object (e.g., ``TabularMSA``) cannot be written in ``phylip`` format. 
""" pass class QSeqFormatError(FileFormatError): """Raised when a ``qseq`` formatted file cannot be parsed.""" pass class StockholmFormatError(FileFormatError): """Raised when a ``stockholm`` formatted file cannot be parsed.""" pass class InvalidRegistrationError(Exception): """Raised if function doesn't meet the expected API of its registration.""" pass class DuplicateRegistrationError(Exception): """Raised when a function is already registered in skbio.io.""" pass class BIOMFormatError(Exception): """Raised when a ``BIOM`` formatted file cannot be parsed.""" pass class EmbedFormatError(Exception): """Raised when an ``Embedding`` formatted file cannot be parsed.""" pass scikit-bio-0.6.2/skbio/io/_fileobject.py000066400000000000000000000050161464262511300201170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import h5py def is_binary_file(file): return isinstance( file, (io.BufferedReader, io.BufferedWriter, io.BufferedRandom, h5py.Group) ) # Everything beyond this point will be some kind of hack needed to make # everything work. It's not pretty and it doesn't make great sense much # of the time. I am very sorry to the poor soul who has to read beyond. class FlushDestructorMixin: def __del__(self): # By default, the destructor calls close(), which flushes and closes # the underlying buffer. Override to only flush. 
if not self.closed: self.flush() class SaneTextIOWrapper(FlushDestructorMixin, io.TextIOWrapper): pass class WrappedBufferedRandom(FlushDestructorMixin, io.BufferedRandom): pass class CompressedMixin(FlushDestructorMixin): """Act as a bridge between worlds.""" def __init__(self, before_file, *args, **kwargs): self.streamable = kwargs.pop("streamable", True) self._before_file = before_file super(CompressedMixin, self).__init__(*args, **kwargs) @property def closed(self): return self.raw.closed or self._before_file.closed def close(self): super(CompressedMixin, self).close() # The above will not usually close before_file. We want the # decompression to be transparent, so we don't want users to deal with # this edge case. Instead we can just close the original now that we # are being closed. self._before_file.close() class CompressedBufferedReader(CompressedMixin, io.BufferedReader): pass class CompressedBufferedWriter(CompressedMixin, io.BufferedWriter): pass class IterableStringReaderIO(io.StringIO): def __init__(self, iterable, newline): self._iterable = iterable super(IterableStringReaderIO, self).__init__("".join(iterable), newline=newline) class IterableStringWriterIO(IterableStringReaderIO): def close(self): if not self.closed: backup = self.tell() self.seek(0) for line in self: self._iterable.append(line) self.seek(backup) super(IterableStringWriterIO, self).close() scikit-bio-0.6.2/skbio/io/_iosources.py000066400000000000000000000150011464262511300200170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import io import gzip import bz2 import tempfile import itertools import requests from skbio.io import IOSourceError from ._fileobject import ( IterableStringWriterIO, IterableStringReaderIO, WrappedBufferedRandom, ) # NamedTemporaryFile isn't an actual file class, it is a function which # returns _TemporaryFileWrapper around a normal file object. Instead of # relying on this implementation, we take whatever the class of the result of # NamedTemporaryFile is. with tempfile.NamedTemporaryFile() as fh: _WrappedTemporaryFile = type(fh) def get_io_sources(): return ( # The order of these source is significant as they will short-circuit HTTPSource, FilePathSource, BytesIOSource, BufferedIOSource, TextIOSource, WrappedTemporaryFileSource, IterableSource, ) def _compressors(): return (GzipCompressor, BZ2Compressor) def get_compression_handler(name): compressors = {c.name: c for c in _compressors()} compressors["auto"] = AutoCompressor return compressors.get(name, False) class IOSource: closeable = True def __init__(self, file, options): self.file = file self.options = options def can_read(self): return False def can_write(self): return False def get_reader(self): raise NotImplementedError() def get_writer(self): raise NotImplementedError() class Compressor(IOSource): streamable = True name = "" def can_write(self): return True class FilePathSource(IOSource): def can_read(self): return isinstance(self.file, str) def can_write(self): return self.can_read() def get_reader(self): return io.open(self.file, mode="rb") def get_writer(self): return io.open(self.file, mode="wb") class HTTPSource(IOSource): def can_read(self): return isinstance(self.file, str) and requests.compat.urlparse( self.file ).scheme in {"http", "https"} def get_reader(self): req = requests.get(self.file) # if the response is not 200, an exception will be raised req.raise_for_status() return 
io.BufferedReader(io.BytesIO(req.content)) class BytesIOSource(IOSource): closeable = False def can_read(self): return isinstance(self.file, io.BytesIO) def can_write(self): return self.can_read() def get_reader(self): return WrappedBufferedRandom(self.file) def get_writer(self): return self.get_reader() class BufferedIOSource(IOSource): closeable = False def can_read(self): # `peek` is part of the API we want to guarantee, so we can't just look # for io.BufferedIOBase. Despite the fact that the C implementation of # io.BufferedRandom inherits io.BufferedReader/Writer it is not # reflected in an isinstance check, so we need to check for it manually return isinstance(self.file, (io.BufferedReader, io.BufferedRandom)) def can_write(self): return isinstance(self.file, (io.BufferedWriter, io.BufferedRandom)) def get_reader(self): return self.file def get_writer(self): return self.file class TextIOSource(IOSource): closeable = False def can_read(self): return isinstance(self.file, io.TextIOBase) and self.file.readable() def can_write(self): return isinstance(self.file, io.TextIOBase) and self.file.writable() def get_reader(self): return self.file def get_writer(self): return self.file class WrappedTemporaryFileSource(IOSource): closeable = False def can_read(self): return isinstance(self.file, _WrappedTemporaryFile) and self.file.readable() def can_write(self): return isinstance(self.file, _WrappedTemporaryFile) and self.file.writable() def get_reader(self): # _TemporaryFileWrapper has a file attribute which is an actual fileobj return self.file.file def get_writer(self): return self.file.file class IterableSource(IOSource): def can_read(self): if hasattr(self.file, "__iter__"): iterator = iter(self.file) head = next(iterator, None) if head is None: self.repaired = [] return True if isinstance(head, str): self.repaired = itertools.chain([head], iterator) return True else: # We may have mangled a generator at this point, so just abort raise IOSourceError( "Could not open 
source: %r (mode: %r)" % (self.file, self.options["mode"]) ) return False def can_write(self): return hasattr(self.file, "append") and hasattr(self.file, "__iter__") def get_reader(self): return IterableStringReaderIO(self.repaired, newline=self.options["newline"]) def get_writer(self): return IterableStringWriterIO(self.file, newline=self.options["newline"]) class GzipCompressor(Compressor): name = "gzip" streamable = True def can_read(self): return self.file.peek(2)[:2] == b"\x1f\x8b" def get_reader(self): return gzip.GzipFile(fileobj=self.file) def get_writer(self): return gzip.GzipFile( fileobj=self.file, mode="wb", compresslevel=self.options["compresslevel"] ) class BZ2Compressor(Compressor): name = "bz2" streamable = False def can_read(self): return self.file.peek(3)[:3] == b"BZh" def get_reader(self): return bz2.BZ2File(self.file, mode="rb") def get_writer(self): return bz2.BZ2File( self.file, mode="wb", compresslevel=self.options["compresslevel"] ) class AutoCompressor(Compressor): streamable = True # We can' write so it doesn't matter name = "auto" def get_reader(self): for compression_handler in _compressors(): compressor = compression_handler(self.file, self.options) if compressor.can_read(): return compressor.get_reader() return self.file def get_writer(self): return self.file scikit-bio-0.6.2/skbio/io/_warning.py000066400000000000000000000011341464262511300174530ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- class FormatIdentificationWarning(Warning): """Warn when the sniffer of a format cannot confirm the format.""" pass class ArgumentOverrideWarning(Warning): """Warn when a user provided kwarg differs from a guessed kwarg.""" pass scikit-bio-0.6.2/skbio/io/format/000077500000000000000000000000001464262511300165665ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/format/__init__.py000066400000000000000000000005651464262511300207050ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # ruff: noqa: D104 scikit-bio-0.6.2/skbio/io/format/_base.py000066400000000000000000000166571464262511300202300ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import re import warnings import numpy as np from skbio.util import cardinal_to_ordinal _whitespace_regex = re.compile(r"\s") _newline_regex = re.compile(r"\n") def _decode_qual_to_phred(qual_str, variant=None, phred_offset=None): phred_offset, phred_range = _get_phred_offset_and_range( variant, phred_offset, [ "Must provide either `variant` or `phred_offset` in order to decode " "quality scores.", "Decoding Solexa quality scores is not currently supported, " "as quality scores are always stored as Phred scores in " "scikit-bio. 
Please see the following scikit-bio issue to " "track progress on this:\n\t" "https://github.com/scikit-bio/scikit-bio/issues/719", ], ) qual = np.frombuffer(qual_str.encode("ascii"), dtype=np.uint8) - phred_offset if np.any((qual > phred_range[1]) | (qual < phred_range[0])): raise ValueError( "Decoded Phred score is out of range [%d, %d]." % (phred_range[0], phred_range[1]) ) return qual def _encode_phred_to_qual(phred, variant=None, phred_offset=None): phred_offset, phred_range = _get_phred_offset_and_range( variant, phred_offset, [ "Must provide either `variant` or `phred_offset` in order to encode " "Phred scores.", "Encoding Solexa quality scores is not currently supported. " "Please see the following scikit-bio issue to track progress " "on this:\n\t" "https://github.com/scikit-bio/scikit-bio/issues/719", ], ) qual_chars = [] for score in phred: if score < phred_range[0]: raise ValueError( "Phred score %d is out of range [%d, %d]." % (score, phred_range[0], phred_range[1]) ) if score > phred_range[1]: warnings.warn( "Phred score %d is out of targeted range [%d, %d]. Converting " "to %d." % (score, phred_range[0], phred_range[1], phred_range[1]), UserWarning, ) score = phred_range[1] qual_chars.append(chr(score + phred_offset)) return "".join(qual_chars) def _get_phred_offset_and_range(variant, phred_offset, errors): if variant is None and phred_offset is None: raise ValueError(errors[0]) if variant is not None and phred_offset is not None: raise ValueError("Cannot provide both `variant` and `phred_offset`.") if variant is not None: if variant == "sanger": phred_offset = 33 phred_range = (0, 93) elif variant == "illumina1.3": phred_offset = 64 phred_range = (0, 62) elif variant == "illumina1.8": phred_offset = 33 phred_range = (0, 62) elif variant == "solexa": phred_offset = 64 phred_range = (-5, 62) raise ValueError(errors[1]) else: raise ValueError("Unrecognized variant %r." 
% variant) else: if not (33 <= phred_offset <= 126): raise ValueError( "`phred_offset` %d is out of printable ASCII character range." % phred_offset ) phred_range = (0, 126 - phred_offset) return phred_offset, phred_range def _get_nth_sequence(generator, seq_num): # i is set to None so that an empty generator will not result in an # undefined variable when compared to seq_num. i = None if seq_num is None or seq_num < 1: raise ValueError( "Invalid sequence number (`seq_num`=%s). `seq_num`" " must be between 1 and the number of sequences in" " the file." % str(seq_num) ) try: for i, seq in zip(range(1, seq_num + 1), generator): pass finally: generator.close() if i == seq_num: return seq raise ValueError( "Reached end of file before finding the %s sequence." % cardinal_to_ordinal(seq_num) ) def _parse_fasta_like_header(line): id_ = "" desc = "" header = line[1:].rstrip() if header: if header[0].isspace(): # no id desc = header.lstrip() else: header_tokens = header.split(None, 1) if len(header_tokens) == 1: # no description id_ = header_tokens[0] else: id_, desc = header_tokens return id_, desc def _format_fasta_like_records( generator, id_whitespace_replacement, description_newline_replacement, require_qual, lowercase=None, ): if ( id_whitespace_replacement is not None and "\n" in id_whitespace_replacement ) or ( description_newline_replacement is not None and "\n" in description_newline_replacement ): raise ValueError( "Newline character (\\n) cannot be used to replace whitespace in " "sequence IDs, nor to replace newlines in sequence descriptions." ) for idx, seq in enumerate(generator): if len(seq) < 1: raise ValueError( "%s sequence does not contain any characters (i.e., it is an " "empty/blank sequence). Writing empty sequences is not " "supported." 
% cardinal_to_ordinal(idx + 1) ) if "id" in seq.metadata: id_ = "%s" % seq.metadata["id"] else: id_ = "" if id_whitespace_replacement is not None: id_ = _whitespace_regex.sub(id_whitespace_replacement, id_) if "description" in seq.metadata: desc = "%s" % seq.metadata["description"] else: desc = "" if description_newline_replacement is not None: desc = _newline_regex.sub(description_newline_replacement, desc) if desc: header = "%s %s" % (id_, desc) else: header = id_ if require_qual and "quality" not in seq.positional_metadata: raise ValueError( "Cannot write %s sequence because it does not have quality " "scores associated with it." % cardinal_to_ordinal(idx + 1) ) qual = None if "quality" in seq.positional_metadata: qual = seq.positional_metadata["quality"].values if lowercase is not None: seq_str = seq.lowercase(lowercase) else: seq_str = str(seq) yield header, "%s" % seq_str, qual def _line_generator(fh, skip_blanks=False, strip=True): for line in fh: if strip: line = line.strip() skip = False if skip_blanks: skip = line.isspace() or not line if not skip: yield line def _too_many_blanks(fh, max_blanks): count = 0 too_many = False for line in _line_generator(fh, skip_blanks=False): if line: break else: count += 1 if count > max_blanks: too_many = True break fh.seek(0) return too_many scikit-bio-0.6.2/skbio/io/format/_blast.py000066400000000000000000000043751464262511300204150ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import functools import contextlib import pandas as pd _possible_columns = { "qseqid": str, "qgi": float, "qacc": str, "qaccver": str, "qlen": float, "sseqid": str, "sallseqid": str, "sgi": float, "sallgi": float, "sacc": str, "saccver": str, "sallacc": str, "slen": float, "qstart": float, "qend": float, "sstart": float, "send": float, "qseq": str, "sseq": str, "evalue": float, "bitscore": float, "score": float, "length": float, "pident": float, "nident": float, "mismatch": float, "positive": float, "gapopen": float, "gaps": float, "ppos": float, "frames": str, "qframe": float, "sframe": float, "btop": float, "staxids": str, "sscinames": str, "scomnames": str, "sblastnames": str, "sskingdoms": str, "stitle": str, "salltitles": str, "sstrand": str, "qcovs": float, "qcovhsp": float, } def _parse_blast_data(fh, columns, error, error_message, comment=None, skiprows=None): read_csv = functools.partial( pd.read_csv, na_values="N/A", sep="\t", header=None, keep_default_na=False, comment=comment, skiprows=skiprows, ) # HACK for https://github.com/pandas-dev/pandas/issues/14418 # this avoids closing the `fh`, whose lifetime isn't the responsibility # of this parser with _noop_close(fh) as fh: lineone = read_csv(fh, nrows=1) if len(lineone.columns) != len(columns): raise error(error_message % (len(columns), len(lineone.columns))) fh.seek(0) return read_csv(fh, names=columns, dtype=_possible_columns) # HACK for https://github.com/pandas-dev/pandas/issues/14418 @contextlib.contextmanager def _noop_close(fh): backup = fh.close fh.close = lambda: None try: yield fh finally: fh.close = backup scikit-bio-0.6.2/skbio/io/format/_sequence_feature_vocabulary.py000066400000000000000000000264021464262511300250550ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. 
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import re from skbio.metadata import IntervalMetadata from skbio.io.format._base import _line_generator from skbio.io import FileFormatError def _vocabulary_change(format="insdc", read_in=True): """Return a dict that converts between memory and output vocabulary.""" convert = { "phase": {"insdc": "codon_start"}, "source": {"insdc": "inference"}, "db_xref": {"gff3": "Dbxref"}, "note": {"gff3": "Note"}, } if read_in: return {v[format]: k for k, v in convert.items() if format in v} else: return {k: v[format] for k, v in convert.items() if format in v} def _vocabulary_skip(format="insdc"): """Return vocabluaries skipped for auto disk output, given a format. Return a list of vocabularies that should be skipped when auto output to disk for the specified format. """ skip = { "type": ("insdc", "gff3"), "ID": ("insdc"), "translation": ("gff3"), "strand": ("insdc"), } return [k for k, v in skip.items() if format in v] def _yield_section(is_another_section, **kwargs): """Return function that returns successive sections from file. Parameters ---------- is_another_section : callable It takes a string as input and return a boolean indicating a new section starts. kwargs : dict, optional Keyword arguments will be passed to `_line_generator`. Returns ------- function A function accept a list of lines as input and return a generator to yield section one by one. 
""" def parser(lines): curr = [] for line in _line_generator(lines, **kwargs): # if we find another, return the previous section if is_another_section(line): if curr: yield curr curr = [] curr.append(line) # don't forget to return the last section in the file if curr: yield curr return parser def _parse_section_default( lines, label_delimiter=None, join_delimiter=" ", return_label=False ): """Parse sections in default way. Do 2 things: 1. split first line with label_delimiter for label 2. join all the lines into one str with join_delimiter. """ data = [] label = None line = lines[0] items = line.split(label_delimiter, 1) if len(items) == 2: label, section = items else: label = items[0] section = "" data.append(section) data.extend(lines[1:]) data = join_delimiter.join(i.strip() for i in data) if return_label: return label, data else: return data def _serialize_section_default(header, obj, indent=12): return "{header:<{indent}}{obj}\n".format(header=header, obj=obj, indent=indent) def _parse_feature_table(lines, length): """Parse DDBJ/ENA/GenBank Feature Table.""" imd = IntervalMetadata(length) # skip the 1st FEATURES line if lines[0].startswith("FEATURES"): lines = lines[1:] # magic number 21: the lines following header of each feature # are indented with 21 spaces. feature_indent = " " * 21 section_splitter = _yield_section( lambda x: not x.startswith(feature_indent), skip_blanks=True, strip=False ) for section in section_splitter(lines): _parse_single_feature(section, imd) return imd def _parse_single_feature(lines, imd): """Parse a feature and add it to ``IntervalMetadata`` object. Parameters ---------- imd : IntervalMetadata An IntervalMetadata object to which the parsed feature will be added. lines : list of strings A list of strings representing the lines of text to be parsed. """ voca_change = _vocabulary_change("insdc") # each component of a feature starts with '/', except the 1st # component of location. 
section_splitter = _yield_section(lambda x: x.startswith("/"), strip=True) section_iter = section_splitter(lines) # 1st section is location section = next(section_iter) feature_type, feature_loc = _parse_section_default( section, join_delimiter="", return_label=True ) metadata = {"type": feature_type, "__location": feature_loc} intvl = imd.add(*_parse_loc_str(feature_loc)) for section in section_iter: # following sections are Qualifiers k, v = _parse_section_default( section, label_delimiter="=", join_delimiter=" ", return_label=True ) # 1st char is '/' k = k[1:] if k in voca_change: k = voca_change[k] if k == "phase": v = int(v) - 1 # some Qualifiers can appear multiple times if k in metadata: if not isinstance(metadata[k], list): metadata[k] = [metadata[k]] metadata[k].append(v) else: metadata[k] = v intvl.metadata.update(metadata) def _parse_loc_str(loc_str): """Parse location string. .. warning: This converts coordinates to 0-based from 1-based GenBank coordinate system. The location descriptor can be one of the following [1]_: (a) a single base number. e.g. 467 (b) a site between two indicated adjoining bases. e.g. 123^124 (c) a single base chosen from within a specified range of bases (not allowed for new entries). e.g. 102.110 (d) the base numbers delimiting a sequence span. e.g.340..565 (e) a remote entry identifier followed by a local location descriptor (i.e., a-d). e.g. J00194.1:100..202 Notes ----- This does not fully handle (e) case. It will discard the remote entry part and only keep the local part. When it parses locations across strand (e.g. "complement(123..145),200..209"), it will record all the span parts but will record strand as negative. References ---------- .. 
[1] http://www.insdc.org/files/feature_table.html#3.4 """ # define the tokens operators = ["join", "complement", "order"] LPAREN = r"(?P\()" RPAREN = r"(?P\))" COMMA = r"(?P,)" WS = r"(?P\s+)" a = r"(?P\d+)" b = r"(?P\d+\^\d+)" c = r"(?P\d+\.\d+)" d = r"(?P?\d+)" e_left = r"(?P?\d+)" e_right = r"(?P?[a-zA-Z_0-9\.]+:\d+)" illegal = r"(?P.+)" # The order of tokens in the master regular expression also # matters. When matching, re tries to match pattens in the order # specified. Thus, if a pattern happens to be a substring of a # longer pattern, you need to make sure the longer pattern goes # first. master_pat = re.compile( "|".join( operators + [WS, LPAREN, RPAREN, COMMA, b, c, d, e_left, e_right, a, illegal] ) ) scanner = master_pat.scanner(loc_str) bounds = [] fuzzy = [] metadata = {"strand": "+"} for m in iter(scanner.match, None): p, v = m.lastgroup, m.group() if v == "complement": metadata["strand"] = "-" elif p == "A": start = int(v) bounds.append((start - 1, start)) fuzzy.append((False, False)) elif p == "B": start, end = v.split("^") start = int(start) bounds.append((start - 1, start)) fuzzy.append((False, False)) elif p == "C" or p == "D": if p == "C": start, end = v.split(".") else: start, end = v.split("..") fuzzy_s = fuzzy_e = False if start.startswith("<"): start = start[1:] fuzzy_s = True if end.startswith(">"): end = end[1:] fuzzy_e = True bounds.append((int(start) - 1, int(end))) fuzzy.append((fuzzy_s, fuzzy_e)) elif p == "ILLEGAL": raise FileFormatError('Could not parse location string: "%s"' % loc_str) return bounds, fuzzy, metadata def _serialize_feature_table(intervals, indent=21): """Serialize a list of intervals into a feature table format. Parameters ---------- intervals : list of ``Interval`` A list of Interval objects representing the intervals to be serialized. indent : int, optional The number of spaces to indent each serialized feature. Defaults to 21. 
""" for intvl in intervals: yield _serialize_single_feature(intvl, indent) def _serialize_single_feature(intvl, indent=21): """Serialize a single interval into feature format. Parameters ---------- intvl : Interval The Interval object representing the interval to be serialized. indent : int, optional The number of spaces to indent each serialized feature. Defaults to 21. """ # there are 5 spaces before Feature Key starts. padding = " " * 5 qualifiers = [] md = intvl.metadata voca_skip = _vocabulary_skip("insdc") voca_change = _vocabulary_change("insdc", read_in=False) # sort it so the output order is deterministic for k in sorted(md): if k.startswith("__") or k in voca_skip: continue v = md[k] if k == "phase": v = str(v + 1) if k in voca_change: k = voca_change[k] if isinstance(v, list): for vi in v: qualifiers.append(_serialize_qualifier(k, vi)) else: qualifiers.append(_serialize_qualifier(k, v)) if "__location" in md: loc = md["__location"] else: loc = _serialize_location(intvl) # the qualifiers start at column 22 qualifiers = [" " * indent + i for i in qualifiers] return "{header:<{indent}}{loc}\n{qualifiers}\n".format( header=padding + md["type"], loc=loc, indent=indent, qualifiers="\n".join(qualifiers), ) def _serialize_location(intvl): loc = [] for bound, fuzzy in zip(intvl.bounds, intvl.fuzzy): start, end = bound start += 1 if start == end: s = str(start) elif fuzzy[0] and fuzzy[1]: s = "<%d..>%d" % (start, end) elif fuzzy[0] and not fuzzy[1]: s = "<%d..%d" % (start, end) elif not fuzzy[0] and fuzzy[1]: s = "%d..>%d" % (start, end) else: s = "%d..%d" % (start, end) loc.append(s) if len(loc) > 1: loc_str = "join({})".format(",".join(loc)) else: loc_str = loc[0] if intvl.metadata.get("strand") == "-": loc_str = "complement({})".format(loc_str) return loc_str def _serialize_qualifier(key, value): """Serialize a Qualifier in a feature. Parameters ---------- key : str The key of the Qualifier, representing the type or name of the information. 
value : int, str The value associated with the Qualifier. """ # if value is empty if not value: return "/%s" % key return "/{k}={v}".format(k=key, v=value) scikit-bio-0.6.2/skbio/io/format/binary_dm.py000066400000000000000000000133211464262511300211040ustar00rootroot00000000000000"""Simple binary dissimilarity matrix format (:mod:`skbio.io.format.binary_dm`) ============================================================================ .. currentmodule:: skbio.io.format.binary_dm The Binary DisSimilarity Matrix format (``binary_dm``) encodes a binary representation for dissimilarity and distance matrices. The format is designed to facilitate rapid random access to individual rows or columns of a hollow matrix. Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.stats.distance.DissimilarityMatrix` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.stats.distance.DistanceMatrix` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The binary dissimilarity matrix and object identifiers are stored within an HDF5 [1]_ file. Both datatypes are represented by their own datasets. The `ids` dataset is of a variable length unicode type, while the `matrix` dataset are floating point. The shape of the `ids` is `(N,)`, and the shape of the `dissimilarities` is `(N, N)`. The diagonal of `matrix` are all zeros. The dissimilarity between `ids[i]` and `ids[j]` is interpreted to be the value at `matrix[i, j]`. `i` and `j` are integer indices. 
Required attributes: +-----------+---------------------+------------------------------+ |Attribute |Value |Description | | |type | | +===========+=====================+==============================+ |format |string |A string identifying the file | | | |as Binary DM format | +-----------+---------------------+------------------------------+ |version |string |The version of the current | | | |Binary DM format | +-----------+---------------------+------------------------------+ |matrix |float32 or float64 |A (N, N) dataset containing | | | |the values of the | | | |dissimilarity matrix | +-----------+---------------------+------------------------------+ |order |string |A (N,) dataset of the sample | | | |IDs, where N is the total | | | |number of IDs | +-----------+---------------------+------------------------------+ .. note:: This file format is most useful for storing large matrices that do not need to be represented in a human-readable format. This format is especially appropriate for facilitating random access to entries in the distance matrix, such as when calculating within and between distances for a subset of samples in a large matrix. References ---------- .. [1] http://www.hdfgroup.org/ """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import h5py from skbio.io import create_format from skbio.stats.distance import DissimilarityMatrix, DistanceMatrix binary_dm = create_format("binary_dm", encoding="binary") _vlen_dtype = h5py.special_dtype(vlen=str) @binary_dm.sniffer() def _binary_dm_sniffer(fh): try: f = h5py.File(fh, "r") except OSError: return False, {} header = _get_header(f) if header is None: return False, {} ids = f.get("order") if ids is None: return False, {} mat = f.get("matrix") if mat is None: return False, {} n = len(ids) if mat.shape != (n, n): return False, {} return True, {} @binary_dm.reader(DissimilarityMatrix) def _binary_dm_to_dissimilarity(fh): return _h5py_mat_to_skbio_mat(fh) @binary_dm.reader(DistanceMatrix) def _binary_dm_to_distance(fh): return _h5py_mat_to_skbio_mat(fh) @binary_dm.writer(DissimilarityMatrix) def _dissimilarity_to_binary_dm(obj, fh): return _skbio_mat_to_h5py_mat(fh) @binary_dm.writer(DistanceMatrix) def _distance_to_binary_dm(obj, fh): return _skbio_mat_to_h5py_mat(fh) def _h5py_mat_to_skbio_mat(cls, fh): return cls(fh["matrix"], _parse_ids(fh["order"])) def _skbio_mat_to_h5py_mat(obj, fh): _set_header(fh) ids = fh.create_dataset("order", shape=(len(obj.ids),), dtype=_vlen_dtype) ids[:] = obj.ids fh.create_dataset("matrix", data=obj.data) def _get_header(fh): format_ = fh.get("format") version = fh.get("version") if format is None or version is None: return None else: return {"format": format_[0], "version": version[0]} def _parse_ids(ids): if isinstance(ids[0], bytes): return _bytes_decoder(ids) else: return _passthrough_decoder(ids) def _verify_dimensions(fh): if "order" not in fh or "matrix" not in fh: return False n = len(fh["order"]) return fh["matrix"].shape == (n, n) def _bytes_decoder(x): return [i.decode("utf8") for i in x] def _passthrough_decoder(x): return x def _set_header(h5grp): """Set format spec header information.""" h5grp["format"] = [ b"BDSM", ] 
h5grp["version"] = [ b"2020.06", ] scikit-bio-0.6.2/skbio/io/format/biom.py000066400000000000000000000074251464262511300200760ustar00rootroot00000000000000r"""BIOM-Format (:mod:`skbio.io.format.biom`) ============================================ .. currentmodule:: skbio.io.format.biom The BIOM-Format (format v2.1.0) is an HDF5-based format to represent sample/feature counts or relative abundances. It is designed specifically for sparse data. Internally, it stores the data in both compressed sparse row, and compressed sparse column representation. It additionally has support for representing sample and feature metadata. .. note:: Internally, BIOM describes features as observations, whereas scikit-bio uses the term features. Format Support -------------- **Has Sniffer: Yes** +------+------+-------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+=======================================================+ |Yes |Yes |:mod:`skbio.table.Table` | +------+------+-------------------------------------------------------+ Format Specification -------------------- The official format specification for BIOM-Format can be found at [1]_. Examples -------- Here we will write an existing BIOM table, and re-read it. Note that the Table from ``biom`` implicitly gets the ``.write`` method from the IO registry. This ``ByteIO`` object can be a file path in a regular use case. >>> import io, skbio >>> f = io.BytesIO() >>> skbio.table.example_table.write(f) # doctest: +ELLIPSIS <_io.BytesIO object at ...> >>> roundtrip = skbio.read(f, into=skbio.Table) >>> roundtrip 2 x 3 with 5 nonzero entries (83% dense) References ---------- .. [1] http://biom-format.org/documentation/format_versions/biom-2.1.html """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import h5py import skbio from skbio.io import create_format from skbio.table import Table from .. import BIOMFormatError biom = create_format("biom", encoding="binary") @biom.sniffer() def _biom_sniffer(fh): # this can be buffered, in which case .peek will return the buffer # so slice just in case magic = fh.peek(8)[:8] # From https://en.wikipedia.org/wiki/Hierarchical_Data_Format # Note that Wikipedia specifies: "\211HDF\r\n\032\n" which is an ordinal form: # >>> ord('\211') # 137 # >>> ord('\x89') # 137 # >>> ord('\032') # 26 # >>> ord('\x1a') # 26 if magic == b"\x89HDF\r\n\x1a\n": fp = h5py.File(fh, "r") url = fp.attrs.get("format-url") version = fp.attrs.get("format-version") if url is None or version is None: return False, {} if url != "http://biom-format.org": return False, {} if list(version) != [2, 1]: return False, {} return True, {} else: return False, {} @biom.reader(Table) def _biom_to_table_into(fh): return _biom_to_table(fh) @biom.reader(None) def _biom_to_table_default(fh): # skbio.read('foo.biom', format='biom') # will return a generator, that subsequently iterates the table. # returning a single item tuple yields expected behavior such that: # next(skbio.read('foo.biom', format='biom')) == Table return (_biom_to_table(fh),) def _biom_to_table(fh): h5grp = h5py.File(fh, "r") return Table.from_hdf5(h5grp) @biom.writer(Table) def _sktable_to_biom(obj, fh): _table_to_biom(obj, fh) def _table_to_biom(obj, fh): h5grp = h5py.File(fh, "w") obj.to_hdf5(h5grp, f"Written by scikit-bio version {skbio.__version__}") scikit-bio-0.6.2/skbio/io/format/blast6.py000066400000000000000000000306721464262511300203430ustar00rootroot00000000000000r"""BLAST+6 format (:mod:`skbio.io.format.blast6`) ============================================== .. 
currentmodule:: skbio.io.format.blast6 The BLAST+6 format (``blast+6``) stores the results of a BLAST [1]_ database search. The results are stored in a simple tabular format with no column headers. Values are separated by the tab character. An example BLAST+6-formatted file comparing two protein sequences, taken from [2]_ (tab characters represented by ````):: moaCgi|15800534|ref|NP_286546.1|100.0016100 116111613e-114330 moaCgi|170768970|ref|ZP_02903423.1|99.3816110 116111619e-114329 Format Support -------------- **Has Sniffer: No** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |No |:mod:`pandas.DataFrame` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- BLAST+6 format is a tabular text-based format produced by both BLAST+ output format 6 (``-outfmt 6``) and legacy BLAST output format 8 (``-m 8``). It is tab-separated and has no column headers. With BLAST+, users can specify the columns that are present in their BLAST output file by specifying column names (e.g., ``-outfmt "6 qseqid sseqid bitscore qstart sstart"``), if the default columns output by BLAST are not desired. BLAST Column Types ^^^^^^^^^^^^^^^^^^ The following column types are output by BLAST and supported by scikit-bio. This information is taken from [3]_. 
+-----------+------------------------------------+-----+ |Name |Description |Type | +===========+====================================+=====+ |qseqid |Query Seq-id |str | +-----------+------------------------------------+-----+ |qgi |Query GI |int | +-----------+------------------------------------+-----+ |qacc |Query accesion |str | +-----------+------------------------------------+-----+ |qaccver |Query accesion.version |str | +-----------+------------------------------------+-----+ |qlen |Query sequence length |int | +-----------+------------------------------------+-----+ |sseqid |Subject Seq-id |str | +-----------+------------------------------------+-----+ |sallseqid |All subject Seq-id(s), separated by |str | | |a ';' | | +-----------+------------------------------------+-----+ |sgi |Subject GI |int | +-----------+------------------------------------+-----+ |sallgi |All subject GIs |int | +-----------+------------------------------------+-----+ |sacc |Subject accesion |str | +-----------+------------------------------------+-----+ |saccver |Subject accesion.version |str | +-----------+------------------------------------+-----+ |sallacc |All subject accesions |str | +-----------+------------------------------------+-----+ |slen |Subject sequence length |int | +-----------+------------------------------------+-----+ |qstart |Start of alignment in query |int | +-----------+------------------------------------+-----+ |qend |End of alignment in query |int | +-----------+------------------------------------+-----+ |sstart |Start of alignment in subject |int | +-----------+------------------------------------+-----+ |send |End of alignment in subject |int | +-----------+------------------------------------+-----+ |qseq |Aligned part of query sequence |str | +-----------+------------------------------------+-----+ |sseq |Aligned part of subject sequence |str | +-----------+------------------------------------+-----+ |evalue |Expect value |float| 
+-----------+------------------------------------+-----+ |bitscore |Bit score |float| +-----------+------------------------------------+-----+ |score |Raw score |int | +-----------+------------------------------------+-----+ |length |Alignment length |int | +-----------+------------------------------------+-----+ |pident |Percent of identical matches |float| +-----------+------------------------------------+-----+ |nident |Number of identical matches |int | +-----------+------------------------------------+-----+ |mismatch |Number of mismatches |int | +-----------+------------------------------------+-----+ |positive |Number of positive-scoring matches |int | +-----------+------------------------------------+-----+ |gapopen |Number of gap openings |int | +-----------+------------------------------------+-----+ |gaps |Total number of gaps |int | +-----------+------------------------------------+-----+ |ppos |Percentage of positive-scoring |float| | |matches | | +-----------+------------------------------------+-----+ |frames |Query and subject frames separated |str | | |by a '/' | | +-----------+------------------------------------+-----+ |qframe |Query frame |int | +-----------+------------------------------------+-----+ |sframe |Subject frame |int | +-----------+------------------------------------+-----+ |btop |Blast traceback operations (BTOP) |int | +-----------+------------------------------------+-----+ |staxids |Unique Subject Taxonomy ID(s), |str | | |separated by a ';' (in numerical | | | |order). 
| | +-----------+------------------------------------+-----+ |sscinames |Unique Subject Scientific Name(s), |str | | |separated by a ';' | | +-----------+------------------------------------+-----+ |scomnames |Unique Subject Common Name(s), |str | | |separated by a ';' | | +-----------+------------------------------------+-----+ |sblastnames|unique Subject Blast Name(s), |str | | |separated by a ';' (in alphabetical | | | |order) | | +-----------+------------------------------------+-----+ |sskingdoms |unique Subject Super Kingdom(s), |str | | |separated by a ';' (in alphabetical | | | |order) | | +-----------+------------------------------------+-----+ |stitle |Subject Title |str | +-----------+------------------------------------+-----+ |sstrand |Subject Strand |str | +-----------+------------------------------------+-----+ |salltitles |All Subject Title(s), separated by |str | | |a '<>' | | +-----------+------------------------------------+-----+ |qcovs |Query Coverage Per Subject |int | +-----------+------------------------------------+-----+ |qcovhsp |Query Coverage Per HSP |int | +-----------+------------------------------------+-----+ .. note:: When a BLAST+6-formatted file contains ``N/A`` values, scikit-bio will convert these values into ``np.nan``, matching pandas' convention for representing missing data. .. note:: scikit-bio stores columns of type ``int`` as type ``float`` in the returned ``pd.DataFrame``. This is necessary in order to allow ``N/A`` values in integer columns (this is currently a limitation of pandas). Format Parameters ----------------- The following format parameters are available in ``blast+6`` format: - ``default_columns``: ``False`` by default. If ``True``, will use the default columns output by BLAST, which are qseqid, sseqid, pident, length, mismatch, gapopen, qstart, qend, sstart, send, evalue, and bitscore. .. 
warning:: When reading legacy BLAST files, you must pass ``default_columns=True`` because legacy BLAST does not allow users to specify which columns are present in the output file. - ``columns``: ``None`` by default. If provided, must be a list of column names in the order they will appear in the file. .. note:: Either ``default_columns`` or ``columns`` must be provided, as ``blast+6`` does not contain column headers. Examples -------- Suppose we have a ``blast+6`` file with default columns: >>> from io import StringIO >>> import skbio.io >>> import pandas as pd >>> fs = '\n'.join([ ... 'moaC\tgi|15800534|ref|NP_286546.1|\t100.00\t161\t0\t0\t1\t161\t1\t161' ... '\t3e-114\t330', ... 'moaC\tgi|170768970|ref|ZP_02903423.1|\t99.38\t161\t1\t0\t1\t161\t1' ... '\t161\t9e-114\t329' ... ]) >>> fh = StringIO(fs) Read the file into a ``pd.DataFrame`` and specify that default columns should be used: >>> df = skbio.io.read(fh, format="blast+6", into=pd.DataFrame, ... default_columns=True) >>> df # doctest: +NORMALIZE_WHITESPACE qseqid sseqid pident length mismatch gapopen \ 0 moaC gi|15800534|ref|NP_286546.1| 100.00 161.0 0.0 0.0 1 moaC gi|170768970|ref|ZP_02903423.1| 99.38 161.0 1.0 0.0 qstart qend sstart send evalue bitscore 0 1.0 161.0 1.0 161.0 3.000000e-114 330.0 1 1.0 161.0 1.0 161.0 9.000000e-114 329.0 Suppose we have a ``blast+6`` file with user-supplied (non-default) columns: >>> from io import StringIO >>> import skbio.io >>> import pandas as pd >>> fs = '\n'.join([ ... 'moaC\t100.00\t0\t161\t0\t161\t330\t1', ... 'moaC\t99.38\t1\t161\t0\t161\t329\t1' ... ]) >>> fh = StringIO(fs) Read the file into a ``pd.DataFrame`` and specify which columns are present in the file: >>> df = skbio.io.read(fh, format="blast+6", into=pd.DataFrame, ... columns=['qseqid', 'pident', 'mismatch', 'length', ... 
'gapopen', 'qend', 'bitscore', 'sstart']) >>> df # doctest: +NORMALIZE_WHITESPACE qseqid pident mismatch length gapopen qend bitscore sstart 0 moaC 100.00 0.0 161.0 0.0 161.0 330.0 1.0 1 moaC 99.38 1.0 161.0 0.0 161.0 329.0 1.0 References ---------- .. [1] Altschul, S.F., Gish, W., Miller, W., Myers, E.W. & Lipman, D.J. (1990) "Basic local alignment search tool." J. Mol. Biol. 215:403-410. .. [2] http://blastedbio.blogspot.com/2014/11/column-headers-in-blast-tabular-\ and-csv.html .. [3] http://www.ncbi.nlm.nih.gov/books/NBK279675/ """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import pandas as pd from skbio.io import create_format from skbio.io.format._blast import _parse_blast_data, _possible_columns blast6 = create_format("blast+6") _default_columns = [ "qseqid", "sseqid", "pident", "length", "mismatch", "gapopen", "qstart", "qend", "sstart", "send", "evalue", "bitscore", ] @blast6.reader(pd.DataFrame, monkey_patch=False) def _blast6_to_data_frame(fh, columns=None, default_columns=False): if default_columns and columns is not None: raise ValueError("`columns` and `default_columns` cannot both be" " provided.") if not default_columns and columns is None: raise ValueError("Either `columns` or `default_columns` must be" " provided.") if default_columns: columns = _default_columns else: for column in columns: if column not in _possible_columns: raise ValueError( "Unrecognized column (%r)." 
" Supported columns:\n%r" % (column, set(_possible_columns.keys())) ) return _parse_blast_data( fh, columns, ValueError, "Specified number of columns (%r) does not equal" " number of columns in file (%r).", ) scikit-bio-0.6.2/skbio/io/format/blast7.py000066400000000000000000000345571464262511300203520ustar00rootroot00000000000000"""BLAST+7 format (:mod:`skbio.io.format.blast7`) ============================================== .. currentmodule:: skbio.io.format.blast7 The BLAST+7 format (``blast+7``) stores the results of a BLAST [1]_ database search. This format is produced by both BLAST+ output format 7 and legacy BLAST output format 9. The results are stored in a simple tabular format with headers. Values are separated by the tab character. An example BLAST+7-formatted file comparing two nucleotide sequences, taken from [2]_ (tab characters represented by ````): .. code-block:: none # BLASTN 2.2.18+ # Query: gi|1786181|gb|AE000111.1|AE000111 # Subject: ecoli # Fields: query acc., subject acc., evalue, q. start, q. end, s. st\ art, s. end # 5 hits found AE000111AE0001110.0110596110596 AE000111AE0001748e-305565567169286821 AE000111AE0003941e-2755875671135219 AE000111AE0004256e-265587567185528468 AE000111AE0001713e-245587567122142130 Format Support ============== **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |No |:mod:`pandas.DataFrame` | +------+------+---------------------------------------------------------------+ Format Specification ==================== There are two BLAST+7 file formats supported by scikit-bio: BLAST+ output format 7 (``-outfmt 7``) and legacy BLAST output format 9 (``-m 9``). Both file formats are structurally similar, with minor differences. Example BLAST+ output format 7 file:: # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: q. start, q. end, s. start, s. 
end, identical, mismatches, sbjct\ frame, query acc.ver, subject acc.ver # 2 hits found 1 8 3 10 8 0 1 query1 subject2 2 5 2 15 8 0 2 query1 subject2 .. note:: Database searches without hits may occur in BLAST+ output format 7 files. scikit-bio ignores these "empty" records: .. code-block:: none # BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found Example legacy BLAST output format 9 file: .. code-block:: none # BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: AF178033 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap openings,q.\ start,q. end,s. start,s. end,e-value,bit score AF178033 EMORG:AF178033 100.00 811 0 0 1 811 1 811 0.0 1566.6 AF178033 EMORG:AF031394 99.63 811 3 0 1 811 99 909 0.0 1542.8 .. note:: scikit-bio requires fields to be consistent within a file. BLAST Column Types ------------------ The following column types are output by BLAST and supported by scikit-bio. For more information on these column types, see :mod:`skbio.io.format.blast6`. +-------------------+----------------------+ |Field Name |DataFrame Column Name | +===================+======================+ |query id |qseqid | +-------------------+----------------------+ |query gi |qgi | +-------------------+----------------------+ |query acc. |qacc | +-------------------+----------------------+ |query acc.ver |qaccver | +-------------------+----------------------+ |query length |qlen | +-------------------+----------------------+ |subject id |sseqid | +-------------------+----------------------+ |subject ids |sallseqid | +-------------------+----------------------+ |subject gi |sgi | +-------------------+----------------------+ |subject gis |sallgi | +-------------------+----------------------+ |subject acc. 
|sacc | +-------------------+----------------------+ |subject acc.ver |saccver | +-------------------+----------------------+ |subject accs |sallacc | +-------------------+----------------------+ |subject length |slen | +-------------------+----------------------+ |q\\. start |qstart | +-------------------+----------------------+ |q\\. end |qend | +-------------------+----------------------+ |s\\. start |sstart | +-------------------+----------------------+ |s\\. end |send | +-------------------+----------------------+ |query seq |qseq | +-------------------+----------------------+ |subject seq |sseq | +-------------------+----------------------+ |evalue |evalue | +-------------------+----------------------+ |bit score |bitscore | +-------------------+----------------------+ |score |score | +-------------------+----------------------+ |alignment length |length | +-------------------+----------------------+ |% identity |pident | +-------------------+----------------------+ |identical |nident | +-------------------+----------------------+ |mismatches |mismatch | +-------------------+----------------------+ |positives |positive | +-------------------+----------------------+ |gap opens |gapopen | +-------------------+----------------------+ |gaps |gaps | +-------------------+----------------------+ |% positives |ppos | +-------------------+----------------------+ |query/sbjct frames |frames | +-------------------+----------------------+ |query frame |qframe | +-------------------+----------------------+ |sbjct frame |sframe | +-------------------+----------------------+ |BTOP |btop | +-------------------+----------------------+ |subject tax ids |staxids | +-------------------+----------------------+ |subject sci names |sscinames | +-------------------+----------------------+ |subject com names |scomnames | +-------------------+----------------------+ |subject blast names|sblastnames | +-------------------+----------------------+ |subject super |sskingdoms | |kingdoms | 
| +-------------------+----------------------+ |subject title |stitle | +-------------------+----------------------+ |subject strand |sstrand | +-------------------+----------------------+ |subject titles |salltitles | +-------------------+----------------------+ |% query coverage |qcovs | |per subject | | +-------------------+----------------------+ |% query coverage |qcovhsp | |per hsp | | +-------------------+----------------------+ Examples -------- Suppose we have a BLAST+7 file: >>> from io import StringIO >>> import skbio.io >>> import pandas as pd >>> fs = '\\n'.join([ ... '# BLASTN 2.2.18+', ... '# Query: gi|1786181|gb|AE000111.1|AE000111', ... '# Database: ecoli', ... '# Fields: query acc., subject acc., evalue, q. start, q. end, s. st\ art, s. end', ... '# 5 hits found', ... 'AE000111\\tAE000111\\t0.0\\t1\\t10596\\t1\\t10596', ... 'AE000111\\tAE000174\\t8e-30\\t5565\\t5671\\t6928\\t6821', ... 'AE000111\\tAE000171\\t3e-24\\t5587\\t5671\\t2214\\t2130', ... 'AE000111\\tAE000425\\t6e-26\\t5587\\t5671\\t8552\\t8468' ... ]) >>> fh = StringIO(fs) Read the file into a ``pd.DataFrame``: >>> df = skbio.io.read(fh, into=pd.DataFrame) >>> df # doctest: +NORMALIZE_WHITESPACE qacc sacc evalue qstart qend sstart send 0 AE000111 AE000111 0.000000e+00 1.0 10596.0 1.0 10596.0 1 AE000111 AE000174 8.000000e-30 5565.0 5671.0 6928.0 6821.0 2 AE000111 AE000171 3.000000e-24 5587.0 5671.0 2214.0 2130.0 3 AE000111 AE000425 6.000000e-26 5587.0 5671.0 8552.0 8468.0 Suppose we have a legacy BLAST 9 file: >>> from io import StringIO >>> import skbio.io >>> import pandas as pd >>> fs = '\\n'.join([ ... '# BLASTN 2.2.3 [May-13-2002]', ... '# Database: other_vertebrate', ... '# Query: AF178033', ... '# Fields: ', ... 'Query id,Subject id,% identity,alignment length,mismatches,gap openin\ gs,q. start,q. end,s. start,s. end,e-value,bit score', ... 'AF178033\\tEMORG:AF178033\\t100.00\\t811\\t0\\t0\\t1\\t811\\t1\\t81\ 1\\t0.0\\t1566.6', ... 
'AF178033\\tEMORG:AF178032\\t94.57\\t811\\t44\\t0\\t1\\t811\\t1\\t81\ 1\\t0.0\\t1217.7', ... 'AF178033\\tEMORG:AF178031\\t94.82\\t811\\t42\\t0\\t1\\t811\\t1\\t81\ 1\\t0.0\\t1233.5' ... ]) >>> fh = StringIO(fs) Read the file into a ``pd.DataFrame``: >>> df = skbio.io.read(fh, into=pd.DataFrame) >>> df[['qseqid', 'sseqid', 'pident']] # doctest: +NORMALIZE_WHITESPACE qseqid sseqid pident 0 AF178033 EMORG:AF178033 100.00 1 AF178033 EMORG:AF178032 94.57 2 AF178033 EMORG:AF178031 94.82 References ---------- .. [1] Altschul, S.F., Gish, W., Miller, W., Myers, E.W. & Lipman, D.J. (1990) "Basic local alignment search tool." J. Mol. Biol. 215:403-410. .. [2] http://www.ncbi.nlm.nih.gov/books/NBK279682/ """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import pandas as pd from skbio.io import create_format, BLAST7FormatError from skbio.io.format._blast import _parse_blast_data blast7 = create_format("blast+7") column_converter = { "query id": "qseqid", "query gi": "qgi", "query acc.": "qacc", "query acc.ver": "qaccver", "query length": "qlen", "subject id": "sseqid", "subject ids": "sallseqid", "subject gi": "sgi", "subject gis": "sallgi", "subject acc.": "sacc", "subject acc.ver": "saccver", "subject accs.": "sallacc", "subject length": "slen", "q. start": "qstart", "q. end": "qend", "s. start": "sstart", "s. 
end": "send", "query seq": "qseq", "subject seq": "sseq", "evalue": "evalue", "bit score": "bitscore", "score": "score", "alignment length": "length", "% identity": "pident", "identical": "nident", "mismatches": "mismatch", "positives": "positive", "gap opens": "gapopen", "gaps": "gaps", "% positives": "ppos", "query/sbjct frames": "frames", "query frame": "qframe", "sbjct frame": "sframe", "BTOP": "btop", "subject tax ids": "staxids", "subject sci names": "sscinames", "subject com names": "scomnames", "subject blast names": "sblastnames", "subject super kingdoms": "sskingdoms", "subject title": "stitle", "subject titles": "salltitles", "subject strand": "sstrand", "% query coverage per subject": "qcovs", "% query coverage per hsp": "qcovhsp", "Query id": "qseqid", "Subject id": "sseqid", "gap openings": "gapopen", "e-value": "evalue", } @blast7.sniffer() def _blast7_sniffer(fh): # Smells a BLAST+7 file if the following conditions are present # -First line contains "BLAST" # -Second line contains "Query" or "Database" # -Third line starts with "Subject" or "Query" or "Database" lines = [line for _, line in zip(range(3), fh)] if len(lines) < 3: return False, {} if not lines[0].startswith("# BLAST"): return False, {} if not (lines[1].startswith("# Query:") or lines[1].startswith("# Database:")): return False, {} if not ( lines[2].startswith("# Subject:") or lines[2].startswith("# Query:") or lines[2].startswith("# Database:") ): return False, {} return True, {} @blast7.reader(pd.DataFrame, monkey_patch=False) def _blast7_to_data_frame(fh): line_num = 0 columns = None skiprows = [] for line in fh: if line == "# Fields: \n": # Identifies Legacy BLAST 9 data line = next(fh) line_num += 1 if columns is None: columns = _parse_fields(line, legacy=True) skiprows.append(line_num) else: next_columns = _parse_fields(line, legacy=True) if columns != next_columns: raise BLAST7FormatError( "Fields %r do not equal fields %r" % (columns, next_columns) ) skiprows.append(line_num) 
elif line.startswith("# Fields: "): # Identifies BLAST+7 data if columns is None: columns = _parse_fields(line) else: # Affirms data types do not differ throught file next_columns = _parse_fields(line) if columns != next_columns: raise BLAST7FormatError( "Fields %r do not equal fields %r" % (columns, next_columns) ) line_num += 1 if columns is None: # Affirms file contains BLAST data raise BLAST7FormatError("File contains no BLAST data.") fh.seek(0) return _parse_blast_data( fh, columns, BLAST7FormatError, "Number of fields (%r) does not equal number" " of data columns (%r).", comment="#", skiprows=skiprows, ) def _parse_fields(line, legacy=False): r"""Remove '\n' from fields line and returns fields as a list (columns).""" line = line.rstrip("\n") if legacy: fields = line.split(",") else: line = line.split("# Fields: ")[1] fields = line.split(", ") columns = [] for field in fields: if field not in column_converter: raise BLAST7FormatError( "Unrecognized field (%r)." " Supported fields: %r" % (field, set(column_converter.keys())) ) columns.append(column_converter[field]) return columns scikit-bio-0.6.2/skbio/io/format/clustal.py000066400000000000000000000277201464262511300206170ustar00rootroot00000000000000r"""Clustal format (:mod:`skbio.io.format.clustal`) =============================================== .. currentmodule:: skbio.io.format.clustal Clustal format (``clustal``) stores multiple sequence alignments. This format was originally introduced in the Clustal package [1]_. Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.alignment.TabularMSA` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- A clustal-formatted file is a plain text format. 
It can optionally have a header, which states the clustal version number. This is followed by the multiple sequence alignment, and optional information about the degree of conservation at each position in the alignment [2]_. Alignment Section ^^^^^^^^^^^^^^^^^ Each sequence in the alignment is divided into subsequences each at most 60 characters long. The sequence identifier for each sequence precedes each subsequence. Each subsequence can optionally be followed by the cumulative number of non-gap characters up to that point in the full sequence (not included in the examples below). A line containing conservation information about each position in the alignment can optionally follow all of the subsequences (not included in the examples below). .. note:: scikit-bio ignores conservation information when reading and does not support writing conservation information. .. note:: When reading a clustal-formatted file into an ``skbio.alignment.TabularMSA`` object, sequence identifiers/labels are stored as ``TabularMSA`` index labels (``index`` property). When writing an ``skbio.alignment.TabularMSA`` object as a clustal-formatted file, ``TabularMSA`` index labels will be converted to strings and written as sequence identifiers/labels. Format Parameters ----------------- The only supported format parameter is ``constructor``, which specifies the type of in-memory sequence object to read each aligned sequence into. This must be a subclass of ``GrammaredSequence`` (e.g., ``DNA``, ``RNA``, ``Protein``) and is a required format parameter. For example, if you know that the clustal file you're reading contains DNA sequences, you would pass ``constructor=DNA`` to the reader call. Examples -------- Assume we have a clustal-formatted file of RNA sequences: .. 
code-block:: none CLUSTAL W (1.82) multiple sequence alignment abc GCAUGCAUCUGCAUACGUACGUACGCAUGCAUCA def ---------------------------------- xyz ---------------------------------- abc GUCGAUACAUACGUACGUCGUACGUACGU-CGAC def ---------------CGCGAUGCAUGCAU-CGAU xyz -----------CAUGCAUCGUACGUACGCAUGAC We can use the following code to read the clustal file into a ``TabularMSA``: >>> from skbio import TabularMSA, RNA >>> clustal_f = ['CLUSTAL W (1.82) multiple sequence alignment\n', ... '\n', ... 'abc GCAUGCAUCUGCAUACGUACGUACGCAUGCA\n', ... 'def -------------------------------\n', ... 'xyz -------------------------------\n', ... '\n', ... 'abc GUCGAUACAUACGUACGUCGGUACGU-CGAC\n', ... 'def ---------------CGUGCAUGCAU-CGAU\n', ... 'xyz -----------CAUUCGUACGUACGCAUGAC\n'] >>> msa = TabularMSA.read(clustal_f, constructor=RNA) >>> msa TabularMSA[RNA] -------------------------------------------------------------- Stats: sequence count: 3 position count: 62 -------------------------------------------------------------- GCAUGCAUCUGCAUACGUACGUACGCAUGCAGUCGAUACAUACGUACGUCGGUACGU-CGAC ----------------------------------------------CGUGCAUGCAU-CGAU ------------------------------------------CAUUCGUACGUACGCAUGAC >>> msa.index Index(['abc', 'def', 'xyz'], dtype='object') We can use the following code to write a ``TabularMSA`` to a clustal-formatted file: >>> from io import StringIO >>> from skbio import DNA >>> seqs = [DNA('ACCGTTGTA-GTAGCT', metadata={'id': 'seq1'}), ... DNA('A--GTCGAA-GTACCT', metadata={'id': 'sequence-2'}), ... 
DNA('AGAGTTGAAGGTATCT', metadata={'id': '3'})] >>> msa = TabularMSA(seqs, minter='id') >>> msa TabularMSA[DNA] ---------------------- Stats: sequence count: 3 position count: 16 ---------------------- ACCGTTGTA-GTAGCT A--GTCGAA-GTACCT AGAGTTGAAGGTATCT >>> msa.index Index(['seq1', 'sequence-2', '3'], dtype='object') >>> fh = StringIO() >>> _ = msa.write(fh, format='clustal') >>> print(fh.getvalue()) # doctest: +NORMALIZE_WHITESPACE CLUSTAL seq1 ACCGTTGTA-GTAGCT sequence-2 A--GTCGAA-GTACCT 3 AGAGTTGAAGGTATCT References ---------- .. [1] http://www.sciencedirect.com/science/article/pii/0378111988903307 .. [2] http://web.mit.edu/meme_v4.9.0/doc/clustalw-format.html """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from skbio.io import create_format, ClustalFormatError from skbio.alignment import TabularMSA clustal = create_format("clustal") def _label_line_parser(record): """Return dict mapping list of data to labels, plus list with field order. Field order contains labels in order encountered in file. NOTE: doesn't care if lines are out of order in different blocks. This should never happen anyway, but it's possible that this behavior should be changed to tighten up validation. """ labels = [] result = {} for line in record: split_line = line.strip().rsplit(None, 1) if len(split_line) == 2: key, val = split_line else: raise ClustalFormatError( "Failed to parse sequence identifier and subsequence from " "the following line: %r" % line ) if key in result: result[key].append(val) else: result[key] = [val] labels.append(key) return result, labels def _is_clustal_seq_line(line): """Return True if line starts with a non-blank character but not 'CLUSTAL'. 
Useful for filtering other lines out of the file. """ return ( line and (not line[0].isspace()) and (not line.startswith("CLUSTAL")) and (not line.startswith("MUSCLE")) ) def _delete_trailing_number(line): """Delete trailing number from a line. WARNING: does not preserve internal whitespace when a number is removed! (converts each whitespace run to a single space). Returns the original line if it didn't end in a number. """ pieces = line.split() try: int(pieces[-1]) return " ".join(pieces[:-1]) except ValueError: # no trailing numbers return line def _check_length(data, labels, num_seqs_check=None): """Check that the lengths of the clustal sequences line up correctly. num_seqs_check: The number of sequences to check Return True if all of the subsequence lengths are equal or if data is empty Return False if one of the subsequence lengths differs """ if len(labels) == 0: return True num_subseqs = len(data[labels[0]]) if num_seqs_check is None: num_seqs_check = num_subseqs else: if num_seqs_check > num_subseqs: num_seqs_check = num_subseqs subseq_length = len(data[labels[0]][0]) end_lengths = set() # subsequence lengths at end of file for i in range(num_seqs_check): for label in labels: seq = data[label][i] if len(seq) > subseq_length: return False elif i + 1 == num_subseqs: # Last subsequence end_lengths.add(len(seq)) elif len(seq) < subseq_length: return False # All trailing subsequences must be the same if len(end_lengths) > 1: return False return True @clustal.sniffer() def _clustal_sniffer(fh): # Strategy # The following conditions preclude a file from being clustal # * It is an empty file # * The whole sequences have differing lengths # * The sub-sequences have differing lengths # * One of the sequence ids is not immediately # followed by a subsequence empty = True if fh.read(7) != "CLUSTAL": return False, {} fh.seek(0) try: records = map(_delete_trailing_number, filter(_is_clustal_seq_line, fh)) data, labels = _label_line_parser(records) if len(data) > 0: empty 
= False # Only check first 50 sequences aligned_correctly = _check_length(data, labels, 50) if not aligned_correctly: raise ClustalFormatError("Sequences not aligned properly") except ClustalFormatError: return False, {} return not empty, {} @clustal.writer(TabularMSA) def _tabular_msa_to_clustal(obj, fh): if not obj.index.is_unique: raise ClustalFormatError("TabularMSA's index labels must be unique.") clen = 60 # Max length of clustal lines seqs = [str(s) for s in obj] names = [str(label) for label in obj.index] nameLen = max(map(len, names)) seqLen = max(map(len, seqs)) fh.write("CLUSTAL\n\n\n") for i in range(0, seqLen, clen): for label, seq in zip(names, seqs): name = ("{:<%d}" % (nameLen)).format(label) fh.write("%s\t%s\n" % (name, seq[i : i + clen])) fh.write("\n") @clustal.reader(TabularMSA) def _clustal_to_tabular_msa(fh, constructor=None): r"""Yield labels and sequences from msa (multiple sequence alignment). Parameters ---------- fh : open file object An open Clustal file. constructor : callable, optional A callable object that constructs sequences. Returns ------- skbio.TabularMSA MSA containing aligned sequences. Raises ------ skbio.util.exception.ClustalFormatError If the sequences in `fh` don't have the same sequence length or if the sequence ids don't properly match with the subsequences Notes ----- Skips any line that starts with a blank. ``_clustal_to_tabular_msa`` preserves the order of the sequences from the original file. However, it does use a dict as an intermediate, so two sequences can't have the same label. This is probably OK since Clustal will refuse to run on a FASTA file in which two sequences have the same label, but could potentially cause trouble with manually edited files (all the segments of the conflicting sequences would be interleaved, possibly in an unpredictable way). If the lines have trailing numbers (i.e. Clustal was run with `-LINENOS=ON`), silently deletes them. 
Does not check that the numbers actually correspond to the number of chars in the sequence printed so far. References ---------- .. [1] Thompson JD, Higgins DG, Gibson TJ, "CLUSTAL W: improving the sensitivity of progressive multiple sequence alignment through sequence weighting, position-specific gap penalties and weight matrix choice. Thompson", Nucleic Acids Res. 1994 Nov 11;22(22):4673-80. """ if constructor is None: raise ValueError("Must provide `constructor`.") records = map(_delete_trailing_number, filter(_is_clustal_seq_line, fh)) data, labels = _label_line_parser(records) aligned_correctly = _check_length(data, labels) if not aligned_correctly: raise ClustalFormatError("Sequences not aligned properly") seqs = [] for label in labels: seqs.append(constructor("".join(data[label]))) return TabularMSA(seqs, index=labels) scikit-bio-0.6.2/skbio/io/format/embed.py000066400000000000000000000270571464262511300202270ustar00rootroot00000000000000r"""Embedding format (:mod:`skbio.io.format.embed`). ==================================================== .. currentmodule:: skbio.io.format.embed This module provides support for reading and writing embedding files that are outputted by sequential language models such as protein language models (pLMs). 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |generator of :mod:`skbio.embedding.ProteinEmbedding` objects | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.embedding.ProteinEmbedding` objects | +------+------+---------------------------------------------------------------+ |Yes |Yes |generator of :mod:`skbio.embedding.ProteinVector` objects | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.embedding.ProteinVector` objects | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The format is a HDF5 file with the following structure: - ``embeddings`` (dataset) - ``embedding_ptr`` (dataset) - ``id`` (dataset) - ``idptr`` (dataset) - ``format`` (attribute) - ``format-version`` (attribute) - ``dtype`` (attribute) - ``dim`` (attribute) The `idptr` dataset contains the cumulative sum of the sequence lengths in the hdf5. This is used to index both the sequences and the embeddings in the hdf5, which can be useful for iterating through the embeddings and avoiding the need to load all of the embedding into memory. For protein embeddings the `id` is the original sequence used to generate the embeddings. The `embeddings` dataset contains the embeddings for each sequence, where the first dimension is the sequence length and the second dimension is the embedding dimension. The row vectors in the `embeddings` correspond to the residues of the sequence in the `id` dataset. The `embptr` is an optional dataset that is used in case the `id` length is different from the `embedding` length. 
This could be do to string formatting, for instance for dealing with protein vectors, the id is a full length sequence, not a single residue. The `emdptr` is used to separately keep track of the individual embeddings in these scenarios. The format attribute is a string that specifies the format of the embedding. If the ``format`` attribute is present and has the value of `embed`, then the file is a valid embedding file. The `format-version` attribute is a string that specifies the version of the format. The `dtype` attribute is a string that specifies the data type of the embeddings. Currently supported dtypes include `float32` or `float64`. The `dim` attribute is an integer that specifies the dimensionality of the embeddings. The `embed` format currently does not support storing embeddings with different dimensionality in the same file. Examples -------- Here we will read in an example protein embedding file and write it back out. Note that the embedding from implicitly gets the ``.write`` method from the IO registry. This ``ByteIO`` object can be a file path in a regular use case. 
>>> import io, skbio >>> f = io.BytesIO() >>> skbio.embedding.example_protein_embedding.write(f) # doctest: +ELLIPSIS <_io.BytesIO object at ...> >>> roundtrip = skbio.read(f, into=skbio.ProteinEmbedding) >>> roundtrip ProteinEmbedding -------------------------------------------------------------------- Stats: length: 62 embedding dimension: 1024 has gaps: False has degenerates: False has definites: True has stops: False -------------------------------------------------------------------- 0 IGKEEIQQRL AQFVDHWKEL KQLAAARGQR LEESLEYQQF VANVEEEEAW INEKMTLVAS 60 ED """ import numpy as np from math import ceil import h5py from skbio.io import create_format from skbio.embedding._protein import ProteinEmbedding from skbio.embedding._protein import ProteinVector from skbio.sequence import Protein embed = create_format("embed", encoding="binary") @embed.sniffer() def _embed_sniffer(fh): # this can be buffered, in which case .peek will return the buffer # so slice just in case magic = fh.peek(8)[:8] # From https://en.wikipedia.org/wiki/Hierarchical_Data_Format # Note that Wikipedia specifies: "\211HDF\r\n\032\n" which is an ordinal form: # >>> ord('\211') # 137 # >>> ord('\x89') # 137 # >>> ord('\032') # 26 # >>> ord('\x1a') # 26 if magic == b"\x89HDF\r\n\x1a\n": with h5py.File(fh, "r") as h5file: if "embedding" in h5file and "id" in h5file and "idptr" in h5file: return True, {} return False, {} @embed.reader(None) def _embed_to_generator( fh, constructor=ProteinEmbedding, obj_constructor=Protein, kwargs: dict = {}, ): h5grp = h5py.File(fh, "r") embed_fh = h5grp["embedding"] id_fh = h5grp["id"] idptr_fh = h5grp["idptr"] has_embedding_ptr = "embedding_ptr" in h5grp if has_embedding_ptr: embptr_fh = h5grp["embedding_ptr"] j, k = 0, 0 n = idptr_fh.shape[0] for i in range(n): idptr = idptr_fh[i] id_ = id_fh[j:idptr] if has_embedding_ptr: embptr = embptr_fh[i] emb = embed_fh[k:embptr] else: emb = embed_fh[j:idptr] string = str(id_.tobytes().decode("ascii")) j = idptr k = embptr 
if has_embedding_ptr else None yield constructor(emb, string, **kwargs) def _embed_to_object( fh, constructor=ProteinEmbedding, obj_constructor=Protein, kwargs: dict = {}, ): h5grp = h5py.File(fh, "r") embed_fh = h5grp["embedding"] id_fh = h5grp["id"] # assumes that there is only a single object in the file emb = np.array(embed_fh[:]) id_ = id_fh[()] string = str(id_.tobytes().decode("ascii")) return constructor(emb, string, **kwargs) @embed.reader(ProteinEmbedding) def _embed_to_protein( fh, kwargs: dict = {}, ): return _embed_to_object( fh, constructor=ProteinEmbedding, obj_constructor=Protein, kwargs=kwargs, ) @embed.reader(ProteinVector) def _vector_to_protein( fh, kwargs: dict = {}, ): return _embed_to_object( fh, constructor=ProteinVector, obj_constructor=Protein, kwargs=kwargs, ) def _objects_to_embed(objs, fh, include_embedding_pointer=True): with h5py.File(fh, "w") as h5grp: h5grp.attrs["format"] = "embedding" h5grp.attrs["format-version"] = "1.0" max_idsize = 1 max_embsize = 1 resize_by = 1.38 resize = False for i, obj in enumerate(objs): # store string representation of the object # that will serve as an identifier for the entire object. # for sequences, this could be the sequence itself # for molecules, this could be the SMILES string. # The entries in this string representation can be used # to index the row vectors in the embedding. # For sequences, this is the positional index of the sequence. # For molecules, this is the position index of atoms in the SMILES string. arr = obj.bytes() # Store the embedding itself. 
We are assuming that the # embbedding is a 2D numpy array emb = obj.embedding dtype = emb.dtype if "dtype" not in h5grp.attrs: h5grp.attrs["dtype"] = dtype.name if "dim" not in h5grp.attrs: h5grp.attrs["dim"] = emb.shape[1] # resize if necessary if i > 0: if include_embedding_pointer: if (len(arr) + idptr_fh[i - 1]) > max_idsize or ( emb.shape[0] + embptr_fh[i - 1] ) > max_embsize: max_idsize = ceil(len(arr) + idptr_fh[i - 1]) * resize_by max_embsize = ceil(emb.shape[0] + embptr_fh[i - 1]) * resize_by resize = True else: if len(arr) + idptr_fh[i - 1] > max_idsize: max_idsize = ceil(len(arr) + idptr_fh[i - 1] * resize_by) resize = True # store the pointers that keep track of the start and # end of the embedding for each object, as well as well as # the corresponding string representation if "idptr" in h5grp: idptr_fh = h5grp["idptr"] if resize: idptr_fh.resize((ceil(i * resize_by),)) idptr_fh[i] = len(arr) + idptr_fh[i - 1] else: idptr_fh = h5grp.create_dataset( "idptr", data=[len(arr)], maxshape=(None,), dtype=np.int32, compression="gzip", ) if "id" in h5grp: id_fh = h5grp["id"] if resize: id_fh.resize((max_idsize,)) id_fh[idptr_fh[i - 1] : idptr_fh[i]] = arr else: id_fh = h5grp.create_dataset( "id", data=arr, maxshape=(None,), dtype=np.uint8, compression="gzip" ) if include_embedding_pointer: if "embedding_ptr" in h5grp: embptr_fh = h5grp["embedding_ptr"] if resize: embptr_fh.resize((ceil(i * resize_by),)) embptr_fh[i] = emb.shape[0] + embptr_fh[i - 1] else: embptr_fh = h5grp.create_dataset( "embedding_ptr", data=[emb.shape[0]], maxshape=(None,), dtype=np.int32, ) if "embedding" in h5grp: embed_fh = h5grp["embedding"] assert embed_fh.shape[1] == emb.shape[1], ( "Embedding dimension mismatch between objects. 
" f"({embed_fh.shape}) and ({emb.shape})" ) if resize: embed_fh.resize(max_embsize, axis=0) if include_embedding_pointer: embed_fh[embptr_fh[i - 1] : embptr_fh[i]] = emb else: embed_fh[idptr_fh[i - 1] : idptr_fh[i]] = emb else: embed_fh = h5grp.create_dataset( "embedding", data=emb, maxshape=(None, emb.shape[1]), dtype=obj.embedding.dtype, compression="gzip", ) resize = False # resize the datasets to the actual number of objects max_idsize = idptr_fh[i] max_embsize = embptr_fh[i] if include_embedding_pointer else max_idsize id_fh.resize((max_idsize,)) idptr_fh.resize((i,)) embed_fh.resize(max_embsize, axis=0) if include_embedding_pointer: embptr_fh.resize((i,)) @embed.writer(None) def _generator_to_embed(objs, fh): return _objects_to_embed(objs, fh) @embed.writer(ProteinEmbedding) def _protein_to_embed(obj, fh): _objects_to_embed([obj], fh, include_embedding_pointer=False) @embed.writer(ProteinVector) def _protein_to_vector(obj, fh): _objects_to_embed([obj], fh, include_embedding_pointer=True) scikit-bio-0.6.2/skbio/io/format/embl.py000066400000000000000000001522021464262511300200610ustar00rootroot00000000000000"""EMBL format (:mod:`skbio.io.format.embl`) ========================================= .. currentmodule:: skbio.io.format.embl EMBL format stores sequence and its annotation together. The start of the annotation section is marked by a line beginning with the word "ID". The start of sequence section is marked by a line beginning with the word "SQ". The "//" (terminator) line also contains no data or comments and designates the end of an entry. More information on EMBL file format can be found here [1]_. The EMBL file may end with .embl or .txt extension. An example of EMBL file can be seen here [2]_. Feature Level Products ^^^^^^^^^^^^^^^^^^^^^^ As described in [3]_ *"Feature-level products contain nucleotide sequence and related annotations derived from submitted ENA assembled and annotated sequences. 
Data are distributed in flatfile format, similar to that of parent ENA records, with each flatfile representing a single feature"*. While only the sequence of the feature is included in such entries, features are derived from the parent entry, and can't be applied as interval metadata. For such reason, interval metatdata are ignored from Feature-level products, as they will be ignored by subsetting a generic Sequence object. Format Support -------------- **Has Sniffer: Yes** **NOTE: No protein support at the moment** Current protein support development is tracked in issue-1499 [4]_ +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.RNA` | +------+------+---------------------------------------------------------------+ |No |No |:mod:`skbio.sequence.Protein` | +------+------+---------------------------------------------------------------+ |Yes |Yes | generator of :mod:`skbio.sequence.Sequence` objects | +------+------+---------------------------------------------------------------+ Format Specification -------------------- Sections before ``FH (Feature Header)`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ All the sections before ``FH (Feature Header)`` will be read into the attribute of ``metadata``. The header and its content of a section are stored as key-value pairs in ``metadata``. For the ``RN (Reference Number)`` section, its value is stored as a list, as there are often multiple reference sections in one EMBL record. 
``FT`` section ^^^^^^^^^^^^^^ See :ref:`Genbank FEATURES section` ``SQ`` section ^^^^^^^^^^^^^^ The sequence in the ``SQ`` section is always in lowercase for the EMBL files downloaded from ENA. For the RNA molecules, ``t`` (thymine), instead of ``u`` (uracil) is used in the sequence. All EMBL writers follow these conventions while writing EMBL files. Examples -------- Reading EMBL Files ^^^^^^^^^^^^^^^^^^ Suppose we have the following EMBL file example: >>> embl_str = ''' ... ID X56734; SV 1; linear; mRNA; STD; PLN; 1859 BP. ... XX ... AC X56734; S46826; ... XX ... DT 12-SEP-1991 (Rel. 29, Created) ... DT 25-NOV-2005 (Rel. 85, Last updated, Version 11) ... XX ... DE Trifolium repens mRNA for non-cyanogenic beta-glucosidase ... XX ... KW beta-glucosidase. ... XX ... OS Trifolium repens (white clover) ... OC Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta; ... OC Spermatophyta; Magnoliophyta; eudicotyledons; Gunneridae; ... OC Pentapetalae; rosids; fabids; Fabales; Fabaceae; Papilionoideae; ... OC Trifolieae; Trifolium. ... XX ... RN [5] ... RP 1-1859 ... RX DOI; 10.1007/BF00039495. ... RX PUBMED; 1907511. ... RA Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.; ... RT "Nucleotide and derived amino acid sequence of the cyanogenic ... RT beta-glucosidase (linamarase) from white clover ... RT (Trifolium repens L.)"; ... RL Plant Mol. Biol. 17(2):209-219(1991). ... XX ... RN [6] ... RP 1-1859 ... RA Hughes M.A.; ... RT ; ... RL Submitted (19-NOV-1990) to the INSDC. ... RL Hughes M.A., University of Newcastle Upon Tyne, Medical School, ... RL Newcastle ... RL Upon Tyne, NE2 4HH, UK ... XX ... DR MD5; 1e51ca3a5450c43524b9185c236cc5cc. ... XX ... FH Key Location/Qualifiers ... FH ... FT source 1..1859 ... FT /organism="Trifolium repens" ... FT /mol_type="mRNA" ... FT /clone_lib="lambda gt10" ... FT /clone="TRE361" ... FT /tissue_type="leaves" ... FT /db_xref="taxon:3899" ... FT mRNA 1..1859 ... FT /experiment="experimental evidence, no additional ... 
FT details recorded" ... FT CDS 14..1495 ... FT /product="beta-glucosidase" ... FT /EC_number="3.2.1.21" ... FT /note="non-cyanogenic" ... FT /db_xref="GOA:P26204" ... FT /db_xref="InterPro:IPR001360" ... FT /db_xref="InterPro:IPR013781" ... FT /db_xref="InterPro:IPR017853" ... FT /db_xref="InterPro:IPR033132" ... FT /db_xref="UniProtKB/Swiss-Prot:P26204" ... FT /protein_id="CAA40058.1" ... FT /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRS ... FT SFPRGFIFGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITV ... FT DQYHRYKEDVGIMKDQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLI ... FT NELLANGIQPFVTLFHWDLPQVLEDEYGGFLNSGVINDFRDYTDLCFKEFGD ... FT RVRYWSTLNEPWVFSNSGYALGTNAPGRCSASNVAKPGDSGTGPYIVTHNQI ... FT LAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLDDNSIPDIKAAERSLDFQ ... FT FGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDFIGINYYSSSY ... FT ISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQEDF ... FT EIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYY ... FT IRSAIRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD" ... XX ... SQ Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other; ... aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt ... cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag ... tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga ... aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata ... tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta ... caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc ... ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa ... atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct ... ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg ... tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt ... gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg ... aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac ... aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta ... 
taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg ... gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga ... cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg ... gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg ... ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc ... acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa ... acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat ... gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct ... gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga ... agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg ... ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg ... taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga ... tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa ... ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt ... tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg ... aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc ... agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac ... tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa ... // ... ''' Now we can read it as ``DNA`` object: >>> import io >>> from skbio import DNA, RNA, Sequence >>> embl = io.StringIO(embl_str) >>> dna_seq = DNA.read(embl) >>> dna_seq DNA ---------------------------------------------------------------------- Metadata: 'ACCESSION': 'X56734; S46826;' 'CROSS_REFERENCE': 'DATE': 'DBSOURCE': 'MD5; 1e51ca3a5450c43524b9185c236cc5cc.' 'DEFINITION': 'Trifolium repens mRNA for non-cyanogenic beta- glucosidase' 'KEYWORDS': 'beta-glucosidase.' 
'LOCUS': 'REFERENCE': 'SOURCE': 'VERSION': 'X56734.1' Interval metadata: 3 interval features Stats: length: 1859 has gaps: False has degenerates: False has definites: True GC-content: 35.99% ---------------------------------------------------------------------- 0 AAACAAACCA AATATGGATT TTATTGTAGC CATATTTGCT CTGTTTGTTA TTAGCTCATT 60 CACAATTACT TCCACAAATG CAGTTGAAGC TTCTACTCTT CTTGACATAG GTAACCTGAG ... 1740 AGAAGCTATG ATCATAACTA TAGGTTGATC CTTCATGTAT CAGTTTGATG TTGAGAATAC 1800 TTTGAATTAA AAGTCTTTTT TTATTTTTTT AAAAAAAAAA AAAAAAAAAA AAAAAAAAA Since this is a mRNA molecule, we may want to read it as ``RNA``. As the EMBL file usually have ``t`` instead of ``u`` in the sequence, we can read it as ``RNA`` by converting ``t`` to ``u``: >>> embl = io.StringIO(embl_str) >>> rna_seq = RNA.read(embl) >>> rna_seq RNA ---------------------------------------------------------------------- Metadata: 'ACCESSION': 'X56734; S46826;' 'CROSS_REFERENCE': 'DATE': 'DBSOURCE': 'MD5; 1e51ca3a5450c43524b9185c236cc5cc.' 'DEFINITION': 'Trifolium repens mRNA for non-cyanogenic beta- glucosidase' 'KEYWORDS': 'beta-glucosidase.' 'LOCUS': 'REFERENCE': 'SOURCE': 'VERSION': 'X56734.1' Interval metadata: 3 interval features Stats: length: 1859 has gaps: False has degenerates: False has definites: True GC-content: 35.99% ---------------------------------------------------------------------- 0 AAACAAACCA AAUAUGGAUU UUAUUGUAGC CAUAUUUGCU CUGUUUGUUA UUAGCUCAUU 60 CACAAUUACU UCCACAAAUG CAGUUGAAGC UUCUACUCUU CUUGACAUAG GUAACCUGAG ... 
1740 AGAAGCUAUG AUCAUAACUA UAGGUUGAUC CUUCAUGUAU CAGUUUGAUG UUGAGAAUAC 1800 UUUGAAUUAA AAGUCUUUUU UUAUUUUUUU AAAAAAAAAA AAAAAAAAAA AAAAAAAAA We can also ``trascribe`` a sequence and verify that it will be a ``RNA`` sequence >>> rna_seq == dna_seq.transcribe() True Reading EMBL Files using generators ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Soppose we have an EMBL file with multiple records: we can instantiate a generator object to deal with multiple records >>> import skbio >>> embl = io.StringIO(embl_str) >>> embl_gen = skbio.io.read(embl, format="embl") >>> dna_seq = next(embl_gen) For more informations, see :mod:`skbio.io` References ---------- .. [1] ftp://ftp.ebi.ac.uk/pub/databases/embl/release/doc/usrman.txt .. [2] http://www.ebi.ac.uk/ena/data/view/X56734&display=text .. [3] http://www.ebi.ac.uk/ena/browse/feature-level-products .. [4] https://github.com/scikit-bio/scikit-bio/issues/1499 """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # std modules import re import copy import textwrap from functools import partial # skbio modules from skbio.io import create_format, EMBLFormatError from skbio.io.format._base import _line_generator, _get_nth_sequence from skbio.io.format._sequence_feature_vocabulary import ( _yield_section, _parse_single_feature, _serialize_section_default, _serialize_single_feature, ) from skbio.metadata import IntervalMetadata from skbio.sequence import Sequence, DNA, RNA, Protein from skbio.util._misc import chunk_str # look at skbio.io.registry to have an idea on how to define this class embl = create_format("embl") # This list is ordered used to read and write embl file. 
By processing those # values one by one, I will write embl sections with the same order _HEADERS = [ "LOCUS", "ACCESSION", "PARENT_ACCESSION", "PROJECT_IDENTIFIER", "DATE", "DEFINITION", "GENE_NAME", "KEYWORDS", "SOURCE", "REFERENCE", "DBSOURCE", "COMMENT", "FEATURES", ] # embl has a series of keys different from genbank; moreover keys are not so # easy to understand (eg. RA for AUTHORS). I want to use the same keys used by # genbank both to convert between formats and to use the same methods to get # info from Sequence and its derived objects Here is a dictionary of keys # conversion (EMBL->GB). All the unspecified keys will remain in embl format KEYS_TRANSLATOR = { # identification "ID": "LOCUS", "AC": "ACCESSION", # PA means PARENT ACCESSION (?) and applies to # feature-level-products entries "PA": "PARENT_ACCESSION", "PR": "PROJECT_IDENTIFIER", "DT": "DATE", "DE": "DEFINITION", "GN": "GENE_NAME", # uniprot specific "KW": "KEYWORDS", # Source (taxonomy and classification) "OS": "ORGANISM", "OC": "taxonomy", "OG": "organelle", # reference keys "RA": "AUTHORS", "RP": "REFERENCE", "RC": "REFERENCE_COMMENT", "RX": "CROSS_REFERENCE", "RG": "GROUP", "RT": "TITLE", "RL": "JOURNAL", # Cross references "DR": "DBSOURCE", "CC": "COMMENT", # features "FH": "FEATURES", "FT": "FEATURES", "SQ": "ORIGIN", } # the inverse of KEYS_TRANSLATOR, for semplicity REV_KEYS_TRANSLATOR = {v: k for k, v in KEYS_TRANSLATOR.items()} # the original genbank _yield_section divides entries in sections relying on # spaces (the same section has the same level of indentation). EMBL entries # have a key for each line, so to divide record in sections I need to define a # correspondance for each key to section, then I will divide a record in # sections using these section name. KEYS_2_SECTIONS = { # identification "ID": "LOCUS", "AC": "ACCESSION", # PA means PARENT ACCESSION (?) 
and applies to # feature-level-products entries "PA": "PARENT_ACCESSION", "PR": "PROJECT_IDENTIFIER", "DT": "DATE", "DE": "DEFINITION", "GN": "GENE_NAME", # uniprot specific "KW": "KEYWORDS", # Source (taxonomy and classification) "OS": "SOURCE", "OC": "SOURCE", "OG": "SOURCE", # reference keys "RA": "REFERENCE", "RP": "REFERENCE", "RC": "REFERENCE", "RX": "REFERENCE", "RG": "REFERENCE", "RT": "REFERENCE", "RL": "REFERENCE", # This shuold be Reference Number. However, to split # between references with _embl_yield_section I need to # change section after reading one reference. So a single # reference is completed when I found a new RN. The # reference number information will be the reference # position in the final REFERENCE list metadata "RN": "SPACER", # Cross references "DR": "DBSOURCE", "CC": "COMMENT", "AH": "ASSEMBLY", "AS": "ASSEMBLY", "FH": "FEATURES", "FT": "FEATURES", # sequence "SQ": "ORIGIN", " ": "ORIGIN", "CO": "CONSTRUCTED", # spacer (discarded) "XX": "SPACER", } # for convenience: I think such functions are more readadble while accessing # values in lambda functions def _get_embl_key(line): """Return first part of a string as a embl key (ie 'AC M14399;' -> 'AC').""" # embl keys have a fixed size of 2 chars return line[:2] def _get_embl_section(line): """Return the embl section from uniprot key(ie 'RA' -> 'REFERENCE').""" # get embl key key = _get_embl_key(line) # get embl section from key section = KEYS_2_SECTIONS[key] return section def _translate_key(key): """Translate a single key from EMBL to genbank. Returns key itself if no traslation is defined. """ return KEYS_TRANSLATOR.get(key, key) # a method to translate keys from embl to genbank for a dict object. All keys # not defined in the original dict will remain the same def _translate_keys(data): """Translate keys from EMBL to genbank for a dict object. Translate a dictionary of uniprot key->value in a genbank like dictionary of key values. Keep old keys if no translation is defined. 
""" # traslate keys and get a new_data object new_data = {_translate_key(k): v for k, v in data.items()} return new_data # define a default textwrap.Wrapper for embl def _get_embl_wrapper(embl_key, indent=5, subsequent_indent=None, width=80): """Return a textwrap.TextWrapper for embl records. For example, write by providing embl key and a string. Wrap text to 80 column. """ # define the string to prepen (eg "OC ") prepend = "{key:<{indent}}".format(key=embl_key, indent=indent) # deal with 2° strings and more if subsequent_indent is None: subsequent_prepend = prepend else: subsequent_prepend = "{key:<{indent}}".format( key=embl_key, indent=subsequent_indent ) # define a text wrapper object wrapper = textwrap.TextWrapper( initial_indent=prepend, subsequent_indent=subsequent_prepend, width=width ) return wrapper def _serialize_list(embl_wrapper, data, sep="\n"): """Serialize a list of obj using a textwrap.TextWrapper instance. Returns one string of wrapped embl objects. """ # the output array output = [] for line in data: output += embl_wrapper.wrap(line) # merge dates in one string. Add final newline output = sep.join(output) + "\n" # return comupted string return output # Method to determine if file is in EMBL format or not. 
A uniprot embl format # can't be parsed by this module (at the moment) @embl.sniffer() def _embl_sniffer(fh): try: line = next(_line_generator(fh, skip_blanks=True, strip=False)) except StopIteration: return False, {} try: _parse_id([line]) except EMBLFormatError: return False, {} return True, {} @embl.reader(None) def _embl_to_generator(fh, constructor=None, **kwargs): for record in _parse_embls(fh): yield _construct(record, constructor, **kwargs) # Method to read EMBL data as skbio.sequence.DNA @embl.reader(Sequence) def _embl_to_sequence(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_embls(fh), seq_num) return _construct(record, Sequence, **kwargs) # Method to read EMBL data as skbio.sequence.DNA @embl.reader(DNA) def _embl_to_dna(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_embls(fh), seq_num) return _construct(record, DNA, **kwargs) # Method to read EMBL data as skbio.sequence.DNA @embl.reader(RNA) def _embl_to_rna(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_embls(fh), seq_num) return _construct(record, RNA, **kwargs) # No protein support at the moment @embl.reader(Protein) def _embl_to_protein(fh, seq_num=1, **kwargs): # no protein support, at the moment raise EMBLFormatError( "There's no protein support for EMBL record. " "Current status of EMBL protein support is " "described in issue-1499 (https://github.com/" "biocore/scikit-bio/issues/1499)" ) # Writer methods @embl.writer(None) def _generator_to_embl(obj, fh): for obj_i in obj: _serialize_single_embl(obj_i, fh) @embl.writer(Sequence) def _sequence_to_embl(obj, fh): _serialize_single_embl(obj, fh) @embl.writer(DNA) def _dna_to_embl(obj, fh): _serialize_single_embl(obj, fh) @embl.writer(RNA) def _rna_to_embl(obj, fh): _serialize_single_embl(obj, fh) @embl.writer(Protein) def _protein_to_embl(obj, fh): # no protein support, at the moment raise EMBLFormatError( "There's no protein support for EMBL record. 
" "Current status of EMBL protein support is " "described in issue-1499 (https://github.com/" "biocore/scikit-bio/issues/1499)" ) def _construct(record, constructor=None, **kwargs): """Construct the object of Sequence, DNA, RNA, or Protein.""" # sequence, metadata and interval metadata seq, md, imd = record if "lowercase" not in kwargs: kwargs["lowercase"] = True if constructor is None: unit = md["LOCUS"]["unit"] if unit == "bp": # RNA mol type has T instead of U for genbank from from NCBI constructor = DNA elif unit == "aa": # no protein support, at the moment # constructor = Protein raise EMBLFormatError("There's no protein support for EMBL record") if constructor == RNA: return DNA(seq, metadata=md, interval_metadata=imd, **kwargs).transcribe() else: return constructor(seq, metadata=md, interval_metadata=imd, **kwargs) # looks like the genbank _parse_genbank def _parse_embls(fh): """Chunk multiple EMBL records by '//', and returns a generator.""" data_chunks = [] for line in _line_generator(fh, skip_blanks=True, strip=False): if line.startswith("//"): yield _parse_single_embl(data_chunks) data_chunks = [] else: data_chunks.append(line) def _parse_single_embl(chunks): metadata = {} interval_metadata = None sequence = "" # define a section splitter with _embl_yield_section function defined in # this module (return the embl section by embl key). returns generator for # each block with different line type section_splitter = _embl_yield_section( lambda line: _get_embl_section(line), skip_blanks=True, strip=False ) # process each section, like genbank does. for section, section_name in section_splitter(chunks): # section is a list of records with the same session (eg RA, RP for # for a single reference). 
section_name is the name of the section # (eg REFERENCE for the section of the previous example) # search for a specific method in PARSER_TABLE using section_name or # set _embl_parse_section_default parser = _PARSER_TABLE.get(section_name, _embl_parse_section_default) if section_name == "FEATURES": # This requires 'ID' line parsed before 'FEATURES', which should # be true and is implicitly checked by the sniffer. This is true # since the first section is parsed by the last else condition if "PARENT_ACCESSION" in metadata: # this is a feature-level-products entry and features are # relative to parent accession; in the same way a subset of a # Sequence object has no interval metadata, I will refuse to # process interval metadata here continue # partials add arguments to previous defined functions, in this # case length of Sequence object parser = partial(parser, length=metadata["LOCUS"]["size"]) elif section_name == "COMMENT": # mantain newlines in comments # partials add arguments to previous defined functions parser = partial(parser, join_delimiter="\n") # call function on section parsed = parser(section) # reference can appear multiple times if section_name == "REFERENCE": # genbank data hasn't CROSS_REFERENCE section, To have a similar # metatadata object, I chose to remove CROSS_REFERENCE from # each single reference and put them in metadata. Since I could # have more references, I need a list of CROSS_REFERENCE, with # None values when CROSS_REFERENCE are not defined: there are cases # in which some references have a CROSS_REFERENCE and others not. # So each reference will have it's cross reference in the same # index position, defined or not cross_reference = parsed.pop("CROSS_REFERENCE", None) # fix REFERENCE metadata. Ask if is the first reference or not # I need a reference number as genbank, this could be reference # size if section_name in metadata: RN = len(metadata[section_name]) + 1 else: RN = 1 # fix reference fields. 
Get RN->REFERENCE value from dict positions = parsed.pop("REFERENCE", None) parsed["REFERENCE"] = str(RN) # append position to RN (eg "1 (bases 1 to 63)") if positions: parsed["REFERENCE"] += " %s" % (positions) # cross_reference will be a list of cross reference; Also # metadata[REFERENCE] is a list of references if section_name in metadata: # I've already seen a reference, append new one metadata[section_name].append(parsed) metadata["CROSS_REFERENCE"].append(cross_reference) else: # define a list for this first reference and its RX metadata[section_name] = [parsed] metadata["CROSS_REFERENCE"] = [cross_reference] elif section_name == "ORIGIN": sequence = parsed elif section_name == "FEATURES": interval_metadata = parsed elif section_name == "DATE": # read data (list) metadata[section_name] = parsed # fix locus metadata using last date. Take only last date date = metadata[section_name][-1].split()[0] metadata["LOCUS"]["date"] = date # parse all the others sections (SOURCE, ...) else: metadata[section_name] = parsed # after metadata were read, add a VERSION section like genbank # eval if entry is a feature level product or not if "ACCESSION" in metadata: metadata["VERSION"] = "{accession}.{version}".format( accession=metadata["ACCESSION"].split(";")[0], version=metadata["LOCUS"]["version"], ) elif "PARENT_ACCESSION" in metadata: # locus name is in the format # .::[:ordinal] # and ordinal could be present or not, depends on how many features # are found in such location. Such entry couldn't be found in others # database like NCBI (at the moment) so we will take the version # relying on parent accession (hoping that an update in the parent # accession will generate an update in all feature level products) metadata["VERSION"] = metadata["PARENT_ACCESSION"] # return a string, metatdata as a dictionary and IntervalMetadata object return sequence, metadata, interval_metadata def _write_serializer(fh, serializer, embl_key, data): """Write serializer to a file. 
Append 'XX'.""" # call the serializer function out = serializer(embl_key, data) # test if 'out' is a iterator. # cf. Effective Python Item 17 if iter(out) is iter(out): for s in out: fh.write(s) else: fh.write(out) # add spacer between sections fh.write("XX\n") # main function for writer methods def _serialize_single_embl(obj, fh): """Write a EMBL record. Always write it in ENA canonical way: 1. sequence in lowercase (uniprot are uppercase) 2. 'u' as 't' even in RNA molecules. Parameters ---------- obj : Sequence or its child class A Sequence object or its child class representing the biological sequence to be serialized. fh : file object A file object open for writing. """ # shortcut to deal with metadata md = obj.metadata # embl has a different magick number than embl serialize_default = partial(_serialize_section_default, indent=5) # Now cicle for GB like headers (sections) in _HEADERS. for header in _HEADERS: # Get appropriate serializer method or default one serializer = _SERIALIZER_TABLE.get(header, serialize_default) # headers needs to be converted into embl, or matained as they are # if no conversion could be defined. embl_key = REV_KEYS_TRANSLATOR.get(header, header) # this is true also for locus line if header in md: # deal with special source case, add cross references if needed if header == "REFERENCE": serializer = partial( serializer, cross_references=md.get("CROSS_REFERENCE") ) elif header == "LOCUS": # pass also metadata (in case of entries from genbank) serializer = partial(serializer, metadata=md) # call the serializer function _write_serializer(fh, serializer, embl_key, md[header]) else: # header not in metadata. Could be date read from GB? if header == "DATE": # Have I date in locus metadata? if md["LOCUS"]["date"]: # call serializer on date. 
Date is a list of values _write_serializer(fh, serializer, embl_key, [md["LOCUS"]["date"]]) if header == "FEATURES": if obj.has_interval_metadata(): # magic number 21: the amount of indentation before # feature table starts as defined by INSDC indent = 21 feature_key = "FH Key" fh.write( "{header:<{indent}}Location/Qualifiers\n".format( header=feature_key, indent=indent ) ) # add FH spacer fh.write("FH\n") for s in serializer(obj.interval_metadata._intervals, indent): fh.write(s) # add spacer between sections fh.write("XX\n") # write out the sequence # always write RNA seq as DNA if isinstance(obj, RNA): obj = obj.reverse_transcribe() # serialize sequence from a Sequence object for s in _serialize_sequence(obj): fh.write(s) # terminate a embl record with fh.write("//\n") def _parse_id(lines): """Parse the identification line of an EMBL record. From EMBL user manual (Release 130, November 2016). (ftp://ftp.ebi.ac.uk/pub/databases/embl/release/doc/usrman.txt) The ID (IDentification) line is always the first line of an entry. The format of the ID line is: ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP. The tokens represent: 1. Primary accession number 2. Sequence version number 3. Topology: 'circular' or 'linear' 4. Molecule type (see note 1 below) 5. Data class (see section 3.1 of EMBL user manual) 6. Taxonomic division (see section 3.2 of EMBL user manual) 7. Sequence length (see note 2 below) Note 1 - Molecule type: this represents the type of molecule as stored and can be any value from the list of current values for the mandatory mol_type source qualifier. This item should be the same as the value in the mol_type qualifier(s) in a given entry. Note 2 - Sequence length: The last item on the ID line is the length of the sequence (the total number of bases in the sequence). This number includes base positions reported as present but undetermined (coded as "N"). 
An example of a complete identification line is shown below: ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP. """ # get only the first line of EMBL record line = lines[0] # define a specific patter for EMBL pattern = re.compile( r"ID" r" +([^\s]+);" # ie: CD789012 r" +SV ([0-9]*);" # 4 r" +(\w+);" # linear r" +([^;]+);" # genomic DNA r" +(\w*);" # HTG r" +(\w+);" # MAM r" +(\d+)" # 500 r" +(\w+)\.$" ) # BP # search it matches = re.match(pattern, line) try: res = dict( zip( [ "locus_name", "version", "shape", "mol_type", "class", "division", "size", "unit", ], matches.groups(), ) ) except AttributeError: raise EMBLFormatError("Could not parse the ID line:\n%s" % line) # check for CON entries: if res["class"] == "CON": # entries like http://www.ebi.ac.uk/ena/data/view/LT357133 # doesn't have sequence, so can't be read by skbio.sequence raise EMBLFormatError( "There's no support for embl CON record: for more information " "see issue-1506 (https://github.com/scikit-bio/scikit-bio/issues/" "1506)" ) # those values are integer res["size"] = int(res["size"]) # version could be integer if res["version"]: res["version"] = int(res["version"]) # unit are in lower cases in others modules res["unit"] = res["unit"].lower() # initialize a date record (for gb compatibility) res["date"] = None # returning parsed attributes return res def _serialize_id(header, obj, metadata={}, indent=5): """Serialize ID line. Parameters ---------- header : str The header of the ID line. Usually 'ID' for EMBL or 'LOCUS' for GenBank. obj : dict A dictionary containing key-value pairs representing the attributes of the sequence entry. metadata : dict, optional Additional metadata information, typically extracted from a GenBank entry. indent : int, optional The number of spaces used to indent the serialized ID line. Defaults to 5. 
""" # get key->value pairs, or key->'' if values is None kwargs = {k: "" if v is None else v for k, v in obj.items()} # then unit is in upper cases kwargs["unit"] = kwargs["unit"].upper() # check for missing keys (eg from gb data). Keys in md are in uppercase for key in ["version", "class"]: if key not in kwargs: if key.upper() in metadata: kwargs[key] = metadata[key.upper()] else: kwargs[key] = "" # version from genbank could be "M14399.1 GI:145229". I need an integer version = kwargs["version"] # version could by empty, integer or text if version != "": try: int(kwargs["version"]) # could be a text like M14399.1 except ValueError: match = re.search(r"^\w+\.([0-9]+)", version) if match: kwargs["version"] = match.groups()[0] # return first line return ( "{header:<{indent}}{locus_name}; SV {version}; {shape}; " "{mol_type}; {class}; {division}; {size} {unit}.\n" ).format(header=header, indent=indent, **kwargs) # similar to skbio.io.format._sequence_feature_vocabulary.__yield_section # but applies to embl file format def _embl_yield_section(get_line_key, **kwargs): """Return function that returns successive sections from file. Parameters ---------- get_line_key : callable It takes a string as input and a key indicating the section (could be the embl key or embl KEYS_2_SECTIONS) kwargs : dict, optional Keyword arguments will be passed to `_line_generator`. Returns ------- function A function accept a list of lines as input and return a generator to yield section one by one. 
""" def parser(lines): curr = [] curr_type = None for line in _line_generator(lines, **kwargs): # if we find another line, return the previous section line_type = get_line_key(line) # changed line type if line_type != curr_type: if curr: # returning block yield curr, curr_type # reset curr after yield curr = [] # reset curr_type in any cases curr_type = line_type # don't append record if line type is a spacer if "SPACER" not in line_type: curr.append(line) # don't forget to return the last section in the file if curr: yield curr, curr_type return parser # replace skbio.io.format._sequence_feature_vocabulary._parse_section_default def _embl_parse_section_default( lines, label_delimiter=None, join_delimiter=" ", return_label=False ): """Parse sections in default way. Do 2 things: 1. split first line with label_delimiter for label 2. join all the lines into one str with join_delimiter. """ data = [] label = None line = lines[0] # take the first line, divide the key from the text items = line.split(label_delimiter, 1) if len(items) == 2: label, section = items else: label = items[0] section = "" # append the text of the first element in a empty array data.append(section) # Then process all the elements with the same embl key. remove the key # and append all the text in the data array data.extend(line.split(label_delimiter, 1)[-1] for line in lines[1:]) # Now concatenate the text using join_delimiter. All content with the same # key will be placed in the same string. Strip final "\n data = join_delimiter.join(i.strip() for i in data) # finally return the merged text content, and the key if needed if return_label: return label, data else: return data # parse an embl reference record. 
def _parse_reference(lines): """Parse single REFERENCE field.""" # parsed reference will be placed here res = {} # define a section splitter with _embl_yield_section function defined in # this module section_splitter = _embl_yield_section( lambda line: _get_embl_key(line), skip_blanks=True, strip=False ) # now itereta along sections (lines of the same type) for section, section_name in section_splitter(lines): # this function append all data in the same keywords. A list of lines # as input (see skbio.io.format._sequence_feature_vocabulary) label, data = _embl_parse_section_default( section, join_delimiter=" ", return_label=True ) res[label] = data # now RX (CROSS_REFERENCE) is a joined string of multiple values. To get # back to a list of values you can use: re.compile("([^;\s]*); ([^\s]*)") # search for pubmed record, and add the PUBMED key if "RX" in res: match = re.search(r"PUBMED; (\d+)\.", res["RX"]) if match: # add pubmed notation res["PUBMED"] = match.groups()[0] # fix RP field like genbank (if exists), Ie: (bases 1 to 63) if "RP" in res: match = re.search(r"(\d+)-(\d+)", res["RP"]) if match: # fix rp fields res["RP"] = "(bases {start} to {stop})".format( start=match.groups()[0], stop=match.groups()[1] ) # return translated keys (EMBL->GB) return _translate_keys(res) def _serialize_reference(header, obj, cross_references, indent=5): """Serialize a list of references.""" reference = [] sort_order = ["RC", "RP", "RX", "RG", "RA", "RT", "RL"] # deal with RX pattern and RP pattern RX = re.compile(r"([^;\s]*); ([^\s]*)") RP = re.compile(r"bases (\d+) to (\d+)") # create a copy of obj, that can be changed. I need to delete values or # adding new ones obj = copy.deepcopy(obj) # obj is a list of references. 
Now is a copy of metadata[SOURCE] for i, data in enumerate(obj): # get the reference number (as the iteration number) embl_key = "RN" # get cross_references if cross_references: cross_reference = cross_references[i] # append cross reference [i] to data (obj[i]) (if they exists) if cross_reference: data["CROSS_REFERENCE"] = cross_reference # delete PUBMED key (already present in CROSS_REFERENCE) if "PUBMED" in data: del data["PUBMED"] else: # no cross reference, do I have PUBMED in data? if "PUBMED" in data: # add a fake CROSS_REFERENCE data["CROSS_REFERENCE"] = "PUBMED; %s." % data["PUBMED"] # get an embl wrapper wrapper = _get_embl_wrapper(embl_key, indent) # define wrapped string and add RN to embl data reference += wrapper.wrap("[{RN}]".format(RN=i + 1)) # now process each record for references for embl_key in sort_order: # get internal key (genbank like key) key = _translate_key(embl_key) # have I this reference in my reference data? if key not in data: continue # if yes, define wrapper wrapper = _get_embl_wrapper(embl_key, indent) # data could have newlines records = data[key].split("\n") for record in records: # strip after newlines record = record.strip() # define wrapped string. beware RX if embl_key == "RX": for match in re.finditer(RX, record): source, link = match.groups() # join text cross_reference = "; ".join([source, link]) reference += wrapper.wrap(cross_reference) # RP case elif embl_key == "RP": match = re.search(RP, record) # if I have position, re-define RP key if match: record = "%s-%s" % match.groups() reference += wrapper.wrap(record) # if not, ignore RP key else: continue # all the other cases, go in wrapper as they are else: reference += wrapper.wrap(record) # add a spacer between references (but no at the final reference) # cause the caller will add spacer if (i + 1) < len(obj): reference += ["XX"] # now define a string and add a final "\n" s = "\n".join(reference) + "\n" # and return it return s # parse an embl reference record. 
def _parse_source(lines): """Parse single SOURCE field.""" # parsed reference will be placed here res = {} # define a section splitter with _embl_yield_section function defined in # this module section_splitter = _embl_yield_section( lambda line: _get_embl_key(line), skip_blanks=True, strip=False ) # now itereta along sections (lines of the same type) for section, section_name in section_splitter(lines): # this function append all data in the same keywords. A list of lines # as input (see skbio.io.format._sequence_feature_vocabulary) label, data = _embl_parse_section_default( section, join_delimiter=" ", return_label=True ) res[label] = data # return translated keys return _translate_keys(res) def _serialize_source(header, obj, indent=5): """Serialize SOURCE. Parameters ---------- header: str The section header. obj : dict A dictionary containing key-value pairs representing the attributes of the SOURCE section. indent : int, optional The number of spaces used to indent the serialized SOURCE section. Defaults to 5. """ source = [] # treat taxonomy and all others keys for key in ["ORGANISM", "taxonomy", "organelle"]: # get data to serielize data = obj.get(key) # if key is not defined (eg. organelle, continue) if data is None: continue # get embl key for my key (eg, taxonomy -> OC) embl_key = REV_KEYS_TRANSLATOR.get(key, key) # get an embl wrapper wrapper = _get_embl_wrapper(embl_key, indent) # define wrapped string source += wrapper.wrap(data) # now define a string and add a final "\n" s = "\n".join(source) + "\n" # and return it return s def _parse_sequence(lines): """Parse the sequence section for sequence.""" # result array sequence = [] for line in lines: # ignore record like: # SQ Sequence 275 BP; 64 A; 73 C; 88 G; 50 T; 0 other; if line.startswith("SQ"): continue # remove the numbers inside strings. 
revome spaces around string items = [i for i in line.split() if not i.isdigit()] # append each sequence items to sequence list sequence += items return "".join(sequence) def _serialize_sequence(obj, indent=5): """Serialize seq to SQ. Parameters ---------- obj : DNA, RNA, Sequence Obj A DNA, RNA, or Sequence object representing the biological sequence to be serialized. indent : int, optional The number of spaces used to indent the serialized sequence. Defaults to 5. """ # a flag to determine if I wrote header or not flag_header = False # magic numbers: there will be 60 letters (AA, bp) on each line chunk_size = 60 # letters (AA, bp) will be grouped by 10: each group is divided by # one space from each other frag_size = 10 # fasta sequence will have indent spaces on the left, chunk_size/frag_size # groups of frag_size letters separated by n-1 groups of single spaces, # then the sequence length aligned on the right to get a string of # line_size. Setting left and right padding for semplicity pad_right = 65 # there are also 5 columns for indentation pad_left = 10 # sequence number will be in the last 10 columns # get sequence as a string with lower letters (uniprot will be upper!) seq = str(obj).lower() # count bases in sequence. Frequencies returns a dictionary of occurences # of A,C,G,T. 
Sequences are stored always in capital letters freq = obj.frequencies() # get values instead of popping them: I can't assure that the letter T, # for example, is always present n_a = freq.get("A", 0) n_c = freq.get("C", 0) n_g = freq.get("G", 0) n_t = freq.get("T", 0) # this will be the count of all others letters (more than ACGT) n_others = len(obj) - (n_a + n_c + n_g + n_t) # define SQ like this: # SQ Sequence 275 BP; 63 A; 72 C; 88 G; 52 T; 0 other; SQ = ( "SQ Sequence {size} {unit}; {n_a} A; {n_c} C; {n_g} G; " + "{n_t} T; {n_others} other;\n" ) # TODO: deal with protein SQ: they have a sequence header like: # SQ SEQUENCE 256 AA; 29735 MW; B4840739BF7D4121 CRC64; # apply format SQ = SQ.format( size=len(obj), unit=obj.metadata["LOCUS"]["unit"].upper(), n_a=n_a, n_c=n_c, n_g=n_g, n_t=n_t, n_others=n_others, ) for i in range(0, len(seq), chunk_size): line = seq[i : i + chunk_size] # pad string left and right s = "{indent}{s:<{pad_right}}{pos:>{pad_left}}\n".format( indent=" " * indent, s=chunk_str(line, frag_size, " "), pad_left=pad_left, pos=i + len(line), pad_right=pad_right, ) if not flag_header: # First time here. Add SQ header to sequence s = SQ + s # When I added header, I need to turn off this flag flag_header = True yield s def _embl_parse_feature_table(lines, length): """Parse embl feature tables.""" # define interval metadata imd = IntervalMetadata(length) # get only FT records, and remove key from line lines = [line[2:] for line in lines if line.startswith("FT")] # magic number 19: after key removal, the lines of each feature # are indented with 19 spaces. feature_indent = " " * 19 section_splitter = _yield_section( lambda x: not x.startswith(feature_indent), skip_blanks=True, strip=False ) for section in section_splitter(lines): _parse_single_feature(section, imd) return imd def _serialize_feature_table(intervals, indent=21): """Serialize a list of ``Interval`` objects into EMBL format. 
Parameters ---------- intervals : list of ``Interval`` A list of Interval objects representing the intervals to be serialized. indent : int, optional The number of spaces to indent each serialized feature. Defaults to 21. """ # define a embl wrapper object. I need to replace only the first two # characters from _serialize_single_feature output wrapper = _get_embl_wrapper("FT", indent=2, subsequent_indent=21) for intvl in intervals: tmp = _serialize_single_feature(intvl, indent) output = [] # I need to remove two spaces, cause I will add a FT key for line in tmp.split("\n"): output += wrapper.wrap(line[2:]) # re add newline between elements, and a final "\n" yield "\n".join(output) + "\n" def _parse_date(lines, label_delimiter=None, return_label=False): """Parse embl date records.""" # take the first line, and derive a label label = lines[0].split(label_delimiter, 1)[0] # read all the others dates and append to data array data = [line.split(label_delimiter, 1)[-1] for line in lines] # strip returned data data = [i.strip() for i in data] # finally return data array, and the key if needed if return_label: return label, data else: return data def _serialize_date(embl_key, date_list, indent=5): """Serialize date line. Parameters ---------- embl_key : str The EMBL key ID corresponding to the date line. date_list : list A list of dates associated with the sequence entry. indent : int, optional The number of spaces used to indent the serialized date line. Defaults to 5. 
""" # get an embl wrapper wrapper = _get_embl_wrapper(embl_key, indent) # # serialize date and return them as a string return _serialize_list(wrapper, date_list) def _serialize_comment(embl_key, obj, indent=5): """Serialize comment (like Assembly).""" # obj is a string, Split it by newlines data = obj.split("\n") # get an embl wrapper wrapper = _get_embl_wrapper(embl_key, indent) # serialize data and return it return _serialize_list(wrapper, data) def _serialize_dbsource(embl_key, obj, indent=5): """Serialize DBSOURCE.""" # data are stored like 'SILVA-LSU; LK021130. SILVA-SSU; LK021130. ... # I need to split string after final period (not AAT09660.1) # deal with re pattern. A pattern to find a period as end of sentence DR = re.compile(r"\.\s") # splitting by this pattern, I will have # ["SILVA-LSU; LK021130", "SILVA-SSU; LK021130", ...] # I need that each of them will be in a DR record. # get an embl wrapper wrapper = _get_embl_wrapper(embl_key, indent) # serialize data and return it. Split dbsource using re. Add a # final period between elements since I removed it by splitting return _serialize_list(wrapper, re.split(DR, obj), sep=".\n") def _parse_assembly(lines): """Parse embl assembly records.""" output = [] # first line is header, skip it for line in lines[1:]: data = line.split() # data could have comp feature or not. 
First element in data is 'AS' if len(data) == 5: res = dict( zip( ["local_span", "primary_identifier", "primary_span", "comp"], data[1:], ) ) elif len(data) == 4: res = dict( zip( ["local_span", "primary_identifier", "primary_span", "comp"], data[1:] + [""], ) ) else: raise EMBLFormatError("Can't parse assembly line %s" % line) # append res to output output += [res] return output # Map a function to each section of the entry _PARSER_TABLE = { "LOCUS": _parse_id, "SOURCE": _parse_source, "DATE": _parse_date, "REFERENCE": _parse_reference, "FEATURES": _embl_parse_feature_table, "ORIGIN": _parse_sequence, "ASSEMBLY": _parse_assembly, } # for writer functions _SERIALIZER_TABLE = { "LOCUS": _serialize_id, "SOURCE": _serialize_source, "DATE": _serialize_date, "REFERENCE": _serialize_reference, "FEATURES": _serialize_feature_table, "COMMENT": _serialize_comment, "DBSOURCE": _serialize_dbsource, } scikit-bio-0.6.2/skbio/io/format/emptyfile.py000066400000000000000000000022321464262511300211350ustar00rootroot00000000000000r"""Empty Files (:mod:`skbio.io.format.emptyfile`) ============================================== .. currentmodule:: skbio.io.format.emptyfile This format exists to make debugging simpler, often an empty file is a mistake which can take an embarrasing amount of time to notice. This format has only a sniffer and no readers or writers, so error messages will indicate as such if an empty file is accidentally used as input. Format Support -------------- **Has Sniffer: Yes** Format Specification -------------------- An empty file consists of only whitespace characters. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from skbio.io import create_format emptyfile = create_format("") @emptyfile.sniffer() def _empty_file_sniffer(fh): for line in fh: if line.strip(): return False, {} return True, {} scikit-bio-0.6.2/skbio/io/format/fasta.py000066400000000000000000001051641464262511300202450ustar00rootroot00000000000000r"""FASTA/QUAL format (:mod:`skbio.io.format.fasta`) ================================================ .. currentmodule:: skbio.io.format.fasta The FASTA file format (``fasta``) stores biological (i.e., nucleotide or protein) sequences in a simple plain text format that is both human-readable and easy to parse. The file format was first introduced and used in the FASTA software package [1]_. Additional descriptions of the file format can be found in [2]_ and [3]_. An example of a FASTA-formatted file containing two DNA sequences:: >seq1 db-accession-149855 CGATGTCGATCGATCGATCGATCAG >seq2 db-accession-34989 CATCGATCGATCGATGCATGCATGCATG The QUAL file format is an additional format related to FASTA. A FASTA file is sometimes accompanied by a QUAL file, particuarly when the FASTA file contains sequences generated on a high-throughput sequencing instrument. QUAL files store a Phred quality score (nonnegative integer) for each base in a sequence stored in FASTA format (see [4]_ for more details). scikit-bio supports reading and writing FASTA (and optionally QUAL) file formats. 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |generator of :mod:`skbio.sequence.Sequence` objects | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.alignment.TabularMSA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.RNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.Protein` | +------+------+---------------------------------------------------------------+ .. note:: All readers and writers support an optional QUAL file via the ``qual`` parameter. If one is provided, quality scores will be read/written in addition to FASTA sequence data. Format Specification -------------------- The following sections define the FASTA and QUAL file formats in detail. FASTA Format ^^^^^^^^^^^^ A FASTA file contains one or more biological sequences. The sequences are stored sequentially, with a *record* for each sequence (also referred to as a *FASTA record*). Each *record* consists of a single-line *header* (sometimes referred to as a *defline*, *label*, *description*, or *comment*) followed by the sequence data, optionally split over multiple lines. .. note:: Blank or whitespace-only lines are only allowed at the beginning of the file, between FASTA records, or at the end of the file. A blank or whitespace-only line after the header line, within the sequence (for FASTA files), or within quality scores (for QUAL files) will raise an error. 
scikit-bio will ignore leading and trailing whitespace characters on each line while reading. .. note:: scikit-bio does not currently support legacy FASTA format (i.e., headers/comments denoted with a semicolon). The format supported by scikit-bio (described below in detail) most closely resembles the description given in NCBI's BLAST documentation [3]_. See [2]_ for more details on legacy FASTA format. If you would like legacy FASTA format support added to scikit-bio, please consider submitting a feature request on the scikit-bio `issue tracker `_ (pull requests are also welcome!). Sequence Header ~~~~~~~~~~~~~~~ Each sequence header consists of a single line beginning with a greater-than (``>``) symbol. Immediately following this is a sequence identifier (ID) and description separated by one or more whitespace characters. .. note:: When reading a FASTA-formatted file, the sequence ID and description are stored in the sequence `metadata` attribute, under the `'id'` and `'description'` keys, repectively. Both are optional. Each will be represented as the empty string (``''``) in `metadata` if it is not present in the header. When writing a FASTA-formatted file, sequence `metadata` identified by keys `'id'` and `'description'` will be converted to strings and written as the sequence identifier and description, respectively. Each will be written as the empty string if not present in sequence `metadata`. A sequence ID consists of a single *word*: all characters after the greater- than symbol and before the first whitespace character (if any) are taken as the sequence ID. Unique sequence IDs are not strictly enforced by the FASTA format itself. A single standardized ID format is similarly not enforced by the FASTA format, though it is often common to use a unique library accession number for a sequence ID (e.g., NCBI's FASTA defline format [5]_). If a description is present, it is taken as the remaining characters that follow the sequence ID and initial whitespace(s). 
The description is considered additional information about the sequence (e.g., comments about the source of the sequence or the molecule that it encodes). For example, consider the following header:: >seq1 db-accession-149855 ``seq1`` is the sequence ID and ``db-accession-149855`` is the sequence description. .. note:: scikit-bio's readers will remove all leading and trailing whitespace from the description. If a header line begins with whitespace following the ``>``, the ID is assumed to be missing and the remainder of the line is taken as the description. Sequence Data ~~~~~~~~~~~~~ Biological sequence data follows the header, and can be split over multiple lines. The sequence data (i.e., nucleotides or amino acids) are stored using the standard IUPAC lexicon (single-letter codes). .. note:: scikit-bio supports both upper and lower case characters. This functionality depends on the type of object the data is being read into. For ``Sequence`` objects, sciki-bio doesn't care about the case. Other sequence objects do, but all provide the `lowercase` parameter to control case functionality. Refer to each class's respective constructor documentation for details. Both ``-`` and ``.`` are supported as gap characters when reading into ``DNA``, ``RNA``, and ``Protein`` sequence objects. Validation is performed when reading into scikit-bio sequence objects that enforce an alphabet (e.g., ``DNA``, ``RNA``, ``Protein``). If any invalid characters are found while reading from the FASTA file, an exception is raised. QUAL Format ^^^^^^^^^^^ A QUAL file contains quality scores for one or more biological sequences stored in a corresponding FASTA file. QUAL format is very similar to FASTA format: it stores records sequentially, with each record beginning with a header line containing a sequence ID and description. The same rules apply to QUAL headers as FASTA headers (see the above sections for details). scikit-bio processes FASTA and QUAL headers in exactly the same way. 
Instead of storing biological sequence data in each record, a QUAL file stores a Phred quality score for each base in the corresponding sequence. Quality scores are represented as nonnegative integers separated by whitespace (typically a single space or newline), and can span multiple lines. .. note:: When reading a QUAL-formatted file, quality scores are stored in the sequence's `positional_metadata` attribute under the `'quality'` column. When writing a QUAL-formatted file, a sequence's `positional_metadata` `'quality'` column will be written as the quality scores. .. note:: When reading FASTA and QUAL files, scikit-bio requires records to be in the same order in both files (i.e., each FASTA and QUAL record must have the same ID and description after being parsed). In addition to having the same order, the number of FASTA records must match the number of QUAL records (i.e., missing or additonal records are not allowed). scikit-bio also requires that the number of quality scores match the number of bases in the corresponding sequence. When writing FASTA and QUAL files, scikit-bio will maintain the same ordering of records in both files (i.e., using the same ID and description in both records) to support future reading. Format Parameters ----------------- The following parameters are available to change how FASTA/QUAL files are read or written in scikit-bio. QUAL File Parameter (Readers and Writers) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``qual`` parameter is available to all FASTA format readers and writers. It can be any file-like type supported by scikit-bio's I/O registry (e.g., file handle, file path, etc.). If ``qual`` is provided when reading, quality scores will be included in each in-memory ``Sequence`` object, in addition to sequence data stored in the FASTA file. When writing, quality scores will be written in QUAL format in addition to the sequence data being written in FASTA format. 
Reader-specific Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^ The available reader parameters differ depending on which reader is used. Generator and TabularMSA Reader Parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``constructor`` parameter can be used with the ``Sequence`` generator and ``TabularMSA`` FASTA readers. ``constructor`` specifies the type of in-memory sequence object to read each sequence into. For example, if you know that the FASTA file you're reading contains protein sequences, you would pass ``constructor=Protein`` to the reader call. When reading into a ``Sequence`` generator, ``constructor`` defaults to ``Sequence`` and must be a subclass of ``Sequence`` if supplied. When reading into a ``TabularMSA``, ``constructor`` is a required format parameter and must be a subclass of ``GrammaredSequence`` (e.g., ``DNA``, ``RNA``, ``Protein``). .. note:: The FASTA sniffer will not attempt to guess the ``constructor`` parameter. Sequence Reader Parameters ~~~~~~~~~~~~~~~~~~~~~~~~~~ The ``seq_num`` parameter can be used with the ``Sequence``, ``DNA``, ``RNA``, and ``Protein`` FASTA readers. ``seq_num`` specifies which sequence to read from the FASTA file (and optional QUAL file), and defaults to 1 (i.e., such that the first sequence is read). For example, to read the 50th sequence from a FASTA file, you would pass ``seq_num=50`` to the reader call. Writer-specific Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^ The following parameters are available to all FASTA format writers: - ``id_whitespace_replacement``: string to replace **each** whitespace character in a sequence ID. This parameter is useful for cases where an in-memory sequence ID contains whitespace, which would result in an on-disk representation that would not be read back into memory as the same ID (since IDs in FASTA format cannot contain whitespace). Defaults to ``_``. 
If ``None``, no whitespace replacement is performed and IDs are written as they are stored in memory (this has the potential to create an invalid FASTA-formatted file; see note below). This parameter also applies to a QUAL file if one is provided. - ``description_newline_replacement``: string to replace **each** newline character in a sequence description. Since a FASTA header must be a single line, newlines are not allowed in sequence descriptions and must be replaced in order to write a valid FASTA file. Defaults to a single space. If ``None``, no newline replacement is performed and descriptions are written as they are stored in memory (this has the potential to create an invalid FASTA-formatted file; see note below). This parameter also applies to a QUAL file if one is provided. - ``max_width``: integer specifying the maximum line width (i.e., number of characters) for sequence data and/or quality scores. If a sequence or its quality scores are longer than ``max_width``, it will be split across multiple lines, each with a maximum width of ``max_width``. Note that there are some caveats when splitting quality scores. A single quality score will *never* be split across multiple lines, otherwise it would become two different quality scores when read again. Thus, splitting only occurs *between* quality scores. This makes it possible to have a single long quality score written on its own line that exceeds ``max_width``. For example, the quality score ``12345`` would not be split across multiple lines even if ``max_width=3``. Thus, a 5-character line would be written. Default behavior is to not split sequence data or quality scores across multiple lines. - ``lowercase``: String or boolean array. If a string, it is treated as a key into the positional metadata of the object. If a boolean array, it indicates characters to write in lowercase. Characters in the sequence corresponding to `True` values will be written in lowercase. 
The boolean array must be the same length as the sequence. .. note:: The FASTA format writers will have noticeably better runtime performance if ``id_whitespace_replacement`` and/or ``description_newline_replacement`` are set to ``None`` so that whitespace replacement is not performed during writing. However, this can potentially create invalid FASTA files, especially if there are newline characters in the IDs or descriptions. For IDs with whitespace, this can also affect how the IDs are read into memory in a subsequent read operation. For example, if an in-memory sequence ID is ``'seq 1'`` and ``id_whitespace_replacement=None``, reading the FASTA file back into memory would result in an ID of ``'seq'``, and ``'1'`` would be part of the sequence description. Examples -------- Reading and Writing FASTA Files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Suppose we have the following FASTA file with five equal-length sequences (example modified from [6]_):: >seq1 Turkey AAGCTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT >seq2 Salmo gair AAGCCTTGGCAGTGCAGGGTGAGCCGTGG CCGGGCACGGTAT >seq3 H. Sapiens ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA >seq4 Chimp AAACCCTTGCCG TTACGCTTAAAC CGAGGCCGGGAC ACTCAT >seq5 Gorilla AAACCCTTGCCGGTACGCTTAAACCATTGCCGGTACGCTTAA .. note:: Original copyright notice for the above example file: *(c) Copyright 1986-2008 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.* Note that the sequences are not required to be of equal length in order for the file to be a valid FASTA file (this depends on the object that you're reading the file into). Also note that some of the sequences occur on a single line, while others are split across multiple lines. 
Let's define this file in-memory as a ``StringIO``, though this could be a real file path, file handle, or anything that's supported by scikit-bio's I/O registry in practice: >>> fl = [">seq1 Turkey\n", ... "AAGCTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT\n", ... ">seq2 Salmo gair\n", ... "AAGCCTTGGCAGTGCAGGGTGAGCCGTGG\n", ... "CCGGGCACGGTAT\n", ... ">seq3 H. Sapiens\n", ... "ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA\n", ... ">seq4 Chimp\n", ... "AAACCCTTGCCG\n", ... "TTACGCTTAAAC\n", ... "CGAGGCCGGGAC\n", ... "ACTCAT\n", ... ">seq5 Gorilla\n", ... "AAACCCTTGCCGGTACGCTTAAACCATTGCCGGTACGCTTAA\n"] Since these sequences are of equal length (presumably because they've been aligned), let's read the FASTA file into a ``TabularMSA`` object: >>> from skbio import TabularMSA, DNA >>> msa = TabularMSA.read(fl, constructor=DNA) >>> msa TabularMSA[DNA] ------------------------------------------ Stats: sequence count: 5 position count: 42 ------------------------------------------ AAGCTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA AAACCCTTGCCGTTACGCTTAAACCGAGGCCGGGACACTCAT AAACCCTTGCCGGTACGCTTAAACCATTGCCGGTACGCTTAA Note that we didn't specify a file format in the ``read`` call. The FASTA sniffer detected the correct file format for us! To write the ``TabularMSA`` in FASTA format: >>> from io import StringIO >>> with StringIO() as fh: ... print(msa.write(fh).getvalue()) >seq1 Turkey AAGCTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT >seq2 Salmo gair AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT >seq3 H. Sapiens ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA >seq4 Chimp AAACCCTTGCCGTTACGCTTAAACCGAGGCCGGGACACTCAT >seq5 Gorilla AAACCCTTGCCGGTACGCTTAAACCATTGCCGGTACGCTTAA ``TabularMSA`` loads all of the sequences from the FASTA file into memory at once. If the FASTA file is large (which is often the case), this may be infeasible if you don't have enough memory. 
To work around this issue, you can stream the sequences using scikit-bio's generator-based FASTA reader and writer. The generator-based reader yields ``Sequence`` objects (or subclasses if ``constructor`` is supplied) one at a time, instead of loading all sequences into memory. For example, let's use the generator-based reader to process a single sequence at a time in a ``for`` loop: >>> import skbio.io >>> for seq in skbio.io.read(fl, format='fasta'): ... seq ... print('') Sequence ------------------------------------------------ Metadata: 'description': 'Turkey' 'id': 'seq1' Stats: length: 42 ------------------------------------------------ 0 AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT Sequence ------------------------------------------------ Metadata: 'description': 'Salmo gair' 'id': 'seq2' Stats: length: 42 ------------------------------------------------ 0 AAGCCTTGGC AGTGCAGGGT GAGCCGTGGC CGGGCACGGT AT Sequence ------------------------------------------------ Metadata: 'description': 'H. 
Sapiens' 'id': 'seq3' Stats: length: 42 ------------------------------------------------ 0 ACCGGTTGGC CGTTCAGGGT ACAGGTTGGC CGTTCAGGGT AA Sequence ------------------------------------------------ Metadata: 'description': 'Chimp' 'id': 'seq4' Stats: length: 42 ------------------------------------------------ 0 AAACCCTTGC CGTTACGCTT AAACCGAGGC CGGGACACTC AT Sequence ------------------------------------------------ Metadata: 'description': 'Gorilla' 'id': 'seq5' Stats: length: 42 ------------------------------------------------ 0 AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AA A single sequence can also be read into a ``Sequence`` (or subclass): >>> from skbio import Sequence >>> seq = Sequence.read(fl) >>> seq Sequence ------------------------------------------------ Metadata: 'description': 'Turkey' 'id': 'seq1' Stats: length: 42 ------------------------------------------------ 0 AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT By default, the first sequence in the FASTA file is read. This can be controlled with ``seq_num``. For example, to read the fifth sequence: >>> seq = Sequence.read(fl, seq_num=5) >>> seq Sequence ------------------------------------------------ Metadata: 'description': 'Gorilla' 'id': 'seq5' Stats: length: 42 ------------------------------------------------ 0 AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AA We can use the same API to read the fifth sequence into a ``DNA`` sequence: >>> dna_seq = DNA.read(fl, seq_num=5) >>> dna_seq DNA ------------------------------------------------ Metadata: 'description': 'Gorilla' 'id': 'seq5' Stats: length: 42 has gaps: False has degenerates: False has definites: True GC-content: 50.00% ------------------------------------------------ 0 AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AA Individual sequence objects can also be written in FASTA format: >>> with StringIO() as fh: ... 
print(dna_seq.write(fh).getvalue()) >seq5 Gorilla AAACCCTTGCCGGTACGCTTAAACCATTGCCGGTACGCTTAA Reading and Writing FASTA/QUAL Files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ In addition to reading and writing standalone FASTA files, scikit-bio supports reading and writing FASTA and QUAL files together. Suppose we have the following FASTA file:: >seq1 db-accession-149855 CGATGTC >seq2 db-accession-34989 CATCGTC Also suppose we have the following QUAL file:: >seq1 db-accession-149855 40 39 39 4 50 1 100 >seq2 db-accession-34989 3 3 10 42 80 80 79 >>> fasta_fl = [ ... ">seq1 db-accession-149855\n", ... "CGATGTC\n", ... ">seq2 db-accession-34989\n", ... "CATCGTC\n"] >>> qual_fl = [ ... ">seq1 db-accession-149855\n", ... "40 39 39 4\n", ... "50 1 100\n", ... ">seq2 db-accession-34989\n", ... "3 3 10 42 80 80 79\n"] To read in a single ``Sequence`` at a time, we can use the generator-based reader as we did above, providing both FASTA and QUAL files: >>> for seq in skbio.io.read(fasta_fl, qual=qual_fl, format='fasta'): ... seq ... print('') Sequence ---------------------------------------- Metadata: 'description': 'db-accession-149855' 'id': 'seq1' Positional metadata: 'quality': Stats: length: 7 ---------------------------------------- 0 CGATGTC Sequence --------------------------------------- Metadata: 'description': 'db-accession-34989' 'id': 'seq2' Positional metadata: 'quality': Stats: length: 7 --------------------------------------- 0 CATCGTC Note that the sequence objects have quality scores stored as positional metadata since we provided a QUAL file. The other FASTA readers operate in a similar manner. 
Now let's load the sequences and their quality scores into a ``TabularMSA``: >>> msa = TabularMSA.read(fasta_fl, qual=qual_fl, constructor=DNA) >>> msa TabularMSA[DNA] --------------------- Stats: sequence count: 2 position count: 7 --------------------- CGATGTC CATCGTC To write the sequence data and quality scores in the ``TabularMSA`` to FASTA and QUAL files, respectively: >>> new_fasta_fh = StringIO() >>> new_qual_fh = StringIO() >>> _ = msa.write(new_fasta_fh, qual=new_qual_fh) >>> print(new_fasta_fh.getvalue()) >seq1 db-accession-149855 CGATGTC >seq2 db-accession-34989 CATCGTC >>> print(new_qual_fh.getvalue()) >seq1 db-accession-149855 40 39 39 4 50 1 100 >seq2 db-accession-34989 3 3 10 42 80 80 79 >>> new_fasta_fh.close() >>> new_qual_fh.close() References ---------- .. [1] Lipman, DJ; Pearson, WR (1985). "Rapid and sensitive protein similarity searches". Science 227 (4693): 1435-41. .. [2] http://en.wikipedia.org/wiki/FASTA_format .. [3] http://blast.ncbi.nlm.nih.gov/blastcgihelp.shtml .. [4] https://www.broadinstitute.org/crd/wiki/index.php/Qual .. [5] Madden T. The BLAST Sequence Analysis Tool. 2002 Oct 9 [Updated 2003 Aug 13]. In: McEntyre J, Ostell J, editors. The NCBI Handbook [Internet]. Bethesda (MD): National Center for Biotechnology Information (US); 2002-. Chapter 16. Available from: http://www.ncbi.nlm.nih.gov/books/NBK21097/ .. [6] http://evolution.genetics.washington.edu/phylip/doc/sequence.html """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ----------------------------------------------------------------------------

import itertools
import textwrap

import numpy as np

from skbio.io import create_format, FASTAFormatError, QUALFormatError
from skbio.io.registry import FileSentinel
from skbio.io.format._base import (
    _get_nth_sequence,
    _parse_fasta_like_header,
    _format_fasta_like_records,
    _line_generator,
    _too_many_blanks,
)
from skbio.util._misc import chunk_str
from skbio.alignment import TabularMSA
from skbio.sequence import Sequence, DNA, RNA, Protein


fasta = create_format("fasta")


@fasta.sniffer()
def _fasta_sniffer(fh):
    # Strategy:
    #   Ignore up to 5 blank/whitespace-only lines at the beginning of the
    #   file. Read up to 10 records. If at least one record is read (i.e.
    #   the file isn't empty) and no errors are thrown during reading, assume
    #   the file is in FASTA format. If a record appears to be QUAL, do *not*
    #   identify the file as FASTA since we don't want to sniff QUAL files as
    #   FASTA (technically they can be read as FASTA since the sequences may
    #   not be validated but it probably isn't what the user wanted). Also, if
    #   we add QUAL as its own file format in the future, we wouldn't want the
    #   FASTA and QUAL sniffers to both positively identify a QUAL file.
    if _too_many_blanks(fh, 5):
        return False, {}

    num_records = 10
    empty = True
    try:
        # _sniffer_data_parser raises FASTAFormatError when the record data
        # parse as quality scores, so QUAL files are rejected here.
        parser = _parse_fasta_raw(fh, _sniffer_data_parser, FASTAFormatError)
        for _ in zip(range(num_records), parser):
            empty = False
    except FASTAFormatError:
        return False, {}

    if empty:
        return False, {}
    else:
        return True, {}


def _sniffer_data_parser(chunks):
    # Data parser used only by the sniffer: accepts record data that look
    # like sequence data but NOT like quality scores (see strategy above).
    data = _parse_sequence_data(chunks)
    try:
        _parse_quality_scores(chunks)
    except QUALFormatError:
        return data
    else:
        # used for flow control within sniffer, user should never see this
        # message
        raise FASTAFormatError("Data appear to be quality scores.")


@fasta.reader(None)
def _fasta_to_generator(fh, qual=FileSentinel, constructor=Sequence, **kwargs):
    # Generator-based FASTA reader. When `qual` is provided, FASTA and QUAL
    # records are read in lockstep and must match pairwise in count, ID, and
    # description; quality scores are attached as positional metadata.
    if qual is None:
        for seq, id_, desc in _parse_fasta_raw(
            fh, _parse_sequence_data, FASTAFormatError
        ):
            yield constructor(seq, metadata={"id": id_, "description": desc}, **kwargs)
    else:
        fasta_gen = _parse_fasta_raw(fh, _parse_sequence_data, FASTAFormatError)
        qual_gen = _parse_fasta_raw(qual, _parse_quality_scores, QUALFormatError)

        # zip_longest with a None fill value lets us detect which file ran
        # out of records first.
        for fasta_rec, qual_rec in itertools.zip_longest(
            fasta_gen, qual_gen, fillvalue=None
        ):
            if fasta_rec is None:
                raise FASTAFormatError("QUAL file has more records than FASTA file.")
            if qual_rec is None:
                raise FASTAFormatError("FASTA file has more records than QUAL file.")

            fasta_seq, fasta_id, fasta_desc = fasta_rec
            qual_scores, qual_id, qual_desc = qual_rec

            if fasta_id != qual_id:
                raise FASTAFormatError(
                    "IDs do not match between FASTA and QUAL records: %r != %r"
                    % (str(fasta_id), str(qual_id))
                )
            if fasta_desc != qual_desc:
                raise FASTAFormatError(
                    "Descriptions do not match between FASTA and QUAL "
                    "records: %r != %r" % (str(fasta_desc), str(qual_desc))
                )

            # sequence and quality scores lengths are checked in constructor
            yield constructor(
                fasta_seq,
                metadata={"id": fasta_id, "description": fasta_desc},
                positional_metadata={"quality": qual_scores},
                **kwargs,
            )


@fasta.reader(Sequence)
def _fasta_to_sequence(fh, qual=FileSentinel, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as a generic Sequence.
    return _get_nth_sequence(
        _fasta_to_generator(fh, qual=qual, constructor=Sequence, **kwargs), seq_num
    )


@fasta.reader(DNA)
def _fasta_to_dna(fh, qual=FileSentinel, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as DNA.
    return _get_nth_sequence(
        _fasta_to_generator(fh, qual=qual, constructor=DNA, **kwargs), seq_num
    )


@fasta.reader(RNA)
def _fasta_to_rna(fh, qual=FileSentinel, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as RNA.
    return _get_nth_sequence(
        _fasta_to_generator(fh, qual=qual, constructor=RNA, **kwargs), seq_num
    )


@fasta.reader(Protein)
def _fasta_to_protein(fh, qual=FileSentinel, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as Protein.
    return _get_nth_sequence(
        _fasta_to_generator(fh, qual=qual, constructor=Protein, **kwargs), seq_num
    )


@fasta.reader(TabularMSA)
def _fasta_to_tabular_msa(fh, qual=FileSentinel, constructor=None, **kwargs):
    # TabularMSA requires an explicit sequence type to construct each row.
    if constructor is None:
        raise ValueError("Must provide `constructor`.")

    return TabularMSA(
        _fasta_to_generator(fh, qual=qual, constructor=constructor, **kwargs)
    )


@fasta.writer(None)
def _generator_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # Generator-based FASTA writer; optionally writes a parallel QUAL file
    # when `qual` is a writable handle. `max_width` wraps both sequence data
    # and quality scores.
    if max_width is not None:
        if max_width < 1:
            raise ValueError(
                "Maximum line width must be greater than zero (max_width=%d)."
                % max_width
            )
        if qual is not None:
            # define text wrapper for splitting quality scores here for
            # efficiency. textwrap docs recommend reusing a TextWrapper
            # instance when it is used many times. configure text wrapper to
            # never break "words" (i.e., integer quality scores) across lines
            qual_wrapper = textwrap.TextWrapper(
                width=max_width, break_long_words=False, break_on_hyphens=False
            )

    formatted_records = _format_fasta_like_records(
        obj,
        id_whitespace_replacement,
        description_newline_replacement,
        qual is not None,
        lowercase,
    )
    for header, seq_str, qual_scores in formatted_records:
        if max_width is not None:
            seq_str = chunk_str(seq_str, max_width, "\n")

        fh.write(">%s\n%s\n" % (header, seq_str))

        if qual is not None:
            # QUAL scores are written space-separated on one line, then
            # wrapped if a maximum width was requested.
            qual_str = " ".join(np.asarray(qual_scores, dtype=str))
            if max_width is not None:
                qual_str = qual_wrapper.fill(qual_str)
            qual.write(">%s\n%s\n" % (header, qual_str))


@fasta.writer(Sequence)
def _sequence_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # Write a single Sequence by delegating to the shared sequence writer.
    _sequences_to_fasta(
        [obj],
        fh,
        qual,
        id_whitespace_replacement,
        description_newline_replacement,
        max_width,
        lowercase,
    )


@fasta.writer(DNA)
def _dna_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # Write a single DNA sequence by delegating to the shared sequence writer.
    _sequences_to_fasta(
        [obj],
        fh,
        qual,
        id_whitespace_replacement,
        description_newline_replacement,
        max_width,
        lowercase,
    )


@fasta.writer(RNA)
def _rna_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # Write a single RNA sequence by delegating to the shared sequence writer.
    _sequences_to_fasta(
        [obj],
        fh,
        qual,
        id_whitespace_replacement,
        description_newline_replacement,
        max_width,
        lowercase,
    )


@fasta.writer(Protein)
def _protein_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # Write a single Protein sequence by delegating to the shared writer.
    _sequences_to_fasta(
        [obj],
        fh,
        qual,
        id_whitespace_replacement,
        description_newline_replacement,
        max_width,
        lowercase,
    )


@fasta.writer(TabularMSA)
def _tabular_msa_to_fasta(
    obj,
    fh,
    qual=FileSentinel,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    max_width=None,
    lowercase=None,
):
    # A TabularMSA is iterable over its sequences, so it can be passed
    # directly to the shared sequence writer.
    _sequences_to_fasta(
        obj,
        fh,
        qual,
        id_whitespace_replacement,
        description_newline_replacement,
        max_width,
        lowercase,
    )


def _parse_fasta_raw(fh, data_parser, error_type):
    """Raw parser for FASTA or QUAL files.

    Returns raw values (seq/qual, id, description). It is the responsibility
    of the caller to construct the correct in-memory object to hold the data.

    """
    # Skip any blank or whitespace-only lines at beginning of file
    try:
        seq_header = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        return

    # header check inlined here and below for performance
    if seq_header.startswith(">"):
        id_, desc = _parse_fasta_like_header(seq_header)
    else:
        raise error_type(
            "Found non-header line when attempting to read the 1st record:"
            "\n%s" % seq_header
        )

    data_chunks = []
    # `prev` tracks the previously-seen line so that a blank line followed by
    # more data within the same record can be detected and rejected.
    prev = seq_header
    for line in _line_generator(fh, skip_blanks=False):
        if line.startswith(">"):
            # new header, so yield current record and reset state
            yield data_parser(data_chunks), id_, desc
            data_chunks = []
            id_, desc = _parse_fasta_like_header(line)
        else:
            if line:
                # ensure no blank lines within a single record
                if not prev:
                    raise error_type(
                        "Found blank or whitespace-only line within record."
                    )
                data_chunks.append(line)
        prev = line
    # yield last record in file
    yield data_parser(data_chunks), id_, desc


def _parse_sequence_data(chunks):
    # Join the raw data lines of one record into a single sequence string.
    if not chunks:
        raise FASTAFormatError("Found header without sequence data.")
    return "".join(chunks)


def _parse_quality_scores(chunks):
    # Convert the raw data lines of one QUAL record into a uint8 array of
    # Phred scores, validating the 0-255 range along the way.
    if not chunks:
        raise QUALFormatError("Found header without quality scores.")

    qual_str = " ".join(chunks)
    try:
        quality = np.asarray(qual_str.split(), dtype=np.int64)
    except ValueError:
        raise QUALFormatError(
            "Could not convert quality scores to integers:\n%s" % str(qual_str)
        )

    if (quality < 0).any():
        raise QUALFormatError(
            "Encountered negative quality score(s). Quality scores must be "
            "greater than or equal to zero."
        )
    if (quality > 255).any():
        raise QUALFormatError(
            "Encountered quality score(s) greater than 255. scikit-bio only "
            "supports quality scores in the range 0-255 (inclusive) when "
            "reading QUAL files."
        )
    # safe after the range checks above; avoids a copy when possible
    return quality.astype(np.uint8, casting="unsafe", copy=False)


def _sequences_to_fasta(
    obj,
    fh,
    qual,
    id_whitespace_replacement,
    description_newline_replacement,
    max_width,
    lowercase=None,
):
    # Adapt any iterable of sequences to the generator-based writer.
    def seq_gen():
        yield from obj

    _generator_to_fasta(
        seq_gen(),
        fh,
        qual=qual,
        id_whitespace_replacement=id_whitespace_replacement,
        description_newline_replacement=description_newline_replacement,
        max_width=max_width,
        lowercase=lowercase,
    )
scikit-bio-0.6.2/skbio/io/format/fastq.py000066400000000000000000000531541464262511300202660ustar00rootroot00000000000000r"""FASTQ format (:mod:`skbio.io.format.fastq`)
===========================================

.. currentmodule:: skbio.io.format.fastq

The FASTQ file format (``fastq``) stores biological (e.g., nucleotide)
sequences and their quality scores in a simple plain text format that is both
human-readable and easy to parse. The file format was invented by Jim Mullikin
at the Wellcome Trust Sanger Institute but wasn't given a formal definition,
though it has informally become a standard file format for storing
high-throughput sequence data. More information about the format and its
variants can be found in [1]_ and [2]_.

Conceptually, a FASTQ file is similar to paired FASTA and QUAL files in that
it stores both biological sequences and their quality scores. FASTQ differs
from FASTA/QUAL because the quality scores are stored in the same file as the
biological sequence data.

An example FASTQ-formatted file containing two DNA sequences and their
quality scores:

..
code-block:: none @seq1 description 1 AACACCAAACTTCTCCACCACGTGAGCTACAAAAG + ````Y^T]`]c^cabcacc`^Lb^ccYT\T\Y\WF @seq2 description 2 TATGTATATATAACATATACATATATACATACATA + ]KZ[PY]_[YY^```ac^\\`bT``c`\aT``bbb Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |generator of :mod:`skbio.sequence.Sequence` objects | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.alignment.TabularMSA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.RNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.Protein` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- A FASTQ file contains one or more biological sequences and their corresponding quality scores stored sequentially as *records*. Each *record* consists of four sections: 1. Sequence header line consisting of a sequence identifier (ID) and description (both optional) 2. Biological sequence data (typically stored using the standard IUPAC lexicon), optionally split over multiple lines 3. Quality header line separating sequence data from quality scores (optionally repeating the ID and description from the sequence header line) 4. Quality scores as printable ASCII characters, optionally split over multiple lines. 
Decoding of quality scores will depend on the specified FASTQ variant (see below for more details) For the complete FASTQ format specification, see [1]_. scikit-bio's FASTQ implementation follows the format specification described in this excellent publication, including validating the implementation against the FASTQ example files provided in the publication's supplementary data. .. note:: IDs and descriptions will be parsed from sequence header lines in exactly the same way as FASTA headers (:mod:`skbio.io.format.fasta`). IDs, descriptions, and quality scores are also stored on, and written from, sequence objects in the same way as with FASTA. .. note:: Blank or whitespace-only lines are only allowed at the beginning of the file, between FASTQ records, or at the end of the file. A blank or whitespace-only line after the header line, within the sequence, or within quality scores will raise an error. scikit-bio will ignore leading and trailing whitespace characters on each line while reading. .. note:: Validation may be performed depending on the type of object the data is being read into. This behavior matches that of FASTA files. .. note:: scikit-bio will write FASTQ files in a normalized format, with each record section on a single line. Thus, each record will be composed of *exactly* four lines. The quality header line won't have the sequence ID and description repeated. .. note:: `lowercase` functionality is supported the same as with FASTA. Quality Score Variants ^^^^^^^^^^^^^^^^^^^^^^ FASTQ associates quality scores with sequence data, with each quality score encoded as a single printable ASCII character. In scikit-bio, all quality scores are decoded as Phred quality scores. This is the most common quality score metric, though there are others (e.g., Solexa quality scores). Unfortunately, different sequencers have different ways of encoding quality scores as ASCII characters, notably Sanger and Illumina. 
Below is a table highlighting the different encoding variants supported by scikit-bio, as well as listing the equivalent variant names used in the Open Bioinformatics Foundation (OBF) [3]_ projects (e.g., Biopython, BioPerl, etc.). +-----------+---------+----+--------+-----------------------------------------+ | Variant | ASCII |Off\|Quality | Notes | | | Range |set |Range | | +===========+=========+====+========+=========================================+ |sanger |33 to 126|33 |0 to 93 |Equivalent to OBF's fastq-sanger. | +-----------+---------+----+--------+-----------------------------------------+ |illumina1.3|64 to 126|64 |0 to 62 |Equivalent to OBF's fastq-illumina. Use | | | | | |this if your data was generated using | | | | | |Illumina 1.3-1.7 software. | +-----------+---------+----+--------+-----------------------------------------+ |illumina1.8|33 to 95 |33 |0 to 62 |Equivalent to sanger but with 0 to 62 | | | | | |quality score range check. Use this if | | | | | |your data was generated using Illumina | | | | | |1.8 software or later. | +-----------+---------+----+--------+-----------------------------------------+ |solexa |59 to 126|64 |-5 to 62|Not currently implemented. | +-----------+---------+----+--------+-----------------------------------------+ .. note:: When writing, Phred quality scores will be truncated to the maximum value in the variant's range and a warning will be issued. This is consistent with the OBF projects. When reading, an error will be raised if a decoded quality score is outside the variant's range. Format Parameters ----------------- The following parameters are available to all FASTQ format readers and writers: - ``variant``: A string indicating the quality score variant used to decode/encode Phred quality scores. Must be one of ``sanger``, ``illumina1.3``, ``illumina1.8``, or ``solexa``. This parameter is preferred over ``phred_offset`` because additional quality score range checks and conversions can be performed. 
It is also more explicit. - ``phred_offset``: An integer indicating the ASCII code offset used to decode/encode Phred quality scores. Must be in the range ``[33, 126]``. All decoded scores will be assumed to be Phred scores (i.e., no additional conversions are performed). Prefer using ``variant`` over this parameter whenever possible. .. note:: You must provide ``variant`` or ``phred_offset`` when reading or writing a FASTQ file. ``variant`` and ``phred_offset`` cannot both be provided at the same time. The following additional parameters are the same as in FASTA format (:mod:`skbio.io.format.fasta`): - ``constructor``: see ``constructor`` parameter in FASTA format - ``seq_num``: see ``seq_num`` parameter in FASTA format - ``id_whitespace_replacement``: see ``id_whitespace_replacement`` parameter in FASTA format - ``description_newline_replacement``: see ``description_newline_replacement`` parameter in FASTA format - ``lowercase``: see ``lowercase`` parameter in FASTA format Examples -------- Suppose we have the following FASTQ file with two DNA sequences:: @seq1 description 1 AACACCAAACTTCTCCACC ACGTGAGCTACAAAAG +seq1 description 1 ''''Y^T]']C^CABCACC `^LB^CCYT\T\Y\WF @seq2 description 2 TATGTATATATAACATATACATATATACATACATA + ]KZ[PY]_[YY^'''AC^\\'BT''C'\AT''BBB Note that the first sequence and its quality scores are split across multiple lines, while the second sequence and its quality scores are each on a single line. Also note that the first sequence has a duplicate ID and description on the quality header line, while the second sequence does not. Let's define this file in-memory as a ``StringIO``, though this could be a real file path, file handle, or anything that's supported by scikit-bio's I/O registry in practice: >>> from io import StringIO >>> fs = '\n'.join([ ... r"@seq1 description 1", ... r"AACACCAAACTTCTCCACC", ... r"ACGTGAGCTACAAAAG", ... r"+seq1 description 1", ... r"''''Y^T]']C^CABCACC", ... r"'^LB^CCYT\T\Y\WF", ... r"@seq2 description 2", ... 
r"TATGTATATATAACATATACATATATACATACATA", ... r"+", ... r"]KZ[PY]_[YY^'''AC^\\'BT''C'\AT''BBB"]) >>> fh = StringIO(fs) To load the sequences into a ``TabularMSA``, we run: >>> from skbio import TabularMSA, DNA >>> msa = TabularMSA.read(fh, constructor=DNA, variant='sanger') >>> msa TabularMSA[DNA] ----------------------------------- Stats: sequence count: 2 position count: 35 ----------------------------------- AACACCAAACTTCTCCACCACGTGAGCTACAAAAG TATGTATATATAACATATACATATATACATACATA Note that quality scores are decoded from Sanger. To load the second sequence as ``DNA``: >>> fh = StringIO(fs) # reload the StringIO to read from the beginning again >>> seq = DNA.read(fh, variant='sanger', seq_num=2) >>> seq DNA ---------------------------------------- Metadata: 'description': 'description 2' 'id': 'seq2' Positional metadata: 'quality': Stats: length: 35 has gaps: False has degenerates: False has definites: True GC-content: 14.29% ---------------------------------------- 0 TATGTATATA TAACATATAC ATATATACAT ACATA To write our ``TabularMSA`` to a FASTQ file with quality scores encoded using the ``illumina1.3`` variant: >>> new_fh = StringIO() >>> print(msa.write(new_fh, format='fastq', variant='illumina1.3').getvalue()) @seq1 description 1 AACACCAAACTTCTCCACCACGTGAGCTACAAAAG + FFFFx}s|F|b}b`ab`bbF}ka}bbxs{s{x{ve @seq2 description 2 TATGTATATATAACATATACATATATACATACATA + |jyzox|~zxx}FFF`b}{{FasFFbF{`sFFaaa >>> new_fh.close() Note that the file has been written in normalized format: sequence and quality scores each only occur on a single line and the sequence header line is not repeated in the quality header line. Note also that the quality scores are different because they have been encoded using a different variant. References ---------- .. [1] Peter J. A. Cock, Christopher J. Fields, Naohisa Goto, Michael L. Heuer, and Peter M. Rice. The Sanger FASTQ file format for sequences with quality scores, and the Solexa/Illumina FASTQ variants. Nucl. Acids Res. 
(2010) 38 (6): 1767-1771. first published online December 16, 2009.
doi:10.1093/nar/gkp1137 http://nar.oxfordjournals.org/content/38/6/1767
.. [2] http://en.wikipedia.org/wiki/FASTQ_format
.. [3] http://www.open-bio.org/

"""  # noqa: D205, D415

# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

import re

import numpy as np

from skbio.io import create_format, FASTQFormatError
from skbio.io.format._base import (
    _decode_qual_to_phred,
    _encode_phred_to_qual,
    _get_nth_sequence,
    _parse_fasta_like_header,
    _format_fasta_like_records,
    _line_generator,
    _too_many_blanks,
)
from skbio.alignment import TabularMSA
from skbio.sequence import Sequence, DNA, RNA, Protein

# matches any whitespace character; used to reject whitespace inside
# sequence data
_whitespace_regex = re.compile(r"\s")


fastq = create_format("fastq")


@fastq.sniffer()
def _fastq_sniffer(fh):
    # Strategy:
    #   Ignore up to 5 blank/whitespace-only lines at the beginning of the
    #   file. Read up to 10 records. If at least one record is read (i.e. the
    #   file isn't empty) and the quality scores are in printable ASCII range,
    #   assume the file is FASTQ.
    if _too_many_blanks(fh, 5):
        return False, {}

    try:
        not_empty = False
        for _, seq in zip(range(10), _fastq_to_generator(fh, phred_offset=33)):
            # An Illumina 1.8+ (Casava) header has 10 colon-separated fields
            # and a Y/N "filtered" flag as the 2nd description field; if seen,
            # suggest the illumina1.8 variant to the reader.
            split_length = len(
                (seq.metadata["id"] + seq.metadata["description"]).split(":")
            )
            description = seq.metadata["description"].split(":")
            if split_length == 10 and description[1] in "YN":
                return True, {"variant": "illumina1.8"}
            not_empty = True
        return not_empty, {}
    except (FASTQFormatError, ValueError):
        return False, {}


@fastq.reader(None)
def _fastq_to_generator(
    fh, variant=None, phred_offset=None, constructor=Sequence, **kwargs
):
    # Generator-based FASTQ reader: yields one sequence object per record
    # with decoded Phred scores attached as 'quality' positional metadata.
    # Skip any blank or whitespace-only lines at beginning of file
    try:
        seq_header = next(_line_generator(fh, skip_blanks=True))
    except StopIteration:
        return

    if not seq_header.startswith("@"):
        raise FASTQFormatError(
            "Expected sequence (@) header line at start of file: %r" % str(seq_header)
        )

    # _parse_quality_scores returns the next record's @ header (or None at
    # EOF), which drives this loop.
    while seq_header is not None:
        id_, desc = _parse_fasta_like_header(seq_header)
        seq, qual_header = _parse_sequence_data(fh, seq_header)

        # a repeated quality header must match the sequence header exactly
        if qual_header != "+" and qual_header[1:] != seq_header[1:]:
            raise FASTQFormatError(
                "Sequence (@) and quality (+) header lines do not match: "
                "%r != %r" % (str(seq_header[1:]), str(qual_header[1:]))
            )

        phred_scores, seq_header = _parse_quality_scores(
            fh, len(seq), variant, phred_offset, qual_header
        )
        yield constructor(
            seq,
            metadata={"id": id_, "description": desc},
            positional_metadata={"quality": phred_scores},
            **kwargs,
        )


@fastq.reader(Sequence)
def _fastq_to_sequence(fh, variant=None, phred_offset=None, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as a generic Sequence.
    return _get_nth_sequence(
        _fastq_to_generator(
            fh,
            variant=variant,
            phred_offset=phred_offset,
            constructor=Sequence,
            **kwargs,
        ),
        seq_num,
    )


@fastq.reader(DNA)
def _fastq_to_dna(fh, variant=None, phred_offset=None, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as DNA.
    return _get_nth_sequence(
        _fastq_to_generator(
            fh, variant=variant, phred_offset=phred_offset, constructor=DNA, **kwargs
        ),
        seq_num,
    )


@fastq.reader(RNA)
def _fastq_to_rna(fh, variant=None, phred_offset=None, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as RNA.
    return _get_nth_sequence(
        _fastq_to_generator(
            fh, variant=variant, phred_offset=phred_offset, constructor=RNA, **kwargs
        ),
        seq_num,
    )


@fastq.reader(Protein)
def _fastq_to_protein(fh, variant=None, phred_offset=None, seq_num=1, **kwargs):
    # Read the `seq_num`-th (1-based) record as Protein.
    return _get_nth_sequence(
        _fastq_to_generator(
            fh,
            variant=variant,
            phred_offset=phred_offset,
            constructor=Protein,
            **kwargs,
        ),
        seq_num,
    )


@fastq.reader(TabularMSA)
def _fastq_to_tabular_msa(
    fh, variant=None, phred_offset=None, constructor=None, **kwargs
):
    # TabularMSA requires an explicit sequence type to construct each row.
    if constructor is None:
        raise ValueError("Must provide `constructor`.")

    return TabularMSA(
        _fastq_to_generator(
            fh,
            variant=variant,
            phred_offset=phred_offset,
            constructor=constructor,
            **kwargs,
        )
    )


@fastq.writer(None)
def _generator_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # Generator-based FASTQ writer. Output is normalized: exactly four lines
    # per record and a bare '+' quality header.
    formatted_records = _format_fasta_like_records(
        obj,
        id_whitespace_replacement,
        description_newline_replacement,
        True,
        lowercase=lowercase,
    )
    for header, seq_str, qual_scores in formatted_records:
        qual_str = _encode_phred_to_qual(
            qual_scores, variant=variant, phred_offset=phred_offset
        )
        fh.write("@")
        fh.write(header)
        fh.write("\n")
        fh.write(seq_str)
        fh.write("\n+\n")
        fh.write(qual_str)
        fh.write("\n")


@fastq.writer(Sequence)
def _sequence_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # Write a single Sequence by delegating to the shared sequence writer.
    _sequences_to_fastq(
        [obj],
        fh,
        variant,
        phred_offset,
        id_whitespace_replacement,
        description_newline_replacement,
        lowercase=lowercase,
    )


@fastq.writer(DNA)
def _dna_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # Write a single DNA sequence by delegating to the shared sequence writer.
    _sequences_to_fastq(
        [obj],
        fh,
        variant,
        phred_offset,
        id_whitespace_replacement,
        description_newline_replacement,
        lowercase=lowercase,
    )


@fastq.writer(RNA)
def _rna_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # Write a single RNA sequence by delegating to the shared sequence writer.
    _sequences_to_fastq(
        [obj],
        fh,
        variant,
        phred_offset,
        id_whitespace_replacement,
        description_newline_replacement,
        lowercase=lowercase,
    )


@fastq.writer(Protein)
def _protein_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # Write a single Protein sequence by delegating to the shared writer.
    _sequences_to_fastq(
        [obj],
        fh,
        variant,
        phred_offset,
        id_whitespace_replacement,
        description_newline_replacement,
        lowercase=lowercase,
    )


@fastq.writer(TabularMSA)
def _tabular_msa_to_fastq(
    obj,
    fh,
    variant=None,
    phred_offset=None,
    id_whitespace_replacement="_",
    description_newline_replacement=" ",
    lowercase=None,
):
    # A TabularMSA is iterable over its sequences, so it can be passed
    # directly to the shared sequence writer.
    _sequences_to_fastq(
        obj,
        fh,
        variant,
        phred_offset,
        id_whitespace_replacement,
        description_newline_replacement,
        lowercase=lowercase,
    )


def _blank_error(unique_text):
    # Raise a FASTQFormatError about an illegal blank/whitespace-only line,
    # with `unique_text` describing where it was found.
    error_string = ("Found blank or whitespace-only line {} in " "FASTQ file").format(
        unique_text
    )
    raise FASTQFormatError(error_string)


def _parse_sequence_data(fh, prev):
    # Accumulate sequence lines until the quality (+) header line is reached.
    # Returns (sequence string, quality header line). `prev` is the
    # previously-read line, used to detect illegal blank lines.
    seq_chunks = []
    for chunk in _line_generator(fh, skip_blanks=False):
        if chunk.startswith("+"):
            if not prev:
                _blank_error("before '+'")
            if not seq_chunks:
                raise FASTQFormatError("Found FASTQ record without sequence data.")
            return "".join(seq_chunks), chunk
        elif chunk.startswith("@"):
            raise FASTQFormatError(
                "Found FASTQ record that is missing a quality (+) header line "
                "after sequence data."
            )
        else:
            if not prev:
                _blank_error("after header or within sequence")
            if _whitespace_regex.search(chunk):
                raise FASTQFormatError(
                    "Found whitespace in sequence data: %r" % str(chunk)
                )
            seq_chunks.append(chunk)
        prev = chunk

    raise FASTQFormatError("Found incomplete/truncated FASTQ record at end of file.")


def _parse_quality_scores(fh, seq_len, variant, phred_offset, prev):
    # Accumulate and decode quality lines until exactly `seq_len` score
    # characters have been read. Returns (phred score array, next record's
    # @ header line or None at EOF).
    phred_scores = []
    qual_len = 0
    for chunk in _line_generator(fh, skip_blanks=False):
        if chunk:
            # only treat an @ line as the next header once enough quality
            # characters have been read -- '@' is also a valid quality char
            if chunk.startswith("@") and qual_len == seq_len:
                return np.hstack(phred_scores), chunk
            else:
                if not prev:
                    _blank_error("after '+' or within quality scores")
                qual_len += len(chunk)

                if qual_len > seq_len:
                    raise FASTQFormatError(
                        "Found more quality score characters than sequence "
                        "characters. Extra quality score characters: %r"
                        % chunk[-(qual_len - seq_len) :]
                    )

                phred_scores.append(
                    _decode_qual_to_phred(
                        chunk, variant=variant, phred_offset=phred_offset
                    )
                )
        prev = chunk

    if qual_len != seq_len:
        raise FASTQFormatError("Found incomplete/truncated FASTQ record at end of file.")

    return np.hstack(phred_scores), None


def _sequences_to_fastq(
    obj,
    fh,
    variant,
    phred_offset,
    id_whitespace_replacement,
    description_newline_replacement,
    lowercase=None,
):
    # Adapt any iterable of sequences to the generator-based writer.
    def seq_gen():
        yield from obj

    _generator_to_fastq(
        seq_gen(),
        fh,
        variant=variant,
        phred_offset=phred_offset,
        id_whitespace_replacement=id_whitespace_replacement,
        description_newline_replacement=description_newline_replacement,
        lowercase=lowercase,
    )
scikit-bio-0.6.2/skbio/io/format/genbank.py000066400000000000000000000631231464262511300205520ustar00rootroot00000000000000"""GenBank format (:mod:`skbio.io.format.genbank`)
===============================================

.. currentmodule:: skbio.io.format.genbank

GenBank format (GenBank Flat File Format) stores sequence and its annotation
together. The start of the annotation section is marked by a line beginning
with the word "LOCUS".
The start of sequence section is marked by a line beginning with the word "ORIGIN" and the end of the section is marked by a line with only "//". The GenBank file usually ends with .gb or sometimes .gbk. The GenBank format for protein has been renamed to GenPept. The GenBank (for nucleotide) and Genpept are essentially the same format. An example of a GenBank file can be seen here [1]_. Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.RNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.Protein` | +------+------+---------------------------------------------------------------+ |Yes | Yes | generator of :mod:`skbio.sequence.Sequence` objects | +------+------+---------------------------------------------------------------+ Format Specification -------------------- Sections before ``FEATURES`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ All the sections before ``FEATURES`` will be read into the attribute of ``metadata``. The header and its content of a section is stored as a pair of key and value in ``metadata``. For the ``REFERENCE`` section, its value is stored as a list, as there are often multiple reference sections in one GenBank record. .. _genbank_feature_section: ``FEATURES`` section ^^^^^^^^^^^^^^^^^^^^ The International Nucleotide Sequence Database Collaboration (INSDC [2]_) is a joint effort among the DDBJ, EMBL, and GenBank. 
These organisations all use the same "Feature Table" layout in their plain text flat file formats, which are documented in detail [3]_. The feature keys and their qualifiers are also described in this webpage [4]_. The ``FEATURES`` section will be stored in ``interval_metadata`` of ``Sequence`` or its sub-class. Each sub-section is stored as an ``Interval`` object in ``interval_metadata``. Each ``Interval`` object has ``metadata`` keeping the information of this feature in the sub-section. To normalize the vocabulary between multiple formats (currently only the INSDC Feature Table and GFF3) to store metadata of interval features, we rename some terms in some formats to the same common name when parsing them into memory, as described in this table: +-----------+-----------+-----------+---------+------------------------------+ |INSDC |GFF3 |Key stored |Value |Description | |feature |columns or | |type | | |table |attributes | |stored | | +===========+===========+===========+=========+==============================+ |inference |source |source |str |the algorithm or experiment | | |(column 2) | | |used to generate this feature | +-----------+-----------+-----------+---------+------------------------------+ |feature key|type |type |str |the type of the feature | | |(column 3) | | | | +-----------+-----------+-----------+---------+------------------------------+ |N/A |score |score |float |the score of the feature | | |(column 6) | | | | +-----------+-----------+-----------+---------+------------------------------+ |N/A |strand |strand |str |the strand of the feature. + | | |(column 7) | | |for positive strand, - for | | | | | |minus strand, and . for | | | | | |features that are not | | | | | |stranded. In addition, ? can | | | | | |be used for features whose | | | | | |strandedness is relevant, but | | | | | |unknown. 
| +-----------+-----------+-----------+---------+------------------------------+ |codon_start|phase |phase |int |the offset at which the first | | |(column 8) | | |complete codon of a coding | | | | | |feature can be found, relative| | | | | |to the first base of that | | | | | |feature. It is 0, 1, or 2 in | | | | | |GFF3 or 1, 2, or 3 in GenBank.| | | | | |The stored value is 0, 1, or | | | | | |2, following in GFF3 format. | +-----------+-----------+-----------+---------+------------------------------+ |db_xref |Dbxref |db_xref |list of |A database cross reference | | | | |str | | +-----------+-----------+-----------+---------+------------------------------+ |N/A |ID |ID |str |feature ID | +-----------+-----------+-----------+---------+------------------------------+ |note |Note |note |str |any comment or additional | | | | | |information | +-----------+-----------+-----------+---------+------------------------------+ |translation|N/A |translation|str |the protein sequence for CDS | | | | | |features | +-----------+-----------+-----------+---------+------------------------------+ ``Location`` string +++++++++++++++++++ There are 5 types of location descriptors defined in Feature Table. This explains how they will be parsed into the bounds of ``Interval`` object (note it converts the 1-based coordinate to 0-based): 1. a single base number. e.g. 67. This is parsed to ``(66, 67)``. 2. a site between two neighboring bases. e.g. 67^68. This is parsed to ``(66, 67)``. 3. a single base from inside a range. e.g. 67.89. This is parsed to ``(66, 89)``. 4. a pair of base numbers defining a sequence span. e.g. 67..89. This is parsed to ``(66, 89)``. 5. a remote sequence identifier followed by a location descriptor defined above. e.g. J00123.1:67..89. This will be discarded because it is not on the current sequence. When it is combined with local descriptor like J00123.1:67..89,200..209, the local part will be kept to be ``(199, 209)``. .. 
note:: The Location string is fully stored in ``Interval.metadata`` with key ``__location``. The key starting with ``__`` is "private" and should be modified with care. ``ORIGIN`` section ^^^^^^^^^^^^^^^^^^ The sequence in the ``ORIGIN`` section is always in lowercase for the GenBank files downloaded from NCBI. For the RNA molecules, ``t`` (thymine), instead of ``u`` (uracil) is used in the sequence. All GenBank writers follow these conventions while writing GenBank files. Format Parameters ----------------- Reader-specific Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^ The ``constructor`` parameter can be used with the ``Sequence`` generator to specify the in-memory type of each GenBank record that is parsed. ``constructor`` should be ``Sequence`` or a sub-class of ``Sequence``. It is also detected by the unit label on the LOCUS line. For example, if it is ``bp``, it will be read into ``DNA``; if it is ``aa``, it will be read into ``Protein``. Otherwise, it will be read into ``Sequence``. This default behavior is overridden by setting ``constructor``. ``lowercase`` is another parameter available for all GenBank readers. By default, it is set to ``True`` to read in the ``ORIGIN`` sequence as lowercase letters. This parameter is passed to ``Sequence`` or its sub-class constructor. ``seq_num`` is a parameter used with the ``Sequence``, ``DNA``, ``RNA``, and ``Protein`` GenBank readers. It specifies which GenBank record to read from a GenBank file with multiple records in it. Examples -------- Reading and Writing GenBank Files ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Suppose we have the following GenBank file example modified from [5]_: >>> gb_str = ''' ... LOCUS 3K1V_A 34 bp RNA linear SYN 10-OCT-2012 ... DEFINITION Chain A, Structure Of A Mutant Class-I Preq1. ... ACCESSION 3K1V_A ... VERSION 3K1V_A GI:260656459 ... KEYWORDS . ... SOURCE synthetic construct ... ORGANISM synthetic construct ... other sequences; artificial sequences. ... REFERENCE 1 (bases 1 to 34) ... 
AUTHORS Klein,D.J., Edwards,T.E. and Ferre-D'Amare,A.R. ... TITLE Cocrystal structure of a class I preQ1 riboswitch ... JOURNAL Nat. Struct. Mol. Biol. 16 (3), 343-344 (2009) ... PUBMED 19234468 ... COMMENT SEQRES. ... FEATURES Location/Qualifiers ... source 1..34 ... /organism="synthetic construct" ... /mol_type="other RNA" ... /db_xref="taxon:32630" ... misc_binding 1..30 ... /note="Preq1 riboswitch" ... /bound_moiety="preQ1" ... ORIGIN ... 1 agaggttcta gcacatccct ctataaaaaa ctaa ... // ... ''' Now we can read it as ``DNA`` object: >>> import io >>> from skbio import DNA, RNA, Sequence >>> gb = io.StringIO(gb_str) >>> dna_seq = DNA.read(gb) >>> dna_seq DNA ----------------------------------------------------------------- Metadata: 'ACCESSION': '3K1V_A' 'COMMENT': 'SEQRES.' 'DEFINITION': 'Chain A, Structure Of A Mutant Class-I Preq1.' 'KEYWORDS': '.' 'LOCUS': 'REFERENCE': 'SOURCE': 'VERSION': '3K1V_A GI:260656459' Interval metadata: 2 interval features Stats: length: 34 has gaps: False has degenerates: False has definites: True GC-content: 35.29% ----------------------------------------------------------------- 0 AGAGGTTCTA GCACATCCCT CTATAAAAAA CTAA Since this is a riboswitch molecule, we may want to read it as ``RNA``. As the GenBank file usually have ``t`` instead of ``u`` in the sequence, we can read it as ``RNA`` by converting ``t`` to ``u``: >>> gb = io.StringIO(gb_str) >>> rna_seq = RNA.read(gb) >>> rna_seq RNA ----------------------------------------------------------------- Metadata: 'ACCESSION': '3K1V_A' 'COMMENT': 'SEQRES.' 'DEFINITION': 'Chain A, Structure Of A Mutant Class-I Preq1.' 'KEYWORDS': '.' 
'LOCUS': 'REFERENCE': 'SOURCE': 'VERSION': '3K1V_A GI:260656459' Interval metadata: 2 interval features Stats: length: 34 has gaps: False has degenerates: False has definites: True GC-content: 35.29% ----------------------------------------------------------------- 0 AGAGGUUCUA GCACAUCCCU CUAUAAAAAA CUAA >>> rna_seq == dna_seq.transcribe() True >>> with io.StringIO() as fh: ... print(dna_seq.write(fh, format='genbank').getvalue()) LOCUS 3K1V_A 34 bp RNA linear SYN 10-OCT-2012 DEFINITION Chain A, Structure Of A Mutant Class-I Preq1. ACCESSION 3K1V_A VERSION 3K1V_A GI:260656459 KEYWORDS . SOURCE synthetic construct ORGANISM synthetic construct other sequences; artificial sequences. REFERENCE 1 (bases 1 to 34) AUTHORS Klein,D.J., Edwards,T.E. and Ferre-D'Amare,A.R. TITLE Cocrystal structure of a class I preQ1 riboswitch JOURNAL Nat. Struct. Mol. Biol. 16 (3), 343-344 (2009) PUBMED 19234468 COMMENT SEQRES. FEATURES Location/Qualifiers source 1..34 /db_xref="taxon:32630" /mol_type="other RNA" /organism="synthetic construct" misc_binding 1..30 /bound_moiety="preQ1" /note="Preq1 riboswitch" ORIGIN 1 agaggttcta gcacatccct ctataaaaaa ctaa // References ---------- .. [1] http://www.ncbi.nlm.nih.gov/Sitemap/samplerecord.html .. [2] http://www.insdc.org/ .. [3] http://www.insdc.org/files/feature_table.html .. [4] http://www.ebi.ac.uk/ena/WebFeat/ .. [5] http://www.ncbi.nlm.nih.gov/nuccore/3K1V_A """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import re from functools import partial from skbio.io import create_format, GenBankFormatError from skbio.io.format._base import _get_nth_sequence, _line_generator, _too_many_blanks from skbio.util._misc import chunk_str from skbio.sequence import Sequence, DNA, RNA, Protein from skbio.io.format._sequence_feature_vocabulary import ( _yield_section, _parse_section_default, _serialize_section_default, _parse_feature_table, _serialize_feature_table, ) genbank = create_format("genbank") # This list is ordered # used to read and write genbank file. _HEADERS = [ "LOCUS", "DEFINITION", "ACCESSION", "VERSION", "DBSOURCE", "DBLINK", "KEYWORDS", "SOURCE", "REFERENCE", "COMMENT", "FEATURES", "ORIGIN", ] @genbank.sniffer() def _genbank_sniffer(fh): # check the 1st real line is a valid LOCUS line if _too_many_blanks(fh, 5): return False, {} try: line = next(_line_generator(fh, skip_blanks=True, strip=False)) except StopIteration: return False, {} try: _parse_locus([line]) except GenBankFormatError: return False, {} return True, {} @genbank.reader(None) def _genbank_to_generator(fh, constructor=None, **kwargs): for record in _parse_genbanks(fh): yield _construct(record, constructor, **kwargs) @genbank.reader(Sequence) def _genbank_to_sequence(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_genbanks(fh), seq_num) return _construct(record, Sequence, **kwargs) @genbank.reader(DNA) def _genbank_to_dna(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_genbanks(fh), seq_num) return _construct(record, DNA, **kwargs) @genbank.reader(RNA) def _genbank_to_rna(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_genbanks(fh), seq_num) return _construct(record, RNA, **kwargs) @genbank.reader(Protein) def _genbank_to_protein(fh, seq_num=1, **kwargs): record = _get_nth_sequence(_parse_genbanks(fh), seq_num) return _construct(record, Protein, **kwargs) @genbank.writer(None) def 
_generator_to_genbank(obj, fh): for obj_i in obj: _serialize_single_genbank(obj_i, fh) @genbank.writer(Sequence) def _sequence_to_genbank(obj, fh): _serialize_single_genbank(obj, fh) @genbank.writer(DNA) def _dna_to_genbank(obj, fh): _serialize_single_genbank(obj, fh) @genbank.writer(RNA) def _rna_to_genbank(obj, fh): _serialize_single_genbank(obj, fh) @genbank.writer(Protein) def _protein_to_genbank(obj, fh): _serialize_single_genbank(obj, fh) def _construct(record, constructor=None, **kwargs): """Construct the object of Sequence, DNA, RNA, or Protein.""" seq, md, imd = record if "lowercase" not in kwargs: kwargs["lowercase"] = True if constructor is None: unit = md["LOCUS"]["unit"] if unit == "bp": # RNA mol type has T instead of U for genbank from from NCBI constructor = DNA elif unit == "aa": constructor = Protein if constructor == RNA: return DNA(seq, metadata=md, interval_metadata=imd, **kwargs).transcribe() else: return constructor(seq, metadata=md, interval_metadata=imd, **kwargs) def _parse_genbanks(fh): data_chunks = [] for line in _line_generator(fh, skip_blanks=True, strip=False): if line.startswith("//"): yield _parse_single_genbank(data_chunks) data_chunks = [] else: data_chunks.append(line) def _parse_single_genbank(chunks): metadata = {} interval_metadata = None sequence = "" # each section starts with a HEADER without indent. section_splitter = _yield_section(lambda x: not x[0].isspace(), strip=False) for section in section_splitter(chunks): header = section[0].split(None, 1)[0] parser = _PARSER_TABLE.get(header, _parse_section_default) if header == "FEATURES": # This requires 'LOCUS' line parsed before 'FEATURES', which should # be true and is implicitly checked by the sniffer. 
parser = partial(parser, length=metadata["LOCUS"]["size"]) parsed = parser(section) # reference can appear multiple times if header == "REFERENCE": if header in metadata: metadata[header].append(parsed) else: metadata[header] = [parsed] elif header == "ORIGIN": sequence = parsed elif header == "FEATURES": interval_metadata = parsed else: metadata[header] = parsed return sequence, metadata, interval_metadata def _serialize_single_genbank(obj, fh): """Write a GenBank record. Always write it in NCBI canonical way: 1. sequence in lowercase 2. 'u' as 't' even in RNA molecules. Parameters ---------- obj : Sequence or its child class A Sequence object or its child class representing the biological sequence to be serialized. fh : file object A file object open for writing. """ # write out the headers md = obj.metadata for header in _HEADERS: serializer = _SERIALIZER_TABLE.get(header, _serialize_section_default) if header in md: out = serializer(header, md[header]) # test if 'out' is a iterator. # cf. Effective Python Item 17 if iter(out) is iter(out): for s in out: fh.write(s) else: fh.write(out) if header == "FEATURES": if obj.has_interval_metadata(): # magic number 21: the amount of indentation before # feature table starts as defined by INSDC indent = 21 fh.write( "{header:<{indent}}Location/Qualifiers\n".format( header=header, indent=indent ) ) for s in serializer(obj.interval_metadata._intervals, indent): fh.write(s) # write out the sequence # always write RNA seq as DNA if isinstance(obj, RNA): obj = obj.reverse_transcribe() # always write in lowercase seq_str = str(obj).lower() for s in _serialize_origin(seq_str): fh.write(s) fh.write("//\n") def _parse_locus(lines): """Parse the line LOCUS. Format: # Positions Contents # --------- -------- # 00:06 LOCUS # 06:12 spaces # 12:?? Locus name # ??:?? space # ??:29 Length of sequence, right-justified # 29:33 space, bp/aa/rc, space # 33:41 molecule type (can be blank): DNA, ssDNA, dsRNA, tRNA, etc. 
# 41:42 space # 42:51 Blank (implies linear), linear or circular # 51:52 space # 52:55 The division code (e.g. BCT, VRL, INV) # 55:62 space # 62:73 Date, in the form dd-MMM-yyyy (e.g., 15-MAR-1991) """ line = lines[0] pattern = ( r"LOCUS" r" +([^\s]+)" r" +([0-9]+)" r" +(bp|aa|rc)" r" +(.*DNA|.*RNA)?" r" +(linear|circular)?" r" +(?!.*DNA|.*RNA)([A-Z]{3})" r" +([0-9]{2}-[A-Z]{3}-[0-9]{4})" ) matches = re.match(pattern, line) try: res = dict( zip( ["locus_name", "size", "unit", "mol_type", "shape", "division", "date"], matches.groups(), ) ) except Exception: raise GenBankFormatError("Could not parse the LOCUS line:\n%s" % line) res["size"] = int(res["size"]) return res def _serialize_locus(header, obj, indent=12): """Serialize LOCUS line. Parameters ---------- header : str The header of the LOCUS line. obj : dict A dictionary containing key-value pairs representing the attributes of the sequence entry. indent : int, optional The number of spaces used to indent the serialized LOCUS line. Defaults to 12. """ # use 'or' to convert None to '' kwargs = {k: v or "" for k, v in obj.items()} return ( "{header:<{indent}}{locus_name} {size} {unit}" " {mol_type} {shape} {division} {date}\n" ).format(header=header, indent=indent, **kwargs) def _parse_reference(lines): """Parse single REFERENCE field.""" res = {} # magic number 11: the non keyworded lines in REFERENCE # are at least indented with 11 spaces. feature_indent = " " * 11 section_splitter = _yield_section( lambda x: not x.startswith(feature_indent), skip_blanks=True, strip=False ) for section in section_splitter(lines): label, data = _parse_section_default( section, join_delimiter=" ", return_label=True ) res[label] = data return res def _serialize_reference(header, obj, indent=12): """Serialize REFERENCE. Parameters ---------- header : str The header of the REFERENCE section. obj : list A list of dictionaries representing the individual references cited in the sequence entry. 
indent : int, optional The number of spaces used to indent each line of the serialized REFERENCE section. Defaults to 12. """ padding = " " sort_order = {"REFERENCE": 0, "AUTHORS": 1, "TITLE": 2, "JOURNAL": 3, "PUBMED": 4} for obj_i in obj: ref_i = [] for h in sorted(obj_i, key=lambda k: sort_order.get(k, 100)): if h == header: s = "{h:<{indent}}{ref}".format(h=h, indent=indent, ref=obj_i[h]) else: s = "{h:<{indent}}{value}".format( h=padding + h, indent=indent, value=obj_i[h] ) ref_i.append(s) yield "%s\n" % "\n".join(ref_i) def _parse_source(lines): """Parse SOURCE field.""" res = {} # magic number 11: the non keyworded lines in SOURCE # are at least indented with 11 spaces. feature_indent = " " * 11 section_splitter = _yield_section( lambda x: not x.startswith(feature_indent), skip_blanks=True, strip=False ) # SOURCE line is not informative; skip it _, organism = list(section_splitter(lines)) res["ORGANISM"] = organism[0].split(None, 1)[1].strip() res["taxonomy"] = " ".join([i.strip() for i in organism[1:]]) return res def _serialize_source(header, obj, indent=12): """Serialize SOURCE. Parameters ---------- header : str The header of the SOURCE section, obj : dict A dictionary containing key-value pairs representing the attributes of the SOURCE section. indent : int, optional The number of spaces used to indent each line of the serialized SOURCE section. Defaults to 12. """ s = ( "{header:<{indent}}{organism}\n" "{h:<{indent}}{organism}\n" "{space}{taxonomy}\n" ).format( header=header, indent=indent, h=" ORGANISM", organism=obj["ORGANISM"], space=" " * 12, taxonomy=obj["taxonomy"], ) return s def _parse_origin(lines): """Parse the ORIGIN section for sequence.""" sequence = [] for line in lines: if line.startswith("ORIGIN"): continue # remove the number at the beg of each line items = line.split() sequence.append("".join(items[1:])) return "".join(sequence) def _serialize_origin(seq, indent=9): """Serialize seq to ORIGIN. 
Parameters ---------- seq : str The sequence data to be serialized. indent : int, optional The number of spaces used to indent each line of the serialized ORIGIN section. """ n = 1 line_size = 60 frag_size = 10 for i in range(0, len(seq), line_size): line = seq[i : i + line_size] s = "{n:>{indent}} {s}\n".format( n=n, indent=indent, s=chunk_str(line, frag_size, " ") ) if n == 1: s = "ORIGIN\n" + s n += line_size yield s _PARSER_TABLE = { "LOCUS": _parse_locus, "SOURCE": _parse_source, "REFERENCE": _parse_reference, "FEATURES": _parse_feature_table, "ORIGIN": _parse_origin, } _SERIALIZER_TABLE = { "LOCUS": _serialize_locus, "SOURCE": _serialize_source, "REFERENCE": _serialize_reference, "FEATURES": _serialize_feature_table, } scikit-bio-0.6.2/skbio/io/format/gff3.py000066400000000000000000000443241464262511300177740ustar00rootroot00000000000000"""GFF3 format (:mod:`skbio.io.format.gff3`) ========================================= .. currentmodule:: skbio.io.format.gff3 GFF3 (Generic Feature Format version 3) is a standard file format for describing features for biological sequences. It contains lines of text, each consisting of 9 tab-delimited columns [1]_. 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.metadata.IntervalMetadata` | +------+------+---------------------------------------------------------------+ |Yes |Yes |generator of tuple (seq_id of str type, | | | |:mod:`skbio.metadata.IntervalMetadata`) | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The first line of the file is a comment that identifies the format and version. This is followed by a series of data lines. Each data line corresponds to an annotation and consists of 9 columns: SEQID, SOURCE, TYPE, START, END, SCORE, STRAND, PHASE, and ATTR. Column 9 (ATTR) is list of feature attributes in the format "tag=value". Multiple "tag=value" pairs are delimited by semicolons. Multiple values of the same tag are separated with the comma ",". The following tags have predefined meanings: ID, Name, Alias, Parent, Target, Gap, Derives_from, Note, Dbxref, Ontology_term, and Is_circular. The meaning and format of these columns and attributes are explained detail in the format specification [1]_. And they are read in as the vocabulary defined in GenBank parser (:mod:`skbio.io.format.genbank`). Format Parameters ----------------- Reader-specific Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^ ``IntervalMetadata`` GFF3 reader requires 1 parameter: ``seq_id``. It reads the annotation with the specified sequence ID from the GFF3 file into an ``IntervalMetadata`` object. ``DNA`` and ``Sequence`` GFF3 readers require ``seq_num`` of int as parameter. 
It specifies which GFF3 record to read from a GFF3 file with annotations of multiple sequences in it. Writer-specific Parameters ^^^^^^^^^^^^^^^^^^^^^^^^^^ ``skip_subregion`` is a boolean parameter used by all the GFF3 writers. It specifies whether you would like to write each non-contiguous sub-region for a feature annotation. For example, if there is interval feature for a gene with two exons in an ``IntervalMetadata`` object, it will write one line into the GFF3 file when ``skip_subregion`` is ``True`` and will write 3 lines (one for the gene and one for each exon, respectively) when ``skip_subregion`` is ``False``. Default is ``True``. In addition, ``IntervalMetadata`` GFF3 writer needs a parameter of ``seq_id``. It specify the sequence ID (column 1 in GFF3 file) that the annotation belong to. Examples -------- Let's create a file stream with following data in GFF3 format: >>> from skbio import Sequence, DNA >>> gff_str = ''' ... ##gff-version 3 ... seq_1\\t.\\tgene\\t10\\t90\\t.\\t+\\t0\\tID=gen1 ... seq_1\\t.\\texon\\t10\\t30\\t.\\t+\\t.\\tParent=gen1 ... seq_1\\t.\\texon\\t50\\t90\\t.\\t+\\t.\\tParent=gen1 ... seq_2\\t.\\tgene\\t80\\t96\\t.\\t-\\t.\\tID=gen2 ... ##FASTA ... >seq_1 ... ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC ... ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC ... >seq_2 ... ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC ... ATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGC ... ''' >>> import io >>> from skbio.metadata import IntervalMetadata >>> from skbio.io import read >>> gff = io.StringIO(gff_str) We can read it into ``IntervalMetadata``. Each line will be read into an interval feature in ``IntervalMetadata`` object: >>> im = read(gff, format='gff3', into=IntervalMetadata, ... 
seq_id='seq_1') >>> im # doctest: +SKIP 3 interval features ------------------- Interval(interval_metadata=<4604421736>, bounds=[(9, 90)], \ fuzzy=[(False, False)], metadata={'type': 'gene', \ 'phase': 0, 'strand': '+', 'source': '.', 'score': '.', 'ID': 'gen1'}) Interval(interval_metadata=<4604421736>, bounds=[(9, 30)], \ fuzzy=[(False, False)], metadata={'strand': '+', 'source': '.', \ 'type': 'exon', 'Parent': 'gen1', 'score': '.'}) Interval(interval_metadata=<4604421736>, bounds=[(49, 90)], \ fuzzy=[(False, False)], metadata={'strand': '+', 'source': '.', \ 'type': 'exon', 'Parent': 'gen1', 'score': '.'}) We can write the ``IntervalMetadata`` object back to GFF3 file: >>> with io.StringIO() as fh: # doctest: +NORMALIZE_WHITESPACE ... print(im.write(fh, format='gff3', seq_id='seq_1').getvalue()) ##gff-version 3 seq_1 . gene 10 90 . + 0 ID=gen1 seq_1 . exon 10 30 . + . Parent=gen1 seq_1 . exon 50 90 . + . Parent=gen1 If the GFF3 file does not have the sequence ID, it will return an empty object: >>> gff = io.StringIO(gff_str) >>> im = read(gff, format='gff3', into=IntervalMetadata, ... seq_id='foo') >>> im 0 interval features ------------------- We can also read the GFF3 file into a generator: >>> gff = io.StringIO(gff_str) >>> gen = read(gff, format='gff3') >>> for im in gen: # doctest: +SKIP ... print(im[0]) # the seq id ... 
print(im[1]) # the interval metadata on this seq seq_1 3 interval features ------------------- Interval(interval_metadata=<4603377592>, bounds=[(9, 90)], \ fuzzy=[(False, False)], metadata={'type': 'gene', 'ID': 'gen1', \ 'source': '.', 'score': '.', 'strand': '+', 'phase': 0}) Interval(interval_metadata=<4603377592>, bounds=[(9, 30)], \ fuzzy=[(False, False)], metadata={'strand': '+', 'type': 'exon', \ 'Parent': 'gen1', 'source': '.', 'score': '.'}) Interval(interval_metadata=<4603377592>, bounds=[(49, 90)], \ fuzzy=[(False, False)], metadata={'strand': '+', 'type': 'exon', \ 'Parent': 'gen1', 'source': '.', 'score': '.'}) seq_2 1 interval feature ------------------ Interval(interval_metadata=<4603378712>, bounds=[(79, 96)], \ fuzzy=[(False, False)], metadata={'strand': '-', 'type': 'gene', \ 'ID': 'gen2', 'source': '.', 'score': '.'}) For the GFF3 file with sequences, we can read it into ``Sequence`` or ``DNA``: >>> gff = io.StringIO(gff_str) >>> seq = read(gff, format='gff3', into=Sequence, seq_num=1) >>> seq Sequence -------------------------------------------------------------------- Metadata: 'description': '' 'id': 'seq_1' Interval metadata: 3 interval features Stats: length: 100 -------------------------------------------------------------------- 0 ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC 60 ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC >>> gff = io.StringIO(gff_str) >>> seq = read(gff, format='gff3', into=DNA, seq_num=2) >>> seq DNA -------------------------------------------------------------------- Metadata: 'description': '' 'id': 'seq_2' Interval metadata: 1 interval feature Stats: length: 120 has gaps: False has degenerates: False has definites: True GC-content: 50.00% -------------------------------------------------------------------- 0 ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC 60 ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC ATGCATGCAT GCATGCATGC References ---------- .. 
[1] https://github.com/The-Sequence-Ontology/\ Specifications/blob/master/gff3.md """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import re from collections.abc import Iterable from skbio.sequence import DNA, Sequence from skbio.io import create_format, GFF3FormatError from skbio.metadata import IntervalMetadata from skbio.io.format._base import _line_generator, _too_many_blanks, _get_nth_sequence from skbio.io.format.fasta import _fasta_to_generator from skbio.io.format._sequence_feature_vocabulary import ( _vocabulary_change, _vocabulary_skip, ) from skbio.io import write gff3 = create_format("gff3") @gff3.sniffer() def _gff3_sniffer(fh): # check the 1st real line is a valid ID line if _too_many_blanks(fh, 5): return False, {} try: line = next(_line_generator(fh, skip_blanks=True, strip=False)) except StopIteration: return False, {} if re.match(r"##gff-version\s+3", line): return True, {} else: return False, {} @gff3.reader(None) def _gff3_to_generator(fh): """Parse the GFF3 into the existing IntervalMetadata. Parameters ---------- fh : file file handler Yields ------ tuple str of seq id, IntervalMetadata """ id_lengths = {} for data_type, sid, data in _yield_record(fh): if data_type == "length": # get length from sequence-region pragma. # the pragma lines are always before the real annotation lines. id_lengths[sid] = data elif data_type == "data": length = id_lengths.get(sid) yield sid, _parse_record(data, length) @gff3.writer(None) def _generator_to_gff3(obj, fh, skip_subregion=True): """Write a list of IntervalMetadata into a file. Parameters ---------- obj : Iterable of (seq_id, IntervalMetadata) List of IntervalMetadata to write. 
fh : file handler File to write into. skip_subregion : bool Write a line for each sub-regions of an ``Interval`` if it is ``False``. """ # write file header fh.write("##gff-version 3\n") for seq_id, obj_i in obj: _serialize_interval_metadata(obj_i, seq_id, fh, skip_subregion) @gff3.reader(Sequence) def _gff3_to_sequence(fh, seq_num=1): return _construct_seq(fh, Sequence, seq_num) @gff3.writer(Sequence) def _sequence_to_gff3(obj, fh, skip_subregion=True): # write file header fh.write("##gff-version 3\n") _serialize_seq(obj, fh, skip_subregion) @gff3.reader(DNA) def _gff3_to_dna(fh, seq_num=1): return _construct_seq(fh, DNA, seq_num) @gff3.writer(DNA) def _dna_to_gff3(obj, fh, skip_subregion=True): # write file header fh.write("##gff-version 3\n") _serialize_seq(obj, fh, skip_subregion) @gff3.reader(IntervalMetadata) def _gff3_to_interval_metadata(fh, seq_id): """Read a GFF3 record into the specified interval metadata. Parameters ---------- fh : file handler GFF3 file to read. seq_id : str Sequence ID which the interval metadata is associated with. """ length = None for data_type, sid, data in _yield_record(fh): if seq_id == sid: if data_type == "length": # get length from sequence-region pragma length = data elif data_type == "data": return _parse_record(data, length) else: raise GFF3FormatError( "Unknown section in the input GFF3 file: " "%r %r %r" % (data_type, sid, data) ) # return an empty instead of None return IntervalMetadata(None) @gff3.writer(IntervalMetadata) def _interval_metadata_to_gff3(obj, fh, seq_id, skip_subregion=True): """Output ``IntervalMetadata`` object to GFF3 file. Parameters ---------- obj : IntervalMetadata An IntervalMetadata object. fh : file object File object opened for writing. seq_id : str ID for column 1 in the GFF3 file. skip_subregion : bool Write a line for each sub-regions of an ``Interval`` if it is ``False``. 
""" # write file header fh.write("##gff-version 3\n") _serialize_interval_metadata(obj, seq_id, fh, skip_subregion=True) def _construct_seq(fh, constructor=DNA, seq_num=1): lines = [] for i, (data_type, seq_id, L) in enumerate(_yield_record(fh), 1): if data_type == "data" and seq_num == i: lines = L seq = _get_nth_sequence( _fasta_to_generator(fh, constructor=constructor), seq_num=seq_num ) seq.interval_metadata = _parse_record(lines, len(seq)) return seq def _yield_record(fh): """Yield (seq_id, lines) that belong to the same sequence.""" lines = [] current = False for line in _line_generator(fh, skip_blanks=True, strip=True): if line.startswith("##sequence-region"): _, seq_id, start, end = line.split() length = int(end) - int(start) + 1 yield "length", seq_id, length if line.startswith("##FASTA"): # stop once reaching to sequence section break if not line.startswith("#"): try: seq_id, _ = line.split("\t", 1) except ValueError: raise GFF3FormatError("Wrong GFF3 format at line: %s" % line) if current == seq_id: lines.append(line) else: if current is not False: yield "data", current, lines lines = [line] current = seq_id if current is False: # if the input file object is empty, it should return # an empty generator return yield else: yield "data", current, lines def _parse_record(lines, length): """Parse the lines into a IntervalMetadata object.""" interval_metadata = IntervalMetadata(length) for line in lines: columns = line.split("\t") # there should be 9 columns if len(columns) != 9: raise GFF3FormatError('do not have 9 columns in this line: "%s"' % line) # the 1st column is seq ID for every feature. don't store # this repetitive information metadata = { "source": columns[1], "type": columns[2], "score": columns[5], "strand": columns[6], } phase = columns[7] # phase value can only be int or '.' 
try: metadata["phase"] = int(phase) except ValueError: if phase != ".": raise GFF3FormatError( "unknown value for phase column: {!r}".format(phase) ) metadata.update(_parse_attr(columns[8])) start, end = columns[3:5] bounds = [(int(start) - 1, int(end))] interval_metadata.add(bounds, metadata=metadata) return interval_metadata def _parse_attr(s): """Parse attribute column.""" voca_change = _vocabulary_change("gff3") md = {} # in case the line ending with ';', strip it. s = s.rstrip(";") for attr in s.split(";"): k, v = attr.split("=") if k in voca_change: k = voca_change[k] md[k] = v return md def _serialize_interval_metadata(interval_metadata, seq_id, fh, skip_subregion=True): """Serialize an IntervalMetadata to GFF3. Parameters ---------- interval_metadata : IntervalMetadata An IntervalMetadata object. seq_id : str Seq id for the current annotation. It will be used as the 1st column in the GFF3. fh : file handler The file object to output. skip_subregion : bool Whether to skip outputting each sub region as a line in GFF3. 
""" column_keys = ["source", "type", "score", "strand", "phase"] voca_change = _vocabulary_change("gff3", False) voca_skip = _vocabulary_skip("gff3") voca_skip.extend(column_keys) # these characters have reserved meanings in column 9 and must be # escaped when used in other contexts escape = str.maketrans({";": "%3B", "=": "%3D", "&": "%26", ",": "%2C"}) for interval in interval_metadata._intervals: md = interval.metadata bd = interval.bounds start = str(bd[0][0] + 1) end = str(bd[-1][-1]) source, feat_type, score, strand, phase = [ str(md.get(i, ".")) for i in column_keys ] columns = [seq_id, source, feat_type, start, end, score, strand, phase] # serialize the attributes in column 9 attr = [] # use sort to make the output order deterministic for k in sorted(md): if k in voca_skip: # skip the metadata that doesn't go to attribute column continue v = md[k] if k in voca_change: k = voca_change[k] if isinstance(v, Iterable) and not isinstance(v, str): # if there are multiple values for this attribute, # convert them to str and concat them with "," v = ",".join(str(i).translate(escape) for i in v) else: v = v.translate(escape) attr.append("%s=%s" % (k.translate(escape), v)) columns.append(";".join(attr)) fh.write("\t".join(columns)) fh.write("\n") # if there are multiple regions for this feature, # output each region as a standalone line in GFF3. 
if len(bd) > 1 and skip_subregion is False: for start, end in bd: # if this is a gene, then each sub region should be an exon if columns[2] == "gene": columns[2] = "exon" columns[3] = str(start + 1) columns[4] = str(end) try: parent = md["ID"] except KeyError: raise GFF3FormatError( "You need provide ID info for " "the parent interval feature: %r" % interval ) columns[8] = "Parent=%s" % parent fh.write("\t".join(columns)) fh.write("\n") def _serialize_seq(seq, fh, skip_subregion=True): """Serialize a sequence to GFF3.""" _serialize_interval_metadata( seq.interval_metadata, seq.metadata["id"], fh, skip_subregion ) fh.write("##FASTA\n") write(seq, into=fh, format="fasta") scikit-bio-0.6.2/skbio/io/format/lsmat.py000066400000000000000000000205071464262511300202640ustar00rootroot00000000000000r"""Labeled square matrix format (:mod:`skbio.io.format.lsmat`) =========================================================== .. currentmodule:: skbio.io.format.lsmat The labeled square matrix file format (``lsmat``) stores numeric square matrix data relating a set of objects along each axis. The format also stores identifiers (i.e., unique labels) for the objects. The matrix data and identifiers are stored in delimited text format (e.g., TSV or CSV). This format supports storing a variety of data types including dissimilarity/distance matrices, similarity matrices and amino acid substitution matrices. 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.stats.distance.DissimilarityMatrix` | +------+------+---------------------------------------------------------------+ |Yes |Yes |:mod:`skbio.stats.distance.DistanceMatrix` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The labeled square matrix and object identifiers are stored as delimited text. The first line of the file is the header, which must start with the delimiter, followed by the IDs for all objects in the matrix. Each of the following lines must contain an object's ID, followed by a numeric (float or integer) vector relating the object to all other objects in the matrix. The order of objects is determined by the IDs in the header. For example, assume we have a 2x2 distance matrix with IDs ``'a'`` and ``'b'``. When serialized in this format, the distance matrix might look like:: ab a0.01.0 b1.00.0 where ```` is the delimiter between elements. Lines containing only whitespace may occur anywhere throughout the file and are ignored. Lines starting with ``#`` are treated as comments and are ignored. Comments may only occur *before* the header. IDs will have any leading/trailing whitespace removed when they are parsed. .. note:: This file format is most useful for storing small matrices, or when it is desirable to represent the matrix in a human-readable format, or easily import the file into another program that supports delimited text (e.g., a spreadsheet program). If efficiency is a concern, this format may not be the most appropriate choice. Format Parameters ----------------- The only supported format parameter is ``delimiter``, which defaults to the tab character (``'\t'``). 
``delimiter`` is used to separate elements in the file format. Examples include tab (``'\t'``) for TSV format and comma (``','``) for CSV format. ``delimiter`` can be specified as a keyword argument when reading from or writing to a file. A special ``delimiter`` is ``None``, which represents a whitespace of arbitrary length. This value is useful for reading a fixed-width text file. However, it cannot be automatically determined, nor can it be specified when writing to a file. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import csv import numpy as np from skbio.stats.distance import DissimilarityMatrix, DistanceMatrix from skbio.io import create_format, LSMatFormatError lsmat = create_format("lsmat") @lsmat.sniffer() def _lsmat_sniffer(fh): header = _find_header(fh) if header is not None: try: dialect = csv.Sniffer().sniff(header) delimiter = dialect.delimiter ids = _parse_header(header, delimiter) first_id, _ = next(_parse_data(fh, delimiter), (None, None)) if first_id is not None and first_id == ids[0]: return True, {"delimiter": delimiter} except (csv.Error, LSMatFormatError): pass return False, {} @lsmat.reader(DissimilarityMatrix) def _lsmat_to_dissimilarity_matrix(fh, delimiter="\t"): return _lsmat_to_matrix(DissimilarityMatrix, fh, delimiter) @lsmat.reader(DistanceMatrix) def _lsmat_to_distance_matrix(fh, delimiter="\t"): return _lsmat_to_matrix(DistanceMatrix, fh, delimiter) @lsmat.writer(DissimilarityMatrix) def _dissimilarity_matrix_to_lsmat(obj, fh, delimiter="\t"): _matrix_to_lsmat(obj, fh, delimiter) @lsmat.writer(DistanceMatrix) def _distance_matrix_to_lsmat(obj, fh, delimiter="\t"): _matrix_to_lsmat(obj, fh, delimiter) def 
_lsmat_to_matrix(cls, fh, delimiter): # We aren't using np.loadtxt because it uses *way* too much memory # (e.g, a 2GB matrix eats up 10GB, which then isn't freed after parsing # has finished). See: # http://mail.scipy.org/pipermail/numpy-tickets/2012-August/006749.html # Strategy: # - find the header # - initialize an empty ndarray # - for each row of data in the input file: # - populate the corresponding row in the ndarray with floats header = _find_header(fh) if header is None: raise LSMatFormatError( "Could not find a header line containing IDs in the " "dissimilarity matrix file. Please verify that the file is " "not empty." ) ids = _parse_header(header, delimiter) num_ids = len(ids) data = np.empty((num_ids, num_ids), dtype=np.float64) row_idx = -1 for row_idx, (row_id, row_data) in enumerate(_parse_data(fh, delimiter)): if row_idx >= num_ids: # We've hit a nonempty line after we already filled the data # matrix. Raise an error because we shouldn't ignore extra data. raise LSMatFormatError( "Encountered extra row(s) without corresponding IDs in " "the header." ) num_vals = len(row_data) if num_vals != num_ids: raise LSMatFormatError( "There are %d value(s) in row %d, which is not equal to the " "number of ID(s) in the header (%d)." % (num_vals, row_idx + 1, num_ids) ) expected_id = ids[row_idx] if row_id == expected_id: data[row_idx, :] = np.asarray(row_data, dtype=float) else: raise LSMatFormatError( "Encountered mismatched IDs while parsing the " "dissimilarity matrix file. Found %r but expected " "%r. Please ensure that the IDs match between the " "dissimilarity matrix header (first row) and the row " "labels (first column)." % (str(row_id), str(expected_id)) ) if row_idx != num_ids - 1: raise LSMatFormatError( "Expected %d row(s) of data, but found %d." 
% (num_ids, row_idx + 1) ) return cls(data, ids) def _find_header(fh): header = None for line in fh: stripped_line = line.strip() if stripped_line and not stripped_line.startswith("#"): # Don't strip the header because the first delimiter might be # whitespace (e.g., tab). header = line break return header def _parse_header(header, delimiter): tokens = header.rstrip().split(delimiter) if delimiter is not None: if tokens[0]: raise LSMatFormatError( "Header must start with delimiter %r." % str(delimiter) ) tokens = tokens[1:] return [e.strip() for e in tokens] def _parse_data(fh, delimiter): for line in fh: stripped_line = line.strip() if not stripped_line: continue tokens = line.rstrip().split(delimiter) id_ = tokens[0].strip() yield id_, tokens[1:] def _matrix_to_lsmat(obj, fh, delimiter): delimiter = "%s" % delimiter ids = obj.ids fh.write(_format_ids(ids, delimiter)) fh.write("\n") for id_, vals in zip(ids, obj.data): fh.write("%s" % id_) fh.write(delimiter) fh.write(delimiter.join(np.asarray(vals, dtype=str))) fh.write("\n") def _format_ids(ids, delimiter): return delimiter.join([""] + list(ids)) scikit-bio-0.6.2/skbio/io/format/newick.py000066400000000000000000000477311464262511300204340ustar00rootroot00000000000000r"""Newick format (:mod:`skbio.io.format.newick`) ============================================= .. currentmodule:: skbio.io.format.newick Newick format (``newick``) stores spanning-trees with weighted edges and node names in a minimal file format [1]_. This is useful for representing phylogenetic trees and taxonomies. Newick was created as an informal specification on June 26, 1986 [2]_. 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.tree.TreeNode` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- A Newick file represents a tree using the following grammar. See below for an explanation of the format in plain English. Formal Grammar ^^^^^^^^^^^^^^ .. code-block:: none NEWICK ==> NODE ; NODE ==> FORMATTING SUBTREE FORMATTING NODE_INFO FORMATTING SUBTREE ==> ( CHILDREN ) | null NODE_INFO ==> LABEL | LENGTH | LABEL FORMATTING LENGTH | null FORMATTING ==> [ COMMENT_CHARS ] | whitespace | null CHILDREN ==> NODE | CHILDREN , NODE LABEL ==> ' ALL_CHARS ' | SAFE_CHARS LENGTH ==> : FORMATTING NUMBER COMMENT_CHARS ==> any ALL_CHARS ==> any SAFE_CHARS ==> any except: ,;:()[] and whitespace NUMBER ==> a decimal or integer .. note:: The ``_`` character inside of SAFE_CHARS will be converted to a blank space in ``skbio.tree.TreeNode`` and vice versa. ``'`` is considered the escape character. To escape ``'`` use a preceding ``'``. The implementation of newick in scikit-bio allows nested comments. To escape ``[`` or ``]`` from within COMMENT_CHARS, use a preceding ``'``. Explanation ^^^^^^^^^^^ The Newick format defines a tree by creating a minimal representation of nodes and their relationships to each other. Basic Symbols ~~~~~~~~~~~~~ There are several symbols which define nodes, the first of which is the semi-colon (``;``). The semi-colon creates a root node to its left. Recall that there can only be one root in a tree. The next symbol is the comma (``,``), which creates a node to its right. However, these two alone are not enough. For example imagine the following string: ``, , , ;``. 
It is evident that there is a root, but the other 3 nodes, defined by commas, have no relationship. For this reason, it is not a valid Newick string to have more than one node at the root level. To provide these relationships, there is another structure: paired parenthesis (``( )``). These are inserted at the location of an existing node and give it the ability to have children. Placing ``( )`` in a node's location will create a child inside the parenthesis on the left-most inner edge. Application of Rules ~~~~~~~~~~~~~~~~~~~~ Adding a comma within the parenthesis will create two children: ``( , )`` (also known as a bifurcating node). Notice that only one comma is needed because the parenthesis have already created a child. Adding more commas will create more children who are siblings to each other. For example, writing ``( , , , )`` will create a multifurcating node with 4 child nodes who are siblings to each other. The notation for a root can be used to create a complete tree. The ``;`` will create a root node where parenthesis can be placed: ``( );``. Adding commas will create more children: ``( , );``. These rules can be applied recursively ad. infinitum: ``(( , ), ( , ));``. Adding Node Information ~~~~~~~~~~~~~~~~~~~~~~~ Information about a node can be added to improve the clarity and meaning of a tree. Each node may have a label and/or a length (to the parent). Newick always places the node information at the right-most edge of a node's position. Starting with labels, ``(( , ), ( , ));`` would become ``((D, E)B, (F, G)C)A;``. There is a named root ``A`` and the root's children (from left to right) are ``B`` and ``C``. ``B`` has the children ``D`` and ``E``, and ``C`` has the children ``F`` and ``G``. Length represents the distance (or weight of the edge) that connects a node to its parent. This must be a decimal or integer. As an example, suppose ``D`` is rather estranged from ``B``, and ``E`` is very close. 
That can be written as: ``((D:10, E:0.5)B, (F, G)C)A;``. Notice that the colon (``:``) separates the label from the length. If the length is provided but the label is omitted, a colon must still precede the length (``(:0.25,:0.5):0.0;``). Without this, the length would be interpreted as a label (which happens to be a number). .. note:: Internally scikit-bio will cast a length to ``float`` which technically means that even exponent strings (``1e-3``) are supported) Advanced Label and Length Rules ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ More characters can be used to create more descriptive labels. When creating a label there are some rules that must be considered due to limitations in the Newick format. The following characters are not allowed within a standard label: parenthesis, commas, square-brackets, colon, semi-colon, and whitespace. These characters are also disallowed from occurring within a length, which has a much stricter format: decimal or integer. Many of these characters are symbols which define the structure of a Newick tree and are thus disallowed for obvious reasons. The symbols not yet mentioned are square-brackets (``[ ]``) and whitespace (space, tab, and newline). What if these characters are needed within a label? In the simple case of spaces, an underscore (``_``) will be translated as a space on read and vice versa on write. What if a literal underscore or any of the others mentioned are needed? A label can be escaped (meaning that its contents are understood as regular text) using single-quotes (``'``). When a label is surrounded by single-quotes, any character is permissible. If a single-quote is needed inside of an escaped label or anywhere else, it can be escaped with another single-quote. For example, ``A_1`` is written ``'A_1'`` and ``'A'_1`` would be ``'''A''_1'``. Inline Comments ~~~~~~~~~~~~~~~ Square-brackets define a comment, which are the least commonly used part of the Newick format. 
Comments are not included in the generated objects and exist only as human readable text ignored by the parser. The implementation in scikit-bio allows for nested comments (``[comment [nested]]``). Unpaired square-brackets can be escaped with a single-quote preceding the bracket when inside an existing comment. (This is identical to escaping a single-quote). The single-quote has the highest operator precedence, so there is no need to worry about starting a comment from within a properly escaped label. Whitespace ~~~~~~~~~~ Whitespace is not allowed within any un-escaped label or in any length, but it is permitted anywhere else. Caveats ~~~~~~~ Newick cannot always provide a unique representation of any tree, in other words, the same tree can be written multiple ways. For example: ``(A, B);`` is isomorphic to ``(B, A);``. The implementation in scikit-bio maintains the given sibling order in its object representations. Newick has no representation of an unrooted tree. Some biological packages make the assumption that when a trifurcated root exists in an otherwise bifurcated tree that the tree must be unrooted. In scikit-bio, ``skbio.tree.TreeNode`` will always be rooted at the ``newick`` root (``;``). Format Parameters ----------------- The only supported format parameter is `convert_underscores`. This is `True` by default. When `False`, underscores found in unescaped labels will not be converted to spaces. This is useful when reading the output of an external program in which the underscores were not escaped. This parameter only affects `read` operations. It does not exist for `write` operations; they will always properly escape underscores. Examples -------- This is a simple Newick string. 
>>> from io import StringIO >>> from skbio import read >>> from skbio.tree import TreeNode >>> f = StringIO("((D, E)B, (F, G)C)A;") >>> tree = read(f, format="newick", into=TreeNode) >>> f.close() >>> print(tree.ascii_art()) /-D /B-------| | \-E -A-------| | /-F \C-------| \-G This is a complex Newick string. >>> f = StringIO("[example](a:0.1, 'b_b''':0.2, (c:0.3, d_d:0.4)e:0.5)f:0.0;") >>> tree = read(f, format="newick", into=TreeNode) >>> f.close() >>> print(tree.ascii_art()) /-a | -f-------|--b_b' | | /-c \e-------| \-d d Notice that the node originally labeled ``d_d`` became ``d d``. Additionally ``'b_b'''`` became ``b_b'``. Note that the underscore was preserved in `b_b'`. References ---------- .. [1] http://evolution.genetics.washington.edu/phylip/newick_doc.html .. [2] http://evolution.genetics.washington.edu/phylip/newicktree.html """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from skbio.io import create_format, NewickFormatError from skbio.tree import TreeNode newick = create_format("newick") @newick.sniffer() def _newick_sniffer(fh): # Strategy: # The following conditions preclude a file from being newick: # * It is an empty file. # * There is whitespace inside of a label (handled by tokenizer) # * : is followed by anything that is an operator # * ( is not preceded immediately by , or another ( # * The parens are unbalanced when ; is found. # If 100 tokens (or less if EOF occurs earlier) then it is probably # newick, or at least we can't prove it isn't. operators = set(",;:()") empty = True last_token = "," indent = 0 try: # 100 tokens ought to be enough for anybody. 
for token, _ in zip(_tokenize_newick(fh), range(100)): if token not in operators: pass elif token == "," and last_token != ":" and indent > 0: pass elif token == ":" and last_token != ":": pass elif token == ";" and last_token != ":" and indent == 0: pass elif token == ")" and last_token != ":": indent -= 1 elif token == "(" and (last_token == "(" or last_token == ","): indent += 1 else: raise NewickFormatError() last_token = token empty = False except NewickFormatError: return False, {} return not empty, {} @newick.reader(TreeNode) def _newick_to_tree_node(fh, convert_underscores=True): tree_stack = [] current_depth = 0 last_token = "" next_is_distance = False root = TreeNode() tree_stack.append((root, current_depth)) for token in _tokenize_newick(fh, convert_underscores=convert_underscores): # Check for a label if last_token not in "(,):": if not next_is_distance: tree_stack[-1][0].name = last_token if last_token else None else: next_is_distance = False # Check for a distance if token == ":": next_is_distance = True elif last_token == ":": try: tree_stack[-1][0].length = float(token) except ValueError: raise NewickFormatError( "Could not read length as numeric type" ": %s." % token ) elif token == "(": current_depth += 1 tree_stack.append((TreeNode(), current_depth)) elif token == ",": tree_stack.append((TreeNode(), current_depth)) elif token == ")": if len(tree_stack) < 2: raise NewickFormatError( "Could not parse file as newick." " Parenthesis are unbalanced." ) children = [] # Pop all nodes at this depth as they belong to the remaining # node on the top of the stack as children. while current_depth == tree_stack[-1][1]: node, _ = tree_stack.pop() children.insert(0, node) parent = tree_stack[-1][0] if parent.children: raise NewickFormatError( "Could not parse file as newick." " Contains unnested children." 
) # This is much faster than TreeNode.extend for child in children: child.parent = parent parent.children = children current_depth -= 1 elif token == ";": if len(tree_stack) == 1: return root break last_token = token raise NewickFormatError( "Could not parse file as newick." " `(Parenthesis)`, `'single-quotes'`," " `[comments]` may be unbalanced, or tree may be" " missing its root." ) @newick.writer(TreeNode) def _tree_node_to_newick(obj, fh): operators = set(",:_;()[]") current_depth = 0 nodes_left = [(obj, 0)] while len(nodes_left) > 0: entry = nodes_left.pop() node, node_depth = entry if node.children and node_depth >= current_depth: fh.write("(") nodes_left.append(entry) nodes_left += ((child, node_depth + 1) for child in reversed(node.children)) current_depth = node_depth + 1 else: if node_depth < current_depth: fh.write(")") current_depth -= 1 # Note we don't check for None because there is no way to represent # an empty string as a label in Newick. Therefore, both None and '' # are considered to be the absence of a label. label = node._node_label() if label: escaped = "%s" % label.replace("'", "''") if any(t in operators for t in label): fh.write("'") fh.write(escaped) fh.write("'") else: fh.write(escaped.replace(" ", "_")) if node.length is not None: fh.write(":") fh.write("%s" % node.length) if nodes_left and nodes_left[-1][1] == current_depth: fh.write(",") fh.write(";\n") def _tokenize_newick(fh, convert_underscores=True): structure_tokens = set("(),;:") not_escaped = True label_start = False last_non_ws_char = "" last_char = "" comment_depth = 0 metadata_buffer = [] # Strategy: # We will iterate by character. # Comments in newick are defined as: # [This is a comment] # Nested comments are allowed. # # The following characters indicate structure: # ( ) , ; : # # Whitespace is never allowed in a newick label, so an exception will be # thrown. # # We use ' to indicate a literal string. It has the highest precedence of # any operator. 
for line in fh: for character in line: # We will start by handling the comment case. # This code branch will probably never execute in practice. # Using a comment_depth we can handle nested comments. # Additionally if we are inside an escaped literal string, then # we don't want to consider it a comment. if character == "[" and not_escaped: # Sometimes we might not want to nest a comment, so we will use # our escape character. This is not explicitly mentioned in # any format specification, but seems like what a reasonable # person might do. if last_non_ws_char != "'" or comment_depth == 0: # Once again, only advance our depth if [ has not been # escaped inside our comment. comment_depth += 1 if comment_depth > 0: # Same as above, but in reverse if character == "]" and last_non_ws_char != "'": comment_depth -= 1 last_non_ws_char = character continue # We are not in a comment block if we are below here. # If we are inside of an escaped string literal, then ( ) , ; are # meaningless to the structure. # Otherwise, we are ready to submit our metadata token. if not_escaped and character in structure_tokens: label_start = False metadata = "".join(metadata_buffer) # If the following condition is True, then we must have just # closed a literal. We know this because last_non_ws_char is # either None or the last non-whitespace character. # last_non_ws_char is None when we have just escaped an escape # and at the first iteration. if last_non_ws_char == "'" or not convert_underscores: # Make no modifications. yield metadata elif metadata: # Underscores are considered to be spaces when not in an # escaped literal string. yield metadata.replace("_", " ") # Clear our buffer for the next metadata token and yield our # current structure token. metadata_buffer = [] yield character # We will now handle escaped string literals. # They are inconvenient because any character inside of them is # valid, especially whitespace. # We also need to allow ' to be escaped by '. e.g. 
'' -> ' elif character == "'": not_escaped = not not_escaped label_start = True if last_non_ws_char == "'": # We are escaping our escape, so it should be added to our # metadata_buffer which will represent some future token. metadata_buffer.append(character) # We do not want a running chain of overcounts, so we need # to clear the last character and continue iteration from # the top. Without this, the following would happen: # ''' ' -> '' # What we want is: # ''' ' -> ' last_non_ws_char = "" last_char = "" continue elif not character.isspace() or not not_escaped: if label_start and last_char.isspace() and not_escaped: raise NewickFormatError( "Newick files cannot have" " unescaped whitespace in their" " labels." ) metadata_buffer.append(character) label_start = True # This is equivalent to an `else` however it prevents coverage from # mis-identifying the `continue` as uncalled because cpython will # optimize it to a jump that is slightly different from the normal # jump it would have done anyways. elif True: # Skip the last statement last_char = character continue last_char = character # This line is skipped in the following cases: # * comment_depth > 0, i.e. we are in a comment. # * We have just processed the sequence '' and we don't want # the sequence ''' to result in ''. # * We have encountered whitespace that is not properly escaped. last_non_ws_char = character scikit-bio-0.6.2/skbio/io/format/ordination.py000066400000000000000000000340751464262511300213170ustar00rootroot00000000000000r"""Ordination results format (:mod:`skbio.io.format.ordination`) ============================================================= .. currentmodule:: skbio.io.format.ordination The ordination results file format (``ordination``) stores the results of an ordination method in a human-readable, text-based format. The format supports storing the results of various ordination methods available in scikit-bio, including (but not necessarily limited to) PCoA, CA, RDA, and CCA. 
Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.stats.ordination.OrdinationResults` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The format is text-based, consisting of six attributes that describe the ordination results: - ``Eigvals``: 1-D - ``Proportion explained``: 1-D - ``Species``: 2-D - ``Site``: 2-D - ``Biplot``: 2-D - ``Site constraints``: 2-D The attributes in the file *must* be in this order. Each attribute is defined in its own section of the file, where sections are separated by a blank (or whitespace-only) line. Each attribute begins with a header line, which contains the attribute's name (as listed above), followed by a tab character, followed by one or more tab-separated dimensions (integers) that describe the shape of the attribute's data. The attribute's data follows its header line, and is stored in tab-separated format. ``Species``, ``Site``, and ``Site constraints`` store species and site IDs, respectively, as the first column, followed by the 2-D data array. 
An example of this file format might look like:: Eigvals4 0.360.180.070.08 Proportion explained4 0.460.230.100.10 Species94 Species00.110.28-0.20-0.00 Species10.140.300.39-0.14 Species2-1.010.09-0.19-0.10 Species3-1.030.100.220.22 Species41.050.53-0.430.22 Species50.990.570.67-0.38 Species60.25-0.17-0.200.43 Species70.14-0.85-0.010.05 Species80.41-0.700.21-0.69 Site104 Site00.71-3.080.21-1.24 Site10.58-3.00-0.942.69 Site20.76-3.152.13-3.11 Site31.111.07-1.870.66 Site4-0.97-0.06-0.69-0.61 Site51.040.45-0.630.28 Site6-0.95-0.080.13-0.42 Site70.94-0.100.52-0.00 Site8-1.140.490.471.17 Site91.031.032.74-1.28 Biplot33 -0.160.630.76 -0.990.06-0.04 0.18-0.970.03 Site constraints104 Site00.69-3.08-0.32-1.24 Site10.66-3.060.232.69 Site20.63-3.040.78-3.11 Site31.100.50-1.550.66 Site4-0.970.06-1.12-0.61 Site51.050.53-0.430.28 Site6-1.020.10-0.00-0.42 Site70.990.570.67-0.00 Site8-1.080.131.111.17 Site90.940.611.79-1.28 If a given result attribute is not present (e.g. ``Biplot``), it should still be defined and declare its dimensions as 0. For example:: Biplot00 All attributes are optional except for ``Eigvals``. Examples -------- Assume we have the following tab-delimited text file storing the ordination results in ``ordination`` format:: Eigvals4 0.360.180.070.08 Proportion explained4 0.460.230.100.10 Species94 Species00.110.28-0.20-0.00 Species10.140.300.39-0.14 Species2-1.010.09-0.19-0.10 Species3-1.030.100.220.22 Species41.050.53-0.430.22 Species50.990.570.67-0.38 Species60.25-0.17-0.200.43 Species70.14-0.85-0.010.05 Species80.41-0.700.21-0.69 Site104 Site00.71-3.080.21-1.24 Site10.58-3.00-0.942.69 Site20.76-3.152.13-3.11 Site31.111.07-1.870.66 Site4-0.97-0.06-0.69-0.61 Site51.040.45-0.630.28 Site6-0.95-0.080.13-0.42 Site70.94-0.100.52-0.00 Site8-1.140.490.471.17 Site91.031.032.74-1.28 Biplot00 Site constraints00 Load the ordination results from the file: >>> from io import StringIO >>> from skbio import OrdinationResults >>> or_f = StringIO( ... "Eigvals\t4\n" ... 
"0.36\t0.18\t0.07\t0.08\n" ... "\n" ... "Proportion explained\t4\n" ... "0.46\t0.23\t0.10\t0.10\n" ... "\n" ... "Species\t9\t4\n" ... "Species0\t0.11\t0.28\t-0.20\t-0.00\n" ... "Species1\t0.14\t0.30\t0.39\t-0.14\n" ... "Species2\t-1.01\t0.09\t-0.19\t-0.10\n" ... "Species3\t-1.03\t0.10\t0.22\t0.22\n" ... "Species4\t1.05\t0.53\t-0.43\t0.22\n" ... "Species5\t0.99\t0.57\t0.67\t-0.38\n" ... "Species6\t0.25\t-0.17\t-0.20\t0.43\n" ... "Species7\t0.14\t-0.85\t-0.01\t0.05\n" ... "Species8\t0.41\t-0.70\t0.21\t-0.69\n" ... "\n" ... "Site\t10\t4\n" ... "Site0\t0.71\t-3.08\t0.21\t-1.24\n" ... "Site1\t0.58\t-3.00\t-0.94\t2.69\n" ... "Site2\t0.76\t-3.15\t2.13\t-3.11\n" ... "Site3\t1.11\t1.07\t-1.87\t0.66\n" ... "Site4\t-0.97\t-0.06\t-0.69\t-0.61\n" ... "Site5\t1.04\t0.45\t-0.63\t0.28\n" ... "Site6\t-0.95\t-0.08\t0.13\t-0.42\n" ... "Site7\t0.94\t-0.10\t0.52\t-0.00\n" ... "Site8\t-1.14\t0.49\t0.47\t1.17\n" ... "Site9\t1.03\t1.03\t2.74\t-1.28\n" ... "\n" ... "Biplot\t0\t0\n" ... "\n" ... "Site constraints\t0\t0\n") >>> ord_res = OrdinationResults.read(or_f) """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import pandas as pd from skbio.stats.ordination import OrdinationResults from skbio.io import create_format, OrdinationFormatError ordination = create_format("ordination") @ordination.sniffer() def _ordination_sniffer(fh): # Smells an ordination file if *all* of the following lines are present # *from the beginning* of the file: # - eigvals header (minimally parsed) # - another line (contents ignored) # - a whitespace-only line # - proportion explained header (minimally parsed) try: _parse_header(fh, "Eigvals", 1) next_line = next(fh, None) if next_line is not None: _check_empty_line(fh) _parse_header(fh, "Proportion explained", 1) return True, {} except OrdinationFormatError: pass return False, {} @ordination.reader(OrdinationResults) def _ordination_to_ordination_results(fh): eigvals = _parse_vector_section(fh, "Eigvals") if eigvals is None: raise OrdinationFormatError("At least one eigval must be present.") _check_empty_line(fh) prop_expl = _parse_vector_section(fh, "Proportion explained") _check_length_against_eigvals(prop_expl, eigvals, "proportion explained values") _check_empty_line(fh) species = _parse_array_section(fh, "Species") _check_length_against_eigvals(species, eigvals, "coordinates per species") _check_empty_line(fh) site = _parse_array_section(fh, "Site") _check_length_against_eigvals(site, eigvals, "coordinates per site") _check_empty_line(fh) # biplot does not have ids to parse (the other arrays do) biplot = _parse_array_section(fh, "Biplot", has_ids=False) _check_empty_line(fh) cons = _parse_array_section(fh, "Site constraints") if cons is not None and site is not None: if not np.array_equal(cons.index, site.index): raise OrdinationFormatError( "Site constraints ids and site ids must be equal: %s != %s" % (cons.index, site.index) ) return OrdinationResults( short_method_name="", long_method_name="", eigvals=eigvals, features=species, samples=site, 
biplot_scores=biplot, sample_constraints=cons, proportion_explained=prop_expl, ) def _parse_header(fh, header_id, num_dimensions): line = next(fh, None) if line is None: raise OrdinationFormatError( "Reached end of file while looking for %s header." % header_id ) header = line.strip().split("\t") # +1 for the header ID if len(header) != num_dimensions + 1 or header[0] != header_id: raise OrdinationFormatError("%s header not found." % header_id) return header def _check_empty_line(fh): """Check that the next line in `fh` is empty or whitespace-only.""" line = next(fh, None) if line is None: raise OrdinationFormatError( "Reached end of file while looking for blank line separating " "sections." ) if line.strip(): raise OrdinationFormatError("Expected an empty line.") def _check_length_against_eigvals(data, eigvals, label): if data is not None: num_vals = data.shape[-1] num_eigvals = eigvals.shape[-1] if num_vals != num_eigvals: raise OrdinationFormatError( "There should be as many %s as eigvals: %d != %d" % (label, num_vals, num_eigvals) ) def _parse_vector_section(fh, header_id): header = _parse_header(fh, header_id, 1) # Parse how many values we are waiting for num_vals = int(header[1]) if num_vals == 0: # The ordination method didn't generate the vector, so set it to None vals = None else: # Parse the line with the vector values line = next(fh, None) if line is None: raise OrdinationFormatError( "Reached end of file while looking for line containing values " "for %s section." % header_id ) vals = pd.Series(np.asarray(line.strip().split("\t"), dtype=np.float64)) if len(vals) != num_vals: raise OrdinationFormatError( "Expected %d values in %s section, but found %d." 
% (num_vals, header_id, len(vals)) ) return vals def _parse_array_section(fh, header_id, has_ids=True): """Parse an array section of `fh` identified by `header_id`.""" # Parse the array header header = _parse_header(fh, header_id, 2) # Parse the dimensions of the array rows = int(header[1]) cols = int(header[2]) ids = None if rows == 0 and cols == 0: # The ordination method didn't generate the array data for 'header', so # set it to None data = None elif rows == 0 or cols == 0: # Both dimensions should be 0 or none of them are zero raise OrdinationFormatError( "One dimension of %s is 0: %d x %d" % (header_id, rows, cols) ) else: # Parse the data data = np.empty((rows, cols), dtype=np.float64) if has_ids: ids = [] for i in range(rows): # Parse the next row of data line = next(fh, None) if line is None: raise OrdinationFormatError( "Reached end of file while looking for row %d in %s " "section." % (i + 1, header_id) ) vals = line.strip().split("\t") if has_ids: ids.append(vals[0]) vals = vals[1:] if len(vals) != cols: raise OrdinationFormatError( "Expected %d values, but found %d in row %d." 
% (cols, len(vals), i + 1) ) data[i, :] = np.asarray(vals, dtype=np.float64) data = pd.DataFrame(data, index=ids) return data @ordination.writer(OrdinationResults) def _ordination_results_to_ordination(obj, fh): _write_vector_section(fh, "Eigvals", obj.eigvals) _write_vector_section(fh, "Proportion explained", obj.proportion_explained) _write_array_section(fh, "Species", obj.features) _write_array_section(fh, "Site", obj.samples) _write_array_section(fh, "Biplot", obj.biplot_scores, has_ids=False) _write_array_section( fh, "Site constraints", obj.sample_constraints, include_section_separator=False ) def _write_vector_section(fh, header_id, vector): if vector is None: shape = 0 else: shape = vector.shape[0] fh.write("%s\t%d\n" % (header_id, shape)) if vector is not None: fh.write(_format_vector(vector.values)) fh.write("\n") def _write_array_section( fh, header_id, data, has_ids=True, include_section_separator=True ): # write section header if data is None: shape = (0, 0) else: shape = data.shape fh.write("%s\t%d\t%d\n" % (header_id, shape[0], shape[1])) # write section data if data is not None: if not has_ids: for vals in data.values: fh.write(_format_vector(vals)) else: for id_, vals in zip(data.index, data.values): fh.write(_format_vector(vals, id_)) if include_section_separator: fh.write("\n") def _format_vector(vector, id_=None): formatted_vector = "\t".join(np.asarray(vector, dtype=str)) if id_ is None: return "%s\n" % formatted_vector else: return "%s\t%s\n" % (id_, formatted_vector) scikit-bio-0.6.2/skbio/io/format/phylip.py000066400000000000000000000336521464262511300204560ustar00rootroot00000000000000"""PHYLIP multiple sequence alignment format (:mod:`skbio.io.format.phylip`) ========================================================================= .. currentmodule:: skbio.io.format.phylip The PHYLIP file format stores a multiple sequence alignment. 
The format was originally defined and used in Joe Felsenstein's PHYLIP package [1]_, and has since been supported by several other bioinformatics tools (e.g., RAxML [2]_). See [3]_ for the original format description, and [4]_ and [5]_ for additional descriptions. An example PHYLIP-formatted file taken from [3]_:: 5 42 Turkey AAGCTNGGGC ATTTCAGGGT GAGCCCGGGC AATACAGGGT AT Salmo gairAAGCCTTGGC AGTGCAGGGT GAGCCGTGGC CGGGCACGGT AT H. SapiensACCGGTTGGC CGTTCAGGGT ACAGGTTGGC CGTTCAGGGT AA Chimp AAACCCTTGC CGTTACGCTT AAACCGAGGC CGGGACACTC AT Gorilla AAACCCTTGC CGGTACGCTT AAACCATTGC CGGTACGCTT AA .. note:: Original copyright notice for the above PHYLIP file: *(c) Copyright 1986-2008 by The University of Washington. Written by Joseph Felsenstein. Permission is granted to copy this document provided that no fee is charged for it and that this copyright notice is not removed.* Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.alignment.TabularMSA` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- PHYLIP format is a plain text format containing exactly two sections: a header describing the dimensions of the alignment, followed by the multiple sequence alignment itself. The format described here is "strict" PHYLIP, as described in [4]_. Strict PHYLIP requires that each sequence identifier is exactly 10 characters long (padded with spaces as necessary). Other bioinformatics tools (e.g., RAxML) may relax this rule to allow for longer sequence identifiers. See the **Alignment Section** below for more details. The format described here is "sequential" format. The original PHYLIP format specification [3]_ describes both sequential and interleaved formats. .. 
note:: scikit-bio currently supports reading and writing strict, sequential PHYLIP-formatted files. Relaxed and/or interleaved PHYLIP formats are not supported. Header Section ^^^^^^^^^^^^^^ The header consists of a single line describing the dimensions of the alignment. It **must** be the first line in the file. The header consists of optional spaces, followed by two positive integers (``n`` and ``m``) separated by one or more spaces. The first integer (``n``) specifies the number of sequences (i.e., the number of rows) in the alignment. The second integer (``m``) specifies the length of the sequences (i.e., the number of columns) in the alignment. The smallest supported alignment dimensions are 1x1. .. note:: scikit-bio will write the PHYLIP format header *without* preceding spaces, and with only a single space between ``n`` and ``m``. PHYLIP format *does not* support blank line(s) between the header and the alignment. Alignment Section ^^^^^^^^^^^^^^^^^ The alignment section immediately follows the header. It consists of ``n`` lines (rows), one for each sequence in the alignment. Each row consists of a sequence identifier (ID) and characters in the sequence, in fixed width format. The sequence ID can be up to 10 characters long. IDs less than 10 characters must have spaces appended to them to reach the 10 character fixed width. Within an ID, all characters except newlines are supported, including spaces, underscores, and numbers. .. note:: When reading a PHYLIP-formatted file into an ``skbio.alignment.TabularMSA`` object, sequence identifiers/labels are stored as ``TabularMSA`` index labels (``index`` property). When writing an ``skbio.alignment.TabularMSA`` object as a PHYLIP-formatted file, ``TabularMSA`` index labels will be converted to strings and written as sequence identifiers/labels. scikit-bio supports the empty string (``''``) as a valid sequence ID. An empty ID will be padded with 10 spaces when writing. 
Sequence characters immediately follow the sequence ID. They *must* start at the 11th character in the line, as the first 10 characters are reserved for the sequence ID. While PHYLIP format does not explicitly restrict the set of supported characters that may be used to represent a sequence, the original format description [3]_ specifies the IUPAC nucleic acid lexicon for DNA or RNA sequences, and the IUPAC protein lexicon for protein sequences. The original PHYLIP specification uses ``-`` as a gap character, though older versions also supported ``.``. The sequence characters may contain optional spaces (e.g., to improve readability), and both upper and lower case characters are supported. .. note:: scikit-bio will read/write a PHYLIP-formatted file as long as the alignment's sequence characters are valid for the type of in-memory sequence object being read into or written from. This differs from the PHYLIP specification, which states that a PHYLIP-formatted file can only contain valid IUPAC characters. See the ``constructor`` format parameter below for details. Since scikit-bio supports both ``-`` and ``.`` as gap characters (e.g., in ``DNA``, ``RNA``, and ``Protein`` sequence objects), both are supported when reading/writing a PHYLIP-formatted file. When writing a PHYLIP-formatted file, scikit-bio will split up each sequence into chunks that are 10 characters long. Each chunk will be separated by a single space. The sequence will always appear on a single line (sequential format). It will *not* be wrapped across multiple lines. Sequences are chunked in this manner for improved readability, and because most example PHYLIP files are chunked in a similar way (e.g., see the example file above). Note that this chunking is not required when reading PHYLIP-formatted files, nor by the PHYLIP format specification itself. 
Format Parameters ----------------- The only supported format parameter is ``constructor``, which specifies the type of in-memory sequence object to read each aligned sequence into. This must be a subclass of ``GrammaredSequence`` (e.g., ``DNA``, ``RNA``, ``Protein``) and is a required format parameter. For example, if you know that the PHYLIP file you're reading contains DNA sequences, you would pass ``constructor=DNA`` to the reader call. Examples -------- Let's create a ``TabularMSA`` with three DNA sequences: >>> from skbio import TabularMSA, DNA >>> seqs = [DNA('ACCGTTGTA-GTAGCT', metadata={'id':'seq1'}), ... DNA('A--GTCGAA-GTACCT', metadata={'id':'sequence-2'}), ... DNA('AGAGTTGAAGGTATCT', metadata={'id':'3'})] >>> msa = TabularMSA(seqs, minter='id') >>> msa TabularMSA[DNA] ---------------------- Stats: sequence count: 3 position count: 16 ---------------------- ACCGTTGTA-GTAGCT A--GTCGAA-GTACCT AGAGTTGAAGGTATCT >>> msa.index Index(['seq1', 'sequence-2', '3'], dtype='object') Now let's write the ``TabularMSA`` to file in PHYLIP format and take a look at the output: >>> from io import StringIO >>> fh = StringIO() >>> print(msa.write(fh, format='phylip').getvalue()) 3 16 seq1 ACCGTTGTA- GTAGCT sequence-2A--GTCGAA- GTACCT 3 AGAGTTGAAG GTATCT >>> fh.close() Notice that the 16-character sequences were split into two chunks, and that each sequence appears on a single line (sequential format). Also note that each sequence ID is padded with spaces to 10 characters in order to produce a fixed width column. If the index labels in a ``TabularMSA`` surpass the 10-character limit, an error will be raised when writing: >>> msa.index = ['seq1', 'long-sequence-2', 'seq3'] >>> fh = StringIO() >>> msa.write(fh, format='phylip') Traceback (most recent call last): ... skbio.io._exception.PhylipFormatError: ``TabularMSA`` can only be written in \ PHYLIP format if all sequence index labels have 10 or fewer characters. 
Found \ sequence with index label 'long-sequence-2' that exceeds this limit. Use \ ``TabularMSA.reassign_index`` to assign shorter index labels. >>> fh.close() One way to work around this is to assign shorter index labels. The recommended way to do this is via ``TabularMSA.reassign_index``. For example, to reassign default integer index labels: >>> msa.reassign_index() >>> msa.index RangeIndex(start=0, stop=3, step=1) We can now write the ``TabularMSA`` in PHYLIP format: >>> fh = StringIO() >>> print(msa.write(fh, format='phylip').getvalue()) 3 16 0 ACCGTTGTA- GTAGCT 1 A--GTCGAA- GTACCT 2 AGAGTTGAAG GTATCT >>> fh.close() References ---------- .. [1] http://evolution.genetics.washington.edu/phylip.html .. [2] RAxML Version 8: A tool for Phylogenetic Analysis and Post-Analysis of Large Phylogenies". In Bioinformatics, 2014 .. [3] http://evolution.genetics.washington.edu/phylip/doc/sequence.html .. [4] http://www.phylo.org/tools/obsolete/phylip.html .. [5] http://www.bioperl.org/wiki/PHYLIP_multiple_alignment_format """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from skbio.alignment import TabularMSA from skbio.io import create_format, PhylipFormatError from skbio.util._misc import chunk_str phylip = create_format("phylip") @phylip.sniffer() def _phylip_sniffer(fh): # Strategy: # Read the header and a single sequence; verify that the sequence length # matches the header information. Do not verify that the total number of # lines matches the header information, since that would require reading # the whole file. 
try: header = next(_line_generator(fh)) _, seq_len = _validate_header(header) line = next(_line_generator(fh)) _validate_line(line, seq_len) except (StopIteration, PhylipFormatError): return False, {} return True, {} @phylip.reader(TabularMSA) def _phylip_to_tabular_msa(fh, constructor=None): if constructor is None: raise ValueError("Must provide `constructor`.") seqs = [] index = [] for seq, ID in _parse_phylip_raw(fh): seqs.append(constructor(seq)) index.append(ID) return TabularMSA(seqs, index=index) @phylip.writer(TabularMSA) def _tabular_msa_to_phylip(obj, fh): sequence_count = obj.shape.sequence if sequence_count < 1: raise PhylipFormatError( "TabularMSA can only be written in PHYLIP format if there is at " "least one sequence in the alignment." ) sequence_length = obj.shape.position if sequence_length < 1: raise PhylipFormatError( "TabularMSA can only be written in PHYLIP format if there is at " "least one position in the alignment." ) chunk_size = 10 labels = [str(label) for label in obj.index] for label in labels: if len(label) > chunk_size: raise PhylipFormatError( "``TabularMSA`` can only be written in PHYLIP format if all " "sequence index labels have %d or fewer characters. Found " "sequence with index label '%s' that exceeds this limit. Use " "``TabularMSA.reassign_index`` to assign shorter index labels." % (chunk_size, label) ) fh.write("{0:d} {1:d}\n".format(sequence_count, sequence_length)) fmt = "{0:%d}{1}\n" % chunk_size for label, seq in zip(labels, obj): chunked_seq = chunk_str(str(seq), chunk_size, " ") fh.write(fmt.format(label, chunked_seq)) def _validate_header(header): header_vals = header.split() try: n_seqs, seq_len = [int(x) for x in header_vals] if n_seqs < 1 or seq_len < 1: raise PhylipFormatError( "The number of sequences and the length must be positive." 
) except ValueError: raise PhylipFormatError( "Found non-header line when attempting to read the 1st record " "(header line should have two space-separated integers): " '"%s"' % header ) return n_seqs, seq_len def _validate_line(line, seq_len): if not line: raise PhylipFormatError("Empty lines are not allowed.") ID = line[:10].strip() seq = line[10:].replace(" ", "") if len(seq) != seq_len: raise PhylipFormatError( "The length of sequence %s is not %s as specified in the header." % (ID, seq_len) ) return (seq, ID) def _parse_phylip_raw(fh): """Raw parser for PHYLIP files. Returns a list of raw (seq, id) values. It is the responsibility of the caller to construct the correct in-memory object to hold the data. """ # Note: this returns the full data instead of yielding each sequence, # because the header specifies the number of sequences, so the file cannot # be validated until it's read completely. # File should have a single header on the first line. try: header = next(_line_generator(fh)) except StopIteration: raise PhylipFormatError("This file is empty.") n_seqs, seq_len = _validate_header(header) # All following lines should be ID+sequence. No blank lines are allowed. data = [] for line in _line_generator(fh): data.append(_validate_line(line, seq_len)) if len(data) != n_seqs: raise PhylipFormatError( "The number of sequences is not %s " % n_seqs + "as specified in the header." ) return data def _line_generator(fh): """Just remove linebreak characters and yield lines.""" for line in fh: yield line.rstrip("\n") scikit-bio-0.6.2/skbio/io/format/qseq.py000066400000000000000000000240261464262511300201150ustar00rootroot00000000000000r"""QSeq format (:mod:`skbio.io.format.qseq`) ========================================= .. 
currentmodule:: skbio.io.format.qseq The QSeq format (`qseq`) is a record-based, plain text output format produced by some DNA sequencers for storing biological sequence data, quality scores, per-sequence filtering information, and run-specific metadata. Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |No |generator of :mod:`skbio.sequence.Sequence` objects | +------+------+---------------------------------------------------------------+ |Yes |No |:mod:`skbio.sequence.Sequence` | +------+------+---------------------------------------------------------------+ |Yes |No |:mod:`skbio.sequence.DNA` | +------+------+---------------------------------------------------------------+ |Yes |No |:mod:`skbio.sequence.RNA` | +------+------+---------------------------------------------------------------+ |Yes |No |:mod:`skbio.sequence.Protein` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- A QSeq file is composed of single-line records, delimited by tabs. There are 11 fields in a record: - Machine name - Run number - Lane number (positive int) - Tile number (positive int) - X coordinate (integer) - Y coordinate (integer) - Index - Read number (1-3) - Sequence data (typically IUPAC characters) - Quality scores (quality scores encoded as printable ASCII) - Filter boolean (1 if sequence has passed CASAVA's filter, 0 otherwise) For more details please refer to the CASAVA documentation [1]_. .. note:: When a QSeq file is read into a scikit-bio object, the object's `metadata` attribute is automatically populated with data corresponding to the names above. .. note:: `lowercase` functionality is supported when reading QSeq files. Refer to specific object constructor documentation for details. .. 
note:: scikit-bio allows for the filter field to be ommitted, but it is not clear if this is part of the original format specification. Format Parameters ----------------- The following parameters are the same as in FASTQ format (:mod:`skbio.io.format.fastq`): - ``variant``: see ``variant`` parameter in FASTQ format - ``phred_offset``: see ``phred_offset`` parameter in FASTQ format The following additional parameters are the same as in FASTA format (:mod:`skbio.io.format.fasta`): - ``constructor``: see ``constructor`` parameter in FASTA format - ``seq_num``: see ``seq_num`` parameter in FASTA format Generators Only ^^^^^^^^^^^^^^^ - ``filter``: If `True`, excludes sequences that did not pass filtering (i.e., filter field is 0). Default is `True`. Examples -------- Suppose we have the following QSeq file:: illumina 1 3 34 -30 30 0 1 ACG....ACGTAC ruBBBBrBCEFGH 1 illumina 1 3 34 30 -30 0 1 CGGGCATTGCA CGGGCasdGCA 0 illumina 1 3 35 -30 30 0 2 ACGTA.AATAAAC geTaAafhwqAAf 1 illumina 1 3 35 30 -30 0 3 CATTTAGGA.TGCA tjflkAFnkKghvM 0 Let's define this file in-memory as a ``StringIO``, though this could be a real file path, file handle, or anything that's supported by scikit-bio's I/O registry in practice: >>> from io import StringIO >>> fs = '\n'.join([ ... 'illumina\t1\t3\t34\t-30\t30\t0\t1\tACG....ACGTAC\truBBBBrBCEFGH\t1', ... 'illumina\t1\t3\t34\t30\t-30\t0\t1\tCGGGCATTGCA\tCGGGCasdGCA\t0', ... 'illumina\t1\t3\t35\t-30\t30\t0\t2\tACGTA.AATAAAC\tgeTaAafhwqAAf\t1', ... 'illumina\t1\t3\t35\t30\t-30\t0\t3\tCATTTAGGA.TGCA\ttjflkAFnkKghvM\t0' ... ]) >>> fh = StringIO(fs) To iterate over the sequences using the generator reader, we run: >>> import skbio.io >>> for seq in skbio.io.read(fh, format='qseq', variant='illumina1.3'): ... seq ... 
print('') Sequence -------------------------------------- Metadata: 'id': 'illumina_1:3:34:-30:30#0/1' 'index': 0 'lane_number': 3 'machine_name': 'illumina' 'read_number': 1 'run_number': 1 'tile_number': 34 'x': -30 'y': 30 Positional metadata: 'quality': Stats: length: 13 -------------------------------------- 0 ACG....ACG TAC Sequence -------------------------------------- Metadata: 'id': 'illumina_1:3:35:-30:30#0/2' 'index': 0 'lane_number': 3 'machine_name': 'illumina' 'read_number': 2 'run_number': 1 'tile_number': 35 'x': -30 'y': 30 Positional metadata: 'quality': Stats: length: 13 -------------------------------------- 0 ACGTA.AATA AAC Note that only two sequences were loaded because the QSeq reader filters out sequences whose filter field is 0 (unless ``filter=False`` is supplied). References ---------- .. [1] http://biowulf.nih.gov/apps/CASAVA_UG_15011196B.pdf """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from skbio.io import create_format, QSeqFormatError from skbio.io.format._base import _decode_qual_to_phred, _get_nth_sequence from skbio.sequence import Sequence, DNA, RNA, Protein _default_phred_offset = None _default_variant = None _will_filter = True qseq = create_format("qseq") @qseq.sniffer() def _qseq_sniffer(fh): empty = True try: for _, line in zip(range(10), fh): _record_parser(line) empty = False return not empty, {} except QSeqFormatError: return False, {} @qseq.reader(None) def _qseq_to_generator( fh, constructor=Sequence, filter=_will_filter, phred_offset=_default_phred_offset, variant=_default_variant, **kwargs, ): for line in fh: ( machine_name, run, lane, tile, x, y, index, read, seq, raw_qual, filtered, ) = _record_parser(line) if not filter or not filtered: phred = _decode_qual_to_phred(raw_qual, variant, phred_offset) seq_id = "%s_%s:%s:%s:%s:%s#%s/%s" % ( machine_name, run, lane, tile, x, y, index, read, ) yield constructor( seq, metadata={ "id": seq_id, "machine_name": machine_name, "run_number": int(run), "lane_number": int(lane), "tile_number": int(tile), "x": int(x), "y": int(y), "index": int(index), "read_number": int(read), }, positional_metadata={"quality": phred}, **kwargs, ) @qseq.reader(Sequence) def _qseq_to_sequence( fh, seq_num=1, phred_offset=_default_phred_offset, variant=_default_variant, **kwargs, ): return _get_nth_sequence( _qseq_to_generator( fh, filter=False, phred_offset=phred_offset, variant=variant, constructor=Sequence, **kwargs, ), seq_num, ) @qseq.reader(DNA) def _qseq_to_dna( fh, seq_num=1, phred_offset=_default_phred_offset, variant=_default_variant, **kwargs, ): return _get_nth_sequence( _qseq_to_generator( fh, filter=False, phred_offset=phred_offset, variant=variant, constructor=DNA, **kwargs, ), seq_num, ) @qseq.reader(RNA) def _qseq_to_rna( fh, seq_num=1, phred_offset=_default_phred_offset, variant=_default_variant, **kwargs, ): 
return _get_nth_sequence( _qseq_to_generator( fh, filter=False, phred_offset=phred_offset, variant=variant, constructor=RNA, **kwargs, ), seq_num, ) @qseq.reader(Protein) def _qseq_to_protein( fh, seq_num=1, phred_offset=_default_phred_offset, variant=_default_variant, **kwargs, ): return _get_nth_sequence( _qseq_to_generator( fh, filter=False, phred_offset=phred_offset, variant=variant, constructor=Protein, **kwargs, ), seq_num, ) def _record_parser(line): fields = line.rstrip("\n") if fields: fields = fields.split("\t") else: raise QSeqFormatError("Found blank line.") f_len = len(fields) if not (10 <= f_len <= 11): raise QSeqFormatError("Expected 10 or 11 fields, found %d." % f_len) # If the filter field was ommitted, assume that it passed filtering: if f_len == 10: fields.append("1") (machine, run, lane, tile, x, y, index, read, seq, raw_qaul, filter) = fields _test_fields([("filter", filter)], lambda x: x in "01", "0 or 1") _test_fields([("read", read)], lambda x: x in "123", "in the range [1, 3]") _test_fields([("x", x), ("y", y)], lambda x: int(x) is not None, "an integer") _test_fields( [("lane", lane), ("tile", tile)], lambda x: int(x) >= 0, "a positive integer" ) return (machine, run, lane, tile, x, y, index, read, seq, raw_qaul, filter == "0") def _test_fields(iterkv, test, efrag): try: for k, v in iterkv: if not test(v): raise ValueError() except ValueError: raise QSeqFormatError("Field %r is not %s." % (k, efrag)) scikit-bio-0.6.2/skbio/io/format/sample_metadata.py000066400000000000000000000332251464262511300222660ustar00rootroot00000000000000"""Sample Metadata object ported over from qiime2. =============================================== .. currentmodule:: skbio.io.format.sample_metadata This implements the Sample_Metadata format which is identical to the Metadata format implemented in qiime2. (see: https://docs.qiime2.org/2024.2/tutorials/metadata/) An example sample_metadata file: .. 
code-block:: none id col1 col2 col3 #q2:types categorical categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.metadata.SampleMetadata` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- Metadata Formatting Requirements ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ QIIME 2 metadata is most commonly stored in a TSV (i.e. tab-separated values) file. These files typically have a .tsv or .txt file extension, though it doesn't matter to QIIME 2 what file extension is used. TSV files are simple text files used to store tabular data, and the format is supported by many types of software, such as editing, importing, and exporting from spreadsheet programs and databases. Thus, it's usually straightforward to manipulate QIIME 2 metadata using the software of your choosing. If in doubt, we recommend using a spreadsheet program such as Microsoft Excel or Google Sheets to edit and export your metadata files. The following sections describe formatting requirements for QIIME 2 metadata files, and how to validate your metadata files. Since there is no universal standard for TSV files, it is important to adhere to these requirements and understand how QIIME 2 will interpret the file's contents to get the most out of your (meta)data! Metadata Validation ^^^^^^^^^^^^^^^^^^^ Sample and feature metadata files stored in Google Sheets can be validated using Keemei. Select Add-ons > Keemei > Validate QIIME 2 metadata file to validate metadata stored in Google Sheets. QIIME 2 will also automatically validate a metadata file anytime it is used by the software. 
However, using Keemei to validate your metadata is recommended because a report of all validation errors and warnings will be presented each time Keemei is run. Loading your metadata in QIIME 2 will typically present only a single error at a time, which can make identifying and resolving validation issues cumbersome, especially if there are many issues with the metadata. Leading and trailing whitespace characters ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ If any cell in the metadata contains leading or trailing whitespace characters (e.g. spaces, tabs), those characters will be ignored when the file is loaded. Thus, leading and trailing whitespace characters are not significant, so cells containing the values 'gut' and ' gut ' are equivalent. This rule is applied before any other rules described below. Comments and Empty Rows ^^^^^^^^^^^^^^^^^^^^^^^ Rows whose first cell begins with the pound sign (#) are interpreted as comments and may appear anywhere in the file. Comment rows are ignored by QIIME 2 and are for informational purposes only. Inline comments are not supported. Empty rows (e.g. blank lines or rows consisting solely of empty cells) may appear anywhere in the file and are ignored. Identifier Column ^^^^^^^^^^^^^^^^^ The first column in the metadata file is the identifier (ID) column. This column defines the sample or feature IDs associated with your study. It is not recommended to mix sample and feature IDs in a single metadata file; keep sample and feature metadata stored in separate files. The ID column name (i.e. ID header) must be one of the following values. The values listed below may not be used to name other IDs or columns in the file. 
Case-insensitive:

- id
- sampleid
- sample id
- sample-id
- featureid
- feature id
- feature-id

Case-sensitive (these are mostly for backwards-compatibility with QIIME 1,
biom-format, and Qiita files):

- #SampleID
- #Sample ID
- #OTUID
- #OTU ID
- sample_name

The following rules apply to IDs:

- IDs may consist of any Unicode characters, with the exception that IDs must
  not start with the pound sign (#), as those rows would be interpreted as
  comments and ignored. See the section Recommendations for Identifiers for
  recommendations on choosing identifiers in your study.
- IDs cannot be empty (i.e. they must consist of at least one character).
- IDs must be unique (exact string matching is performed to detect
  duplicates).
- At least one ID must be present in the file.
- IDs cannot use any of the reserved ID column names listed above.

Recommendations for Identifiers
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Our goal with QIIME 2 is to support arbitrary Unicode characters in all cells
of metadata files. However, given that QIIME 2 plugins and interfaces can be
developed by anyone, we can’t make a guarantee that arbitrary Unicode
characters will work with all plugins and interfaces. We can therefore make
recommendations to users about characters that should be safe to use in
identifiers, and we are preparing resources for plugin and interface
developers to help them make their software as robust as possible. As
developer resources become available, we will announce them in the Developer
Discussion category on the QIIME 2 Forum.

Sample and feature identifiers with problematic characters tend to cause the
most issues for our users. Based on our experiences with QIIME 1, QIIME 2, and
other bioinformatics and command line tools, we can recommend the following
attributes for identifiers:

- Identifiers should be 36 characters long or less.
- Identifiers should contain only ASCII alphanumeric characters (i.e. in the
  range of [a-z], [A-Z], or [0-9]), the period (.)
character, or the dash (-) character. An important point to remember is that sometimes values in your sample metadata can become identifiers. For example, taxonomy annotations can become feature identifiers following qiime taxa collapse, and sample or feature metadata values can become identifiers after applying qiime feature-table group. If you plan to apply these or similar methods where metadata values can become identifiers, you will be less likely to encounter problems if the values adhere to these identifier recommendations as well. To help users become aware of these recommendations, the Keemei metadata validator will warn users about identifiers that don’t meet the above recommendations. Users may be interested in the cual-id software for assistance with creating sample identifiers. The cual-id paper also provides some discussion on how to design identifiers. Metadata Columns ^^^^^^^^^^^^^^^^ The ID column is the first column in the metadata file, and can optionally be followed by additional columns defining metadata associated with each sample or feature ID. Metadata files are not required to have additional metadata columns, so a file containing only an ID column is a valid QIIME 2 metadata file. The following rules apply to column names: - May consist of any Unicode characters. - Cannot be empty (i.e. column names must consist of at least one character). - Must be unique (exact string matching is performed to detect duplicates). - Column names cannot use any of the reserved ID column names described in the section Identifier Column. The following rules apply to column values: - May consist of any Unicode characters. - Empty cells represent missing data. Other values such as NA are not interpreted as missing data; only the empty cell is recognized as “missing”. 
Note that cells consisting solely of whitespace characters are also interpreted as missing data because leading and trailing whitespace characters are always ignored, effectively making the cell empty. Column Types ^^^^^^^^^^^^ QIIME 2 currently supports categorical and numeric metadata columns. By default, QIIME 2 will attempt to infer the type of each metadata column: if the column consists only of numbers or missing data, the column is inferred to be numeric. Otherwise, if the column contains any non-numeric values, the column is inferred to be categorical. Missing data (i.e. empty cells) are supported in categorical columns as well as numeric columns. QIIME 2 supports an optional comment directive to allow users to explicitly state a column's type, avoiding the column type inference described above. This can be useful if there is a column that appears to be numeric, but should actually be treated as categorical metadata (e.g. a Subject column where subjects are labeled 1, 2, 3, etc). Explicitly declaring a column's type also makes your metadata file more descriptive because the intended column type is included with the metadata, instead of relying on software to infer the type (which isn't always transparent). You can use an optional comment directive to declare column types in your metadata file, either manually or through the q2cli developer tools. For manual specifications within your metadata file(s), the comment directive must appear directly below the header. The row's first cell must be #q2:types or #sk:types to indicate the row is a comment directive. Subsequent cells may contain the values categorical or numeric (both case-insensitive). The empty cell is also supported if you do not wish to assign a type to a column (the type will be inferred in that case). Thus, it is easy to include this comment directive without having to declare types for every column in your metadata. 
Number Formatting ^^^^^^^^^^^^^^^^^ If a column is to be interpreted as a numeric metadata column (either through column type inference or by using the #q2:types comment directive), numbers in the column must be formatted following these rules: - Use the decimal number system: ASCII characters [0-9], . for an optional decimal point, and + and - for positive and negative signs, respectively. - Examples: 123, 123.45, 0123.40, -0.000123, +1.23 - Scientific notation may be used with E-notation; both e and E are supported. - Examples: 1e9, 1.23E-4, -1.2e-08, +4.5E+6 - Only up to 15 digits total (including before and after the decimal point) are supported to stay within the 64-bit floating point specification. Numbers exceeding 15 total digits are unsupported and will result in undefined behavior. - Common representations of not a number (e.g. NaN, nan) or infinity (e.g. inf, -Infinity) are not supported. Use an empty cell for missing data (e.g. instead of NaN). Infinity is not supported at this time in QIIME 2 metadata files. """ # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import csv import re from skbio.io import create_format from skbio.metadata._metadata import SampleMetadata from skbio.metadata.io import MetadataReader, MetadataWriter # newline="" is because otherwise csv.writer will write blank lines between rows # in Windows. 
See: https://stackoverflow.com/questions/3348460/ sample_metadata = create_format("sample_metadata", newline="") @sample_metadata.sniffer() def _sample_metadata_sniffer(fh): # Strategy: # Check if first word in the file is in the list # of allowed metadata words try: tsv_reader = csv.reader(fh, dialect="excel-tab", strict=True) # sample id and feature id are not separated when reading the tsv # since they are not tab-separated. possible_ids = [ "id", "sampleid", "sample-id", "featureid", "feature-id", "sample id", "feature id", "sample_name", # Technically this should be case-sensitive ] possible_ids_w_leading_comment_char = [ "#SampleID", "#Sample ID", "#OTUID", "#OTU ID", ] # We need to find the actual header row # so we loop until we find the first row that isn't empty or a comment for header in tsv_reader: # Skip empty rows if len(header) == 0: continue match = re.search(r"\S+", header[0]) # Check if first word is a columnID that starts with # if match and match.group() in possible_ids_w_leading_comment_char: return True, {} # Skip rows whose first non-whitespace character is a # # since they are comments. skips empty rows too. if not match or match.group()[0] == "#": continue if any( [x.casefold() == header[0].strip().casefold() for x in possible_ids] ): return True, {} # if the first non-empty non-comment row doesn't have a valid id as # first entry we conclude that this is not a metadata file. return False, {} # In case the file is empty and has no rows that are non-empty non-comment # we return a negative result. 
return False, {} # if we run into errors with the csv file we assume its not a metadata file except csv.Error: return False, {} @sample_metadata.reader(SampleMetadata) def _sample_metadata_read(fh, **kwargs): return MetadataReader(fh).read(SampleMetadata, **kwargs) @sample_metadata.writer(SampleMetadata) def _sample_metadata_write(obj, fh): MetadataWriter(obj).write(fh) scikit-bio-0.6.2/skbio/io/format/stockholm.py000066400000000000000000000703511464262511300211510ustar00rootroot00000000000000"""Stockholm format (:mod:`skbio.io.format.stockholm`) =================================================== .. currentmodule:: skbio.io.format.stockholm The Stockholm format is a multiple sequence alignment format (MSA) that optionally supports storing arbitrary alignment features (metadata). Features can be placed into four different categories: GF, GS, GR, and GC (described in more detail below). An example Stockholm file, taken from [1]_: .. code-block:: none # STOCKHOLM 1.0 #=GF ID UPSK #=GF SE Predicted; Infernal #=GF SS Published; PMID 9223489 #=GF RN [1] #=GF RM 9223489 #=GF RT The role of the pseudoknot at the 3' end of turnip yellow mosaic #=GF RT virus RNA in minus-strand synthesis by the viral RNA-dependent \ RNA #=GF RT polymerase. #=GF RA Deiman BA, Kortlever RM, Pleij CW; #=GF RL J Virol 1997;71:5990-5996. 
AF035635.1/619-641 UGAGUUCUCGAUCUCUAAAAUCG M24804.1/82-104 UGAGUUCUCUAUCUCUAAAAUCG J04373.1/6212-6234 UAAGUUCUCGAUCUUUAAAAUCG M24803.1/1-23 UAAGUUCUCGAUCUCUAAAAUCG #=GC SS_cons .AAA....<<<>>> // Format Support -------------- **Has Sniffer: Yes** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |Yes |:mod:`skbio.alignment.TabularMSA` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The Stockholm format consists of a header, a multiple sequence alignment, associated metadata (features), and a footer. Header ^^^^^^ The first line of a Stockholm file must be the following header: .. code-block:: none # STOCKHOLM 1.0 Multiple Sequence Alignment ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Sequence lines consist of a sequence name, followed by whitespace, followed by the aligned sequence. For example:: seq1 ACG-T-GGT seq2 ACCGTTCG- Sequence names (``seq1``, ``seq2``) are stored in the ``TabularMSA`` ``index``. .. note:: scikit-bio currently supports reading Stockholm files where each sequence is contained on a single line. Interleaved/wrap-around Stockholm files are not supported. When writing, each sequence will be placed on its own line. .. warning:: Sequence names must be unique in the Stockholm file. Likewise, when writing from a ``TabularMSA``, ``index`` must be unique. Metadata ^^^^^^^^ Stockholm files support storing arbitrary metadata (features) about the MSA. All metadata described in the following sections are optional and may appear in any order. Metadata "mark-up" lines begin with either ``#=GF``, ``#=GS``, ``#=GR``, or ``#=GC``, and each line describes a single feature of the alignment. .. note:: Stockholm format supports generic features. [1]_ and [2]_ provide a list of common features output by Pfam/Rfam. 
scikit-bio does not require that these features are present. These features are processed in the same way as any arbitrary feature would be, as a simple key-value pair of strings. When writing, feature names, feature data, and sequence names are converted to type ``str``. .. note:: When writing a Stockholm file, scikit-bio will place the metadata in the format's recommended order: - GF: Above the alignment - GS: Above the alignment (after GF) - GR: Below corresponding sequence - GC: Below the alignment GF metadata +++++++++++ Data relating to the multiple sequence alignment as a whole, such as authors or number of sequences in the alignment. Starts with ``#=GF`` followed by a feature name and data relating to the feature. Typically comes first in a Stockholm file. For example (taken from [2]_): .. code-block:: none #=GF DE CBS domain Where ``DE`` is the feature name and ``CBS Domain`` is the feature data. GF metadata is stored in the ``TabularMSA`` ``metadata`` dictionary. .. note:: When reading, duplicate GF feature names will have their values concatenated in the order they appear in the file. Concatenation will also add a space between lines if one isn't already there in order to avoid joining words together. When writing, each GF feature will be placed on its own line, regardless of length. .. note:: Trees labelled with ``NH``/``TN`` are handled differently than other GF features. When reading a Stockholm file with these features, the reader follows the rules described in [2]_. Trees split over multiple lines will have their values concatenated. Unlike other GF features, trees will never have a space added when they are concatenated. 
A single tree without an identifier will be stored as:: metadata = { 'NH': 'tree in NHX format' } A single tree with an identifier will be stored as:: metadata = { 'NH': { 'tree-id': 'tree in NHX format' } } Multiple trees (which must have identifiers) will be stored as:: metadata = { 'NH': { 'tree-id-1': 'tree in NHX format', 'tree-id-2': 'tree in NHX format' } } .. note:: References labelled with ``RN``/``RM``/``RT``/``RA``/``RL``/``RC`` are handled differently than other GF features. When reading a Stockholm file with these features, the reader populates a list of dictionaries, where each dictionary represents a single reference. The list contains references in the order they appear in the file, regardless of the value provided for ``RN``. If a reference does not include all possible reference tags (e.g. ``RC`` is missing), the dictionary will only contain the reference tags present for that reference. When writing, the writer adds a reference number (``RN``) line before writing each reference, for example: .. code-block:: none #=GF RN [1] #=GF RA Kestrel Gorlick ... #=GF RN [2] ... References will be stored as:: metadata = { 'RN': [{ 'RM': 'reference medline', 'RT': 'reference title', 'RA': 'reference author', 'RL': 'reference location', 'RC': 'reference comment' }, { 'RM': 'reference medline', ... }] } GS metadata +++++++++++ Data relating to a specific sequence in the multiple sequence alignment. Starts with ``#=GS`` followed by the sequence name followed by a feature name and data relating to the feature. Typically comes after GF metadata in a Stockholm file. For example (taken from [2]_): .. code-block:: none #=GS O83071/259-312 AC O83071 Where ``O83071/259-312`` is the sequence name, ``AC`` is the feature name, and ``083071`` is the feature data. GS metadata is stored in the sequence-specific ``metadata`` dictionary. .. note:: When reading, duplicate GS feature names will have their values concatenated in the order they appear in the file. 
Concatenation will also add a space between lines if one isn't already there in order to avoid joining words together. When writing, each GS feature will be placed on its own line, regardless of length. GR metadata +++++++++++ Data relating to the columns of a specific sequence in a multiple sequence alignment. Starts with ``#=GR`` followed by the sequence name followed by a feature name and data relating to the feature, one character per column. Typically comes after the sequence line it relates to. For example (taken from [2]_): .. code-block:: none #=GR O31698/18-71 SS CCCHHHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEHHH Where ``O31698/18-71`` is the sequence name, ``SS`` is the feature name, and ``CCCHHHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEHHH`` is the feature data. GR metadata is stored in sequence-specific ``positional_metadata``. .. note:: Duplicate GR feature names attributed to a single sequence are disallowed. GC metadata +++++++++++ Data relating to the columns of the multiple sequence alignment as a whole. Starts with ``#=GC`` followed by a feature name and data relating to the feature, one character per column. Typically comes at the end of the multiple sequence alignment. For example (taken from [2]_): .. code-block:: none #=GC SS_cons CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEH Where ``SS_cons`` is the feature name and ``CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEH`` is the feature data. GC metadata is stored in ``TabularMSA`` ``positional_metadata``. .. note:: Duplicate GC feature names are disallowed. Footer ^^^^^^ The final line of a Stockholm file must be the following footer:: // .. note:: scikit-bio currently supports reading a Stockholm file containing a single MSA. If the file contains more than one MSA, only the first MSA will be read into a ``TabularMSA``. Format Parameters ----------------- The only supported format parameter is ``constructor``, which specifies the type of in-memory sequence object to read each aligned sequence into. 
This must be a subclass of ``GrammaredSequence`` (e.g., ``DNA``, ``RNA``, ``Protein``) and is a required format parameter. For example, if you know that the Stockholm file you're reading contains DNA sequences, you would pass ``constructor=DNA`` to the reader call. Examples -------- Suppose we have a Stockholm file containing an MSA of protein sequences (modified from [2]_): >>> import skbio.io >>> from io import StringIO >>> from skbio import Protein, TabularMSA >>> fs = '\\n'.join([ ... '# STOCKHOLM 1.0', ... '#=GF CC CBS domains are small intracellular modules mostly' ... ' found', ... '#=GF CC in 2 or four copies within a protein.', ... '#=GS O83071/192-246 AC O83071', ... '#=GS O31698/88-139 OS Bacillus subtilis', ... 'O83071/192-246 MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRV', ... '#=GR O83071/192-246 SA 999887756453524252..55152525....36463', ... 'O83071/259-312 MQHVSAPVFVFECTRLAY..VQHKLRAH....SRAVA', ... 'O31698/18-71 MIEADKVAHVQVGNNLEH..ALLVLTKT....GYTAI', ... 'O31698/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFV', ... 'O31699/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFV', ... '#=GR O31699/88-139 AS ________________*____________________', ... '#=GR O31699/88-139 IN ____________1______________2_________', ... '#=GC SS_cons CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEE', ... '//' ... ]) >>> fh = StringIO(fs) >>> msa = TabularMSA.read(fh, constructor=Protein) >>> msa # doctest: +NORMALIZE_WHITESPACE TabularMSA[Protein] ---------------------------------------------------------------------- Metadata: 'CC': 'CBS domains are small intracellular modules mostly found in 2 or four copies within a protein.' 
Positional metadata: 'SS_cons': Stats: sequence count: 5 position count: 37 ---------------------------------------------------------------------- MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRV MQHVSAPVFVFECTRLAY..VQHKLRAH....SRAVA MIEADKVAHVQVGNNLEH..ALLVLTKT....GYTAI EVMLTDIPRLHINDPIMK..GFGMVINN......GFV EVMLTDIPRLHINDPIMK..GFGMVINN......GFV The sequence names are stored in the ``index``: >>> msa.index Index(['O83071/192-246', 'O83071/259-312', 'O31698/18-71', 'O31698/88-139', 'O31699/88-139'], dtype='object') The ``TabularMSA`` has GF metadata stored in its ``metadata`` dictionary: >>> list(msa.metadata.items()) [('CC', 'CBS domains are small intracellular modules mostly found in 2 or \ four copies within a protein.')] GC metadata is stored in the ``TabularMSA`` ``positional_metadata``: >>> msa.positional_metadata # doctest: +ELLIPSIS SS_cons 0 C 1 C 2 C 3 C 4 C 5 H 6 H 7 H 8 H 9 H ... GS metadata is stored in the sequence-specific ``metadata`` dictionary: >>> list(msa[0].metadata.items()) [('AC', 'O83071')] GR metadata is stored in sequence-specific ``positional_metadata``: >>> msa[4].positional_metadata # doctest: +ELLIPSIS AS IN 0 _ _ 1 _ _ 2 _ _ 3 _ _ 4 _ _ 5 _ _ 6 _ _ 7 _ _ 8 _ _ 9 _ _ ... Let's write this ``TabularMSA`` in Stockholm format: >>> fh = StringIO() >>> _ = msa.write(fh, format='stockholm') >>> print(fh.getvalue()) # STOCKHOLM 1.0 #=GF CC CBS domains are small intracellular modules mostly found in 2 or four \ copies within a protein. 
#=GS O83071/192-246 AC O83071 #=GS O31698/88-139 OS Bacillus subtilis O83071/192-246 MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRV #=GR O83071/192-246 SA 999887756453524252..55152525....36463 O83071/259-312 MQHVSAPVFVFECTRLAY..VQHKLRAH....SRAVA O31698/18-71 MIEADKVAHVQVGNNLEH..ALLVLTKT....GYTAI O31698/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFV O31699/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFV #=GR O31699/88-139 AS ________________*____________________ #=GR O31699/88-139 IN ____________1______________2_________ #=GC SS_cons CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEE // >>> fh.close() References ---------- .. [1] https://en.wikipedia.org/wiki/Stockholm_format .. [2] http://sonnhammer.sbc.su.se/Stockholm.html """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from collections import OrderedDict from skbio.alignment import TabularMSA from skbio.sequence._grammared_sequence import GrammaredSequence from skbio.io import create_format, StockholmFormatError stockholm = create_format("stockholm") _REFERENCE_TAGS = frozenset({"RM", "RT", "RA", "RL", "RC"}) @stockholm.sniffer() def _stockholm_sniffer(fh): # Smells a Stockholm file if the following conditions are met: # - File isn't empty # - File contains correct header try: line = next(fh) except StopIteration: return False, {} if _is_header(line): return True, {} return False, {} @stockholm.reader(TabularMSA) def _stockholm_to_tabular_msa(fh, constructor=None): # Checks that user has passed required constructor parameter if constructor is None: raise ValueError( "Must provide `constructor` parameter indicating the " "type of sequences in the alignment. 
`constructor` " "must be a subclass of `GrammaredSequence` " "(e.g., `DNA`, `RNA`, `Protein`)." ) # Checks that contructor parameter is supported elif not issubclass(constructor, GrammaredSequence): raise TypeError("`constructor` must be a subclass of " "`GrammaredSequence`.") # Checks that the file isn't empty try: line = next(fh) except StopIteration: raise StockholmFormatError("File is empty.") # Checks that the file follows basic format (includes the required header) if not _is_header(line): raise StockholmFormatError("File missing required Stockholm header " "line.") msa_data = _MSAData() for line in fh: if line.isspace(): continue line = line.rstrip("\n") if _is_sequence_line(line): seq_name, seq_data = _parse_sequence_line(line) msa_data.add_sequence(seq_name, seq_data) elif line.startswith("#=GF"): feature_name, feature_data = _parse_gf_line(line) msa_data.add_gf_metadata(feature_name, feature_data) elif line.startswith("#=GS"): seq_name, feature_name, feature_data = _parse_gs_line(line) msa_data.add_gs_metadata(seq_name, feature_name, feature_data) elif line.startswith("#=GR"): seq_name, feature_name, feature_data = _parse_gr_line(line) msa_data.add_gr_metadata(seq_name, feature_name, feature_data) elif line.startswith("#=GC"): feature_name, feature_data = _parse_gc_line(line) msa_data.add_gc_metadata(feature_name, feature_data) elif _is_footer(line): break else: raise StockholmFormatError("Unrecognized line: %r" % line) if not _is_footer(line): raise StockholmFormatError( "Final line does not conform to Stockholm " 'format. Must contain only "//".' ) return msa_data.build_tabular_msa(constructor) # For storing intermediate data used to construct a Sequence object. 
class _MSAData: def __init__(self): self._seqs = {} self._seq_order = [] self._metadata = OrderedDict() self._positional_metadata = OrderedDict() def add_sequence(self, seq_name, seq_data): if seq_name not in self._seqs: self._seqs[seq_name] = _SeqData(seq_name) self._seqs[seq_name].seq = seq_data self._seq_order.append(seq_name) def add_gf_metadata(self, feature_name, feature_data): # Handles first instance of labelled tree if feature_name == "TN" and "NH" not in self._metadata: self._metadata["NH"] = OrderedDict() self._metadata["NH"][feature_data] = "" # Handles second instance of labelled tree elif feature_name == "TN" and "NH" in self._metadata: if feature_data in self._metadata["NH"]: raise StockholmFormatError( "Tree name %r used multiple times " "in file." % feature_data ) self._metadata["NH"][feature_data] = "" # Handles extra line(s) of an already created tree elif feature_name == "NH" and feature_name in self._metadata: trees = self._metadata[feature_name] if isinstance(trees, OrderedDict): tree_id = next(reversed(trees)) self._metadata[feature_name][tree_id] = trees[tree_id] + feature_data else: self._metadata[feature_name] = ( self._metadata[feature_name] + feature_data ) elif feature_name == "RN": if feature_name not in self._metadata: self._metadata[feature_name] = [OrderedDict()] else: self._metadata[feature_name].append(OrderedDict()) elif feature_name in _REFERENCE_TAGS: if "RN" not in self._metadata: raise StockholmFormatError( "Expected 'RN' tag to precede " "'%s' tag." 
% feature_name ) reference_dict = self._metadata["RN"][-1] if feature_name not in reference_dict: reference_dict[feature_name] = feature_data else: padding = _get_padding(reference_dict[feature_name]) reference_dict[feature_name] += padding + feature_data elif feature_name in self._metadata: padding = _get_padding(self._metadata[feature_name][-1]) self._metadata[feature_name] = ( self._metadata[feature_name] + padding + feature_data ) else: self._metadata[feature_name] = feature_data def add_gc_metadata(self, feature_name, feature_data): if feature_name in self._positional_metadata: _raise_duplicate_error("Found duplicate GC label %r." % feature_name) self._positional_metadata[feature_name] = feature_data def add_gs_metadata(self, seq_name, feature_name, feature_data): if seq_name not in self._seqs: self._seqs[seq_name] = _SeqData(seq_name) self._seqs[seq_name].add_metadata_feature(feature_name, feature_data) def add_gr_metadata(self, seq_name, feature_name, feature_data): if seq_name not in self._seqs: self._seqs[seq_name] = _SeqData(seq_name) self._seqs[seq_name].add_positional_metadata_feature(feature_name, feature_data) def build_tabular_msa(self, constructor): if len(self._seqs) != len(self._seq_order): invalid_seq_names = set(self._seqs) - set(self._seq_order) raise StockholmFormatError( "Found GS or GR metadata for " "nonexistent sequence(s): %r" % invalid_seq_names ) seqs = [] for seq_name in self._seq_order: seqs.append(self._seqs[seq_name].build_sequence(constructor)) positional_metadata = self._positional_metadata if not positional_metadata: positional_metadata = None metadata = self._metadata if not metadata: metadata = None # Constructs TabularMSA return TabularMSA( seqs, metadata=metadata, positional_metadata=positional_metadata, index=self._seq_order, ) class _SeqData: def __init__(self, name): self.name = name self._seq = None self.metadata = None self.positional_metadata = None @property def seq(self): return self._seq @seq.setter def seq(self, 
seq): if self._seq is None: self._seq = seq else: _raise_duplicate_error("Found duplicate sequence name: %r" % self.name) def add_metadata_feature(self, feature_name, feature_data): if self.metadata is None: self.metadata = OrderedDict() if feature_name in self.metadata: padding = _get_padding(self.metadata[feature_name][-1]) self.metadata[feature_name] += padding + feature_data else: self.metadata[feature_name] = feature_data def add_positional_metadata_feature(self, feature_name, feature_data): if self.positional_metadata is None: self.positional_metadata = OrderedDict() if feature_name in self.positional_metadata: _raise_duplicate_error( "Found duplicate GR label %r associated " "with sequence name %r" % (feature_name, self.name) ) else: self.positional_metadata[feature_name] = feature_data def build_sequence(self, constructor): return constructor( self.seq, metadata=self.metadata, positional_metadata=(self.positional_metadata), ) def _parse_gf_line(line): line = line.split(None, 2) _check_for_malformed_line(line, 3) return line[1:] def _parse_gs_line(line): line = line.split(None, 3) _check_for_malformed_line(line, 4) return line[1:] def _parse_gr_line(line): line = line.split(None, 3) _check_for_malformed_line(line, 4) seq_name = line[1] feature_name = line[2] feature_data = list(line[3]) return seq_name, feature_name, feature_data def _parse_gc_line(line): line = line.split(None, 2) _check_for_malformed_line(line, 3) feature_name = line[1] feature_data = list(line[2]) return feature_name, feature_data def _parse_sequence_line(line): line = line.split(None, 1) _check_for_malformed_line(line, 2) return line def _is_header(line): return line == "# STOCKHOLM 1.0\n" def _is_footer(line): return line.rstrip() == "//" def _is_sequence_line(line): return not (line.startswith("#") or _is_footer(line)) def _raise_duplicate_error(message): raise StockholmFormatError( message + " Note: If the file being used is in " "Stockholm interleaved format, this " "is not supported 
by the reader." ) def _check_for_malformed_line(line, expected_len): if len(line) != expected_len: raise StockholmFormatError( "Line contains %d item(s). It must " "contain exactly %d item(s)." % (len(line), expected_len) ) @stockholm.writer(TabularMSA) def _tabular_msa_to_stockholm(obj, fh): if not obj.index.is_unique: raise StockholmFormatError("The TabularMSA's index labels must be" " unique.") # Writes header fh.write("# STOCKHOLM 1.0\n") # Writes GF data to file if obj.has_metadata(): for gf_feature, gf_feature_data in obj.metadata.items(): if gf_feature == "NH" and isinstance(gf_feature_data, dict): for tree_id, tree in gf_feature_data.items(): fh.write("#=GF TN %s\n" % tree_id) fh.write("#=GF NH %s\n" % tree) elif gf_feature == "RN": if not isinstance(gf_feature_data, list): raise StockholmFormatError( "Expected 'RN' to contain a list of reference " "dictionaries, got %r." % gf_feature_data ) for ref_num, dictionary in enumerate(gf_feature_data, start=1): if not isinstance(dictionary, dict): raise StockholmFormatError( "Expected reference information to be stored as a " "dictionary, found reference %d stored as %r." % (ref_num, type(dictionary).__name__) ) fh.write("#=GF RN [%d]\n" % ref_num) for feature in dictionary: if feature not in _REFERENCE_TAGS: formatted_reference_tags = ", ".join( [tag for tag in _REFERENCE_TAGS] ) raise StockholmFormatError( "Invalid reference tag %r found in reference " "dictionary %d. Valid reference tags are: %s." 
% (feature, ref_num, formatted_reference_tags) ) fh.write("#=GF %s %s\n" % (feature, dictionary[feature])) else: fh.write("#=GF %s %s\n" % (gf_feature, gf_feature_data)) unpadded_data = [] # Writes GS data to file, retrieves GR data, and retrieves sequence data for seq, seq_name in zip(obj, obj.index): seq_name = str(seq_name) if seq.has_metadata(): for gs_feature, gs_feature_data in seq.metadata.items(): fh.write("#=GS %s %s %s\n" % (seq_name, gs_feature, gs_feature_data)) unpadded_data.append((seq_name, str(seq))) if seq.has_positional_metadata(): df = _format_positional_metadata( seq.positional_metadata, "Sequence-specific positional " "metadata (GR)" ) for gr_feature in df.columns: gr_feature_data = "".join(df[gr_feature]) gr_string = "#=GR %s %s" % (seq_name, gr_feature) unpadded_data.append((gr_string, gr_feature_data)) # Retrieves GC data if obj.has_positional_metadata(): df = _format_positional_metadata( obj.positional_metadata, "Multiple sequence alignment " "positional metadata (GC)", ) for gc_feature in df.columns: gc_feature_data = "".join(df[gc_feature]) gc_string = "#=GC %s" % gc_feature unpadded_data.append((gc_string, gc_feature_data)) # Writes GR, GC, and raw data to file with padding _write_padded_data(unpadded_data, fh) # Writes footer fh.write("//\n") def _write_padded_data(data, fh): max_data_len = 0 for label, _ in data: if len(label) > max_data_len: max_data_len = len(label) fmt = "{0:%d} {1}\n" % max_data_len for label, value in data: fh.write(fmt.format(label, value)) def _format_positional_metadata(df, data_type): # Asserts positional metadata feature names are unique if not df.columns.is_unique: num_repeated_columns = len(df.columns) - len(set(df.columns)) raise StockholmFormatError( "%s feature names must be unique. " "Found %d duplicate names." 
% (data_type, num_repeated_columns) ) str_df = df.astype(str) # Asserts positional metadata dataframe items are one character long for column in str_df.columns: if (str_df[column].str.len() != 1).any(): raise StockholmFormatError( "%s must contain a single character for" " each position's value. Found value(s)" " in column %s of incorrect length." % (data_type, column) ) return str_df def _get_padding(item): return "" if item[-1].isspace() else " " scikit-bio-0.6.2/skbio/io/format/taxdump.py000066400000000000000000000315541464262511300206320ustar00rootroot00000000000000r"""Taxdump format (:mod:`skbio.io.format.taxdump`) =============================================== .. currentmodule:: skbio.io.format.taxdump The NCBI Taxonomy database dump (``taxdump``) format stores information of organism names, classifications and other properties. It is a tabular format with a delimiter: ```` between columns, and a line end ```` after all columns. The file name usually ends with .dmp. Format Support -------------- **Has Sniffer: No** +------+------+---------------------------------------------------------------+ |Reader|Writer| Object Class | +======+======+===============================================================+ |Yes |No |:mod:`pandas.DataFrame` | +------+------+---------------------------------------------------------------+ Format Specification -------------------- The NCBI taxonomy database [1]_ [2]_ hosts organism names and classifications. It has a web portal [3]_ and an FTP download server [4]_. It is also accessible using E-utilities [5]_. The database is being updated daily, and an archive is generated every month. The data release has the file name ``taxdump``. It consists of multiple .dmp files. These files serve different purposes, but they follow a common format pattern: - It is a tabular format. - Column delimiter is ````. - Line end is ````. 
- The first column is a numeric identifier, which usually represent taxa (i.e., "TaxID"), but can also be genetic codes, citations or other entries. The two most important files of the data release are ``nodes.dmp`` and ``names.dmp``. They store the hierarchical structure of the classification system (i.e., taxonomy) and the names of organisms, respectively. They can be used to construct the taxonomy tree of organisms. The definition of columns of each .dmp file type are taken from [6]_ and [7]_. ``nodes.dmp`` ^^^^^^^^^^^^^ +----------------+-------------------------------------+ |Name |Description | +================+=====================================+ |tax_id |node id in GenBank taxonomy database | +----------------+-------------------------------------+ |parent tax_id |parent node id in GenBank taxonomy | | |database | +----------------+-------------------------------------+ |rank |rank of this node (superkingdom, | | |kingdom, ...) | +----------------+-------------------------------------+ |embl code |locus-name prefix; not unique | +----------------+-------------------------------------+ |division id |see division.dmp file | +----------------+-------------------------------------+ |inherited div |1 if node inherits division from | |flag (1 or 0) |parent | +----------------+-------------------------------------+ |genetic code id |see gencode.dmp file | +----------------+-------------------------------------+ |inherited GC |1 if node inherits genetic code from | |flag (1 or 0) |parent | +----------------+-------------------------------------+ |mitochondrial |see gencode.dmp file | |genetic code id | | +----------------+-------------------------------------+ |inherited MGC |1 if node inherits mitochondrial | |flag (1 or 0) |gencode from parent | +----------------+-------------------------------------+ |GenBank hidden |1 if name is suppressed in GenBank | |flag (1 or 0) |entry lineage | +----------------+-------------------------------------+ |hidden subtree |1 
if this subtree has no sequence | |root flag |data yet | |(1 or 0) | | +----------------+-------------------------------------+ |comments |free-text comments and citations | +----------------+-------------------------------------+ Since 2018, NCBI releases "new taxonomy files" [8]_ (``new_taxdump``). The new ``nodes.dmp`` format is compatible with the classical format, plus five extra columns after all aforementioned columns. +----------------+-------------------------------------+ |Name |Description | +================+=====================================+ |plastid genetic |see gencode.dmp file | |code id | | +----------------+-------------------------------------+ |inherited PGC |1 if node inherits plastid gencode | |flag (1 or 0) |from parent | +----------------+-------------------------------------+ |specified |1 if species in the node's lineage | |species |has formal name | +----------------+-------------------------------------+ |hydrogenosome |see gencode.dmp file | |genetic code id | | +----------------+-------------------------------------+ |inherited HGC |1 if node inherits hydrogenosome | |flag (1 or 0) |gencode from parent | +----------------+-------------------------------------+ ``names.dmp`` ^^^^^^^^^^^^^ +----------------+-------------------------------------+ |Name |Description | +================+=====================================+ |tax_id |the id of node associated with this | | |name | +----------------+-------------------------------------+ |name_txt |name itself | +----------------+-------------------------------------+ |unique name |the unique variant of this name if | | |name not unique | +----------------+-------------------------------------+ |name class |(synonym, common name, ...) 
| +----------------+-------------------------------------+ ``division.dmp`` ^^^^^^^^^^^^^^^^ +----------------+-------------------------------------+ |Name |Description | +================+=====================================+ |division id |taxonomy database division id | +----------------+-------------------------------------+ |division cde |GenBank division code (three | | |characters) | +----------------+-------------------------------------+ |division name |e.g. BCT, PLN, VRT, MAM, PRI... | +----------------+-------------------------------------+ |comments | | +----------------+-------------------------------------+ ``gencode.dmp`` ^^^^^^^^^^^^^^^ +----------------+-------------------------------------+ |Name |Description | +================+=====================================+ |genetic code id |GenBank genetic code id | +----------------+-------------------------------------+ |abbreviation |genetic code name abbreviation | +----------------+-------------------------------------+ |name |genetic code name | +----------------+-------------------------------------+ |cde |translation table for this genetic | | |code | +----------------+-------------------------------------+ |starts |start codons for this genetic code | +----------------+-------------------------------------+ Other types of .dmp files are currently not supported by scikit-bio. However, the user may customize column definitions in using this utility. See below for details. Format Parameters ----------------- The following format parameters are available in ``taxdump`` format: - ``scheme``: The column definition scheme name of the input .dmp file. Available options are listed below. Alternatively, one can provide a custom scheme as defined in a name-to-data type dictionary. 1. ``nodes``: The classical ``nodes.dmp`` scheme. It is also compatible with new ``nodes.dmp`` format, in which case only the columns defined by the classical format will be read. 2. ``nodes_new``: The new ``nodes.dmp`` scheme. 
3. ``nodes_slim``: Only the first three columns: tax_id, parent_tax_id and rank, which are the minimum required information for constructing the taxonomy tree. It can be applied to both classical and new ``nodes.dmp`` files. It can also handle custom files which only contains these three columns. 4. ``names``: The ``names.dmp`` scheme. 5. ``division``: The ``division.dmp`` scheme. 6. ``gencode``: The ``gencode.dmp`` scheme. .. note:: scikit-bio will read columns from leftmost till the number of columns defined in the scheme. Extra columns will be cropped. Examples -------- >>> from io import StringIO >>> import skbio.io >>> import pandas as pd >>> fs = '\n'.join([ ... '1\t|\t1\t|\tno rank\t|', ... '2\t|\t131567\t|\tsuperkingdom\t|', ... '6\t|\t335928\t|\tgenus\t|' ... ]) >>> fh = StringIO(fs) Read the file into a ``pd.DataFrame`` and specify that the "nodes_slim" scheme should be used: >>> df = skbio.io.read(fh, format="taxdump", into=pd.DataFrame, ... scheme="nodes_slim") >>> df # doctest: +NORMALIZE_WHITESPACE parent_tax_id rank tax_id 1 1 no rank 2 131567 superkingdom 6 335928 genus References ---------- .. [1] Federhen, S. (2012). The NCBI taxonomy database. Nucleic acids research, 40(D1), D136-D143. .. [2] Schoch, C. L., Ciufo, S., Domrachev, M., Hotton, C. L., Kannan, S., Khovanskaya, R., ... & Karsch-Mizrachi, I. (2020). NCBI Taxonomy: a comprehensive update on curation, resources and tools. Database, 2020. .. [3] https://www.ncbi.nlm.nih.gov/taxonomy .. [4] https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/ .. [5] Kans, J. (2022). Entrez direct: E-utilities on the UNIX command line. In Entrez Programming Utilities Help [Internet]. National Center for Biotechnology Information (US). .. [6] https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump_readme.txt .. [7] https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/new_taxdump/taxdump_readme.txt .. 
[8] https://ncbiinsights.ncbi.nlm.nih.gov/2018/02/22/new-taxonomy-files- available-with-lineage-type-and-host-information/ """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import pandas as pd from skbio.io import create_format taxdump = create_format("taxdump") _taxdump_column_schemes = { "nodes_slim": {"tax_id": int, "parent_tax_id": int, "rank": str}, "nodes": { "tax_id": int, "parent_tax_id": int, "rank": str, "embl_code": str, "division_id": int, "inherited_div_flag": bool, "genetic_code_id": int, "inherited_GC_flag": bool, "mitochondrial_genetic_code_id": int, "inherited_MGC_flag": bool, "GenBank_hidden_flag": bool, "hidden_subtree_root_flag": bool, "comments": str, }, "names": {"tax_id": int, "name_txt": str, "unique_name": str, "name_class": str}, "division": { "division_id": int, "division_cde": str, "division_name": str, "comments": str, }, "gencode": { "genetic_code_id": int, "abbreviation": str, "name": str, "cde": str, "starts": str, }, } _taxdump_column_schemes["nodes_new"] = dict( _taxdump_column_schemes["nodes"], **{ "plastid_genetic_code_id": bool, "inherited_PGC_flag": bool, "specified_species": bool, "hydrogenosome_genetic_code_id": int, "inherited_HGC_flag": bool, }, ) @taxdump.reader(pd.DataFrame, monkey_patch=False) def _taxdump_to_data_frame(fh, scheme): """Read a taxdump file into a data frame. 
Parameters ---------- fh : file handle Input taxdump file scheme : str Name of column scheme Returns ------- pd.DataFrame Parsed table """ if isinstance(scheme, str): if scheme not in _taxdump_column_schemes: raise ValueError(f'Invalid taxdump column scheme: "{scheme}".') scheme = _taxdump_column_schemes[scheme] names = list(scheme.keys()) try: return pd.read_csv( fh, sep="\t\\|(?:\t|$)", engine="python", index_col=0, names=names, dtype=scheme, usecols=range(len(names)), ) except ValueError: raise ValueError("Invalid taxdump file format.") scikit-bio-0.6.2/skbio/io/format/tests/000077500000000000000000000000001464262511300177305ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/format/tests/__init__.py000066400000000000000000000005411464262511300220410ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/io/format/tests/data/000077500000000000000000000000001464262511300206415ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_custom_minimal000066400000000000000000000000111464262511300250470ustar00rootroot00000000000000subject2 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_custom_mixed_nans000066400000000000000000000000651464262511300255570ustar00rootroot00000000000000N/A PAAWWWWW 8 1 100.00 N/A 0 query1 N/A 8 1 N/A 8 0 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_custom_multi_line000066400000000000000000000001611464262511300255700ustar00rootroot00000000000000subject2 32 0 100 N/A subject2 query1 subject2 18 0 100 N/A subject2 query1 subject1 19 0 70 N/A subject1 query2 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_custom_single_line000066400000000000000000000000371464262511300257210ustar00rootroot00000000000000query1 PAAWWWWW 8 1 100.00 8 0 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_default_multi_line000066400000000000000000000002171464262511300257040ustar00rootroot00000000000000query1 subject2 100.00 8 0 0 1 8 3 10 9e-05 16.9 query1 subject2 75.00 8 2 0 1 8 2 9 0.060 11.5 query2 subject1 71.43 7 2 0 1 7 1 7 0.044 11.9 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_default_single_line000066400000000000000000000000571464262511300260350ustar00rootroot00000000000000query1 subject2 75.00 8 2 0 1 8 2 9 0.060 11.5 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_invalid_column_types000066400000000000000000000002151464262511300262640ustar00rootroot000000000000001.2523 subject2 abcd 8 0 0 1 8 3 10 9e-05 16.9 query1 subject2 75.00 8 2 0 r 8 2 9 0.060 11.5 query2 subject1 71.43 7 2 0 1 7 1 7 G24TL 11.9 scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_invalid_number_of_columns000066400000000000000000000000441464262511300272570ustar00rootroot00000000000000query1 subject2 75.00 8 2 0 1 8 2 9 
scikit-bio-0.6.2/skbio/io/format/tests/data/blast6_invalid_type_in_column000066400000000000000000000001371464262511300265720ustar00rootroot00000000000000query1 subject2 string 8 2 0 1 8 2 9 0.060 11.5 query1 subject2 75.00 8 2 0 1 8 2 9 0.060 11.5 scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_custom_minimal000066400000000000000000000001351464262511300250570ustar00rootroot00000000000000# BLAST 2.2.31+ # Query: query1 # Subject: subject1 # Fields: query id # 1 hits found query1 scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_custom_mixed_nans000066400000000000000000000006141464262511300255600ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: query gi, subject gi, query length, subject length, query frame, sbjct frame, query id, subject id # 1 hits found 0 N/A 8 13 1 1 N/A subject2 N/A 0 8 N/A 1 1 query1 N/A # BLASTP 2.2.31+ # Query: query2 # Subject: subject4 # 0 hits found # BLAST processed 2 queries scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_custom_multi_line000066400000000000000000000006321464262511300255740ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: q. start, q. end, s. start, s. end, identical, mismatches, sbjct frame, query acc.ver, subject acc.ver # 3 hits found 1 8 3 10 8 0 1 query1 subject2 2 5 2 15 8 0 2 query1 subject2 1 6 2 12 8 0 1 query1 subject2 # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject4 # 0 hits found scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_custom_single_line000066400000000000000000000005711464262511300257250ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: query id, % positives, % identity, alignment length, subject gi, bit score, q. 
end, query seq # 1 hits found query1 100.00 100.00 8 0 16.9 8 PAAWWWWW # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLAST processed 2 queries scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_default_multi_line000066400000000000000000000006501464262511300257060ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 3 hits found query1 subject2 70.00 5 0 0 7 60 3 100 9e-05 10.5 query1 subject2 30.00 8 0 0 6 15 1 100 0.053 12.0 query1 subject2 90.00 2 0 0 9 35 2 100 0.002 8.3 scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_default_single_line000066400000000000000000000005041464262511300260330ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found query1 subject2 100.00 8 0 0 1 8 3 10 9e-05 16.9 scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_differing_fields000066400000000000000000000006371464262511300270370ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, subject id, q. 
start # 1 hits found query1 subject1 4 # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, subject id, score # 1 hits found query2 subject2 4 # BLAST processed 2 queries scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_for_sniffer000066400000000000000000000000711464262511300260460ustar00rootroot00000000000000# BLASTP 2.2.31+ asdf # Subject: subject1 # 0 hits found scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_for_sniffer_2000066400000000000000000000000651464262511300262720ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 asdf # 0 hits found scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_gibberish000066400000000000000000000002201464262511300254760ustar00rootroot00000000000000aphanbrnaweoowehapivaborebaweo;aweifh[fapofh29r-2u-23 adjapignqp9rghp4hq2hrq93phq9hqwhpehoweawe aidgpuhp0eh9q2yrq02r9q9euq9rhg93fhqefjwphwpahw] scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_no_data000066400000000000000000000006301464262511300251520ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject2 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject2 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject3 # 0 hits found scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_too_many_columns000066400000000000000000000006321464262511300271340ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, subject id # 1 hits found query1 subject1 subject2 query2 # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, subject id # 1 hits found query2 subject2 # BLAST 
processed 2 queries scikit-bio-0.6.2/skbio/io/format/tests/data/blast7_invalid_unrecognized_field000066400000000000000000000002631464262511300274060ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, sallid # 1 hits found query1 subject1 scikit-bio-0.6.2/skbio/io/format/tests/data/embed1.txt.npy000066400000000000000000007602001464262511300233510ustar00rootroot00000000000000NUMPYv{'descr': ':J/W0*t@8w>#>Bb>M= Ѿ~>ș^>륽\UNZL%بR>Mڽ&߽`߽ #2g>P=ݣ>8G[=ػf>E߱iľɃ @==~=><>ja}M> >Ž7`q2=vkV>+a==W>T2= aݦ-/(>Xo$K=@N@>]Y8uu=8xF %=)==A>R>"JP>Lz=>MaP>[HE>c[=J2:Rt;ۤ]=o>~ξU 0^ >/;=r1S>M>t=Ұ=\>9枾=> tʰ́ˉ=o =-Ⲥ>E>:<U=4&>|{>H~ri><>Ct=6ݻP=XF=9>ASu>P;`>w>*ھ 9ؼ-;Ÿ: )E>y==#>Ja w >Sh?&Y!7Ifʽ(v>s=>{/9 >/ >^>'=!&?gbͽ<¢=Ccȋ  kye>jN:p|>< (=h=h _x>MH<wų=,r=@>>%AlwO=iVf =1ǽc>L\>LW>픦=Ƽ>_mM𡬾ƾ:ܻ>nGRmNb G>m>4">HOr͢=uǽ>< m_=>>6>fj*nkp>> >ٽA>}>>8{n > ؜Evn>b=(>x`zm2{]1iWL*aB `=Ƚ+<{==,̾Bp>ۛM>tDCm=h>S>;ocM~ kr)=8>͚)l>u?F>^$>Cؾu1>H<=K=iI>G{=P𒼳%+,=d== \ Ŋ> Z=-o[6>kɜ2Ґ>c=;S>;ʺ<ުWGŒ>=[-=|d *@O< ܖ= )>ս1y*=<">LM7R(>65>ە+>6ܴ">N0$ =B==a1]X>>M$`=WI;ڶ&[>*y= 8=T>q=a\c 5r9z)>g>>`<$>'>ͽ{=." 6><>oVs˿(=FLՂ׾ =ѭ=a@>{t=3Cp<ͷJ>]T<>SwC%->? =I>7>!E/p>8>WL>՘E> ^A{T>C=d8f=Iq-=Q.w>] )Wj>x?PfԽO~b>y:uu0>Ce>M> ƾoZg>Ї>)>'8>#=˼>D}>څ>7,>!=pNþEqo< a\=v4>*Ao!&=JIYvqre#8>*<%߽-q=w䓾kz]==.;Bq)>U">WvP*= )>* мܧ8sgﳾVB2,뽮=#>@>;#>ʽ Ev= H 禼Κ=Y>40=xu=ة=8?L=W>>j|m">l=7'>= 6>x<)ۉ왋>=T>6>Fͽ:y]J 2=;W=!$?9FH>>z=3 >3=XVN,F>k m2sĠ_:=*W>2>b֗>\>d`U:'=r->նC,Z+"->ॾf ;0<*>=X?$uc=0ѽ$I>tt">j$=_F>3l6=<ݖ9zκ=;vᵢȖC>vx1>X;a>{ƙs=h=<>`GW=7]!A>a S? *tQ=Ӿ 6 >aJq,o =WG( _^P=iIQ i:=(N>)e>]ԄpH"G߽JY[=J9ۼEˀBf9 q= 5%>i%=>P,F==CXTPCfϼhAk=]~<|U>>=,Ӆ=Y 1C=x='=ļ'O==I%R@1>cDcw >G̊>~=nD> >73=@>O 7ek辽V==8>;:>y=fýc]Q= g>/=>mw>=2>y=h=?d?oF={ez>R>iG$z>b<=>0@=`= >Tڽ'CIӽwEsʽ9=S;>:=BK>@>H>ɲP\=8 >~t!h=>AN>闽=z=i һq=s`=>T=h>F==ʶ<:q);b=p>a¥;=©=M'@uÜ>Xt=<;. 
&'y4ڌ;S!Խ >(7>>ʊxA>5 Ew>c/=2?>w>&A=@[ >B>35<>Z=껾`3<;t>/ڼb>sὋZꚀ̬Y>*˾ׯwC='>2@潗YӽQ> >ye>52>2>QG>Fx/>iSh>,{H=>Sm4#e >x`> ٜv/>ɾ@DV?I>'S<'Sdžq=a>n>ً>z=C&>V>]|,<$^:7h>nх ;>k3N)c>D) >Wb>2A>O4<`>P]q,H=[HCZ=K>,a,"opy(>lR jg=Ѿ2WQƄ'LUc>+e=TŻjX>>1FNC>j<}P/>.h;ཕ˽=7Y=6?<'*9=/>O>L>h4u=!?q R=C4A^>f`1>"a =I=].Ջ>8o p>{L=HD+ճW;/9<[ڽ֩co>XDI>F2>һbIo慼=۽o\k=V>>xVҾzOY>Cu%>+>w=*{}>1=$:L>G?kI=j[H;ܠO> 8=bA} ~w<\=9> /=*=T>t?~>F=Ҩ=̼94>{"Ups>Wd;>T'@=5=>BW>_0=CҼ=߯O>J>PK>zߏ=$>T>&7~Y-_b;ڽS=F0޽l>V>d#>"B:=>*<);=fc,5=>>sZ>-G=/fJ(o5=v^> w); IF$=Cb==^Hоa%qjva⍽?A>}č^h/>e%>= `~=ۣick??Y"^/>u/M0 I=X;:rb?{y=ai=Y= ƾKؽk^=Feڡ]/>H>d>U?>i79ʽ#N.Db*>)P>#>=E79NTAzn=|ۻq@=>4=:7P#-tdǽ<$Y=I_A<>5=*R! ygC:={T< =Vw(Vk.>С=ņZiɅ7 >gۨ$콭6=V=m[v>}>W<ۺ=|= >.>=(=[YZV>=^wW< pI0>۽Ҵ>R *C?<">2.bS>Ju=j˽R/> >_=(Es==ԃ;`N=l@>.ؾ3=>+a=>&d>h=&=K/mK>T5>!pI=B[>/ٕ,aҽ>E; R<7=Ro+>@gv9j>l>ش-o6ʲ!$ھAf =T-t >J=<߸>t=z@f>xC=F>>?E<8E>8}ݲ ƽ=A83w;a>.sSm>=jWfuԽ5H)>sOb޽c!k þȽe޽ʾi=>@F>ggվn&j$ݠ><>'i83>΋=Ώ>=m-RtuEfbt>7>-jr=? > |b>>iu> +Rȑ=(:1 >iݳ>3>L>n ?z[> ?wx=lUJ>U%Sfm=>$|&W=>"l=Gi .<:#N'=, m<e2>[ 5Ě >GdC >><'(jPy}Yq> ~<=[>1=g*O=G <,YR=k=-2>'->6ٻB 8>~J־S"kN>c}ug=Ks|x8=;>/ 6|-P>v>t-!=Z9?*,pNJ9>A!X'H>ض<+?(K"րy>>G=b>=k>E==_=:'𘽈9nMXEH\><>8>SW m3=*$M=ټP3YQ3<$> `=s%侷E%>DQ5%|=ߨ=: ?횼di<뺼ċp4=c=&n[)XreK=#R=@wk>LzѼk;4;|q>HÚ=B֗=@>,Q>\gIӵ5<.=NCH2='菉G->$=}G9 ݽ>>(>:3EUhٽ3=0=N>C8==>pľZm>g=$<>Ko>$ս:}ɽC&Hx!> ='i>Z/>"Fdu A<>EL>Q_U>>/=E> >B>=$>T>>D侩 <z!>>7>#">S=űu< P/>"d͆8ElW^N>~ u[t=>01è=- ]<1:QtЇ>zx=p ?~M_"Z>c6P >~;^>hTo>g>J=a>㣼q=)>@>v>~zFw"/CM򞾬w~ fι!>Z=IO`-&>JP>be5M=~@:+>Ck=oƽzyݽ05pn .Ձ<|><*нКZ=K>AK>վ=& /=E=0T>6nŭ=r=dY_>89>T)>_&>*+ex>g< AB=e>Ғ=>)>l^b>/:3>Խԣ> <+5>%<7ļ =">UH>Y}ܾW>>(QU"?~q->> >B>>N>d>ez9BAC{w,><*>E=Hj8#%3sҾ;mC>J>Tx&8b>$#Xg@=и>̊>A> >X\:W=;ͽYyhE $S\ !1( X%`-iN,=wl-m==<]ۀW: ԽJPN?>Z?*h>ϐM>$>: >Q<>/GWs>)=Un}i6>K=mrZ߽A>$?vYF;=M򇽣N>=q > n>3럽 ǎ>x=ęZԐ>M0Z%_=#)==hJ+B<>vm=9>74>2n5㢦 qν=vĽ$> >pO?TI=><& z=>%⽕*y=a{,nN>iԱ3Ka>>}?/L>c" o};&rF],">eɾ|>>3Wz=>>>a=VV(=Kzj=&Jd=4ݱWO>c=y3'z>@0*>[>ⳑ0[=/>)愘<=A׺>>V~j>!3Z[UM^f*>h?%sPӽ@?>J~}>{>E>@>^>{>>=_"1ý=X%%,"Qu 4#=M=Xn >*6nȄ>EѽV<0:> F轼H 
>|X<ý*1= (>\<>=,E=q#@>ܾx_[9Vkv =.립C<>%=^>s>=~=bZ=g>b8bf>l1>3>ѓ>>:9 >MY=e&ܾ>%l>bX =0>BB S>;𙾆k)4Wu=##]<OA?>^=D>>zafX۾]=P>HO>PR >0=5p.8>ޕ>>~>8P,=f>Ït>nl>nX.=>6`>ͪ>ZF}^y7=ׄ w>?3"=Ftl>ʦ>\1ɒq6y>)=> b>\ 0k;=ٲ>S>Ι>E>n">pc=s~ ʾc$=]=н<ݽ> #k>pP>Z]=>>b*?ѽMRսG֫>%Y>IC"ļijDX="W>&>;6<׻ڀ>$=9u>7NF>S/>+2==(u|;9/ۯ!~}B0>E{=~`>-d>>R&l6q:=S\z}a Ȕ>oBD=?J=Ţ4a :^ k[J>=4b6>L=9B>m*=.>ߖѽs"?5)=F'v<>f;&>m>ɜ[C<$>.>l"l:>sHt>xܼSm=w=;>Tr=%>T˾)=$8a==Ҧ0T.>y&='/>@#r>֠&<>J4:Y =υ;(T>k>1> ,sMgzw$>"D2.=5XW=@a>mIe>$tM>>e=oC>e;K<|*LI>!o=)v.ȏ>Kebн'/7QŮ=R1>آvPAd= 51վ>[=P8=>A=-%l>=R[/=J՟Ȣ% =姥=cg>=gbˈ=tb>T(>"K=_ɬ=Ĵ>y㠽fSyZǔ>˟2=Q>c>΄>>8>=JgV)\L.=̎1*;>>r=;TCafe>G2=->< l창>@/'>~$>>ؤ]7"K=M=CS=*c >D5>1i Fw5=bs=nC>>pgB>dp >@@Ӽ==2c< >)#i $FؾkFU [ d;@<ެ|D>6r>0#p=돾%=P-=>Q=>ƒX>Ա3k6>"O>l8<̾LL>S]O=w=;o'?Tv?=̽>F½,>GKC=RR!=v>yΛ?bvͽ?>G>kݼJ{=i걎Ntd$>+[4>«h; ߾i'>/L*]D.}Ge{¯>\>r,>!Ѿ* >{:>bv,y=<yoqcY[>K=:=xAyY>= E|>i>Igљ=>=G#<~)ht>9>A~z?=7=SZ~[=6<6>"d>U >Y1Y>̾ =1>=<)=\>=}Vavk=.F:>9˽(>4ZzS.\J=o=c }>泛i!3'k;&"'i>)=Pk>O{ =r><=Qtk\==>9J>>>RQzٽE=X7=u@>.>8=m?lYfL=ν=V=><Kݰ9>) I6>Y1 >S8>*>,kX%>*M>w9>Q̫> ,AN?ؒk=h>*nn>0 :>5}ʼn;pݖ;>:E=!y>=ٯ]9G?=X;>j< >E>>7F>.k :=9Hz=X=y>> =i="h; 1>Iw>nP< р=T>%,rMhc?Q20===-%61=.>#=Po˂>N8>[[>p8N:G] Km=EOq>뼬QqX=yMy);c=v>>KU>d2U%@Wx< >1=;>% ,>nf>'@p>Rp2S>h=\ <L%IXN>@i$?y$xp==-{==Ã=U>˟U`m>^>˴>f>]+oQ== ,6=i >NN>krxJ y6Z=S>~laZ*鋉Wꤧ=WG>~,{beܑ\X3M>;<ycr;H<?MP3>Ct;>ܸ=͊] @>uoxD)>>(m̾>ό:VM<2$ ==)_T"ZɏcX=":>$u =2Z <@/K =vP= IekD>ci/F>|=Xl>]=_ =;E!QGI>]$ZL>M:An=?I=J"Ǿ=) Tݸ9ͮ> A=>o>C&Q?p= > <6~@5]/x+!>_xA=Vo,KUy6!>LJ2+׼P=ur񭈾 >SoX6? 
=#~ѡK>3VWq>!}<.!C8H(>xȾGC+O=ꋂ"J]>a>e<" M =0)M=lv= >>,[,u)yJ*:>>">d=~<(=fp><>PbN 7 _Ce̽ؽԼ>>>&_>;$=6=8.=rjQm>JA=¨==6<+jVA>t=ҽ&o遾n=M1hDV=Cv_=߾R*'=$žV$1>韈> >'<> C= l=}=^> 3{Ĵ>__>ju>juu>_"F罻v>_=~u4|X>;`=k>>Y=+B[OCA=Ľ>G&%=!9p d>:I=߼'#>>8t35<;Ѿ;7be:u>ag>+J})>T1*>Y2滺8;1b>hg禎>&O)>s>MA X?0=p@={=ݨ)l>}A<+;Y*>/H7l>k.h(>A*>]z[u><>hm<=P="K49>ϝP=e=DV>x{1=h>侘>>>M ᓽ@H=;&s>*y6,I >3` mmrXM%q-=l=yl>=uŽV K>l(Ǿl >q>e3>&Oڬn J>3>u>nP9kR`su?>/^?,Hc/x7J>f5 >[<<0d[$,Jy>xiC!ܴt> <>:t&=X-u-|Ĺ$>*> G W|=kz;~3>L|΅=ӻX%-|u>vn>+@q?>N>>Պ>Q2=~s=u<=>/xĜ=z.1=;[LFQu=ս2f5 dL0>9=g`6=>Ti^>X<H>=8l?P=*m7>U>x`M>ʴ>烾$ b>y|m9>oX7ƛ>s>N==r > v:5c \we>n"Yaʾ_3H(>=8g1x=R=掾H>,Q^>q>1*X>w>=:ˏېraM־roUn~P> guyq[T=>A*mT>qZ侟=2 = Ep>L'`6܂y=bR=>=PDOpc¾ؓTO>B>J=wؽO=>d<==Wѽɪ>r^ii?/=RWT=x2>̼V=~=?<a=ae<e=7>ut=yf2}8<ȇ,;gC>Vʸ:^= G 0> >>o:=_>̽=2|3>;<Y;ȵ.>^ѽߏ>z4>rZNm>="q, -rk><>NVsE<6齃=R‰>x[>'P+wHQ(>=jks󤿽.{>L>d1>P->i>_=@$<vQ>yx>=Y(~=LQ`0>=i4>(;f 5(V^4>wټa>>>š;L4>G4b=| h![Y[>@=p1õQ=Eþ">&=o.=o-1>x>=>Q _ļtNQ6t>.7z>=1>db>3>.MP= ?C=.>&=*ݾ{Vۛa+C=kH4->}qȑ>X}D9f<>64L̾@U>})!ٽ>YG==;#> /= .+> ;#G=Cd/="9="z(>/9ڼ׾N% >=>[=3{= ]>vqcʾS=ƼAP>+?0߻prdv[ŵ ?Ȣ=v=SԽ=.M5=> J=*νpX2>u`K>8z1=&= ='2Ly>8K<¾r<^x<%<>̼wQGT=k>%pӽu>n`_S˽j>[t>/:U=@=PV>*<բ>9d=mA;[ ͹wn=݉=Y>r= 3;лt>f'm7)'d>tw$ʱL> e_;S=j_>8ڱ=oe>)9<ھ[c>".+Yf>>y=TN0ܟRQ=wU9>= >$YW>z!OU=?>b(%D/i t>zҽ}s<$5X= ?>==f,>=Ł=>{JE>>KYo,,<)W[=ՠ>HLS^Y=6>>'ܙ1ٽ ; =z?Y\}4=?=DUؼvmÑDs~&@:>ԛ>=IN>/E=Xu>V =O;>Ҏ ,S>R rB# ?}>3="`~=>R4~>y<潠Խ=&=+T> >Lm@"Kގ~SbPj;=#D>0?j>\*B|ݼ0#G>_%??>{}>ψս>M== y9>A[=?\=*G>`[>Ы=>4x=`1Ab3^=|n>S׾ZL>8le 5>Pts8R5=njxW <8>N$]=:'m-8>Tۼ ?V'? a=>6=Ӿm|=Ai<ò=q<;^轈=o%==7a>H'! 
>CF.?;O:V={=M=M<=!#f ?ǞTM>T-1(rR.mI=Ĕz6>m<#{,}Hl>;n=秽J>5x><=Zj־+M2=/= >"~>Ǩ=LgJR9*,J>BU=!/6=E4t>*!><8<þn==0=i3dZ=+$<>mX81'8>>B۽ @ =&hMt"9O)34ؽU<^"=T<=b u>p>A>Rfp=c->L콽̌˾/q=>>k=f/b>-a5>֓~2= % |Y+=W)>־X}ýżA=I=뽵-5>"h=@カ_>u mFMf> v">E>悔p~>>#Yxxua^stɩ1ώ =ܦ#=mY`p"=฾9=zgǴQ[>i?0B=p =K촼kQvHYg=^V7==C>4lн>>w>{q<=w$>Q3=ub^,l=M<b:Nԕ<҂> > &f>+r>kI>B# cTKrk,`p> =>j=k>?.y>Z=Rk 5S>'jiz>PE)6E>qeO>>t ޾Jq>>u͖=+>i"+='ND ==Њ=bK>!=H=O:8?}8Ƽ>ma>e>>~2= $h<_ڼU>t1J&Q y*/0<4ػ{>=Ydyr#W+>zE>k#>9) 2=7eOJ<0Z&=C[nf5?LpkN=S2&>mJ֟ƽp>JX=d>2=a׾"에%8=>je>$ }>\S>*l<ɽc$ @-EN׼<>b=!Ҁ tv>Ծ=ɽ8:elM;T=!۽+l]>2<h=6eI>[bԽ {=ʜ;>^ûl1|>[1>C=#>"9ǵ<挾=>` pd64%D>D2ؽ#=2^>rnP(x Z=Qa2֘> oY=’+> =&=&n=>ii)8>f>r&)>aa;>Tb>=sT=!S*> ">P:>r>t~O=μj>!>>0<ೀcN>}[λ#=n4Rr!cP> ;i>  =Sau> v (~>pY>ྍ1Zޕ>]¼뉾N0hfQU>XM>0OBD׼>;4F^a:ٰz>,"Y4?mp<v[r>T-u0>I=Y!-ǾMN>܎#&>`Y>>p=6ܛyQD=q>P"P8re=ENُd1n>S{ռiнm\<A=֤_ߏR,\4b=jew=P>1g1G?2? ='V=mq=^=џ 'ú>h <4ct>7e㙾. =X<Ӿt }!E>ѲIP;=盽51VyG>1 =d-U<Kt^=nV>=YS9.=ۥ>ge>>>?Ma>L= 3=;2Ff>=@X=2O ix;/?2>Φ(= ǎ\@ŽNcԎMy.ujp>'=X(=>=>' j=p>RA??r#=-B ^3¾2=r"}QJ`\"43$X8H>7>>T=Ł>?%=.;#=<쾐m=u\\jst<] >.= wn.=kƦ  s=Za; >,_F=̙V(>P=/o=t'V?3­D>ҷ=(ξ0D3ݰM>jR>"^.`=s>=Ì>"=>Ih;1H%݀>>'?=ry p@l\Nj=,M `,>>IhRw*>Rʾ;#?e=W=mlO3u-O>r >n6^ ~8>~8 ?٢ oC j4>_{ WW>|j=,̥X<㽔s>2:Ev>ࠅX]8-3b>t/Xw>bξýH=q$>9==ޖ<"g:/>X?۽?{==>' dV*ϽÙpF=-:>3ۮ>|>B}>B>Ѿ[%>qݽ ={(=rzkǾ~>_6`u*>̾>>R?6S>NK<zv=qM^>0>t[ս E=x==hZ^>M=B0޽ ,=^a>u> >H=W_=aFM pW>n=Rvv|(>f=wfva<4 0>IoE8>D5g.P>1 =>G=>>%/:{>@@;"Z&~F=׭ov =>Q?= 1.=V== [{>󛽊=G>UȾ>>d>)g*>&J>a$> ?jF> n}[>>spǿ>])=O>d<>Cd>1=ˉI{h<>ֽjj徳 ־ %V I>>㥐I>>==%y>MIzn=ؽ=xc>>6>NF"О=>w=?;6!4<քvc>>iD>݊=݊> o1gd=ODD =I?H="T,P=Fg ױt=l}z=3>Nh=@,/>i> =qq`:Н;\!ᠾKIE%=H 4> <>T+D=]>O= >t9ͼp]=>{C==ԇ=Ap?σ⽿@>C=qi>CN>Xr߯x/ Ҿ,wl򽣤=қc>#7=>$2)?w=`4j.>Mjs>2/?>*<#-D>=ZjTB3>>S"-=xнH|ܾv#l=C{>l`=q;C2= N=BLL$`>4>0">T{u2 ->Ѿpf=r!=-f=?̐>>C R>> [սIn<{>撆>SUG1+#=['=b=T徝Hw`>=#1>5>J)߻ !'(>{>Y>4>4}=[&y>ܢػ%99g=:>. 
[>G =9 >/>.>A:5>9<= >T>aCoh=a /A=T*>.,=GYϾ===IHYA iUC0>r>Bm|>,=>N|qn^K0>kd9/޾k>D4'ui>oDщnH>懾~>$$J :kvоYXРDznS+=>"B >@0=Xe{޾ OnKlt3>>>==X*~>< =?>>|O.>g< pn>F H}[ê!=l<ς>g;4e<=1]P|GTR>CJl=V>>Q=ڕV>>K>9pN8=>8;54)IT<ǎ-0>H>iq>c-->D=>Q+>u,5>Q>F,=yib >'<@@?=':>*G0>&=J>=>F_Л={ =}>+U+Ʃ>P>>Z>>?I=>S=:>>Ҷp5>%>̙=}=bi>y%t>X>Ѽo>L?W<+˽ =Nt>V]M>fbhy>+C=D|Lk~V̀> 9Za:>=,{>,nΓ jI̽,=>;>dJ>?>q%>Zս F>=W;(ek> G UN>(?gy>/Xɫ>==@ |>>=@  ݈=9= mT/=5G?LɼYľB>uN̽0=N>h/P0>"$>%>vm&t[+a< r*>1K>yN&=g=d1>-½<)1p<16>!>>.1A=[2余cfV>ɿ6>o=ʡ2>Pޅ>f5ýeU>4{F5k={S>K>UԾ*_>6~>y>z(> U7=򼶾ɾG>=#|%>lW>x=FoNX"=&S>_?^tgRt ?XO=B<0an=!䵽^>ދ?a=yFѽtz$au=Mb$6 =[=ɴ O>7D]=*h>}LɽZiLhj묻O =>n@ _Xo=Ej2IqPѤ= 2b=Хޱ>>>ɺ=3U%7o РCTYȣqIM.>F%>򷅻V>>oqľ/-νG0n>>-a=;-i>=zf= I>U0OyAUWFCCd=BW!3=L ?b >>`=2Q>n֧E->+=G >q1˾ڽԔMԖT&=ձ> hM >{1t=>)=w> >>m־PW>&:=}$9?S=>G70>Gj-w==uoe\>or>=>=sQ>Kڽ<¿>δ<zڶL2=Y""6=<=g^WJ=:=ɍUn;ٖit>=H?>><0>$꽯対n>>WGI^=35<<7<RVNMy>>=`&9><="}ܽ}m&ﭾSS6=RԺU>h>g>h#8\Xd=>NG>s뤽j◼[=%hW*nf>U>/Do>6;>1'

a^tw=H๽ R¾>HR =6>I@> *+Ŵ¶+y>U>˪E>w>sľд%>5>^raŧsL< Dq>w7oip>?z>Fɽzc=܍>4>Rz?>$jC7B;?j >[x a{>u߽>x=>AZ==a=:=>=j;A<ak=wD>Ҽ=>3 X b!-}ҙU>A=?i9>oO4>H>о긾"\="<7= 13Ӥ>G)势>>=ýK>c5>Ɓ>#><̄>^JR}%}f_ w?TOk=ه=KNb=RսrL>6#>\>X=ȻgO>9h>a=A>Gi> *=Q|>e߭v=> ?`=w=Il㾣}=Ǭr>n<;{?>Ⱦ$">zo >Og>:=%>j2|U-a>s>ü'~>@<]*PN>PM>P>Ճ=lBž`gnnZ>; z떢5a /ry#=N̾f;=B=f{=v>(l<"=^=/]==`=҃?̝8S<=k=YߚϽr9> ?U$=>){V >'=;>i_=;=4>n; S '>8H=AO=X]>;Y"=LK@>>U=d-U!0>=;>A^D>Q^=+=֘QtDr> q>v\>Z;D=,]>M=!UE=+iݾ+==BR?=!9=+4:4K=nAj>!=>f?/he־T=f`þ#N>1P=, >ˈ|=V>褾kF=lpѼ` >5f9|;{괾׽ZIl>>C5G=PA*>ޕO=젾ȥK>,[Q#2.>C>0Ⱦ[>kw+J=؟=>ۘ>X==Kױ6>߶z7=SN=>+5ĩ?\oX޽vⱱ,3>J<ʛ>; q=;mĆ =>p>%cg>9>' $>y=''Vj==h gWi<  ;x˾?8\)[y>8;Ǽ\=Ir%=F=wlR=?'q׎+=#몾Ǹ\=)>o+=u>ͽ/9>F><Y+pG󻷏>seV*<>߽Z=`߄C=<= =a y>><^|C8> ˨A=A>Al07=Ww k ־>e>hʽ~<"s=!?ӯwl&o>gPv>w.>=q[ߍ>̽Ӿ=踆>?<5߸w>Cپ;%uDM ?1@ؽcR>=k>7>f,Ƣ =sQ1Hc*=t %>6Md!>[}@K>y>JZ!"[=@`(=?k?h,( >n<#=?r3>J֪>#!>IKtij<.$>ȪP>ۙC=r1>Q<ܽF?=[>>ts'sz,> =+>.>+_>W>.C ս=❾==J>a>==khJS;$>=|< /> >&о}E7Us>g>5>9->q7>>͞>75gq>>T5>{m=<̩=}k> @f>$>̂6Oq9y#S>A$Ƚ@"oi/~4uw9>_%D">)Q=\_(=>~;>C\VZ&>eޤ>H|=a)<$=K>@=yb#>tN>r= =S#н :>H>`.s>L=Pfe1l=7<3?"=ŸA>>1;E+>=<H=n$>U߽X$?UD>,E==#VGźz%&>FN0>t=;>)>&v>N>1;t>ﲽm{MDL!>h7޿pFAw>(x9=aa>>*刟t>F6>:F_R1=ؑAL-2༟e=> >|;=BDpd7=Σ=GnƽU'甬82=J%Ԏ}>sTBA=;Vg>B<<}0=U>*@>X3-)9==Q}MV21>@#M4M)i>ھam9W=2=H>e='K򞾁Vͽ>t>^fE>I>q>=/J j5#>=u1=D?! ?P$=Z>8<3> ѻ==̻ 湾VK y ߽&r.=7/3< o>2F':= =>$^供9=o>DrsR>?a<> jx>og=kOֽ<]>l߽&FJzvq:獡>o:?^jS>[A> y>"r>x? =2LT]^1>^ݨ>/J<ч?*m=#~S$;b=j>2h>>C<-pW>0$>uT=t'>aD߾:>9BVr8>m>=$=%>Sh>Mg2>īqFW>1&GS<==b 䜺Ɗ<>Z =d=6Xsjm>Npՠ=֣ >nq=>]>X-kiP=L{>fMn}< s >R<<}K= >L ]V'=d1'>=T>=xa؜ɽ>'= >d>,=eJ7^=r^C >0>붾1>ˊ<=GyH5lU>=V}>=Fa5pD= >><>=D,;S\x>AZɧ=H'>[><oܼ0O40 C.>*>ҮPL >A83M">Af>UO<|h>q<x>nn*6/q>EX?QQ=I>pMX,YľK=ܾCY= b=a.>Z=y^>hj=0W<>qԭf>w潉m==ž/[?vy=︩:W nϾ޽T >nN0Fɽ+eRg6=ZFܔ=B>J=S>VUgcd+=$>%=gs >==P=C >2$]>Vep0`>W1?>V>395)3< >[m]d޾F=5>DY ˎ>o;NJ=6?=h['=گsX>,=>0B =4K> >}w<& 17W5 =E\JʽxH>b#=pu%>8۽&ƾ:d>>fH>fU=>8!>='>kmh]G>'=E8}=˛p5>xf< >m=>>yc>x4>X$:߽ע A=wI>R>7> . 
J>~&#+wX֗<]٤Q<'>p<2wt>UAA=s 2>uf{#>`&{>c}>u~=l6g,s>>=ȾJ=J?;<^(==)Z3g>ߛ=+><_=!R>>!==Z0=.U=;x< =}~5>dþbA< <'%ʐ>\q=޿>!=ZB^t2>k*>ӂɽA1`=9>= 4*>-=>ky5>9畾8>cD>]#>c=}>֯>]wA?>\$>m=,/><]뒾>\>3nI_>T>y %CCB=-D>-kAfV=OHLH ?tO=2I}=ꍥ>!M>^3s> i ;q;>[m =N芾=՛j=ؙ;) pXѾN= )9zt̙=ҾR=ͮUҾ'|qY r>N>P==ӽ!KU <@u>͐=%չ?Œ=W">=)7σ=3޽z|=W~>\ #H=轤G@=i<=iXrP>=>'*>D0=0p=wp=4c>B\n%-+::S=x(>4>6/>U>;>l"> :;&d=q}žv=C>dwv-#<>>\sO3><v].E}=l Ӿ5g轷=/G>)>B~^o~pt丼E>9KP>O댽*o=m=2ga=6ÄE6>>V#u-`0 VkFf< =q>Ӳ>$<%>(;QA>t 08&Mb֊:ľν913ֽ1v1>t73[!|`5>_3=Z>aj@>=Nr=`ʟ6>R=+⥾={>#>ͷ4"U>,.qB]A^i`>tL%uTʽCY=|+=g ܽaq ?1>o)w>%{=E]>$4z\ -=\=>tg+>=l>ޛny`>y>>>'>ߠ')e^T>ke9>Ľ+<9n  *v>VA>!=|> T?,>J9=F2| r>=듏=߈>\;>4cT>ľG-+SI'=t $9B放\(pۼ>gϻT|پ}>%K1t>t>=y>l V>jD>޽w9=րӾ}U=s]>4߾>Ɇ>|Y˾L> =Ŕ"ʾT><6e=q=*˫uU/=;S>diTf>⭁B{<8|>=!>Җ5d3=V]^>>xZ>=>T\><>A=u< $v龲>lv=L> > ZG>.>h$=( N=f=_Ɖ= 9rc)>`UJ>$*u=Kݾ对ɪ=A0>G6= G=|l>a>g==>j=>==&9ӗ>#.>Q!>><7̽=d>uy>ڬ=ck9.>>}= ⨽=o½'=GY<U>_=w> =S[B@>F=ޏ>XT=/>zݽX\=T=w|>34N<<2II0)Axe>8>ǍH>>|2>=+U=R!>95݁>4k=7:"j~jZWa=풽uZ*=<=fN/ jPA1?>xTC=N<:>`=>=V8Y=҄=R|=m=罺|UK>Nwi= >|uR3e>>>ڈ "<F;>ɟ4"6;>b>>JV)=ai1r>K=e;a3M>9={:=S>>耔=UD<=!K&sghs"=>Z=>??>…֖#>f:;=ͩ>k&il'ͳ=@>,> E>uffWgK=ަ>ռHW<r7B>={4^I9_>+`:>n>6,|=@>i!B>|Ҽh > (>P>S&=[>UQ>= =+޽3Kd>pA9>fT>=_}dY&݊> BQpq>(=Aἱ >kؽu>k9>* ->ν>"'Fsv >s =;&0DC>=>UYɼ z60>½=]_>؁5>' xv*ž>= =p3>;7>T>><ֽ>*$}=IxZ=aý>s *=: />3~IhNa>9=q/>z>=<85=L󻿕=\V'UWj>={o}>"ʁ> ѽZJL$ג;ގ[>;Lg7$>+>2-;-=}ϾF)=+!v=ù=n;=2$==>6>6}1CϾA7*~>%6>=35=HȄž Q >В5cվF>K~Խ%3:>d_I">+OB43Y<؈]3> h>\@Ƚo*>-J=(N>i=Xsܽ=y_>}*==j/2>\=dI$=y6B:t4>:=̉>@Z>͎>D*]==n=k/5E񰢽YZ=9k=>A2\?vI#>.A:叾j=>==Oq=+QahFB>P>fa$>ڤ>~뽹սh>>3=L+(>Hž =нP:â=ID>M7S> V,׽!0>{=ۼ8>H=5i׽?_2x=:>_ae>ڗ0? X>V>R;"=H;<7S=^+@?@[V18= =Ŏ>+f>}v!8>]sI`}Ғid<:F>_!}&=G J|@9>;f>[ѽ;>MHx> >Wk=>S?*[>. 
0.=<#}FN=L%旾>w<';'jv<[ݽ=&i=7K>0>;͉>> H>l%t>ӟœr>=[リ<~>(4~7R=y󻖎'=ϐjLh; PwiF<)єB >ґWOS̽==L :;8>9yjXK}yF='>" :!%=!4̼p=U};>՘'B½_0H=H>]y=U=8 }=tdz>(E>`f褾.=%>&d0=F4Z>I蟾M\b؁=D[=64>i2T==?hU>0>N=-n>M = F^=뤾.W>7y>"=j_a*T >@ T>$>?a>xl>l >Us=ӈEd\T2J>eS=Ej>ҽ"=m|`=戰]S=2Ce+>9ш q>S,<݉m'QdP{>[I=$w>=Ƚk>+p=Ae<7X;r >p꾊Gͯ> ;D>8>)>½k =ڒ=Ѿ->-+>9ahk eW>ؗ=s"=Dm׳<¸=3<fҽ}=/a=>?CU =k2p>wf9;=O}<ؗB6:ӵ >>>7%>s 0;b>5> >-Ic!AwvQ>ր輂肽m=_oƽ>L5t}>>KBf>H @Z侾>>7#=X47c2oǼ}F==>Ճ>&Y6qx=k='5>>hQO>L;<BU>>bWZսByʽSc= ?zߵ>>̊$<>,>pi>H; =rci >lK=PU=3V>#A>QWtDN=>6:Z R=>!&=jju>N~=~: <_ЃʽŽ 8=|۽ q

a=T<>Ѽ>K{;>G=;EQ !>&>6_C=`o;U°?8<>==#C8>ݚ=g\Dsv>WQ^<=u'=[ ?f=>P>6>=Y_.)<0=D^a?>=jK=(>N>->@>laFJ=V>==#qXu35>4_7># =;h͍&>%;#>a;o!Mm0>Oeꋽ?>7x5>M5~WOվ qU6O=O=t~&U>?=x>þD<>8߽=Ň㛍 [)ڼK"==zx!ͽ>>W=VNHy#h)ۻNS>}HG>P>>ሾ^۽>=O=,>9D:{ѽ2=Q\;.=`>i@V2þK=J!>R><1>(>rf>.1r >=T<8ʛ>Dy>=|aj>WA@vQ(r:g݇~|=2G_7">*>k = >A˽г=+]\9hg=\r=oYo> 6"!qK< =/>!½Dؠ#M= ?{ͽaS,>MI((nm='*;. e =XX!%0=!=[0=A=eg>JIͦ ={mE &]==X>>N>ȯV=?֘=HTƅ=E-<>!M1?r Tđ*np>g >Er<3=LyS >=}6z>M2z=_q>=k>Q~>=Gy=,_=j>2+N=k8T>)>Fo>+>I?=4#Zv>>e>(&0=4>^Qsx(ͱ >}p}=RY>U=7Ž=2b>DzI>~?>ۈs>=#U!hnd>@>*>W{W=@@><쏼'F>O>nN>&z>iX=$%G=W>yX >I<ٽn>M>f=J6 zd>>:>HҾN=CCg>p(>F{U)=w2>{.=ˤ>5Wv>Do=tR g;3> =pto>s!b=E<`X> vȣʽr=+>߾:>?>%=pӌy=}ؾ 3;(>NݼE=>2=j5=0ǎ<,eϽܙY>W=,kM>yOὭ`@([=t>z-b?#Q>%:O=Y>;\>^>퍔$UQ>wg`a 4y>傾= =ǽ=7Sܾ^ܠ>r> AuИ>=>*~HD>y^t >ud=!HϾbR=H1j͑>6ʽ[NL~ܽ>5=5 >'-s=T>{=5?>۴0uZ>+r>=3=">Zհ ǽp:) 0=SL$>φ;uw>VֱI]>> >ۙ7>PyC@%M/<´DÄ>)cS=*xN:)$<< I\<7<{v=/d@>I[D>"y4ݼ9<.1̟=*<'f=&իZVX>/eث !=J>><>ư=5><0=ںʝ̾ٴ(7aоa>l\>F=_Y/es>>ڤz>bپ؍=I{=1_>k<]=/<;=jͽI:~ n>ex=ko==|оذG\r,P>*38=c&>iGi>,Ǽ=>Aj{s=MF>,=`>pG>E>OBIOPyX=ME>ↄ>K>0 Y;Og>>Fa=|=2uQi>מ=:;>D>{>}"V OV>.>*;>T< l>p=彽>iY>=7>E=b>WSe.=K>/>;<=>;y>d3==>[vB>ޙub>wz=)>>>ݐM.&D>!zU>.Ww=k<>; b3<\ػ:>EOK<=2+=+aB=l<>:Yv>g>>=ݽ19lx=썾m>P==`[UI퐩R=Rϲ߾- ҽIb=A᫽=6l> 6>9?>c<&>1YyԾX>O{)A'֯USldǗ,>. 
͉fm>}-1,CR"=tPͼi>|=nĽB> ?ٮ`>FV."|۽]G=:>Y]qT =*>;)2 .%6Y>옺+f<3q~><.Q4c%=<==N9)V/>螆>nJ7=V2>yК=o>C>wbG=YHMc0&3>t>c >^'̽=>Ђ2X=(b%>*浽k/>C>>JP+s:)=ؽ;;5j><>^ =8[>E־8>=]>׭/==ں-;1">gjl&7;>B=N!W)>u*>yHF|=>\>:}ؽz6>R]W>$=l=Z=׽,’8u>z=-줾( F=[fP=0?)>n->2yĆW >LOG2DS޾oa?>_>N>ŽuR 5>$C=`>/y=aa>jS =v= >?=k=ܽk$ >YJW=þ!Q> y>(#⡮yݾiĄ>>2 >kzQ=bf=]=rޓ>Y>= G>\=G=+ܽ /9.>;ͼ'>)}ҵZ±]>L:}qNؾV.=,q>k`= q4=z=1> ?=4=-u<+Y>pC>)f sXS<[YC>0@ò=PI>"> ==i= Aּ]=B=>4[?~>Jku=|D><w>>B:= nh˽G8.>y>7=>;>|7 qPb.$>=!;.>2: 1јT>O@s%=_>Y)_>pU>Z%>)\=#Κ+HP>ہJV娾 >n=4{᭰rd=Ⱦ*?zu>|=l= ;>u>W===>=;=WUm6><0Gv=yLP_Ѫ&+)>JݽS>' +3?Zf>LYȾw \]UJ<=O>+2{=*=,tI==N=K>L>7Yrt=;[K=bHּ= =e6cJy==Sv=T=r=>>7V^Q̼=(H/>>\8~۽>hL>=p3:>c(:4?>RIkE>C$ \zAr<ن=7 >b`!g>=X=:>Rt.>= ?[҄m,>ϽG>Hӭ=Vu|P]=S;=?f><ġ#2^=Uv*'>ZQrU>J_L7>;T< d ??@>l_=YAs=+^.>&s̶7D\ns=j>b>Rq=08?8Ȯ=ސb>ךk`>==Jɪ>~6(V>Ϧ=+f%g>ab_j6M>>>s>=Nxڼ1\>1>>Kyi>콰9=>̍=*c>;%pk> }=@FS>BX><`>#d4>R<3->.>s>%*J#>U= q<&uQ>X[>{>H>jdĥ>XJ ソ&{EZP:>` }w/e>T$>}>S C s>9H?>C=U>Ղ#>$B!:->~>SaIH41j?~=V%4:s9>b[e>Ku)?E]g>H^=>d=/>68>/%}=d ==hO>\~t>G1< *$=^ >|;T=_P>jl\S>.-oYc=;Ա.>[p>X'̽>z+\>%>>`?S?t=BT=Ha>;YZ.>F ȾA>=>IG>s=,>W I;p>wW>d=bp>?5>oW =r;/=[:-=r=pA>XFzۏ=<>c7 >ə=I 0>ˆ>_X";F?#U4lD᭾]`>DԽ"=>!r><+> o==&>v;罹O4 a0k>ɟ=Kb`=&/i>kɼchnN,>#I>f-oP>X=pց>.`}>^>]-R+b=-FM=N-> =Ӵ>{i>n=ƾ:,\x?`/ ;=ӽXJGzl |=$ٛ=Z2>2+Ľ;$>Ƥ>X/f>Nx+8;)[=:=2T= R` U=7Π?Y>x=АX>Ihg^d=>0_=>1 +>|P.cR,>=賾:0=xC >vرy>>2Ӿ1½ȼ=M Ͻ{>6f=n,'>L^uJ" >,9=p=%pb+ٽ7=+Mc')nƾ6= t;KA>5>[=+S2==>]O>ܥ5s=܈=߾3=(>o=>;m}A>\/>F>l<*L=Ɍ@w<<>W>>h;,b>zI= >'ض[=JZ=jNl< -;*G>y=H;=q> kJ\sľlA8o{>hz?)v=j>Pj)<>ջ55>->z=d پTExW<-y*T>=`[>Q=ͯ=4>XX{,?j>ǹ>o=_ɋ>y&ٮeUD=|>Z;? 
B>ɓð9(d>v`¾~{<"Y@Ww>>༁w=s=v] =e=<]J<OW>~5V>:=_(9 ߣ>P&U X>?=v~>>waGU.?t;HP>b>2x=ʕqsB>:eA=$<=FI<[><Ҿ^={e=ʘ"(H>f,>+=(KKg>B;>'[žy<_X>=J;6s> Pֲ>xT>=4>t8?>RsdÉUD==0>4Ѿ*=*>=<>tY>LM>_m=i1KV[O=?+` )`d>ۭ=社F$>=F><#p3>+=Y_>=k>Yb>!w|>>>UHjչʝ>R_7>PE(j=M>ͦ=,^&>@qn >M2=4ȇ-(.> &>4s<>ެ^ >Y>s=F9 >=:־z>y=&V>3<`>*>l⽢㽎>R5ͽ"%<1 q>J>|"ϽI>0MLG =& >-9=sf>K ɓ> uX>*Ս=NUձo>UւQL +>(>i>M3>r*z>* Gtc=<?-+>r,:>m=~>l9$>WrüAD>&r&>)<޽m=Ks;>&>I>isv#`_>pL>{n>=m>T>m;\:)=ѣ>?1f=8=}']c =-=0>񐥼=j:4^=-=3V1ڀf(é=y>Th?Urk>2P>Mtx=kA꾿>v߹=v<M#e>Bi~O>,+< TC⾽X<="5E u>2e<;=>LF1L^>;<> Hݽ<ܸ=>X=r>:=QfR@=~2:CT'<#="ag=4Ͻjм{|$ǽn*%>^ޜ<XG-`Ig=G5=0A1>V;b=1^<H@a¢ت8 {U{R>Dj=D\>D=%W`=~D>W>H=g=ﻖ=>[=0d@>%=ᇑB=9<<=]<_>@&!=u9l=q>J=Y,V>ގ>˾h:> >_> =x=Oٽ˱<ȁ>0,q6[>̏|߽6^z==_f囖=ҽ_b8o>PD)U;7>Of>S > W=%=}>[Y r8[Ƚ[Ȁ+>"G>&2ի@>s=:<=<&>L <<>7\=$.>1]=T>q9| >h%>.> #>a?>)=6%T>:^=hνwX>- >赽T>=|Gbl>s>/lP>j=a={,=O$>*)R>uϼ=i?s䔡=bl<-8C|d׾|I=^R<0v=h!v>.h=8]W >J򕾦x>ۭO;x 4Q=@#= >S@=p}=\Z0>)v=]b>j1c)<=f8=n=ht>88>>4&;>x vǽV>I ]#O).)=!HǼ,V>Vg=>Ͼ=L=j ]s=ť=l~=~=%[>n=H ,=膬=Qn'>E н[>"=c**\io^O6=9¾PS0+9lnL>dŸӲ<-=KO%S>ޫ =Jc q\Ul"=ȼ`3,}qD%m>愡;>N8ă>+<4$]>'>ϗhie>"ɕY>Lf8?0+<`>ZP=!tkc> rľ>8=S2>:=$=O=xdAY[;>J{T?+>!&[;u㰽a V}=e&`5==P/+==^엾qf'=ŋ>3 W?I/dh,>Ug>'=-S>E g> >U= =WG#F> >$>%>Eww=l$>/'|>ޝ䨽D>m9>?̷<64*l#>/n>sW8)x(==5Q>vkM0>C@zJKt!Mv|$==:!X.kVm>B>=o>R# rB*<].>>'ֽ$/c>)/>V>[n>7<چOEB>#.T9%R !L?< "=#u>[K=Z)'2=!u=">x2P>I =@<9ED 1"#>x+*(5ڢ!> =Nb,cn9z]>٬=G>-b<=7E OC@"Gqӯ<ڌ> 3^>m<>"O=Y۸V3o r=C𥽱l>I(=q>= ]R!\"Ͼ->'$>23۽Ɨ>\*=9;]<a>L[>T'ü1k=qN<1&J!=73:>7@f/>b'L>8Y_: ǚZ>)Ii=+"oA<#E f!> (K<\8]~>e3;>l.Kv7X(}>LӽK="oE>pi>3Tw@\\>35>>D @o_Ծu<> B&> 4ݽ5 > R׾z=i,==K===jb;V]=Y&XLr>A{#Y>x3u߽F;D+>=D/=>/>=>>1=;;=(xI1q Kj߽g>`=>o'g=SȤdǵ~h-d:޽ >_=6C>i7> } ?|==:}>c[l>fhS=+:6ؾī1W<>9k>;=bxJ6)b <-:>U$>v=T;H)=>.@4=ھL>΅' 1ٽA {J:J-;gpF/>=2>>=j vk=Ge'=ǿO>lĻ+> AUc;>#ذ<]>I2N ZDq =l̽:Ti>_0>xX=ƪ">սhn >_lN^>{ŵ<=ҽu3%3= =.Y3>>\8Ə8Ծ* ?+B=et=sj23,;i?==(l>^GO>-8>8ɐ<i4>{p>X=@=&H=͂=>ZR=ý0}즎>5p#{̽-tk my>/>I\ڑ6>`IS>$c< HB7P=H=L=o2=<[ >jrn>g>q>-Q>_k5 (2gD&͉= ~}쾏!Ľ 5>~Z>E=ih >%M=|t/qv>=`ν`BX=CMҽ>p;4yAF>)I?=3d>H<h<,'=9+!=<=g 
>dJ=+֐\0 r==)/=oA,y>%V1>SVWmi>F>0i=a+=j@=U=FcBFʯ3>.>_yP4'C >6r/OCiY<"G=̙We&l=4MJQ0Q%צ֔=ffRûT=eR݁=N>Ӌ=R >N>םXD<`>fwwc{Ѹy\?=aըJ>*g>Қ$==˭)N= UlK=,i>VX>;o\>SQ7=l<\Jo'<m=yJ<w>o>3 0Ta>p̄>u=i=>W5>->^u>~t=q4=>}f'>m=a>/ >J}>=R->>!#U_wfo`N=oj=YG=BI>wn==J 0=#m7i187=UԽ>LM=b=><ij<>}'.¾qF>T>,)j֊A=YJp@>E=):}꾠ᅾ X44=W>B6>EZ=Ćp>A>ѱ<Ĭ>n0>{#jW 1Ty~= >ݳXD=s't=|z>)>즾<6 )0>=>@=w"=-໾P6=4E\X>*>| >NB<.]>^⯽MľE'gJܾ q>n=P>e}8JμԘ4>98>jFms\=UD5>,!==i=@Bm>֝D>$;7O/?Yh=t>R>Q>Uվ'>|H;r='|L:&ED0>G>$ͼXe6W)惾?>=-I=> #WFT>#<T>K=н I5!Bh/82>/y8=z~"6=:[@=KktF *\=>.Y=.dRP=aI} <cپ )>I>MO=vfcD>=(S=c=J]Я倾E=&=K>= ;Ƚ̵`>/=?>D/ɽ> >w]>yj>&v<=c2*=nC">"lz0>o=i>K;$7>Ǹt> K>0>=FD&>8ZYì>%mS>Zm/хI5# >mUC8?`;k>`<>Hp$}>-<><ýf=w=>>=o=a>HɻPB=q>x= ٳ4>(=;j@pS0=vr>/M;b ?=Яľ6?=SL=f=b=p>ꓽ~>H=O= >e>URp>>V.ybR>r=Ι>Aݽ?3!>{=Sn.b.>:=O<;X<L8>K>tDнυ> R ~˽t> yW=>Oh;pO4W5r4>>|.>*>0Ž==TTM>پ=Q>>q=5='>wl>%B3=>sP_=%>>T>*w޼ꎇm`4>YQ=-Xx==ڜ>meν6 ,>>eI@$=.* I6=?<->rVw>}[;>e>p=7===AʼB54=j=4=$#>===68+>Z߾ֽi)IE\=ý3N I>T=?gȾ?>EQ=RѾɍ=(=< >=pS>n=j6=I̲mie7H%> i>FL>cռ ,jϽ Wu*' Q>= O=>4=n>C?,}*{>>o#>QI>>=D.>4H=WX= ȼ|۬=͉6= Wཿ R Guý'^->\'V>,a>3b=X>@=fv<&=8M[=>H)=>?>[o-O?|̼O<sٽC_6>V9Z= h%>^G;j!=>>2>Q.=P}1=>0zPp1<=yL>G|8=6j>\d==?$ C=l>Z%="#zH>i=A I+m=[~A>ԟE=1m>ɃF=#ϽM&=:3o+ >2=qb >I@>">dǡQ>DKq_>?0 >^G;l=:֧W>b=RE8<=Pv,>5n*MY>_BͺX>֠=`>B>׋>= >L0=ے <;ƾMD＀> 彯l> Ǿ @1'{>҈Nj;N\ K&kB>9F)t> r%>ڽ< >P>f>N E,Eڻ-ǡW|=?q0 V_>=3&}=hZM$g_>?a=ypS=k=gb2=u =[E<fyѾO-y")2${>k '>Q>*4/N+4qt>B>D@ݞ s{ڣ+*>l&^PZ̽=7ľ [XOjf=i/k=;\=k꽰wk->T:<_< =m=$v>Nۻ>윅g>|XU<İ>~?> 9þ>=ULɾt=sH>!=;C >~+ >=p؍#+I$\ gQ>m=^O>P>>R 5H֗>ѻp> CD==Y(>Qbg.>O+=Hͽ !>,.i!(oQEr&rw>8q8b==n>0ν>Y5ⰾ r">&A>J 5I!C/=>hm>RE===v>nFղ`]O="6s=*>Ǚcdt}`$sL>Y/>='ֽ YgL>5q=J=xӄ>l3nfW->h>x%.">*=콶 =;Lrgn=۽rz=3(=.>-;D=%B=r8fս=$P6=w=>@1qd=vC=Tp2fDھ>t>`TC>R=f_> gW >L> wN9 ?2z=aЙ%|=\㾾k=<>v=*mվePdL=\u>*f2>zUlt⾻6>ؕ= ,;Tgƾ?h!|þu=>L=u>e9l>>=ڕ6o=Mq>vc >q=='҄Yo\ hz>rt>Y6]=gT$(}?=P;?Zi>h=E>l;?Iq˼r2>T=D>u3 >"rýN6=<0X'J%hƉۥm+>>ie>>ɍ=Z(==K[<-09#N1ą>&ڽv+>UGA;>Y/|=-/<>WaA>ר;<== >xFP<=gFx>o]= =w=0Gw>> W>J>x :A@rk=; Oڗ=&= 򠽕[=.=V$=0{)>gJ)ڽӭG+=ͽѾ 
6>N>(=tl>뇪a>mV>^8v>=ԽB$(>ᆬ>0qnj,,>=Iw=;}1,>Xڽ*>qP<^' Yi 4 2sU>Sd>u;=ԹH= }6B½  e=Pv>< z-ͽ@=q"B*>E?z6?^{>S[nb=}]lk>;iF>u!B'!*2=4%>~ZK׺J>ѩc=>>A P>w <חn>Y L>$<30>>k2#6>ڇ>={秼qY?T5>W*t=CdȾ|'޾yap$c%<+׽TBHX>-3>U><=&>`W,> 3<>>rgT R=>o{=Jž?Мe$;Z?>B>`{'+>K{p>޾X=!>P#?:Ӿ*-=^*">[<=e>b>^?p.M`>>>SR>(<{zW>wn>W ,>:=(=⪚==A͇ />o9>׀.>\&>UzM.Ͻv >l!!3={Ҟ>J,>n 7<]L=Bp=lJ~>ӽzrU>>[&<=%N=rL>M){<>4>0)>Jc&ټ+?>5C =n/=o=.>vgXz= L>_l=­>1%=2= z _>5~;! e>E>;ܦ]/^3뽲C>>=>}t >^=B;>`>OWX5MdO2NO380dX?>S>{>e*>?ӽ>=3>|H<&=Q_f>{Fklz >a=̔I>6i4w%=``>] 9=J>X63>QܻV>:>k+;:8f>=Kweer=>5wE=Ҕ=;i7j>iĽɋ4>ZT>SMa=տ=lWxn>>Ί̊d'Y>f>>(>=2?h`P=G=<= [>i:;v>=B` 3JQ᥼XI=*>z=>n^P>6-=tͶi>>2>=3Ȏ>2&@D!=?y>==/>> F@ӱ<N>5=>{> =C:=1ϽY>m> )7z=%ټiv>j;= Z= #֤>|Й>W>Z>>L@?2%>ra<,?2Ӿ=oW>_MϠ[t|>G>g'Kr(nHIֽS=fVq- >:F`PX=(=aW:8=~#>{>|"B⢾$-> <\=IE=i>2*[8=M=;>f= !>V'" }'p'l=ҿg\=F! ] >gU5}޽=6߾s"=Q?A$> #=X6=J>}w 9:>79&=}%z>@н6>,>ؙdR>^=dUGok>C\&=> 7h>;>9:ϣa>14=u=6>ü!<=\>,)>ѽ.Ĭ7#>^eҞ(>P>漡}e;>,=x=o+<>-(<վczR>ξ 1j>%[?u.$n=l',KU9>>Z>H>=a!x>0>Z8C=OE>)U=}U>uJ=Fn/"<ⰽa,><=+c>1ϰ4>lY$>I=>,?;AO4D@>LI?BH>QV>Ib޾6 ?x7>LP=tD>*~Ss8뾂f> 7wr}>B<>ɰ;i> @> POͼo!>lr,|>.cL>^%Z>ע=|=2ν~>}>B!X=%>0砾vZT E> <^'=:Sۥ<4>]/#>c<>@O߼U/ƶ~7=O>Ƚ>K>,l';饽j=kf->C>'>?:U>=͚T=%"D$==5.H> ᏾^=Q<$ ;e=6>^>>0C$>=qs=&HJ>.Px˾Kkq 5<<@Ona >m.pI=!>|>Ht1=)#>^=&u>Zo>-=m,,H>aȞ>~x /!>>ߋçNΤ=$ T={>1ܾӕ>>B9B> s <-=#>!"=Ո>JA3= ѽt=ߨKǽ#C=Z#>=&=T5J z!Pڇ~1DÆ;Ըn>bνxiw3>H89=jH ?2>w=S>6k=e{[|du֊!?3>3ֽ1%r<4x)K=>@> >=OLoDAiߺo>!ʇ ,>[Ľc> 'm>' >w`L>p>(c1>"J>*eټ ս=)!=\Oh>FYܚ;|>\>XBhF=%B@ >JŽ<>  =iq={ >4\DY=d>kL>x>>J=,wվ^ez="=9c6<=zpJ>XB#Nbt5SX1@&l>'->>O=p=62> KQ=XY>,=}|] vnsd =Ae D\C>N{= >Y< f>ν2 ͽ;ׁ(ÕC+7<`k>eVA=>Wid T=Ks)<;>(P>g>F(b:P#'={ =x9vV;ͼ넾G >k>8z$=H羷 z$]=0wC=>ߎ=E?&?>2d>uD>ԩp=f*=(<=hӓ> t=05">42<-Q`ߡ==J64=>?F_=G)_>=a>o=2bL>bm@y\f:VQW8<O]>zT2=1>(>-|8"`^5tT䡾I_=f\[==2> -w>V̺͚=t>>>a>iWF+ñf=q۽X<>Δ-I>eサ35ݍ>/Z=L">ᴾ`.>lwKŲ&>H\=&EbRV> X>R=<<ꎘ>Q|==8=q>,.==t >xn)>Q=lF<>+>>d[=j(?c>s>+>=PG>ԡ+=V>ﴼw=q=U -=u=j>۽%>~Rs>ʝ>Ƒ>Qyˁ=~dߙR><ͣXKAr ./`[> f=I4>[݉=XzO<ѽ =7ccV'> ==7<:>wև>Ⱦ,)>ɽ(a>z6>S>VZ> 9>.;>=wp=r>}L2>[vԽqW>YqH~>:=| 
i=XƽpE=`>J`;{m>^}|> !C=G޽Wnzbn>AW==*=?T- >{= &h=>0F>mþ=ȘU$オ> *P}pϾ >Qͼ >.:>:5>!==8}<)>B7&S>?L> ZM>ž̋{>LJ>QC"޾ܻI=wh)= E=>豼*T>s >J=cǽk=]%b?` @<$:%Ś>Xϡ>1> ='8D=mDs/=.>Ȃ >(w {>0ǽ7T ߽`>&>os<][=4=><6 >Үd=ۼj>o6Xƽy͝>eԾ;:D?'> =ɉxpq+@>8Xe#K>rx>-b>(>ff>u߻\v<Ǚ==+,:7)=w=GW,>ュ=kn>G ?5o=[ҽ>>>s #[K\P>6>3i=v>=> =q=s;^}4 &,Z='n=0>Λ=j=x+CG=5e>>#=>4QA>=Z" g=sĭH>IˈP> yY` =^pOXe=$>7>Qvͽ *"}9>+=\I>55e>=#>>L(>}|S>=,=%V{ۇS>>X޻z>><,=>s1UoT>g=3=4=]:=թ>O4.lY&u<>{<\<2G}D>ҧk>:=$K~̟0]>B=Lbr2r8J">NBe˻u}ν-?=ח>ꁬ?>1-_M=񠽚H2>KJ>#5>K@>>Y> +=x;< @U >WO&D>->pgAK> q%|K|8 A=s\= ~>j1A>cM>R,> *mz~5;k<}W>. LuH<8<ۤ ^R->.>}`>d=`oPh>>3AQؾo-kAHH=^w>Z3=Wf;Ɏ=1>Nj位N=ľW|( >9y@ll>>4? Hˎ>{%>f>na>AC^/}TN >vq)>˩=Ͻ6N7>JM C<>-pXL= >֋=v<+< U>ĉITd =|Ș>%="z{>[>M2=` $閾 _<>=4~뽂#=*A#= 4ؽ%)N[*>sB^>5)ŵ@`3>^YO>J =İ2={?/m=HV>ceT>Rk;Hܼ^>塽=Jc>mP,>z]=U(<҂=})Y< F<}B>)x=M=ULӽ} >>>ɹ=QY[>;e-젾l>ּ.4=ʼR=$#S4b-=)=3y<ʡ`>ľ =t>=gqܻ/>揲m/>{B ӜjI( >c>'ѤY>є=Sڇ=xs>[=>_k=Y >`>b>_e;=hQQ>3z>V>>>B>Lly>u>/<]=IFV>>o>Dg4;Ď>P>%>J>˴O>`z9>ݯS>%>=`>:b=._>1>%+׽6>f >EK-؎>JFQ7UE#RkJ; > >[ҽӅ>bB齎Ⱦ4/>9=R[E>l=KEfJ<)ZEve<@*։=5t>Y>'>0[NT®}>E1>)߼<ۺ>"!Ɏ=kH>S/>pa&9=>uT>ho!1D=;d>ؽAs=Ն >6HVK6>ڽNw.>kȾkU$=|mQ=kE=p]'>Dvhi!f=>\X>$>;E4W==%= tQɽ n>$;˗>\>:vH/Ь>9飾7L0)>lp˽~Ћi/d>L>; =(뺉=?GI~==݀ >|PS=>m=.;N>)>r)N(j=mH>59<F(PZg<0Է><3Wvp>I> _ɖmވU>+<镽e>%<-0-<&>%>F>ѱ>ُTH9~ɃK>©$ԾKY<>= >} Z>_Bc=SAu>Ẕ=\f+½>X>2@=j|.îZc>&> 1ͼ=_cZכY=^[B>Mr>VbV>|>>e>0vv>*reN߽= >3>,>ġ=-=WݖεڽUat<]Z{M˚n>c>|>A>:gJv;.DD>ܽ<v|YAT= Q b<>RHLƞ㼴f> ٠$==aV#MV$a>:>/>x>lp= ;yRs>D>OȾI˽V>tG1=X/">=7!\?HG=E9:Mp.=\<&HҾ0iR{ ]z+>N>O=u>/=/7'=c!==Tp=r>@ܽJO= L=:o-Z>U~нgU%χ]N=><>Ӡܧ?' >^{=<@ =C??`w>c>ݵ=nW' ;>Q>,>>4ߢ=ze>e=f>U>~>; =g>@B _-="UQ=̴qWJ߾2Z>#=">26=Aύ 7>4=J>h]xG>wG=抽CU> u==d>X9*z>WAhqc #* >&H;=۞=d>>8>*p<7ӽz =Mb>߀}+jXC>FM>B=%=0a=1g1>6 v==WyC8Q fe>1>gV>lD=f >!>==Rƾ>|;>!=8_>=_<>[<|Vj >=8qC>ѽ^5sni=(=\>me=:l=E#3PVL>>S&>s>=0<cs=QB>3>:.ݽk>=~>hl>=@^qjͽ=z<˓>CQSXbg>%Q=:O;=+ѳ~>ʔa>OĽ|a/>48><==A;9\_?=4 =zk! >9=ɻ'8O,D <>j>;=T3>n񽯮7Yf*A>1,uvKc>l8\>2?پd>Ʃ==L =̇a~$iP]>5t\>W8X3O>fcν7==@޽p vs9,==ah>!NV0Md;>m=*娾~Ȼ7>)>co>Sm=qU=c>ϝ>:&>ěS>U:>jv-G/=eZ>T? 
>!(<>z>PB=Y=9==EfM<6=l,L =sq=;E>Y7_=VM=>Bkb[p>>93>yy>FySٽ w ZlF> "_ݙ=p=04׋6]>0=þM(cml8;>rjy'¼\>}:xK$8YiH8>&6w0f<ͨnFo1> U<ٞ=Pnf:޾G=-7:>c> > O=Ds>f>Q=E_vL= >9zDJ>ᠽo=>h[,*>\.=Xر=IнT=?>bGm$>;>(=İ>cգ>i=>y=즋>=_羧Fk>b=tE>zxر-Ҝ&=v=0>hĎ1܍렽>j=,;!= ƈ=u/$v>%Yʼ8.>V=8Ǖ=^Mҽg=iAgB>;ilW#=&GDU>G='02<&=feT=->L7G>~z= M+>=;抽Aɺ6=>>>B=jH>[4>å=/ TsB|D%X>&%ɋμ^>uD) 5ͼ\A{>s^w<>aD>OԒ>i8'9bPKN =Di>>r\>v>>ML=G86x-.5>pupʼܖ>H,)gn>`̎>ޜo3<{>dM=ڃ\>FF9>"= =[8>B=>K>>a~=v=oU1Z>q>ǧ>c݂:=%>0|>6 0-ؾB!_>Zw>E>tŽO=A/>wsBHWs>Z>_u`$RI=xcR">nJ>쾾α>{6=T MPԼ]=n>Ů[0 O ::>AW B)=p,>aQq(s>`ǾA=.>#FX2T#<7*e=6^=Z՞>>< >>pA/; 9=^>F=0 >>ol<$g>|ֽuó:E>HA>G=9>e=vDs{Y+>Ըm#J>,=R|)>z@ٶ>YLBȾ]>N>~> =MFS >=CE^AGc={ҷ-< =Q>4>>Z4KA<>Ճ>N@=Z޽~>G >R[/ /C+ :6>>a> =ս7>y>퇾,% eUë72>r 忩\K^=& >=̦=X/׾@< =4?"> jn>҈F=>x<g贽** .>_>=&="!>O=Ii>¾FI><'B=Yj>+<)|O>7¼<"[l >?˽o>G=r}.;k:9+BC = 0>{ت>(`,>Pa< )н}=o)<@΃>皅%>Kߎ=u_>ܽM>9Pq>2\a+p+>#'ԏ hHL0T,f}=L@>r"l=w^/jv=#>>Êh^=JWh>-׽= > }7f'?9`0 >V0Rn3R>ٕXUGEi%,>MϽ2 HM>N>7=wrռ$т>>|ݾjIuܖM<nW)=R1`>e=t=$ >]d=c6W>W|x>#?GQV=Y h|>=): ;ڏ=w3%/>H9C=:iQȽ{<;eL=ü/BG=X{Pɞپ?tv=C>WN>17Y">z=yQ#x+ > < >9=\C=9><<15 J>蜾Bw73s??D>*=P=m>>>TJ>V=`v͊N@>>N>>5=?JCc=/7m>=ɾiP T=>Lm< $>-r>[ou\G< >_ 65=C$>;x=͸>?>Oɺ1 >kN$y̮>5ѽ", =O j}=W:.Dhչ1<=,\=l?.>B>q>тvfJi_ C>ۿ0?=<=ҷ ?>@>/>(5>U"WZ=qվF3[.<nx. 
x޼~= M/i>Iws>Ie[߻ yO^=F>@e'y{=5y ӊ<`=]K]W6>oaռzk >[MEy>;p̼ ?!iN>(>{,j>D=mW<2=t>>˽M=uXٽ>[*> =s>ɠ=.eދ>]2>yS½╀F|>rNh+=>t1n퍾y#> 征i~>V)2>yw>_>>ZD>ߏ<;=~'>)|> >rC>Q>\I>5>M>5>XgJF>D==7=҉g,s=>w=f3 >Nq]v_=T#Ľ@!>N>>=X<:kإ>-v>I̽d?>@>ѽ/>*+n>D>4K8[>`F% > >;=/C4>V&= =H>6?>0M>(; >-=ZF~x>@;h>J=ʼBe?Uj>>dVyX$ؽN>x𽷝80 >6$7キ =õ >R_n;M%Īν+>2ZB=4>|:liE-PM=Ȉ}=o=\t[o==D&<~ȁ!>(N 3˾-=>}'> (C=~w@ļ`=Rx!gSs >B=A+=W,>[f==_>(U>B>Sy>%WOn>]罴<%P>w>$>yS>Jj6]>]xG>~Ķ Ǣv#=.>$<P><2A>㶯{|hY`)d%>F|=>f< =C>]=Jt>۞M >cO9=SGw: <=[ ( =[>XΥ<=-@%U®C =0S¾>|=˽?_>*Ƚw>"c=fVh>K2sH>76<1%<0=._=' <"E`5=2}=A=v=n>:mӼˌg;Q=>֊3ؾӾU>S/ʼXujNhiz0Ұ2=KaY8c>x\>t dc >G %b=x =6:9 >`>20:Wf?/&>8>`ٽ@n5{:vH=~=Ti=[>Ѧ=q k=~:֬\p̾k=.6߽yS>4=7.;6Bi='^=̼>u>R_hNWx缰]=q>o_ֽ0=>AT>ɘǻyb=I T9=;>=-nn[>g;|>4q=,097=ARdo<2=!<> > nE>rh *=*h>>u==uu(==/s>s>GG=?>܉M>t=?;=-p>t]<_x>>@Hr=T<=:).~q>'ɽ?B>5%ʖ>X5=ʽ0?b>E>1?7]%=?sY>=>Y6>挾;/>S@>(=F=ee>t>Na4= w?!rE<Һq<>n=~5=>=z ?>a>\=0>?3D8.н>` >0h 3>p{=n=]J w>Ta>1> @S==>GI=>g >Q'4<ӾrY,>h>s[SZ;p>K j>]?y5=S\:>_>r=I ~~=6>Yɜ;)>Kc>l>> ߽\د>2XWLKz\=t{f }<=[d97%/>ڟ=fy?4>!;5ƹ>hO{==f&>s>`>[>8> g=>r~=ƔxѐLn>4>i߽"}=ĻWg< >ɼӽU_*>Ӽ'=1۽EB>.>l^胾z<== ?>[h>x2v<57<` d;ڽ->>"톻~>g@U>5>/\=Wǐb%`i>g{tS>_ٽvYR>q&>ȧ=>o=I 'Qʼ/X=}>Gk >>>ʽylR<&xL_>ս%&> 4=J,> =t=1b>)3>֞=Uof>dv9Q *== Fs>Q>>Xۉ>+ b>jW9ݽ~ow==:>=k?g>jy?7;>w.ɴ"iJ>U5>5>*(Vͨϼdgr %xvZ}#̋+=;DRv =Z f>|߾&=0&jh=Ǡ>u>hT;=¾w>nV><Yc>=ƐսU=,>>9 ?_A>2߽ٛu>Ⴞ^=b=;ű|p>Ȟ>)=~!!F[=sM>ջ8\o>`>!Ὕ&= [>k"݌=ҾWŽaԩ<đ὏> >?=AFP彧>x'>3؁L}<&&Ů<ak=e>)>FM>;vfOR/\G>ח?`=-N ?/`E#IIy=8',R~D޽ ;h;>>A0j;-buW=)~>N>;q=ht_=K>fjp<=> >e=?9>覽N>-==v:Js>=G>c4><|86 < >l\If_Gq;GXD<Æ> mp>gݽqӜ==[>9j=6&>{h'+rUi>!>pƋB>*f7> ?V>/>QGJ>p>~Laz*N򽽢>?UH?|F::2=%=lczr>U*9>OW>L%2=ֽv>D?<1_=̔|./ )Q0 V==cטa=o>!=؊^>tq=H>Z7|)&6C(+n=z>w>Ȓ=QX>s7 =|.> Z=D>j_c=:T.?;=U=5=*:BY=&^>3>?> _<_|=>d=H=I>=,>>|={^{q=>IG >[>R>n)u \wc>/=,> >>|- d^!1i=ɇ== ^aU>>G0= =XAY=C{/1ڹ>Bk> e> 6K*fx|IOP7/5=T2>=r ~>FY,>=8>+cyCX>)R ?Ľ!n==h="<>ٹ꽧G>O>>Pt :>ݾkwF=2@=ٞ >$>G<֜u$4>>>v-E b>q^>K\1>F1uU>. 
<Hr=ֱ> FĽ˾I>W(m=ުNBԾ\X<?p=N>qY=ؾL = >8ƾU=/>?=O=6bo N>`QH>e==.kث,e>xCႽRO>2=2܃>{7SuŽ%j^?8嬻i Tc㾴E>YX>Y=W=ƞ>:~< >3Xy>旼Mue5>;9#O@N7>_>`=;(HC> ?B?e?a=VV;Ro˾L.<2מE>}ch(>Oͽ>8e!? >6r)>c>=Cz<0# R->+>F΄=[Sn>M->p->vu>4m=3P==iXT>j3>Ja>>Iʍ/Qn4>W^=Os>F7==>z>>=>+>=?>&Y>Q=W'0-xIɽ>f J0>GY}>;>%= P>>>%)x]r|7ĽMC>y}=u= 'j>[="ݾk= >SF>X5>Lڸ>y鼎>>k ?-J>]0>I.=Mg>ZӻB>f>&?T}>(=>5~3>ŏt7M=_=>d۷սWlD?b>@5u7>Q;[PTHlU>>z=>> >*=Wn!8R=]y)?Խ b)w>>Ns3D[=uv0>6 [>>VE9,(Ǿd=>fι]>%>>#EѾCm-w\ͽqm>a(a#?6=ih:TYo>FB >#+ >ɾ >pwb;9X>D?|Շ>~cCξHݾgQ>>! c>$R9MУ=^*c3cKb>h>t"@w?=N۽4} ,s=88=Z" lJ==7 =־m6H $ҾHf+m}='`=Q> .> `e=Z3o>R5>F>a>i]=?򪟽HǽyXM=Cu w>!?#>l=+齀I!mc>݊->F;s+.>̰%>E=,>"=ԅ>DKJ jLɕoѪ>:?JS;]>Խ.X?e] >mS<[> 1=>:dA >$M>&un~>9R]h +gtR=1v=ԉ#<!>n>Ṡ>{ _+ R>1>蟽& ?W(T<`ϔ> <Ψ>ZfM]wU#;q>f(>=.>&>T=Y<=eR; SWt>sU'>=i5)>B >$>s33YܽjSվ@?\>=~u>{ܕ/>> =iz[>',{f]]=>z >V!>m-I*@> =G>ړq>f>p$.>͹ ?O>n-=]⳾h>RpH==>%R-/=>*^F>쎾Wf>@[=>-&>R Š#+> }>>bBwg> %B>FjDV#ykUѻD\OC i)i2>~ =qνDhu+8=.j>`=ZF=>%Fu$>9v>9>jh=(q>>o+$r9>V=x?">ix=ط=/;,ȽO> sR=@p*>g>(w0Ùo>d=dq='za=Ao1]J,2>'nkU3">N^> >C->P==5>KP>G|>-a> y5F=ןHsi.?c& Dt=)Dͤ>Ǒ=  >by-=J}>Դ/H>B<\>vnK=?:Xf>qU>>$ci>8jA6 >!VR(v#}W>L ?D>wPs=IH6>@풾 >8,>;k=;>v:ՙ> >m OO=>ޥ$;e(l5`6>Ưg "ϝ>_ <E?> f>>XQ!k>xƾUSۿ>֗>\2<> > ?BB<2?e>>>t8>>R1^=X*!i=3S½$=` 8>xN|gUL>AT$EiD}z2^=l.=>ͽ">ľ >]ֽͪ=<7>n>N=]>ki=A7<4.>>q=YK)E>`o<=w恾E $=[8=l> SY&ľKN,!>>HbJH=3=SP>5=TV9>Ӏ~=g=t=ESS<,6'X>S >[=ܯ=Iz6I>3l{A)=Iqn>8A4z>>F}=URy>=e=43l=S%E<;D< >W2-ށI>޹V(2U<kj2A> ưm==?^t. 
p=>X+=`>> h~=DMs">=;}=>Y<>dE?=|7\>IƾK>=쌾XҽJ>Ѣ<=8= Z6%u=O=ڼDT Bнb r>?"NJؽН`>^ ;̽K> ]" D:>>1\Kn>i>>>!q=>>-P >==.8=d| =>:=l}g4k/=wbAB >>~S>l`>'NLQ-=V>~(>G-?rX=Q=jm~>3>}+>P>=Y{(>V>Gxd>@BǢ< =>͌erg<~ >s >>% I2Y= 1?5`>q)Ӽ:?E=AĽA =v>u=E^j=z-Ӑ0#+ >{t?ٽǮ>SfzAZT>=XJB=<=y@e]`p(o.N-> ɜ,>2ۆ=]> ],>^{/7f> {?n֤hL>ɒ|+>=<'=t)rŽs>;`gkqU4>D> ~,})>\=P=>͏>=b́>lmI>^r`&<*ƭ$>A?>0f>="=Z+=tC=sX>-5V'R>C/QT8ǫ>5份JG=N> \=t"<΀>F>=(=H>S+<&aļML>W($x]b.*v>c>i= =I< *>tuB=<(e<щ`t>p*>"~ =Cf>;> u">}Ƚ `:>>t4>Qx>IXH=W|Y T>}}>zܻeNWIޢ==X:1>B=TYW ֽi+mj =G; 5>R!Is>>%~Z>@]L\Z=uY=0=/b潨k>E;E=w7S=0̣=DU=C>/=ƽLɭuk8v* J>KCeJ>j`<5;Kӷq>>]M=ilL?姂8E-Ž|>v>==lY>I={e><1>:v\=}_> ֨<="l騽ȾT!3v>׾'=Ojae%h->)y=]06=ӓDTvx侇>:%>s'>]=f?h=rSV=5ܽ_!W>㽵VXy>b l>>B:>tȾ{0AP0qm ӫV=Hhߓ>|_>8U\>&>Nb;=>|[q=  <=>c8=:a=+x=In5k$K[ >&{=I=0> {ON>=Ak>RH>a@QA$=p"ʼh<{>1~`N=-=V=/>t9}(=(Wj=>>'>I$%=,oI=m 0=l߾c3 >*f >@AX$>>ÃKR*=T=1<=)^Z!O>y,(>,;[k>Ƚ\ >=}>=60= 0a/i[>f)iE=g\>=D=9a>2_=3SN=H>>#&=}{HB5=Y>E?#>I>%ڽ>:BN[>EY=dyY>Pm~2l>=95;é<ج.>4+r& C;@VЙ8= +>¾4u>=>o>R[>yx> B>>X n;潮>=-܂=`)Ua>eYj>)^2aO><%>sl>A>=*L> Ŀl=þ_>Jd =o?|h.о@‘=5=q>h̽v593S=(f>C>^ =Ï=>[>,BR=̩\> _E>]8=C==B=x>L >{k W>q>"=y=R5Bݼ(N4>{Q'>EP>\ʰ=G< !-&{>\ŽdT/#>uTޞ=:?>*V=j>ݶ>,l=Խ>E>W=jr8>>>3=!e d=Uj>Q8tw=? !cE>=c4<> >[>9 =Րة4>>)=#C<Ý>>E==n=> _=?qsH>m>q`)>ng5ކiRV̽x>= rrJνrqM= =G>>>:i=a!>W><#> Y>tBOD"=8+<==oR6M-䵾\ -h?=>&>=𽵷?=wQ|' =' ڋ=^>Ov=:+!=D`f>Yhdnk= ʽK->Պ>.r%,ua<Nc=>)Tv>nGK=a|=<>c/>g>O=5@;@>*=;?ؾ >#׽Y=Fc?I=/~v0a=+3pj+6>>E>G >$Ѿ=Z">}==_=uE G 411uH>V>j,f >It>=K FBu/=k=i>1=gh<1mY>>c>Y= #b=0?g@ڇƽlݺL>S}ۋȾ=Q>P4>j7>ށ>[NuNbKu>޽Q!6F_s>=fzB=zG]p=[m }e=n꽱%I)y>?C#=~:оI >Τ<f>ó-=W腾=Jς>>=) =%es>kz=Y>_ϽmLR>P=%N>NBf<'m>϶X䚾1p,u/?rp5L=7@U>Խf8<߈4ѽ 8Fe=d:> ;Az=>[ǽE½E(*u>P?>! 
=%e+|.?>Ya=^=x[ґF=h|t>p]pb,2d>6Q}>>(6׾[<'v=kΟ=Pp>l]Z_=7¾Ǿo\=gtỼW=w=a=Wf>s\ʽ :y +w$74c_rg۵I>#v>+btr=<'{=f{11Kd>s_>>=֨gt"P>7mV>LI>CI}:Nʾ@콽<_'>jPgmDU<> > >_G<=">9=1?PD=m=ů K>(y>v(>S=h\'b*;}c>R <-> =-A6=+M\><潓=j>o>> μ&>86ZQ>Ԇh9=G4>=k1H>]̤ƽlx/ƾ5>A<)-;!@,˽"4@ܳ񾶵+>kB>c|>_2>`y7,>6=;pw= :m8;o7B:>ᕽb>R>\=vBy?лK)=A)[%W#1O=Z&>dI>lz6-vpfw=d={O > vӠx>Š>= -< -=>%ڽ!9>4>=>Z>o>O1=7>l;!ћw 8=} >ds] +=,<~UZ 7=櫒/н>HDe=辒>꘼ǽi=)v>$[>}&l[?N<)>Z>/A*= qS> $>:/=֏Kq=uV= .E> I>;>~>F=x9l3-h >^8>=\>ɯɔ=w4< @_>iړgP^> >q~=!V Q>:>Q>f'=j{bC>D#-@໎mQ>US > <'>򚾥L>Y<} a $ /~+6=6[ݳ> ey}=>uh>d==#=F>sem#_4=!J>( K֯?;cRj Ǿ>W:R= >O}>楖=E=臽}>1!L;v>F9s!Cq=_MO9d= =+޽D=5D->m l=<>pQ s>)Mň=5t(lýu=akXD=˚>.4x"`ɽ1ӾFƾ>1Ksܛ>us>.ˋl=->>Ahg >k}2hԼx'!+M=|>\H=F>ϜVU=}\(5 -<5 G>/> =L=/>Qqip;ž>^l>M>oI >훒B;L=L;q=^Xk= >FP >(C=ա%y[>ξ/=Q8;Ɵ>(2>u>S<=j>Cգ=lr⾤6v>ex O>##=;=$;x3T%6`dߦ=ZkuEm=c[M=l*2_>[<ˡv>Z+'>l h/X>콚<_jofimcy0f]im>rnB=l=P>e>QW<&>38>GS>=B>H¾8p㵾1= .ѾƔ=_\Ͻ,,>=]>u%PG?R>hժ>">e z%(w==He=";>/n>H>(P>ļv>oʽ>D=\i>X=BDƦP6><w=e}>9l=^.>86;bJ̵h>^vZ>PuǾLPO > =nI=Yv># D4>/~d>jK :<=@=Uҽ6=:3T=SUC=5 >-V>;=C=zEN_.>|>;A=kCw>x>Îi|+Ǥ+>e@{>;þ`>$=w>QٽE>++h>lo=1Ȇ==9@U>>7=C>zʝ=2s=$8U >;]MIM=~A{><(]>i=r.>L8pWiV>g_=e>ߺ>=};+[{=;=f>$Ǿ Ց=J^Q->g=򊁾Zn=\=լ=>f|]>v>ϻ>ؽɽO(>K>== ?1={1>6-gĢ=S c6bO߫=i>^fY4^w>M&0>&ݽ Z>εB>f˻BE*J>ܒ(@>;c8=->=浽@\=@>i_0MvҼg&H>x>&>=j>W{=ܲӼG5u%Zn`>ݧ> Ĵ=| >'^0>;;Ҋ>>kagy[>7`= .5>3>XLoh;wj1ze=O80><>(>ШSV>#;_=yYz>2gV>HѷI>w;$=o;'پq-=7o<7:J># M>>̬d䢾p?>7>& E$a(>ud=jٖET =ux[< 7ݜp%HJ-a<:U f>F>jc5фc><>٧32g<=4~ A>D>:=ԅV ?J=Y=t<ƽ<=ϼI̽BMN>ٳi>Gx<Ҿ(j<꨾㲓>?Oa?d>B9 >-7>AS(-5+>=|>¯:!WQt=^G\9p**җ B8=*=>$W5F=R1>۫ǻ>>#QV'gDM>:#>%%5-=.!=tδLTr=֬TK6{ h(]4=/>lc>"ký=ͽa׽x|W>M ?> l;M>P7<45cCyҼ>Ys<ci==hlm<>A]x>=ݽ 烾'=ړ K%(G@ӕ,>y =>3rGȽ i(pG>aq>.<뾫$==*58ν4>q>/_ <>U=v>$ >+4 ׅY!S =v>3%%wȾ .>v}7tW_>poe;LT>WZ>]?n*>3y=_a‚!{>O߼sM}Vǟ)>F.GSQ=Hibv8>#>O0нx6LGi)==d/t8ֽ >۽UD˽k.<,>h}>Jگ>X&M>= 4===xl>gL>{=<>E߾Z>Ez 鄾8> >0\@>a=" >A_=5n=ˊ=Nx)|qD> =ӰR<:Ѿ?>c >C<>]ֽ(>l' >q#D`>uO<.;EZw l>0K凾3S>=ת=w/=2<=ޱ*νǼ3bü1f=y';w>̓r;?;=T.K<6P@>ܱ=fA>>Tui^C!>o= c>ymN9,>Qen<cI6=6!C>щn= Hx>Wpc>y>>8>#w={I3& 
1=a;7X>H.>hӼ;k>1C=7 >W>\{ B>g/=b>T/F7>7=?=8uw <=_}=S=}<=NxQ 6sJ>Ԏ>3a>h=I>F=H|Žܽ E4O; %nU`5$%=$j>,>u>cc<=hV>*b=>~׻XT>{=Lz;>y5>,{>i۽ڌ=!Fƽ*HE>Q> =žg,pF!>W@>T+ ¾*0>~,>e΂?o >hy;yj>EHy"=*\=E>cxIc<&>u=wVf>\B=:=$d=J7㼥p$=a!*>>5_b>:֓;ӾN ;8> ۪>!D>(=T(|>a> v>'=MN!6v(>~q":=t:,+>s1=>: W>ߒ"c> Aq=ry>Z=(>\b==սq('pK=>+Tܽ>>`>4)>z=Zc>Bo{>t=Va'Oc>d=G߽/pA=<7>Ƽ+v>A(={Ra=3q=>M =>=>(RwGǽ">a>Atljl4=&Vs \)>Pc):[_9^=ʕ>X`GȼĶ>N<+kp>E=;z=(k|>pAN>PTZڍ>1vQaRjJLlHS>K">iJý V<>߽dYaiX:7˾Ͼ>=c ->/`-\>v`M2IJ=:>w>~e 7=\޽褽J>>o0>a0ߧ==ai+뻤3=`{"=vbM:2ݽF=b.ŵyB#>lvjPP]=y⾡M=<} zrGH=5Ͻ ,>v=>=c.>֨qhZӜDADV=>UKɱr=7松% <Twĕ`=i=5-=Ub=p͑; W=̥Y=8"|j>H=d=f<۝=4]۽Ag>Պ=r3>=B=G_?>0=R>`r7bž+% ?d<,u׽n Ğ>rw>9=Խy='BԽ?>^ýʰ>me.C>=x8f7?:0>I>:=6UByW.4\>i>(=>D~﬏=SLec=> 1>F=hT]uRRa>5Q'־0>ſu>H@a<;=?$> v>>p,.< d=4>/=>7d=#9P: )G>ܹOm>.<(={W)>/6>_='ᔢ>p>Xm= ͽ=x&;>6?k>P?*+>(=N5)H<==@)5;ܼk=D>:Z> b=')=ɽ h>ai=S=>]̽>BeX;`A=[<^>0ؽ 4˻퓅>o"> $=!){:!>XXjQ>#w&>=$eB>~=;?YGY->( мM3VAK%M=\N<ʼԾ`;G=՟߽!Ct=>>u>=H<|;>- >(p=ľOǂͬ=>Rx= U=bFIAj>>Y9>> >3=_<4>ʔI4>e"u=Jm0Tnc>͹y>&=Q\s u$=R=b>Wo>I=?ս5$>-= =8>v!pv1=&D=둾r=,;+9/Y==m׬=Lb>= ~u mἌ>î>Z>Ԯ>V2Fc"̽rcּ<; %^Q=`tv= #>EٓW;aIVzi{>b?a=*ο=x2üvc/=">A{>,<$=v(սξz`s)L='>;m:=˖> < ҽ{Իdg>j >ϱzki,W0>so=}=qk=x>Fؤ zJ> =L2<=%нrί=\=h='&P=gow>y9>>'-٣ :=t/1>Ïdo>=i>]½b[6IZr&Dʾ#>T== >FBoaK>?>e=OESmE:q=8оU3(=R >ì|;8s=n4=7G~g>3X=MAE@ZMO->K}\p9\Ὅ#>4.-WTP13>Xf뜘=>tXT>q{ B=֣½ݗ6.'=Ζ=+Yl<'>>]%I=R>~+R%i=F<3*M0i>=>lw# =;i >q=l=D¼r@@>TRƫ7O=0>}zE>pϾ2<<ۑ/>:Z>x1>=n<̾us[>Y{g=#AX=>[4>D>h<,e5҄>ѥ>4+>m0>,==8>Z\`\$S=+>%WP>h=;.PQ=OV yƢ˞*>CѽmHm2>??E<;=N='ӟ=Q>+RI,>d]-+N>.Kv=wmY5>Ɖ==碙>7QቾD=<} r˾{

C?U)r@>->g{a|i==N >>z=&1>&>=ʵTnY<a>J>=>?oLJ> ,"7"\=qJ=ix&׼|C# >#rf=K;eI>xQh>wjev=CL>} >š>QZZ=5>G=~`>.ʽ"$ >6 Se=e={>Aqf>CֽK'B> ݽU?c=&#=C>N; E8)>#L>D>=@.y>C `=>Fg=.=&^]۞= >ٿ<> qYF>u>I;>u.YpZ=Y0;wi=v=b<{i>s==cDs4$bMP7n[L~ ;!:Ṻ>˵k>f꼽["Z_W>zs>$=>Vek>ͯ< vM=G7m@2峨?>8b>z=p=4f>:TSm=m\ZH==>E;R7V>Ζ>!*쓾7.ۧ=GT=i>#=Zt>fo;d(>KrɽD =6&^QEӽ=6>_T7>o,G>r>)]ͽ#vvg!>]>A]N۸U="x[|>Aˡ=mk>Ē04N>$n>mG>$Y>\%+>80u j==u>C@;nxϺs=: =) >ZvI=3O>Oz?i־qD횾$t>߽R>=p^< >b= G>e'E=Z=^>=9>ҽN1>=K>ėh~=)=/ =7ɭ~1=T>Fp{>t=8erL 9<@>ļat{=sh}>o=֟ h5$<*> Uv>=mJg>>Ͼ\>H->ƺ#b>I[e2.~=d$t>YD=oB@tȝtc ;,+<ѾY+ ՉT>ƀ>B1h=iN>G0=2 }>/cHτ>mG|3 f Xe~ؾ>1ɾ>>=&ٕ.=P=5>.q> j7T_=Yf>0]'<(f]1>78>=> |a)jgQێA X>p> /=;P.Is >t轶>8>9=aڗ>= 5nVO=b1F=R>qG$-W=?V>W;/ܼg>|>r2R_k轸V=冞>ZekSսx>KY=m>>';f`0+5=$vWz<:f(>e`G>j`G;f(gӽ >w>;A=$ս`0ҽ>E=3==_E=t=>>h=-O^VY=ż>>=4>ͱN<:>GC/>@@\X%?7E>a=c=(G=?=c= <=B>jDݙ>Br>m*>1>T=g)>Alm;2>7=>4-V AW>o "2="j =P>/>*m=;0s<iھg<`'l=E=>{ >ӽ"}<$6x_,L=->)b=+;;>轏>˼W=\p=<ؽ=^=7*>jFjϽn>U5zʻ&=R>r2N">w,5==kj< >N.=h=%>x;I>>]оI(>&L=W=ҏߺh=ڏ >` ?=ܷ l=ȷ=*>v> ާ<>ѽ=>hJ>") =?>e>Y <=[ >Wի>Л?.>s g=q;!1>_K) C> 0>!+H1>b=Dg<>ͦ= k= P5H=x á!>=@F = F$6/ j>(>f]>K2 S]>#L> >ڝsX,I ?Y=3νnի=!2QfgP>K^>C[<,=]㽵y>i,>2==wkB>!D&l> ȼ Q>E>'>==]0=ݼC=]>.Jм>b<Q=/=FG>2ZR>G7x_i>Vu=BI+g潒>>e.2>#{><8=U C>9֖1Aƞ>4YE|^Ⱦ?<>@5ۭLc>)ɺ7=r)Ὤ7P>{==DҸP>,ٽC==1ΟpžSg>v YA=}tg=F.yPl%~,=>WH>>!ޫ==v>3-7>% )=gUo>_ȼ><˲!iLs?">ٽ ڽñk> =fn<%=(K>R>!ýb齂r> '>,>;02=k >e>-˽&>=$qƍh>sAm!=c ~ 5>Ccro= >K{a<>q@۾IY>4 h=N dآ=="2>Z j>>>/^=v@>6lC>cCRD=I=u >yR㥾*=(U=U=I2̾>=}:M=֋D澆 h<>2%>plQ>W<>⼠ <=ڏ$9)>:}zzɧG=aJмc޼)t> >>N*2?f 9Hヽz(½.X; =Y>`oO>oa6}nܽ콇D><yͼU. 
*>aIknzU(]}->OI>,wZSsč6>)p}=։j=>)E E` ,>yyP>=Q㾂{lu{>?0=՞D>e1<XLg>S6v=#Ա'վ*c=FJT>iQ>V73>.=kSH>H=\>>s>/*x=?D =X;LY>:<q>=.7P>3ն=7iȽgh=W>N9>=2S"<=TK=ڔ%2L=V2z>O>/4=rƽ\+þƕ>}E[u>QF>JZ ;i>>n;Vr3=E=u:=b?= 5=oļ>7 ni=iw<A>S)>&>E>:i=+ӼL|F=h>O>l#)C)xL>ؗ=W¾Jx㭽_>fJǾ}N>^>==>_ڜ>ɳ>nVZVQ %S>ǜ2h9 =d;z>*=5EaǢ=|3*>>UǽzUB<0T3=]Z F>>Ԁ=gK#_ɾ=*|=v}=0=@=\^aY-TL߽C= >nb=kjY/t>m>d<_BE-'Ƽ>1==pj{k2>G=r>>Z< =x~|a%۽áW<<[]=MqVz>9̓>HNFzZ=;T :ݼu=V>r 6h&<~>"`9==kc=q8 ʃ>=A:>rQ=g=`=Bխ=o7VƇ(>>> ,> >y>ٜ;= >y={bo>;6ٺi͏y%x>{m_[Yř=R=۴Y=d#<ϑ>'Ƨq{|V0_v=q]͘8>ځ?l{#_=p=ǽ} jheVƎKXհ$=k~=`>k<|bX> =߀=8>NsD@<-dJP>y3>E|= =⁾=἟T >6=~C5>׎ت>~J^=ʅp=v=.X9aǽ =:>@=0>7>zG< > =sL=/Fo*>Z=oƹ1>>N,=Fu6_=n=5=ڊ>O=b=E">a>P=%>bK>UZ>r=D=¼== )O>'v=Sh<#< ==gk6>}86Fެ<1@r(>/=;BeLR;)>[=!KvwH׼@>f>e@d=rT>kSki=Y> =yK=0>;>}# ,?>H8P=߽>;PFd;μ\>5>B=,v>!Ѽȕ==M.b<<`r>673Z>έw">xx Yg>36<94x>à>H>":=&=46f>,=idg=˰L7WH>N/0BQ<Ɉ>S2=r=4q>D<8>0'{=H#4=sy.>WTʾT9È/ >>1A=6=/=NB>e}HI*>bJ=v]@>!>GjGEͽVh=j>/^>>ڹ=fYWE>-+>;%Ӽy<;=p#Tw=C@= O}=b#Y>pC<_zB‹+H=Gcj%>\ }墠=W=~3HE<)Oo>Y==.=>'>Ko DmA>\rQ>нa.h>aʳ5b:Ro:Ҿ Dƻ 6>HϼL=> +7ݣ3NjTR>>V=Zҽv>_+<>>y= ޠ> 3=~*>nK#$I>R>s ^)1q=;$L=l\=_HIzb= <>'QBVݽ>'=5y>7 >E-b>E#>e=Ӭֽ=SD7\Nb=5> dc=c6 ƽ>tw=v>=@J=K=Ǚb>e9> Z')UR>s ҽIs^O"=!Y>'6(>6T=6ʵ=3V;\> Bg>˽=ּJ>V< `=W=q>3f@?>_5<>-s^B<=H>T'y>ү>>VZgľI>>1s>L/d>|_PúElm>ry9q,= Ľ,蝾m=40=#>6=!s2O*Eom=ߌ>W =T>:ۀ\>f=i!߽գ=6%ؼ4M5 rkǾ2k?u=4Y3>־%)b[L#'$w=׼!ھ#r>J4ѽa2.  g້*?3{8kՇTk[2OmW>}=JKGþBH(>"a=ϊ/>UJ3iR*>Хu١R,. "!ӽսNI>(>uu0>1A =?=j&>(f>:`=:нsF4=#(N>>dN#>.\=PnV>G#Cǃ =V^(iW>n`>\Z>R!m=v=>Gؐb )=O/н9>JiK>F= 6EWo@g=/! 
=w>[P>6>Ӣ19<9=)0{1!&;==3>3>烉>Py> !C>z>$=@= :Xt>09"IFdB>/<, ;>W<`轄،=3)z=:`X>hY>>:h!Q=Ý>UeZ]jq>Bv=,P\XU>6>^\lԑ J>s= ㊾V%;%U>l>՞z7@ɽJ= b=8?;<>==+˖>2P>Ơ#u(=;+>a>|޾4>cn>I({<=-و>>}:>H>ξ2=C= =z>\>H>j#8JƇ=^=iL={(# U>FΝ;+E=qE=BgBqQL>#3䕽ABL]ic)IS>i>ý <^Y> > ]"R+R*>r4a>=*z-=>,>/3>Q>0=8f>(=>i>/q= N>y=v6¾[v})v0>" >^h>\s>tE2>4N}+&,=6x=蹽<,>@ ~4wfu>PB>QrKľI >l@3 >0?hvK7޽KY<>a̫>:>Lڞb7 ==F=->;U=RH>]X>(7dw==:a>'>0=S- <">{p^=|/ec9uz>X>Mfkؠ=h!=Ž{ι>nTUU>qSݾi=wYξ,j½j#AZ>-s>Ä=Wd^,5qK^nӡ|=vm> ?f>з<&Ļ3ZE>^t({6F EW>h`]=ѣ>- p>|c|R',>9d=}Ļ= d=ZEQ>fޑ=[> G;T0m+AJ;$%>l6=. <*6>(F=|>+=1qڑ>ZtI=<>=B[4=j;轓qμ[Y=BO>줕0cҫE d6>,6>>켺x>aK>>74K=W*=h췾ؽ9>B> 7>UqA>M>6?E6>J>?u克>4<^=(F5& >)6b'{ >W6<.A^>}DY cGT=;L ع8>ڽR>[^F_V=,=*:p\ãD=~2伃a<XT>r>^VQbbIa/фȽ=4<&^2e%(ܼe =ѱ~>@O'>:h˿*!}RG>TGw>h#>d/(=ȩh=1=wE>=88R>J;>6Gף=l#=@J=^>(=>=s)q,.$=a(>Q=;ZS;P6>,|]?<,>.=?=,d/z=="5=v=r=J=c=2">ǟ>]<)@W= >ʍ>3Rx>7>^@>;V&\6^U3/t#*b0> M|pX>(sf>2=]=> \+2e:ۉ> != >=U>H_> fd󢽊63(2M7Ӌ{G=T}>86e٘,)>?7*>L ku}?Ž ~@RB_vp>f=;A^hgW=AU9>R:-ܲàx<>;s4'= t=Qȷ;])* 89ϼ~Խ7>ᢽa\= uzfp>UpŽm>&^ڼ=?>+>dj >>U>^x>]f>)k~#>WV>p-/>a;'4{U̽.ݮt]sU> V?\= Ɏ>>b^x; ;=Lڛ2j0=໐F>=OK>=Mq=❃1>jU·>7̃1wg=Бt>g "gG=%u⽺K>y=A>F01=j=N짾5=ḱMw=ټ c8L9b3ZO=XJ>F/>H]ڼԃ>D>>7=!Aߥ=z=_>_=a;D轗s+ٽI~ =żtX6>> q-HE>,=>*%> >z=W<(B=/= 껾:>g< -(=#);g;DH>t>tиr'=2>$5oB\>8yS>c%>.0V>= &=5>e->>n=MQ h*>c5>>=B~\=O>( >m=.>HS-[le ̈́>$u >Yk>W>><>|>',H>Ȍdl>a$=O=st>bHf=F+>tHX=>i ξ̽p>x= WQrk=, ==H/=F>>*=>C=l=k0p>9ܜ,'Aٽǽ<ü0==>F=wo>x̾Z== ˢ>`׾U=w=<r%>nM)sK3> ,%!4-< Ot`CVY>.>qx=&4>m;Pu=bAB=z=Y[=z/=o:==:!<ĠZx=Ymv>e>:³ؠ==%> 0>N=t_?>:>甜;&W=gN?ˌ>PTz:>V(>񻾮L>u=7B<н -t<>2N.z&SlҨJ9=<#?O =hvX>˟=J> =Yeq=w> =ߚ8R ;>$ ;rr_=񀽫=>">JKdW-==tmd<\0>YŮ>;=:Kk>iQ`e>A*emXVT>6# S=c̽\>̶>R4!<>K#9&LvS/>-y=G<=Rƾiٌ9ܽ|b4Y=p=@=y\>6=W=t>=Rs 0ƽ݀=c,w?ޝtYJ.>^>ǽ (/yW"0<:=El=L '3 qc57<>5h$;ޖb>'M=0ǃ"I >"d='UCñU>=XGB>t0;*Mz_zᙢG5>p쵽> >= Ms>;+P=g>SXM?f=I;=?5>r==ri_F'T.䊸<=I=wb˖:*M=m>>6|pݽ&ľcI>D>6=ۑ;-B!j+>FN~jx@3w`ý>㢾%>ErþT﬽pc!u7=v/\Lr=VM>`>g7 zݥ>&1=Co>?>e>4}>>Q(1?Z7=XʾFh>Q&G>7=gG4^ɾW8T>_=P"OE>^>=1z^S  <]oJՕù1|Cü*=uk>p@_:='=V!!df ۟9>ˬe=W=H;\;v+9= 彾7=ȷ=BBѽr>4=$*=> 
/>[vb%Lc{f=A,_8>GO>j6ZG\a>6ƽm`AvN=m>3>2=f>uj t >*I=Z=]>x>y; ʽ>3)=|^#<~U>Y'k8=d7Z%x=gɨ=p>4(57;^>%< ===9)!>ąe7M><5>O=nõཕ\>^>7ޞ>C:hywJ>q>!r$>T3"ɾ8>$1n=j}+<ڈh߃E>0`8=k>:P^zg>>gU>L N<;=?¹x >FЕ>y>^<Q>Ծŗ7+VU>/x=9si.e>`=юw?1?>D9=t<h>#> >佾,fqVB>N<[!5m薾oF=^>3a?>u6>2=>>c<́=X>+ES{m$>>熏>9/u>Nlr=|a2lr5>20>=R餾֙>>ӼJ=$}F=R$Y= >/>Qʻ;5>Һ1>1PO>U%'ԭ=>|S=P=˺=J1cL0i>:=>Ǯfԟ=>*i*;>s.>䡐'^>=%>`W>8G=]`E>n>Ph>Dok>S$N=\St> ;ܽLw-)GU>Vr>o>+;/=>gʀ]d:>>Z>G==]*aX=b[j*qW[v^/=C>.8ܸS=L=鞭?=&>Iֿ>q===L >f>:.n>W&g>,)>OھAI<[=Ht=s=7E@>VD=y֥>F>ZK>/>J*>+뾤2G>>Íg<rl@=_,xx>JBq>@>=3D~=:/>-ĞW=u0=">+%?bh7a;ZԮ՝>3=<=q\^{!v9=b>3X=#G>P=q=zIKQ_C> >R$ᚽ*>-;\`=>uZ>g$=|>9>_.MD+ H >Ɗ'>Ck*=)=Bqy'$>>YJ>==ྒ>14=F$>A>^Bv7K Y*o>H,?=j=c:<^hl>DƾlА=A|=Ҿu>z=(=l"=x(>8K6oq5>=> >M?e `>x:=`>LG<䋽>&;С=Pvӽ5K.>q>>8=v,>j~b=՝LC=No=E>GYDE>%mz>E7<3>}= ]>̩=%U>Z=e",R<Ź=w>|=d/>Ӝ>Bvy>=u>~(q?ℶf)<><_$>(=-IY>=K>p$=:ķ_`=4mБ̾>/>mN=OnK/=2<#(>\½L>Tq7n?i~ۂ4 *>>;G=pFUC.9.>>>=濽%.d>w>NjD=2d>ҹ" 7=e>TOKI!=C?>ǽb|<q>L*>#c;>󳬾lC>ֽօ=̛{>=@>3cѝ #E>?0J3颎wc8.>W,=А> pH>홽 uĉ>|dL>w@==4=)>SW쵻ޏ=+=ª>=/jj=ャkɾw>dZ=܄>"=@l>N=La5^&>*Ͼ=8BK!o> q<7>>? 
>m \<>2=bz5>8EG=rWa">}=/O7$%>S>н=">T׋=9|}K=g>݃>?,=W;>]`=ǕAuڽa+ξZ=-n>N>>L4TH}-I=}$ {< ^1>_=c>BzoҰmH>L4>u9>>Z=23>8=%La/K>f=e>[< tX=Pav<1=fY=S$>,m;8 0s=]ZL={!V>ԝ)mh>uʂ&F>_H+n>]}>nZL>hxbq䮽>ɟ=lC>-)sr>s>I#ݦ=+·rҽ%ͮ>?و>%qIRI>dwAؾ~~rLCS>mL>L>2ms08>=\2ƾNޕ G;=.>ILT&%.OM364t>:?潐آ=ő>L;(f,>s&>wfq=C>l>mԀ=n> R>yݧ)=H'*l>&!&>iԾPbj>o+>YS4 :<#3=>>', a>5=< Յ>D><;< |>kK1=|A=>X{>j=#=>ZSʾ@Sʻ}=A9=־':g>>=x>>ֽ&xc/>Ô>q=7j=1>:>OJ"K= ~vpF#;>P`=_>z=X9=~>,ļs+ُli$=iU]^l=^?T̡o>E:H&X̣3)U @>8ý{o>H>ZD=2)߾ҁ=R>Db>ΓzKRT>A L.>;zEp3>}'>r><*L@c;UA+~l.=E4p<~{2G>w/'m:%sQ=D=#Q|(u > Z6[D=>,>|B۾׵>O6#8r>/ĺ=FSd G=<=5==Z>%D8T>OP=gI8 ==Cpi>S^Oٝ~>D>=UB>=?[~pd>;c=cCgA >0;=Z"=A=]M=V =>9=Q=Խ$=V1=hJg d=Wr;2=^ڊ>όȽN>cLM )X~[RU,Ƚ)>v( >5:l>O'<+j4>Z\>>k=`W>شڕG=|h[ ?9=%=Aۻ٦gU^go<->蛾h̽^6>3]Oq>#,U=m?>n#M=b>>3,E81-)E=8Y>+N`>K>m̾yl>W%>] >B>R=C܏~>V>|ΆqM+!V @Eg=)=na>& t=Db;'/jh=?)=-;[ g|B^>xiy'μ6c=d=hD>2=5C>tPn ;IE=[<>\rjx>@e>&X=巜<<-H==>*'S/Gtɽ/L1ZmĽ\9W/a>\>=a='t`p 3g;{7妒[/%>H?J=m?i<=]=/k<'Z=y>*=(9G[>>M94ʽg=@~n=0Qv!= 9=Q-=jߒأ=W'j>ƒ<7.>_I: ,=(`-=<>Y`= x>ݼA+ > 䌾%=u= cw/>آ?tP[Ҏ=eOj]>؜= J=>_<yT;T> u>=*˽}о{=A*j$(=5BBT>f9k>}>Ʉn+XR>t >=d=>a=> =U4T>KD)>Hޡge+RDQ>&?>|>Me [Ҽp m=!>M=c1=^^\z=6<%ܸNw>|>s<0y>P{)|P7庽+V<-ql!T=,U}/m3Ǽse7^զ!>d#f[CeP/L>[[=Vw:P>a=J>=#I(>.$tha!1=+f>`=3k&o)>߽s":=3R>=,�S>>E)>K==x=>y>4=Ҁ>ɼr7>Za=N>d>]>S;N=f=J>=D=Vsl> ;ּ'=!3<^a>^>< ^\0T׎➽X,0>TΟ>X`nB>< o#0y>!>۴I>:s>T9W0=d=8G[l~OA>i=lrJmCvj^!>f*z &*'1> >av> (>=c=/Ak>PX>XS݌=߿h= *D5rhKf]$ha>fQ~0=C?@׽0#r=)[~M&8D&`g>NE <D>ƒS.=X慬==߀l>տ[@~Z[r>÷o={2̼3H<ċ=:@= 4>f)(I>+^>GJT>>|GoD8=d;>:<֪?oCU=<&><Ž2>Se<=U ppXUW_l>:˾ý?>ܼ ܽ~P8>h]>=xc==YYǽg<>*Y=aϽow:sF6 >Ŭ辉Ve> S>@>tY=+>EHGY;[5>2='L>h%O>O͵;˾ʣ^R{>E#=2O<@v> >ĕ5R!!U>ϫ>sPARy< <$=t`>aa=ڸ>>jдm((>6\V>.=߃=kHzK/>o&=,ʰQ<#>ɦ1>0V>ѹ`>k=~Ro:=R>l[j$4c>I>ɽOCg=B >.3=y<>]>.1=s=/G>S93ӽKv>EHz7&z>pK>Iɽ&Е=>Q!@>Q_?О=X>۽=\}݀=ƽSJ?ܾ݄_7<>㐌 = lf8p=2>"> =cg>]= , :=N=7۽e>v>ekUC8 >:5Ox>(oY>;d P>mk>V>aW>CE9AohY>iL>:>xVgy+aO:s;> ?>E->6F.}=^ <8z>!yY[&,$={>>->=ͽf>F/>"̳5=*v2K>{<,,>v)p4R>i@>X>?նԽ0;WA>u0>({Ͻd>o<#$>g=Ľ8^K˾xc=G9>k&&=2+_> <ߎ>Z?>  QGĢ PYtAL>}h>N>%> ܑ>O 
R>>6w=iBhp½–/I>V+ް=5>;-ie>f>W.>ګ<4K=Xf>楽T&"*XR >:ًR+hQ#=~u==>0L >Rt(-4,L#-> >kqJ%>&T=ɽ=$>tsɽzU> /Xs>v(>y?b SK(@->阊T=9k=$a=ZYP>P^ >ɑ>X7=OZ$3|ăgX@6P>^Q=*>X>Y>)yk~HS>X:x 0ϓ>+5=>>">4ߝm`k==T=U= Y=Owz[i< 5X.='nV.ڔZ=uSbD=(վs>\|gPC>>q > >@)e<=,j>7j=[ρ<U(=q>M=>N=̋==><>㺥>E=>2A<&]TIA="=|`>!k 4{{=\=!>=`H^U&=b[=꺴w轼* ;ԝQ+>Α= >B[<i &L>/">Z)=?>(:>Q>K>XQd2>xi^=0돾k>-ٽvĻ=R==3Ѩ=t>7U׽Si!>m =U<Ŵ=>* /V<=j>趾=6gվj<\+=`cfOE>an>d_ H>=!>b=;>L>=hW%9irkp>}JH:=q0>!>qh>γ.n>!px؛i02Z#!󩽝8Q<ژX;< W&ִ2^=1&D&>-=lXfQ;F!>ْH<3> Q½oB=E=e3=!v=bF-_ۋxmjzW"ߴk 釾0.{ѧ={t1c>>6")}$><>‰Ĕ>A=Ӷh>9j= \ٛd¤;>P9=+)gP>@{>= =Ly9=]㲾A۽wCn>2|>>d=Z{o ><5=Y=V>j>H=0FoR>2 =L!i>T>m޽>s=MerVęSG}4HUlR>Me=)W WjW>8m=Y&>|ν?f(>Sl~\K]$ؽ/=l==gxQ `$2>8:l='>jq>a=]Wӽ`8>D=L4>m=t!n<+==>U=>o>j>WBP/f>\>,?> =mδ!=}%!>̲`==:NK1=g>n> >> 4>V=zgR瘽Q!żE{ ɽ =^=>9np=6<^ ۾d)>eн9 좼=B_^xK>bG5Jx3lo#=!>z3>0w< ({ǣ=90^ 4EϽJ=v<3;7Jƨ>^ j{=7ɔ=n#!>t`ʾSe=()>* ׽]?u&H>x_i¾ v>DRڀbq==گ=0=)X=r>WNG&N<.ƽ= <>]4P">͘=5*>==>R{:ܧ=iž-o#0eAR a2=Bý2"=R7>x=)5>qW=Rr޽6vVM>~= <_SO>y3̽G-<2=gS=y0˗=>BƎ=H>6'b<`l>r`>t]>.$'>* 67Q9a>u=ӏ=æ@@Žڢ=.=QgNjZ=-=ր>91Ei!>av=x>o>gk>_i߀9 ʆv)?/>WSp(ܽN>'S)t׽Z>iU{=*=tu)>JW=:Fs N^ TU,5)(2=*>ksǐ<=SH=vL >PA=l4<r<>q;Ͼ2_;=6e>x=S1>6YN=Et=uyā= 8R<>3!핈=[sN=IUZxnMؼ[LPn>W<TCi6z=Ȁ>R=(= >x>:=NMXNaV׼V>o./%>I۾_U; =Q>^!=+ J>fB[_L5*w I>y*0>]R^>ϡ<8>c>@ؽtNM[OGѸ=]s'>>Qi>~7>! ;E.x>'>CJ<=|a=1^pr>{gf=E=*=w>'L,=H@= =ug>s`>z\}>v=BX(ֽֽTF )=eXսzFE=>ɢ>ɳj^d? 
>[>AY6=!iN[Ń>>'A=Jɏ>X8=*ɼd$s=2/>lھ%S=>w =ɮCZA!=>z^>a&=N>^s4n4>uQ<#>MUK >F׮*>t)>񞂾#r]=g>5c<[r\=b=8==ԕj6!"zbX&.nF;1:Rm< >k>u̽m3T9Ws<:`=<=A ܢ^*=_Խn48>ydL nQ;j==cZ?v>< N۽>X,,wY]>Ҫ>= >H<}*l; w=s>Dc>B.<~ǽp=1>6>rڙ8R6=.н ==Lk=!$S >7ɻ<>e<=#ȾhnC^Y>B>φ>߾uܽƢ'ƽ> /E鼸oه;%Ri$HR=rY=>T> (>{긂洽VZ=s" >5=;;> >d&#<=>>,8bK$^((b=(n=k|>=p>|St>.dh>d<l=ي==>>ry>u1>4DD]*:Iɲ-(c>0"W齅S|Ǹ&g>Ê4=*t+>%==,dh>V=%˽=UC;:v=?q>a# &='úYI!'ܿ=:>pU=.4}c=>&."8Ã>yx>/!>摞=b/y=I>#_>( j>?0v|MY ?>#>P=,=C< $<+>@w />>S>`] ?=-A==W䂽F+Q>f&3=|ff;;H>*᳾}׻=d>ܦH=p>,>d=ld$=G>/=t=ZFf\ _=Ѝш;X>d>Bd#>xǽ>>>5L>L>I-P >6۸==T\=}wmEI;>W>R=>=p+zTB:=q>Xi;=0>"w>vTi>%̀ E=_ulu;G&=;+=\1*ֹͧ>&>_DI>̀=@ݒ>MBf:ְ>۪R;hZb)8|=A\>C C;!sVOo<>j>&יT}#=Lt> @ȾLOx=z ='ș1">ڲkJ~(3v+۰>I=r >(ǽ k{ЍL<8=&1g=% ;h ?<ژ>#H#>%>_t(%9]1=(a>p]4 =tr>-쓾uY=b:=% >@>d>h?1Ҳ,>,}PT>=^t>L=Ƚ-7o࿽ a>څ>=M==;~?'<7>gܾ!Pt>H̒ <6=@>5 w5<>Y\9=i~ ==׼>2 nHi= >҇ ?ZW\e>}(t=6=ђ=^=N}=`Xm(6=(ty_>͠<[C,펪=g-=@=>4=ڜFrMCz=>()>L8@vw>E>d>5V= > n=>QI#>*>;/f>\սk =FL_<v?*>R8>5R{=S<'(0X>>$ZcP{haž<^e@ܾ bnhڽ8<͒> >בd><,MTD4>==hE>8=i{2t;> mQ=[({_> `1k?"/ >=z=3Q>U>>=H> Rps>ﱾO=N[x>G='z ;+=읠;I;= 9p>ڼo>X⯾a>H=HrĴ= xaaR=]f@~6Cվ>ں=Аj C=R=E-ll>շ>c=C*>=;l>־ʾ>>T#3.۾z>>Q>>Y3N > P[ >?ܽ'нv>> fv=b>=>&f==<?j$;L=gjl&g>==;ϫ=P>MI'6<->:IB>}o=nApW ,<\G# 佔 v:| ޲=?< yxk+3myy (l\_kC>??/>iA.2վ ̾&ɻ>S >aH9QϾW-%l b6Ea=^2>(o==Te=<bx=%>(sW7B.s>K=X=[j=9=6cD=G9=#4N>нF> OIr}HG>>Aya=<=yEg3> >(>>f=bv==Ɯg"wx=3-~ >^b~>~=@>>%8<i x>w> >^T8>{}cɼ>5>;n> >܅>zw½ȳK;mnsx|<<#uS=p=fùs> b7=G?Q,==>>Z=o>, =P[>FA;={=JNV)<ܾP>ǁ=%Cp>p0𼮐`X>]= @  <<]>8r>5{.";S=ZUm><ʒ/g>|=u-I>pGH>c}0MN=$?N!?]!>d> =i Sv7>^۽>?j*~=j='St ?#^Z>'N? 
=>"=G=%0;?N><薽ܧ{!=gl=FQ#>eI>?a?@;F=uh>4FtP->@J>3$fD> p>]>@=GW1 ?G*M-F>|ֽW˕4=Ll?=ǽu D\{>^_L=a!j> > j;>IF>T?<w8;>Ѿlg5zyp">ס>=)<$kvq=8dY>nD =>9vn=>`i"!|MͽfT>\=nl> V>&ymhv/== AC~'>&v>">^/>i=F_K&= w=G5>U>ݭ=h?i\־><|>2S'hwS[>:F>P>T=HeB>/DpMJW\<43 v#(I>޽L=eUȓ=>P"=8[{0_:";Z:>neh>S=8>={/.= =n#xS>6SUv;<߽-7<D=2^1zC 転 @>Uf)Dp*>tiG=Q3&>G4>a=<蒶>x>d⽘C|0 Կly䱰=ܳ9(=$B7><>֖><*'ɽ(?b->of|[=j((X.>.;(=Hdi"ӾU;|+?.i->ͽZ=>*>>;ij>Hu={>yBe>E>rib;w_+=t>LI?>ܼKS>0{rCxxK>jCȾA=naײ>c>Cs8rK>6><>OAc>E>F=O>2 =6N== Ƚ2=Ko>\=~>ϻp >vş>T󄾌 A>8=!-">bŽ5"8>$> *>m%=m=z@P=J.> 5i?d>TT]>M,yj9Q,q^;mS<ִ=\=:1>jR=G>}>ZR2`?p/m>鐾¾C39 ;O)E~%58o>eFrv=d>A>ϕ>NJq'>xUY> hfEq>=n>}.^.;pӁ=-1ٹRi==[.[((y=> =6e==y_3?>>ֽ!>=f&W n<)\I>0 $;P=t~=5 ;>=X.==@罵5)>>y%Rz/)=jРJ=L=Z>8=Dd>^x=yWyժ3>=}^'t==k"=>*>Uͽ=?/>Pvu :=d_>JG>=9=OW=ߧĎ>{?D>kھn5[J͒>; >đv=%=u=tL5>y>Qd=ͽձ=€E.P2Ľ{>C>>UdK=")eFS>~>ٚ د;zS=a>0= >lD>=Sb=]>j=o9D62TZ/>y ,>)=^=1R>ͼV=E'B>zy˽u^ml3,_+>>z&= >͛Nf=랾:a ܾ:>>4}><~Dqmkc>=W=du>E>+ߪ A>a@>uWA=?e>Fm>l^>!W`y>3?>,z=!6{=ZeT<`H\?L=ǘv}P޾P<&=(IލU=\gʾI=y><>~%8=ܾȻZʽ:<'O>c>>\ZNʽRnnQ˨>\%ݏb>S<>=Kx>h Rqbv>+> >I 4>.7vN >t.><)>ê >iͽ=DGݤ=-/Vf=g%m>m>Yٽ=|_C>Kx=n=OȽsW9d=I=q=c <,>5=0Hܽca5EW*>H"Z=Zx!R>½>a>=$TcW>=uhm >&>=gv7*>>>c>y|1V<輾~b>>Ǿ}=F<{F=+=WNԼ>O=h>_.qLE.q> ӽhuwɾ>(!W8ýP>qbm=>Iħu׼t<=!]>L}BU=_Y=x> DS̼g=J5`jz=13=WN}GN>_9džؽ>&u><3>H>+<]㥍=N=fP7==B:e<^ =I =@h=mF~8 b\/c EPm;>:P=KGFL4ZQ>~Z `>ʅT=nI=>߽^''<> ̆Tp=8{)ޑ>">Y3GɼL>[=fw'Z=@=mC>BI=>|6=^g+i>fI?H$ȳ7nl[1='!Ø>剨<O辡-(㽜(>lR=>tv6^G;>'=>G>>=FཏN=F%n=x>f1 ގ=۾ޏ>X>ꗒ>l[)>=->S%X2*>h}\<K={|t>x>N>HSӉ>=ĉ>"׽hPaJkskR>P>#=bb=L>vM^=<@1>['>h >άsO>N4-BZ=>=}G>9AΞlb 0>=6*>|MU<<ˑcPc=w>h޾;P$Ύ)Y>=+`bĨ>\z=?V-1dش9~>\P=_;M>%n=0>#'< Ϡ>'/>>=ذ>U=HNq>ߨ3 Ͼ]o>4ܩ>d>Ɔ =j>{i>/c>(H>;>9&m+~7r=3L>stqvm0 >> >_st>P>Խ?)<[jآ>>LFbd.s<*={0#G=|u>:>ѠK;>\'V [DwĽS^|8> :=꼦 ;=$>kFk[[Z\>] >=:=23=c>Y>UJ>2Þ==<9Ž|DG>x5>)g==>;N6>3<!>)?rL=>2YI=@+>)Z=S(Yԏ>>r?:=#-w='B q3;_\=ƣ < ; =}=6#%>wF]o`VU۾2>֖wO=AHa<[J>n]½ 3( =V=Z<=ի>kb|]˼P>Ž?ų=S >*>Eڅ>G} >ƖH==bZF7<}coz>q<ݻUƓ>&I[>O`=į >=!>7=g>Bl>*=+`=2-d>^`:uD ⧚>g>zBaL7>uS)<ǽ 
=x=d!>dbŽt)Ľ=6<UΪ>>^>ko=V=hL=!">;<^̼lžux蕰h6>h=>D9>Zd>+Bn>e=@ھ;1)>{>D}<5`ո1D<ǧT=b=o=)>+>r^>o=^Dh=It=ʧwN>o?qW>b=#>Я '/{={=A>S;ͽ>,G8= >$`>81سe>ks>ŷb;>Q=WW>Q&:>$==K݀>=D>ٓeגe*=~yգ4= #Zu*%1;6>.; c Ӱ ҽ>>b>/ƫn>y%3K=5> BBlD=Aρ=>=c>^;[?->B>q7>>==[>n< fj,>DU>0>;}͸l(/='fֽW?*>=+c}Ծy <˖>9>L7X%7xf=> 4Ž/e ŧR<& h*=r,>ق=*N>xer{|U;';I>gAҾ=3-0">5 tDK=\oh; eֶI:>?=?jJ >`:f˜>s;ؽz=k6>Ӕ> ;]9K>I tʘ<тH> =>c=60>}>=&=J>=:=&Ƽj&B>j>J =)nטN&> >.Y>C=.I!=g=ԴS-b=>¢ԓ; {'=Y%y&$<j> 1,JC>,g#=q >tz]l> >A={>Az>a0 >=ѼV]> {>a @=:>QP3BLV>>BK1>qP=U\=M>(Az2>P0 @6Wp̃=ܜ>R>t =QT=o(؃MѽF>[n[󽶸>ɭwVu>[ *0it~  G -=V>Ã>AD>|c(=l=3"Z+>D=!:= >ms  =2= =zV<>ܩ=Ƿ<}=tI+ظZ=S= A>D=9Q=~Y =^TZ+,C=SݼAH>>K=dL#Z>KU+=^*>q'p>ȹz>PXX>">0tຽjU>_+=Q^D=},Η+6:=R>D>9C>| J;T+L<>|>l)~>2)&>Y>r>2.<7\Ԑ">=ѩ#>uY=tT-1;>"kmW#> $Fi =:+沷y|=Q?>uPIt=սo>M\ ޴=G=7iξ>WZa;M33> >t8>=x5=>{Ͼ$>O>=OK>6.>>gHT&ԽL>='7b *=e'K>[>>0=<R9Z3څ>i'҉}»>zH=Pe>oXR=d=9> l>e"6=Zj;0<ۨ=7~Ldh>=xf[QB>)=o}">Mty>G>-0I>ؤ=sA_"=!diYaپ^y:ҕ>T.~<7>f> ->B߽I;u=/PX>F=y=qMl>3=o;N>4\ZR"dWb== =ǽ" ^0 G~>zQ >UuB-=,6<N< >*;5KؼDp>z6M}嗽$&Lc>͛2E5>Gm;=b%P <彣;ô>ך>$>.[=d^>=P=`$މ>[>u=%+>bDM>yk=T߁>Â>& W6]C>=#>-N^VHO>`>3#>X薾̅MJXJ=>_>W<>B;>wkIwTn㊼ =>f^>:x +>0=Y='>-Q>`CJ=0=l{%o>%þDI>ǽtࡼƍ:!ӽE}>=.0X ;K=iռK=rR n<,K˻a>"=Z,E}<Їw(O>w=B=:݅o>5 >\=F3B/⽧{>]s="-X߽4FJ=x9>* csF<>-d&Dν3?#=4\=d@%m`"߾>*>K=D7>0 w kSV> WaM;Y>vr]'K]> =-m=/Ot> 7Y={Ý C>==QR<@MU>2X^[䒽ӭ>youG=iېՄ>(՞2< q=~T s-=jY4:O꒾U>V=ڮ==7 [>ξ=M܁>t=x>N˱>}=j}\G2>6*><1>MOȄ>g [9pv|n[;8g=-̆>཯ >L>m>=Oq9=!R=>F>ay >I"W>ik\>vy>A<^?.>= >aTѽK>W%ɽ2/>=7.>>O>@a@\Ծ>'`;0ʍ=q1=Om ~<֓>/i Z:='䖾==+G><=%=4Z6coT> 8>Qt>S=QI>rœAN[. 
=V>҃ս\ < >XB޽U}>Ot]59JĽ&: >k>5:|RDl]= Ϲ="/Jt-=Eg>L.>u+㽈=x .=Aq@u=œ> jkγNx=6B=P Ёo]œ"c#0=r0w\F>b=k[S=YX#<.=L>D XϽM}vk>Dz<<3@-i.=OUKfK=}w >=դ8ֽs9n;Zm;͓>Zj "`]A=: v64nսsN>>ӽR&$=a< GE$d$ƍ>{ +>хpAٽD7> >sc >^>z?=0]>>5K=,>,OGj;>ob>ͽ T9ǣ<>=:>T=W8D>%>`v=f>,o>/:C˾>>Xz=>U 6,")lп<+>r>hB=}':&+%HXW`&$>`n$!>?Ig=J>=;> *>7 2= )mm>&=<+.= >4W< =8b>U> _̼>*o>j=g[ck3>h=ysH==D>ѓ>d^ >}=N6CpG>8>5"tYрu|PH4/sz>b>[ >e<c>=-$>c;P#> >a >n>6rvh5)=]'>;i!>]BD==D>=Nd=}=@u xƽjrq>Y<>1>I\9>š.L>hվ>C~Ib> >8Et+V>;ZK>r ">TI>!d(c">=A΂Z$S>v>}bf=Oi>M > }>Tᆽ羼<]޽=1_=V>Õ>Fm=;r#><,>=X= >H7C>Nܽ=k==ʷ=Wcv>=ci >o~/zn@#>* >+[R5>{> z(CA>0C=T:=<x q&L '6 =RAGu:6k?!>,/.پnJ>B$W~q{w<7XC1`4;L+]hм==G@=h!>* ?=yM+>Q?=>8>01<=]=<[B#>l^C===*',= P-N>E8>Ƒ[ /޷P?ʹ>hbվ=_>l;fVG>OJ$=<‹r%X2D!jqv>~;=k>dz|>0=%WA=Ғ>ҽg<-ʎ>і2ϵ>2 > ͠=5o>+?>\&> =N>=<" >ꐻ[n>!̝=; ^ >>jE3Ս^>bĽC>H8>C.׽&\=a=YyI< >y6=Y|>ғ= ?9=HO:ܻ?86ɮԍc=Ž$ >>>/A>_|>M>.>^}c?#Y>VgZ>UX0;Խ>T= 3(a)>HbFT=.>t=`>+D=̭^N=n=j>%!ԪS*==>䰳sG,>\=(<r><7[˾Ec}L=9e==W)^];k=FJI> ~Q=5=zD"'JǾ >fLT+=(>n_ >'k0q=zm>=¤+~=0=j>I="==ײҽluZ1=*?=QN;;<~߉>BŤBu\=Q^|^iL>P\>Sc>=>۽6Y=> 郾@>D$>Bor>Q>s6(>.X>H>hy]>===*$l=c#9x:Z"ꣾꂾ#WFƼϽ X>Y=+>m>x)=Hn EА2=X>`_Ϳ3>"? ;fV^H˭Y5=dgǽ oɴ!Mt=~gfzP>o<7?>dҧj:0=fm >;Qn&K%>^p>V:齧1)6dKgU>D,>Fvy~fnB;>:>Ra>qSIA6<>3޷#>>~-~3ln:'!=n<2<t>,B̾Uׯ;>,L>q=f>収> >^=TIvJ>U<>=N\=8?c>5#>Ը M= @ls==`&;=p=7<,ݐmѽOke]>\2>L=e7+~<=]\=`=>͇<_?>س(ba>)k>.U>.="=]>;fS=򓌽s=g>=~=t >~h>V%R`z4<w̽a8

ȼ =L>%LnپN=CA=v =;WCܬ=S]=<:мo;>V=;>*> =&$Dl=9 =uSx$ B=>)w0=F~=6>"&[Ѯ꾭+:T>P=e6=ͽԘ=U>L>~쑼6|>m>rr<&=~uŽ?[ƻYu>};=GDM&vs{O=C= EQ)8>P ^>=l'=02b=uBeɽLl<-C%=)=D=^;- 𿽊PX=XԼ˄+Pِ>xI=棼Qgb;Ԯ=[R>,9f>UCr>BZT&>,>T >>e=!H=AcId>o2LYtWS@sN=t?==D㉾iiy>NU)½ > &ؾG>% >`=> =|>=& ->||F;>f)gDi;C '$Sߕ>Jn=>v?!9n>}绾kF=t¾A>>ν^=6=o>==>(vὑr>8NͽלXܤ=s>5]>xt5>=>w> X}> NE7ݬB>Ÿ>F><`i%<3Y3<>c=6@Rn))y>Gv>`o=)\+;FU=fu>k<,K] &Z>"=h >k8U>"Yɋ<[`bme/[idL=׾ =>rz>я>< MX>:ɽrH=^3<ɽdxsҨ>=t=ز!<'U輓8\N=G˽S<#>Z>=y<=#x%>>>*A#Őm));S6=. >RoÞ-=!F<=p">si>=evp{>Rs>8C潺e{˶ >3/tvIRԻTX> >*Ͻ^E>T>-/4U>>>xTZKG7=T$᫽)S'>{½H=x@?k@*>i(>>t(>Tc6W־;=lb=><>M?%=% Q=u>L=ZǾAka> >T8>/&pfO=x ޽$t>hھ1M="<>'*;>(>=L=~)½k`n$靾V>߆I}>  <=5>>=<[=F<Բ>Z>={/λq=m>]`h={J+E>>oȽ>>ۧ`,=H9<~>WL_`=^Um>> >1$\?xNq>ekxAPK)Tc>ټ-;V0Q=rNa>p=@r[L>8>?|0=wн<|=d{ W}XSf=# $轪8QUz(9>u=> >Ԅ>Xw=ԁ&=@@qU>tA>eA;2>Vm]>Ʒݽ>&<>Q;>w;q&=߱=7~;<=V%>หL#|J/4>/ؽ=F2>\ڽC=f=͖/z8>H>"4>mLx[[bvxX>=U߾ھTP>,R ?2rE>7-;H=kƝ;bZ 0<=;>=`>ciν>C@{C>=I=wr]>' =ȹ85Bb>ȇg f_a=?u=Vu >/)>%.>BI>v>ss׭>6>ŽK8n;%wuv>ڻ)񼆱t>ӻ=4fX<|1g[/Oݽ}|=a>u=>Xnړރ>T,>z =̾wvbr1j ?=U>>2?J!ߙJ$Hxx>= H>C>3>-rP> 1=P=,1L5r>>~;W>"C> ͽ)s@ >4.>Aח=e]=sJKbq= T|<=M=剾 Y;Y6=Dݠ=<>_ց>EZ=wW_>`2w\t>%ؾ+9>5Y[ǒ#eP=0m-|j}N((=w~=en"1$N>Ik>${)<~>\cʽNg&1K >\}dl>i>Ut >g>>P_j&K]==Oh<|=F\Q g>Hr>=7=+>s=w˾4>__]P Jz> >h><=Wf\ ܺb:>˸=%)Ͻ=ٔ >n<]$>dџ=e=Ck,ٜ!oJk(>74>1w<|>|>M>i7C[>>Sc>}>EE>==H> z;)->0>J񼜾_nI[e` >z>@U=s&!>K>F=Ϫn '6-P>wXBEUU^XΎp'J-=>^趾:Ƚ/>Av)r*S( 7>!= >R>$蛾Ni>$=SH>>>{>rKFt>fDs=T0f]= q>W0>(7>J(F.>3!<>$= e ELM>Y>315\> ˽lx> 9=Q>G>=>0NJS>O'>>ƽ촽Y\>䦾5dֻ>t ͐>fݽdo}RlRl&_6Z=p><ξdh=,<ý_n>Y#>x>#@>+ p0 H >ּR^G>Q>?k`ݽ䥾  +,<ʼ2g\RK>;y=3tf=/>yb<081>f>!=O :>xӱ<7F>>x>j%'D>`˽>z > S0G;j>9M=-X=3o>=χS>=iea>*=8{HT= '>0=ӟ)Q=_|k پD`o:w>8==Ki==at=.q>I>_9⾼' vH=3,i۵ý"f*kz>ƒ$|>41g#>wx=OQ἗G߽D/#۽;>}7[<䖾m>&j#w>UT>8>]֌ P>wDgDx&32>{=ؤ>*¬>>۶=Z= =ܫ_>&]ԥe>t< lm|o[ >3u9>̮>j<`Żj,>?rw=LսR:]4==ξ.>rSmp=o>̳λ;<>&i!nma:==KN?go<=MK<ض'=SZR>ҕ=[9ʫ=Ma>8h\ >)ys>ꉵ=/A>Fi=->R{*> y=T3w=>> JT><>6/<2̐7>R{19I=I>Kཷ^=IK9BS=>)=8 >Y W*>0=- ᅾ >=dI>V4> clj_^+t1L6>&O>=ю<濮>>l>ʗ$>g B1>wξһ#̆,> _,3=M>͓d.ؽdD=t=->)v=K=3 
=XPK^־-iwD== W2Mj>}894>y>*!>t=65>={>+==e{0Z~%> =5>t<>ܞpgw<>;ҋ=~mo=WW>WvL@;e~>>ܴ>46g=$>qJ >-3罓c>R=x%K)><0e>k=lm >>̽`:m>; >=S6F,>uW;Z[͘b>U6̽===P >Z@ND>"hl7e>`B?oCnj#yf=ڠ:I@+ֽ彖b=8 >&|ז=b^j\ =z'=𦡾>SgY=(M>=b=`+dW==sm>A _bm>N>۽|B"=$Ψ=P6HMJ 8=Q.>j8(>Yy>P>u>֔(yC>}q>Hy',:VԢ= 31>r>(Pf>w*%;_=5>GuR=L=擾>>j%<6k*Y"MĪJ=o=Qz3{=W#=7[qݓ=L= 7:v^U=U{½>7) >Kƾ/eQ>/<)">1,=!pkἛ1i==ܪ*N%9>=?# MeDs= Ⱦ-q>8+>S== >Ô>jս?46A=> ?χ!m4z>W>[ '8ܼ=e=Wg>@=6JT =<5C..=[09"A:c>aX>U|56>nl4>R=瓜.Dh=rR^U~1⭙W>çl~'̫=dPM>~v'{ݼg>,Q=N^XO>0̻:-E]F=O[$=Jt >]>ܦ>J>U>'c;[s>>vQj>Ez>X =>C ӌQH%> ov2#9M<7b;/`:Nf >u->뮸,aOjh0>置x>{>"R˔A>p=L'dپ!/(P>L"=F=!k;>h=EKu*=j><j90>=f!2>վ)c95^B=<د;,zF>C:=h7< !C=wi=oR<=8sD6>:>4c[Zョv¿a.;V>n=I.<6žEZW>B>C';g>Yh=7i˾by5Y>ޤa;<݋>0l=yϽ ہ+9>i:@]͒=>&jH>IE=ŒG6^˳̾a=n=/>==={>>o#>>Q>}}>˽, >"=->%=puGàAϟ>pTP>-C֑&N=yI  YU_*ԘE==>[=(=Gh=O:=e5A>B>T!>v=AQ>Gj>Y.>sBD =>Er=O<yJ6=c>b胾q>lh [Y;&== V<~>݃>GK =gg[5l= %=¾c>,'=:8i>|/>=7Ǩ8)1>D$d>8{GQ>>as?=󸫽<"=DUʽX!R;=> +=3V%,==r`>ʑd$ie/>; S8_ڽKR A >=4q8`>X8qW3=*(z=g=y>c-?P8B=$>tR<~>I=`4>?ܼ@ň=/$=c^pn9\򷴽MP Q>0FM'p;=46=xq=L44>,Q>k>_=;&7:^73ѽ;@G>5=sq==hY=Hr>T=>J^=1=/<=ʽpK}`A> ټ 'Q ;= tJ0ɝxM=,>>@:1n=>P>UϽ9Їݦ=\=ƽG:=8:{M>= 0 =Op >'Y>+ Hy־$=L)GB=5(>[)s>=xܾ>H{DuٿEbn8>>==k{K=_t<m>O>!W=:t>(= y>=|d.WC=W=' ѽe=5{gQl>Hx̽8>*=o)[>Q#>R<Óc>ޢ=L 9>?<Θ>p{>ɥJ>̟o"O KW>I6\>J>ob=CT>g?k>[%>*ᚾ`=i==e:=~:>==(̽ɾlDi8~~>=3>PbM;>L3>n=J>cg(X9= +=rw>t`b[89o=Z岼^ Vc= d=E0&Ax>I)z׷9K>9ݽ<>$U3v;=Q%>1W$'9|t=<_@,L.>>’=J4}g'{O:=5g>^Ӥ)D>i>Fy>Nn40=P57;;n>aK=׿=p$:ah;W=<">־eg<=2=Z=eC:)AEH>;>r:7=>䘽S=?>&Zž =oO;m>=̽ 2s>u<#{ؘ>y>/ `=K m>y $b>+뼻 S=q>fOu8h+>䗽Ūju=P=>oHvoZ ˺F &|>Y2>'ŽG,> >VoNF=J=]w'򟽳`>c|HԽO<<{[Ma=Q%x|>K]>h =?=07m=(6>ΣB>ONH{¾s>/h{HNc>9RJ >9BT>mq42 >xS<:}ۥý0>%:S,>=u?=>Ľ_>D>$(>Ew> 6!Q>y>- u=/-S=ރG> S=t>vYJ>W>\:>=69h4=;=];+A\ =;u`C]5>2=JRzR>;5|>Rv;#[6MC>_ c>jh]ЛNZ::>A.꺝<=O>>t=}K>Ayt1+`ǽQc>܇1=c=[Al*>ߞ=;<EIƾ'zu=:=m>K<"zpB>s<~=g=]$4'4<$^>(Wo"ܽ'B%;)>2'uP>6tJ9>X=:XC=#>-N u->qm>~=AԢܾﯾO;:8^ =9*H=p9>½@I"=;ѬS Cgí=F!ug#=|?>%#38 >'== >Q,-c>KM>\E9{>d >/ݫ>󇾫=ObݼYZM>+>>J4=z\:U;=w<֝``Zj2z>ֻnN>f{Ⱦ&1a^X; ͖=?;=KgR=mֽ2>/ c* 
Ӈ>}>mI>C>V>><}=8oƼd=j:X==>L_r>W<ԟ7׸ume\PB''>>$GP^> ƾGA'@>$nǽCɷ>7R3нZބ=$,Z>G >c=z>֚>s=~>.RQ%=^MW>98=r>{=ʭ>Py=/ע=.1zšW&#W=>=ł=J>ʾ$@<~>57#_V(>'Nꞽv>E|%=ßyj>4<KC`l>*/>8rq~>yģ;Jx>&f>>_Ă=0x>>n>b>cW?l#^g/=†R<ڗ_>[> >÷=l4:\6~ނ>$clގ>?R>H=>#U>ƽ>@u=!=G- ]>ڏx ub=R~K, S>'>ƒ>.=e= =АN=E >=[>Js +>7=ˀt=|˽=檾`=1!<9f=~>B"i!b=0eY<}=>I>M^$bϼ=>b=暴i ٲ>= 8\!Kξ{ 8(5>߽St!=_<P<yd>&=dR=}{>acd>[.>VQ ;!)>E;$f=[c=:v>ؾ]>#z7R;?=<>_X1F<9r6c=:{8 k=(=-,k5>Z=`'?=> (>k" --Q>> >#2>n?Ƥ>=稾rɽVy>K>GV=/>,/9W 4xޣU4>loZzsbRUͻQ/>>ݾ]➽&>=H>>خ u>S=j->< d>b̓< y>-=Eg>68>76k=7BMp>0>6p=\ҡ <x>caHБ!><ʫ^>=bV{^>u|DV>EF>lh]Py>Ër=mܼJb>lTn>S <Żk D<>K:=P<ýp 0=&>}m= =),*p=f8i>=PҼA<@<ľCn=7sR~=.=sm=S=>l%>ã>֢B=6[>=1yF><=F~i0`=5=qrM/u>ĸ= f_>j<>>LRa=+>ѻZ :E=ܽ =J6=a/Ӿ[>8$֟=7>A& P:>N=,D>r;I=HC>>=O>߇=,t`%i(P=Ō=#ܤ58) !>Rټ þ!>G>`=") =A=Q{c(>gpF>՘=i=g8_׾ =MUwӾa9=g= ߾qJ23 >Hg>Yh=!y<X=;ߡU>lh==QY=e=apb>\j]1>I>b.8w[ fC>W=@V(=A>Ӿ֔> Y>½iW=Լ =*d>>JaK0=>37!7={

:>L >vD>ٙ'>5)=M=3 >=\:Ͻ>٭6iþ(>>T>׀O>&&>DO>[^=> .=謁G$>AN=H>=Y +5X.C>>gkB<.2< > L=>W_>—+u.>D>}>;sb=JQ=>[4}?<Br=7w<{;\=Hm.̽U0E9=fq>0>iK\>=A<̾ &>\>-T>r)UZ 1= =E.>R=̺= ==)>u>g%XV>椬ѷ }1p=ʽ">g>Y{+^uD6":cېh-=tw=3K=A-gok>8>mK c=v#=HK%{= kV>B+̽!2b=;+v:.=K;OpKq ~>caE>G@䐽Co;b>qf;>kZA)_Efd^3gȴgѾC>D>ےDӉֽk>qGy=a>ËV$>┾ >d7M<Ї>8>f=e>8@<:=R>PK~C*=&޽{1:;+*>݁=K>iHҋ=H5>F gm=ߙۑ>]r>O >ҾN0:="epM!=Ŀ>7>7ھUU3;QԀ>F>#ZM>:=V>w$X)}=>YF=qy(=nj==><s>`Yh ;=ףT8n=e> ?=>ӽ AK˽lN`v?>&0+xٽ\KZ >.%1Ȑ>Z$>lL=3S<>^=I6.> )>WLc>Ar1<!;Ͷw>&-}>X >IځI=s=$> O=f>ӌxN>¡y=;~(yh`֪CV/>eagž>s"_=Dt;R;?@kg>hー;gs>kKɄ@>-= ^.c% ʽ =:iu>>=CҲE<K=Y]O=WC>*ֺ =Nb=h־ >V>}< R>-̼p)>ͅ>=˾amN ol?a(=u*ѽ7=.>C> \:Ҡ>j> 5>9U yȾ h=P=f;>?O>=0V>:<S<䠽FA ߒ]=!?}>s7=<=jJ=> w>=`=S,> #I1&=@(Ǯt>kļ;ļt77m=ƺIh-6>+7"?>H8?}#$mge>Ra[>X[ a> k>Z{O=S*@轤>``=q6V='`c>2?i4-@VӅd@W=]=f*=`m;H=ZQ=J=l>^=)F>lp5>$N`8=$-j{>{>"Q9_&M̙ܽ>ٽ)>FU&=k>."$=v>p.>ѽO'=A8=K<'N>%L@H≾; 6/jgq2?4;9>5>'T>|>YߔjMf= ѽm<+ C~ >=Pn=<@=m=޴5W >,=g6>ZƼ^Gq%f=q=22!AZ,?s,ƻqҴ=K!T 02DZ=2E=7=#>39Râ>ٽ3Qo>>+jPgƽB>=?Dkb>}S(W^>p/3>}E<V"=/>"BĽBs>"='1<(d=bE>DvL?72^Ⱦ~ƚ>l>=ݽv &ˆ׻s>%=O(>>)Nh/u]uAd >Yf!q->s齩>@>(%f>}E>L;;%ѶE>" jLcŝᡡ? "V>av=Xn<m;=ri>>p=/>u5v=O= >#TƽK>F1Y,>xB>=\>Eὓ n>!ֽ>C{q>Ga4 T=>->=v/;ݽO >>~bӽ=~=7MY>= 9>$>wXay?M4e$Q]=^1C F=>=o>BmLF6>4$NNC>s(=ɹ\>+;q X͊)ʱ)\T>"*=J>a?QGW>#K;x>{)+UJɉڧX=Nvؑt>%Qw>?>7={^姱gC҇b==W.>ۼa(>d(<ߌ ;`@:>A1&R꼞C8>VI>1?>;}TH!ǽ9h>ˌ>>໢d;d>jŢ;>=c!>M[ƫ>m@]ls=5v>;^=7@W=>z>>O;< >lW,Nv1˽=l;>_> >5ÍC׽IJ^\<͖>{;7J̾/컽=]=q >ᠾ<#=)x ⯜={-=rUֽkPȽ۾,A>fd=f>DX`>9O>'Ϟ>=E|8=="=.5=[s<օ1{udﺸu.>=ҋ%&| V =U>p Ɵ=ۣ>׼>;;<=Ea=ہ=g%n&c~<23Q>˟=Rz>f=8='T?>&;R-"K=U > se"=)x>l K>(HNm>(>>M>D>>ߔ=D>GyUB/K> Fn}Sb>=>)]<>?<>s">K8u Z;Ϗ]m$>j<=^'G<Ͻmz1c >ȳ=Or>‹>C6>4C2yX==uF@68=K;Y;[ߪ=}!@Iǽui=&I1>}{¾{P?}> 4>+9fPj>nW=G=Cwِc;/%er>P?<%G>pܼ%(>#1;I;_>%=?Ϙ><> >3jR>ӱ@!=.T=̲-D>>=TKٽ](>><=%=P+q$;K߾==#>j=]4>Ɓ>>$>4K=V=mE= ==P{~>i,>$<}W[:==>P@T?===ڽHt\=û(ʱ=HrB(̊:F5(ǰ% ͙> f~=Cb=[L% )*)W=>[ eož A=j<W>>==!c=྅gVb)#::ш'% >}>d, r`>Iv>tR׻r7>c ?=0'=Ms>qXVO=YF> Vɂ _>>D<Ȅ='O >)>5=ؔ=k#^=4S>.iM>0>'=*/=X! 
><9=>>tZ>n(=K>>cNQ%X>lӅ>1>>uҼ=7,F=پ>u!cP> Sc>ֻ>(9/ќ:n%5 :>>@U=?Ҳ;HN]ҍ>=<= >64>5=ŽJGJ}>U[gE6u[-瘢yS:XT>lu >gg>'B :; X,Bu<->kװb\~>6BtO=fϽ ׭0T[="0%2B><{;`c":V>=5LǽAՅ>x&¶r>]Гѽ*\pK=>;>5Uy=>}%>0È5<,y-@^HI>4MԽCy=g">qqDs>k>!&穽^T> =>̝L'꫽]C>b(~=*>V==zu(>·=y>(>ATw>lμp==U=;ZO=;v>=?w_>A>d⎽q];~U> =~AC>,V(=ſN>jTg>G><m#95>߼ƽk!G>q>G+>">u=h>=ǽsc>>\=^ƽ?߽G?xJf<7PK= 1c=@s.bt=4 >},8>OW=g*==`@?m/=3[=Z?>8Y>A29 >\;0>d=V=8d)=zH>=|q&Ya>e>Oj;8y>d?>,C>ݽ:=\= )oBսƻ W=_>~<`cU>l7R(lé8=0=b=f=>=4:MZ4=CU>E>U=hg0I/=m(>IVt,=Ȉ=2>x#G=q%2w־4=`=s>o=ZE;9u[ ><=} )>m%=ϝ> %SRr]c=>ұw===B=\{\}>8>ٽ@$>LW>IJ=+ :=k= o>Rf=HR> k>x%뾸 r+dC=aN>yor?I>!?=B3u=SC>jG>QC>7o=m z>>%zq8}ѽ.D>a>W*>~qA,׾?= H>+1>ˑU'ygBva=Zx=\mP*>Bhn=M}=| :!`E='[L>ԥWn>y{>%'>,a=Sdܫ٧Ky>= =;=K[E =Io=YSʾ,hϽ kM(>@p>kK;9=hd>6>#Z>JV* >%=5>V W#=c>d<>?0=y۽Q׽ >\j<>顠G>ۂ>Vg=>=P1A> A=Ļg>0+ݼh>n>S;¼ =׷^=_> >>AK>=ta>t JEݾ>T@>۾#>ͦ1X+뽴=>~]dx> ,ʽ=4ڜ=ue=Pq<Wx?$T><9BT>=2>lN:}Lu!=Ŷl>C>> >T@Ǎg>c>fE>ؼ˵+>9=B#zNR_>iə[>m=jf>j< X>v>?f=$hTվʊ@&/>?/=_]^;>ͽi,`>c)>7=Ľ 4>PQ>Z'p+ =Vu2u~=W=Ss=ou<s,>]">{>p*> =>ax&i=Jl9= =(2=HJ4 >'6=e.= =8>cn=Ey?6\!=+=liF=*>>84=\=e-;@=O>>u>A> z#}Lʼa=z ;>T~Ž=Jr=\,Գ= #>W=-)%x<@t,W;s<'fiLnٽ5%_=)t>*=9`A;<HN b=8 ==iB>x(6F(> ي<)蕽S孽m>5 &>IC>>(u#>)=>9=}ɲ=/Y8U <>:=bJ=w>=D=h=>Dmee^=ϥ=vUiKz=z+V^<n)nn>&-_$>tɽӼr =:>9 <>z ޲><u=lW>#>ى>Q@qz>Ė~>0B: 5>o‹c9N>}ɽO\=&S{$p >L?42׼..`>L>>=y5=2YJ0V< t:L3!=Y˂?T[>i=?A6>_;ФA>[* > 3>k=t>+7Dy>C4|>n=i<3'>FCo>F<=d>gL{սݯ->2q=$`>>你/|"='(k8ѻZ%=0B>wC>a>>)==#=K$g淾-lڻD=1o/U>4~=PR=LqU=w.iƤ>As> x >Y{=wtѽ$(>>)Q>ƌ>w=Ÿ<4+=|29YI=,/F 0z,>c=!w=wV>rY<_;A>JB>a>>w >3s,j᯽'P<|>>m9=<1>y2:);،r=>;>ZýO_>_>܉<^g=iH<тfyP;V>Pe>>?=+M=O0YF 9i7>S=)=2a =XaLV}Y%;v==}Id F>2+^DxZ ۽F>Eh=p=]e<.> >P?aVIpXýLp!<=R]9F=.J-(= ;>P{>M=Z><ɠ_=>Fr̟= ]P׽l .6=u;(=>6MA;7)-Y`>iŽxd>/j'"> y=ʗ->@.=#w.z> =XRIz8 @=q%+/>*;V]=^=xD~*-=Y==]7FQ~ >-Ľ=: T"=>; <(?j>::˔qi=a^>C('>gX ξ>? 
uʉ>`,==80>cW=>V =g><>ݙ>W3>䶤>)&>ɤh1=*#ǽөu=-R< Q^:#6 >jm<ܶ>3A@=PD=O=լ=f >='SK>/} W8ܛ<7>5=Cޝ>½=Gȼ}='>"> -:ͷ>d;f=U=ثM>B=j>ۀi+dHM>;ko>^>> =N=$ >$>H!N~=@==Ji<5 f{h>^j:A־Œ=ԄQ>>Sy=lJ>ee>>Y r>Ǘ<=4==>ٗ=# 'Na5G>D#=bK>VC}%1>Ļ8>' o> >Y=EKJ:ͽj@=Ɩ;>ы;㽝׼6Kܽs?9=Tp>ʂJn^H$7>9ݽr=؛gn6>C'>I싾BK>jvc=3=-1>*@=s><~х>f[><`l>-V=Q7>/>7J=%ཽ'^>w!Cg~<=6\tԾ4%>'Ж<2;8 x>ȸ>0r=2%` =(=S>ۢ=ys>` ==oPO=A"W>31;J eh>ϾӂtqK:6>tI=_u;rq=ap=(j>z>)JOtf=ֽhZ>=;>VTz4瑽L 8>硽;?;]z(,n>05o->n;k==n*>=c>j><Ϋ3Z> 唾F=|/^no=àvz=T2=U=<"V=L (_7>>ab=_c =Ec;/E_ڙ=;W>$v]c;xGἹX&2)}*NBoսIqڽ*AHK>>b #>>2X>֌==/v ( >n`It>zG2=ܘ=T#> =H)z> FFxGZ( q.d>?ὦ> K6$g'eV>i:'z>eN>څ=C"V,v4=0>~=ف>,:ֳ=/ZҐA**uW>֌= ;9=OSAD=&/>w='e='Z;w=)=ĵ=ów~\>|z95%>>?\w >P>޽_lb=X >U>H>4$7>| Na>K>成N$==쇽;PC8ҽVK8s>===,ѽ @>.>g oN > <̽>~%= >1>=A=fӪ=s!@>=r,>.>CG>`=Lۉ==l|9zV>U3&*Ӿ Qy>ԯ^>R/=F=i*z9mv)> q9=>o=P//>=}%ª]W>:=MT1O>M>=?= u=Qۜ>[]=d>h>X<=#>Y ={>ZA>/=Q_="]ӽ>*ٯ8>$J >c>=Tb=d >)A=sn:x>ꖽ,& ==-Ͻ ۽.<=T>sP4;,#>>>c~>J>|t=03ft>vJ>A=ϥt$F>;s]>>"6%pM1e>']>+v=X>>FY1wfH>X>gȠ>l=;JX >s=;>71dic; ~#{>=CgY >#=>`>0F>0l=7MBdCǑ>++S)=~:D=%?h>(=謢&iYU=5=S'8i>ዛ>Ӎ=ذsH<èPD<";;eSRpy;We>}x<#Z=F==k==&>CC\L\94e*>%~==o| >r>6#iľ佺<(9=|F(Qy>o=>e.=.c>=,>>(Bq>.t=h=m>Udp > >`{)b5z+> 3'*c%@L!uS*+>f>t== p>%n={ e>s^=Gz>l=M=ӷ2>TIk=Ծ9>ڡS!0=(\t=t==2Ux=4>p=Cy=ݺ.a>a >5Z,D>=vMO=n09?,5>Yj> =1 =I=c. 
ʸ*X=*+)>[h>@I>=jeݽǾh>w /n>4_uMR><׼Z=_=ݫ>0m; Ѽ{<6>u=C5=;.@=;i̼=>ɾo>J BUʽ="d=j==h0e96>|=4O>=Yiw><˽w>п 0=&I>ޕÝ޽===>c@=⺠u;=x=|z"ϣSֽNR_>>@G>D=P} R>dp+g>@#>c;=WS>>PGtF=#>‰>gFl}ٔ=D">Vn:>S>Մ$>AL>_þӶ=H=V92ܹiNՄ?=?Ω=Jkƽ Q啾w݁=] ۽@>sS*vkJ{>j:b?yK= ivW5x>':PO>婾ك>WWPU>(a<>:mE;Gr=r>-=d>-Z=q=7y>8ࣽ/0> >%gw+>vSҫj>>)>5־3></=|=''#<~;z^x1#>6=?=J6 >7]`۽|/>ü VRs%?m+<⽉+=L=(뱽6XU>$?-C7T41Q2Ғ=䂻>F>=v4{drVEs!eW>=ֽ W=>>hX=#>xC,==1=,=aB==߽=D_]=u>M@Q{>E>p$>F=Wz=k=bFP4SYJ}^ 0 >YϽz;=_=OO==H@g3r>X3= G>Ҙ>3ٔ>jѽ# >l֩8>uS\=>‚>(Њٶ=e;򹯽J>?NCi$b>÷>i~>Gx=zl=zE:K_)\=>J F=}>Ah><TT 0>Q˼iUK=X >6٠p(<^x½m=RY a7P >!bW\==Q>)_>庼py>`B>)>L`q:(=$w>9&>罻>5RNPrŒU.2~>sýў==Q_>ڿ=8\W氾V'= =~fP>}>@q ,=n(ly=O6>~_=Ct>s>ž6C/z?>̾;6+Æ=צ=T¾%>"n=>'b?b_>S5>KP=+==6w~'>8ŧ>|:VC=k8>W=->U=Eɲn(ʧ=M=>Ό>LTp>C=8>v.\O>wX;>1VZ>Rzx>׽:rz>A=>rUC=78Rȼf<h=}C> z=PG`k,&=9>!u>$>5g=q>m=:5&7ov)g#ݰ=Ӹ=&8,>n=ݐc9p;)>q>=ވS0>S:=d.ʼxA60ϫ= ]<5=Y>Z>WX=9K(>b>(ICK>pө>!>(A=_^=R5νѓ=n=VTK\=>?/>Jb>ڽ1>ȓ>=p^=[>X=s=yYlPDQ^_.*S>*߀=~7 >S +齈揽:>ZM<ľI)kl2h>a=_&I{ށ>=LutT>=wJ>`Q3PF>c=*_>= =v5r胾Le$3>N[X׽r>Ң<2=5f> >x$@> XDINоe=R#=})>n;<=g?!<F{Dk]<н>>\@.>$L>҈{̫>>">KZ=:;<Un| EOh\d?fپ5>}4O> 󦽑Ž&}=W>>ڔ>*k(M?H$>==mw=⟾N)Cȏ=f-4[}>N.=^N>F^U< =y޷l[>U& W?Q=Ks>dߐ=g;}>~NX=1[=)]Lok=f.zF>Z >FB> >|(>$᪼ )W{A@>8=ýt32a=<A>D)>Ծ>N>?/ =-Drٰ>Ṅv>4soM&>N|==7>xl!>,[c>> 7 >;zx3%$>u^>n% 9=v>Ff]?>i=B4u=}>{M˾;MA0U,>s^>-:w/ag=9> =0d,=$%=4y>vL =dpo>?.>n=[7||>ߜ=~wн-> >"=3,>:*><_=$=͸E=={+=l=} =G*[~a->x=1#>D>>󼿾%M=L=(> r=&`y > d4>U V>u4@4>d>ʜ-u/?;;=i+> %=>4 >U>=J=(=K6 ,$^l>쁽 pFk ;ŽTi>='ƾԨ˽\ ?KA>gZ=Ɂj>">8c=~t=nf>Üс>{=;=1'=&%>+G/HʣvӼeXr=a(>^R=Tɾb;=9E-ϓg>bgZ?>쐠b>>u0Љ0[v:(a>\ = ɼ.ž}>={G .Dg9=9LK>΄۽>U>x>_ =@>,bT= >~=FI>-K¾4~ s0sl6B=w-ׄWy=K=<ӽ{>0;BHe!=G>^yi м89]eI꽶Ʀ>zU>3Ig~X>]=rF=>p7(<$>G.aIhĸE!0<H>О䤍CeN>-"G=VKP^< >> >h Y'3=*>Io|!Z>^> ѼP&P V=u֒3>e:Mh)>\H=<*=I=4J?4x.= ׼5~+=׾{y>iS<>;ؑ>.P>œkH-,N>_= Zȳ[# +>ͺ>̼H>͛J=plܽ`#8>Ⱦ4 k{=8>Eg9>=ʂw>̨uX񄽧=>>ɪ?Y%#>0Q6mD>uNc3>t=7?¾ +DW>n="~B޼Q>yw3f>>ЎG<aW[qM=s===?>:,>8"f< >bC>>;>=硨<~]Vk=P"S& ,j<>' FzLd=[Ӿ&;[K\l%=섰k=.>U7U'=_ 8>(=ܘw>=W\>=VV=ʒ>Lb<_mCkYJ=;?=k>>=I*}=d=;/F=1=νŮ>gY>u>hr=AD>]G_=J 
6)Ξ?G=cPF<*(>{W,ss=9]y]>o> 2-ǫB@><ႻT>W>>rj,7<?+=HI ":[;>3>O./ro=xZ#'>;S1Q=S:=++o>h>s {L Υ /@<(9f/QJ >@>È,^>.ɬ;>fqѾC>G(zI =8=^ӏ>R> ?N-=Z$?>X>4<,}=x= VڽŇ=|1>n>u3j&?HH 2>:>/Hc>KN>Z=kaa=,᧾5=h7>^.N {=|g¬ݲCv M=UR>]=Kξ->bl+hO>b>߽`=S>t;X>6>g>|=D>ID=|>ٟkm!@< h>9ie<:> z UG>:&= >2D= P=t>>Mݣ>/u'6!=&>=/>콝µ=>꨽lt]>R^=ݗڕU͍8>Иh>^ ?, >R>V˻O ?tK6?k>R=æH6C=s|թT?.>_=U$W>Bv>b=w"=~d>74<>=Ӿ >f"l\==>`Av=s=LC>~73Mb==1 Ug>Iլ+^eѠ.>y=Q>vҽŒ>崊<[=>Mh>9|b=ƴ=;eG>cb=!i>@a?P>w?7=G&d;-{e:׽{=~DC`0=6G뭽jԤ> mvCj=%?h<Á ze#Is=lM<>G[> Kl+>r=!>`>#='{>|w4>ѾĎQ>=ٓ *~&iEim==CŻ>Ba=R >u+0=Jr=v%肔$><:<ޛ>-K}%>sz'ܽ<俽X[j&=25|=Z>^> rO>CϼS=t,>;>V\h=c> };o?eA>;m=ډ=۽>o27=s?=>@hM;c׎J5=_ؽ^B>V K'>4tȽ=>{W>>==c>$ɭ=վ$پF=۽>YXƽ=7~c>K!=(;&>xp?D=]]=<[<}vV= 4 '0>n**DO~3Fo#>>H>B>7"?JUX=,d\br>C>3@=୾]O!=nN5=̙>\ubZz9>K=* =Tx/>| e>=R>GDP̾=|&(=a<bjƾ~C>=MwȼF >@N½f绾-׾D<40> f<*߽>JU> i=z?>] =R>aHݿ=,3n-`ӾY a8r>O?>5M+> += >ak0@< >tǾr5=@d>6U>kP>< >&Mr>~h&?kL>,&S=;c\|C=Ψl}=̾$5>ݽBm=$->X7a/F>Ľ=c3>=n>OJ=Ӕ>$g>sx>lvr=P=k5S(B<7$<Bܓf>>ݔÉ>oX> !x!=&r>F.,>/3>Fjd>ъ7:&Ѿۀ{=UR>>AU>QKU=>>=]S>eJ1޾R=@u=I]{t>Q>'=}TZE>[=sR>O>N`>u&o9=,+=;V F>0?>j:roǾk?GU>F>Rn>&N=9D3k'>н >C=5>0a>G|!>20z=X*\Z׽O=P*A>p.ͽS =c>7)C=J:>A@=$ngӾ5j=ڍ=hk%Ͼ>AӚ>f}=>[y>6@dN=0W=_>N;>x wl=۸7f=jaܽ? 
>jy>B79 3bfc8>aw=˘`y>tu0>L>۟>&=Xٷ"fbRxQ\>Pg>>+V>S"{=.(/d>gur>h|>t@٠B v<|>1z!=R==6cX;?y ">`l,8e >re> >=qbVͼ=j1+.B r1KB>/̽w#}vSU=P!>X=>Y-P=|#=h> kX=n4T^e>cx71vա<ĝg>2==|L>z >R==G˽J =j$:$gW߀ow><}>dkWn:xb-.p=vL+^l)>PԤ>2Y>F|>a@=1ן|3ýz>9;>06=CƽBy߾*l>_ >۽t(e{5E=U Tc0=pk9>P8B|lLdP3><0 >Eh4Z> >F5>@ؽ4m>ǒ=I >Ӌ=Wr>p>km<,!>{@ "$Ľ̽v>=s=+>'>\r>'K|>=+=%[a<퐽7轷=~/k8=1d>~=2 ͼH :1y<7i>>PN轨ml8O>BsQ>kﯽ1Q?>Oh'_C>G>ݽ+=0~>ah>EL@ֽNpP;̽"H=,>@5m֡;y,<>۽}i>׾s >y0=J=vy?8>p>%.Hov0+!йnT>,=t>o(>$Ľ><4$>}>>>k=ᚯ>4h>>uK>L>ma>2{=9=X@G>I _*R``f!e$>j6>G>P=_k=>tO>1kAG> >'A>9 EZx`W>Ma<,xPJ">f=:K>1Ei>{ֻ릚4>"s=MA=PƼzq@ݼj=2>Vj{=F> >EБ>mm佾p=p={lԽk>I}.VR#i꼡#=[Q> O]O Ҕ=ؙ= 戼9dDbF=fU½]ؤ>~׾o=l>kDx=Eýf*=< >C=*]>wM}_>y۫Ƚ=o>婼=>=C-V>LM>O=S*;j=)Z:!x5:Q>;cC7>">gY/-j^<좘Y9<ʾNU>>it&ZeSJ>J'^]Nq=Q>{/=e^>8W$ 2JAiA: U")< Q|,W׾ώ)ۜ=P>*>+>jTDt">yA>>j\$= >d=d>e>)-=^aw=Y\(O>>Z=\AŽ>F=e X$>{Xm>1"<2=j"T= +Q>,>[H@>>y:G>0L>_>_,>rp41k5o<>!>E@9PP?e=Hb&.ؼV]a=Tayx>_L>t˷7:>`ƾEsc>>='=[揆=8T`>C=aw="EA<T >y>C K7,j_t=_>A^=9U=^>#>݌>|=۽6[z>&4'n>g N>/8>dP=Ec>'=Gv&>'>-@e<*s4=ŗ>j ?L8&=2>>>> ]4;=ؼ>=;ؽ84Gm=JĞ4#3˽nf>DWn>\">L>'K@=}<d{ZӾ= L>H=*N>}=>>*>}( >ĵ>U">1>.QLۥ=἖>]hQd }uwy> Խ*> y /D=.@=uOƒ/^1>* l;>t>F'Ǧg>x9>oGj=;M[SEg>M@WQڧ>U$&=IL>8Ht>ڇ>>8 !>O ޒ&>!!>uz >&'>˯P>Yt >ْ=T<ɲ#oH w7Z>H# t90@=;S>Z̈J>L>$+;>`Z<5oB{d\O"ͽp>ѾAӽ=M<[4[.>ܭ͌>>iBg=p>Y1M~=:>X+x>.4,>(a(sLC">/UDeN>~<}>->>:'=>UPAgXSQAJI>l`叼8Ƚ޽/;7R>vh<ҽU?Mq <K>G==>K8ɶ L>n덽^  u ~fu>q?\LM̞|>_т =3"=T׋sl=1=HRVaA =YԓD>k=c@4>U >;>.̝=4ޘ> `7j==TϾ5=rWz[>M>gE>?=r=ZI=77/#'> >Y΋> >?PX>>Rk;ѽ݋$=/F>l=ŷ>Zn>Scq"=;<|}=u\>p=I=lBrɹ=lA =o'Ϩzv>8<(V/>1f=>?>:=V>{|=p#>,l{>QwՔq>5Bh$%>YǾR$'%؍>>=p@>`vf=.{*>U =a Lw_=s]=v=; w>8=f=P%QὊV=>T1=P>>y \>=]>>|> V >M;<~kc]>r.>D=I>od>ղ۽ >MXGΦ vJ Z8>2jG=-1Y>N$=J>n U'>rz=W^o=97r>FЌe>~-=Mmt_>L_ȾzH#< Y=ߚ>p>B=wv%1l=G,=>$>q>1Ǭ>bx*=`KV=jܽg>85(~=lv`<><=t@ jgfIT=vf=0 T0j3 >=}Z>vT$>>j>=ѽ;uc><"===q=1>."9<ԏUa?>;=ő>"&>*=v n R>^u<1 q>S^L"b"V7ǟ~F=j>7>ǃ'>vO8$oz=z>'4_ms=="a@A>X> oHGs<>r>X#=ʦ=ZPֽGQܽzUI&U3>+VsP3>bUrl[.S>ż&mxs,E;ٽ=}K=;>K̽nk5L,|%bw=ɆB>, ?a=>J&>k*I;0>Ss&' PºI)=>hN=hO=ʺ=H%= >= 
Q{>Dl?̾=JH=iݽ̷;<>N=k>DWFN;'>dƇ8C)=NvK>$N_%ǽ>{U=1>2KA C >c>y=׮ۻ<8>I~ϼ Z>&=K^>pm7K&=jR;~~>w=[ُ; m=oQ=#&>VYFYl$>J i>_ A >zFK>][( >=oP=Z(-L;GӼꖻw#LvT>ʥ`C>'6xq^e7Dpt= >xB>6Zp==bk5s'6>=A+ >@ĽE1%>y=P\ f3醾%>\>>箒ppz=kG>G>$=O=H&=$c>)~<>o= wY==m>AbRo<^<½pZ>->.W;8>),>6># >84v9=n=_?Z<[jZ==7C=8k?ֻ&>yJz==Q= >^sk+-=L>Og>=i+˂m=g=\5>cPb>WM+1@ܽ == Ӿd=\:*>;<&>6;DL䝽#== =Y&O>+>&dǾڽIv=qO,=/:w*=#C F᤿|[->[I&;-&>朽3Q<"d>蜾B;>LC>O=>>xfiMa?>HQQ.< 9_ 3v<3d>s>R=FREq>G \>i.f>Ӓ`>>Vb`8&Ɣn>";vpξP>@>o 1O0S/=s21\b)"A=&rA|>4;>Q>޶*> *>=,㽷$<: PM>l9==Pe>nE>Eth"4>c=*mN"I==[;<'=tE =>>⪽;_e<B==fL6=đؽ`=f5=^>Qp>x>8L>>f Ysj>^9ۼ푽D(=ګ<`lkH=1NJqqݒaJM#E} >ν+ ..>F/>&M6{>H=?X>ICo>/>V.==2>>6k[.\ѽ.<P޽;= J㽝[Pj5j=,XнH'=vFHL&ZLK> ۟d(>[T(=[vSV4`ξyC>] ;E=HCc=b;|ct9=nཿ=q=|~<< >ES=M26< $H>> =}2i>X ޾b%ٽ4w3>*=!>낾u-=%̽.%>{:>?[>g2==-@/>lh >@=T=u;>\>[4<2U>f=*f=ԁ>Xͽq NJ04=_5=S>`>s>1d=,ʽB+>>_Za>jp=dp.ӏ,<* =x>tfׅ>{>6$}ѽ9ɹ=%=L0 }@\Lg>+_u>=a.>#YId&7p>xݽkdlRDڗIF>h>͚]>ٝ>5UuUͽ,o/=7:>=pР+ֽP>yZ>[>@8L=9I2H =:ýPUu>;zx(G>[5>r[UU>Ndf=x(<!⼉e՜=y>=u=-r>3 $P =ἶ#ZU=̲>=e =Z >x=G"c=D#v>m>I`:>آ7=0>^0->kj>=J-=b = >>ڹl>b>I>^߽퍾~K==@c7a:>X>ܛ䮜=KZ>W3ԅ=>"e>= =⣲=^-ɪX==rb>wQ=>J`lkx.a-=Q=>2`WQڽӾo: >/> .=Qʿ> >!yUO<=01=Yt>K\1>2x>5u<2DT2Jp=ѩ_> =U`n齃v;"5O> > d]]սtȊ>L=+0E5>P#=6B= *o)<2e=ǽ>0J=9PC//tx`>|';LjFbf&>1{=G>,o&*>>Nu)}:^>T>Z'U>>r=9C>u< Sҽ7;`[ء~ycž]X0K>3=03>Yf>>ÿ-B<&U,z ?(>u=Uf>j]=USe $>*>a>vh=*JOf=<.l7?-<=.i\W>Ȭ>kV=!'^A>ʽ4C >lyƾ>>|̾;U8eR=.= 6>/,x>Ռdu>Vμ%>!E>G<3>9Vښ=>?f>E?Ry<>;>Q>EH>l˽nKv,q>@A==L==)&>f=Ƀ?U=>ߔv> >mSXf FX< =e;:h(34>_>[žpF>\B t@1>3ǚ=,>y >/?c>a<>>ͩ=6K>\>=N|:V)̻ؽ0==P<9:c>ajJf=ܿƾhȾS>V2>i>&_:L41>r>Lc><->a>M5=h=J>oz>$T>e5v]O˼շ>VUPѽ崇Iڽ~C>F`^5P=v?`>>\ HB8'UV)\>tL^1hɽ"i_D l>zBTCnm>`ξr>@6=uν-~$GR==ʀR?Y+>\r>c>(Ҽ>+ϓ8>=>U=f;J](d>=->U>o+8q>>H?>"g=.=i=qҽy+R=;-0<=x>^>]>!> =W$ qxS>$P>>=hƻ }e<<&d"?!ݼS%G>=a=>]!:>>>n^Yq=۽W>>fs?>=u,;>c< C$mS=r>١>>+==>dHRO>}Stľb&gU=*.`b6,4$,\q(>d>n53r>|#pzN>r ?: hŭ>2z>+==?>y 5ؾ]-?=ڼ*H>>OgX=@Z>9`;=_ k=aw=Co>#&F<+=Uw5Ȋ>>>c>3 ? 
>$W=dE;=׽6i/5==Lo~ٽb V>0\`>uH%=>-kA>>ŇU|<;*=L\=K4=3 > ?>DHڐ,>=g!=cfK#= ̤K>=is >H >xUmϻc3>e<;>>>ZI:=Խ|=y<.줽/ľs4<^j<]°= ;<)e3=>Q=y<:/+gB\Cײ.=kL=Rls&t>>Fg=)>E>(HO/=dսb=.w>喼>3a>o[C='">,.>~?THq&-Z>!iM?W>/$lR G>s >cwC">=¾F/߽rfgZپs=s+9s=B>>ŻH; 6I>EK¾d>>v=ٞb>hw!>أ=>PD ;R= =\=LN2_>uۆ>d[~s-==* rhT>hZC<: I>u=] ^>NM@=PD>X= P=c>,f,F><@S>hU㽢ӀK=o=>O< !>:=0Y>[ >bI>&K>jB >=ʾ >Me>$T}=5'﫻~gֽS޽r1P۞/@>L=}=R?! '>C>)C=TC$XedD>۽>L8>7{r=m5Bs> ٬ũ=]=^= &\>G_FW=oD=`n=RAs=p>P=S(`=D6>> >j>=*4>\bȽ&=>u=+=l>p=9>Af>4=.>U㽦=M>Z>?>=vX!=Q >oϾ,<ソ==t$>>B+>:ý3ѕ##♾*= ھz(g:wѾbBto'B_=>׭R |ֽ[%|>I=h=Y0>k=ҋ=^$=0?U72hإ==$o =3ž<CϾ,>h>_S>8 %cV>M>aj=7u>>Ҿ<[ >q= =t">= 2٭=«w=>Z>t>u>[t>G=0J >‘><,<0@ n>gl; JQϣ)@>8>Z= =Д>W!$ >Q>B>|` >E}>s脻Yǽ />Id=֫S>{ľrN=*R>M\>z>/bPH >x5>p;]q>cp> =G"b> S6P=FRǼL>8=k =FP`.~ p̭>oC8 vK=ͽLټk? p.>1|>4N׆=H۽"=Kf.>8?&=K>dK !OP=ꃾoŽkM>B2=VU>9н^->{=t> k> Z=LVLi;t>>%v <@>ȣ9jc?"󭽽"=24<{o̩4=8/>O< >k>q0>5V=u:>Ld:>^7?<9 v>K)>4>6>~6>+kV˾ Vٽز;YԼ{tN>>=`&> P=>~$4=΢U&Y- MfztRǽ}=|=ʏ`>V=h8F=E.MY=Q=[^>P@ɼsC>}ؗ>kay߽WpI=P"=g>=q|=Mz>{l+>Ž{3 >x U+ǛF>-Ƚգe==O>}Խ ) ޾>d;+m=g[=}y[>3<@2<\'M>XMuȼ|Th=O]yP%mVk>3&"8Q[>侇W==΍>=M>=&>&z>\Y&=qadCs>56=$Լ._t=E>W ;=`=fݽͣ7>C=lE0-sqx2> prxwƽ-={;F5^?6g>g;}>ý=×>s=Ӊ'+sF>H<(+9>;Ϟ>PDt1 +=&Q,׼8$1K:/<|@w=t=k=M=>C9Ռ>0>~ Eҽb=B7P=d=/X=,5$ɦ̞=Ų`8>U@@><\t>ny5~ =_ -=[G~9Dн0 ;Lꄾ8>9V>~5% >PFA;hr徰PV>>=,8diߍ2DU<,1=ٻ֔=p4> >&|4$=u<`=N.J=d:=( W=`؛=b;}cm߽mR5>p>=[йPD>0CN= ?l =5>,=,B|>ͱ@Qε>:4>uнW@tq=b=X]x>vj׽=)|Äܯ6='>~Wm!=(cV.=Yp;FEڽƾ,PH~`&ZEUʼ S5n#=]V> ;>}=4=3 &>羢Wd=5 >=q->u2<=uH"B=3>m6<="7>ZM>Ѽ>}s >IF*4>=ľ"\&c>OvR>`cL>Ә3q:! 
>y 0<]U>k~==gp=#^ i(I"&<<@ZcM>V>M,+=J}X=!c=B=[;=)=a>08>:y3>^gƃއE>e>T< 5<*>>Ng+ܩ=N)>=Bнד> =G@U>$*8䓽)^qq(>=>X=wV(>t)><(B=|3 >Hqa{B=P3>ݙ=Qu;LK=Tk>U{==%G>.C= wA =/;Ȏb :e8ξ⃾4Ոev>?Ⱦѽl=̛; =>tF>&!>f">|>=˽Zx>a#"=hA@D<Ť>j&> S=Ľ="=Ǻ=Y~M%5=qmRC@=R軫XTG>>'Ab>2Yj<#ϽÕ=>">V6z  k=i;F< %n|<|>suK=?=>9=d=f0et;"n5PȽYD$>UO'(i:>H=G½gu\5=Uj=6=Ki=<ܽ6=L*ui=/l8>J]=qN>4ip>GԨݼ4(#ȟW2>E[>au7>\RK9=QV\(>Q[4q<%<ȕI>h`7'#7[5*M/|>1sF=L>=0?<$=o=;B >$ٽ_ܽ=[½*c2>~}>̧콱=ǽ{=>=(F,X?bбӚ`x>U{Z= f=e3>,Tֻ{Y4>լP혁>1,=]><{=:P=k5SI=X=ٷ2u>=e{"=%rF>A-D= >z:><7>;{q3罠 i)>>!p={>꽷==q>\6>=S`>'xۚ=U#A= R\综e<!: -Kb=j>̽q WȾP->Q}xr=[=0L> >e>њu[<{nH>ԇ>dyH><֢==q= ~=> I >|r<;>z<>.:=_r>֤>a>UǼN>&}R<J:O=Ci=0c>nv%>vKּQz$}Sݐ=H=E>H =]sżR=ND=٧>n> Vx--~>L>\0`u>=>DP>x= )>n&=jm8MM s=&h3Pм>o\GL>LԽ7H=;.?;ٳ>SA;D; >"qҺpRĒ=`z5>?<нu>U>x=a!%4Q=><9=Q<::=+=m>wQ -QC=#W-8==z= b> u=gx=>>>g |>=]>>&V>'` Y>\帽Yܽ_v8yq= o>P=F>[=l>O,$U\>;FS>[>!i= %Y A`>yYy=J=9>3^="~Q>>nk=dkν/~='>_(1~=<ѵ==У=Zfd>>[,>T=Ƭ+X/6/TT4>T<Ν > >/=UQ= U>}ؽu>>81ʙB=p2m?8 Xƣe=,,>&tm_)s p={<ؽ <!#=λ ,*<ٻl,x;ՑLd8=+W>=)7u=Ũ>C>: 7>=>_=i>D@4S> c=x3%g`i>\7M= p>A/ı=ŹJ;Om oe&=!Ѽ%:Ͼ;>BKSiO>C[}a#9>p7>>>Y#ER :=KTx־M=Hl^<>'T -A==%V <=0 q^>>.v>D]h<=eɽ˪=g:∏%auٲ =p<<>:2h@~=¾P>{{s=>Tk>lоt>>Ž!=j9p)>Ωx0=xl>nO=>t=4>">2>LK=zBZ>s? _؃=6=>Ѝg꘾=U/>[V>Pfk>ٕн>J;_=`>ϽWjpw=j=*=/Ϯ׾}>-=D=^ͽְ=<^',w¾)錽ښq0=U>=P>+P;`7:>=;_p@\y`ǽ>&PP>|,=˙=g'=go>Gcq'+>$hh >>u]/=@zCW > ? ;ԧa=ɸ= <0W>؃2=N>==cA^ s=doz=f=N>'T#>J=G=!>fDؽLu.%}qu\<̏*g<N>3cv4<=NmO}=M=<)hv.=C=h>g=kk>g>>ip/>=e1\>ǽ=յ<]Q>sIDw;>~>rH:ou)|Q=0=>OcҘϽ޽aL=$M=L9Қj >w/GK9S҃=>~wԽB>.>,=>˼>_>dk +>z5Gp`ϭ>],>m=@lB@"pe>z68B>a>> ;@k=͜Ͼ\wtx>[$pZ>;c6ǽ;>u\]:8>\q=3>wz>|>+OS>݉=Ԕ=><8-pA>轾?N7>ž ng2?2j@/=<`>0=7_=5^>ҽK><̼2=CwqM?>&< _ b[6Z==Bk<>04<"}0B=<6_>uE*>т>ft,ؾbj>o>cm >Z?<4Z=]y.x=L$>Wk=as>b~^<@y=Jؽ m#><[>C=G <<ڶ;5>'ugؘJ3r#>f;-v> >08>8y9;=⦅ l퟽=PJ3\>r=\~>"x>Yf>Td!={ǯy<,=J߫=@2M8,>5>~=M<2+H>Xq'>X=漘оBK>Ƿ6?cat<#>2vNjl=b>>M>yC L &![$a>=4 ܍=#@,>S,ὓo]=;S;f#B>f~=7;CzzӇ>g=1-Z:,[3Ύ>4OؽJk<б+L<S@>; A=u>hV>,C= <<$=>OVU&@bNn>&i<H$==>N>@! 
=n>!J:w= L0f3DieyN>u{=d >Z̳>I>1M>MT=~>>ǽW=!>pA>Q$Jü=gr>)Է= >S=M#нդ[6=N>2%=MFR>bJ>7T"#>1Yo[>v='߽i=};D>l[%=Q(>@==/Ԟ>~=ݚ~.=Gm19[(i;^hH_>uN> =Tb>7;> C<=p>F6>PT;>Hg23=5=uwX=葾٦=<ʅidWz<>d=t{%R>+KхW̅W۽,HAǾ}7>鶓>,F֚Ѽ{t=A2=ɾR>8 (٥=`A¾]L#]lvڽ;>C Fs[=[!vD>oj>n5L>NWx,=V=f׭=O |=dP=%aST=叜w(>@=5~>M=i=yy꽪x3>!;qGX#$2hpx>J žŀ0>2[/Lْ㿽$;Rľ|Z&:DWF>a(.ϽGZ!>nԽ>XӾA>;=%`ћ =[<2ľ =pͤ><_5%=3:=q"=?=+=G,>;d>%z=P]q >l2;=?̥=i>=ts{=ma?>={(=~(խ'=\1~=*.G>.$>,ӽQ;u*x$>E] >p >>Pz= ?D *t>M,6L : M׿Sؽ>lߝ<]h>"c9>4Q>`>?>=4=>}Y@>He&Oh2Er>ػ4fY`>>Y(>^>)>N>pYO>Nf=<:1>TE=U]뻴KWj>2r=֡כ=NϽ_P>>=>ͫ #'<f. ~ʽd=TsrmRh⽳bP<d.(WJy=U>(=cu>v>ּ<.d>彞Z>*G<ؽ>'>)D!>qN&=&1 #A}=:׽>=;">)/=>"= ! ?*==ܭ\kk>x>4a>^h<I>W>=D:>{=Tq-=Vo>Rgw>>f=$=k~`=)=㮾,}-jļB+>;A< >*>P=T<< vR>໓]\Cwý=_4=dP>>{/"b𳄽=^>pw~oSM=c+ȍ:)b ?@">q='f=Zc턫,>3h>0>,á;xe'>ς⍏>W=$d4 ]C^Jžp$>,˾y|3 Œk'>J\<5w/Y8ů4;=)3>JAJ=$qa@T>VԀ=,>X9>(~GD=aрj\=C>>>>c>>%"虛p >S&JV>dx=RS==L><>E!n=8>BZ\;=6=S$>lҾ@w><9gl=)>zj#>z鸑XO<r V+=<Fu<0>ci>>)=B=fI=E߲1=3= >T%&?C=ы=a=載:ľz=o >>6)>.>j~>>_َLŽ+=Xێ>|BMC>G7nSv=.&>#$Qvnͦ> >=0@->64>jq<3=. -6?>y$ d&>]<8̽͹=~`"D`HB=@N=b麍I6Myp>U>d]m>JH1fX6q>k~=eD<$vR>q{=>ހ><Ғ,lL>H>k-h=vI>2>D Ǹ>G>\>=V\=dꭾ9{6vi>Y>!]оx>ͽQ)>㋽>&Q=.;8[kͼ6ݝ>̂+>i](,엽@A>5S>L3=#|=%;>B=8A=(3]TzPa>=A<<>Jʘ`͘Ǽ-+<>Y>!>Mx->w0>>tՕy =8'>5=s=PU>r>喽(U>@,>v=>O==&"#(>}@=[b;>U8Kq>Mx<ҽI>Dž=w=V>=a>\!q6½ֻK<ؾ(=HG>k>֮l= ,~:w$})=m=^wO 8>ځ>3ZMA>4kKԗ8Yn=>;g=|)>-Q> ?qc<=C=x%>=u>>t>?=(,cq9M½O_7>/="} 3>+<9}㾧s<,pCO?z>Γ>^=S>},<ƃ>Փ=ϖ*B5 ?M=#Ⱦ#.9s>jO]oscLؔR==߷O>&dE `I5ꈽ4i==]1C= ? >Tܽ@|=qM>LY=F71AtI>']/>8*>_ >>6TT>dT>!Ȳ2e=Oo>=44ɽO>>=(ۼa=$;5we >'>;gZ>=K==>G>¾>;>i =>>9>R=>ҥ.]= ߳<6=dS>h>Igg>$=K.z<>}<]p;n= mZ3C>ySν,ǃ%=XY>ѻܱ=H|=7D;o;Q"~ =!n<`U>ҁ&]ӑӾ7⳽-I>ݺZ=`=ܞ>B~>>1ჼ˽,vӼkRG<k=E= '* %H>퍾j_'==>˯5g=K>>4b0 R=v>[(<tM=| rU)>;>ľ޾Sgjo<,>A=0=  3sYJ0!>,>}KY >Z=N <(2>YG׃=8*>bI<So3)># >,ͼԽ㵗=>RIH=J =9yw@]D>ϻ[G=j=M>k>=8#=z=5 5>V>R[(^b>\>Yl>)>=2YV}%=< >(3C=!2 E> >beA>* =1GWg=[ < <'̰=ලQ>SZs=hDy =WUMP=!U゚a/@t>z?)%R>M=R:2nR

MA=B>Xp>3>>l$>@=,>8>>3ތ>߄(=>.==C;>K=+(=jGn2L:=-<\从(>Ï ϽlP6 x6>:>el<=ڏX'<̽6>=H>Qa>P>444|@=?n>de=D8;ʸǼ_[>}P>g9> +}=>*x|=.B ۾g>s ]h>p=K=>o=Y]# ^>/wW">I==>^b->*#>=h {7tMgMU&qt>vKiB>d+t8t==mN)>L `m]qjA |<(=?ǾG>_? m=&}`?L.WP^N>1<\a>>;ҋ='= =)p5>7ì =A?>>3y >Bf>=mR=qe>*?e=]V>mҒ>EP־>=@T+>>b9= >= a%7Nv=D=4=Q=g'>/ؚ>0>]|>>>ȍ%|>pSz<+>1%><< >8*Y<><$/ F<W>Я>8=Ƽ6>v17=mّI>ѽ'A>>iyp>9,uL= l,z,j=ӑ=P>16p'󌒾 ?=oK=~=U2EG>Jgi}7w<[>>s<o=T\۝ݽ51y> +=fE=|>%==н#T=Ui㾦Z>tm#>+"(XGqwٽwX C=^0?7μ W>kF>~=;Z搽ꭽk>? >+ɾ̽ c>vaA>\D>UjL=p=~9K>>ċ<Ͻw@n#=`-=^X W>͝;>Л>Y;s<=Et>OGD*=wr=?I>s[w]<@%>==k>}%Y=+vCaml=x>9G?>|= B3>yCQ@>-׌/>y>6e/ 5=iY+PgcK3j4>\OEp{=]ttp=g!T=.7->؏>>N >>$=o$ >N>0MX4>vz >>Q@>I>=AȾK+ZG>=(ID'';)>7슽a <4>Ph>ߍ<>hb>L?<̡>"s I>2؀2 >͸<"6v?/b;ke:=#uS<\BDmtEsjj><>ʼpZ>V= cT->&>mO=6 > f +kO`h+Ǽ6j=Eg>f>vf>#I>(MݽX=_̌!+>n8>C>=<&{f =u:5=!AȽ;z!=N=U:`fie?•=52>a٬==WE>z>ievKT=1A:>̟/SR=ϧgžR=2zr>k&^b2=D<1;u=ksxڣc=1;n=OPQ>?>\ErV< f#=]:0U>ay >惋ۿ=36>)=D>y>_=\p ֽτཏ4>O$.>}u>g>t`k>>?5r[QS>ҽa=u0ܘ>;[Sg;!7f>zXq>` >K%齗>k霽c> h>V?=f6:콿==x%>>!|LQ>?9> gA=6j>@<>S>: x>( D>:[@>>gǹ >Pb=~jsuw=>ͽeNn=-=$=U5>ƱFE>Z>#i_=Kb`=,?[>)l=`G>,o>>I)%'>hƣ=Bpl`>J{>U>Cي>8J=üy컽#;o=v@F&u5>z>`O3k6ǽс<>w=W>r9gGMŽ>u/=Ik&ݾ+IԼ-sGjmP;>YK$>2}=;>{,и:>6xK=tn~rFA>S.I=Q34( <>'=n=><>I>1>Q֢Jn!~=M=h>qb>-*> %D$v td.> ݏ>`?Ў=im:>fh,>S=F=i> ݽ< >>G=ke6=)ýϥ1>Z n->-~W A׽= :>ӾoF=Xe@s=f=HTЉ0>Gvz#!>`/ýTb=XՄݽ:ݾ(ŵsi>Z֫=j>>ʞ>tu>k1$.>OǾі+>f n#[>e>ه:k(g>??N>PbƝ>xQJ<[9>m>gĞE>񌾿 8gͽMDt0?>7<+=|<=">i_<&:>лN>1^l>|{n6C=>_jN>SwN>̾*`)*B>S">7Ž侧ES>>[=۳>ژ޼)>rE޽1 -Cu{==>d kΥ>V>Đ (zӼ0Zu><>~T>־o=RĽa>|:;Ao>_>^>~װI굾ƶkCY޼Uμ!jiNs S$GR= k>k&>||=*:i=G=>>N> o>>jT> 4>ϗ7>sD>͘= >B<սcϽފ|>N{w>?Y$Ӽ_ŋ*=^m>ua*=-<}=%/<=h-=ApN>}>o=-Zv>MZJ>RTBA>.>>= J$.=g>q9Dd|=ܼ1>θe>|t.>> *>2嫾}>Xi=iLBEn@=b؁ý >\>S/'>z=;᩾o{5܁N3>K>nx>=h=7!:>F>7Y3o >#j>89]>(n=@V>v>Cľd #(>lBMt[J;T Mhz b=⿮>X8>'齵`=>a=>=b_Ww>@=d=;XG>2뽰x>]>ԟA>%>G> u>҄1s>W>tUdRirKA;a>_&ĽFJ> .>Ar6of>>KPZ>սEcо(bU Vr>4>v> =J6=A>X;=`X=>m!x>Y=,!L[;Np&>W@t7->o-31|=Z{^h\>c=>Y>I>W>c)YTK&ZU-,Ns.=@)=NU< S4>]=Z>!߽5>Nh̒==:D5* i&fl=BNtv'/ H%n{ս 
>#R=P=)I"G=>,=S>@?=Lkо7XV>>Aq>`?uGb#@s=׽+\. j+>Ε<=a)=N޼?%=;(L4x<D\>b>^˷=#p>.>="e}>Yy="]6Fž!ɛG=A;ECw=6=Z>:=};=_c='@l *=v즽=Ħy=>N D.D`K<bdy2>֕in >LC=I{cO=1̕=)e:W>==V>q==V㐄>$B5>,=A:9q=(Ryt)F===>,i >8?K<0(2P>6X=N>ji]Ľi=;̾=na=Ҷ>>>o92< %B>3>ֽ0ۂU>ؾ$C%CM>=L l`-!-=: -<㌓>=VFά<,͜=㺩<*gzc>LZ>M>K>Q=ZY=>=սݑ>=O40^>Z=̀9>V>>j>=J{DHhc=> >k{=e,=PfO[5kaӽ@7>J-PZo"dUiۼ.{JK=g]o#ͤ>uluFΙ;D>xy >H&֌>=[D>=G sT>a4LB>̝ =07>ŐrⅾN3>Ti>OPP =xH=c)9>[U=}2J ۡ>L tIԽ ~>8>d_:>.% 澠Zvɽ=`w<>Q> -罈b9~vF>j0o½Nw:/saJ@>w>-\ݻWc@be{ľwC=R̽FL< K6]>>=.%yhJ<S>s!/6BGTF\C>Ec-|ھbs_>7Y>t>#<ͺ= [{=S=>)3=6X#>=$=<{m>g>Pa>bž&3=]uz)S˻=9 "=&b=)4;ͻ½1Xv S=<>GX"@IKMV=:Rн>]>8BWܥ= O<0=<>9Xm:46BּΕX->p>=B7L>rY>V>^>>vlt`u>"u>c=q75ʾ-k7o=2G5AEY\=r=ߖ>dq>8^T>&M@Ʈfmn'݁=B5$>H P w=C>&Ž<"BFlϊ>毙=>ML>ܽ>K8Q<ؽ?4=z]= f>3{D2upQ=HJ==$͂\z*[! >VtB>Fҽ) =&;>A=>l:Q>:gm)ľDo-QXF=;ɽ8=>R>=l>⼏ȾYL=bd;=bι>zEdQcԋc<]#=~\' ']oE=%>وV<<>@S>rG>h=Sr>Ⱦ8;x>Shy=bO@9>\V=M?<;7wվ6l==6q>>+z>N%>*>j20Mp냾|̾5$Cn=sê=E(ؽU>6<9.=$Jg>3;o۠<)>R=ܱ%>T>M6>=#{  FmJS}=d''`u>nS?=N%XR0˕>r_[>=7W>1Y@DTov@=@>x)8>=J=Z>>>2l@QC>!t>ҿD>=Z*=F0p">+>$w ~=-སZ=f\=A>=s"ʽ^{8]=ѧ+><лyٷ=/bN㵽W o>>e<(̽[`>$9[Q=`N=F t#>w2>F"]8mY>R =>Y>22>=CʽRz>ю>,>=Ҿ߼7w>EQC^^M>ϑ؀Ͼ%>=4>,+*Ⱦn=`>DG;=L=Pϼ؏F=>|쁽ÉC=y:>.[wnHh=T>_t̜_l:=S= Q\==+i>R=?hA:7ɣ B>U= >=Qq=i3:>@ē=jȶ==_>>Ёtf=blE<(> nRe/>=? 
=qB>@TG>ӟ>T>%Ň>ˉ1ɂzpƎ>zG))>=)lS>1ȿ'K;f=7=|1cN>Z>>-] m>̎-W>tǡ=Tp=VOQSXe0"=m>U>t'R>>C١{.y£z JM[p2=TN2 >@9AWИZJ<BW 7]> o>䆛0ˆ~=Ƥ>@SL=i=K>vPN>&2>@*5½ >ُ=]7>:;Z21>"=g=>{>n>>k>-x>n߆X>3_Hxξ±6 6(UJ!U?)=; Pl,>Y>I>r>=rυCc~z>s=>><7ݞ>~l>TԽ*mE;>b34uZ *)Ӽi<W=sDK>û lL >IX>&W?<<ͼ!&>f14 hF>c>:u"<7U;=8`;>t>;)ý".> =uш,@D^\:Z^D>Ov<#$ߚH>*=Y>=-f>-<&Q=a?'|>#>a=n#>>Q;8=>k?>`Cf>fjtHN;r=i> ] >^oA!w= ̄&B>O A=M$p==13->E>wH>k;M==0<䨑v=t82Bq=wHz8I9%`CۼPOoBlbr >S3>0=_>>=:Vo~`D>?>J"gĽ>N>Ҍ`<,%>=(^> >ӣ==(>G>v o=F>Y<׽ ={$=㑽2{<&=t=8ZA)==!6Xaۨi(>?><>=0 >> h=0>l&>KP> Λ> :8>AM5>o>NA><ѼJ>Pm >yQWp|j=$=Ɏ>5Y,>pr=th=@>nD0=>9>bn>y>}>)x=^uO(> >W=u1 F T > @c[>V1=>f6/BN>@71=sa=պHʢۼ9>b_6?;Mߜ=<ѽg>ʹ=g/P*&n /l>Nl$G=f=)=/;X=?QZ>'Z>ij>G4D#9>>w6" *>^^>pjDvg=0">4b >ׁ<7SоŽǼ25+a>?>2=<>⻸Pm8k=Rʪy|ZDT>D>;{88>qýYO=;j7=:>kʹ="=p( >y=7>k>:>z=2L<;->b0)xh=ۏ.>Ul*j>Ib=]3G>.Gw>`(8G=cW=N=3?-w=I>㒽|A=-2e >TL]=0>e@X2+os >>Wt>+yA{>:{=Cl=/;;e=O ԍ%=15>K >Cv Lf>q="`O">UA#ɝ>pR?^]S{hi1$翼!52>Ӿ]<~<ظ$U?=3=<>iU;OtZFQxb<=t<0꾷=d;4i==.="=R4={vԽHpV>XT>0GZR颀;:>;ʆ>5yԽ&>@=A<3~>MU=Z>8>0s=Mze>q>t>Wm[9kFP=4>;#>Ⱦ=Z>w,=>t*=X'>_O>L=q>6=ZL==4p"cp=Xֽ* 5td>Z>~?_I>S=(*OU>"R2=K$ >7ɽa1>5=|{mIݾ]=<=6=w=܌M"m>tseŽb=X=r=X<ƛF>2J̾e>R> 6:X==.f>#;q`ƨQ=8G =?a><,½ TG}'>G>>J9ﻞՈ䚽>Ec> =0?w>J_>)xJon>>_>[!9u;2P=mwT/>=*3=$t> Z5i+E[=Jq=&9ǃ @" =>>At=VR#>ΣG=tϽb06J5>p5>d>Tj>s>Ͼ~=v=T>/8>Bڏ>GHj>sI>̩=JZC<^/&]C>RX-mD}W<:h@=-罸׽mr=X=׾L?=szIL>TЪ+q>^j/>T>3c>rνu\0==>-7< ڽ 4>;95>(o>z={aPy>cj=yw9Vg\> 3T= DƼƆ=ǔ>iJ> r<)>ûoOS=埾 ^>䐒><?=eWYj>+U>ok%Ȋ!> Ӽp"=tq1>m=0d!>= =3>oiO;j^=>} 4==P()<-=O=NS=.>ܮ>q=Y2.==["&3>+Z=>1 ;^ۍZ 6 >Ԕ-j4q>~>>=B=]&gli> >vVP{d=H >ͣO=ri>#<)ͼ]ϐ$ټ;2g>=&iž,>><.yu=VO҇>>k>7ǭ<9]-c>N[*>7Éa=VԽ>6>}>B3=J=H[ҝ=FD!ZԼ=?<*"<'@^V@,?=v˼5E>5D>>=Lҽ/믾[}w09|M>k3>w1A>Ӽ>s>k >=٭=!MP>[0=8p>.;Suy|>>Lܽ$=$f>;\>Cm:YN;=cJib&}Yٗ>˫[8ֽS=>_>E,<>:M8*>Τ=@>+* A>s;>}4ٞ$ G\$b'.>H.W1;Ks˽>x!> >=#>+ҽbmMZ9#>Mz=྽K(XH>TNU)>j<+z4ł͂=H=G==327W=<1>Q>2`Y>]A:2eb>5)9,#>>;h&6=P~\<Ɋޡ[&>c=x>ożW?_;>;+>X3>1QԽ ̭72܊׺Kps(<_=U>R>ʗ(0>@Ӯ. 
&=.--=M>jqDyd=|}彳 =P#?LJ>B1>tL*>:==߾5 >^L>G0=vf!-o >Y1̾>R ?]+j >V=9>/dͽRH-vY>a>jv>_?VľF9=,߽0jK=Bxe$$ʞ b> ;9*l==q'=֑>V4}>\wY]S>ളM5>>i>8=i\ꣾa=QL>!SN>>>A f>].K\Q!G_=uDվN>>yo>w>l7Mžpuɗ>`b=maR >"Y/>>/Y>^>,p>Ьw=>dpdK%M>'N=oW2=^$>9_>_R=ɑ>w->W̽W>= =6p->#=8W=& =D36? >oo> m<^=;m^=Ph'M>t>>Z=}V<>ɅK.xB>N'`>o/??ԽEz>O=%>> S)=.+?|>߾urI5?>WƀƎټs>:yls>%Ǚ=MLM>̾ (x>|>E=b>3ߣ>wƾ6r>95(>p>2>6)>8>>>>;!ѕ>2?x>l>.>_>n=rսCE=ߊz(=6(ƚ蕎>Ѩ >䐸=>(&aJ=%'Ѿ4>w?"> 6:#n1ʔAzha>)ԁ>5s m9=V}?;Z>&('4 ``Mb>g!>=d\<˼)=>; X?4=dȽD">==>Yq={>U*>iO=>*Ӫ<8?7 t>x0Ha>VK1>o>{4>P;9нpt==x7=} 辕0\> =>eA孱>Y>=Vl>ī aDqC>63x===5=$=8ܯ"Zr=&_E9iTx vD6ԽHV>?2S=Yu>Ӭ>=!=L"JP˰>JS>8>"W> >1מּ=jg@x 3:=nH>뽱޾:b>~3=߾2ܷkezӽ½N>x/K>=k>>c>]Xt# D}H>jǾZOh(u񙑾C2>d=z<@4۽{ >q>M>iiЉH!<$=N^¥>(QT><>iu8_>=ƾ:+۽Q;>7ng3=\W>r%>:t}8 ?und=/=Yi⾱ɔ>C$>fwM> _>\++t1I:KA>D>{$6Xn/j-=D>-X쾯=_W=P>%¦񽐉X>>0\>ě>=9U ?R'>;=>D >|J=%>uW=ްٽI,V>s>KrDt=|뙽Į(: >_>ߒ=+R9>٩> }+ق yW:FEXʾ^>> G>=z@Uʾ?Ɵ;P= x 6m=%F߾D= >}V=HP>!fN>׭)<>Eb;>U }'=N=7p=7>yK1󏾎)=G=?x[> i^]/>.">E#E1>kFvG^a>l2D>#u=-,0>6> ?>휽lAF=EK{ 1 ?SB>dt=$== ->Z>oоo>16 =3{?] >>m>9*=?=ˋ< ]>K);S>hB=">E> <(3<*;!>>1>?#>m>=99V ྅?=~?dB?/=ٽZjR>m=̽hiWw;>sI>t˽ ]V>>j҉>]R@:=~T=tɐ=c<&>3>nv`#4=1>C=iF>/ >a{fLG=q>e)>Eھ3>DH<2=1>P>8{;%9{+ ?2d>Q}>qOjKٝ>Pծ4>3r=J=D%={ =Ƥa>]YLvB b?轣Y1/$< >{=!>~Z9> B>kx=a>%c<>]E=E=ҵ>iq>y>U:ℽDf= Ԡ>DU>42߷č=K>:>NɃ>N>?># Q>^O8_/ؾÄW0ɽ9>)c=Q]=>nR=>At6T(,0>$1퐜<1wS/>*@>>jʽWF=>O$;VH>͙>%Ct2<>r>#>xZ]X><=_=[}=5Z> !=7>.k2?ٻ^}=<(=>S= o 'u.4ELEX>>,%*&þ>D=S\7G=3f==>n>񭷽OBˆ>=qwf/M>W h>\31\P_< =1>MRQ>tP=-{H>>3ZB =>=e[>i (>K>6>◾wPveb+?<+p,=AW]=☰={lN=XKo ?ȧ3=ԿH>z>§=W>Q ?/G>!> =O= N`8=o);g<" +<+i>\>;7V>[>J?(Ψ=O;[_s1jVH;į\:!Ѿ7i>4!> >ν=siw>)~=`>XΓ=޽`> >QU=ZTŬ=B>x>q8>Cƃ=]<{>; 軽"!Y<[< p t<5ᄉ2>bc=¾Xpy$#>`a>0E9'G>j> 1= Ȁ>L= x>D_>Q;>=M*PgȠr>=Ev羡{2>l~>=0= ǽ>_=&%>s> =G=>=p>vMc8? 
*?>j>,a?T=෈0> (D>(@>֧#O<> |;n@>Z0d ?87m0>@}ƾzM8>>Mf>|}GZ>I=W18=fu]=^U>``>?>u=@[?A>QXY>0>=>>O?$>R$K]xG$ܽiB?>ءA=p>=haK >b> >*=<%) 9[j>0v]= >Dש;p:>uyS>?>R>LP%Ό ?t> >>">L=B/:>3jZ g&>IgH>E#ݭ> >5a>m=>T=l`=xd߾ʍEG~Ž%4UU>niڣ=9,1>,>DtK*?-W Bg!]>sI>@%;>1ԧ=-=zC,1f.W3=ZQrmV?l>o<#%C` =/T>#&60>K)>X ?t?۔*>e܂>)>$="==*o>?=ui:8:Ol厾%>0BHW=%ޭh> >e>fc7>1值0>Ǽ!bs<0)=Ϸ=Wő>>;3(H^$Ⱦ\d?;O>'>Ft>oZ'*=)>}GO#==)>LAƼP=sb>Pɽv<J>{~>8;4Re>`Uh=#C[þ4%=\?PX>==jv>y>a8ϊX=y|y='U c t=V߰u'><$~>t>/= > >|ƾ5>TP>Y>O6=j#Fr=Ua=q=̄~{e->ŰtrYw>}L==}?S>:`c><ΰ">(f=(>P>>c]׈*Ƽ= ưܽ?{Y>Z< =/~<>}HmjUҽ>:`=>F=='dɽ`>GkՑ>&]Uً>(=>L=v>|`1Om=@-< 2Mƽ>Z־z֋> 3= <{_ >*L@TW;<==^k=d >ᅾjm>o>)=Bh> "r3<ȇ=C=?FQ@n>K='g?ԧ3[V.,a$?bȑ^,=W`>럽)9>4;N=ý9&>ΰ;>m=z<[@w@=tCJ->!D>+>j`V5^p"NF=k@>qR7g1>yM=7)j=x=L==d=D>`>ݏ<*o>b=2K,>=3C>>=?L/R=[>s@=Mjf>.Mkx5|Ң=[>̇5j>rrܽLVT,>J/>]="[>- F=Z F>K]>؄*=V =+I>Ͻ>M=yO=[>oifj>$Xz=ۍ=>dBV@=:>=韾Aa|BM>>vXvX>[8=KU&Ÿ_>͊>Kǽ&NҾ==}=A:OP?ɽ_O =fYF43Sk3==t<<5u##i>~K,>xڤoU=8!ϡ>}罸=q\=T>~=M>H l꽯>z<5 ׾J>Kd=!" -B6>IKqp>tټ >*Y6>*<^C<,Ζ޽ұ>LO_<㽒>>!R:>z >lӽ>2i<;Y=-FsKŽM#& tq}?׽> ><A`>a~;/H;+=\3k_9\T">wHQ=1=YJL6Ͼ>Ԯ;:Cl>>n=>1#K?`S0ʼ&8>ؾ W>V=뚏>X=z=ز2. 
?%=ӽ>=[>ڽPZ?ś bBAm{3=(l)>pSE/k>= > >t/&ԽG?h>X(z={;P>>: =Yн8\hm|"s1>^x=s= ϩ 䳎>{=/O=0k>= =q= &6T>袒=j D fkv9m>u>:>xIei0[M=&MJ>Hd W \>Rƒ>>M3,>ҽDV>+|<ݦ==G;=zDk=헆=$2QX+>v>W>k#>kCih=Y#V>7'`BgC>:==>O=|=*>쎽oBQR3h]=kT$>:>+@=H>"[E׽2=^̳l؀=DoM>:Zu>A ΍u᝾mWk/O7ȏ> =Ba-J>X ¹=aY>a S=EU>d$>,m'>6}>4#144=4J<=hi=J=d=N =v8Pݾ/=%>==5=Ƞ >+-%B>fU>Xǫ=B/y=v6q<žEdetk>ե7<R>.\n4ig<y>+>аּ&<;VϾb<>`=J82>ᐦ=܆X>7c>V=eQ= ==y ޻rd4k=aN>1>i=ld>i>&aN<9;:>s>=2]ʽ< SMX>ν:~ O<> J=3h,ۧ@=c5Kl> =۝g>lo᜽kyf&c;!>v>5Խy>ؙE>->'ip+7QԼp>K=Y={0>tz=<<˸/>A潋 >a>L=_>X>(9o@==fMU=8+3>>{>?kk> 'a>߇AϽǽU=8y`>ոw=@㾗nP)K =W=zi<9V%l>n-=׻>&T>gD=?^=E?ޏ<̾C˾=A>i>㉌\=ghy>Z==!d3> ^>rE> j38Յ=`^=>W\>)b;XEO(- =@ k=ٌ[>:W'=g>r̼ྺ<3W=J=5]=>7{>>|=^>LmJ⼴==}@%K>t-_4=="]ƾT=>Ԯ>\>Z]{H>Rž쌾#Ё>.t>C>\u"}=z6m_>aR>^>>*8> Y<^ri}< >n>Q=&KV>swR%`ս]=.N+>r=,݁=z>͇3>Mm>ȞŇ>X޽E,Z:=ݽ>߄>v*>Z>Я>?VM>ZK=]lHS:>qGqoD=3/>F wף>HCs>_¼cfνh[01 D=\w>CA>L"Z >>~M3;U]<7yfU=b8s>1j\>=xBIW=_D>B= ;=%=c==+=g*CP> uRw=>սxIn>7:M_{'L=|>r=H3>_<*.=_= 1z޽1KE>K*A<ʁ.=@=E9<0v>^l>qo>J=3=޶p:>wM>k5ѽ>9=Кw>ҽ->>=5~f YOsh=8=;'*8>~"w.π=V!P -=?>Y==_^>5do>{>u1X>Ѹ%ͼp>+d=(դOs>17>caDoٽ^=<;f;=\Y ͔>|^=&>Z<X&h~=:w<=C> X Tf>Q=Ȃ=N۔>o>ؾ۽+=R>再<᜽>maO+=Уj= ==cSIGhT7r<>U8a>$> s&޲ݼ4ȫ>]q߾ =>?=voO=kvs>J <>L>g[> 8=>=݆T>\lB׷>!#>9=~4= ?<>{/BN."М=JfVg=T:;{,>Ƚm\> u#>1SU>jF>Z>1>{E=iM>T6RV=/&HǾDAݽ/fkM:w=;=Q >%>mV>IDr'? 
= P=nTBm>WRE= /k IZ=軈8=@nKc* (Ag>ķ:߾@ O`|=ߥ>\*씙g >DP\>SPym0A->-->6=>>orF<{zC/<$==(>ȓY>RM0x2=T= =cAS=`==>Yuxl#3=- 1;}ȽMڽ{حr] =xb>C:=߲=B'nn> ߛB=zy #U>'q=6/=4ý =>苣=;=c>O̘>gнs^Y=k=a^>C_&==s*=(=JE; >Z+/ʲ>= I>;=T='YHw>H6vz>t>=QC>=AVNrg==FeX>PV<~Ԫ²5{>-$"뚲<^=j>fÜ4x7T >Q==8<=<,,e=NICc:+=H|>B<=wѽTٽaAaּ=߱7J1%>z.=Ͻs>w;ݘ>cqѝ.a=JĮ=:>j=[6kV31d?=ۖ=:+p<u>#GQ\OϽƝav2QR'س<ϓd> ; M>G;p=qMG~GD=#-q&>P>?A?=¼ vڽl>ڽc~Kn!RѽǬ={Q< >>9=EO>I=+PΑ<P X < ĭy!@_=ܞK*̼xAuq7@ >f<"9>< <+XR)l]hc>  >,>)'±i;= >U|'>c %]F+>@T>pck1T8<7> ,9P>fӽ(L+㕯>?,<4i>8>ҍ@9Go[6!?_э=ж[T>)=1=y=>>kƽFI,>]֪o=.=Tk=:f1='<=R]1ڽ~ijTx> <k^>==S>ż̽nw{ >sG> >̀(>wٽ=>ѽ>@=Di8|}[Ľw!tOܼ4>Jɺ\>Sν=L_iB녕>hA׾(fɇ<T%>J>޶>;V>b彽pҼ{$>S\P4=">* K>->/r>* xL[/>L<&7=4 X]>_>L;1 u=ޖA==ḾhԽ{-q=Ҽ=u>z=][>A | >B-=ܯ=<I. j<%>=E=i&;x]>ٗQν >>?M}=|౽-O>rr=i>]Jes%58A?MӾ #ҥ>;9=b4>i>>N$p=Xr=?w> ̼VɾU#J3q=K>3% > >R=> z>~s>K(eڽ=1^g==A=64>$~0N,N=혻5>ç(<(V<9<clu>+=">H cJ>$ýjU>+>y=h] ҽ5=Aj=>>(A>-i>t>W>Lqzu=Od,׾y/o$#>/\MKY=퀫vQ ,>a>G>V>z>]OkMP8U>>;=9PҼT>kkAq** 7>˽~Sk;Vě>^jƅƽm>*˼V=;c&=!Cb<=8еP>7vd.>h4/>P2Te=(=@LA>>N 5廴U88<={9 .>C>`{>x>K =Swg== c>O>`#>=:=Y?>=>>k1<賨)?tpC.>=g>~=|>I>ms>{ьȽ]& 0<]2>M̵=<0s>RF=S0E:Е6f>a=GK}=LB>D1>;f>%X\zn>8/F=ā >A>u<6Od2s~n=й >Fܽw> [ R0>6)=.>ӻm=-幽<9>_c>rѽ*;>qSuOKu`t۽>޽*>>ۦ<%kRS<c[>,3=+v ָ;n>#V>Ȥ^9V>B=mxT>NEhHd6=ソ}>TPh=5 >fNܼ̿=߻=j>vCV5v> |>(=(#׼E<:vb=f$+>[ZG%[& G>HC<>Nx>>S!/~Þ>۴ Oʽ8ŽV>0}=#>-59>=Gaa}RP|(.H8輸,U+#q9(>M> >9=>*S>.>o=e=2>;1> > w! ۼYqI=V' U= tUh ={4=`[<=1>=󗀾>.M>Q;>Bq=Ծg+<qU>3=03ýr>H=8 >%kH=;ۭ=ўj0dNJ>=f=mGJ۽~^>b\=> &56=h#_=-܍,= -ߎ>H>^̽=q =_=Kv>׃>U<^=c<=>\>=9=a=4U=1j=-} ==9٩">|i=>;F (KbaK=>J_=3P>\>t tw=ca=i%c  +|>WӾ>#<{qS=DA<5H>j>uX<l:`%ʾ2Ɨ>Rm>6`> ˽=n =ԹlB> /> ryC">Uj>s&?F+6>^/=@վz^=*M*GƽGzqL>[| ,>dg*|4<5҃>ܽGė==?dc?\S>'~2>E5ct.yҭm.?)=?ȅþSҽDzM=>>Ʌ:?3dz3>?~;>+B>CNýP=NOV>>>ODI q!ʣ=Iɾ(=鞞>)A>ݜ>8x%B&n?5>o(jq}q=TO?@>">->۾>;23 Ī=pY=O?|~L?l6C=".>\ >MpK F=\e>~=Ѓ>&ֽC>O =>@(>G w>bC=.>w@U>„?q`Tq> C->}>dm;J=\^>Dc>4ə> >a柁@X>܂ᙟ>:? 
D=jv< D_U0݉L@c?6>`7H9= vI>[ݯ>Ne=bc=K1>MoeuO>>D>=)1kU5#=sz=LI>o>aWjǽUޚM;ĽTI뾿<4ྕ_< dFo>hR=6>~= >R==fZ<:.?Y:/i Uo <_\>9=\EHa>uȒsTCJ/ؾ>t٪=8Ű=8A><|Z>82=\V>A> ?ƪ=>=K>#> >+>fӽ᜽ZVSZ>Ӂ=U`H2 p%cę>9=> N3@>n6Uy5>R=B=H-;c>W>RYaI>ݽ,> L!=ƾlr K>-Bs:?X>#d+D=V/׽XMC>z3=p P>qo<>Rf/x>ƞ>ڻRSw=):=jF$>8"l=>=!>:1뾑N>b&:dmE >%}J9m>GŐ>=<t 9=i]U >TþA]Zþv9 W-l疁˘ĉľ¶q>\=>&սS/bd=/>&=FL;M@>% ; K?f׉>i%8<'О>ߕ=S۽>ns> >X>+>|= Oւ>ژ=~=>WhGp>#=mGO>;>I;pD.5g]<\ٿ>>QT)9=#a>F>1n7>4e><,;0Ľn>=3=9\˾3, >MLJs=P=gykr>]=Y*?ysL>hsҽg>f5oQ>w\~,?[>N=aRGν?>KO>/ӺgEd&>͚>4h<N=A> rNi=ѾCW>B>,Ĩ;> VsнC*(=T==l>*>> =4cT=nɽ!>jnM~>W>O>Y_P?>S~ji>>>g!yG=^ĥ>G>;=>r_xeYA2=% =>?9b>Ͻl><9#g> >a-H=Ya=\><>/tN>H>ؾBȾ>>6eI>d=<=Ʉ>G>۾7}B> >!jS줼$f>4> (=,>QbN=fϽ>i< +?ӞҾs$>k>=>C >A>>& >&/F=0>ɂ=]B)?}=M 6ƾeY2>Y5 ޒ=<=v>K= ;?7=8y>6΍)h>C~ 6=#>*8^ i=Ņ>|>l`.=5¾ =`? D>->D!=B=F>66լ=C(y>sB :>U+@W捬 =3:>=g2OU.?|E=:=!>[דý_>x>>.u>9>?>:x╾0Լ cQ ?SL</5>tKA>W%>;,!wK|CT.?2J=m<;l>5ы=̟)`>=>Qx=i/=>X >\𽵗0?jn<-= > >dýW">/3>y>3S}c2>iSt=؏ :P>:=ӣ^;E>F'>*)>>?>DJ=XξvQ>,C>L ><=x>")> !>AϾj_>S݈>3=.Xf3>x={>M P>F>]>>2= @-> K Cs{3B?Mý̧=۾Y>j>&=|>SiWľꍷ=I7>bT>I@>E=V9 h{=5M= m>E[>ӽ2==djsqW{a>hʾ=Ͼ³0+b=%i+>>>(k>l_>RU=ZQW:W3M>>`P;>=D+F=T?+>,[Q>yjj<`>;<=>)̲> t=C>->L$h=ғBH}"Z>w\fξF9 {SB0G>ba{Jd>R=C>k*>X=U >9ֿⶶ='>,q"f ">ݮP]\bST>">B0>x==&uƽto:¾===*n->I>>=_7=#j*=(L=Ŋ=#(Ծ }}=Bh>>q>L2Z<>Hp={ ~=4z=ǻX=>.>}Խl>1?k= "?Yz˾=4>?=B2x> n9>:Ù>R1(>]K>>.r=ȼd=IN> мKyԆ>@>K܉?=|©> M;( ʙ=&g왚=ݭ<==)<6=s|d>K=Pq >qFF?z <> 1;>m>Ƚ] 1E>W¾D=>j?sxg>$>S=H=6>1=GĿ=lr>T->νnA>fWbC%'>P=dֽ >~i9=ճu>R[X=L?j=Sm>}L=]V`ؾ##>F/1$ >im*֠0b(1%JȆƐ > ??|=)7e##=ӽ\>R}:>>M'>>|#5>/>(G>>PE9>r>ע=f>hE[g3>&(=P=پ>-MT>"=:AB>˽H>͘稈 헽Ek=uRCr6SXY|E=Rr =_F>k!*>45>d=kh(㽣ì5R5j>泟=!}> IP>~Q>=7{6>`=!o =ᕽUIuuW<=VE{<ǾMW>n)D>e>M^=l>ܾ>ABӥ=}>ySc>b9>dg/sIPl'.S#n 3 $#>-U&gqtѽ=b>3:$e=i !D->91{mȾ{ǾE>=eNP=e>>YK>4?8==nޝ=e"mUx>v5B>)=G~ s=n􃾤Jý[?_]櫽>=>> >D'T>==+Ƞ"Lg>D>p狾\3ٝ2s!$

)B@~A>3W=*>dVX=׬ ]DߛY:u%:fK>MBY>>;HIA>󣔽L7h=?Ľbh!#=߼-=㓽=(RP0\<!>fT;e>.=+w]==~D>@)">=)D>˾2C=uJ>%3Y}\9T|WR<>xaW=gD%=,%t u">6\=q>i'd#b3>Ux=qc<ҕ;/HNEqv=>޽CÍ>1G <7X*(JO=< ds՟O>y>hڼ@gX= >o>`\8˟< X=̓:M=\$K2vƘn_>ʾh=K>I.>Z}-Vj)1弙ֶ=T+=u/(>Ԇ> T<5>_8>-e>ʲ99a=e̽i c>D>a>+.>ڸg=~G>f@>s`=e ҽ!<)ެ>->I0>>ORþS{OQ>>uF=o=$_e;E)> ü \=@T#=Mk٭d>Iu={Gk@bg\>k1;}qp>FPT9C(;$N~aw>=菠=q=<n=SNO;Q;B>_^Ͻŵ=h0=O,]0>E=|> <(ԾJf]dl>!/J?/kνe˽.b+>8<=:Y LʽK>?>漝<^/R>ゲ彇i+=' ׽.ǯp>!>먼1A =pD#= s> >Qۋ>!?*ýI>-,Lb^u= =8ݢ?>xOtK<;->o/>ӛ<|={=eݗ= MLֽR<_u-漂>!;ڽ(cӽ>^>ډ*>Ӵ >bM?q/>=,@>S=?$?fEнN&R4;=yr=x2=Z>ƕ;l!>;1>=m=^>So'k>T=_.>w>25>Wk=*} *u>%p= =&=!:3\"vE͔>m6=뱁<4M|;״h8>ƽJs"3L>q>X>FU =[սS=/8=%8>%r>,ڣdMAV:/~k> =S?S>q$4(oV>H&qƛz=]=v?ʔ=.3= ލ>'>>yp >½=6Q>R=>&a]ܽs>/N.M> X=0e=/E,/:#>=.<9ݼE9=">}>Hdž>3>cm>=eq=A1+q=ubz>?8=& 6σ<>)>JCZXj<u&q>¾TQ>>cke>ػnbCF>\<60`yu?6U".>ljo>f=̂=fP >um`>;>#@`X>h>'=6='3p=HB=$>޾]<%h{>m봼Ӈ::=HsFXk=^Jýjʳ=,>2=Rg})LB=!>ԓ=x>J >>K=n=Oh>t%>}1>l=p~=P4>&վDZVBK^t>O>V=8dQ O.G>q >J2,2_=\Կ=J&>= [>+2{;>x{;v:8=v(9U پQn>JTº<}zӪ=\=фԽs9p[>-" T}=I!,G4(r >^H=#1 Bono=7">ͼԣ>Q]I'% ;FN>fX K=;7>=B>H>x\SU=^湽!= dB=A>R|>?)_d=Y=6$>o=˻M>ɽ' v>7[< >r峽lN-fcٽ߼>3;l+>dT>bW=rҾ;ɽy =u>ב{> 'a>h={hr>iݲ=% EJ[-I/W=V =/1>'>̽{S>4;>hbPBm>>=ҙ>g<݅iA=nGI>]>jZs> m7>#>M==A_E0A>33p>i=4=<>feJ>[|o1=\==r>1>е>K>CE ޻7dD>>>cRd7}z J*+_>>$w= $.3{ V>!=um"h>}=>z!댾}=$w佃=ӽt?ZA>%ٽFu>Оܴl`>wW8=W >Y4n>#=R!Y=>c>ዼȇ96=ю>e{]=Gl= >ÿ+$! 
[>=A)>=V=l-;*r=FA3==?r>u>]r;ͽ><|J>^7:gm==ݫ}A?=bQ'-h^I >X־2ey>0Mn={<"((r=6.Ů>"7>|J[d=A>e<(h >DL;=2O>  =P>t0> ϼV==üK->zr8>%`p5>WۦFAi>=ڴ9>¤==|D=0om=:xK>ik򕧾x hm:n>P9y /ɽEC>a(>!<`SRHD;ݙ>מG+fսE9#n^3\@= =/;Me>=_ zɸL= ZR=2>vR> ">|=R>|b*=P4=W޺=@g>>>;9TG>lw6]>@`ǾI=C2=61>=a)n+~->>cKjH =ag־^5<\=2 o=ɘ>>dF<:>jQg=Q.a> {p=6=9_+]N1~>gZlݽ>G>:>1R"#-kb=ƽ\|>i==[(=f 8ŽS;9̽LO>"ΑD->!뻲ф=o=~ƍS-߾fI<Žvj>D;sR¼7=,B4㙁!V>+)Yw<F>K>7>u ?F=qfP>/?{z>d3v3(,D>sq d&=>٘=G>ݽnjrbzNB2>߫=$E}i=뵽3T|̌%S >F>W:[=>F-=׫X=^= >˵ꧾ򨽡>=ka=ü/%>SKxiCapLɆ=5>D>@׻c9<>VҼ>N>jVT4s=Z~T=]q<:U;B>%~=GS>R>$νe7*=de=d>ˤ/f׽*>">@*=Ӄ>Z$G>R_p: O!:B>hua===D>Ԥ<" a ><_&'vD6zR,E=Rw==FP<x!>T=ݚ </>u_kRY Y!,kþ=}X>?rQн>vR@U==e>;jYD ,÷ cW=MC>-'=> !#)<n>x>=TG)D=.XT{i8aL >6=,>c=8='>X%M>fDN=鼉IVL=d ><1;urf>[g>%&u

^񼢡*;=btW>,ܿͰ8>m=>ν >9=UiA>>uz>T =ǕJ>JнZk ^(>=⁅=j=Mn3^Wj(3>X9<%=aRǾ=}5Li>q; >ƙ=ችe8ǰ<~U<3T>w7\ ?2;\>V9u4<罭N>5A=Jz^)ǽ&.5!AνxU=>je YF B*>c 5[ϑ==9Y~>5@WW}=Jz=a[">AՒ$@>Zs>j6>_{U>x{=R>+O_)8>S<(x>O@6[Y=lO=DAA>Phw|ǎS>=" = > m>D>/ż]Ƿ=4="=nw>S<2>>=4=ߐkȼT4vJ'V=%;>½4>WZ= =v>Ӛn=Sxvm = 佀[<:>m'>DVRýl q>I=-=Ǯ>>.h >}7= >>;2Y߾|<;>^=uz=2ԼE]Pr:Q{[qZ= 2=%4뽊 =.B=lDWZWd$>j=̓Ok#>bg<,`:ػ*<-(>پX{1d(>=)>N=?;%C4Φ;pI==P=R=ҷ :ORY;ah=K>='=ZU,I>=v[5E54| = =5eJ;\>jR,>ZE>67=ElgpE>:.>p%=e+l7>\q=8> u (Wqm >N;pʽD=ߴb#>=Xo=r}>>>>=ܔнI|=?'>׮>,p0>y+}<Lm>8>BQ9.<?>z|>_c=}!)>%==ao8ֽS"=/>`{RN>5>ؽ1='\|6">>3 >g>~vZ> Ua=?2ѽ^<>x!ZG>9c-S;X"C>{+^=w=A6ʷFCc>(]>낄ͭa;ұPڽθU=9^_Ts >] >I5S>>Ctm=N.z_%=нZ>t=_= ra>I> Y9zX @>.n0=WB- OȽw{"=V5q=:X3û=d-<>"=>N>@2[=k>i==d>;vW>q=)>;@dpH8H>4e==>n8e=<HӼ'L=4|b9I>)4n%ЯB;=dnm=qK=Ȍ=`l*H4^X>#=TAv=h},x4!M=s>E>x>E=)m;9e=>5WP׼Q_I= ۻUxN4>;vC=7> |="|'=jY.(f<^gɖ>=z>y=>UL>>Km=;=$>hʝ4=c۬A>sr<V>JwbKx Os9->\>-=EE@3D+L>=Mņ=>]O:Fo<>.=E[={}x0>j>xe>bF>>ܽf=2MBF=' &A>܃>k=>?=vM\JT==VK%Tg>G=9={7> =osV=OnᲽA'Em=M:B=O01={3ʽ@Eh\>p [SӾRl=@ Y*>P =✄>GB=Aq%ڽ5r; 3>?. 
=!Wv>>s=w>>Ž]̾= >1آ> ysJ6V& >=owҽN >Rn=q>A'=j>np>&5>*Nf>4*ݾFuKa@K/mƆ>Cޢ_E=<#>>ts S4W\>~?}>@>"r<=aGԽg>ͽ/=F, (>E>D?q =V=82l={x >ӡ>}><Ͻ+Ђ=8C&=(QiB9V<-[>ſ=mQ̔ 4;>dtI:>UNfSޅ7k=55= ==>tMK=>GD>s)/WYƄ8>ޥ>Kܽ3Q2<pLR9YW=(77z >r =mK=0ս&^MK=?*=-r>qq,k6>/U>m=8F> g>>|>uJ>>3B=+D>t}u# K=;ӗ=n=Z>Sҽ;>vN=2>Nct =rؿJ>1鍾>>D>x_=+8iVUQ>Mrq>>1J>u7оA!|>ڰͼ7?]hA>\&=T=ٰk>i=iw>M0b=T=[ =-s>\³=׮ռ 4C> =~]=<=<Q=ʌݽj==>>=)ӗ>轟|$lE=A>=l=Fa>̍I>Tþ;,iw>7.>hT=_>ML>`Esʾ:>>vN9I)>;~i=\===@<&>|ݽ6> ȼ;DTƾB(> +UqT><=ݘ>y py>ބ=> -ov^>o3 P >V =NPK>9; = v=N7QX= ,+>OY>hu=Z>7K>|ܽ_P=~=3뽫 =MV(>㸪߶!mfv T=\#ҽj?;Tlтqq)*=BP=>찾a<:|<)>b~FggM= ='=Z/+鰾ccmr>=>i}_E=eb>!f= Yn>=E/7>\[<&O@vZz>lm>^Ӿǹ8=%N>Q>\@1>Ly=WMŽd=&>=Ŧs7 ]>=O齩<> N=@< U='h3t[JO`)>u 4RK>f>E >h~[P=t$=:\a {~X>2>R=lm>UN˽Vþ;qD+<:߽w0-= ?M.|=p>l׽ݬ7> >l.Q20I< >6?D֌þP=oy>߿>jxq=G==?8뻉h)~5ʱ>x>#`i=/ޫsELG7,gh}:-.!Cn>Ul=m߼>Fկ>6>==-jJ>0Y>fǾ*c%><{>N47鿽n>8<<<>~T=Tu=8W=l$>1()=w<c>y>4=mY*=eh= >>=>F>Mϼ_\>HD >`>=~ޚr/h[nC >C>3s9%;zJ=ʈ>jуY'\vL>') D;O-⾫n<9Ǚ=#>Bz>PN=[j%X= >OL_>#S=g!Y ľM>8>E>e_< f<=<~=_L > V>#H+`=lEHEa>9>^1m>n>A>v<ھ>"Nyǘ>s?6>%>ե>Hi[z= W4Z=+.>8ؾ]}{>4o[>>!=bJ=Y!=YNڑ<~CS5>R>xNy쌼M >tt SL>]9nt> &=xѻ\>t= uuQt\x>ʽBp=Q>ڽ 挾*j>ha9 >=F> >Ѿ._=sP½J>%<݃>{;-9<=< >4W+2=^ͽXp>}b=峾٭=(;b>JĴ>y>\X^.>>O!= *>:!|>н=6>x_>Ɉ{rp>c=Od==xj=#=P m>p=aVY='=)ioV>A+=KU>Zoֽ\HhdI>9rR;lv=都t!C1Ļ>s]=oT> >Ϯ=#>茱>X |ꮒ_Z'Tsq+/Y!"8n=OhHLB<Խ=]Zj>ߞY>Yn=kþ>v>{<>A}>Z>6W>`E K>si>Sei5ƈ>N1>  >1<>*2=f>#ɦwӧi8> PI=}\5>L=ؼn'C S#$y!<(rDѽ=+\=9">޼s~e>>>>"?T>cۭ=ix:`.x3=*轘gK'9>B = >捾m=av>>@W>R\hԽ]>*c>|$ {ے>'E: ;O֍!al ST]<8 ]HdJ< #M><4>;|>DCl >Y*=~*`>(>)Y>+=Q׹*?{>=YY>!ݾ4N̽ 6>V>V;)ѾUlL;JN$E>I>$i>]:>>=(#ٽ!U)=2>l=-OdR=֏ټ@@>{[Qh؆ebp@$en `f==f>r=t2wǾn=&>`=Lj>64B k;3ZۋE> e >>>=>%?S>g=ȼ2>|j==߼ld1[>L!<<L>xݾL=q1;X`)O䆾񽓈<ӽ[>>…$=E=ȅ{o =cB }=fȔx ZٽSj =nr9&A=9L> 0>>`d(>{+>|رd>ȅ>H>A>q{~C>~=$B>X2%|>=r޾[!?ǗQ]֮Vo>e̒>=@;Yy+;Λd]=~ Uo*=`<=s>HuIeپOr,o=P19 =[jN0>~Xrk,;.|r>nME<=U;H Iu>zyщ=̾ĕJt jb$`='WK><;zG'3>:`=H__{⽇];S%>=%`꾃CAU=N==9=lH㚾?=y>*>>%҆>p d<<]{=pUIy;ս'j>Ok ۽S}N>3=MF>>Nc>[wWZŅ2?ߐT=?SL2>뽽6>~|k;_촾FRٽ>n=gF>`" 7>^ٷc>Jf >>u,G<⼝=䥠۾=c=fB/>)u`j!h 
?:+=2VfVI>35>ٱ>r>ХS1<$>:<:i> ׽bN>u~=Ǣ2߽ޞ>Nc=>=۷=g>:^><&5Ŀ^=pZ>);tμ%f_Խ㝨MuU[>C;M{>`.L*=O)>b>?=J\<=+Oqٚ>g]5>PM>A  >nH>>XZ|=i%ž=c>ƓE99 >2Zн>>*~=PĽE{&Q> !<;4>t>doF_>==6c=1>{">uP=1>("(mC& >jآ'оR a0t4Adɽ7 NAz=Ђ>`q=>>Щ>Tq<`h}җhĻrZ*=d8>=<ݾH> Q,>4۽Ur]O>`(>ĽS6d=af\=м>>m<=C d䨽2 >!r9򡰾;@x>% b ? 8AOnBU==>Ƅnma ==n><>姽L~#_o>@;= ҽ{=9!LC=v>뽗W)ǽ-񵊼"L?$ \'=YV=z9=l/=&,U =/Ӛ&ɾb<1>ɆZ>$5a!z=>-uݼZvf& 㾑!z>/=id>,x/>(X`>eR=<>dŀ=?=n"!W>h< ;xm%>K >3=#>gV>;>|$>t>C>w#&=̊Q>j.A뽅>RT=:恽1+ٽ<5l=>1 =Н< >}=VHo|= l9>Z)Q>4E=>]@? WFڇ;)=->>WbX$սƶA= t=M+V>M <6=o6=='}@D>e<\>;=W?>66D>=t<=>A`3=>{&e<T>{>A>-o=p>AV>k~>n; DhΓ=}=I>(I=hm=Zlr>gYۼI j9=>!_& C>4#+ļBE>ݾߎO6e=x)=5>/V&5A>1X/yI.?`=-.P[.a(9aw&>>żŢ>e=B=rL ?1T>>!q!ξU߽EQma >=,)>/>tߔ=>>w=+>B>dA8 >B w^ay6=af>fJx=;sO>2(]1>h >Ox>v2>H#>z=;>!Vܲ=V5>  .;MK^>> l$ /4d<[_,==2a/>'|5lp=р-O ><:7a=i.L<׽ xF+>[>^G8> +9 ˽V]݀>[=Fo=>˼s]>w<=!>r6>XW>1Ĩ e=\Ľ%=PW=W§=ιhډ>q`Z>=;><Wbz6Q_=8n=!=fle>g>Ynx=n>gb>㶌ivA=φ=s Q>M>8xͩe|>ͻR?="w=Ց>7%<2>< >H&G>w=_瘾 5f= =>$=#PM<q>Z=g> ? >.N=-qa0>&=ӽ˻Ȧ#ûi>ZܾUi>轡)cE>6ʺ=F>H=ψ>V?n%)>w!1/>-K̽D\=tEH>mKzD>Y]˽3NCGw; >ּY=<]/SChO=0>î>t=i!>::y>wMdK,:>&ը =ā=ad>;@y9>*Q#]z >nCc+yA>O>Ļ=..B>1*l V>o=w>K>->{<(o.,r1>$=I=E=_<"O=`=76>?do־˺Dg)d\=g;O=L] JQ=Ի->l, >C8=lp =A:վF<>>"I=E83W.%v i=K .>>>z<=lo1=f>>+s>T ==`>4@o>>{Z w~B>rP>ԖX=B.>돟û2{ ^`*U=yq>Vtq"0iw'=yL=,>C~=2`ݽ*=>ݷ>rSp= _>Ɛc>Rrݓ>s({>\ڄ<+-_=VGĻXj!5>"15^=c0=Z< s=7cKJ>؄Ӽ;sNJn==u<٥ۃ>D<9d>f1?=j><k!& Ծg菉Gqa>y=>>>r#>Y7<w$㙽:f= y= `?;I=D.rG{>?IػW\>ɨM(>#>*HB=*ax<>WܽKv=?=jýQz*=ۧYi>Zj>d>v!9:>3 =M\=.=H==0> f<^ۂg?J>5=g=Ƌ#p=g=TFG=LvCp>Ч\=>>=>10c >G˾Z=>Dt>!r7[q!>eD[ U>଼=A'MQH9=t3yEz3`>,}= =F)=NO =&ɾk ԼLf齀z=P\=ij>=pN컀"F}=`>==P<>s`=ֽMDR>m=R@p2>e<DҼW>Mی5>/|8&=3(ݱ e=+=R o=Ιk`->w S>X}eD=l|DD<=7}S )K=i_'qU<#d$<1>>xhG>۲ <=5W?Vǭ=~];; <^=X0uȟc=&&&д!;zh.#==PASqc=}=3]Ss$!\ڊw> fQ:WP;7=T> &> >`v< :<ý5-VL<Sڽ ?=н.8=(ӽk3%NF2>GsGrږV=>X>=y\=[ _>(>=;=< =;S<|,=+/=8v=>Ȁ_v>Gd>-޽.?V6Zp=h>8:{!=ݾ0=6ESgS>G>3ML`=/Q=83Bc/4փ-<)ڽnfɽ>=?< S>]>>=>T=<*ܬn 6HS>9@׽3>J=?=> M=O>3>𭧾|>n=Da>ʋƽX=v> #@>(`=`DӴ>G澞:ki>W='>Ϡ*>j:>}s>R=L㉽" 
1><x>:U=8b>*>o"Dx4=z;<9t8=L);`,t<2#2Fj>L=n.ƽa@En=ސ>/=D>.LT=a4s/=A5 >˅̼cF<Sn97 i>%>%M=VZ> >:[Z=6a>B> 0>V^>a= ?=XQ:=t?_>r :>=S5>(+ɥ=]Ƚ">(v{ӽe=W>}5`N',#>D=>*;>U=Q8= !*ol;@Y >Z|z耾SN=c*>DG=zu=b=x=G Z82>==;.=S>Y.%>X=Yk>l]VB/Pp>aUy_>F=>錨h=U(Ʉ=|>DR=!>7<.}=n#ZP>lgLfrsg>p>x/>"!*RU1~)0; =t1hE>R!;Ŧ'>&5==9fxE<)̥cϽPb^>ӽɅ9j5>!sv>)G6p>֩ܽ6*)>XŽ>1>WMD>b<ۡ*==-V=Ez=E=m=vH8< S=D[9>izO =>jW<ǝO>'=>>G> H>0f9>1W ȋо6ܻ=%QY>->6kF<'D* y1>]a>߆&!N[>U >>Y>#H4r>轤:m< P={騺v4>lY5>c0>kɒ>=T1Ge>Q=7R'=Q(>lvѐ`,c=H)<,>o=\<»D>A^i t[ν< ';3=c^>=k}=M`,=`=>X=t{">q!H>=w >0etyQ<9< =Z+轏pi>ys>"=i=6Ұ>¥>J^=%>~><)$=mЛA~i_=d=?>.O<뢾^3]\S=l s>> %[ǽ >7$=>"+e2o쎾'Ӽycp>=X埽=)=78PF>'>4Bȭ=#.=ێ>囑RDr=P 򽿙O>J>㽸>=sK_y=D5=_RJ0?6>Õ/ԽaX>~)=T=W=%r=+ $<&D]ڟ>=0*>@|= h#>˧=jXq齂*=|d=̹E|><*+:>-!>Ÿ=`Խ;#Aʈ>j%G>a*>9=!9=Mk`>R;3B=J ?ݽP:>ۖ*gc=0F#a>==E >@9e>ͱ=(5&Z$>Mnaܑ>=1`W3>=}ڻƬO=">Ƚպ .>/>=}D> =ϗ=)frI0>Xx>=iP=7Sa>xP>>SB>ލ>/>͂<>M_[>>Nn>!7wR].=#=d:򽉱xG!X+ 9u=&R*>B=8bwnCC[=H,VG&5оs-X}Ʊb=͋bb2HҾ+=҅=>A=˽\=>Л= |꾰=>tHn| ҤC>׹>ٔ >jg=I>=1cx>0DS%>ǓBޚ=RmW=?)==j>a>LK=B\zj=ڽ~jTa=hսd}VP=!X>R?v"j>Ri۽1\7H =ٽZ2=-E=P` {M>=V].-=.>`H>= ּ:=k<3&[g>&^ S}]S!>a<5< Eq/b&N"=:q"GG>v=A4;@>6b.>X=_ѓ=r;%=>=l "ͽ%55[羬8<ì>HM2;<[m3 >20UW=)/<ι-u:\>$ =ѻ\wJB?=?2= А j=vɾ :=v>=A3E> K">W=>׷+<0 =Q$>)p>f=_E5>?ƽ,>++N(M!=*¼=㙗< :!>>ds5Ǿw㽴kQNC=m>:k=B y=DǤ< :A۾ekU:=ѻU̽F>v+>;`<=Vy=eR=G$>H뼜O[hA>>X "\>s>?VFV<\Z};><1_`<ŷ)u֔==<9/i=&YX=0=|*%Dn<3ֽcJHEx >;jTx=>=H>%\<>6:=nBZS_=eg\}>[}+>_>9>( =ĒϽK|U==۾@>S<*v>(p;P=|o6.h=h(ܴ=`hC臓>L_= ==l9=n|-D > +w=(#m4-! 
>_',V ۽,=9<!=zCá2_;/>D>6~#>j;|>5,R;o>=6n@>=r=x˽' {l=E=<]; ܽ=59=K=qQn-Xu*=D=<6Wq>q2>WC\=`>`ͻ\aߖ<)C=q<燾 <==aoFh&e;b0>p=B=u=T%S=_W.7U>Pq=lk(ӆ<>>vV+HG=; +>!M>{ܽw=y>u=Y܅o<==`76>Q]:Yǽߐ&\<|½X=Cj>%au?sN^==J>]=ɽyd>2O>K5>>Do>Wʊ9.>ȼ\M==k=";J9˽$d>誽j>v=X >,N>t-=x>xyl>¾Mپ֢H='a.7>!>Vf=Ϋ/ =N%|h؁=$l [{%N.=DNdG>'5?+Dr< f>~bnؽs8Y| U5 鶒`X۽>|[=bݖ=} dʫ=Fx"=G$\5>W= l6G= >i< >Nj=sv\O=`K6G=86ɾ)>T0ơ5Tȼ,>qK,Cy| =݀dn;kC;BͽgOcph>pa==\LE7Wx?ڽo:Y=MV=<>սX>םaŻ=I=㍼,PO!B`Y?ǽ />k$`^fe>bu<;,;v>z<=bѽx:,=Q,=X= >p=]5z7K;GC>=q=.]F=#<3>ݦ=R]O=Q>GOn((<>pdnPdQ=csMtzk=Q=ue `>yd#sw_g>e>cVKn=f=A(F)>,<31>L=5ͅ>*>_5q=7>;Y"> (<=z"=EV9>Z='0> ;:/<ݡ0=&c Vc=_1f0Pb;< &s>CW<&\=>>!ZK>|>r=I=b,//>OG*>=ʌ=T{֫ >=/>e9>JճI<1&r. >'o㭾mܽ ɾFC>H>{.tȾTD<׾QAR>X=fS]I>^'C<(gBRK=q5Soچe`n;=ҖKN==g5|N*(Z`v>O=gq=(ǽ`=)>{ڧ}Rq>']4 a>s>Ҕ/> >:>=Ѓ̽>Z==u!1+>%Ͻ1=+>~2H<߽[=,,=[>X >؃>a=yK6,*>B1z=o=/Ê<>Wd= wH0)->d\>>@> 2ɛ>>xk= 7Jǽc>s5:ބ>\[ZS=44aQ'<>k=V?_=ּ<iӮP ik=q¼y~=.=ACU>< ==>V²>[c v.þiaI=Ixk>f!=Iiq=9H>\KN>@$;=| >iƫ=Dϒy=C?;r;o+Z<8׶㽼S)=]7">̣3>U; O>u+ǽVqn=0)}~<:>ˊO} 0=شK>=BP=t=(O> OEB>'V;=\>J>=oTK.>l =/<8=E<>z=Q=F5K=>=Jێ:OgOaS={=?=@6Y:нQ|3=10>> JYz/G!3Y>W!X>>f=X=V>/5<=VRUֽ=:=o= &a>Xj<3_=W >cҵ>H굽KA=C=yXW[>r >e)B_B>3ە==cg>Yw= m9ձ>U5< >k=$Nq">>g1e=b>:_=u><[Hw>W!=ZӞ= ==p= 1V*+>Wv.:׽Ն&ksT>Pü=@;D=#ς;a]VGn>0<r3Ͻ1h >>.߽ p=.>~9O>~þ`4'=!N= 6>^y1,=4w=>=Ƽb>dM=GM=q,M׍s`i>\2J> 0>JԽ7c}I==*>T U=щ<:bϪ>;ɋ%Ͻ'U>SQظ$>GF3>M>>VFZIʡZ>= N<-)z=>a;ekj>[*=*xT=bk=n\9!ЃnM=;|>>vH1>陽@TNVч>9>@">QLp7=3>L!<D>ECbhn>c=;>4[>w3>Mƶ )/PE92q >JR=oN2=iDf^>><>ҙn+ % <CF=<]j>5=.uJ=E~C+9f1<<>$ >tDàڼC= =ɥ<7}==n!8o/>},2(#>0;]c<->{۽ _+ؽqSh<2X<rvN>= >x=2>10b>k => 2 c=Y=>]>S)!(==m>y=[>*⽽=Vɽk ؼ잽ۘ=MӐ= ɒ=z%;U;=?=0ҼOU= z6ݖKPo>n<=F>6꯼d˳Yͽu==ij>w=}=U&<>=j7= Ar=⮱=eHA>:>㹾\мʈ!>f!D=yJMd>!4= >y=rՆ3o>l do+3T=>km(2=w<D۽.ْ=p=k҆Y޽C04M>ܐ= î=^wŽ0FS>۬=T4Da$U==Ƴ5m#=ݼ}O[k켃p:=%>j=t)>h?~_֣OK;! 
=%=l=6m=f1""v0chf~ý!=8>]>K>9~>t-=k5J>ڲ>dӽ==[/>(~" x\=s@?W=dr~l<<.@'PI.dD>L=wkv2>i2=w O4]q1W=OCݴ>kUQBDMk={t%)CcPu= /j<Ό>>V=y=;_>r*_ֽx5>&:>ڌ,x4>5P;%%߽h> =AֽY#=@>& x=Q-V>rV %=eH=UȽ8 \n=诪0R(QH=脉>O5Oդ=/k=֠>H7/.>Ľ}=?>A;2v x=ڼvRBļJ% >A[<7_=~=->6E"y>Y=0'݀=;N'DGX> =Q>1}n>褠=l`bR>H>H:Ҷ=1͇>|=ֽ`uQ\=gh=l<=Cy> =I}9L⺉37[R=bY6;T>e[; %>O==őʾ >Tw>Z%>2bƭ= ==22޼S<#1J=>V+>ҷ=<=">;E=s=M7<Ӌf>6̔=*>9>8vB}=I/1R>yJ$ý>a-> >(<'^sRSMuܘm˽,r!טgaFT=R=BN=>Y|;KfܽԼۃk>% <8=8=5ZY>>HZa;O`z|?=( p=.>[9>En>PpW><0<_>y7RJ| 14n򽍽1>= 0>P[>=T ='O_>m>-p C/ `66L b;=B¾ښ>D>"&>yq=(<U>l>>Ľ0X; = x>λxν Q=G|[9JH_=c JVS>>ݽ0=/I='%zp]%4v>.>}w=ex=/x=D] տ>4qű=#'>b];T@{ Vcu>.>f=д=z=d{9==_ =Zs> N=>_**$@= x=!폽Op=fUԽ@,,;~.=7>1~+>ֻV=w=*>!=/[B<3EmE='u>==`\1D>==~>h-=4=jrjuVd> =vJ=%)-g>d 4Jw=Ƽƺ{(~=|cJ>>V%7)Q= }>2w %.>Т>w>K.>fg={=2=dUw>n<[=~&=P-^1+ؼp&;ֽ8<[ }2>ELg%=';Ag!>ߐg>-+>K$<=C= ~>b=E됽\=3 >3>r&<)-<8=.y>r[=9=U=ھ0V02U= /=e";mH3>)>=n9 *t&=/,Xs= ߑss곱(>0'> =; qή=l=X=3Yf]-nfrƽYcFԺ=܂Ƚb)=L/7?h>S ==ն>;:2Y>S=O3h>VDT >$z>3_SL6=9ؽ >ST>n(#p[=s< >$wf|^پ^gSսKv$ O*%D>%>ݽKD.>|=k+X3(;h >ː=^=Թ>Kmq11 W?b>=>SPip>>\lc׽mIeٽUF=kvi=њ&\>ƕI>R"<&=;?Obsdnwڸ= b='>n>Ộ=oJC>t>8<圥n= I>Xb==R=>pp1-> YI <_}q=;߼Fm =>LK=q>9eNz$HVf Y<;Q+=v.>S=פA=f".=*$a нƂ="=Q-{->\|s=*>dS>.N>;+=2hQR=ڽ >cR=#L=(J=/~>7^=)d :r==>>==&"->{q%_=j>z/JL(>yH>>)CmRpP>;|xb:;+52InY={,>z{c>+>t >Æn=-p>fד5l,=Y \Q_|0>0|74\,> "O=(5Q>쯽t뒼JJՕ=o8H轪M<:<=?>m>z|Z4="X=곧==gSE<䫽qc,=Q9>3<>_?=|}>H i>Y8> 5X=?=M﮽Z᧾m;̽߰o=$^ =?Xt<|>r>6=薾ڠx?Vtٻu>:6w$>#)iN1=T5dE>"=?;b===.:%N5Ծ"<6!=x0f=漅U 9;><5>;r=daľ!N>@I| =`J>~ >.kdsB=_P)>wJT=7 +mK(K(='>Io=> 0 i:ҽ!>F/>K<>d>a>s :c`<FT>y2>0=/`>F=!}9>k=+_p>ʮ|=|yy; >:>=>w>I>I=kDJw{=e=K\>gfcI>6/>J=`=d\>,=S}ȋ=е=43 ?<3E].=ZuQթ>#y;0p=0=|>Rq㽐< rd!Ȗ==K{buԻ>*=ѮCrs=я1>⼠CK{>7=܅=8>]>>2ޫͰ=0Ǽ=\_߽YDos={>|@#G> @><D= PF>5Q.>s4=rˤ= n;: }Cw;ȊŽ!4=4=[r=)I>5=+>'D2>Y[1b0ξJpz>Ѿ =i~ l=b=j=4"3f<),ٺNw >ŢYqmqY= 諾Ѹ=y=& {>> >JX=Hr=1伖i=]xͬ>n/=u<*> UʼbS>^_nYOD+@}ս5|=ԏ=ז-:3>=_½.N`>q-9=p&>=֍>@ d޼ɷ_Y~!>HK*=?~ُ>}ɬ >.9><6衺=-I) $>ѱ)pB>-:08o=._zr Ue>A=]==g7.>;TY1A|c=fߊFH_,˜<>>Ifu=% >Bu>]>];=`<AFˤ=>Q)GT08>P> 
~>ٶ{>N;U~Ɵ4^8әe>f=1~>>pg>'ՅzٱP{x^z{O=#Up\#׃r==[;-/>t^>+QS>/;.Yq;#< Ͻ=V=[>&fy#,=[wn^>^0vG,нA~ >t=>Y3xӾ",c=?<9=%WM |m==pu=leG>z=һ=SNC5=ɦ[>! ?>E=JSѾ(9u%>>eO{<\<佀=i [P1ް|E>fOaG6Zؽȕ XFu(=wKս>I>j>Ӱ<9=ܦ>ľ?&=mQ̽0<'>S~=j=:ޯ>fo&r=<4T}=틁r$>;='>ޟ=W5 >nj*>"Y=->B>5("OA= X=X =4M=4,9:#;1=⽢x>" >=!*=o-^1=J= ;>hϚfǮ=q=+=M>ޑ>4µ(>3 >EmURнla>Rt뽌Ca%*M쑽b_43g+=v>>==G=}üUJ<$n@x>=|T=(1=?z=ݴ=lE.?1>@k]>=0=_$JpW=f˽ѐ==pXNW=E>k=n%륯Jߙ< mW)>P==3"D>MX+#^.}<>=N=-g>]=ހýXIK>2=` ㊽@=}m.<=K6>r)_JḬ̈h@6<>tE7*DG=[=Ɵ-=`[vAk4 > VSkIi=<.igr2Li>C1> Tp*$>m=UǾ5 =0;=jI>pCsO>`>cS>>g>'RS(vr@><\t=w=PW=fۥ-L>*phR<1Ȁ0=Z2 15 ?=xyY=bx=z*>I2> }Q<҉|#A;.>n6@>/;P<k==*·<0=!d< yY<}<>H3HDa=?IW(|W><&p=MJ5>޻Cx=>\|嵽==Cn=zz>]<.bEk>;>罨\C`;!o>RF>T==P< <=J#i{S7HB8ӝ3:=c=Vv*;2b(<<5o>)H0О>)=E3=G=桽w=$1K>5yܱ;>="cA=7}R=FZ>Rꍽ(R+cȽ=Bjm>i5h|)2=\ =\nK*HV (=]»<4#>,d<< ><9r=R[+=F½97 ?=гl8$;>=.=o޽p@>X =_]=kнN=z=o>q1=3>Iy4!`F>` >`D=؞,=%>EŽيjh ka%a!>X>q>>q~CY=bBM=W=$<4.>t>?X`FY$E= Y<Ҳ$SE=_ {a=>CG> (E޽ƈ<=>l5B="Ԟ;Qb=qk5=Gt==@Ce6>G *#6Z)U\>1==07;Vct>D>N.Z>a=+WI>К[Dz𽈼==^@>g༥bLGJy( _z&\>_)f$=[3>\eAuٷd¼ट=G>;#h=˙= M-;Gn?>I ;=7<=</<>p 3=ԽE+=#>oJ$>Zy>6%^>Z=t5j&L>f-P=Z=ˆ2.=7)[<90=$;fJzY@>NX-=򺢽t^=زY=-=[$11 ֽl=vYIxS >Qj5m>Wk<|~&>騽'>׹=:L==<=;(>WG>vq<<7B()==YS=>8 ;3IN=\, (g)s=V=Us>Bj5z>= ýW>=>]>CM+=t:A=bI4y*>KX~=5@=I =}>/BWZHm서<+U=u=|{> =G>V= >4u>C뽧=݄fdi>R.=>㽕^=$Ià EE{)sXA/=A=2O߉<:ۭBxg嫈Jr>T/ =4Z>[=<=hiQw5=jvjq>d>h0R7Ǽ-2=6[Oc>=B{>ƴ=a>ώ>;7o=ꚧC>Ag =蹦>v'T_>6<06YarD ýG;hA9>FI$"뮖=I< =yd0==k^\پ@n֐+>1~/X>5>nZ=ܣ>r=z.j4 >lq/=Yf> >=Gþ&>=M1#e.t>0 =51ݕl"@>-(y=>nq+ׁ >~wA=09yM>a+<=w !>J<:;<(?KR7w>!lX=C="=|C$Ca=f㽴>93>=q=R I>O~>Hr>_y <5="^>a_=6>늽}_ͽxkdl=t8F;n= >=R ]l=G`>G쁾ݽ5,>7>;->FO==(==*`ݮ"5=?bŽ;`爇=4%Q>7n=W/ cxxb> >*_><]SG/V"R=b>O#>T5qӽ==x>=dQ>5\ c=*>޽8<@{ Bl=1pq>Cs=>4?D=R>Yz>eϽj#Z#=-;-}Z0콵]\ ]/.0~>>=r9»"L5>H<=/M>*f>=Y>Ru=W۽_ӽv(<6<2=>G%>! =d %FK=>n3>}mD+<# vr>=r6=+<7MfV=Jf9z>= '=>g=GH =(>Y=-c%,=mx=S= oZ y=r>sۏ;h+=ak/>>Hۄ>R$>::'ur>#@-D&W*=4>p=7Aڽ]%>g >H>f==t==!ѽ! 0xN:c=yb>StEfW=t> @>hS-Ԗ6t>>>=4zϾJo+=}. 
=a9>|*=Eu=]#>u=Mm;rbBŅ>Z?; Dx)J#=J>* \>$膾 x=b1>ǧwӽp~[*>Y{l{>3/7=Y>N@(C[<`(]ѻ =_H=tqYI+E<>=eėi=ؾ޼Xjr1:R*=tޏ<,C|z=Roa{04==k8 2>V@ $,=3=]o+=ñz >KI>Խ3==ȇ{=\=ȀIbd9>,1U<> '>mKL5L==(dx=L0dB#Z>S >Ǡ=k"=v|56>&>K 8ftE=u\TFb>U\hE>)g8iC?XF:X1[a[:!'s=H<,=.8=댽=f<}Vx(=<?>>6C=۰|.+<*B==J,w>u<>W=%Z>X=xaT<>Z F9 .b#r=VlR>/>"]=8><ǽ@@>`<^=/>ƼVX=Q>a 7=H=ԽsHI$;Z<%a=/=K;L3'@Ѽg! >,lcE<4&͉I>+j=Mi<ߓ0?O44>a=k=ܭC=Ƚu'R=b <`~=^*"`08ռѽ08=I̻2(<*=~@e<h!="=|J >hQK½O}Y=t#+¼2=AK,.;6sܘ=5/d>I><`eA*=ZœDCy :$<

bWaν<~c4S˷( >&g0OuPP=}=8>r0=T;>z+ja߽=MF=ٞ;K췽ʻ=u>=ѽ=\;uh<3N<{=QNNy]>ԽU; <44q?蕇==`Ӿ;0 >AHcL'NgG2>8_=92rc>ٯ{4>gg=L>= <ƶ==Qv=a< ߟi>^>eL>v-Z|AC<ó>m1f=,>><%G>N<ɖ=nL>%]d<#.=)>ңڼ>>>dH=_W=jKWG=>)jtrC>.=>,$>J>s s<ĵK<=';-~ý=c=H>z'=8i)=vw>¡(|齀==΁ Q)gF)>m]׉hj>?;IL]ZeMػ=om[N=Cw=AJ}$>> F:ӛ;Er<8F;lnş_|="==߭< ?W=:ns/==B=r=;)>XI=mTz>Ej>Feú}a>=?5=n .$`_>>սDp<xbY)>HM۾'=DP2/I>![E=G.ɼ8>F=Z>[[tT>$<{^1[+>gM =E5=ӽQt>0>ON=i[DO^~>5T|\Yf>5>ּӼX>p[=2d>X>5s<4}d<.w2=}P>V'>NUPD UÆpD>1>9=hτ>r=5.e=M&i=DvH>_=M[<ጼj&>Ty=/6N!lL<'>>"LQ><ʽ$C>*=qI\78ۼ(;Q$6O7_=nu ^=ȅ>h>6J;>p*!4 >=j=6=Dك ͼ'p!DB}\>P׽P4><\=Wxϊ4J>{HtJ7;Ex3dNdT8>"<>=\=C%w=ェ/>Rν >`S=č =],}= >Yy>5༆V=f >Xg=OB>q>; %;i b8>\>b"==X;0u;C(cO<̽>1{B>3 Y >3פ>7v=lͼj_= G<]LS; wD>blUm!>ܰ+> 7# >OJX߽qi>1<^1==):8;D> >=s==rP%ɨ=#!Z Ɨ>OAF>xf@=!.=,Ɛ<=>F׀=Ղ=*;z->lܬ]{=(L3 3G>ٵ(Ep=Δ=h>}a>l=/f9 R@>T%\랼Y>Q׽`>C>O=(׼'>g8Js}=*>A=\ X>°=џ3>.<ݦ=k1;e >%"=BIO=FRݾm$^=˪=djuAӹ̐ݽ{=3Q> tpG01='dGeᾒ^ xN:s<ʕ}H-'5>Ζ"mr|aTS0>=8ؼ+I=O˾uG'>%>|1 <2-|$yn fA#skD>Sn;r>gĽR@B>0E24>0(w>̫/6l>ZEn=W= ql===~@7 zn>T=WDh|=Ͻui@ =3 *c=W%yx|`)=:<=>ӿ:3Լϛ5=,/;.<x=/= 8,=b)>'=dLr->,=|1^|>q=+(G>LP;C{AXȼ2[:Fȗ=,>X>b=f(f >fp-># = <o|>BMM<}>pv>r>a=nC)B~<=BCS>>=iv[M:=a85=_]>^0=K<:__>;9۬- >Զ+K>1UUқr!>(쟾)>`>u7A:>I.׸\o<~=F =5\f`>sN= <;{ҽGj=͕z$=.==χ=oK`h^Q>R;=$7O<>=JP{>lX羫d>>-Н=ҽRsEiO+H>OfAU\El=Eif<(=§]=y>9Պ=h0>$>UP>|>!MZ,>v<=>o콘2A>4艸V湷=\i>V]`eܱŻ*<8<><μg>e=;ݩ=q=S(9Cj2/ƽ=3>4/>m>T3=hNSM7+<>n߾8.%=G>֕Ue=Ne#VX6>Bȵ(=0uUNǾd=-ރ1<0a[>iđ\=i1i=Yս~ H…4a>D1 =$GҞ=|*~ =ǟ>ܙ;j=E0;k SFP"={@>A=C>G1V>\}tǽ=h1=# [>n ;=s"%X=u P݉EW>TY5;.!v=޿/<췛ê>>g>%f]aU'=o2\>B>mϽh==|Q<6r~LW=kҼv>=Ўǻj>tO4>d=AN= 5)f>KE_>NL̽U<ѝ0p'[}<^tGh>5=Z >喥[=4F>> L>|Q=9N<ờ+>a>i%W=/96>) >z= 2k^S=UUFe;Jnxv{s G>=jý7x>;C=,><=;w>(cl5 >9o>M;>ײUA]>ҽEoU?W=#>鰒>i:'{7g^ν%K;  Jn ݽ=AK=L=W=~}ݥ>7?=f>e$61!">g.10ʶ9= =}t=3>ˬ=~,>a20=@z&>=Pdi=/x=&=O=9K}[e$>3<߾q >=~8H=Z=aq=Lࢽ^`=HUuZ;Ʒm,2$/֯=| =U$[>n&=ɾ>4Y >*OY>=בr=o>)hq=,e>(W=v_iP> ?{#XejH=(.2QB=4oq=K=WWcu>@8=&ԼK0>ߘ H>I>&=lt>Sٶq'=F]=Ʒ$>:ɻtz8R>UZ~MύqʼĶLS<6P./,>g[czX>'= >!jܼ2YJ0xƼ3yLP0,=Dtqw3>ߑ;ؾF;>c$&=FD>Mζ=n%[>#؝_H v>~-ew=qU,' 
=>bl6f&>͝ټDH>L=.9oA>=;+>QԾ=<u<[N=Azϼ->7%r>-9i<$쓚>T"#>齗Zν?av>\=~־?DqUwdx=p>=.>@ a>=<, '~b=,>+<>Ɛehcؽjz~K*!Y=&;M9M=-V=2>I>aGӰ=P[8>\=F= +Խ_J= >/ 0>⤿;4;9&=ˑ 6M >Ӄ=!=[.7>Y';=s8pȚ;_ љ4 (̵KVwm2<]>74:`<ept=lp;q>@%غl8=c2="kc潾>);Y7¾<ő==鬘RjA>=-6ּC=>Τݼ{d==yT>>= mlOԾȇm=6]<)k 66.==r>D>ŽVe<=_˽ D>NCa>D=%Ƚi=;3ؚ##m >>Q>+>e=FbA=0}=wOu,>\qK=~>JИ%O>=]41>Sҽ [>X@a>)2=qν>r=ILk*0>ܾ\=譾$c*:,b ک=>Mm1o9eG$>,p=p"f ^R>k =U>Q˽'Z>2 ==cn=^=>"O龩<;ofܾ]<6³>r,ċ> [`B <@l=\=>t=ѲYi<=>ۑo'x>J>ȅ=}&>k>g!=4>a;u;X;)KA(e=b'O9@;Q`7+'=Z >^m&>0^ې>:vb~r<{>ώ >V "3HL=u<#n>@=#=ՌT@5=]=>ֲ ('=:V2=JZ>'$Jpi=LCl><$CTU;ٽw>I.ͽoƽ5˽}>ݼ4>.>񠍾Y<>oJr2:j@Q@\T=<_4M8Ob> >1%y>|==0=~?X=0uʼ4)Y>O:0>Xx=۽ Ujݽ:>شN:j=X>~=l^E)Y>2]]=Ċh>'(]0==<,>^zns> p˝?1ؽ >Lټ<>toHM==2>==[=ɽ =k>]>= /=!ko> 2ԄJ^>>"G6R=8a]y$=! Ec=F`>޼ >O>t6>ynT>%E=,>=~=J G*>' ae=$=>)>g+x>'@x=puh EPP> >kbnԸ<ݚ3>Vʾ=D2PS>?!>M^ x~>TbZ2D]]9㢽HоN;Ҿ x۲D%8>)>R>HՇ>>>)&A꽲`mC]ݽG$"!{T#:0%!,>΁뽓11>\~H>#>>4c՟> M==H뮾>l=P>"U>ڽX _Nv:`=9 E 07= >@>곱>>7!=q$>:F`=޾<,qļQ>,>"8= >^b>>J=[j=ȍ>U> >@J>~/C:ѾY>#<#>Cּ3wWC>:J=-@>\=>tF=|3>-.=I>"ѿ=>v>+>?(>>eK=F<{g>ZTK ֵGpU&琾0Խ>_2= >j=4=S=ON.!ӽB=1K 0>pB>׽ .>[=4>=ؘ'Y㽉;>2>6>!˽#=s{ܫU<*s>᡾(H>ihýW󞾏%=c>(d5>GV=(ǽm˺ ӹCgb=ܳH7.=}V=P T=jc}>9=@x=] Y̾E)A>hT;KAV>ѽ(g(e:of)=9>x>8=%x[z0=8zw./1>Id=Q*2> 3>+BX nFLAS<>|~>XPmid== !>4[9G+>>H=n>bg%>gѽ" =9;7Xh;E<3Ͷi>퀸t>S0C->NͽUH(6Y40=>ds>`,q=O=/>d D3>a81>Wi=%j>>2h>#B +Dc>{h>!MiD=H&>c]N>>[>nN>J>#(>s5=B>>$>]#BOA>3Wu\=1"<{_[ )=2'>8qp?>Ekϣ*m} 2x< i=7A=&%ʽ>=[<_ݪT>W-=s r3=R=5<(i>&;Q->0a>+>Ai>*2lJI&X0= >y<{]2>뽐~s}2 >#¾!CApQ=x3>Ĺ=l>4>sPnK>T> ^R==u'˾P>q= V>p>3e>&U X>C*y>MvT=~w>'/\>dEB0->x(',׾|:a>=U=rwB =pٮxQ>1>qa6 M߹=ʤ?=nq=Ƅc?Ban.p6<>;;`N14H=\JͽdU=="hؽ=b=А= )%>=۟=/2+9h<r,><>yЍv>?fu>>y>t=?>ץ=ֆS)<3>>Y u>|;>0=+S>y#Ka>8r3JF5>*Jve\=ՇRxR>[=!L0>H7V>A<)=E4>,>[.ߺ쾌<="$=4&e<̾ǯ<Y+rK䊵:Y 'qβ>[?믩<銒=AA>#>9>ι=rY; Clr>I&3>kB >z> յw/>FpW=TzZ=<>0S9>r>i2>7=.;^ړ >?Op zx:>!=a436=HvdN=fTA裀=F= gbӽ>Ӂ=>~>=X=?=>ބ R=P>ټݮ@>K=:=4`n>}e>M }tmW?`L<>m>KD6@!.{ tdϟ=`y>C>_=U>v޽=>̶<ձo">9w=(> =`׼Ė>#˚㱽 
=N=f=NX>F@_=^.eeS>xT>W><8~мSIA>YC>@BGN>4{>bE5>ѪW,ž!<\>;>ii>#Ez;>X>3_>MA>$,aY=>6^,稽YиtLC>P{w;>%?l i ==Bgp퐼|d>ة=7< L>c>n}>UB뵽 J>< =Aw;]x=W=>~"e= A&4>P=>vhܱ=L=ؿ*'ƽyg>彎P&!o#<h<5=Y=(U=ې=FpV=#/ >iQ<j&׽ >|<? 콊8="31>}$GS>,>y6>~J=i>=䫱6M36>lo>=="=sbT=N|>ǫ<=8I>}ɽ+vBj=Y@*ZP=d;=e+e<(>Q=fXV>y_kw> `BA==~Ne@>Wڷ=k>'>)=kR>ᕊ:|=scikit-bio-0.6.2/skbio/io/format/tests/data/embed2.txt.npy000066400000000000000000007602001464262511300233520ustar00rootroot00000000000000NUMPYv{'descr': '=v.,<<L>'C=Y==cf뾝1<;jFӼ]r>=?|N>j?>|>=Ti>q>>SGNg>>|_(oFf[1[>̰>r=;Xu>Hg|̾G:Ƚ0[R)>?>P=u==Ӕ==9$=O=8֤Rx?W: >7!>)).=r+P><#s}>]D=if>b?K,=|ાJ>׹}>B>=*>bAϽ 1%mE)P>(=>XIT'zR>_^>><>5>#=$?J;پ68=ΘWc>Y==t>-=K;Ysm>VLϽn== 9=ɽO"}=4>3uP?n:+ ?I>OxXM>t>tw>@Vནv>#^'>c=mN>Cs>?_\<"\>0+=O}< >Л =}> 6>0>=>!5>^='>ܔ-=[*>ݽr#A<>Hǽ8= > S%>>Z^>X$>V9=;Zx<>,E+>34=pS+#>gD}>O-ؑ>h%{;D>I^˽yÇ*ξHeS>>>v t=?޽P<C<֍>tN'>8S=p>ַ>&s>l>56>/ ,>j>e5%Vi=X#>:L=]$>n>{ŽeB=>w3|o>nUyFrp>uuݾx=*ϕ>i>- ܹ!\><[>ݽbhJƭH>k=>=dp>k8JW}R>&> N#ZgMYoJc=V=& =wf>q Y݅=)^½ݜ=)%r+s =r ?gS >K#dz=WNnC6JUq(`@=7Ƞ>$BP%=߽=lC޽a;>z>Дy#ʽ%/>w`>O>z`>DI37=yn8>f>A>5u=lˑm>m@> R>O;>Ѵ ?4h/?(B=$}=ZmǾu`ȽFҒ<::Sh$=7>oo>bwBR>R >>\V ȃ>.4=DI+kMн"1>d=E炡#>$=F.{M;T4|뽵=8Iݓ $Mμqg!>>¾O>e|\=l޽FW;}G>T=u=- >>LjxĽwy=Ї>w>Ӊzʤ*>0> (A-&=Nؾ\>ӝ<]ܼ z/>0̽zx<$abj>&>>Xv>wI>~֩!齾5cs> 5>b5QY>o!@t>INm$Y$I >=V>>>|=*>~\C>D̽@c>ƒ>13= 7Ox>&u2[&%?K<sN>VӾ_>k?ž7=[2p$$>2;ƽ-;>]]]; >84$r:;miD>r>=ݽts7|v3>Aӽ3=b<>>2>>=W܌.>.1Qゾ>ᶠ>C<==9⾅>Qm>j_O== ߆˼zYnЌ{<>>e==Ӫv߻p?*>>[>߽,>/F> *>(e*>3>\>uW= 0>|͚>>Wj;nr=]N%H9hFWτ>6(;HA$=nP>YLΝ=->><*Tް>$[=d->o<*=:,L>V{G>[(>d9>.* >p>.A\޼^ٽˑ<=>{R۩>P;?4;C_>30=;=J:>)>XdE>@6i> W >=>%.>6\( 2ļ }羋ͼn=AI>`1E澚%0㬉>>9>$>%y"+>6>RM<<ڼř?b&= B>Ԭڽ9==<-;%vN?>wu g[:pn/>Ͼ'>cyYg>:= Z>"<`=u>(,2S9ḰZm>P~c>=>w~>|<[=L ӾQ=!>'+8>>Z=K 8=KOPA(;>㫾۴K>q ?S>J_>pqfZh>| >>>vN 0>ܬ422# >I> g2=쾵 @> Dͽ0@=Z">>S==K=n1;>VҊ2=?$ 0>J2>>h<콹>3=경|=2> ]ߥt>nl>t]>]H6ɹ>eWw>#]r =#>>낾5K;>>U z>=:ֽ,B,>R=ݶ>Bx<Uł>ai=q>{끾̯kcdʄ=Ї;gD?_;4)>>;q!>1*=n=e_=>˚>7 >î=aPL̾s> F㾼Ե=7=m#>2>WB= _>$ Q= CԾ3,;>u>ƪuۃ=g½=I!!>6ūZ>=9龋8_>߾|>y$=?|S{Y>Y)>#=gP-=qꧾ0>R:>W\>E,=(VhPkX>G\>=Ӣ>(n< 
ᅾ˜>^r<?{QPJ$4i!TZXٽ.MB>(,Uobm>b=b#Їg߬=,d<}= v=譽gˡm>:n <>=axݽ񙺽jp<\␽՗>o/*Y?z=>q53̾~ĽF <\R(="!z>.>F^ϻ ?tp} / =4iwPeʃ>PkJ>6>#3SM>SM>CbcS+o>9>=4]=ltqڻs&3AT>A .e>!>8@=pp|Kн3L:>쫶>ц=R= 56?E[<[>K=ek>B^!+>u(>ya`=Bw>Պ=j>J 7D>=F$;>$p=$0;'b7y=Ծ L>axʽaX=_d5=ݽ >\c4>Y۶&>!s>ƾ?>b?"ƒ==+Q>>@0D>E>T4Q<=y>ZZP%?Wk=t=5=k> =%>{`fC>d>Qh>/==u< >>=Ϟd> '4V[>ïs>̅'ol>1㾛M>â46? <=p=z>u-G=n?#=Z>=5j>ة>ꭱꗾ?/վJ׀=jG>81w8F) D=;.< .)=Ź<-,=R0=->Q>rcr=^l=,;>e=[>ij>HU.P=b`jY/+x.>e|;?Nܽ@>Ƕ*>>+#>Tdè=&!+#pKVFv:>T>2>&>?=齸>O&\"1>GY>T>]= >ľ" %˽hWU3/~ZS6`:->l0;+DtesQ\ؓ'={_>iB>\n>;1]!>n#>u*q>?dnu<I>'>+ھ?Y>罐>X=s=> >q*>C!=|񄼋.?a KljbŎ=x}w,ٔ OSrk<>1a?s>_ּu{>؅~=oK=N(G6$=- =&l>_p>3_L>x=*cŹ>@y,'>YrMV>lD>=ξ׫$:?a=>⣤>>(W>3@aHZ>ݾ^j>>T> ;=isCRrq;[=CJ?uXý(=?@ģs=*>ς>.w >E>$=y>V$>١mN00=7ɻS;h7^zO=S>ʽ\->t0\=">/>Ok>="=jCݧ;!ʼtn׆ { >==*<ٯDl9=bUP/=-sA@>Σ#_uٽ-=h>X:??=i>j>7>Ծk*=G?ͽ3=]O~+;C}-K<>~>e>ʾRg>86A=Y>{þn徿3==EA>ȽP>?N>_Ni>iӋ> V/'>m7y2eS .a< >[ =:}ݽ$><>0= >:>TA>>5C>Y;QY>>BԾ2>{d$$>< X}A׉E_>Ȉ5: ;mm>6=m)jHaF ʅ>IN8C=[>I>>T>鷾\>_Jξ @5׽7Q8?yѾ(r>~= ->U> >t:<=Ľ>=<>f=N`S> g5>>w=A>_8;!LbC> $н&P=;>阾|O:Iý~ƽkW9?_m0x>^ּI=>#JU)>@>"t%>l<3 ?c.<݊=oEKK4TEվ{T>]F>S=+>\>:-=,@;mp>c%>U>Ab'fo>?Eg>7W>dm>T> ྱę0>>8-{*ga>k{?fǽnht>r='F?BھZ'=;~=E2>=XP>۫Žet-P;?}?=>->ɷƼ<(>E?F> >*>&9೻[=h!n>hZM!?'J=9>S־#>31?}ӾG> ;Z>=U>*>>y> >3M>SU>ų@<mYsb<1>=/[{@پSSGwZ: x=j>P/xA>L >'=>j]d+>d(>?=nR<2?>=DcBxP=g=Al> >WR݀SZ*SF->;>>=Q,qn۾ڭn3g>}>>*=`x:h;=+w 挾>N= >H9 < U?B>`=HY$}=8-h;=`>5%=0=7>E <== ;/=[=6==mh= s%˾@ ko=nj>ŵ>>sb+*w<>t]FFh}CtSY>0l=x3v5>B}pǼ >8e6û8)>==?e8=Ҽ>)Ż>H ?4t[na>(=)=9N>E^= =;QI=j2>w>T>y=^%=5>ӱ۽910>i`Dz+L%F>sgg> yP %=hiハ< ^v=pޗ>=J10u'z3=h>=HE=v[ˊU>x>{߾bPͽ ?7ȼ>CF=?>,ɽ =]yʼ2ݾ+V~-)H½T??=lIfć>0s -z펽N0N>n i>>T">N+o>b4=O21Q>j5 ڮA=ǡ?ȇ>v,Y/>B>C>ft >ɢ>4оI>/N)aa>rcPk}=}dc>>^ >o=i;Cw,>"鶾-\>1ܠs~;Na^=`/x>x>=)>}\낾=]9>M>%pP>=IٯO<;cgM>~>Z?zf.,;j̼tʧJ>o==b 0=B>Ee^;٠ +kܼ>Ϊe<%ܽ& >ὑ>婽ѽe_=3I=>qԽ䜼!3D>.J9f= =PV>!䠾y=6I><I>).S&>G5=`? 
%>G>Ӿ#!=;>?S>߸> >¤>>ss;>OѼI[>/><>{>@7 L0=s>Ad>%9Ƚd>xͽb=:%>=D@,M=O|i=7 BȾVj=V>('>vV^->Kb3E‰i=>\>2 >Hx>^>IZ>_C>BVJo;kdhjb;"!VMv>J=>2,Ȼ=:'0-=9> S7B>#r>)\?J>?=8>2?$;>vL69=z =ʛۆ>.IutL[DOD>2(q<1=AgɼNY=H*C>;m>[= K>W@O>C//>(_TԤ>b<=T?ߺᆪs> |?>n @!1=^o >r>t=n\:i¾ɎBѾ׊ m>=m>̈́=:#/1y9wև>ࣾEw>?])ZN>т>>B>ϕ>^vC= =<ٳN^Is>>qƫb(?>_>>=ۭB>-n+ RW><2-㾼g=xړp0Tn>>ؾʭRR>?M=yn2`=:>QM=<viM>ZdL/Cvž'4=mJ=>*O=s1g>#!Ґ*>=%]>lA> =q"=pѼΡ>yN=ge>Sx->/>3=TĈ9>MucKD u\Vj?8ę==Ϲa> J >F=Y=IL>"L>%+> ==A /``3?=N6eսUʈ+i>>ޚ>=S>ȥF>_D߼+B=4Q 6,U!=3ʼO ^==6>@=q 3c=>'=]N4S}>|O&x>V\;y8>n>r=V=>kZ>ા~h|sn>Az<Ĩ&E=~.=,ub)$=:>w|f=9,% =?>q `?=t̽¾I>=.>?6 >*:18=. ǣ>ٖUx%>UX>c2(;ݹ?>>U> ѣZ@>˕<:~>p> >6C>䪾bd>[==e5(*=ɪ 2[P'%ɪgq>;r=s>?=@:==0 i#f HǾq ![oJ،>-{?1=[>ר>>U>?oS:߁M5~@=ӐRҾٺ>7kU=>>y2=N 0"><*=D> >Cw5n?SVdT>PU< Ri>ţ=/>:u==&>ACSrں?>_>a޾ =M=<: ;? KAS>Z |9=; =me>xp>12>8#|>$ǩ>&DxP{;d޾] >V$BV==>U8>^>3t.N=I><=Ҭo=*7!ʾ>ֽ@0> ަ>L=#A@Ƚ~F>1>[?t ~{>ڀ&gJ)ꟺ>)&}>:=z>@R=4c-Z>˽>>mcIM\9r;?瞾s;>o)&t C'D>y>|书h?9"u?,=@?n#IoWE>u>\/ټr>5:u>-͗= >RP>9ھ %<ܾ#=0#>[յ=U>l.T<[)>3QB>/-=Hڽ?|:f= S>>P' !=d +{>;e k=@e52=L>=j+ :ĜŭQ<P?j'<>^>V;J~YN2ڞ%0=iO]ݾT=>c:?*\>2>Xž>-p'=K=ԛ<=vo> >DF> Z>2׽.>7>{(m>Z>s>9=X ֽ-V ('?N~aW>Q>=?MBϓ>hVV8>@{ǜ>q2=ճ<#=5b=)Vpem>gjľmx^=Oy7,ۧ> ?[$?\=x$>I W pɋ>ˢ㽁 ?4yy"=YQ>\=?qw>9+>|Aݪ t<˽J>@ړ-𽍎

=I3f=d>4R>:AQ)>>ހZ`>xb>i[\>u5=>Oޒ sT<x>ZTIg>JŌ ?e̼U>ס81>^=Y:p; q>)Ve]>V>AͽO:J>=o>$΃>B,=-=*>UsT)=Ҿ~ŻJK5?y>"=8\F g>o=#;V%g; _=0>>"?y{lk>#:=K+>5\K|0P;ڽ_=WV>L>ɰ? ,=\>P>>>@==솼t)]=8>>=< >59:=CYѻ>=Bmv= @>4 ޯe>H0eپ>^1=#XrV=b۽r0O> 2>뽋辨蠽D#@QQ>e?=t6+>*:=S??k%Aple=\Q>F#?ތ>S}`2½re==>[>0e,>+;W=󽾾lH>^Hj`j>5>[ն=o_ i u+['9uEٽs= ʾ 䍽Qj^0>8þœal>{WlO؇=s ^=+>让=>>5#>V{ ds=pG>=<=B"?>h=ﲏ>>;=4A===?BE=x=Z׽p,<\>݂>'(>A=c*> C#= ~1)"W>mw<=oξY1>v?Ѿ3=>O; >S-ƽE?'dM3˽g=>KM> 6?֠nD=b>\>+覾\{ݭ,ھQ=u>Rf>H>=T|hd>:g]n]:<\u3>?=y>>@J>m=G>pn=tK8>֬b>3==C>/ڿ>LKf>Ks>#>u,<>>6>}>g:ҽsNRij=0 >P<G=R> iFo>`>Etq5 x֕M#@>m=݁>?5L?5>s>=ra F= >\U=x.> ?Z=i>>p>OZ]3>$8 # >4>>==2>G>$@;whP>SOʏ8pR>>羺<==|=&4>mJ׽iK=h=Γ^߾W="=oz=P%xn>xPN>s?>C="Ɓ#w'G>dA<>#>==<>ـ{%Y*A> 3`>=&= >[?=k>0؍g[p`>)>T%?ˆ^/`Er߾G]9v#TR-ǔ3>A =e9>lK6>>8<Ƶܼog ?qNS> >Oǣ;6=F>6$p><1KF>8cy >_>u>bnUᇾj>K.|a>6~=6=>h=l̾>d=ְ iO>8@`>&> a>L켙~<=N> X4f8; x=i(>$=쯽*]P>ܙO]͓>d>M^(&~LѽY>'4 7پLAo>os>۩?E_4Wp_>Bh=ʜ<=þ>@F=(Q7X8>Wk=>.P?J24&R>W;;0ѽ0=ټ?=kbpc>G>]ν;\9d=`;T=&<ҵX=y>o>4<>֩=EH-'=""%.ҽ8v (߽D 耽>U?:<۽ߡ< <=0?\=b&>S3>L-ϼC= ';>< پo>,)>7B8w<9>V3ct+=-=pĝ>fz}!L _+#:=b٘=; r徆>{>2@ҫ ?#>Udý>i= O>"5> >=6A=eq>A >i~\wQSExV|sܼĜ#UHs␨fy'N >mE1|>slԘv>=!Y<ҽ[@y=92?Q>j#>>` Jb= >_cq71==w@>6=UXԼ7ŽɲPd>g=AD4>_Q>%yc>Oq>=Nn_ݾwa3Ѿ<[|z2ngx>2V%>4Oh>=KO=C=!=rp>j>9}{>;=?K>= = 1:2=>G]koI=8?Y>w=O?qj޽(>`>"WTR½ <6ҡ0>~+>==Yo=c~ԓX>嚾>K(=Y< Sn>de>h=u9a> >>>hE4>T˾?=??mႾ>>㕾2%ܾ÷>IT>}a>o>}er>1>9(==oZ=cYQ0fMyν3D>b=W==?a>ЫcM>J=$>#=FSA>\=cy뼚̾]N =K=hپ6>Pn??^BL=6:=>Fw>lc=L2D>-??y=dD=P2/'<%:d-_;=$2~>% >>ߛ>Xc/GD ΢ʾI}ļ z=??9ۈ^}Pa^>>@>7R "l>Kw o>R='iRDfk ?!ƾ3SO>Ӑi8Ggg V>R=l̽΢>dȷy <䖽d=aTtQ޾#;ĀA燨=3K>,Ծm?*E']Y`'>׊>>Z&?7UX>il=="C= >D>n̽,Y<>G>4 >$ "<>c-U>&~>O ?H>پ6~n >`L\>FU&?3H>sBe0r>Uhd=7>f>W.`\=>y>FمBd+ >1:[n>x>ֲg#è;_6<옾I ? 
a >6M>׽m>a!?~ ŋB>,>.dUD<ϽQ>= =7z>0;_C>K(='>0>ɹȤs%M}YM֕s=sv2B>_L>Wc=gX>KX>;>a{<="";u%==^ꆾ+헾> =ۏ=VP>=ex=x{:Ez>&ԉ>cU=0߽H>0=1K>s6=Lr=ퟥ>\ cF m5߈Ծ&9+=A==,(1>=?v5nx>CnTQ;*G>q=W==ݾ> #>,_S;{>/=(xB=uA >fmt=ѾSDؾΈ>=/k> F>}qY> ==־܄ R9q>fe½| >r[l=D>֒j3>>c&^i==>௼ þV[f:?1d<Υ{KŹɻ鸻H>> !?W*=?\KOa>3}<m z>PWpKl0?G%Lr6>3>QU> a>=,=;k>O=~S#=>H /?UQ =Bؽ >X> >e=;=}%AO?̾ՊT>۽BF_>^iLC|=\hXU?= > 3nj ]ZL/&r?b>Jҽ':>X/2t9>s>?h+>MUR>TC>=#==9<>˾%>w>u=V|DN~> >\=?@m`>:>M<>ےGZlr>έ>2 G=YE6=ҾoFjI>'=I>q<3=24,>2I11Y^=<4>>6>䝻=8+[>w> s=9==VIJp>>ܱ; B=!X^K =&U}v*=f8|>J`> z>tZRlx، [ >{2>4>>hq>F^=Tv!W>v=E))fץ=׸ >/Nr>F2`>O\08JK> >l=ݸ=Ƿj=`&>Г>60?8 'M:3>(Ѷ;>;+=/=Q=Y S>y+ƽO>J֊>Jkɾױ='$>Qžn\>mO>rDyd{>ܖ>;K&=E>ۼ>J^>>ڿ> Q=M>6=׎w"l;>l1s>g*K= >"XO=k>\=(=>.>ak=7Ӹw=4;py>u=;(>=$iU>Y,>]>D$>ޑG>9>KȽ"^ֽb/W Ľ!Q|>-GG?ֻK;呪<;oY>q>ڼ??׼׽'>˜J>S>_>wSf.N1K q =¼>.">)/J>b[J=]ƽ>5s >%>B{>S]xj[>>0*y>G".覱>7H;˱SyHB>F>(*>GW >>> Z=j==z4q޷ =sRg/L;\-/z>Xf>'Ჽ8A>9==ŝq=|Yþ *?C8_=7 ;H*>O>3>>0:,*=Uc4|P֖> GȆO>g~0>㎳t>V,Z| {(ֶDЈXw%=j=U`>(p0Q/>>Y~^ VZ݈4$>a:>-ü=vy=B멽hز<> >=Ue>=K:p [S>"=HbS>aڽ۽&tMi>c<[Ƽ:kRe=[I侹5@>oF 5>d׼>t=OCZ=?_=tG >}j>!-q46=V<@> >s$ÓSxFP=.7b\m~D굽ބQS=f-싼 k}¢p>o >{<>ށ辇spK4< =FK& :>;=7=x'w]< &+ >N^]=ѩ=E!?gI$=jש=(~W?1>Ɩ>_4I>Ţx>NUJ́k> >zq罋8=n=>lڋ> '~,?>>w-> q=2,_=1g=aw=:= >>0e^=꾱XѾM7"<:j>\;>r>b5=HϾ>@;Jo\G=Fs>G6e=x>x0>XO=">=a>>>,_>-^4諽 \I>&TS3>ʵF=ΔBE=s?4޽B>ݒE=ӜX>l\ً=E-6>xT'>4>Y?p>pE}z٭tνZо I QY=2|ϺqA˴ؾ& ,5;>a(?FfA=@M=k >>< >&!ip=,NwZ־׏=N}0i>\X>+AO>.8=ɋV>Gp? ]>ph?=shm>VeB<>yk>>;p=Y> ۽W :Խq 2zA￾a=o>b@,>Tה2q>>>I3>:ǨsL'g|܃>ɽ Υ>z7]`L)f20PV>Ykb>3QN;>>_>棾xD=C>0>J=+> ɼq,Fm?awf/.CjV=4H$-TX>r>PB˽:j%?=J>\q(3/>7@/پ0+= = ` ?BQ>8=<뽋O>#ͽ¯뾧=$ĽG<kH7E=w5 8j͚=Z5@>%F#>?g=T$5b7JL5!3i>vz=Dlw>.D>9HA>>USg"> >u>M>^b#>zק= /.M9=ԾKD~>ս@g=I& ?䕽aT=9<:b;CL޳=wO\~;@vS3>ٶ?' Dnz>>}Po>$\ZP}=kǀjQ>H¾䎌=H= V9W@Om>+͝Lݱbb&?]>=?=>]8Ͼ\νZ>P&R>tX=󢷽sX)g> < >/@X>z׉;<3q>gIo:=f^Z>>=Q9 UT>6Ǿ^>.V"5>cB>(;ߣ~>=n_6NV=->S">$̾5僽$=Y>ABrɼc[<3>D->=u)>|k5<=WG>v=h@'>TTy4> ƾO=H}cXD=)5b(>r ;9C=W>WJ`P? 
!=F >NFd=Bf̽R?zP=h D0>g Aog6y =ݺ=0>㽬>L FӬ)r?|'T־#>yFF><>K/>e=7aо逾<]3>UK>5+>]vƾ>[j⥾===Y-=K><0>>Ov=KC>I=mXgb>>;ҍݿҐ>>{z ƃҽ >w,>n(wӽn?E>?=A<]G~8>0>kk>R5wmsz=P) м >)u>U=M<>M. ?TBg>1>ɣ>OȾo>᡾}Ӿ,>"'=>CN=/ =}BW>>6ɼ ? 5X Ga5:>'`C|=>V>Nj7A>Rʭ>pW'>I> ?/ܿ)`ɽm =6½ <3Bi5Fe>ٯ?>Q7> >ӽ^=us]B>Op缈:2nݽJ>4j<=t >K==t8yjԻiaĽQ(H=T>vJ=Ҥ=T=fսs%彗ͼwg\ԶMv>1;K>-=jċ\3=f"XC:&>}S~L> lk>=/H> >4":=)Gx-`$A9?I^V,.>WϾ`0Ⱦ,8F> =h`Cox<·C=𳄾W=/~ ?31!=HS6;NH1.Hi 軾6n>ن=F.᝽T6u===aH> mw6=M=K()>[_r:[=>c>zu=>AGA=>B >'%5>T"+O-q>K=uw,>ţ>}=ʒϽ{> O,W]>=~< ?,'>j>#I7; ;I5}?b>Y6;۾WY=??~@sG^A0ڼWC=e>0g3O?V>V&a1>1>j >=^0>#>iYwE>6nu7;, =n>¤";ItSqr>F}>G״,))=Y;R=H02>X/= K=yH=y=">lPϠG>R8Odb W>ug>G=aC>>'>(=B_żJK>5zg/>/=H=FR>or ?>޾D؁<'B>՗A Յ<8G|JlREÅQ_9<ѽ9m=)U?'_>zr>|> i=霆>kʗ39=.P>,Y4z=l㫽@>[J>l>l}>p=ݾ3>h\>b>&_=a۽k>9a>ru>8%>7`>}>Cǖ.=g>[Ao%2>lJ<|*YBhg>BY>_+?`1> < S:Y!o8W׾e>t޽|> >i0?#='3>B=|Q.փ>?>G4>8Ы'>>D=V  =XQ>.>۾sW;)>n+=վ8@= PQ>ɾw%>G-^a M>8 q2=R>4]v=(>j2>Eʾ B׌=4>+;)YJ8+)<ǐT>{=ކT[>B>>K~<1彚k1b==D>[=R>y782=>zJnB= =l{d=bu=>V>X>=%>>1P>>&p>wzڽ>x\Ծf k>dy>>׮'<,z[żr<7U?>=_~꧶>U>г;ng=(<>>p@PG=w]~E='_> ?%$M= P=iL9&Cwپ|ҽ>ؽ𠣾cP=پIؽhfg>k=jmt:>?qL>HXx[ӽf > =:j7#>k>ܾ6==m"b>?K>>|ձ> :5>K2<=g >(=!>\T=ڡ$8}>$=F">(e$->Xd][I>X&>://ܛZ=x =M>>H,qc̼ƻ>YzX=]=!"==>->*+>Uʡ==e2;=\C[E=骾-A/>W01=ՕP=Ʃ=u"8?>9\>R_B=M1=hT=hp>f>YM>>jL.}=1?>ex>nuf=9BD>YEKռUT>pYe>آ=B>->a~=#fW>A{>x h%>֯>}Ѿi>s`>GN;>`>>f={>D=Y?=d>.o<27>+ S=%1=6hh1> >y>co>kNJ/ щ>O(Dj>p?= >> lK>( ?Q> O^پ4!$>''E=T׾hԼ=4>_򬾭p>Ӂ=#?a\.f>Ee=Hp>g)Ief|>r(l4>>/$ɽk6>>ať=xFA>BB>QI>,)>1DF=H>en>=Mʓ"=Q kھն>^YҾú>3=:=vŽGS=(M2þ ,;Q=?+䣼3^=a s>e%H>H{>>3jG>>C>/|->`lj<=S]>)i>N>>>k{>?=) ?y:x>'&?r$=~ HLc%B:b$վI'>Rub@Q>=i>Kb=+Te!=M1L>IR>.>/%<'Jh=)A=š_) Q+8M>`Ub=6>}P$!>19=9;>0\ɻ1=!])o`xZM=qF>V>"#=&&>g; > )B:G DVq>b_,>DB>vup&>>2>s& >%>"[>i=b;>L>7Ot< =p>Л;UA>LȾU=ʩ==cI=>(X1E>U/=av>u$?Dܖ>2sJ>oZ<<=P%>\<,?>"d<>f>?=Xl=>p'>s2͠YZ n=c<)>7@q>2>.>q*jZԽK{o钦=3PvϒU>+>; Ms>0=lT }ٽ=q߼>n\#i=1>clhcWa>>e><(>M!<>7O֘=j{=Z>J>忑>z={>mfi=nv>ұm<=3cO>Ȅ}f=;0þz;̠=¾O.9>J= +>-%#4?^ [=(}>dDX>=^X>>龼A_Hm~pؾj)`D='?=( ڽ_ =Kݽ\^yӲ=f>k>Fj>=ǦJU=/\)~ ZU l80>u秏9Ng 
D5>XWI>|Cg>6>>%0>%=^{J=xu>Z=H?>K>p3=f=*R>ӕ8 ?XeϽ[H龁BŽ >~F>y0 JsbTDu>ʯ|R>C{>k= ȼ!=>2<=)>>M"c>8:䣽eF=>t&I=kὂ;Dk>›>_Fj־\=?whG˾S\>Mu>@>}%ֽK>ׅ@> v;"ZMKR;K>

{e<鋽8`d=l?ȸ>I>+>>{ɾ!ӻ,Z>`x>O>㪾N8";E=(->9>lK=q;Ij>r=6M>}:>ġ=Ak>V=8>-K>"A> } ?3qy>L=^!=>u%[QQ>s i3>[q>=Np>@.X==C3k b=k>>DU>3CbȽؠᾧ.!΄;>n1>-0Eyڳs>>Uj>ܾӮ68[>O8i=v#>i2=]>z%0A" 4K"A>Q> .:<e۽Z=IU9>½6>4=1o>*V*JX=Ѿ+1>UhR>ĸ`0j@=qL>4žҽ\=Sz=LE=S}R[(<>b=#s>@~cҼ3>1W=[;輙4w>j>A"о3=;:>~ƾR> n7>->Ue=j駢q%?޼B=Zga,=*>{d>ؽh>TV>r;>G>lI!1>p=>oq?><+?7{>>޾_J>)v̾2>f(>ͽa>q.7(qG?>|>,g=?V>X=n?یqϾ|1?>݉>?*>꾺|LY3/ =>iV8!Ⱦ%YH=*ռG>D>7>xlo[O>t RB\>箾p>~.>>wM>Ih=XMri=n0J>Z&WN>ݦ>:>1[?@>I>U1"=AJ>nP @r>xf>>a>>,O?0WV@s]> Z#>^=W>=$lQ>>+酾7>>k>3={Q>=Z'*fx>'Gλ:K> $>`;A>>p> >ľC">MSnᨺ>t>m<x'ã>9=(>1CsbtV=I>= =._'m>M`>B=H> ?|3I,>hݾ򾽜bRf>*eMIg;G(>I=<>#<4й>M>qh n=z>6Tr 9>)>G>t >&>=n=?('<1>Q\>q=V0ɡpm=>Ii C+'DT>˦>)=>ȫ> >lș=`#>VN`h7@[>!Ugڨ!eO<ٷ>.7=2> t>>`Ľ&d?sH> =59%ͽ>ټ>m>X4h=u;jʮ."ex=C<^>?F5M=h`)=:=bJE> >Od<ۓZ!=Tf0>kA>i>l.A>xcC-|>8c6|B>Lr~=!W>kr> H>=>X>< {.㫢> ˌ'+=eL>@=P>=>wx> ]u>ͫ>8x!? >3>ٓL}5[hƒM>b,,>̊tWD?*?͖;ӫ><4#k=^<iaN> <[>Z_e^ۼ<0OsØ>}=vk=[=>7,7н],ﵾԾʣ >Ԑ><>L=!.oO>{?FrԽ=9===O>򨼝D>Z=e>aO9)e=(?A&>e \=c=2H#><"\> ^r>Hj>Wc2>#UԽv>T = d= >SE ?V>>St>><|G!|{l Jz>r=\=wnJN+ =$=Vy;;˧=܄y>׽(L>b>r=>l{' u?2\՛=>7 SxC(C;I>ܾw =`>>S%RT8=1=QX~$6_>/C>M;<>m>$>VJf ]>پU> 2!`R=8S5cV,xFZM;S =Z_ؾ5>9nI(*پl>b҄>6=@׹>Dj>]ɔb̳_f=B=;)x>>۾[>Sc &/>-R>碄=c'I>Iež݁=Q)N+kV8_>O۾F=Q>)>T=|{>60 Z'@!('?=ͽِ=W=Э >9>h '>L-X(>>4qڽ.ߟXɮ_>sa!F7>$>zj L'|:=Y>&>s>xZTM=K+<>.>)W㼹G\=ҽ,?@wVH⾄ y>V!.>c/@=a>SŲ>cɌT>M=N >B=;>Y<֭;%N>l!?=`Kyeľ^=]]$2=Ơ>&e*=[@=S=̽'>> nmQah>!<񤽟̼Ѿܠ{>ý+W>>vM-<;5ž%ME4\=Zk>Nöyh> >伋>==Ss> -=e;F>[᰾}5e[O>&y?>??=Ł#ݼ(Ҽ=K-x?A>4X=)Ӄ>l6>">s<m4eh>`=|_O;vSnQ>7qEbݽ檀Z`{3?X>/p <̾׍>O= O ]g=+>>>G<>>A ֥>w/>Ŗ=>/匽n=ò֧}=G-A۽2u@t2>z >,3.=,5>už$;>ҽ(=V>dx?]=J>H<g>/>Y7b2Ȼ)v>=>ŗ- ýL=7>=z]=2--=u>;]=>}}*߽>=}RX>wbҴjϽ7=Z>W=bN>{>e>ǽ;"O>7>xdݽ `q> >P Z=T>8<~wU~>YE>'?>߾rvب=C9>Q? 
>=pɘ>k>:0d>"A>>)8Z=^=\ VI>o)>^Y=Qd[n$;/o5'=8 r=ޡ>G>>p=⑾- @\<$>Ži>O>̘Aufl>>[hw>a4a;TPн<.>p6վ>tb>[^=h>gOq>T>= >9D>$?|XԼ`-1bX=>M># 6=fSK>N=5\JB>5>K$Yڝg{Nd'j>K=ߩ>=>=ns>Wt=oJ|,ím&>?EY8=h>+hy>!GG6>~.e7>.\= َ@ܨ־w0+I𾎯9>=%>-[=l7=.-['0=t>,=Uؽ]JV>aAW>(=I=%>j)?;=|Pl< 6z=*_Iý>?*>ND=;|[=t1[~>)]Hܼ,?"罵B=䀾>>~$?P= _d/>ݟý"R> W>rO>IJֽ~ J=i> >R9>¾>=﷽2=$>m=.=?b =J> -A>CSݽM2O5>׼V69&Ƽwi==_Џ@0Q;-'=S>p&>Ca'|<=>C=mmt>]>M+=Jk$w<;S=w=>>X< Vq=[>P>'0={x>uy?Y;{'>" >K.?Xmq>'>k>`K;2>.&$^L>ȷ1$(=@Woì> o4\=>#Up?L>>!4=1=>ؼ"ҝPA==H>/q>z=e=Z>X> #>Ck3>@f ?[#>G>=)>o'%þz=vXgϽIu\F A=9q#=*ȾaK=|r>?}7ߋ1>t =15}Q>?>Sa0 >[=>E>5/B GD>{j>aF!? e=e>*Nj ?i>E{t />؉˸zo=xȵ=>{_[=Tÿ;q ->ş=6<ܚ%0vk]>>`yd,=^$Z>b> U(=䈇>=>-=hӅ>V >n[a%>&b.s==>91yQ?щA>J1};Qr=QqD=ݾwD< >>8>gbjcu>}Lt=T6=F>$(>+>?F"s>>5=9aIv:G^=qt>Z>꽹 =g=x@&L>] *h<+t8Yl?%7k+=pC ]5] R(DYVo{,G/ZA I~>>tD>S,=>>R{H;`ɽIm[h>ֲ1^*6=+j߼Ez;=[b ])>LuP?>KӾ!nI#>}"t`b>U> >ɚ>i ?i=Ȭ J8;7> ڣㇾ=>#4>hʽ: >jw+N>K~*/?(x=>{*J shRP>~}Ys;FKVԼ>5>a=v3̽('=Ѽ >~])>rBj;>B#>zXag>d>Sx̾<ՠ)=F׾|>B>0T>h>H/>55S?n<= =;?`|b= - =x:OCt='&:) h=-<> iI=SԤ>pio (Hּ_=z>@/>7>urAjA> L=?=>F=5r>b˾~r ?@=^'>Kb=?&?c=´L=v+rB㽕Bپ⁾dRڻX2>*_/?)RؽF'?=5jΏ>v>_>p&w>q>ǐ;ɔ/>8"=nޭ=B/= m=h~W=>,=?=\3>W/ξG>>6>G>ki>>n:xO>8H>>3='=/H>=̧>f¾f>G>t=p_~ >sƸz=7ݾc%>YYǃy6`=Zk=%ٽɮ4> P;?*^=YȎ>= y>O=01ڟ=EC=UO9W>Gp1=h]>n>>h\ >}>O8Ný=A=>w`> 5=-?=d>>y=>m=&%=:[r6>e>_Ǵ>w0 ? >ܽV?d +o>\> >h=X$(>&==t'tþ!Ԇk0<-uIq=JDI:,>R`g{:}n3+?H=pFX>x>>O?>rr>Y¼>>=H=> >޲AI=#)%=(6o;>@о?=(z?== >K;%=V],> ݸKS젼Z0-]%=; Wv=+b>w==S=M=rI?!"m;WK>{C~P>V(p<@y=tM_)>5?==z?>A]>=*P=ܢv>Q>'=4$ܾ>odB6j̹!>ܽzx>X<:>#w*J8>Ǯ=g>TMIہ6},Ib|>U߼?M>O#>L>n|>zu/ވpd=z86>Oi>e+>#F=̦>cN>xZ˸>N ,>^=1=K7sZ;|ډ>!>ǼË>وb! >K ҷF('8?>4=e>7>je =W HXÅ >o">oV=>>n,ݸ=١/&>i=`bԵC>q >E<]=IK=8=9< <?<>D=a@>hV5oqN^1:g"Ծ?->M><I>_ç>N=Fn>\v>6=>ɴ=ke[Բ(><*?KEն,=y^ +_&W>&>m>= >" ><ݳ9\I缏> /9>WO h<>s! 
~(GX>*Ĺ>N꽧|>2z`&;>ٽ{> =&>4v =;hk>Ĵ> >Ҿ#S?vB>Δ>K+#>b/>:x=iJ0@1ӽCva:6)!>b<=󳃾?v5>{;=,9>=>)>^ +`:>y+.Yd5;Ub ;X2v>,>3tY><[>aCžQdf e>T~v>}޾][V Ҫ=ۛ<,==)>C=󯪽 +>0F>g>HL>jҞ։>eM;0GJs?>=Wv?ZQǽm2dT ==% G Ծ3`>8>TA\^=i=\+rs> O&>k>^x?>4q>lm.>[>J>=괄>>Z3Q-ǻ+[/>z=EZA=Qܺѽ-?YI^=s6;>o=T;y?玠>k>qp<5'k V[]=>D=j=w\*>ξW> L>z뉾n>u>bռ;W>dѾӧh> d >{>ϩDC3L=9=KH4w=LU>/<$r-ǚ0U) b>8;L^ӞEx= jL<议>aZ3gc;b5>Jsr1=梼W<>ozwW>7>- >l=D@Fb<:.R>>7A=ad1?JgA> =J>&Pr5=9=R|9Q>)6=r =V݈>l>СtлmZ? 1>><7?$>I>oFm˽׻8>a>¦=',N/>+Q*!V><{=&H>=B>F>87G>r'>9܁>+Ծy>#@==k ϸ>lS0Ǫ ᕾ^H=",mcS<>=a1>=ڦ>'1ؾ9:=rx> ?AV>!J>n1={VF뽘)DO>$9̜>DV`=\f==;>IF>VQ>}>N[/,>B?D; 5;ʾ>z>AQV>Ubپ>_V=#ֈ5oX qvt8b>#?|zGƬ>} >]O>F>ńW=DziL/T<>Vާ=̳Y ?[W1̽^>>>b?:~ d>WN>+ =qG=?[>j1>-#;T<ǻU9B>N=u>|],> \≯L=dh"i=.P:>GW\=[?= <>V>By9;)Ӿc>)+k >۽T='m<1>e^2= *7=m"+">#>7>ȳ;>s==o݊>V=ټv?C=X5Y=!kyF>lĊ=zB>>d7o>]aw=@#Gt=H,Rm;*M>;pk?>b>OV\=rr=*a=$>L/.>X>l>4=65>>Ɛ=X}A:\>xf>W=}d>a= /?:>K_oK =J>q9c/<<24Fľ`y=Ꮎ0"FӾB>=Gӭ<cK쾬TzMSOUN>6=F>8dwd@ra=U;=7=b =.1=`=)9>R>8ߔꣂ*k|<>[? >[P` >L>x@ܼ\ߕ>NȽ=GЀNQ+`>>+>o<>$Yٽf=>δ@D>LAM>Bx=í>cټa>S.=ѻ>!G)==F>₾$>׿Ф^u*V}@=,?e=>8Y׆`>>0>&=vB{Ge=@bb߾NY0;=\_>K>PK旾Ǽ)>TFfP=Iy>=DM\맾S ->q>t>\1yGe,<\>^_w;Pgys" b`]M=Qpf>*|֮>ʲ>fBI=QϾ%>ռݾVv gVFpǐxR>5 /> t=z;H?w=dOŹ=> 7> vLg;ʊ?E8!$6ߐ>>5>>= >kxaY6g2W;ChU&>$;x{?l>/G9$?}t >U!:=P+g>C>M{H>KJ޽&Ξl*Lj>[ky@ ?7>~=(&Y_n>f_ᙉ;`X>]=xQm̥>>w-ɽ)?K=9>Jf9m=..>כ>C>FXn~>gV,?La cX= ͼU~>ԝ->>O=#>>p3>oݾ>=Q>2>cY>>A>!} .j0[=uǻ-I>%ξX佉v =ˈ>.<2};a>%zxZzii_>|Y|>c۾ꀾ[>Q>Ѧ<%/\0 >4>'<Ѿe(>¾Vaż Wt7Y>Q[,<0>Zc=2^3*:J`>>>M>t7Y3$m <>m;>Խ#f5Խڽ[z`P> 2=G{'=Үno>S="a> `> P,>H'>t>SxEUI}C-6>:]u>ڊK>B<?D>..>>vr>= T>xN㾏?ჾz0sE>(Lm??&fz>+쀾=NnƽCk>_Jt?UΔ>WG>%\(ܽ#s>0轙#>C>[8>7>_>2ξu=3lGT_Z䯼o #|kq>H>2Mt>g >^"aNo|5.q’M>ob>`;GA=m<.U>㽪&>˺>;{zjd=ҙ=^PZN>=S6=KA֚-lDVm4>?d7=c#>i>2KHZ=4 Sý}[7>=䀺U>=/E=>=>X22=$>9=VE>2C{|\=(>VM[>ğT >c >>d >>1W?N>侽c#=O?d=8d>?il>!$&=>8 >. 
><=?b>Ғ=Tۋ(= jp ?:.q.lj>ҽ} 4>*b>,fL>|>D><>޾$<>?qξzjzOcbR>@>^W@M="= @l>T:>vG:- >Ơ=)潳a>r>z=R_O__> 뾀T~@ =ߣp2?ӽ8>}Vv>=O^>D> BYz=S)M]=ӵ>$&k>k=*<1=/n>2>o&vL=+?>Af;x.=YgK>AH]'#Z Kt> >O>neXL>=խ'>C=NB!z۽Ǜ;ȴ=vϽE&> O$>Qa><<7>P>_Fkݼf@>6%튘=h:=Jq=>>,5<<0*\< !L/3(>IF8>A>=J>=x=ۈQ>.=qW>Fk)bz@=Y>=$>7K>W&> Ҽ" ϾC1<&> ~>Q>I >#>Tk>A.? =䙾 t9+>ͩ=/J֔w=뙂>k7D$ > 2>썀vybq F>|ba>1v>5P7,=g>efE= @NܽQF<<+[>WjCH_>Od>,r{=ξ#=+0e<= w߾!a>PFr(^ɽBur>_>`>-ڽ >>=G=^>\•=b߫= m%>AG־j`<8>cB>*g=^=$q>ʤ>|Zl ϾӼn.>X*>/9>>2=,=ȋd<59<|D24>#?=sY>@X> ?9ӾE<>f+V;^¾\ښ>Nvrߛ2>aV>07?鵾&>H>M&ܾseod?9м =U+k>vAu8>G]>> z=$X ?-p;y>] >t>>Y]<=>=>ȩ38>ӑ>J ˶=Y]f >H>Z *C@>,޲;Ir8>_þ쨺(Z>sf>cp?>/ղ=q>2.`;dP>J\N|A>bY0Is>,<{u=x>ɾSc>ϯl81Nx>e5$v\>R=Lj=R3i>p]$T>y>>>y>&|XIo(+=k>i>'F>><}Yc'ĻI}@>&<>p>=l=>mk>xi>7)׾ >ƴ"=mB>$$2?<]k/P >{Ѱ>:ؾIv:' #.޹̽!>Wi|=jK>>9 >{l>BBZ=gmmI(7>; @>9& l>](DqUM>=?hO(?cV=9x>8> >L>p:=)>v\A >>N>:H=>=G>+>$Q>3:1=2=1.=d]켸1>Q>=>멽^)%N@B>R @=<@=,<=!־>&=X&}H>,>3:6>C=>9p>M=I*B >h?۽B?߱9@ӂ>b>=4dJQX>m;>x >=Zy~t>$>)=,k=Mu>ѯa>XР=)f:>~`V>+)?t\->>>= >7 z!=m\=&?H=uоG>>>\0> [=a=S?> # a>} 8B>i>R]<5ܳW>J4s>7tvu>$x8(>T;>%3>~9>>"X>ҵ=^2Ҝ |=QԉX=]V=?>F=Jz (?P=ߡ>E+R7ZJ]=ZM*>9?Ag>n0^z% ?]>nd>>>a>|/ o _纽=>Dܱ?Yr>Ph>z=KƔ>3=?ɾ7"=j>}=⨽M>+!vCgI>="n>Ɓ$?*0>#165=N:GکVq%>ZƼn=>Z=h=l=!>X=`.%>j=ٚi=ߐY%e辍 >A@= = =0K<>JT6;3>M>S>.fƽe*Dg <>o`=6[y75.;+w.&.>?4>c>,= >؀=2q߫4x>%=$=Ϯ u=-B=d^9 ?suj>a_>E=f图2>bؼT ,Gx2-pL=7m=s'>OA5cSk>я^>8Y>= ;ɵ>_>=4>{D^>C@>ͩO>F_׽ŽQ⻾«>z ?F 2&>ٸ/ļe=F4HqZ\l>=C\>yнI=W&>WHz(޻&VN>*s>=%>a0Lq>t<==>|1>#$ݾM =B>]սSd! Wx?ZS>*GZdl)e=~'?w>@=:cX>!='ԽdZA>l >w>%:=>|ufX>7[2r2>>H>No>$i^þ >f<ڜ> r>l>DL#d` XYžWX뽤Q>>'>P>FI='>7Ꮎ*g<>fk=bYX>b=wclV<ݛv(?X8=W2>ꖾr=q0C>m=_>?>}c򠅾%><ֽ;>|t=[=NϞ;V7fM<_E ׽AX;vo=eK@h=½{x>{j>SD?= >t ?L=> f>)X~>,vZ>汽=

&=Ȅ\>b=>8ܾՃb(弽˟|==L7ml>*>t5 =s>d;['>  >)=e>;;>6 = >Z =d=5p =d-gUL˹^O>fhiS=><^ >ȩ%p'ZܼTߒ=UӀ)~>]=&|>̾As=d9=3A>oz>T)?1##Lwܯ+~>> > >cK>>RlG(g#lHaf a> 3>d;/Hž:>{>hTC>̧a4& <ɾk{ϟwɡK>t)=p?ή>oB==B">=ڽ /](>ICRcJ[>ޚuk>e==<>-س䏾c'?޼ >MҌ<. s>Q><1>3Ļ(>>A{ N DB=>;Bm>,RF>\\)jk~]/;=M?>' ?=5ľ'.>p>ܗZda>%>{a>7Y=S.ҼJ{t=8>ځ&SƠк>_R콳>) 1bWU꽯d.i=ĽBTj<M>5.7d>;Y}G$EcM>N=s˲>= =̳> >~S>iyl,꛽su}>2>;=|!?ϴ>>A;Z==C>8^P>/2>=>$>T˽K=kb>W>0+=jDpbN>=Cv>Bɭj= ,̽G´>QMD|)= н۳ >Zm(&lpw>YQW! =42->>[sܽW>i=a>Ľ¾V=zy=0wp$?G>[y%}VHmUL:3\=q^BB>C >"T=52>ʼ]>6z >>g=Ӿ<@<\K&>60R#>e{ف>gU9׽h¾Ծm?fҽd>-9DM">dۀ V=D֍i=mtþޢn>KC&><\#+b$I#5Uw;ПaX:H„> ͘== TC=gž6>=G7[d$l=d5V?><8ܙ=0Cg>>(=>n[=o=͇>`Xȣ>>LWH<푞@(>_Ag,S>o>B>1=`~>cXkl>>>L>3>=X-l,>&r=mqtU0X?>*X n>$m>1vM>w>1u]< ?JT|!=+{>=l 8>6 [:_Y=5 ?nE :h~_>.LK&sP૾ɳ>=(#>lU>V<>_==*>{ռF5>͊=c= ?Yƻt#!]>Ԓ;h>5%>c>X<>O%?DQ>j=!W?x=><=!{(d[>?9<QLm=eN=jiU6{k=ⴾ'="_H>>)=f*Q,a=u>ŽΘ:#K=9+^ը>>l?[=ݽ<Լ,T>FL=ݡ>F>o>A7{<;߯~fC)$RӽV> 9>/=i=>ݔ_y>)>vt>Tw= V>4>; 4BJHByK ?z\1V0W>ZX~>kI< La"zbRO=>[: >%뺖>֧&>~=Lj즾l)=\5>dpw!tM>>/>8U=hȽ?se'>V<#=;?=X=S=n&??#w7:>@1U>FfȽľh&>"f>>^F==%=Ry\d>)?5X L>ȕ6>s~y>}hgн/彧³L=Od=a5>Z;=N@ֽk>z>pʊ=y&=c(7!=Ņ(%> ~m>!*>.kkDB=^Oc81;޽ BҚ>4dʾ>YGN>=\=4Ħؽnh>E0O >c>q>R!8De> =ab=#=辙:=FrL>V>/>[?Dи=mtO߾Ҕ>s֋d:w??`=?A>>p=B\%>{=o߽ d%sEཝ\Ce,w>:=cTf>F{>=>>L D<]'\> :_&> >g3/4C>l=ս \hv>:`h41>r7>!i7z_>䠪>^>ā? 
|=B-Y=m|LX=|̽{=m9ZO?#>=*a>HH]辻< >VҞ=پBdb >b=k?H,۾ٽ}=4 =MY>=I=٣=ǭ/>ڽDU8ľm8(T(j>r"=ڲ6>Q>LA2=νjZ>))5CɼD" 2/#( >iEA| ><ھD >s>&.>>9x+=Zq住ح6>L*#>b܅r9>n@=J`< =O==='k>eJ<;9(<m>޸VWvf> 侹, L= :>l;5g;d>Kg>(S>kY>8|p={q=:ڄ+CS^=C>Q>\С$2 ]W>jP=X=>[= *<>X[ 4>G< \>J>S(>񗽶>m-̽b=92zҚ;b,=KG=!>5ֽ*=ؾ[=_thĕ ={#HU@>=]N=?lt>jS>SԾK4->=w=>u'E<.J=_iGb5!h־>̌u>_ \I$N>Dþ7=ނ:h(c==C[Bms#??3vK_n>Ry̾)}"">)>w"zK" x ,>ͧ>+>qD#8 =<> ">߶>ӁMe>V]>>T=:ͽ6=F~>Xa{ "Z>Ž>`>-<0\W>=BY>zyk 8>j> ^<> 9oýR >n@=}>=Z]Hȶ>8`_=8 >cĄ>}!q;42#=Ī%><ְ=,E>߽C;d1?J&s$Yǽ9;r=>B>rƞ>U=7D> D.?郡>TDبG?6MXB=4x=`!>l>հVh>¾sX>5"Sνկ],>0!,>>ϩ$>w }Q">z=8]>[ OSO>.=;b>f>8_=I>2/r>Ug:>>L^ ?CfS>*=Y=n5\>>0 >eml$!>j]>7=2=uTz=/A:P>w==M;Ҁ>eXknU>mOsT=?S=P=#8=PN0.d'|=FWf6=>c՛7>=J=8ʡ>ƻ=ƾ>j=촪=w>˾p>e.>BY‷g+>iO# >:$ˆ5>:dg>]X;\D= 齄e>8ѭۛ尿=N :?hƻƚ> E>H >_$FU=IQɾ6 >Y׾9?>=ѽ+z\#zؽ>ub[>F>J`aSU5>&= ?@4=>3[3>oDP]]I>C0?э>5!dK> ? 3p)<<8Di"5YT22`F>>ͩ=<;>J]8>=qz=bߗH>4Ol c=>M<΋o==?N>Rʽ ?Q'!`¸p>¾%J.ب=▾O0,=ώY=}|>>=kKM{>m瀻gnz=)>HVM>5D>n}{>RZ=/\>O&ι>dcq=]խQ>@쾆+>="׾L*>沽ż>>xKI>4l> )>`gھ+>PŽ9 ZHaE'r#>p+l>i>( >T}!>1>Ԟ95C#>`ȽM== >ېr>R.=!>>YY8><:O>=FTͪ>yF>Eq/Ŵ1u>u>k}G>cbV* =:<1~n>*ھG=F@=7>]N>>? |=>n뢼>$>Z)2-}Z =>ᗀ=S:q0ҾnDbL>Ӂ\na>>U>R6ʇ;Erf'=ff>5=/_=S>S\">}hZ<GǾg4>E>7NQI>:VN<=qNF<$ =l9>E- I$Y>=Fᾬ>#= b=_>>Yń3厾D;Gfڼ>xsT >tRc{=5<9>6 ?6>*q/>12>;O>=lV>LD>9P-I!>> o>xOþ,%k=$n{>a^>If=vhs[>Zh=Z8ܽ/=e/&2=^ x<>Q]"֛>ۯE> >!q< >8Z 3>Xn;ؾÍ>̾޼įG>\>ԭ=Oʝ0N==YHU=2 =Lx>/>D%>ôR=>b)>s>b 6񘑾!e$==0Ā= 㵾䩾j>CT缱׽C K>sK0 >P>=3>E4<*>qu'=*=x>@pUj>̽\==ݽu.>>d>&_>>v>tҾ*i; =T>0&E<>훾K<<2=6=8>W>Hp> b (=p>:ʎwq̯]>$>wa>J|eT=mSfɾqH=Js(>T>m=~=by>Kr>W𢽾8ɾID( ?V* ,s}>6lW;=[==Ҡlڽ#>4Hо(侞/IVs>/>՘>>|>.1Y{>6Sھ=>U^LO:؜Ւ|]>Zp

H>$>?\A>>h>F >ɳTQFlϼg=[>Ӧi>E<b=i¾2v2=:>>TnK7ٽ-d2?.> S=}CKF;)lqV>Q>=iʾ4`W;6/ n=0c>#Z *+x=C|>>)ܛ>5>U>_WpTunw>}==;>RS=PY=?1=c E=v5ܧ]r>3T_T=WhNN\#$r>O>Ƙ`uySiH󅽶>Qi>n[<=|>OX?>ϛ⽀0Ͻu=M="#>e@>>^=4>@Dw^RH>}=`9Q="ʽdJ9̽m>9>~a-]>/<91=8 u>P >=%>Q>=x{Ш8=xL>AC>ґx> >;T#u=M==0A=yO{'>t_->r7i>hJ <>\X:Ǿ@Tz^=>L\> t9=ܾ>a 7d>ڜ1:c>׼= >>󑦾I(>r5_=ջ)=>{=➗N82>\#== VNt <[9=[>w!=E))ͰlBpD<נ'M?u*(>J?;'>T>|Q=,6 <=V@=:N>%1?Cd>(>=X;>X˽L aSj>%>>E?=O=ҩah}>oƱ=cr>m!uE>8Y߽*C>_q=z ½:g"*<0|U>ĝ(>z=$??] /=]P+u>Ž`>˯=F<5N>t?=*>mL#wX&>B=Pp3G> Y³>W_=/<㭽SW:J5j=&>@=ȉ=A־v|q={#ʫG>b"?*ꑘU?psz>X.(?>ik=4is>5V$>t9>T`>,MP^(sT942+z /=ϽYʧ>@> |/ o>4dx=#d>e >N6CDc!?%HalX S/Ϣ=>uC=iɬ13?YF/ݾ*X?\kph=>-.>+<>Diy=$> >d@ҽv0:6j=WjW>̙>.z )M ?P0s>$>ӼR> Uٖ>>F>E>=˾:D>Dߺ=>6 v>Q-fyrP[>sNdO>q>e`>^h?C-=C >ɻ =v{?'܌>Mؽ%b>ď꾌?>P=)$b > =< 1=Lç><$;a;>ljD=>"?^lW<_4>8<\=9=>:? >H_>Vg=>>ZTY =}kIȁ>$뾴>uC3>į 7c/ .> =>N "*LHg6>:>Fx Lp> 'F>r= =`k|yg=='xF=}Dl .;f, t=;5_>⠝l >'>Kt>H>ߵx>ɔ~ɾq%2> !2h>e/Ci.=|B .u?󫻾,x]>L_>ZXn>>@3,zW$;'>AC=n?m`)>=sBscXh>iT?(&mHսtbIt>a<M5>"z>=ݿ><н,й;kdO0/N>4R> Y(T>0>EU?Ez<>LH <"ޡ=VM&^-giu=$Ҋ遽 >}+/0A #'c;+=B̾Q>Ӕ=*>J=\D>D>S =Խ >l>-]=(>k=w=*Ś>l8fW̾%Xg$J"p>]Ҿ U3?[vCQݥp> 8t=!,uLfa=.g>տ2 =c_`8Y>ȷ=<uu>m>GV<=۾=%>=˒>$=X&=㻽n=!='/=O.;>"F>~ü_ qMw=TȾא #}>w=+3=H=)X>߼.~;sj=F><]!k-=\O=怽 q~{>f>Gu>"@NW,>OA$[Tz;MЉ¼>[P<<^Ӽس>%D>5]ນj>d?`>;!j 3k>^*"=\ ?ھ6]!<8>;#>i=2>/>h?T=D>R>Ewzi;)>p8>A>==8 ?a=G>4M>>ЇS?M;$>|<q KE>FG ?5^> > )){>!>Wn=oɓ=!^U>N/P> >2 =iB>q=r3EQ>t|>K>C=.. Iɾ1"%>i=;cOܾɹ>l |x\⶗>2>m=w>XEd= >tݫŽAq^>؞<;A>3=r:>>cZTQ?I=g=S>~=o=a>?=Q!A>>]"=T>NFt)gվ6Z> #>T˾To>k= 0ESRQ=N @~(>mL7@<)->}? fԽߣ?>j8>)[`=+=Yy=j=j>Q9k6p=vs>7$D=>2ʾ gLD8.>m>=J'>q⽫$v /?=eξIvz9w<圾͡=dfK?>>2)>Ǿj_޾n>>>vX.lO<ډL{='l-US>>=՚>MF̾ >UPu<>oGcP? aϽ'? 
c(B>V>k>_?>=JiލZ>ņq_= >_,|G>¾>9>6*=ZNd7Fk>F>P'?>K>;=ދ~Bݾd"'>ս7Lf>$=qV= =uP*ym(b>$1۹,$>V>n=2{>F>S<>x+>_%d8H<ݽ2>=V1vW-"ij>Ac>v>x>z>h-~=|0EѺ>E"H.Ӗ>>y$ξ~I>0>, (_=>۝m_> =d>>0>M[A1>꾷lBW>C=>9yn= ?cv<~>I5!=H9ظ= S>^|+>>}(=iž!k=4>17Lfڰ=* ;o!>!!>Ez?<>Q/7$Vz}6> >O>ξRh>%\Vy(=C`=oX>f7>P;op>"%>>=˾׭;[>{Ȕ<XfU⽳`Ⱦ Fa<`:>>w>|>ڜ=|~n>LXݾLo>]ѩ0e>F[8m=~}4u־םݽ1BZ"M62">0?>2>>~ +>:=}RxHBm=S>ž>U=_üm]>>R=l>A<~>DVO>m?Ϸqw>>=Q>_L=ojT>\;*֯/P>6=Jp>݉lJ;$v> 0?==þ,nDώ ӕ= CO>9=am;_f=p)5 5n1z ,h=Z> {>, :&@>2l8>2r=H>J>jkl>a>Tl=A> 8[ fƾg>=D^>)<>VԾK>y=K>9E>=>d;>2gJ–=5eion?> j8w--S>u>x=9v?>Sͥ>:= >)̺ ?#Ψ D+r=}#>]k>_i> ?k^= DS>ʾw4>;'">R>VL‘0ї>򾁥>qe'==&>=[+0b=bd[+>Hk2=x'">=w=>y>=dP}>M~߾8qO#K6H>6V%3>^#AQҿ>~>z>Xq#.ž>z:-θ>~>[:?u^t ?+?2>,=R :>NkL&4 =e׻z}7OYܼ¾j1>7>,w; %T > ={MнG)=ꈷ<0 q3-8?>>`?CO 7@9)OdI=Ws `BI=jk>^F0Ѽ-̼>k?$R$cMiE=>31V<>%5a>> >YMX#کQ1#ؼS2 E>P>=$]T^O/r$?><? =`* i>:*>mǾ=2{>yR={(>.Zy >-}>/U >U>jX=|A6f>ώ>}t>R>^׻=x>Oe Ep!gڽ(=yˑ>5<>=M>kY/ޏ{tA>G˚ қ>ޡ>G󽡱)6>`ɾD}>3=?!t=Ը>%>/Λ>˽(=A>" >>!x"H >ή> =\v>=i>(ٲ[%J=D=)_cJ\=0p#k>>>at>ä5=$仾d`>Q+.q>D@=2?>s>_*>J>1!ͪ>K?:?>:K>Yô=H|>LL>nǾIza_Z85Kl =Y'=#> .9L>l\Z3|SU.ؽܡ'>->btX,\>X=Дa=7<=<\w>]!?s`m!e+W>8_3rSq>=<>s?>?=C>Y96>z&6> Aw>̲Aj}>96j]?6>پm4H>ѐ>ΓA[l=?hޱ;ۡi Bc=Qߔ=B>b" ľv#>V+F>)=?S>8;OKnᐾWD>Y@h=@+3;:=Q>{;->u>8>Av>Ԗ>AI>'0>[=DA=nN =Mq^PQc?FwY2>͐ó=I-:>( z>.7=پ<ܾ Wo=F28>>nR-5e"?ͽn=z}5$\J>J26=\l>E>+3us=Fj>K>y#= Xž(v={u<ǥ> y>Rm( bՇd1>#~˾iip>\>He>~=:U=ʾ=N?>#?:^=["=ǒ;.}E>g>q>Ap=1>~> c=C9=&2>>W=f;A$>>> 'bW!=ɽ b> =$u=eν=(3O=e]tT.<=<1>S>zlr.OR^>dȹ=u>ǽ,f]>ucD>f >0=eM=RϾ8-׽X<>H=>Vo/?+= f>qۺh%>~tz=Os>t>4=> ީb=-0=C>!*> Pׇ=ƹ4YWNW)>>d>V>[P>Z>wQ3!*淾>d[=5'eľNȱ#_>迾$\=;4~uѠr]f=-þy/>uEN>@̻ >ϳ㲧`>X>i>T>~k#=sλ# E|#>{=" > m>|!>nCjN=B9d= =N> =?G>?U >S>X\>f\a>H7< 6\*=#D¾ >,=>AƑjk=] H>bR>a;Y>>s>\=>h>ӭLQ̽EIER>!G9_>CI=xwv =Bw=D_QMኾT=G>>Y$!;y=>N!?߿ý꽟='=&pԾfOiZ?Lz4۽3[;V n.>]v>1">VE;ŀ.8>I:( >>>OA = ־{cs>RΨ=\h_?ľFqmѾ}}; em=z>">;oL<>YD&5&-ʽŀϦ=,o=X 6=dt!>O='?%q)7>WgX誾JU=zZ>R_eϽA'OuvlWΝ=f޼',' >r>==1^?, >GX>y> . 
t摽^BP>=,Nj]>Pלe񌲾}=23h)?+j=3?S՞*Žb>Qy>3ケu<{>%XV>5`>@|׼>۾4/B e*>>_>Gz>%Cxu>~h=>q>O>t5>˾>=Y>ɀ8>ᙾ)>ƃ<*? ҽV{>)@W>at7>>*/>؟D>l=XӽCI4S> $.h덽a= >0욽.⾍܀=*><ľhJ\>8R > 1>E?>!X=a22m [r>>(n*rҥ>*o>$[f>U>h&>#0n>4P>Ӥfs*?@<;)Rž΀١>֤^=h݆>,9<}TlS=n>`S>+k<8<,+<3!7˼r_ jWj>8ѹX?ͽеM"e=>7NYٍ;N$h= =>&M"> Ծś[0!B |>҂>e>>F>\;>K輞Lhĕ#=T0>>>'=7<ս#3e=>D]><_ۡ=[>/k>^1Lνȅ4D ?>Ͻٚ:6{>><>"Fܦ{ =w|@(=YV>2Ծ`ʽڟS]wbȽ'>"[(?{w>g4bN>?=?l.>=s><)Q>E>΋u=;q>׽UΪ4>>xU)>%)>w0=C==-8@>?Kf8;>U>Hq>3>v߽^\<=GZ>g[<<@ WEԽ.ڮϾt=t(]>LX2V0=i>XZ,WP}ܽs1>0>mEѭӼaw=>MOC+=>x>4x=C=UW3V>]=fai>R>jj}@cCڽ3;>D5XU<RrTBe=>1"\G>׽v>:5>>'_a>6>+ >5! B ⽽߿񽠿Me о>\)>>LUk>'۽C=5>3|571>CXk="336cD/K<><.|(ufAbVoeH>T>d4>>#O>a]?l<2 ޽*>t>,> >uY>"2>5p=6!߾|?;Hr<2@?&{qH܋qO溾x9=D襽A/!v8?*.`:ڛ&ˉЭQIV==RNyg"+iq'x >~~QĻ#z $>Z=>TR<@a{' $<굽-&>>b@9>%@X< >/>=b=5W,=>?`>M>r#>?><?>>iþg;x@>zGd>#L)>ynXŃ ^i=G{>jj9>ﳍȗM=€н><Կ<[ziT=k̇=GET;>)l>D?nx>Ⱦ :Q>=Dv>y=@vY#*='..~e=iD(=$Ӂˆa>ku(m`=־XY<=͏=ZY :]P$=H@>;lѽ"U>=D>eG>M {>C>ͯ}=F3=ލ ,^< =>:D<6V;4>ɽj>!w*9>굾X @> IJfF7>KNUR2>>#ڦ9Տ>w>bž"*>PQK\&y>Z">$> nXٽݶ>ܽw>\>mpϫPy>?>尿?¾"ͼD A>yS>wUck6 븷=vG)5?;o="W<u>ڽ}=z1>= c>js=b>'sd=*mZ>}DZ宾ql=) = > ,л{>}`n>1)=e$þ>C>Zݾ_SC; <>]=,=?>>n.j̀>ik= 5׽C[ m ==a=4E "PB.fGvѤ8'U,ҾM4={9!0<$>-=[q}[=M>-G=O>nz,>gZ>Ԃ=7> W>B9)K¾>E1>UY>BR~>Jf=/>8!. 9J=2$)=>O(=ޱ>]޽4=:>%=9̘(Z=nV>%ܾɾͻ>p>~似¼"3n0ͽho=k=i=Z#A=:é<= a=d_k[>ے>f=M<4>N>Lкc@!l8>COc>5A={6x)L qm#E?<"8>h>ItP>>U> I>R<};b#F1P^1>>*#xS:J==O=K>ar&=5l>*jIb> =O>*s:c>>TL:v>r㟽gwG>(oMI=hBR>='xmQ=O1?G> ?$)>mc*>ɤ=AڽͽIȽgb0. >>~>hI=>MɈ> >xh:ԽEξ罾>k р>=zU=Y߾G˚- ֝s=/⾽xo>iϜ=>9kS>x-lL>s#=VPĽMne>Iւ>Gkyj6J>g=">,<,>$&/>"|=뿾ٚݼ >=Mw=K`=q$PɾB>ɋ>L>y.>9Y佇!>ٽTG>v:}Ob>;޸>9=`O޾q}<=Ҷ:ؽSjZ=vla>)q=7>$g>9 O>ć=+]]TQ=k=A>g>=*]@>r>uχ`>gS>k_>$f?A}=6=vm==gG+=ֹ=r[b2[`>SO >6̍<>>=>uؽk>.=ڬ >J= \$.o ;-mR>oޑ=n8;7D>U۬=І=5;ӽ?f>OTR!>=6>=z>U|1PsY=[;E<)ʼ>C =򿽹?eŽCmwᴾ\>ZuՌ<;?CqD>*>f=\G]I(>3=6EpUg>`</>׺>9>G2=?Q)9>]0K.v(nx͚>)p佌5=qͰ>qN>>J>Q>\k==(o=.*>I>&\=;}B=o>=q>E>}>$<>IF d?D>O===\y=,}>>|7H=eY>=d?ˠ=H4=9:jE=7"7>齑= :>R[E;>F-Ss0>JNj>8YP>.d9?vl>Z>!j=2e,;nǾ+>]6fmbu>=˾+=)=?$=rN>0/ 1Uf>R?>n=&>(ƽn.>8pJ=NIν (3

h >!>m .>nK;d=ֻJگ=T Δ>Ks> ?1>`+* ?k=q<;=f>EB\>6=do>> CY)p>@l(=6;l;]Ƚm->+6>?>%lgE>'YƖ7lW==> (>Hpa ĈCx=5>e-=坷>X >c=E 9z;>LV=W` f>⡁?<>@J@ǭsKN>Q|>#."s# =Lj,>!l=z">'}>F0-=cjY=ѥľ>R])>~ Mx2X8>:+>L>{ nh=Y$>Ō>HǾ*Q`[=@ӽ=zeU>;=:?O>Ysϻ<'=%=>mK!>b=U>xCڽc>L(>4ᮝ><̾&:>mN>zU x>osv=tTP>'l\<=*y>x ʮ/Ĵ;{;͉=ͷ= b=bw>H>-+|,A;;&)(>QĘ=iB>1ۉ#>t p=ci> =WΡ0==٩x>#+K4qN-,g>hýF> lf\]>s=66CՎpV=Ѭ<0O>ҾH=Y +>+=zLFދ fbZhD <3+>.7>b>WA>(=e=>TY=AD>PGchr澱8i!->!#]ٍ=.>Q,X(Kd>avB&>Ne<$>׽eڡ9Z>>Я>ԇ!>ƍ+@>>E>E>E>s>u*ƽhR=;jꏾp׾v w>oakg== Yr8\Ɵ;6<0ڽ̊3)>e=,k&< ے|vwIlU<>??6Q==1YA~=qKFp|)>!tF8> >S>t > ?RF?I=㡼==Hc U)=s1<"RCva> v>-Bv>䭾RE &> J=uoн>i=Vj>Ͷ>C=4S<>Q= {=I>n>~ޝ>s10>-<1>~d=8#ͽW?9>0=F0Ql MK=o0=u2hɾYK>#r>=~EM>f׾(>[kj=b5>[>0>[>ø(w@Xf>l|u>]>Џ>0Jx@Az>M< =qg>B=/*l~7q>@f>x>O>⾏Pjî6>lU.>7r>L>!V3>=<=FԾM[L?k`>d'&>ܚ)<5=L>≾e >Yʘ=.>g|>=+>R>1>H_P>pQ@ Ǽm?>c1;s *=< ?pBF>b;jN>߀Qjľ=I>"wJ//Ecܽe >l<; (7ľjzҲ=R>vJ''A_Qkyᙠ=!j>αS:>!f>J!=><ס[h%wUvQfCMwp>ă2'F=>/}a8?F<*0ҹY<],?>JuqhоK?,>=:|>js> V>G= y`*+>>>>^>deQ=ܾ>3.(ߚ>׻=sh> /=X>`Y=%N7="=ż#>Cy0=5>(>6>:X< =:>~^9#t=i>y>U07 =ƥ=: dM;3%?cRr7z{Ou=`:=Ե*8 1%;>0ϽCvy?O`:j>\þsK$>ė>S3>セ>Pt>hܼu}4=?hn5r޽p>c\>4p>Հス<<[N]>Tk=.w<~+=@=6ɾ4>5%菂=Mѱ=&>4V(Xq >i3y>>=%>>5I7>cft<U>lA=xo-8kRr>6X> =|F">[((>VQ5K5*>?W$,FԞj6d>Jj.* \G= >/Z>X I0=Vl=ɇ)>][GQb#M>XR64=i>XSBtc\Kr<~F^w'5>][]>8!3>b<=By2O9=C =>jh>_GRy᪽G6MuX$#R=kJ޾>=%`竾TV?2[w#==Q^= LVA;c;>B!1>&i> :>/ 0, >JA1K=ڄZn?g>@>q>$.!>?>"R}=]D>7Q>=G>+>OaHI9|>%>.><ؽ/>6ý鞕=#jb>:>u >f\~c>D>>#[;>aN?Djsϙ33=P;?>0S>> ~>К>37u=1>a=0]>kGL~>P#r׃5J#潷*ԃ=l,E)B>9W9x=S(7 ?C6?&"]8u==L><_>>=N4?Lo>d=kJ$7>)==I3L_U=pϖa4>jν>:=vPն )*rm>,$ykA@=>&=>->[:ù~=>ž,>k>>l'=W촽K!7}=<(o>:j >x`: o>t$=#4$> >F>X(6X°;R'g~f>R>v>=ՇG]>٦>v<0>s>>!F>u>l!= >>=d=R>/>Y>{1?8=%%?c);2g>q̽yݾ(y>!˽-;S>' >Lʽu7>CkY?pV;YXپp[=j8>(7 r?ą>y>\>.`3S7>[>}3K>s2= >s#>cɾ@8Zp=CKY==B==]6)=*=(>mM<h~7V>Hb߽镰>g|=G >nh޾w>hB2ͻ>1 &2>_=0=,w=95~>?G>6>CnIyʡO* vze q=]<-!.>"y=z/uI;JG$1E>iy5>K>T0M ,>">|{>I!=É=M=t *0{Y;۽>ӭ>˾ćݾ+gZBO*ھPl>>j {<ԁaSus*ཕ1>~Rӽ M>n=ζ> 1>YR2=ņ,]>iŽ>/"k==(3=B>Oý)1>QWW3 >Jdx02=Y;uP=S >r)<*> 
|A?~=9Vԋ6>ϝ>>[}%1==μ=+s)f=_>Zf>1ڼοI>h{e>zn]5>1,i6>9^ݺ>eaS=p<%>rȼ/+wY=d5HԻǼc:lM=I=ԯ>ҖV>ׯ<Ӄ>/?>h. +¾ʣe\Mo2QIн6="`޽;==>5>kN#?ڬ>,> >pϽ=o>>L'པ͂=hν-ڽ;No<_=Ír=$1>*XRgP=X>oȾ= W=?Ҽ3#߈o!ҽxU?=d7;mË'7r=ս$BSnB>>`⾈AV kN>#=k=vl>Fɾv_>"3; =q>`>=> >f_Wb)=LV >03>`Ia2~My1/c>!I>v<ꕦ9: &=F*>:ؾj> [Jf>3־^>p=ȾG yr% ?qt>>\={.>4PѸ>鄽A=?Kʽ+=[> ?8Ž(+>_ H>)͔bDw=9>}'>pox>@>n>'=뒓=~i>S5M\1>rJ1>=lm>9$>lgJb>d `=nC>g H=g~>;k E?LU>%A>0L^/<>ΐ >b}Dć=>ș=8 >A=܎>$M =@)?`E22>&>%sġ>=Uj=m̽[>II>b>zl>vH<:m=[P&?Gm=?X==W>?n_e>UW?O =ᅿ`>༄>f+9>ݒI>a=\I~> =Am>F#<)=`=ㄽx > >3#>NJN> VVm>5ա~>(E>5?=/>}=92;iT=1 \l>(>c{ڼ=l> Bc>=L?><@=$= &>!R*#f<>B>=wqpYH ?]>}m>L>x(>-xsuFZo c9#ym=sw=d=2>?†Gz&?k +> S>7>>xPM17g<Ӿ{>M@x͓ie=ˌuoϼ9.='@>(M>M=Ġ}h>2 =.> *uNa>P8>N>ϔ>>2 iU >0ҾE#i=t>$Wd>>|G=UL>>䑾} >?=jƢ===Fw>>opV>=7Y͵Mf:X"I{>& =#*?B%=]>G> =ll2?&>ppōYg>NovS >n~=O<4>>VI.>=Y>R =PD!>+׻= i D>v}>4X> 2>a>}m?(yB=pâ>sbl])E>]-:0N6KѾ?=J>Xž[kX |T>xQ=8s=a}Jk! g=ijs>r a>JO>l>D>W4m5ў=Rv6Խ@;>>=Y-`bK>On n>>X=8: J?>!sunM lj>2̾B\2G'=JH#Q)7a=>;>zU>|ƫ־S?>Zx>>X=*=h\>v83RGێ>0E{ݗ dg{>@֎s=A6ٛ(1? afkN5>ō=ɽ̏T>݂>,>p=#E>ƾ>P=ؾ)!e ?,>þ&j>{cR>-Om>Za>cB~Q|>sY>kK=>>|2[2>M?==}H$>_<h=v> R1vߋ>xVJ>=3= ܳ=bLj=g8> $>4½>5gY=8vӏC- F>PU^* >w ё`M<0<)b?10>[@U>`9|m;$L> զ>)=\~>.=.Ư=M>!Έ> >Wj== Y>1>yf+75f?7>> >ؾżSǽ@P Gְ=?4Io$d[= P>rcZ=.<>dP>pD=T2+K<ć=Ľ9t?Kp>2>S>>o0=[ =2=1>r> ,RwG־eqHdR]$5BY <`w>fT2S>Had>\P^>'^׾[>4&3#>tYz~>p>>rc ] .G>A>Qk'`J>|=AV=o0e=j=7Y=,>hi9>-Ø'd>>d]9>#վ`>TS'=6=<^=a >b%=;d>>%ݕšgAl>iE9W>_:9f/A>b4>k>Ҍ9Һ>2+ >?ϼ>o.=怽c>=,>=t%lM=1&="z>藬=.k0>I;7e><ƾ:Ť>(>Ғ"VO=H=PtY>9`>a~e= [^(>l=rȞ>> >lZ2 ܾz}n!>]IQ=]>ɼk򅾯]~> <[۽'>AH>oנZceA>ej\8$н}>c>n=:5j=b>L=+[=g=/?i>.=5vʖ>w澪؊M!$R  X>1+=Ԡ=>=0>DX>n?ʾ3tS0)>VN>窗>t=v<] (y߽!$>0)aNe"=(0>L]>OƉ=,=:>N]}u>T>>@O ;xG~>=e= >>^?]Q, "s G9aŽ5F>\->Έ=Ÿ:=1ڳ=2[3/MW>'>+4=c>>aym>\>U>mBX>G6=ѽd>іEl0=CKK%m=d 6? 
ڽvxp=ڽ7h>=e A>d 4Ԭ;Ԅ?l>TT{=zq>N=>ʂn>YS><1G/?r>a)?Џu>^D>.N>U>9ս< ?EIV`H$A k<?<04>Ajg\i>|==M=;=%=3>⒯>r>j>>`ͽսX>&>hlą>#=a>]0=>Yd$=UԾ>~W>->_=*}>شOG>}}&ڽm>*^C>b7 p3+Zyۓ>AZ"5>;pX`>f=K}>ͽ-Jy==>.E>>>3=6>Y<>2=QCO7>iWf$n&>Ս=%qHjD=iF5=t_&ξI+> >=h;Ip;0(>||;>v&kٽۭE+ي%>^kz->9)lm-徎]> C>xo> P>7b>Y>[E(=wlݔ=c=m>(gĴ 9Z޾ʘ̾(ZcuR=ȇ<-?Bp \=,1%PԾ<>WBڰ XH>Q>=>*侖=(>t;>߰dR<> \>^^>HʾOk=w>ꤞJx>Ju~h?g*!u>H}bK;]IxVs砼>={= 4[p=+׽Zc>B>ڪ<=z?> =3#;Y>Q>SxǛ1ϾF#׾>DmD>!d{ڽq1y?Ӿ_K>8q=9%=L+>[|>'x @R=)8d=ˌK?e3 :=u2>>'~>>[x>7>&a> y;=)>LM!8663=$1 =k{>q=m.R<= =g>&(=Lۓ>3/>l߾B>? W+~AF=55?Qk=T?_͠;@?ɽU7>=wb8>0:M4s=P=I>V=7cl/R>k=[zoE\5vr>5Y>Y۾?Gc=ƒF>WGߍ½ZϪμ>h>{k>ڝ>U>;W>Բhyp9> +u=# MlS"n>签] ͽJ I>T=f1=Ve=KZ伉ۤ=f*0`>8>ՙ>aj < =rb=1>>M>bJ >DӾ֓>ھ>Q>B=s>ITNC=^fK>0lɽ1_i<,Y7:>3P>߭>,=g> >0k=nL>n뽄N޽GǬ=*ˢ>0> vl"ҵ>e?z=D>>I6w >F(>>e<Ҥ@>npڹ>Tƾ8幼=e=>+H>[>%G>*(c>m>#[Iӻy> m>BDYI=X=07>Ƽllc:=4/;GL1A=IH=Zxc|>:<\ͧmUi>) SS=8=$bl ==4n>&@z"׆>Z>p޽v\>c>:>3]>ȍv>Wꧾ]=Q=[z/>W#?͊<>#qqe[>T@=`M^>=ýhF>V =!.K+ o>K<?O>>ڽǿ>C>,>oHݫ=l )h|lOϾ+P6! `} >H=k%>H޾꒽N;<?>]Yi="=?l>*>}"?>A"? 
> > >:>|=<=䌽'ū[Vş<(2>DB=>A>=[=oU8=g>1%=rپG1ޙ;ԽJ!1ASh" ?,>5==`>k2i=VCY>P>T >|F=Zܽ<=؝=EnGb8>5>Vko^>p >#>#>1 =(ɽlxq|o>ms=Csj=/(><|v\6>=1=@O;<־H=>cƽ">kΉ8k=sl= +`AB>l4t>~=ĕ}?߾Um->Vex?,ph=?=;ܒ\=xu7>=>* <2C>U>'Ԑ/:ξ,{=>c>l>u><:=|)<.,U>'R 0]-x=' >(.>Ƕ>ly Zha=!n>\{;d&>R[ļo{\>cz>iL=BC>k=~h=I̸>R5R=YrF>6 E?M>پ^$'=m&y霽 =Ym> Gn=2=dȓ9=MJ v>fx>wW>PSʽlKl>>׻}H^p/0bMоr Sd@<} 5>pBB:>Xթz&;ag>`3<*AWٳ><7>@>ح = ab0$>Ծ>$Ld=Ѻ콇' 6=n>~ɻS*=˾+=оb>)=>Z Ke=U>E>'?N`g6=N?LA>Z>>MR>(ܡ=yK+>Dv=Q%߾{1=O.=!> j=<?Hr>.(?=I4KhJ>=+CQ>}\2=QH>E5r>~*>< >~5>"eپ<==˔>L=Ȳ){H?Tp>4ľX=S=>YE\P=D>I_ <藾>\>VX<=ݻ|-U?:|)=غ5>w>p4]XSU凼ܤ[^O?s׾o=<1>2 >ȑ>=b׻q$?ξj @=[b >>d:'H>S>j>Xܾ=*ӎ6`$P?= ZNp>yʖ=>ۅ<#>AR>ə>a=<7cN >VDs>{|;R>-m> >pRF>bo==҇=o>ܽI5>gOv<Ƹ>& U>TYyὖ#>rg>q >}E1> Q* ={kMh'1=S> H<>P~Kh>=>>N =ݾ0>X?>,G>֘=S?> 9="dƽD>/vLiVvZ>WZ>2>iL羾#I>F+j=*=Zo=/>Oh~E/La>%>%s'==н=:G>f>=7>={>tϽt9N#?==#hȽCߘkQAzO6Y><?$ѽ>9?Ѿѿ~GZ>$?>E~v 5p=H,5V@ +XܼmI>^77xZ>nK>~=m r.>U>{#Ja=l/>j>p~媽LZ>)>VuFR/=6n=Sw|_=|YU"ڰ=VW>&{=?b>j~>|=MHe=%?|~rb u{^ ?O3> wzຑ~:>|)N>B-y{χ>޾q>c.;G<=W@>"!0>GYoν\׏=m/>^>$<,+>XB>c:({ռ{NB>|a>@6񖉾'ʌWE=(p> >6y>IL>>+1,Ir>W<'$>T>Z%2>9Nt=˾-e=u )E(=I㽝2-Go:J">rƤǽ R^6q=S!>=`ɽL>m={;l37>L>=ΰ}C$>k6`-ѽ!+c>uW,Q>Pݟ>JL1>9ϽnMł>i=Bݽ<>=`t>*HN>(b>K ta-/c>b>><ހY=>1$2d@HpM=d>s@ =8>= A= =&lA\> >Ҿ=Ch>>.VLu}㓾ԥ9>F#>jn9=Y=o,潃@>2>R>\=g~S G]>E#>#㽲>sF>d{>p֎>B>E@>2>xI7l=h.>u=*ߋ=n"$\Zk=Y>R=7>m<>*?gG=fܽ7,_ؽlE>C >.-.&>%Ľ->=qGy>C#>>#>ј0-=%hžI>=ܠnӾ?=~M1 &>',?V=ыFe6=fS[ܽ`-2T}= hQqH7x=b>׈M'>Q=K=i> >)瓾=1>= #Ѝ(2漨B>D+2=) >."=T=5a=Wd=)>!>>r>h =1>O>PP=i+<<{o8z=>H>Pn>*>| ۽G&?db=@=Raowb=7O>B,>Ƒ>9d*=Rs@677r3\q#D=ֽ>B]=WYd>h>g<=>$׽x͊>4P>n ɼ_d>%2R>Ѽ EnZ>oɶ=: :.s뽼N;Y򽽰w&i?>Ǡ+`=R~=3="_I)Ⱦu{=ٜA=:>@b=I41j=TゥW>C>v)>ԃ.վ}cH_<> <>`=,>RUutk=eZlS<g"> G=>R9XË=!C> j>ҽ 7> +S=&S>>=[=Cè>وz= W=FGĐNr,ٽt}>?pY=z =7q<@վz>~elv$G>A͔O>3vdOa <[>?>8vw߽Ii=n>.]Ͻl>RhD=O>j%>e13% +2gWqȔ=aC=#K=ݢc=I>=]ox=_=ZhCWvIO>KkȾԌ?>Np_b+ZC>P>K>W; pD+>_Iپani>Q%>H<^h>q ?w=Dнڪvî5=꽘eR>= >5\>V&V=5=C> &=<=d>U=M%6B۽M>1>ͻ޽: Ծ? =>0yHi. 
>⧮gs1>aՀ>i57'=B ==mW>kʽ0=࿀lM>ȭ?&fک|g1h >wy߲=h=v"Ĝu>"==>YW;vA=C>uý쾱A˼x?2(>.|#D?jg7ʯz<e&ƻt;X?G(=8ѽFpPnY=" mo>/+0m`(d ѽu?Y=l=s3]>彬)$=>G"޾4=~lu{l.<>^R>n>¼fj b>> pQվuς>ȪvQ=-O͂>=Y>9;>Pi_==}>?j>L=ҪF>1== ^[=Qd>Y>b=mUӴ>=)ދљbmP>>R߾w>|=E;>?>E;f>a>A:ƃ=d>>G>Y=;'@]0?> !>lDs>mId>I=d3Q<~q=']>8>~>5>Kq :;B<R.Ƽf =a{ U?[ ľ1:' V ?e &Խ,>MW=wɲ>lJ悇=&hb8>q=AV>M׏>%H>>F>콯ݽX>h;>wo)8 C(Q>ltzs>cнH?=;>>>==$m>3>3U8<_ҾFM>z>W8>|0:>Eؽ,OHΎ0ͷ='UϾx5>Q*M\7>쟰=u Wof<ͬ=o៾wýݮ> @Hl>L}=Ž#=>Zu6>V9>ma'>>s^>Fľ2߾M5>%:hkP]i B>4Fɻ^>)`M 밫>>&?>_9n>7B>Ś>ܺ>*?I"_>jly=Y*9*I*>憼!M }U1`d=`_f=*0=H^>!$73ͥw=5:s=c>OH-lj;R޾=bv<"qDo>C>F>t;+ ʼ J)aX>$Vđ>%=mjG`>|<$IKy>⏾|yJsP=z\ >>kE>>ö7N4>> >w=>*/R=vOܾ-d>>G/n?g>Y >0e>!=s(t>2=Ib>>I1U>WnXST'=>/`u<2 `=ކ>ӷ=Z=Z<%<=6~9*"<">P9BMg> C>W{S! ?!"K=[=>¾$Ñ>=&̾!cL4mz >y=yH)F;s;?tJ ni)l(Ѻ.B>zi>.F̽CD묽=< NiWL:U"Mכ>$O>A>mh7>v\> >v><)>K=tݗ>d˾bH<+8>Q>̽V=Ɛ>=9П- \C1ٽ)Ö=y&>mb>šp8m;ɫ1>B>@'?aF嵼|>"p8#|罋`>ꥬ>#MKh=Ⱦ̒> AھFfPn>> ~{>>>P߾Jp>]}aڽ!E>>!= ==jnz> f=u>tm㣽C->@M>ZP=پ >k\Q>zt~ƾN;C:Z>=>c= %47}>I祽Xm)[>>->o;>8iXRP\=\=P>t>.f> >>z>A?m=J=$?U n>>m >s<#> N$JU=̽߅zE>`=H <<]k'>Bo=A}E>>u1b>S;X(M*4>?eyY[= ?V|>4n5.=g A>vD݊u><=e=pоn>ؔ>?O~7n=yfм@8>'3d^7 =~>V=g ,>'.>0W]lξ%>>)Ѿ=b=sB>dt9=H<o>]s;q|~,%+ြ>h> DHk T=ʰ?Ecs'_Б="[%={Q+?켉|"P=KrM>7^g>=-=#4=#>L>XH>F(<6q>s />%{=H>ʽIU>E=1k=Yx߼1=C6y*q><;>>L>S̽8=*㾛p=d ~B;ME>t>Q&=]O,ڦ>sYo>\ʼs<"vp>u=( 9cߓ=Jk;>:#0>#=Ey_P߲>}d=We|=ʽ߽&>d?H)æe c<=HS)>Vsb`>=< at> ] PG>5= ثLJ$Z=^ +<*?lr̾nM><]>j=̘L=m*SA>N1>Wξ˲; 8|=M(>>A>!O>J>ЌNk>(JľV>lJ>=G>I+>:oߏ=gXɽT2=[?֑ݞE2=|\=R9>ߠKy>x4>?<+>cZ[;ͦ-2O_ѽBhp=*:yz)= y=(Z=$x>Y>:i>f=}>L𡎾fN+K>{ᦖսľ) j>3=dͽXļ\^Q>:uT=A<[H=>Mu}[Gy=}W=+b>]8Ƚp~`=s}> bAܽ>R>?*ӽϑ;,? 
C=u,dA>`> H(>о >K=S‾Rq ?Mal>x><m=<=D>T!"t+=v=(>xJTCTO">6_cν=oѽZ% t4B>@S>B>q>7>l<(꼓Ļq>>zӧ>I& \!5C A`>r=5>+þϼ >Z8nb=<>a=2=J>>m6 >\.wٗ>'> >&IƁ?qi>(K b>>>vv}5]k?=:>v>yS=+s{@.=h=)R>rH,=>?Nj.>gN>)<}Z>]G¦޽~Y>7>@>Lg= A>=Nq=Tn4u$>M˙ :>4>MQCĕ>[;Ľ ھ l=;w} >;wGc>k<W(>S l==;h]->t=h>%b@W 6m=V5I>?uͽ\1ޓƽrSH=2l>V8>=)>9>=+?"= -DH̼9>߿=`/[>5 vkG>:ߦjE&>ht=ަ\>LG?m=={g>@F=D>]>d!z>Î>>&=7>>d?>3Y>ߺ>;n>ZG >V=V >-Ϋ>IO^]1x1OܽVe>}+;'=j#"/.>:<Ӱ&>T<< >>8Z<ک6輾.ӃLu Ov}n>=>13$۾p-3zs>r>qw=&>Q`&`܉>J=Yـ狾>v<>v=Q^·>Lớr?~+= >b1Ǿj`>L\>=29-w#>¼ٹ;ZNq>F<>=F`3>%~΂Ѿ=[E;G >F2i|(ʾÜv4'̽<>6iþFѿ}DH>ŏwB |xS=X`=t=A4!X>H=f>#>#Ƚ4ռ=[i>> =>=n<7>RU=aY>>Yx<ՕP= w>pVK+?->n&%x%s=>);;>->q<==wm>ϼᵾн><|L=!D>־ >>>/{Ӵ=rZ&K>e>(8%>]륽1i>/ż)=(\=8 n>h뚾Kپ:Ć=ajD>' >tBziX@<>& ;E>mh>3Q^&,zWu΂>A>{oN>$^D?aLޕK >)ݾ :{ N>^>0 >L=q=!횽p(&hStB>Y>"=">@=;"Xh>=)>:=Zֿ߽u>޽!Y=l>q,-Y~l>i=?w¾Ѿo~>j>9#>ぽs >"#ɽr=E;m0M->Ͼ=kXPN=O]=V >H>>?A=ڼ>>o'½g>=c ˽5A>8Wp$;5h>#ü/罤ȅWJ| 09>2>b>@>z4>FByrsY=0>r<>*>]҇k>GTZ=b >/>>+ ?|={4>I? +=>ҍ1M{ }8F7?q ^g+c>=IQ>/#"Ѕj&<뼈 >j{ƽC1Kg ;b c 6rZ>Ht=Z½z9k =DCv ? 4`.>d=0@$==vN>?ֽ[>ˆ>>>{T>O۽hơр>>e@>?];?J>W>n#Z>w>$gb =OɾɣS=!>'{Z=^1>hrQ>Jr?=ԖD]=h$<9~޷>IvPbZ>PξH=/`P3>qq,û=:厣-t71Y4D=ז_4/E}a=$Ϛ&>ݾzO>_E>^v<Х> >c&S>>==W?;o>?v7A>?xټglgS8!KB@=>>=W=$>C>a;E;/怽XH>yC(?/vr>DN> ?a>ؕZK=>M>?')۾ =Ͻᭃl=`>B>M=-;R=<>u>2P>^J>셽@@يo/;+W=LnA$M`rܽ>^<N=qA#>+ɾڥ>c4n= mi+4<ȔMT>t'(>g(>Qկ>\>>hi>-=>-6= e==K>)=VQ>%ξƨs Aø> =щr"67=yȜ;Ǚ>aоrcQh#> [ٽ]3~%>5Z<Qؤ7'>&hpR_>>+>.;>7Ė`R(='XMx0FAZ>{Ŕ> =ws>r1!>6>===;ۃ0D/>ԉl<\ q:>{>)>?*>(௾OQ>I~;>R=F&<ν3M =r99 T?{X۾> O5>O >u,>ļ>9C]%u>%->7cáLT>c>+u?>I\O>${"=k,>E <-ק===xL꾈ME ~> M񅲼R>{b[<`N>%>>ln >d~mzە= >tD[= Jd S>t o?8M=MQ{>xҬ:>Ct>,$I!׾G>G敻Pϐ<(^f<>=E=VK7=#=Ӽuy{?:[0h] >}&ҾKS=>Boνyu:=IUdS>-VB?q>BX>HS˽ҭPPV<T~",=˜>Z>? 
%>l2 YvjtC'?^Zn6@#38@=M4S>ֽ{>>fcm>P3;Ԁ>7>=5{>/V[>IA>#X\>ݼȴ%TM>(}O>kܾɽ]c<%g>gҔ=͝>p4><[[=ko~=iPOV=`,˽%(`ٽ<>= Cl>1&z+;<> >=5j[>.Xs>!2V=w3@>cU>A;o>98=p= *>>: >=>vԼ`3=>5M<ɽҽRC>( V?>sѼk>L>)=bHỚXXy>:G=c﹤>نM=x>cjѽ~U9Z>s >q=*ҽ3>j=D>>ݽ@L=~>a>cj#þ=6ÀyGm!x>[h9>0`>ah¾,A;v;=>%>;u7>=84>Qc@s=wpJ:=8>~>{>:J> ;]>}=U=j = =bXչ>&8m>tFω=a>Ϭ>-_"vCDA>B=4ռ '].Ⱦ׻{>kW>n )J^ >N>C`?Y>0>v?wb>bsW>1>>-uevX>m 998Z >M2<S=b>K>?'18W<؍ײ= ֠=d>K($_> ]U G>YlJ#<+=l:>Q=T=w S>=MI>9<+ >=U+mJF|>: =퐝ɾ=]ѽ{ >"=+>AB,?t?=>>

~DϾe*}wXժ˽uj>Օ<E>r>ZG9G:=ľ$⾫-;>P>F>T>ؾs)>iN> 5>>ӳkYӕGٽ^K8>z>T~b>sj ]-b>2[<@+M=679=@s̽EkJ?LM > I_>_yb!>3=Y?C&E3R=Krw񤽅>ZǾ26&1P>z">M>>K=<@>^{e9=>q=Kr=wD=>k[}<>5>F.><  室O=x>>74>B@;[, >,>^>?/ $?¨'=>>Tq zKx>_Bv[U>-/ɾ>g=,>d됰s>ic~,moI9ξep)i>&>l==6>Ocž罏< b>np;.y>û=e>l> l >  VʽK[0=-AO쯾~j8>l7>?d2<ڱ ?y><ĝ=!ƽraѾ9=A&~<Y> ٥`<͗>h:?ڸ<4>)$Ɍ>%R>_=:=>⎾$=f=g\м&a>c6e\xs:dgr0;=V>e=U n`=C_>SX>R0Y>@3>mr\* .>H{)=s> $5lzH>޽i;>t7=Sxe=F= =xETq)>"?3>>qn?z>l>L`='=$3>={>l@>Db-澤eZR>=wJ>`><8><ý>1r8߼>4OSh< >"mNX >*6q=AY=?U;= >Vk(=Σ!o߸>r0>ڽd[t#=y,Ҿ;>?FU>5??#iSJR=܈>S+?$6>IfU=[=w>Dw`>@νu>r=_uh:Ͼ-/׽F;=5́>v}ѻJ1=Wwth=}=>#M )>t>' =hUu>MȨx>đ"=‘ k@bپ >ez>Ns`뾍sX}~>PJ=(sw+C>!!=SBgý2>>Ӥ= u>4=9`UR}>H*>RQ۾,}>*3 +8>)Zv>m@>]k<*C9=Q{>ż=MV=X>c>W=u'>J=h3>>FK>~i>OC>;>=@d=3ͽAG&7=W= >&>i >d+>絽iZUs] >cn>=n >>~=T򐨽 D =/=]l<3d=G{н>L_==Or0t=x jQ# `>i>F>f!½0<`= O= >:=u=6== H>1{=" >\FbT>5#=\>i̾o>kw}=Gf>TʾF m qP4 3 x>hʺ;1 ,>3&S\>*>k>>_>@X st`=;Sd=^<+W{At1=k >Nŋ4"k>kb8 <ݾ~}J= =^>>0r>Rv޽ <}>7k¼3=o'{߽P==Z.?'9>t5 v,$>K?E `}X>:"LQ0 n=*m=.J>_7>{>G=b=+ =Ūp>?W +LN>gcHɽMꄪGT\=/|Kƽ!I=!t=OY =<6> ȾmKߺþ}=eH)HȁL>jWK>h=㖾U>ud;B5.%N>69$>{u!W1=:7x[B/=P==!Ծ>p8d=> >KY Ӽjh(*=zJ񽿙=-qX ߽=>j?=Dt0v>a>>{ S YNҽP4i;1=3uIS7>>>y>^M>C>=YzM|>?|^`mվ[/ C>!)>amqU*>>*=={;>.x>LC>>:Ǣq!=tłV*]߼*>5˺>E==QٽGlzھW>6LX"' {N=Ed>>P> .e=翲=!ҽ/L>Cs@>/ վ>⾘ > >]G"=WGZ>@=<۽qcO>'->׸r!dн?>H>L>@>V\ȽkD½AT<e =x<۟e>J?2ɡy=46t;^>=(u>D&`xVh] ?9ܽk!>%>h^{63׽mF8K>쀄> s} B>s%>M<QC>IEc>>}۾q??*s5t>M>^><<K<yq>w|!>E-`w=?kMx#f>,>.'>oA>$z>J?a>>n$? 
sƽpb_h\==tHyj4Ӄ}ѻfANWaO|=<'e=BYm>()`=s=>vH=.f x>> =D6>>@>iv= N<>"Ū>1Lx>u<Ѽ=3<žF`>J;= 05i+>xn>Avo> ܦF9`` ">Z bƚ̗S=.l>Fk>q>A/gP>$?1LL=*->7i>S> =Д;̣wG'$>F;Bv?8`>}>Gn>fݽ.4S>~ {=GTn>5I69>G>)L>݇>ƽv7:Q8>k@0>=;u>ݲ>> H[/۝>V;> >ݖ>B=>^N0 8YU=] o V=V= ̑=>C6> /]uf=K>}T~p=r>=>]b>eS>zƾB뾑kK=s>Ѱ.4R'>ܶ3;u%ھĂ>Z=>aߕ>澙> |>Чa=, z>m<E>Γ>^Xؾ,)e=ț>P<:is==%+8ML5HQ׼% T==@.>w{=ԾpB>2 O_ d(>ȽR>>kG=6r> >ɔѽSXk%P}x<=zSE.ݖ?>F->>2m>K{!S>=>>>NK>':Ƽ6٤<0i_= 4dkŕ>b>ۀ=BEC s>9"qԾ=>fL===>>T\@D];B 7>SϞR>4#?>-ؾvx #.H>=o[%3х>א>mO Ƶ=<D+=lX93=@<5g: ;#>J3h G }Z߽'&d=.6ľ">Iؽ">e>\>b=:p+A>(国B=TF=9>Vѽ;ئ>7;j->&vH?<:A>B>%Q=]Q?ݾM48惾_g>_e+ >ŽȽͼV=R>=bm>f>([דH>,>۽%:==Cۀ v)=fH>e>F=JJ-Lv>f>?uՎ<Í`!mzMZ+Bn G>įW>>z4&1>Tk=<(}vH׫ݾZłS[<BT>i ??* Dϼ)o=ft=Z}E> 6y>=f\Z=<"hYȧ>_;̽4l>^>#%Fh>I<4: =<>T>G=A>>>exSۼVm7 =T=)>+{䦴>½>!;%lb>8=4G><岱=‚>GDޚ}L?9>>=Ľ!b=n<=呾E?3=6>VY0>醍=㈾J>Qi>~y<#G">fH>8 d>DU=@}e=X< BS(=o$?"><>SDz> ?k0=b=jz=X==8 >gr1&ս^7p>T{>G%q68>q~Ufk:Rf>殾:}3 <9GM@=q)CW='о>ټ 2<+Ix=so>ES<TVU>К=EOG=V>+,ͷ]mU<`MZp>Gk'νJ|ػ8;ݞ,=ܓU>֟>kUWN>_6Ɯ<=Vnhm.>MR&|>,>e>4w>){D%]=֐<х4>Dl=po>:>m=ʼ|> :x> v+ד=>i~>ѢܽR(>m§e><,=g>$ ?uqnq>=fF5=dO ຾믽O=ܴ 0=,:ھ~S>՜Il=,ѾY/2u>=)ѽ5(==Fѽmo=>'>,S>><4Ҿ .>`ʾ=]J=<>1>Խ.>c0|>>JF7{>v<">T< <=ZR>6߾ ὈC= c>,0=O̹>()?S=cf>މ>J;B8?y>d/mF״p>}=>tw5=ʭ=jxQd]B=vF,x>nK=X<?(>Nq`f=9ǃ̓r=R=HP,={_TO9><1G 1">9=O:9 >ټ>اQ%S?ݡ>D$e=v,,/>=Vף<&="߽Me>=<7V>+ {? E@>BϽq2(>,?6=֜>rాb>.C=4𽟤Z>Ҷ߾6=w-A>:5> fE>c ? ѽ>t#=@ >8{nj==Q?ߒ04>="&thDWT)`e VE>Hy4P>A>#A>$>7!P>KjuƞFv>FZ~[K>M>>x9> =B=el UyLyӢ=] VL.?& `>92= Ӿ#F(Zu>5;Q`0>>o 3o>$R5T#05d>y< T>\>xT)KŽȪ>>)\=fIvID;;]"<3 ȇ>h>t&>۾Hھ}9>+> Dl>"a^ׂ7=ushj~==Kx=Q>l羫>)oG ?* Cp$j%1>1 hy|F8>PF x ?~8>vf۽};X>JpB=cW>Q>> \ @> >m&>P/Lq۽H>ԽEOBԾv=%>F[ryѾf}Sr>Z$g>z?AYU>IQ7=h>d`=(_=An9E%2>@>>]#>< a\)ѾW_ k>!1k)ē)=C,jG>?>/>j8ǁݾS<>:b>9n-2A>E>t;(%pU=>=@m =ע><ҵk{b̾ىlZCؾ e_=:<%Y)>g!>a`<>RDWtb=Xra=8v>9<>=x>횒>O ?^T\> >ǍQn>%y0=|>N>ԼS<ڿU>>z8b<LC# >s=R搋>ֽ\iG>pcN=b>+H=ƽ!֗$>m=M>F><ļξfj>b8> D>?e=a>X>^̦>K>X=r0P>k>9Aq>^G>>s=D >ػ׾5O]jo=D `>ٰd޽>$?+=IbN>i8?*'u= TH>>5=>6 G>0(]=[0>i-=/%=LiK$;x<"=|s_rv=)8=u>,>UmI>)<,=petb@pC>6? 
I> z=w0p =M>8]>D=>~G= <>3>&F=ͽ{YY̾Y =H=# >>b^0$z7,X\4>=?#8]=" &>L8>>UYbLIy=A?Ͼ;}=}5O>=(]?>T>B !a >T>ƾe=A>ț k6>-dS>(%.=a>(:Q {=ۺ>=>>kp; C Q 3վ{ԡ>|v=c>g>*0(Dqz|l-0>T'=N>w |>h)u>0>k>`{!>cb>?M->Ԙ>@ֽqV䉾ʵ>W=]w?Gv>j><☽=o̠>޴=8ퟵ=ҾM0R>"=o(aD;;?ܧ=X,>O?Mm՘:??!=㶽̵=K>チ٩=v`l>Y9i kX 1sp)Xvua>r>>?d>h>?>KKr>uS6!v=bҾcǾA>ӽ .=򜟾 >͍K=; ]>PP=׾n?>V>U:8j7hz@AM!Ϧ={>v >'{>l^)f=QN˾=$>p>=课j=QJ(>K 5!,hMy{)Uᘈ}0龤W>=EӞ>̬>6><>hePpľ1K&8L:1 C>vI3>1w`=m=V?s;V>wC^K>!+=u>",:x9m=!ah;y4T>aJ-҂=]+&J>T}=h>zk>V=9 ?=8=(o>r=$>¾"1>@=D־B>`Y/>+a=w>ཅ)>m^0>۶>]>?Lr:1qȬ=`<>6>$>ٟŶ佦/ ;~s> >}0.[< d>I< >- '7aZRTs>Zy>u?!쪾5B>I>->6$L>ag9t%*~>>{dZ!?D6]'>>8G >5JRAѽ_=> t=㋯=8>Nߋ5*>Bc>R\p;=rH=7[>bJ>I}f奾T+m^k?T+>Qx>+7=W#Zy4׺>>¾(H=4>= " ?>A=o!d?=L>W̼Q H>]">se۽)>.>xb:uр9>4>N~=>|Y8>(zp>>XvI>I*8m='k8>8>!2>G=>S![ۇ m >߆@!Љ<>BR?==1r=JU>Ⱦ$"> 7h(w>i=Bڼ[=p>r0>ު>/־Ķr뽞d">.wnR}#˾.>+X= >R(>Y>T">(J>x>=E=j!\Z>=U=op~> w9_֗l=>[=>-T>yq`(߰;#Y= _>0>U>Y >a W> >S+v>cĒ<:.=J>>"=BrR>%2>򸓽F>>m>F>Z->=9=>;eٛ羠>a&0 =:}g[}>2==>8,>= k;@><>~χ'<=a!>I>=43lx=1ZJ>Ҷ= SdT.=3 >gwn<>-i>虠>-%P"º=N=>>jG>4'3>='>?*?=G>?>/8t>"{R>ྈ, > |gKK>W;E= -==/^9>㽢ǔ9>'õ>A.gi_:>ho=y>->+'W=aJ>ŗ¾4ݽЙF<(eg>El>0=>KT=.x ZI>^#0F=Ojl4>AL>S/k>٦=90>n<;E=O?_9C>vLN>;rh>ᬓs{>"n"= [־\>6H۾=>yM>Vh>gizJxZ=, ?{>H>;>>D=\t:>]=M<>}hRp>< =>_|(2%>\>SXY]y>$<#=")>=4=ҝ=z_>=gC>} j>:1=-F> Xf@ɽ>;VԽڹʾ0f>2>==^>$u="|1<>c)v $bb>=mE|='ښ>5ر>2>u导}=>==EEc(}=0L1!̽Np3ؾ8ݻZ >Y >߾>O.>&> nW\e;VM|]>M2 >=dw>}=Dc>:s!>=Hygh=IGh#%ŵ_>C=ѣ:v3P>a^>3=3j>P=kИ>mYm=]>ǣ;ZT=+~=ֽ?c<,4hO>jG>uAk">|᤾6I>㹼= ynz>y= R#<>8>^=@?@>{a0>%V= x>=-E">0<=DZ>U?kBD_>9=6ZghIþ<Lw>hk>:$<% Y>=ˇAR>O߼!>sB\d># _>>("EfFnO#=ιʽplo$F>I9e~=>}exv?A>z>A?\=m :>˼+ >2h>>}Sq=#^2tɆiEq>R$¾\`,XF,$>Jm!>X%>gvp>&k(=tgnV>k`SٽO=h;= > .`<>q<X Ρw=Ө=Y:s?Xu< {r =>خ=Tb>๽}=%¾(׽=}xJ>v<=\CQ< tv{w9Ӫ=8t$=)EȴD>` IܽvWUA>m=FW<-> |>0n>nG~>^> (|],݁<(=$v>uޮ=S>ս]?x>y?%<'dQ>m/r:W>>~S= 9ھ,>*{=TľgW>J _>3E>Q( ڎѾ$-+YFG>>uGx=gY;OO>(rP>y>>' <"Ҭ>+ <IR>jiU>Zciٓ>4:2t<v>t{Y֋=¼>f>ϼ >ʂS4>#>'ѱtl:d<¡>lc8k=1pL#==!U=jp, >@Qk>S>l>yI-6X\Ⱦ'~=>Hŭ=ލhe=v<“>rk<ƽx.ʐ>> y>^{4> Bm>՗־>w]=PWw?;:4Wxce>)h>EZ)=?, 
;owkId>tķrRQsEL6=g`e=r=s>>R̽=,>HB=`*Qc}>Q+*۠ЉнOcF, >ǽKq=$R@ ?O>2qE>_];'Ö+R>2=q=궽u=OKi=8=4|">@)=A7>W] >J ?|m>O"a=~=ۏ=_/=7{,ĽB5=V -P=rgj$L>>zվ/3̽X>l">#e7===9|˽#R J(~Fxкd>=Rng>+ >;gZP'>>:p>\>(Z>X>=55 =[>HL>%Z~=`>v=8=!= ;>+7QqlJ>>bX'x>~;v4>>`TD=l;g >s$=|vx>meH>lQ࢛Q^վ]e=DzYB½G=r>o)νCվ*#+=;>6ﰺۧmʒU*S^?> >F=M>FN3;|;[&:>_S>i=c>,> \* h>)=1bH>2&e,t=V!y>a ޤ_漍l-}Y>x>Z|~=Đ>ىw>U=؆i=>>='2T>἟>=m>AK]j>|>daH->W$ܽ)vϯA<>y=tD@cpH>u:">]>l>!;>DN[¤>#f=R+*P =-Ľޑd=((v=@e$>"ܾX=L(׾$d=n> I۽uq=sR%ӽN=$-bpi)S>^u)z>zAx$>y>uO>]P=>ˎ==ƛa^J>'f>>>V>=!B"j>е޾*=H>w0>T<=aZ#>yT>T=K=zq?V&>f <>>I㡼#䆽W=Gw>H==U=Oű>F;x&2˜T>WQ>==Ͱ1[=zg>$K:>i-9aT>Ű=>9t>nؾOf;=&y0>z+Uʻl >;0Bؽ<+ª>Rt>7c=W>B>MouAPNO>)]5=&> K0>#%%;=Cr="2a=r4E>b(==2ܻvAb2>S;M̮axj=;"h=DP=T<_=m#Sփ>b=ڼ>"# =VP>J >5,QA =,>J=r*ӽ=qX>eq N7`>pG= =?=`"q"< {> }Cu"1ouBbL>?=o=Z7"V>׎E>Žs=3b=ni?>bM=< S=tl'F=>̲=C9=zb>=b {IaǷ>нBn?>89܅W>qPL #=a X]Ε=I >0ľ>N=c>Dn̾o>z=yd>F65>*>x%>e*=+yUd+,\\>2bHL d>E5@\n>zY>= +dfpB |t߽,= >J/>6Ǽs.nD5> >n>'7=,>q=!|ĤE@Go=i=nr>cA>ŧ>]w?'>+_=y>qӊ>zk>£]>f>LP>}l({Y>N>Dօ r-o3>ܾ8(?>k>+ŽQ=@rq>6 L=:6b?=pk=ޘֽhpj^B[= V(@>o$=4H,zF > "T@%t >@ヽƣ>S9>d~ ?);&=#>uS<|ynҸqyҴ9o9( =->94 >mH>Y ?k>^H)>|DP> />cҧ =CH-k򦽔(>C+z<={A=ݓڽ2rx+Q2ֽ'cQ>d=D?=SԾn7If>m>*Y{C>3p_;>ߤؽ:پJZZVSM>+ɽ=>95½t kyOY<>ߡ>e=B=I$A&QV>Y\>.Խ ҁ* =>.>HQf+(>qO?'==n=>;M>v=H>y3P=z1>[<>=i=[qZ>q{W0t=ܝ+=SUĆ>;p^>: S?>g>␗=(0.=C= >F0=ق;c'[}=OYbI>~/>&>$ >yakQa> ?:W=),=<}>>YU1> xl*>Sps> _>a>(p>!!X^l>>jüg z=IJ彡&^> >w>Orx{=1ľsJ>w>ទn?k"=4ʸ~3<]>0TtEb<ژP>^<;@==Tx+;> {c 7G$(>a={ʟq=pcOfz<t_P1>O>sؾ̵#>U =LƽݤyR4F[%===[dоo<1U6hWݾi!2<'T>﬽uE<+E>$B0z=J5=\=Gf[6@`>f_c>}>> =>0􍾕{=ywFT>i>~ս36?:'E7; >;>>H>̑K̽0NzMvST.ۖ=QC=T6>H6>=>0N>F>Nm0s= "=!/0?h-<ϊ<-=Hf>Cōڂ> \jq>`9>v_>%<ʁ3B>>=^>F(Ĵ >*>j=:5>-þq>eY= Hs:0Rz>c<>@;ѽmT=>uUW=󊶾-U`?K5cnX-,0>\!>An ?:5bgi>=ps>l9s>`y<):Zb>?HV>&>=>>Itb>ut>'P=޿>/\}>5+@=2H;5->>>R>DWx=:=u?%> =E>u=ޱ=ýrXN>;lht<>^ع@Uʽf";WY/<=>J=-x>}\>Wc >9ɗ+,>0= yax=E<0v3= [,>u̦Q>>޽n%=`?4=;p>J%+/>$?" 
=TX0>R:PF>C<`Rlu<.YԾC==$@="G|>g2 t>x˖>r=@:>\.>=Ͻtcr2>lڍԹ#|>Ȑw&&^>G{=L=ڍ;;=,`>oX`=i<{4>ϼփֽ)>̾)y#ŗH=j>Vj>/UG=+{S>z bN?Q7>s?$>ɶ>My>1gS5O==C>i>`b8s>"d`,=J>͈۾<=vҟ=Ե:˱Hdͫp.s>!:p=ŋ=b5>xƾ4=}>Ϥ=V,>@.i>@ǼS#=24􃽣O)=q<^> 6>qJ7n>̻=<=}ϡL>ԡ9e+IZ=LjȾCa8>}>]^jU>Lսon]q%H=;5>>?=W 2.ɢ<7Y>ĘI<><>~?Mh!>=S>4,=p =zΆ݁о*x>A!L>nx="y)]>௾D%/>*?K2_aAUG&G?D͸>EOɽX>RB;Mt0SW={&O(y=$>?lY<|5y< ־Ī; L>P l>SyG >vhk=9y>qXe5e>Yn~C>t>gՠY 񏽿ੲ>]t>9= ξx#xjt<_:>46=j`7>XNT=^=ArI>@1カɐ>I=q>z^Rd=M>럾v>y=>`ꪾR<q,)>Rн=)b>=Җ<f[=>x+Zt>8X >mؼXB>u4=6- >n=${½T><z =g>VU=x=Q,>Z?Uͽ"$3j>(qo>. L>ɾ,!>)I >,>r>o>yB>>&k4$f{>n>G>G>imW!"> =ai왋B緾X==>Kj*>Hx ?]DՐ=pi>ꢋ 1uqk񑿽,ʭ>:\Ͼ?P>>Vw>X֬{[U:>ȝ{k;拱=R=r =S<)HWO|_8x=+:Bւ>w5.>bXd>>t>)>O=K= F{=9[DX{=s1>+ U>?>]T>f>|[g< > 2^V>(5HU;=l+>>G=V>$(+8,;8c%.ȼO==u>a1?žC-Ҿw92|;?!> ? =b=*Y;A(>[aY=1y"~ľ1L\>\0?!>>Yԏ'ɸoe >lo,>>" >C>Zü'<3M=l=t썾< ?7 %r䀱p2>սȅ=m]= `&>'=-L>\ ޽#Nx>='O>$J=pQ/^.dL_RPR!>6 I=Z9'ж7> <=>=݅>ʳ>+oh><># ] >b=J=Y/>>)@E>>c6=/Q>=ݙ=s=0`[=T;wXg=>0(>"P>>:l0>> s>с1=idpXӞWe>qMCc"/)?g_{=szM=𾤢۽@=Oy\>n>д&Vdf9 +w">s! K=h3=~<M>#'=NԽ9_>nWqjF=yмd<Yz\M]Q寮>*7>x8ǽ;?;s=;=vhD=]>g >f >Hm>A?>OF>n D_ `=>>?;ȷ>#(>e'I>>~^kL;G>p>Ɔ=lN?>=FB>8>Fix'?X= ̀c=>S0>[ca|YBl;#C4<;X&>.>}>g = =OU>% A>>/uR>Z#>e8=Qt>6Qzk>f>W`>)=l>n$Gw,=`%tS>LwA.F W=µoȺ;Z,=9>[B>Q4=>fuǰc+U1" ce>iվ>>轹=Cj=(=1;ּ؜j>H?8\ =z>] 뽛W>"u=s=t՚={7j˾]>ݍ)c=K;H&Y˽˞dC?6=MڽJ>"=>x6]>I=BF0>6=>?@-kA4=~F><*h=m )@BY2?^4 >MFJitr =Ǿ!N>0?ҋ=$ ?K#?;݇J>F>D>>i;E >D> u׾^{倾41G) <==9Η:b>7?= mb>^< S=bR>P>6x>t<9>5#*ws۾n7=l=;ž y½=ah,=>LRav==q)٘/>Hܾ꠽b=.OhՁW>g>NO>>_ ̬޽ˮg<]׾>L=أ>ڈ6֩_>O>{>@Mt}~\>b6=(:7˽缍59,>s ̾&?=YN=D>7=0LS>.>Cbw=I>{y6J=]f`=fg >l>:U$>ξ*>h>rj<>)x=k7e(!>dk={<'=ސ=tt>>={M>Q>o" g\<-|>X]=>Ͼ=:=QW=J>v=y,=`H->i8;|q=@= oo!=t^8Qɾh lcsbK>>7Y퀾⣼y0=td=k=ª=2@>ꍾu~'ڊgz=һz]9?wNG*s +6>>;% r><>2J>W#=flӾV ?>|>A\= U=Ft>c &0?>h>4ymRkHξ7>pvK<=+ޅDN=um+Hѽ=l<b >iU$>AJ>z̽9>*b=<=ESG>>]?Ύ> =to>0>ts辑2w>>dm<٣ >>$mu>@d=pw۽M>Ռ>ݬ=>d;.>K&ڽ?s)K ?B>AS|1>uĈ=+ɆhSʼ,>>{|]>^>oZ> =8>m=+xWE$ P ~=9e>>t 'nIdl>@ֽ]<>e=¸>F< u? 
1>OB=E>p !dT=nD==Csނ?u>҆hq> >t>r)<v>9,* N": r>4>>B=@,=I> =sXU=B̾o?DGW$ͼC=hyc>1G Y>h?A2A=b;u>?8Vt>>`c>`Q =?B$4]-=>ep1\\`>Ld>g=٤> -sZ'>1½?%>3h>z= =y5= =H>灾](q@&>᧳K7Ӿvel"=P?O<Z=>N=>GR>َ:;=}jG>7= x>:mɼBg>/>=A>NCE>5dҾ'>!==(=G>V>傼>p>MY>-=>޽6-=%.پaui#<ʣͱ=#i;ޟ-D>;7~$U= W0=Cr[6>eMuW75Z#c3Ce=u;e==j@n>5n?X\<=&=C> b>sm>{Ҽq⼇ZV>kqLSк>w/%NNCMD1>&~e=y~Vs9~{=\c{>/<]>{)<>ّ!&S< ˼1o3>ܽi,==?־xR>o>Z;=kܜ>Qk=z֏e>rL z=)Լ{/><tW!=Uuy8>>JJg>l=,B>ƄjLO;jvDE>(E1gӺ>FLA($;RlS>k}= =Z6>ȼ&ߛә>/Z><2=eK s>X8=յ>=E޽Tu>4â`U>By]˻>~ $9@>E=a^fw=1ؽp8=\ =}>cڽM1T>ɍ=@ @>>>m=ẽζ=)?-Bg H=K6>O>(=z=>ӣ>* {׽Q=dђ3>eu>F~KS׌>w'>_m!> c>o$>'>s*0>=>U>Y-=~~WQd=@¾D= qƾ *Fv>LP?O?'خ6$sо U'u>2`~>f?C\.n=<>Vh/=%ʅZߔ=?bfuh> >= >oa> =u><An>x>>^F> &>o>ƾ5>T9>o A-N.ؽ)>޺=n>[xe'?"/?9gm<-Ƕ=Q9p>u䑓6G >n`=D>N=ᮾ>e$S8>!<3+= -%2h1#Z=\>֫=1 >8yhDʎ>>]=!=򞮾a˾2> :B}>:S{ɚ>2k0&< =%>~>IS,kr> +?`At ̽= <B L >Ѽ@:{.=L=;t>pk>7.>e>w/z>Q>D%!r>xD"Zh`>=>65oG$#=t=XU~`2e>3>/><4r>un>4h>)>%ܕ>]>-f=J>;e>nT=4ǃ)Ok>X8X>d=^Oܾ4>](y0 z>݄Sh=P*">S>i'=_a>S.n/& |z>\>>P*>i# ;cNPNEK=w<[)=)phi >$=͗=н!+>a>#䫾w+ <<IF>*ƼYQ>C=S{ f.>WbJ;3' )O?wYQr bgC>bv=> $9U>SE=/.1dCn[+>F;Bc/k>XS6~[<_gNk P46>=sT$=jh>?\oɈWpX 1>>j>q>ӽ{CzJ}3>0K_m>">J92>ͫ;>^ , >ؿ=Q>ۆ>}Ҽ=h>ǽ {=|־ }W%=.8}P$ >Sh>A>v2>μ츍=$>0>x3q=>Ք!"=<>>n@)=w7>d`@/>E}>>=/5q̾Il P?Z= >+>6p>6>4Cc~F?G>,DG(0>x>Jw#;e>B)>>i >%^>ְO=B ԰u>b_߾2>7,a=T㻌]]=c)È>½nJ> r=T8d=—IG%S> >@D>r??>Ce=u=rU>-I9c2=q>>kI>רd=\ u<=+? ־=+G)=cL=˳=>0>h24a=$0r9Sj;}u>NH=&4FnV>=#7#=>6i>>K,=6d痾!x>NŌ>J0z !Hxfɽf$>g >N9WO>+`>حe€=">a`=&LHn>=>#>Z=<>$=hj>y">]:=A>̑>>Β@Ur Q~!:}G1+,ebt|=>}.DYؼ!O>+s>C! 0 yNϽ6*H=2 {=p>Yֽ->S>2bF>nֽi~&;=c^AGJ>@=e>x`ٽ!h>g@N>辏ޏ=L >0=Js<=Ǣc=Fl>\=ȼ>>Ⱦ>۾;w>dA:I=QnA?dO>>'pH5!==4Ɍ>@=tM 2B${'>bbf=%>,>=Q>u_>>(J4;(>D>> =U1>뀼=r=. 
սA6>.>IJ==/ t[=8~=\>>>q.=-V!tx=|Hb=Fam%"=J.>_>v>m>Ǿ>8$@=KE>C@>=|5o>M1?/S~W7-LA>.'?ԤB>%;>M>n>yx>q k<&o>H>wy!H>{b>5> ?>R)?k><+=ٸ]>T!M>Oоoݽ/>4>z=н֑= gX<;e>>=5E=7)w =>Y4¾3.ӽ3>u׽>o=߮>u>Doj@>Pl=~ݾ])u-p;oR_2<.~>@>31`>.>)ޅu>/>@㗽q>>gӽ>C>=75g="=>xI=>> 6 @Cit11>>{&>em< Ƹ=޽sw_'^L>\>"v>2P>p:#>ėQE>0'>=>=L뺽#%=&zk>G#>j>/=wo:.#G=[>J>$%4[>C>,,>bEb>Wg=& >+<ü !ZVӽږ˽]:VKǽ ɬ=dؖE=y2![0>^ľZ+=ɲ>"}>#Q'?ş=@)<{st>lo>*d=Ѹ 9-Q<B+l=& =({r P2ͱ>TXu52W=Wm >3t>,x,.b=%.(=SpS.޼*@>!OʽS}C>w1~=\>>>>>'!:>p>XʋMB.w=0<==P软TܾQ崠>L=y;=A-&T>f=ťJvh>% t⽙w`9gWa"<׷!<.l=0= >ݛN>>@>K>A]>2v<~><=;>O̟*<&3>Hߜ&cb> %輿>='ؾ-=xH̉>CV>>,>u. =xd>ڱ;c;\vȡ=ßdܾn]>%>d=\=R}Ѿ0g<fC>==' ,Na xQz.o^<&A>u>ڽL=Q˴M(*( >!>>>H璾;>#>,[<^>佢>bg>m==$YPbnc 6Q0m=p=O ȓ>+ >v9`Ŷ>z'>čQju\t?>Uj={=^=|fY˾Oؓ>7'= ʼ=sýv>1C5>Y=k>Lj}N(]=li|<\ V =T=>۽pv>7: <>h;֣B<=H#;9gn[=W=e#=cB0Es><~E;>z7>?hLƒqU=,>=>:=-={n=>u@ciV>ND> E>qB&<>?L;x=>Lz > Ylyvj>a?47=Q>$<:鰾m> =v+@>=|=32>4Tn=d?>Jd%=-ėDD>fvS=;<I=g>u>#=¾J(}1#>ȹw>xk p> O]>'b6k>(m0>GlJj=>tD>{Kཻ,>tp./=9WT>l۾z5><)9>u>r=s>=>44=Q]n><>Hf>t=Tm<̽d>7=w=>c> =Fׂ>GԽ=?%={;><J>-=X>CmV1=w*ٻTdýPҔ> ܾMg>zb~׽uֽT>K>=HSE51>5đK&3b>wm >*+N>=z{}z=d_"˽>>g-B>gU#Y4C]3<:Cv>>]>G>g_>uv<آK`bJ՝=f>@)>f>gʀ(=軗=vͽp]>=+р>.C]lRp=&=v>r=>Zj>i>)uAPi1>^>(=Ob>XĆ>U>=!p=s'> bwFԾ9)<` 5@>{?c=-0=GX>ZQ2`9=NgJнs;>=)_[]X , _>ԵqvLm0, >⽌u2`0? 
VO_s}#+= ׻= >A;c>KQ=I>цƾ>o]<䧝>B^M>i> >ZO9>.|%B!pMA ï>>kO>>=5><0Qֽ/1= !g>ˏ]?U=,G"E>~>>}t>>Ҿ+GȤTI>0"Z`-=I>1tlP>Y#L >()= OϮٽ=/fH<V>NE"}w_ʽP>sh.==ǽ>o[ 9F>41 TF3\T=P*2c'댾8˴fd>z+>=ٽPg\>Y;>~P=4ѧ=W= hp >I՘S=`MSS] OZu؃>~Bӽz/>MҮ>l xy>8y/=T}=<>=Mi[>Fݟ>2>]=o#;N=^T=/8>9>pJ&<ǜmƼ=z2q7,aR=oW=~Z>4="W􍮾!ݽ->T[r> M>?S>4P=X,>v>N?_Z>w=ce>>k>h?Ei>nü=,,>ķ˱h)Bk  sr_ ~=LsgH/;%>g>۽H<{Ҍ>=kw>>==Ծ.;—<<qOg("'M>x7>R]>z<%>>L쭾,>>lh>ʾvB==o>1L>־=졙Q^;4Ңmhr^"ݳ>K>e>Oo>2]G^j> >8X;>'Ǖ=oz>L3&b=eN> D>|h>:?t=ƾ>D>>B?j5=7>*oD=D=hӽSuM=>Q}=+`ǽ7G>(ֽR=炽N ýG=?&==Z=1 <gu>CS!W=o?=(͡=># T=zRfdӄM=bXIEؽe/$>QB=O`nwi7 >͍a>m[ >2e@E=ż_//N=mǾF<\FTџ>QC=.Zɥ>]s۽+Κ>">$"NrFؽC@<>WR=g=}=l=;:==pO$>☾/=D@>sEcikx+^4YgE½ͅ x ><>!C>gi;MM=^=F2>$)ϕ:er[{><>_=64]՜>?@Y>=oM,A>$]qL̽WEo`]>w>;3>3ԗ=c>I>^&=v,4>T ½׽Ƚ`E>E;Fo8;=9>R-=z㦽P>&C=A=DK<CwӎpB=΂fX >Q ;>f3C>%Y=ݩ0>mUt&>E8=H0>s> :>Sh=4>Nk=pRz=||׽ `=v>{s˽Jō*p>n>a<=hl2D;M'>(0>,>>Ⱦ̉M>>a=ad=p>`>žS>;TP^-H hG%#>"'>ݤ(>-(ǽ8>UG+>=8 h87"[Z&=}>?8=??n>Ն=ܝʾEc><{=AY&r>G=-ݾ 뾾j>l&=zY?m?)+E>w [==t_@e ?rI x>G^}SĀ_E> L7->" =oB? A&tRƾk~>LwŽ)]ǽ*׽`y>>$>C>J>~>q4~JFu`ZB9>Ҋ=+=>F>G>>>R ={p>r塽[{V=A8><>0ξC==0#>#>R>^>A uPOgӲ=O(BP=i<P+=FϽ>`P+>D"ޱ=*7ɛ>ͨ&?nx=>̘>>4IJ;M.^yG=l>= Mb>[;#>tH| =,oQ6㪼[6?ê=,<'@j‡><=Ē>@=>=iE;=,moTƇ>S>[5 C=L?XvO>:%Xr>Sy+>X=ԡ>rT{>.,:B>噾f;Rp_P=_v>>>M=D<9=?>B<b.=>f>LKrE>/=߅>ix%@>-"<*}є>1=">rBx/'=S>>w=>">_>Փ==U F?>`>?ɰ> vѼɐ>ި=!>ZȾJ_(f===Js;>b>f>= ^=:^>0>!>P?`! 
C> >'>'ȕ=`辳-Ok>i>l=hu>Z>ox);$d=>KeԽGꚽI5 J>Ę/=T=Q=kAfX=/L>~<="XĢBhTW>>5,ھ?p>x!>=|=a >dڡ=J=؁\<>yC>G];7>rej9!6=IIMR>/>*P="?;bm= I־(̾M>&vwg=Hɂ~ a(>@$eS1elK78<>WR>0?@ &?a=1<=9m=N =dka;>0r=IlF=;>ݍDy5Ya>> g>=[ x>S<n>?YWn=jAde>] >~ֿ>DǥBЍ">>H>>h=Z׾ӼO?sy ?=!>!>GWQȊ+=(6>4>fzh>*>I<Ӿ>a=!ľӒ>44}>ˠ#H2>ы<>o:>S%x4N9٬=[>@ŗ׾b=)uSnz3^(ɼ>/ͯ7=r%He=WP >2RY|=r.>#+s>I#/%A=>sM>c{>, ~=`E= a`|8^&>ྣW >6?;K=r!꾎”p>=j?ݼ =x]>ξ,郾&mR my=*L>>oFZ=a>?Ƌ;^WN< ?)>(Y>LC> >*(ĽQ3>KEM>[=Dt>=9oh>3%K>Pb" m >D,=n|d19>ra /Ǿ ?8>w` l>Yン=>ؽwa/<¢ i̷>6 t >7j; ="m>b,t'r>izo}F>>0ξ%>3r?=ヾVJ0>ֽ/>8s>LҾ0򇾕J=8ʾF1j}>ݠj=@>v<<c=~<#W=%p>;B|DF=(~>B]W>iU$6=ö>Ώ1P I>Dj>X1 ?wGc>V>8<=C?j> ,2*@c={rs>C|>Ȏ>ӎ>1Nν'y9$l>QAb9o;>b>d;?=o>VK=<y:w"}1pz1X>^KʾztQjIg[-bm>4YB3>t=Z >a=k'?j>򁼽D,>Ñ=fھa@ƾ3=/:He>tX<_܂=Mxq4<]=>}>-sL>4> 5>U<>=;@=7 w/>94>CJ$zw> |>~>IB>ڑU =\"?۫;?A>jI=Sڗ>?<&>>bs>p=hu5;~>J@ ?O> y=2}>FL Jƽw8==1?>b{>H=mW>>eΛ> =<-,i*͕>ܾ">X齑-Ҵ#>WC.DO@>B=+>x߽TۘþybvRO@[}}O^=?y>1!y*> = > 9>t==>$>>&+>7;f>QY>qd)L]/qVP=x'E>>>WNʽ)>u">?H¾۾Ӿ)j<>Dut=.*>=21>=A\\>[Ϝ>HLj٬^v>3}=9b>^t>$Ͻ>}z=G>0vA><~]<-&=4=}+}>*=;>^-A>VRX4>\=gך\ȼ!>5B "A>=`=W>>Mp> <,k=679>ugZ>68S$3>sy=rݷpr*> ;U>i>$>ɐd>;@0>䇾Ŏ>ػ=Q=痨5_>E*$:<.?<վ: u>*>k;ƾY>3):=Gg>˼~p@u=Gl=>uU`Ds>?]Kl=oE>}R]>>F=I-cs:ӗpZ*(=|~ G>c:e 뼹w===/&?Y~.!-?=R#'>>>45+5d= zڼ!IPj>4u0=P>Dx?WƑ>=8>ʀMm=&+iw >E͉jʽAj3<ûJ>E>E׽enq}CCD=4D=kᤅ>e]8icjU:=V=>~c)>|#=! =7^>L7=fgBh>{f>>vƦ¾L} w>B>6>IUP>3g7:<Ⱦ>$7>U>s>=uh'>?׽C>/?!TQqh#?;B?>R\>?e= =G>w*@k2>>px>$ > N>k) g#p32 *5>PXF>V_nOn|CZs>m> 6 -o=»:'>?Fj>g :>Ī>J<=hrF><>Ǘ:=z=gH>5>"W>ގ)4=\?W*]w\>nM¾qF& ҽ06>qt>vz=3W<v<58=jҾ(lS>Mh>J?Mj<9i ~>R_>t߾ 7/ >*+#G]=RTp=>̫p>Ӿ65:-e>]> =9'ھ'1-=d >\D>p}>'>ᏚQw>\=eh>+慨s4w> ?$4In=Hk=U7<`x6T?{/@bV}?m>=(>Dd^19>QnRע|1>>u;<ͼQ4n>X>"=G>`uǽk>>hy=n&>Ρ ?b\UOϽE5>0=F@>c񐽒L>c)>n^>UkL>WK(?)$V>>qND>a> Z[>󸔾6> >&6<-4! PR(p>Lt'? 
K=z3^Rs# =Y>>`(==L>s=7c?.E=0p3!>Ј> ,_4+h3QƋm\>Ծ֏RU,Gq>H Tb⾾R=dǨ<> =l2R>= ͽKέ)P~O$?ؾV?_5>,+tˡ>5⽫^8>HJ5j=&B9@ ȽKܱ>x>4AeQ>}|“> *==Ç<>=`|>$?,F=m(EOR>:!r}=+$>;V˒>[Sy%ο< C>ޫR>54>y+>_!>?-(y]=ٽμ=9b==[=u7=䅾V,9=bwn=>߃>g>EP?">J=ξU>Zc2VYV>e~6>>Jz&Wy'>Zq>횥:P&>W=FrII=f"|=Sq$<< >=R.)&s=8.X19=>zȃוp(>Y)~>9>=l>dH^f~'(=M-Ͻ@>Ok>/cڂ޾v=i)]Ɵ==>|==]u=ҧ=y5>=H=\6^N>9<>&j>1\>p߽sǽ@=3AF=I=<k=1a=V?m>>nR=K | i>%<>l'>ԸPPy= ӽŮ=nt =nh=%?kMmU>(>3==—>X>2*g=y̽2쾡{qy=r`t=wL8 ҕ=js =| ,>sI=V`kCv=jfy>>biQ=AZ>i=%P>0|>M<٘>J`hS>^P=Q2vCe >8>s5>0<^> K3ZD>y꡼ t=^>cIdZ>h>2mg=׌P%=L8?ƨ=r `Kɮ>> >!Z>V>[S% ż?$=?b=q>G=rуii?=> b7C~>٤b>_ø>cإaSQ>23>hֻ8>[\>A9lɪr5FKW%^_=)R>Ԡ = ">g?b="=[ƾY=ȧlXn=)$ :  꼌>Y =t>¾b >s<>">R=>}= ݍOZ>潋t>ro>=:=T7> ?=V>⡽> ?I5=&%>%>u>ks8rD)f9i=b ">q>6=ὗ=3=͊ >` ==,>,;DŽG=>L^tU<&/n/lҽ$/'>贸>[츗=ZF]e= <C>S[3>aW2?;<|>Q>8=J=y>>:6=]>8=Hne1<=iʃ>/\?7$>8Ã>52>A~I=1 "r>>y>C>=#E˼=`=-S<~&>M>q򜾖@#1#u=/msC=b;>UUއ>Sli>m/>{>===>J&=; -p'=i<:{b\>4\]>6]>jPe=Edt>3>UC`=24z>TW :~=<·6W6J>W|u=ؙQO<+򽓗C;ڊ>D=ԉoDu/?:׽ɹ>i4>&L>&d>&W=<.=ʃ@><0ݺ7#>Rq>C=`UaXe>">|0>ž>mGk ĝ%Oɽݾ/8?g=b쾸c>ܾXs~U=}(=J>n=/.6u?->Nקď|t71LOھ+{>w&Ȁ@= M;/0> >s >;x>k=Q[]W3_cw>=n?>N=5x>)ѾYz>z fҾ#>9 2\#m  wS?ްP/iF=L{>V˽^5XU= 7Tt>пNj=>?*>˪*堾>;w= <>= D>j=hy=(=|L)>[ A =G_=nl>x$z֥=r>(@̾aЎ=89pC&ë>ۀ?s>T=>>Rv^UPC== B HE>}!m|H>섀vP=Y=l?ʲ==w߾T<;3;Nb4>>n=G5j>_ Dn}rg>08υ=:E>[9Hb=|s+ɄX;>NI>> =s$f=ڒ>=E8Ek$qAT,ؤ=p%n!;뾁z3= >$>7L<7(>px=sξZ%r= >y =7S=T=6d>A?>~>q-:1>>=XgA>uS1W[=z<6>):>=p=>z>ZM45DW>6c>n9뚂>Ւw>3ܫS=~*7v>M;ם> W=3=4&dT0>࿹RH >M!d=BiL>E>Q&?ПDb<)޾vӽzϷ(=NW&۽:SVlqꝼ>e>ׇ<޾>>Gֈ=Y;Hҽ D>0>Ʃ>E=nKo>.I ؽQ`=7)MR>4ns7>p>@86>_>0K5a[=ڱo1A>~Hi>k'>{ =h02 'L>;>a%>>xǢ>4;=r>pP?>׉z>>J>m;1N=Q L=&>ڹ={`=>G,۾? 
=w>3Ⱦ^>iKrk>/Jc.-]R=ɝ ?%\m=n̾ig>̾m{3?=ۀ=rT=tC:Լ>N>B}>7;= _H<~1><ߞA==(:nF= ֣*>P/>Ƕ'=#1>#ghK"=oCbO>9}==K웾j(8>~{>WDS>4\Ǿ]->?z_>@a=&ໝ4T*=!d==F>o'Q©x-.tk '>j=Q= J>W>9>־B6>BFVZܸ>Gq i=/ͽVD=Q]k5>v7=F#a>=>>^>^L6M4n<*;R=->ؼj@let>2=C>=3 qgs>XKzT >哽T=4ͽTfbLO=N~>7u=:65?WLf>3.=Ϊ~=J_,4ٽL_oy=l"3>=&츾J=6oXf=%|>,Eɽ?2u>|6fD]=#3CV><Μ' X>ԾJ>4,>`>>Z>5>=/z %pNٽ4G=]Y>K> =7M>'{W)~=3=*Qy=<8Ͻ=:bͭ%>בG>3>vH:^6$ =xϾ/B=q)r qˮ;>v>>AI>RT>-}=ھWY^>ё<[>|w0VT>>9~i>~>m :=>OpFݽĂ>Rm$ƽy=h>;NH=C߼cn=&ޯ=N=;_'>`˾YsgKճѽ3xµyf t= [>O84<<ǽf;>1>_D>Vƪ>ɾ?g>AG@>Rh="5^OIO=Jt?^< Q>=@>=ρ>7-¼iP#]>2>UGνg<=2j= `dɽۛVYĭ=7=J{Pҽ߄<&>S^X n=qCŃ=^?>5>&;=xn>QwD>Ledi,>=>=Zi>_[P-=4,=}FF>==,xq{}/W,>P8qO>yAv >df>忁=Kɣ>wM>$34>>. Mm>Xp>i:m= ;h敽3NS>v2=Ը¾E} e>L=+R*!cOüb[=ZоEt=:@%;bίJ\*< guɴ_<{K<⤋>Y#@wͽ9=^ӝDR>IUT>h=!>e>y=N>m& I5<}458W><>F0Ͻ>ힽ=>> PB=i1>r w-z>ާmEL =< < "0 ;Մ?ZM>b>m<[vM'ѝrHc˾1# ?EO>d uϵ>Oּ78>[ڽ^>6A=w=>2 U"y>?,=d '%dD>RB27,b뾸}l<wS)>o.cAho=bGj<>W>9>X=þ[>Z>+%DxvGJq> >T8=]d@L>?YĨD<] w>( v ƾvU><??<0Wh>Eo>=z<>l`_w>l=-=">ib>V=?>CAŽ??K=Nj׽%Kn(?-o>6=>C>l֗>5 =IJv>>;_s>"`=u= = ~)#=w&>wx=?~ؾF2='B rE>e9>>bT=eǽPu<>;ý|J >콜6 >%29&:ބˊŽ~N>Q9>e޾1>L3)ƽf\iBBD >><}ۧ=ck>wG>`yykѽ{sk>=%q>鼽)@>wi酳>̓= ٽ=<^iG=6I҅GԺL*ƽT>Mrs齉R>f> =i>=x,>X>P$8+VC=&>)>yc=06=%=2ۛ=s+=)>=(YC EǾltT>?G;񰶼½(#=Տt(]>D )=)RE>AHf-a=7V^4X=H>8ʂt$wj>K/B= >z&=u¾]>Oė=="YJ>*ߜ>jcB_|>=e>'Pkҏ=ڢ=+\h̋>iξkh>8f>#%\?>~}>>t>Jy'Ť>d4=;=w6vxrK=>?GȾfB&4彈ཊ*_>ϰ=S޾ʵ+=Y >Gz.1=9sN>Id=R<=>6z>L/d}9[H=FY>ᶨ=Rؽ|Vpݽ;a7vn4֮_=x3wz6MHDi >4yje>2޾V>|K(],]оX{={w >4>SٽF=TJ>8=WkrωL5cV~>%T=nꅾZ>\>!ы=&;">s\>侯=>U,9<= ;'>)H*>x7d5UFv>=Z=>;e>2:=ȢN\= ?=/>be{:~q66?8Lo֊ ~ڽU{?12>>>>DH<˾-e_>ϼwI<>->>h/< Y)7J؅gq=3&=ľ3 >}VK"2$>0>>)?(M>?g$5R}mؾ3M=>ioG>:eN=_2~q2iMZK¼GCv;SC>w6Ѿ~>:'=>}?d>yW >^4FǾs4Qٻ9ꁼ>'L"p>"=ٕ _ ;){ϽE?R>/u< N>\`!O䫾a>$i>J >x=lΘ_྄ =|>f>̷=½)>?1v>2)"H >Ģey=Y>8==XŴS*|I>#;^y;'I]ID>lAT>A">y' >m[>m=>;! 
>;><(> oj>Ⱦ| T>>9SyvF.Ь >O=d?s^ (8[= +>H1)d< >D?G>!>⣽3ӽ0?]= ;ջ>=Lս啊>>5=!>>j_>HR?>,tǽGQ?>U~N;u<;>usp>|;!=pSf>!JLl>> ʻ=@K$uȾ 94>F  5;E&JXyuLË >('D>=VkZ>oY>ʑ˾i0X>vI>;K>$MMqw=!`?O6 Pؾg>'(ДPq=Fa j2m;0>?*c>ھL!,>CI5=,3>&X>q=H>B>y?=U>w`T> R`73$7->%f>B>J^Y>q$>z`>[>ۈ.>8|w1w5w># > f2SJ}=+*=+> ?}=1k>53=>]=+? 7=b1]|}˾v%uPu_`>>V>\vE>P?>?HF>\K ,= >D>- =2Ҿ*>=Ru׾8>7 >J>fgz >vx6xj>ק>Pqz>jEnFex,oA><>=ݾm>@޽]½>@U?kjG)>Rݾ޽>=[:>`Ӏ_>&Rʐ d>o:->>۪Ǣ>["3>[>=D8@JL->Sp>Q>Oؼfvu>k6=a> wP==^<AP'<>CQ>"QA>;(= "=>"2v}{>@>=[H0_0Mfpx_v> >~>A~>V>Y=n=E*<}À>>2>PE^=b|ތ]+><+=:"gþvV=/`=x/?7 x>mZ=;o4>L>>yw_q>4r,>8!g=>M½><ȉ`>)s v. ?E-1>TG=gE즽2=c`ץ>{JM?D9#ъ>6="s$H= r*>:[־}g=} >+Bj P^<::Ž ==೚>g<>"sH꺢n欼=Qa>NiB>BZ>x<|>}P3R>u>hޘ8< +>>K<3l<0E>%=`>|ofu<;v<;|txf >ƀs>ü .뿵<=e(e>?7H<_x>|3wer=+>o~\=ٷݾĪ="*oMn=gw$=GFX> L(Z5?E>7=&D>;>D9>ν~Hz> ݽ<ѽ5d>zee8)][=R)=5ۉ>f]>n>6:<a>>e> _@4=/=/վ +>;g<=n>==o=^=Ԕ>ZPJ>>,3>CM>z`>-ڶ>ؽ|C>.>K+>!޽pe>E,=w >ޏhY=|< 4?{ӽ(J>>>=5ܤ;68#+=(^k>L)g<>p=[==<c=n>4=>I<]D==Ѡ= o)vڍ> (h:,->hݾ;o>c݀5*=SAY:>Ċ>?f4T8>g?+>븽G >нyャ >n>M(nB><+ ?suQ=Oɾ4#Y<"KKabB`1O0>K\ z>B>.= >>T$ >Mw-"O=Z [1x.d>âƾ Ӽ>Ѹ޳==>rF{>&7|>0c>"? 
6GJ_>l+>X>|Q>=,% r \Q)j;r0Ӿ =, <5*>`1=|ҽ{">d_ᓽΤ />/g=Za:4>VQ>@1!=E`"Q.>h>q%<0CS?`=| 7>W>)v>8ŽY> >_>>p>wɾپh%?A s•c=O>h= zk:F6oy>ނO>_">b>Ҿ=g="l>=i#C>/xt=9R=A>eF&>Y^,d< Mٽ >i$u'|Ƚݶ=4qgLS S>tS>T},>l>1=ā7a'x> ?>F9v3̾҅>Z=jc>2?yP>6P?5a(Q=sa>b>`ވ.~L쩾>MhY>4<M޼H-%޻Ti>^6?o>>G༏=V)>:o>J+>5Ͼ*{<=>T>DoþIyc=vxC6J~>'< = 5)>V#>R>1>tgTD>Y<>^$]C=>n?C>c>nBA;=E>C>>4<?񾵧R>\=m篽6m=0jϐ=z*dx>e;*[>0; Xֽ5YL/B8>WS|Ѵl@=RJME-bʽi@>c_iߠѥ@=}=d ?>#> p=?>ZvoǽQݕ=c[>@aݽ=s>G݃xw>mξ7g=-LOjKt 6W=q~ q>=##>>2NpW>sva==rj==޽hEsaż?RFu>H{Gh t>:e~꾝E<&Ex*DT ֫>)>Q>(m>N\T/e]+:>&jѾcǾU?2*Ce =>T'>E{>LӰR>a߾=&& IUȒ=cs>B.?Gv`= /(=v>׼>ֿ>օھ>5LCھeһ\6>>SwF>;ֿ=1|>~!f>ܡ>I>!><о볾3E=rG=I>XE8GE?lp<>eB>>8g?>5XX;!j>:½+i=_ >~o=U>lN|.]K\=u,>澥A4t>Z=,µ ?V>F8=ǽ {W n>q {A>ԙZZm+žŌg"{>^F>F>>؛>o =ܼ*;"X^>i;>mB6>̼HtI>u,'fj-<\NP=:>ؽL/;>訾82>|;Ϋ 8ҽJu>m3z>+ʽplf==m+w뢽}e% 4>RZ>?jZA>Q)5>#3<%=C~RT= ?$h=;N>V!=Z>%=c^۽0ŧQYq>=ս.%>x߽tȟ*N>dec/>_>&Yc;}0>oRO=LLĽذO[>i︾@=1x>F>'AC>SSd>[:`!~ѡ=]#@=V2,>h>f>ex==V^=>ʆ8&9?R>)=>S[例O=>==UeOli>I?<>G<=Q\>iuཹGHr=_KȤw.k;>F>NáziM=N)վ=>X=i꾀QY=->k=?=Wē1=2ýG=ƈq>Y4f+wTT񜾃C>+S|xoP>r>!~*<.d%>j=npsa>&5<N)پmb6N>;l7 ?yd>R>Is>a;–νF^=.)5(Q>NRI>Da/>i >WpԾ{A#<0Pk<)>E>M>Y>I<"<ٌ>o>P>yK=R4>t@=׎"Kܽy[+?-=#nh<ڕ=L|:>=?i>F4>>ZҊnc%>>6<,=e=q?5{> @$>p ?3n=+wӾ>)>EulT>>qf¾8QKv9>I d8>| $>rĽȷJ;>2=üFɾB>O2Nҗ(= ̳=mоm?>K==ْ>c=v> M7='(>>oiaOJC$&=WU'=yXnϜyE=@m&hSc2=|Cr=`7c:4:>g.6üs>֒7c[?˼v>@=^ >h>zm`q>9=dP;n><>j2=F\'0=d=}>`Z3嘽 <>dV>o'=>;=[ >>~:2I<?=1U>ྗ|<>Ћ;И> Cྒ>ӕZ1> f߾OEe>v=&-tJ-LM> CVK>bή>>٘>}>j>\窽:6d!n*u>? 
b>J~>k=D{F>sUb:.>M>rb}=~#+>=|=j=>,>u=>X6pŽ3=MJ;Sоi2`>>ֹ=yzhR=JDH=}=l>fl.>Oi&?#T=&q>??c:Ɛ> \>ƥs>Dj6]>0:GS?$/Z=1J /J29?>Jp%>IE=h':>M>򽦮>ı'{'ݥ~~=_u>=ĵtsN9?<>>6{^ o[<:+= -{2=Ti>9ļc>  ?2=%i?퇽=3z>>f "#A>8n%>N?>XNCr#b*-뽴^>dw>&!}H=(7>)=9;MD|>>Ǔ>sĺ>š>ث>><{h<ؾ5]?>>C >uT=*>>L*>>B=!=-$[5`Vq>r<ӊ>ŝ=a==>W>zz>dgȆ򽷍>(p#־Cֽ>-bFq<>:>6KؾRѡĽ$񾧷> />DŽB>U`б>B޽Hkf +>ž=Dw(>+þn05>ilw˽5v > a%#8xn-?*q>ㆾ M=p *>aRP#>>}BM>F>9`=LQ\C!=꽉8>҂Z=̝>"+=3 >T.Kvج>l=Jp>6j>))}`nK=%>bND>|>fs-yP>X>9󝼇l=Vb>^c>lú>YI>V3Dh}/>h=8>Zɾ(P=CڽNp*l>4稘FޛH& >)mŨ>7OG>ku fG>}Ϳt@J =P2Kem;>>zlt ]CuHw~?D`=]U@𙾑=m!=vս閾FyѺQ2:96X>3O;hU>>='W>$>3'>Љ ]*:~==j= >1>@Y>2;B$c>l`]>=b=R=6C%ڱ+^<{>4;=|>߽2>lR[oIp Bb=i>1f>3KP=kz ?]EzN 3SV==꾬N=ν|> >Pc:~{;Z*>kv=ҽh=pƺ=S=l"<>K><=Cl=TsO޾5ю>"sJ=н>Q9>̄>˅>%+z=v<"=|>sF>ɥ>eVha=SGR3 >uQ> *>Dȃ="| 8c<[ʽ4>>.z>q= 蚽`1=D'==iT#B eCR*QJO3Dv;6RQ=z$e( >{=>kN=@=s~/=X=)7D{@<&ۘ?}[08>b-b> V9Ҩ=K=T7D>nr>E\=jL > =[L(ңjOT>G]5 >f>s/*$7_>}>?f>>b\d8?Ϳ>cx>ߓ=JQ >q=7P> <ϒ>.>~>6/}>=>CĄ=>p\>#Q>E>YA>Y>_'U\w-ս!6=bf=$=C`gs^ԝ<=T⪾w=ᇼs/6S>{54=X}V*>S>ci\>^<>}6?j^ ѽ8>`W>~=,,>hfτM=<>nl>j>> ROU>> Ԁ)ԻXg潄p)ξRj; Eg=f>s?&>q=\ dn o>*Җ>u턾wՕ嘽K7x>Z>}B?ۂhڼJDd*RM1>ѭ<~r>һAz(CAƼB>,=)9ϼ ˾l]{H=oNپ<gz|=:>HK 7ӧ_IӾ87omL?JȾCY=4沠>y>t+>ʆ'!c>;L>_G5>m=!QG6]m[ZB>?>D$>v=y(eV=K蔾vb>̕=m=iZ >c>w|gnB=͙>]QL>s=>hՂbZ$?ʏ=K!0=>w>=GU=aJYO&ユ=O޼p=_M~ҼsĽ콘>*)>؉4ŚV>Bn='Zin==GJ[>[>x\J$AI\Zb>K>XH|=~i=h]=8l=2>P8<}7_Ǿ3>ϱ>zy;d?m>+¾w=d`=;se>{=9>"L>Su>Ӳ">>jȽ[^L@F e>;ggţ=t]w1S}`=LY눻{ >MmK=}e=_Pje,>0{=-:H[]\/d]>>zb>}Q?==e*8P>X *>hC>'c>yH`s[=oÂ=4=z ts>P?Q >jʽ{>q5:=%ֽ,>T>>T?˒>7>p'>O<{2m>0j>L1>ꪾ]ش?=yk>=셢>Oڃ=Z>3y<߻<ʎ>/GJ3ra=X =bq\>>Ϛͽ?>$T >+= 6>=Y$by>lL׿{%>kLl> =D=a5r>:MV٧>,#uv>ʿ;>r=e"YrU => >z y 9{>%UcjԾ< >P Dԟ@=">]2Ps{>PT>M>?2W>XM>=4>=3># 9=rz=s\>>z=&9=p@ >SY=+\. 
>ZG>BGͲ"<@0=J><=v߷,Kɽ 1BT>FΛ=k>T>-{-x?!ʘ;S===\o;S/Sn>1X3=4m>k傽=);H>b=M<>%d{ >WN*>'$8>$><=m7Ϗ=*>?1 ?d<>: 4=m,Q>j =exf=z]=kGؼMPWx={=>?=3*`={>G>$=㤞s>.>fsrK<=zmم=>9yG==*m>&Co_2 D>>μ%> rDhO0=8+RN@׼K=z1=eej=c>6,=>A=Ľ<=AO=Q>b>@dn8;8&Wѧd<у<yG>\|CDὭf=o>;>y>~^^/Q4+v>V->#x>4¦>Jut=5?--=/Ep> ;|=EY>= .A>zн/n> D=]#Н'==J=߾->[?#t%{P3%>پȪ>/ݝڐ>)`5=D }>Ẍ؍?"9>+p&=>E]>[b<`̾zQKo>P>%4;0=9>2z>y>9d6Q>d̓"=>8L>I=.s==3*/vo/[<$5>X>1]>X5?+>1l>gf .=>X;Jr=m1׽&J9$2I>m[=YQ >i=>Fe4ṙ:%ɾX:>M>U>TEϾe>֬ݭ=g%M >(ս>e轎&/>B>E>m>΅==#>}>i>>yR>5>nS>:$d= >>Y5>E>>>`K>d=ɣ+= h@ep0>Խ 6;(+:ߡ=y^>7S=q,i_9=b>6?:q ?ɬM=,Y>́}>DN>BTt>#.ļ]ΧP >ƪ<=[QA*m>_k>M{Fj>d S>]Д],}>;>=8 =o̽l5a+>Լ?5T>jW>&a<2bxމ=`>:+C`>r>V®>}@(9> ?= `D==>r>s><cIocĺ@=@x->r0>8>>vu=cC$/>d=gi>;;nFҾ<,>:޾R>'WI=$ =C=͇۾*WP=ȱ=DV=h>=̊=F"K<%J;ĘHGOh)$>+{3<ӲC<<2>.=¥9==b=!ˍ`bŤ AĽ>Vx=e="H>C Y=VO[>òeյ8=< kc־7<==,ս)F?ղ_=4f6a\>e]=;$O= {>׿r>#~>Ҥ qd=ks앾[Dȼjg=,!q>kսf۲3}dھg>ڣ$.>;+c-7a>8>K>&OYe=s<:X> >">%t<=5J, @>ϦB>?>`4u=&B===> R>\]0>'J[ֽ=V==oÕP>>x>E;>Jt/^q=(7| vS慾ٔ=76lڼ@?—=s㖆>=t5^>vg:7>Z4>=>=G=~*]J> >_=`8?=f~F(>1=>[>·>9=Z򼃷J-<>65b–>xj@m>#N?yhl(l=Xo>q=>dȽªuν~1 =:1>->[>o8o\RL@> %>>־`eO O{J!=<^Ǿ===x}S>eG?_XHe9=O 873l;bKIź=7i=񕾉>GpH>(ә=[=#Ͼq>bȰ!>T7u;2Q= =Q?WYJ>iV >T<=rzɠ<- ?G:>˔>O?l>3Os_>}"ؾUfuW=x>3|Zt =aoyWSF>Պ>KcM>%>::?-7s>>ѭ.>jXW+#<=eB= W>>sx*%A>yUe&7>{R)d =5Oݼ!U:?<>%>^>=]>a ,K>V===sE>`= GiٽXi=X>qd YQ>B>l/>ڻ`Jq>3cUOι>T> G="! =ܽbҾ=-oȾ>b">M}<+=&e0w|uO >S=:ͽJ5#=H뫾=4X=۴#>]?>>l1>Iྒ9d>>D@=w>~=z=">xF>>Iinb>=>6>D>>E߽=>&>M>&(>d?<=z>LlF=b=M=> eF=t/;wN>3 Ex#㾶V׆X=>'b^$=RQ09c=y= ;R @H^,潒1?f =GTvKho=z?y=E>ȾZh{O(=6 ©J޽C^=;>V=㱥Hh>% ? 
;P= _S¾LwOhF>>x>3=Ӌ޾W=h>Bw">0p=8ľMɘ6>\O==w%>ZeW6=">>P >7VG>->=um>/,_> _w>Ȋ>@2Z==Xa>^Y>A&/>VGpG=:2=[>o>0=P!!=h}j<(=䨕=~⾽p.H=߫~><.´pR=C5am= %o>B=ƞ>b\> =ȝ>|`-5?->~>uy᛾y=lȼ<8 >KO>xx<\>H>x>&=!>ވ>v >qb0=a=+^>s'C3 @b>~&=󨡾[^>΍CMc>3Ⱦ9O=ҥ>>>F<]հU Fe=4G< lf-I|=(?D=g¾Q=ѷRv_=C:)> ~i>Ԛ>1>m}O=T3>μHn >]0?>=4=ZA=7ƾI>>$=(d+{=rEy:\>fAv>>;>fٽ潑0>,'> G>H$<ý<2J>>+=;->7B>%A#>>x7?>w楼հ6>4 o>A> fؾ d>]Fˡamo=髽?˻=\ȾYY>/~ȧ3D\!`>KuwI>> Žxq>\r{;U޾(轘# 6|Ҿ==4=!< `F\˾3=?=Θnjnu; >L@>[7<#銾=yT~&PĻ>^-=7z8=n<~+ ;3Y=g޽S=nN.wP4>U=4܇=|J>m*l`=&b>4=(=4>z=<5b>4=cX=% T>L==26=& ?Ž4>nH>ZS=h^> 211=4Dӽ[9><Lj=^r7XK}g7 N>m )>MF>$A p=<=l>>]N>.uc5,=՞̽3:ˣA>0Sd$]Ku[q>z >>U=ǀ=h䐽 >{>r۾|^= ֘>>iBw :>ےW= u=0J1=&_þ,t>տr5Xzd>eo=>r>ɐ?Z3>`h>!$=2>;>>Cim™:FUV4X >.= V=!gXʝ>>3&>d=M7>dh>Z=.7l=M%==4==)=={u`q > &ԼM9>LL9>FF=}zR0>ԗmPA^f  t/u\>SvB=Ե=6>hþ ļ'=jJ={>*½>+ =ȊYY=ϧm\z.Or=U=lR>=5LL =-1T>@GI o=#c>UD>Rf Q[>D&>K=>2QrU ;,>:>A-/>.KGؽ?B6p>K=$>x^!&>[=eܘn>Օ;>ؽ`qp>2 >]U}B3"?YTwFW>b>,߆Y<>=I>ז=+ >F4 5vC=<8"j>'w>y٧><4? >l4sa=ï G@j=L>/z;_iD>C] Z=#]>> >=>>zj43>wͦ#>4Uk>#Уt;=~ݴ#==L,y&=;??M>!`7>=IXAˇT>= Fh廨/L0ھNֽOO>rC>;>־aes$Z>;NGecc=h}8>Ѓ=p2>2B|<4=CIҾJ'e>d=!w>E=t_>9>>Y 0~HnsOй;.߰;T!4=W> %=m>U>.a~=5g+b>>G?M>≾nߠU061N>F>W޾|s=m==|>>,>%=e|r=L?^ٽ[c>>m>>L>g >G>=B*%2͉F;x9<%E>s=;VEm`=_< aiI>Y>WE>tDJ~>T>²M9Hc<=&5;>/>=ꟼ4K&=>YJ> k/=ݾ|?= ft?":u=5/=fp$ >k'F>K5>R=u9=0>KNӕ')=I==xA=ȧ3x=Bf$?O>핦RKs˽ de?>]=>Ey>M3;>z'>T*p>4u)lc>ב!eIG>i6˽3_"g=y=? 
5x>[< >rA>3=o#>#2>zM(>o;=O?j8%>j = U>/=>=J󼀸>1%8=.>ݻ>N~q'ΕXT=B>P>,>AC>yԾy=}½"=ZN S@?&=0o$W=G V=玂>?>UYq=|xQ>R*< qDe>ܼMuO >۽9RH[>dw=o$* yHÌ}XW<Ɯ=aHh0v>=EP׾R=>ҝuӢ} `ؼH=Q> j>ԏ_>ݕ6U=}h9X0=>z־{,>y>==QN=]e>$Ⱦ|g>p>5R >ꇰ=.0R_V>fiT>Y\=>N=D>< x>Y9qSã=Uj3h==A>~T<>)[>>9{ >k>?ן>B<{N>K={=POE>)=s>g<#>JA>>< V<>T%=l>% K&J]i]<1w]=>J#?2ϽM=oU(=;4=1>8=٨>}﾿ꅻR<.j<p݌]7= F'J<>63ue:=P>YDy=x F0>T@Xpo>>L>5+WQ,6a> W>G >덕<д;ۻ7 bIJ*T>=x-S=>p=g3='Z6 >oƽ:<:>Eh߽;Y>w QGwQ>C Ǧ򻨱.z>/>i>r_>pi?h>յ>*U=>-w5:&>R{=H>8 >b>eZ>7> >$?> =F $t>U'=Je!?~=#I>Ny3c=?C,>Gef5y" "(ʪ>:o>np;--Ԯf>9V;y {J=3>?2+qyŏkZ=ٶ=T>_ >^K=4>g>q?>>O*B /=<;Gd=>pl&>X%L=#=Ξ>$;2=G>xO>(=>2> >@>֐> -G>c>vD=d=3S<(GT ɾX=a=ZM@^-=J*>;>ݳ/{ξ*> Ɔo59>?:)%>=5=ž.z>ϐ>V=+-?6]$0=7>gҿվf@?5=xZ]> +BHze 0[2'@>Ku>GX:Q}:=81ʦh\> 羥\A ><K ?]^~`mjcݽ] >*>r>X*>_M>R>===Fﱾ0#DUO?|(ICD'>K=$?0mQ>;17=XFԾR=I>&슾Dhd=ɉ>>&[>EI>i>9 ig>xV5s< >]Y=Ԝ>L=>Fwћ>$.q>n֊bb2<>ۨ><2NV^?MC=E= W# ?= %>\Y>i,> Ӿ|`O|>w?ɜ=<>AL־p >sͼgB!=$>oZIg=>.ѾU=% ¾ <Ⱦ= B> =q>>d<0>M7>^7e!G/a=KOM&Gl>n==WP> I>t>̧>S>۽a5 >^@=tכ<+g>ch^l(|>p R>+=6OgY=_>>>(Td?Db>~>ѣ>J=:^3>2?`wU뽳D>A&%ʓ5ռQm`̽`aC>=f&> ;eͽ罣҈V.tD>!Ì>zoæ>|پt=F[OB}z>` >${>->=1s)o>2=6i#ž%S<>O^:d =uH>l;f=> 7>*L>ZB>M>J iR#PALcp=>a?ś[>U >\>&ٽIFZY>;@ =a$oT]>>ϾE?8R;/>f3CeN8>̭>h@>s~r= ýIW>N,?>)d=$&><7>罽Fa^,>>:$+޾H>>).Yƺ3#½D>g>=A##>>]L^>>"=K|>R?]ik>E/)=>N;Eν-.W/R>+ͽa}5>xR]Gc><=; <1P >8>M>; =}ξ^</7=b!"d3>:^7EY=Y=m;=߈+n̈́">!>K>4"ix_=,Y=T=_Xͽh=z>4ػ0A>ޑ>3z=?}* ̊=@>ȅ!?`#> Y>F>O!&@^D>Ʀyj=Ǿ.~:dh=d>Zܽ4K{/>C`>TTĽzV>C>&K+%==)>{NK0SѾR?P>Sؼ`ɨ<6=#:qY;xa>]>S^=ek=א=_=1 N>$=8AG?4N4=2W>}߷ܼ%=~&< tG>B;Mq*?>G靺>1P.=p=%>Oܕ>mBy|?J#>0 `=y>u'=t!=q?ZQ#>yo>AG;*>>t4=7|<=,.>Q%%d>"l ?о&(=;w`>[w2:< =f>*>\D璽FʮնF>>B=Afb=`>=Yc>;>0+B7>Y?ex4=E&L7<5_p>f8ݽ>D\> ?h ?ͫ?>ᆹ~a=65@+Dr*!;TJёX.Y=@i泻c>6LսX5=eqjAB=2>z>D?=M>±>BU>:[CQ[!;cX=x=ýZdmAAX8ӻ>s>C0>_@$wOv>k%t(> >6̾*ֽb=SCf >8y9֫=l?$>n=>3S>/>R#=IЁi=R>o=>g=ST;r>>v$q=> >[?->ԏg\&`Vd>9|(%> +W>Q>w*A=iJN]>X>͌mdyaWW=>]=4'jS,>3=莺{@=>).?z=c>? 4(H=AJ[>N'ɽ;iD4>|h>\>&9>>l>wwAþ=y;lY=g<  >MZ>jK뾣;i/>y>`E6E/L?R <ǶQ>=a'>n0D=>l=1 =|ʾx>%sH>Jmj>̿>k R>=t>d>W`>j̾%>p cg,y?PK>/2@U >-2P! 
>J=NҮ>C<<[i>ļ<ГB>D%vO=bW>c&p徤ġ>žW=sa>=`=4h?=G;qp!z>*&.)>W2Nh@AzlQ˽һ==T7= F>Ϳ>Ҭ)o{= l>gC\>\<;C[>.&f2pƾ\O}<>,Q>ʾ~D>>\;>GJ=KJaLS2.<>ʒ>~L=`w>悾L>BG>T>>; !> ?܇=~N UX6 (=:wa=[n<< Q=`>q>>">">tý <e+;U>.K>^>Ļjp8ʼnzüŨpZo>`I?ݭ>θ>»>۾M<= >=BE?I??KǏ>)Z>%>0>Bɽ+ ?O>|œ>=r`L>/>K8x=Ⱦ5="Y.= n=?Gy.##P=>>ɾQc=aƽ ] LwB>/O 1>T>U>O>>e8>) >>wþ,J>{ =E>>s;? ~VJ#."<3FXp[j>}n>f;=^ŝ>U>9>v?w=Dһ>ռ zn<I>l>a哎=tO^>N_"d>g=Ң=a>Cc=Xq5PcX>~(?z=Z2^8=lvC>{>{J>"!5<= >>Mܽ\=&=`E> wxcn>#F}>۾.?$h#> p>v\V.>:AT9(iF[r%=Jh>Wxݽo*ܖ`p>'> =szw= /">p>3uZþ~9=_ԽvȲ=F'QeG>j{>ѧ==C-qDZՌ7>=l=C>+>B?=5ܒ>(O%<>)P= =J6~r2@>,4=y9y㲽>S/>{ NƇ->ȴ>eDe{;[I=|@DN͌S'7y=`=!63v2>$ne> *=G*=F> >\>?E>u[:o$r==m(=DϼC~Y?>翇=88\ƕv.B f>{=*H'b=7 =FE=ľ_Qc>}>>E> "ZUS==>=Z=þD>#k=lг>~=Э=Vƽ!?|󽁂)IN>x$˽=a҇:!ޮ=-nk=LB^*.+l>[ݝXBϖ=-37>~>~=o=G>þu< > <~=>b/>Я0>7%>[=8-G/vb>`>>>qGiN C.4?u)=P==PvD>_X~7l>˽>&<~CC>6 KC>!>>`>vpؽ/A> >=[y! ="H>'=cSվ%<m>>~W66=^=6S>U<=>?s\ X>?Y*>rjK ޽E9=='> 3>ּ/AK>M>$>G >\{(a=F\]\~8S>>=d=d5:ͽ8< LT>utfm>**=wF-j1tG8 }6>21I5<)%f>þ6@>ׅ>WYY :aThɽY> >Wo>n>)>ܾgaVL>G>=#Ytk><*>0,"=B!>xf> ]}6_<\=D,=xb>#ӡGum􂴾J >!=u&K>>>iI&F> >}݇>^K>,>ڼ 2=(<?; ,=/y,=IT=Hi؄> [>5H=K>!r6;7dM=c>."[>^v>n$?Kq =aK@d>]>I9`=×N=G=&r>8_>~C z>!tݼk En=Ӿ.=|P߫=/?>B>e}9=վ׈>m> sA=ă>>;=0>m̾ZVpۼotվ$Zlj|==o!G<=4?ۃ;(=@f/wHZ0Lo<n+>*-|6v?4k=*< >=>=L (&EgHN=E=롾Sj>G>smt>0T>>rJOS==v&Β82> =n[oŃ|E?&gn=z>?X>~l=;=+n';)%w0= =K^1%=>(hNw-!>НRf*#>\8>\>W+"4 >䚨>+;~j?35>V?,Pc>(>G>Skr>X%L=н{=}_Ծu/=2=> 佺*־nI\y%S>U<9=j(#=L-P=REQNҾ^*>4=>>w ?=t>>p{u>uJW$e?>U=HA>T>lBϓ>ӏ>_.>>@n=LuK|Kng^Ҽ6Y>ۦt=G!̪A(kI>h^>ļ;> ܾg>>Ft=݉>`o=N,>C (I=+I>E*?7>*>P=$b>`m ?ܪ< 3>Q>>B&>b>~$ҽ>>r;&Ѽӎ>>1>X X6=@ >@{_^>&=iP 9EZ>cʠTFlE%=a)=Vˡ=}j =xh};a`!KDZ=>>'c> L^='>LySҺ>z$?hq>j3t `?N>C޼xW%>,~ξ~qS Ҽk->z="e> =;)>T<=X.\j >Kǽ?2^=B? =m>>"g=ee?Zu.,ؾ<|ߋŽ)=׌}䷼|Y=Ol} ~>->ɰ>e.ϸr=oL= ;f!>i=#=Ac>>^(=پ ,4 >N̹?g/;>8>F>m0r׽>Eu>FL>(k>νz;KO=><>w>;S =(= )H>PȖq>6H=ܻ==>=2}l־ٵW=[>L6̽S3==x=>!=6fĴǽq ?_=oP>^>zB4>Y>KٽRvU>=(Lmb>soM>j9`ȶ` >=KEն=iRr>є=ʝF=^D^<Ѕ-rLj_@X>įL߽Q)z;x=!>)=?=>aj><5>+O>m fS> ߾:=* ?>9itHþf=԰j@Z > $? 
=%X<ŔL>O>>viI>R~>]F=1$>Yɪ>Ξ>ϊW<.Q=>⽾H9iFw5=>|=>νgU>%f=,뽇Q>=g>PS%>վˇ>iL><=oJ=#|>Nޟ>=n6xKl_=?L si?==^ʾj=NJ=1[=9!>t>n߉>=/C>h}ma5;@)&žl>' >F:=w[gC˾`^{1>TS>Щ=0@ľ:\R>NW#>uH1 >1Ru`U>…oK g%= 8>R<2N6c`x\:6YfZ无>t>мY=}="=>U i8=_҂>*^Ͼ3LiZHb=wҽ>1=a!=֮/%?FD\;>>mj*=ɢD㉿>fUR>Rپe>hD>>=J0?H;;Ï?p8iyA?x>ڕ>!>;>$=DȢKح>K#8=}jk>/ 9>>|E=u=/=CJw!c]>M;վBz <= 8vX<ڤ<>2>h= 3  =`XZ-=ͽ~'a<=>lt>5 ?!=RW=>6>>K?R>Y^>u2201=>WG<;=]Խ= )=:l&>5 >Y=<ڨнĞ=Bv=d>,ys=;j)D8>r۽8,>d꽙'>>Ip*Az5Ș=Ch15=h:X+>c>2L)}=t#~>*,t}XV>={T>> AC> >Qλ_>>B>z(n>b>H:={ns7>.=ݳ!ޛt>N=ōd|*=jEf0)О=>{=ռ>(G>m<>[dxf#&>LK>҃C=*ɽ>t곾?2>=0=q+X>?2>>=RH<=>=v#dc= < şס/|>2>6M|޽9RtU>U`W>CL>D=r QQHQRZ;("g=_zzp>zȽgн=oC>d@@>Ֆs>) )n>o4C>[7>q=?Eb=>Gֽcӫ>>G=Zx<= $y[=m>nI轔]=>yr݌>m=PQܠ&fJ <"<Z| m>FI'm+ N>Ya>>YuWO_oZU _U>\(>.:>>ė6оp%SZFS>/Z:>M{>Dz]=?u7wy<Os#j>!~>j!>~w=̾[=l`=B=- S͏>ٽg(FZ>>9;R>>%>^>8>aU=Zk>1aMՎC:=ǂZ>`m>>0κ0#>g |=X>?Hླ>bd>Eu-aJF>6>]3bM>?;TI>Ί>#=0`>s0=%> =3c7Օ>^8Wn>r޽7>7?b =ˠ>0$2'g=%:=m#=>p+,= Ї>:G=p> 6>ې>=̎>$[νWkd}>bd><"ľ ѻfG'f=:?>i=J{Uڲ>dZ>]>f=9j"eCe== X>B;=~Q>Kܾp=xڪr8/=GQYx3l=C>A>xc[;ҥ >̽P0>}/QE">i߽)#M>_P>>?I 9< -H==oh;S!=E>b=nj>1<3x#RTu>d>{;ݻ>U==>,>0-E>!;]j>S+Dr=>UP?w=<@P< ->XH%><܎X>x<0B>=2h>>dZckb<'ZG~+=>B(?΀>x6;>u=0|=w W`>L6z߾bԽ[=aCT>`)O=:";,\wі><鞜>b8pnF>HN{n&ӈ>qֽ_4V38=iK b>r#>>Ñr= . 
=bNybG(]1>Rժ>Wk-)8Vz'?Sbd>la>N5<>иS> K ?Ѐ>'>Ki(>'>u3>@=9CQ>֤mO$KP>lPZ>=:>IkR='ʼ6Dͳ=ENQ>EA#=h<%$>v>>Ӌ>0|=2B}>[=W Ж'>;(">um+\$Ҽ4]= Cj>H>,@=Oz>N=sv=^>#L"8הe>?Q>>ܿRΆ>L ?>#=;U;s!H,|>|s,{@>~.)>bKU&>U=G >q>[=$ZBo΁>` >ZV=&?bֱ/ɆXk0۽ GXhXmyY>kt>iLv*PB >|*>Pley=G>+pk>ѼJم>z >B=UT~;dRyXF>>m^ <ɾ#>> ^8o35=k<ޔN=J>n<>0@=Z =F2>4<` 4>m>ιsji愾bG>৾=>0EL>սG=(ґ< =:>Y>4>}9>QQv@T@ܽL >|>&x;[0>mS=;7'>no¾>q>ں¾F=gE{>r'tM=׵=B7>C[#yƀMXNZj8 7Z<={>sd>_>L>DTd>]B!O>^>>}>󠟽kF<>Mh>Ͼ]Ծ`_>T=پ;s҈E(2cx\W'==oJ=΋E4>|R>ܡ ?D>6==pRȾd=v>nʽ=a">^a<>%G><譽==m{_76p>>XuH>~>G*>ŏ>;>yM+=\p>Hu^=qU#Խg=ӵT>#ubw><Tybξ=tzֱ>w'z>^ޖd=> ۏf>ws<=-(I>E=^=}= ㍽* :^x[*^j<8>GFɃF=w-=¸] 8DaM-==TĽCҲW>"=# g=ɲ{!5pvd;G>gO>Էf<ǼQ|>Ux#6=N >ʻX=G Qs>o>S>$!>0Z>4Ƚ̾K=,&(=#s>rd='=.BĪ#p<>4Oh >v6坾C=K>1>nG>zMY4>~ ^}gm>ԾlYdE?m4>R3ݞ=̎<=\=[ۑL*=Ҍ=dXG>[Uc vy=t?tOv=(p >7> > A+=7=.> XCi;>`1g@?>=)?Wq>@1}Ht< ԽGk+\>!>ͯK=noD?빾+۽>k>_ۛf>概ndkw=Ήľʥ>_:Ok>.>Lʽ&Qh>bF>^Z=0O>=v̽3B>k>rm>s>5>Nč>A=*g]=ε=K>sY=>X{ĩ;AhF= >h%>,gik=E==!t U?w=A/:'h>A=a'>zA>A%e=7aM6>5>qJȡ;>>S:ƇR>$20*>>h?Hp3>҆6> >l5>m>۔>=. > >=N]W׋E!Cֽ@E>\'>#nUNm> >^<>.=o:0V\= oG*PӽD+k>,f=x<>= %>Z?|V='3>-Y<Ͻ1:RSgNǍ#S&m!}#> ]>Q>A>cH=h 1>7G=%$"K$=< þ>=Ô>;{ Qꀾ7̾ P=ZX0?]B&:M?ω7K>xV.׽=o>XV>E6=e (Ya;t= Ka'>=w8:=Mt >j<:=#>j=┽(7>=BPڽ-܈<4;1s?.>UN=̐=>eR=><~Wp7)`>y߉=ۼAz ?R;%9><-=:>nfD4`_=޽ ?G ?+˾ޱϾ&>j%>m=C`ܖC=s.# >]=.$Lp~&u>ē>L=LHO>$UH=}>~F2̚=g?mY>٘=3;R>N䝽ӝk 7j>^ q>N׮~{=_`=4Xr }5a>+=3=K%o"A >](v̩>=_>`euGb><ӾSH?sxf;#,>gM?=>k[ ƾn:?fDzvẽdZq2<>L;G >= |= Mkaͧ>_8!?t+оʅ>.p>Nh{>pY~>yK?=.>ؘ<>5>tw<(i>.b|ܽdX>{M]v>VE">Q·=ZLd˽6Nj>`=~?+.>k^;d>}{h>,˦>$\`6=fQ;?>XP>ɰ=P==Y>izy<0Ih=c.<>r 8 #>>q ? Q>g;4D4>Q2Xjqw=3>­>7$bc~,);AD>)ZÖ>9Gt Z>ׅi>twL{|v/<TY< =SGؾV<]c?vo辯>/Ƕ>q&?. 
N=~Y=-`N>5>md$ ɾ71;|P >e N^>U>=<>H@>|3>Z?CX=>Evז=,>Z>F?C T2=+c}*>O?ļw>+ͫg ˽wȾ&">m ?>쭾OD=>1>J,ý4zN=.䚽MB<᭽v>+>fG>!>i>3>r7=gA>JA=cr =U{ݾv= =nGh> > P OT PS>'Ų=>E" a=W>o>W@=>3<ͽ긼X= > ՚>)lyLMk=Q8gj=18>Ѵ;>2>A~>F=hor>45BE^Ƚ-gM>~9;W X>ni>=7ї>Ծ#y軈U N ϲ>>vľԺ߾HO=95>$_>f= p>rF}(=?9 >a${=>Q>3> >}t& %7̾{0<s=>^Ѿz-(>}m}=% ;ߺwL.>=K>$ @IPIR>cW'?^h[ɻ1>=G`ZJֽr?sA+>6*Q; >(ui TG]>mg=H)>= !>}p=90>*>(>̊WCwV>9>=6]>|2Ab能EV>Q þ ?e>31Ư=AK2>vI=pJ> EJ=R}>=*,>%ھ_> >pb4=j3>; ?ވE=Dm=YM&z>K=A>>c^>;f#LX=n=X>Ʒ=\>>>潝R\qb>,>#-=B>z7?j>b<̜ "ߓ> ,D>Ku=Rh>'>C2K>$=]=CE>?'R3ؾ$2=U[=tq9.>>yD?] =3>zQ˽뷽G ½ )>+:%6 :,Cŏl<`;cD>|>o%><\p=>=5[| = >W=A=9\n<'>(QqO2Jk>;x=&=6>=.>vD>hݼmC;}a&;I>Ⱦ [p9X=>ӗP>t<>o>N>?4B=)\=lk{. s= i䆂i=6=˼2eRv=Vh*-g>6$׾~wG>l߶K>^H >έ=7 U ؾŨiM>B:Fm={`S'y>ܣ-=go> p˶񜴾g>Ⱦq>QE> J>Dꬹ>hٽB}>kޤ>"3=*D~>Y>þ>T̻4Ԉ2>j>@>;mIa`>crýg>lj4}'?L>^9 Z7>=o7byԽ_$>7C>=nf"=?2u >>9NHy >xR?>Of 3׾Si|>yrڽ]Ҿ. F4}3P=4>? kU=?"6 a=L<ǥ=ak><=m>FИ>w6;=R>É>ֽ̙2d>V>pCM|?K>>[ 0X>رCᐾ!/GV>q >w=Zc<>\;({.0t<>X<?3\rd<# >;!Dfؼk,-Wʾ/i5d`]=xx>)> ==Soh>p{=a u->Ͼ=>CH=pŲ= <ϬO>0 ?>=UYƱ>;|Ii>-7!>>G>=k>@<>>=[Ƈ=ߌ&ͽ@WڽI>gm=_5">}>j=9~>6>ŽZȽ9A=({T>cd>U=|>?Lԑ&+ۨ[mYZP^>9)>&>;;@j=p S)Di+j{5<}K`=B14/>O‘w*5,=>Ѿr%<` {>Pه>=ą`>C)>1(>>aj=XFԾuǗ̝2>=8;=d>E >==QL%_>̑`>Rwý2=ޟo=Z7SUJ%>n=X#=O=u>6=k]&>=BPv03>6 >"'<ܡG=jf5>/4{vO>Zn==x >|'0i>7_q`u0E>6>u=v>ʺ1g=qUK׾=҇k>͂'/>1Vn>녾d>9>GA!>նԾ<𶃾½~>!N=K'=7`>ןӽOz;Ͼ`a>ȜP8u.(>õ>@=wN>IV`o>9r>ڛ[n>0>e>(&U~?j>=J]s/> @0c;S>H>>aF_o=>X_=6)= {¾{=CLc=S p>OB=S@ɾ2>9> @f=6u=O>C>9%@ ;bAXL87>XS>)^=t>M<<Ƽ>\5oӉ #@=Ͻi<=ӿ >]= >v>HA:=>ԾUqK/>'k<ʞbl>5ʆ>|b'>g3}=9#߆qK#什x/u ) Joj9DkAgj>e Vlsyp>_x&W=>߲ >Y[W͑;Q۾62\=.+=⹾QD>&S>=Tm+>D&h;U=>9>ۑ-R> #?'ڵ`/#n/PY>T<{>>Dj>ֺٽ夦=q>S1qnsx(;Í,z>xF>>t*>>>f#=QؽP( ?";$ʼ>*m="I)7M>5>R7$>>HUi{lH=xKW>}i>7d=@v)U(>y=N8=鈎A"W̼0U?P}>;=H'aLw>VE<0>cFf>?=~=+I{>n>;>n>J9o2ʽ|=s.)T>]oc;Dx>>?ľϭ>p>؀ >K=پCT>?U>;DԽ5>$l<8ĥ=yқ='ڽy>٣0]ϽE_ؽkҋ=XR#3H;%>Խ>6=A<<S>=>bb>lP>-=]Q>?B鼔*8>zI= >#iK> kV>yu =v[cP->e,?=2>H>>>]=V=Dڗ<">)=1=2t>A>>>6>V:g>ʑ셽a>V>,> ?)F>=Q >y6M>RK*> #> A྽1*M>WI?>#Su{^dd%w>}?}DtS<+=hgU>;9Q7>8]K->=㩽!.>α=Z=W>N-H==RK}]q>Ǿә缀*~`h>8=ь׽uɚ=N$>R

    XEQ==$>hP.'w*>; ">V˽r<:3f=d]>⏶=:Z=Ѡ>Y=>ZB=k=.1Ƽ<%U=XJ峾 O鈽 9EH>/=D{v>,?#yX=L6p7>Ӿ>NSw>>X>tc>h==SBd=xUĽn{ͽٜ`S>U{-=>f?>ܼU$!e[K=0>E]LR<@m>>\ ~>> L$> < =6(>m=KH5?w>Ĭ%=;5x=!> >¼?+>W=]̼ski >fR >',>b>l`ƽ%<(>ǂ>ѰŽ!y;4%>g=p;x/>LׄLS8_e̽o$bl>#5lt==b>ۃ>(>O>x>s>89<_<-LaԾ-1Ͼ *dڽfK g>q3,D=o8<,"J=7>.1>>[>@K½

    k>Ż>K>.~>V j֞7$>6>Z%>d>Hm>~=?=H%=KDO=;TKw=еABv~RVT>sE>">O ]nF=h9 =%45>eo>=' 9Ǧt>Q'y>%-r>> >ZL>_>ξ貾\>f[ ?=/=Vݾ =Ͻc6| >lZep\{>@'c>N,1>A0hQH_=F=R[>(>  $PN$>=>|j==]>a=1ЁÎ6>}># >)ʼ h)>YLI:ƾ_ʼP>>8>m1m"!:a>"P>F>}!`\>< ҽ*I>p_r%^!>> <(a>>D4?J>CW=:P>vU6=\iս1繾=x >q -E>o=s׽Y! 恾d㾏Κ>vR>Zf\> (NW>W>C>a=<>ԍ}=}=Թ=J͠s>㼏4"^;SVc=k4Ͻl=,{`=ʾ==(<>?#= YF*ҽ=ؽzc=Ba*>D=Lm H>C\Z⼽`=si%=C>Q >>>2&>Ә2`Y>H$.u:&a>$dT>4a=qi='58;=J&>8T=MR:38=2 koP"H=9֐͂bE罏-*=Lŕ=6>_>N;RqYo̾?#<(ܾC;ѭE^ghb;F[>OH{-=ս='f=}`)"`+i@U=b>ظ`+=j=k=gɠ=w:P5=(?α<>axQ~:>i>#=ݘ$M<7>cOi>=i  m3<?6=:>7= N=(=?lk!>'>Ó=ָL>Ne64-Z>ݫʽ*+>D> 夽;g'< @N쾫\<>>͎ ==M7HqA=F>;>ߐ^p<=6= ߽lIc=QW½3T=((/=Ԥl>Og=2HppC1W>[=G1?<5]=~ϓDS>%>=G3ٖ>P>Q>4==[+=V=r9:e=YP@=,=W徢 I#r}C>|f> :M>R> >m6a|>(<+>'?ՙ0>^>m<6<6UT<>>J1b>x>1z>Jlm҂g;>Q5,f(>꾭= o8X=oQϾ{8*Y>=wރ=>4=#c31<8&ݾL>ҽ/?{(>:ȶ>u>D>.a:~>đޫ4|ɘ>Ra==il=>k?B.>+>O$خ! >$ AZ=hftc" !>\ӽ5qj-'q"D>:L > <;Ǿ L1ϽM=$>A>g1g5W.umZdD>_lIrоg\􄾇HѼ%V=RVoX+>`6=Q'?8â>A>=̹L=߻Ӱ>9N4̽)<=Q6>%>^H>Ш*== S>G%=/>*t>?Z'>Ѩ>yM>=>V+|<1=;ڽ)ε@=><>C>#>KԼ_ཽH^=>Sn1> |MRP>/=3>ց>t>S2b '=4<8c>| a;?_m6o=d=ewK>:D>9nj>>>b"CB}*&>*ɽQ1/=U{>q> I>7&ƾֱB>w0d d>u_e>Y>$-x>+ ?f|/d=J= A+tg>>Ծ @h==r0ss>.ft<.F=Ƚ>'=Ń>,\"> ~>տR%>˽x=W\Fk=MԽ=4f>Χ`ɾ>qI>rA5>u9:N>Er=&GӾ77>:ӍI =[xR8=2>'V=wMw>Nbnc>M>,% +>lͫ=XVłE>x E=گ>>>2%N}Ś>VP&>Ab0p?@>2>L#>;쑅>] >e >"m>ϔ=RC>N>R=2=Zy=Aʽaj=R>^=ycP=gپHU^s>7 aO>i>dF̽sҢp>WX>>?C8='aʗؼ=`0>'Rt=B=p>7>E>s*~?l>y<8wa=A>G6T9[O˼e?}ξ7ڻIb`>r >oCPY'z@U>===O=Kjc=-9vg=rgF>8c4 M|=>y I>/4 >'> />>> A>>,x#,>ݓ>S 1a=>씾89uc>R>~7x> >>fQ =!X?:>Cf >%w?>^ Y;'f>w=)=F‘>N1B\>,,'>O4?i ?xZ$6Csk'9J>m׆c>Y>ev%?'5|m>z_=2@>%>1m=V>ʼ;p>x3*ɾuow?Vc =Yc= Iv(S Rsݽn"y0`6A2a>⩽|6N=mb<Թ= > >OJ2 ~=Qc=8=,>>ne.O(ѬX>>[]<, zN>Jk^>K>v>14cl> =n'0^>g>֦>J/qT><+>RO]>ګ4>PD&켾`ĽN־@@ی<?s * >e<Ә=n>-Y=x V>Ԥ=A">#K>r=9pݺZ ?o!=΅=<=:+?!܁8 9㌱>*? 
=g9=RA>}&>SN;0=(Mc>!;>S2=f=BUr=?;%?>$~>!ս>>gxN=Ҽ#UOت#=mY>Wur?F,p쥼Ѡt:lëTH/P=>d`׍<.Q>SMaH=>J`p8_>0OS>&~(=fdw{D>V><*xPq>@{p7B=~>b>(B/ïݽR<D=)t`>y4>1էc ԥ5<9==дl=Y2>ES=a[>} @A=5== i>e>ue>>:@>PHB>H;Ľ&C=A;>X;>j'=}S; "=<ߢ <쿽:񲾗dP>\/6i8z'@i=ޡ3=ˍT>ӡl=̬>=mnH<>3#罶UʽK=)`^ $icaK;Ǵ >(ǽ}p=0p= i>H(=hн=>RZj=! >> ">'u/1YY½k=a i%<]>" >\>o> =h>D9zk3_~(b>_r* SxbҾ>DNƱ>5=h>c=w< 2H;><]+?M޾~۽=!=#!>`>>MxC>T^=94=#>HP>7(ڸоD>$fwz={=Z>>޼ >.>=;'Q;z >2;9Z>2nt=!~6*Y>0>>MnAb^=Նоǜ=!8-bI?.>ӗ\>x==o>q`vxt>OT>׽|Ⱦ??>. >U>t= >=>Jk;>'W7a'W;ؐ9!>2r>@@>bᒾN=j<}:΀Z=V=lp_ؾc|>s@ ྃ>Bj3<?'Nʾ߼1; 󰉽z> `=5> >{>c">Mp>fD0>QþX=.( []< T_GN=D=)>0}>GFF=q= Ҧ7>9> c=ԀTU,mi K>a$&g> >Rmoỽ4r=nVO>*݀=>"(>=0=`QD>>=:SN{ĺ>!>>>2L&9'=H>s=>y>q =>xpe=fW\>KJ:(E=>T)> W>Z"=#=KΧ>6>$W>5^޾qS$>V>Ⱦ>(虾)= \1∦a]A= =<+B<=S?;+=m'>G>i>I>O>!)>As>nXբ>pѾm<.KgP} N/T1( =wn>͈=l2vO5{-qz>= >0!}\}=ۻCJ;>>=_i.>Ԙ6>Js&Τ>2>#7<5Gt>X=0=>m>p>AՀ>K@N'>S&>$[K彊@><D=B黈a c/><><>78"wOi| {fw=Wk=ӷ,=bG>F>}ƈgD>`>ܛ>K>eA>g-,νGlV>h>-O>ls>ůru=!~;!;痖Ӣ޾>¾gm>f=NLd=@M ܽu>۽9>ݽq8h^>?>af?x > xֽĉ*A=5=V>zRX+-=?Ί R>D>tμNԳ!b>L脾Nc =:D#9<<>F#>vF5F>|> >&U=>ah>G>g>>n> =@(= > #>@=P>ӕB>Ls>/>ʨ=?ӽ5l>P>^==v0>=N =ğ>yo8>+=l"#?x>>>}b=1>[~=<=<6SxBCؼZ<.>?cc=)"=;(g5AD>lҽ>ssN=D>|=R$>OR<½+=x{>>Z|=~#Žng=P>ړz <.l&0=zC»>4>^>Q>yp?T `f>#5ҽ;=MRr>k,K<ļ5h>^ʌ>d=+{?e=u$uu=C>%EΗ侄H Q>>:⽺D'=&2RFi>"L5=>ӽ*˹0B-阾=>Տ|> 2qP%x>P?YǾ=k5VFA>6Q>x\=)=^=x>ܜ>>f<||K=Qz/>.11e/cB^Ed>{=ź9_>1B>I>>>ʽ[TL->/,=b=LPD?t>"E> >k=)qQIQ>3F>˾=As>z>>W=7:<&>=Jp= $>ud>A$ =w8S,Ǿ>[=Ծ,ŽgG5t4af>*Ī._(>p#x">?z}> U96>8P׽$3=׾@>gx޽SS>==w+f>|%$?g'`f=ݽ'==] _P<Rs=| >9Z r=jkܬ_'=Xth=6 >=0>v>V>hto>y>> >>I4;&=ъ‾ =f>p g<1 0::>H zʾM>>>rgԝ>L>u$Z<\Qc=ps:%lRA)=hQƜӾS~>?A>@>p(Tpd4>Ef0= _<>0v䃾վKU=W=>x5>Z|r>=q v1ɾ>vü?<-XBF5T=Ͼ`O>PJ>AW[O>p }[t>)I>wZ=H*;G=2F%@>/>_:>~;5>sU>#]=W3>l>w'>jB>B 7s3=o/Ni>;G}zl2VY߾t >,Zw<,YLu׷>>8ν,= >j>6nz?=Ffel =k͕Ε\=#>>U@=mX=e٤>D< &>N> 8=<#!q`ީ>')&R>i> ^ʻM{KK/d=|>>=J<>2Tv >"0>@L=lu= I s=х=#=H{=Y>W.>Aҽ@==13>>Xf>Z= 46"Sy>qܽ+ߙɼe־S>A`ݾc>C\Ͼ̝>;>OĐc*%]>v/>3=>G>=>m!>>>G;c̋>6> ?QM>l侄RLA?F=,Ž)>yܾe>~Hr_=*g=Fwv>/ʾ)>ɜ$e𼧽=>8i_4ZDT>X+Jx>Ӿ=ʙK=2>#H=r>~I>iZ 
>mk_XӊnDwƾ?>K>;>&4<&ͷ=]N?S=lga]9 s><>3tޠǽJH>p>I>HΊ>FrR4NJ?=^x>A>=nl0n.>>|0>|=WQ4=ey==>JDb=_ꐽy!>>)=bg>a眾8Atp>Uf2>']9>>C =/L?X><k>־EW =ae#.?~SbW=tj=r{=a'qμ ٬0ϽF#-?h>5 >.t>)8>&e>I"= =>y$j< Uf=t>fyȽa>sj>0xr=(>?>۾w<ד\I>rŌ> 'Q? >=kýMg>E*??O=$6U441<+y(>@5ݾI=P>?4>(=6z{>A ="=>jM>uG"?>d]>hti><(=>b =11==>x=ض?+д^ ;>=>X>RD=W =z=Z/=`>$?Yaоi>fA=GW>"=>^={=ܾȼ>>uߢE>EkξA?>$l;8ͽ9=>¼p}X=o牼FƘ=uoxx==q%;]#='=*Y3>tƽz Va>d=+[پʸz^>ف-緽gg=2a>\4PX22>G"= 2=pD>=>NH@f>=\a=V>]):ھ5>؝7>6!ē>oT>s>Ca=dC="оPms:;^ȥ7>>79>ļSFJT׊ھ`o<|S"N U+(=>Sz=a> l=>Pd>D>ξ w4>Ҵ>aq hh>9*]G>"^>HW>`=v8 >k=?xw;+>p>X(Tˣ4>˼$9=īNA>b>[=W==Mf#?[Y>=9E?w;%<5>n.>HјHtr=>%Ép$>BF, >y款H>ay'>\m> *=T=4>J!]=9^"'?h=>w=ƾov p;ZK>ڭ˾ސ=R$=;>.,/25+S27Q<%>$-=#o=$#>@˽Zx;ⅾ>$=Zc fĆ>>=IOi8v>Li>3IQw= Ծܫ=>/7jw>$-=>1ѽr-R Ce> 9/CY\>=>=ᔵ .? > f>Ww>լ>@Dm{=~X>S&=h>w!v=>>Ƽc׼4V> S<>*kξ *2==S>s޾ 4  >\>apGf>ب<ǰ>3> ߾{QƭrE>n>=}>lY>9>5d>e>uǾq" >x?_+?JOB>ۓ>b> h=_>tؽe11=}>WU#(?*~>۾`/Q4;m= ^tȾ$7=gve>aчAjbW*:3B> V=<…>>S^M5>=NڼƼ>&">oo!Ӿn!$>r?I>G} *>2=4=z1Qe8>⓺<=? 69>~TB>86?\8>*:spоМ>>>kU> Y =n Bռy]N>4?1 X_a=b=0==O~qiooU4=1ʾS==r> >f,<;dei^RU֒>>u6>igW9辖Q=璼xnl>R,ܽ7^<!>h˽6 _>&^YBC>D=Ij0>h>K%q^Q>5 >' Nn>Ms<>>!7t:J=Z,>x=M?>6^>= ľ_>fUt>Яv?klS>|hz<*SBuj*;>}~>C >>`>˗=Cč>'*7e>wľ(b;ŵ=<p辨>W>y=Z~QK:;>hr(3>k>G=+]>ޝݼD>hW<-P@ǽ.WsB7{j:>O0p"j=/U,@n}>^n=w=}*v>iU>>!MJ>dD>A=O>9p>q<> S^>.<J=G(=zTn5=Z_e^[pA>ƾb>`= =s'Cim<`%~ž5̾ч<ȅaNܽn8j=:==> ">>ě==H'>}>[3=OY>T;Q>,>L>C1vDx =欈=W+>оjѧ@u Lp~>rdSHJV>ʱ=>/>Qb;f=V1=[>3/hDW=˼>)Ͼ WRྦ?@t>Ǿ>i=a >\=_28F}{>SV=i>Xb?ƽͷ$9v>}>$'G*=?>(?>(>?Hѝ>i;! 
Q=fu3sH=Ǵ/>,>}>(>w> 2#>:, =C=kܾvD>ܙ;<8p> >a>pq>=l^Z<<]:X[ L QOJŻ E?V&̾`Eϓ>4ܽÇ<м9x^< =>ߑ_L[A*?t678* v>(ڒ>hԨxٗp=>9W>{>FYOX=$Œ>\>xp c=Cw>0Ƚ!Te_> Kj=f=b<>z<@>s0x=􏾑.Žu>Y+?*J=d >=..C80;Uluƾ=$>Q>qfυ>hn-Ka=oVRQE7ei1鰼j = I > +DK>핾lź>z]>o>羶Þ>ku}?P>`u>RCA?^>y?>P)!ٸWTR>T;󔾹Y>>B˼b2>5>xY^>9x>75kq>K>j\ʾc/[> ?ۓ@*>kν>~=ARG%u>'>>2>>7ڼ/q=>$q>]Op½ʽb< E4 r׼kr>2`~fL[>=㬝آ>8.>M?QQ>5eH=/=?j|ry>3(=%ZHh2>,7о1u;($ت>:S>f./< <=Pp="O">m=>^Ji-n-dڽAG=a=0ᏼMǽO>^ž>kq>U=1a>y=FZ>?H鏾1=^y0>Z>;1?>OF>a_p$0Xh>߻~bZhL>xZ ?1#>1ν/=c>w27UW֘P5s?ty>W)@>Yj` :˽j>>4W#5Eڞ>>ẽ<>@+=Dg)l>݌=t?ᖾH6,>͟Z>&2U> Z6p=>0>=>8#@4 =>ZO>-[?ݽi n=v4輗/F?>w<>T ?/Rӽ+F>:,=. 5=2|>#žQ=7>N>J>μs㥽ľ?&=@UR:T^><<8ʃ!=N#>o۲|;_>#Vcq*9: &?vv>{&N>Gɾo?ic?vm>Q<@7<&*ӻ0ąK'>c >)[v=e==>Nhh==oJ>u><_Z>X$4>!0᳼V=}n>>SM> ^>B#>ʹ>>풾}ܾcҾ0Wl dȽXBQ(>_Bm,=̆ c䨽SCU%-j>>>̮=:>?*5a=9Kھ$>( >6>"u= H=.=B+ꐡ=s[|<;Z>G軠i!>D,g> * >j;TཀXǶ>>=_>[=v)K-8eAD>-+~ W>?> r=ca>B=*>2 ᜂ> _¾ְx>|2:>=0νWj=£E>0.>'p*<s9=Ԅֿ>ؽ=(>/>>a:s~>>8Cӽo'>r=P<ѧ諒ZAt=<>yY>>Ck>>%>h>Žc-? >r=xŽ&=G0=&ȣwf>S>Fʼ "4w%=`h>>zV ;.=v9>J30gr8.=ZPY=V%u8=EվJ<E 8TY>&>fp( hPkT9#> ==l >׿Y?0nc2>.>9+m=wN=t8.L|3'=n{[th?&A>#ҵ;>XINW=C?ٵa?GIHz=j2E>0BMFS̾e>ރb> J>r%>7|>P`>l4>%p>y^>[8>b#-V>Ywu숽l>nRQ.&s>ľ>(>=EC[>rq=wu=<[>_=cK>!Խt:vX?B>H ==}>>%A> 8>#KR@ 5Y=2ž >=΁>v1=⣳73V>d=;f?g=R=P!u>mV>>'1>-;>S=)?Uv<3=v>jg?Ni䐽D+X>=F>QPaV>|Pcݙ>-F=^>#G2SλB=Y>ф;7P<>f>eOv[Ɲ>ս2>hD?,FR=NϽ1>B^>R=oN>sJ0> v>l?>ǣT>zߝp>?Vq>=UJ~r#>F=>>vh=߫VE;>y!S5h=~u8nH=ݾR.ܽ(g>Q|>T>R/> }g1>Y=1#p,=ٸM > ky_=? ?:[=3>/?{&>S=mw{RaO ;Ǡ i<:=r=E ?u=Xɽ=RDӾˠ|>ᦶ=ۄ?m=є>η8ɯ쾔XÍ>=5> J> Q`Uxɽ>`4->]<=>!aĵx>?!=˺t>'d>4O_R<>E1ľO=[?޽H{>Mx+= =:>+S$>l=>w ;5y>6]d>L >|SW< >u; F>o<ǾнIB==i>jQ=?9_>+)$-?9ԽA,)>V+ރ.!=˱>RU災0 >X@=I>L=:w>N >>A[ƾ'@S(( >>+L>(?o(4x@DS>_Y[>>> I<8">W-Lݠb=O=aۃc[=uOי>aX=q>6$?E bK=KȾbfWR=T>鎾3 >|  DǾGۘ>/3>/5=[[9S>ockվ{%fN~ٽ===y >9? ?X2>Mվ|.W1k3 z= ?亍>;j=FhW>x n>+߼>ДN=ԓ̙+OU>W&Ζ)*mJ"9^>, ,?>;=\> Pּ@J:=`W42>2='5h?\>p<pn5>,. ‰p (g=V? 
='ɾd=H^=9HN5=}9x> >+R=;C>L=K >U%z) >㷾 ,>g{p< in=4> $> %r>I>0SWnwJn>_ɾD>[B>M9@3c>O)>'=̂$C>% ~$^ >Y^>/f"|h ] >ѻ&6Hmܾۥ>+>1q+ͺVit=Vp;(ZT>s6->ޚf">P_eC<>lBdC=A>$=!~6>BX=>v}>k*=Q>MH\=*܎> S=uy<+н\=s V8==>R:QfԽL:>꼈wnX=#˘>|>E=*>ܑ p嬾D]J=}>S=e5A>R;!݅>z=-_=@>+5Ϝ▙$+:=,1>Ŗ"<]qC>d>O8G н;vc"E>6nv>f=+ɾ%>̘U(22>o *,=nn>f>Ȼ񴾡L>_#=x4>O> >C=b"qUa=M>uN> B>.ײBUf>v>=.HY<\V4C>P gb >> <>2U;+TxB<<=5hn=%B0S=/ؼOw4>+>=Ef V>"d.͝AĽE4>G=EK=Ҿd(j-Πl=+>U;#Ѿ1=|򛾄>/:>g6=O=<Ň=bZJ9<۾M>I&?gҧ.ļ9V?>/>S5<{=@>Ԑf>{Ѭ>{<=1ln>=m1ǽi?G0>dP=ڽ&qk=bBHCؿ>Q(|>RVd-%Ҿ]>B;~=!,DD=qp>I3>R@>)j>&I>b<,> ƻN>'S=h|ws>)Y;н詡 <>>b,==ioP6w>`ŽBnd<.~?9)>Z=OV΄>I$жm^7>2J>-=>ej=>Z>s=rc>=DS>U<8g=4> >>>(m>{tA'/~$>Wm=JD>P>="B=ʾNu.>W>:ż %ݪ>לxI5<]n걽R^ԽւG(˻ >'/>cQ>lPL-/>U;'>$v=2>S>ۈ߉=v4~,< cdq>>y؜?CXTߥ==D>qN9>遽YnԽ2>ds4]bj>iu>GKɽEƌ?6Z->> vҁȺUPþYA]=(AqE>#=01ׅ=gߨ{=gf>mJR1ռ r >S׿ 򨣾 -rG>i;=WB= 8]:-=x*<vQ=wx2>}>W>;?ॕEZ>qa>~J>>^=R>;>I>Pq ڭ,Gb=;Z<>>j>07>9<ų{NܾPdz>H5%<{?ս`>2>hQxfhadν<>Ly:&L:o>VrA=\ =ɽR,mw>ԕ,jH=>+I޼)`D>=o^5Iٽ:K><μUiH?`1^Y=ት>4Z<> >sg BPa=5=|=K|>L^V8>=\>϶.ʡnC=ďf\_߽O=y=6>~~M>K><5<#>7=J->AVALyht>= ==j >>>>ݿġ~/>".>c.>L&U#>%F=6/A%?Oi>8q;yؽNF>Ց=s"ؾ:$=܀b?2N>آ>>0ʾᢼ(=*#3h='='(=<%:>;/6i0=Q=y($њ5R%̀>fDV> Ƽ@<DO>VA>j=,t(4*UJ]=R '>a{F_ھr!>RX nkў܎!=M=#==crPվ U=P<&(<^˾R=9rJ ̼g=~:=oS=P>7H3GYX=:>r/=c+Lf="Oܙ>>Zw> j,ďj4ޜ>L>^1ԅ>J>dFU^=`5AZQXCɼgr@K<ݣ]h> Ǿ|Ͻb5AQq>j=~9z澞&gTq>=pGqQ>A=V8>nfrB>w7.W'l=i?=\YtؾZ(2><҇*ڽ +=;s6 >>;v=L7+>t w ><=9&ty>&>G>̼}(O=()Y=?=^Ͻm ?w`>i [R1>~B.+>>q> >[!C=4>L '[kj+x,4`EB9%lž1>->>!^V=OO0Q?R}% )L[oɽ~Rx>3g>HH=Ȕ= lk>7= _>k=~><=>FL =$D>r0PnI>購|>ꓡ=L>8>x>D ?qS1K=jM>d =,W,?Hg>K(>b&>ue>rԽP=`$O^*C>6@ž$Q>唽f˄ųXNC>iԽ>>fH>=v[>H;E[M>|qؾy¦0> ዊ돧 H>qZ= 8w>CR>0=¹>Yp9=G$S Oq>+ >'=n갽>W>>8$oꢾ7wR@ǀ^=2(T>">CK>=rڽ->gc=kԽ$=퐯=+>WWǽzȽz@>ف޾'>vw>{= psT>?Juxi>ḣ/GAb u]>=?>>GUl>AO>./P3zI=D>0@ή昦K8I=|vԏ<(l+܀>Dq6=al_+=[潅 ,lbbt>6pνW-7[ *=pԽb $E>Þ>/>G X +D͏1=˸*> ?f{B=>9˽kḢ> A>>Mq>{Uq7>쿾е&z= dbx<-нޏ>U1k)]P> ,=#=G$?:AtM>ZQ+>TƀV(<YO>L[}S>붼>|<IS7>~*0>)ϥ˶>s+qD ?=͗Iئ==f QþCښ\>:>c*ĽOߑ>?}e>=^&X?ވb>O>I>=:3n= >5̼j&4VRp=74Խe=/؄> c#j<  Oۼ@J<^=K Nwn>D>m[>\=R>H;j>Ogc<4>ii==n 
0>)@þl=Iϼ>?2)a˳*>n=2h=i)>E]^>='> 8>RUOh>d:>Hڽ#>h,.ҭ=p=Kq>'>+ݨ>>2 X> ?>e>{8))Jɣ>"m齠>66α`%xS7?}> ˼W(;p>>>><꯾:u4QxG>kcC/;M<_>({ :>>}ʾR=2YJtmO>#W=;>a>.P>x&ʌp>+d>+ AQUAܼ$ ЎIV==8I}>:˾b>=ptN=՚>zoB? 2>۸=T)=mj9>_,=eam>Cb>J>J=2l m2=)>F4v޾->6Ž`=/>̂>=H'sw QDa>]>Y=W>>>FN)?[6=>$Η=&H=Ծ͆= ;= _>>=, 5B!j)>ܾ=D>ҟ<tK>vٽƫ?i>>[x>~>9-Op>=Y*җ=(`þF> .=(>=5׾g!]M>>Fx>ӗ=^ >]=[beuȽNO|&=aKAB==b9b:ҩE=I ɾy==?bK=ڽ)?|)>"=ekx=#=֓z{s>f>|>MkO>T>ǒM= =7h>q7<ӾH>^>~=!?N? =E>`W2B>L=ڼ±>:A߽=~~{Uq_5EP>_C?EB>v=J >3>^US=XͽuoT<U˻V1( @_=3À=2>3.Tk=э> %?'?L*>9߽==y>>K>c8f쾮8> );v{R>>L=Nʼ:"dE>A=žH~( <=χ=I^=>3\=o>".~>(s=,>X>]u>D> ۾R=&#>cU?>sڽf@?pH>&>Uͽ4>>ⲹ>y\Q>^=$=LR>KF[^T=<_ʽ/4J9l=e=x>=O>(j>$Ƚp~$>ݽ>_L3$B>D¶==PLp=B=K#[=lsTNJXW'ӽ=غY7f>IԾǽY&I>y7=}Z>^h_}GF=ݎ80q;WʾVͽ[|~|?P=5w>ދ>= =v>=Ei=ZH=Ȍ;B=X8=νnyW6R=ΌV?Jþ<>|?;5\@= >ʕ=-='v<۾Ć>Xھ|=>ЂD>FA>E R2jJ`n>ͼ3=n==1m>6I=s潰Hs苵qA6==ozx={v/? st*c->'*=ݡ>܈=ޱ>>ȾP%(l>d>r=k>=Nb>(>L>@>Q콑F ==[c!=$:)>&vx =A+>ZD>!=!m>?>>jH>y? o>r,6{ >- e>M}ھڙ>ֽ)ڽ؎>[+>!r>O&*";l>i> H>z=tz=q{QMe[|>8=~,:>$'F޾r;@T[B:>)bgaw>Rdnv5=y>-z=ʽڅ={(4D)ue>^(>Oe6=6>==(>y =Wn=ٽ= .>@3',@>,)Ⱦ{<(F3)>ȵھL*N5>Q𽊊>e>_q=6U">J=;{;߭t}8E"X`ؼ#>A4nk蹾Pb>>Ҽ7⭾*<>kf>2>߽lMOH>  =EӐ=> ΆO r{L,0*_=ܓW?>6 Ģt!=$ >\=1>Yo̾=c~W~*=`d >˽8d>Ǯ5>.oX'!פg:EW(>ĝr\}.={= >G} >=!N=ܟ>pI>j>o+ >I>70?VH=mRŽ$`>4Y=\i>*:EpBu>~ӈ=U*="Sb_=q*8G>E?!vp)z`Auą}gߡ>&>]4->]l>ɀ% F> !<_>bl*HSq[BT=wS=Mq>m>.;^ᾪ^<>5!<׾ >Q=h>*NW=jp=ӕȆ:>Х'WG9>?nO=T>/X\?z9=2a==>M<>=z=g>8i= _sNϽeA=j+Y=۽`xZ:XN=>z>v >)># {Q=<^ htu>ߌ~=>6$<>=ޮn>2s@@9ϲ[*=* =V xsw?/>=xy.>yX>Һb==؄Vg߻P?֮(E8">s= >ȲL>O>>Z >{@S=> $ @cpǽ>֢>[<9ɾ>eUMܝ<>Ѿ;)<Ǧ= ϱֽ泣rǾ^'>?<Ƙ>؋TQQ=Ej%y?e>`>dD>k=yc澾£LIX>ksoEĽFT>,M$>c]Mz_=t橕>Aw>bь>>V>?B>@_ֽV>Lo>iB3>]l.9=I=S#>|2~rm#?>R!>kI7=->n=~׽Y>VXwkT=T=|."4L(>ou>>/: ]= >! 
>>p>%v>?>Y>V=Vp?i=7o>P>>c>]f>T>vk>'=6<X)>M> ^NR(>u' ӟ=-]L2#DBF3=\>/>q>-^W_FTp6x}q>ɾ3>E>>5?j&|W=EU(>4ý_>R>G>-,>=v=||>>64?x^;Rn=ǎ>4Γ=->7ec>w4=0'Al>96>J=u4>ê>GMm=v#>2щ>B$>=l)B0=R].<=p >) =ƽ=z"p{c<}w>B>xEk>1< ?!=᧿ >d`>4 %վDb)?˰>Z<ͮYQ>cҔ<7|=νКiڽh>e=X>!7<=X^==3>5s>u*>4'|=,<|>QR"WSqe=y=wIgU9%>0>"<>ܡG=_kv0>*Xk<>ࠟ6Y5oWо]<>Xn6 >!=Q1>Oh|>E!n\I4EJP$|$=>?h[.>">=Ve#N7|>b9ھӢ~;#e2X">P."߾9E‘>>ni(>C뽰<[g>!>\BT`=ܶ>۾x=>$̾hľ 0mA>,`->d&>,;f4>X=)ξ/>.M qM=C*?;d\U<-˦=Ԝ" n=>=q+<>+>->v\ٽ@&4?ྴj=mξ2J>>aT١=vh>xL^*%=~~=ncU==I>s>1Bݶ;=<>)=ख>žQX= 8Y>Vս>H >, Irk>Mپ=To>\1>3:>ˁL\>y>7>D6=t>>sU>%b>H<=;@[>j=Nѽo?p>´>;+KQ>>9:; 3l>1 t=94Ӿ)>O>Y=-e{=A諭TU= 6(<9ƻF>hv[U>5ɟ=â<ؾ>cFԾ=hǽρ;n ?W=v>;L > ..>hh=>2 ?2%8} ٽ>rN> E<>{)>HŸ=o>gWͽ+!>%˾yF>O ? >=?X8 X>ouvk_>߽2<> e>l~d/R%A= Eɽ+_nnH] /.m=WӾ jν> =>Y2>o>Q>Ub?68U>Ҳ6G>=T}>9>ڽHC@,>`?D]Z>w=ܫ)>y&>3d=>u>nG&>3=>ϜL>9>V>VU):>2r>8MνVo*a0>?= kU>\>|=">D'=S>:QI(=>?=i>ƽ=>>ɧ>X>8=>'~!>~M=M= 2>%I?Q>q>T?>"\H_>A>7C1CRĽA>mZ>n#WλP$hQ>f >.<= *t=HdZ >9ᵽ}y-ϼI=x&>>8#>֌>em>ǣ;gֽWL>>=P/>m)8>澺>z>>=߮>[9>X^>9j==zAq-ƾᣪ=u>"?͘F>u=WsŁ>9F>Ҽְ.> -=h>ža@ƽ'> º<_\ Y>p==/P *T2>g>4]4<>z= D>?U> q鄾*=5z=Uɾ- VXn|VR2_>%0>|v">>>aK5>6ɽ꫃=T;>-J"ľ_.Ycfx= =KA==^3EB> (=|=P>^2>Јuy5>ɰ>2=)*>J=hDj:>mc==2a筻L=ʖ͒A=zk>)=ď|1>RO>F,%>T=*>2h=Npwi>h>  ɽz">t.>3$D`w>>e<|q Y>Y7)>>EmK<d">#Z>z0>$6F>/N= F> 7>&Dz!T4ytk>"b%>@aٽj̈l[y&=:Ɩ >+@I>`yоrvjjT>%=Q:H2r>O=k<=;^=  Mf6OO\>q؉Zj+ىK>.L䈽(=νWt>?7<@< K˾=b {<<>>X= >5jٞ=-=Z>68>zc~Ἱ#~m>nΤ$c>,3۔=xi13f3>@Ͻ?V%@=ټ x;>釽>唾 j<%'=\Jx>d5>/p<>/n1=2۾^:e9>ֽZ(= i.bJ>KNŽ(==w== >-> ;EٽB`=,cľV׾,P$6="<{>7=: Q=sVQ% :>"=V`q pF]>r:=n%>` /1p=`o?u=>J=^CC=McS>A=ȶٽj 丽:>_zp*>-AU]>:<{>0NX%S>AH=5>^TD=a,=aE<B>p?*=*}>=1>XOQ+=<;A2Y>G%mۼGs/>m==b>P=q_ɍ6y> QjV>"$`+x{r>LνJ_k[>o=fCg>f:"ol>I.b=V=]Nkp=>=|~>v;P~>cdv R>W¾O.:y=yz>pue=.0eF>  @=sC($.%=lǀ/`淭=׆ڽX>bcRsi\qҽ`ڒ=>??>t >i=yF<>>s >_%,><J_L>6d> ھAkR>[?]T=h=YGD<=rp>%JA) >=Z#R>ָ==ؙ}>\o<6[H!`>wG=VĽE_;󥒾5>Žy=g#>E)>ʕ=>=8>w>1#[WR>Ptn=pr>>>[6gN=ctɽX=MJ;>Mm >dj;V>跾b>F<=3j>XH$n_ 
x=T+>'9>VO>8El>`,!:>r׾=.#z>Eש=g[BKh3;Е%>L.>i_?9,>@>,KU]=Iel>>Boݾ=">ͥ>>5h=Z">>7'>">N\=za2>xl%9wKy^>JW>>ږOMdнC==x=Vzdi>Uo *Z@=1ev>JxX>@+@U 5? |=u >Y0=ʩݾ71#Ό>;>R~b> >ұD;?*i<=C9F=Q:>99@};C>̌°o>h>-*=X~>aVsR(B>`F<` W>C=R<͢ڽt?H>7>մ=z(?h2@si}Jƽca=KF;+̽{.[|7>=_}><;=Z?>B^>zYL=s,""=<=[{7 ջ>b t>r>;;E(># J=>cԧG~ >=£=>{v}<L>S==>$>8oQ?>@>>74Lʇ>ѽ鸃=[pDJr5>Wzck)Ӿ >ڽ ޅi>><)$Ľ/=~򾷎><=h=]WS>f}h f>6*&>Ӻ=O>>-M1.<0+o轻<>5 M9=CwŠ" Y=f=1Z>CJ0B&V׽nUJi>Hm=v 'ȑ >BྗMڽ2>)׾E?;(>m H|(>> 9Wu+?#=>/Eօ=vpl'P>+@J"->mH q䅽}FQw>p@===CW= OШ>=҄>_ >{->EĿb>>; >Q<١ A;?{>A={@>+ˬݞ>N=D>]=V%!>iɾgJ3@>Fb z|',1}s>=k{> <5=(G=*喻>"j>I3d==> ?H<=.' >aK^M>8~ 2>?z>q,u=F9|D=!=`q5*YӭFϽ 8=;>:-b>.J=뭰>?Ȟ6V̽.>Wr;m'Z"S>C=5ⲽ M8>a>\,i>["'a9=Y=]%=ƞ=-Dg>Ƒ=>aꆾ*ǾaF>=0x<*D>)?@t{ʈ}6:w >@=P=!>+ 9>=?+u $nϽ'[>J=8= ݞH&=1o?۾E"(>v˫>J*?\Y'̨bkx}XkŘ=IyQ.`:;!:">):~>>7V%/>`pu>^;>E>- ><(>Ї;=z= ŬLc>Z+<~z>&X>a>cC> 8> m|"IVv=n; K+=n>۾OGd<ј4>;x=Y>J>&żzC젉>)5g>[7>r=>óT*Ps>(–M>=ADiMu&,G>=,'>&Ļ6=FF>iý3s&(<=Cttx15/==FL .>>fξ꘶F*;>D=.>$> =|>f^>ޜz=> 6 >> Y,.e>Z=3]>>׍=n>/ ޡ }=G>RaV+ 8y*-,=3yf1=>.6>uzu]ʾBC!6lKP>ǀ|rJvᾟzkR<=ս F>l;`z9 V>>H=ڻ7˽mه

    .;}* >yl>"M<>򯪾e(wrB>">Ĕ=D >E&վ' +[?PIY6 >Vw=7?1Ӝ>&m)<ýʑu>?>)>N_>zG>:z=+?59 91ʽ^>^>iTZ͓>x>uhoν#5=:-Ś>/'=H>m>3><=g=}.6=r|yUW;ƽ˾.}>~@&/oQ=yu,>O>=D>=_(,>6];[7Ȥ>1==!>;q~s7j>( >#:->͉/>)=Aؾ!g>ǘ>[=*;44徤KF>k>hku=d=H=>f¾_mRe>giZV_YF>J >+>2's>>{0Êݾwt_;g70UxZ>#QKaf>"Ib;:Ͻ#We1>N%:MU BBU>z/彴ݕ>8ݼ >N>!E>ۈ;9T=-ͩ8Tj&>e.(>Y>ǀ=Y=5>^>`s>"g=()=&IվEJK 羘L≕=M>y΁z u>>13V>}[">zþr>HTp=?nb^>QU>Io>9.@>1F'>jF=*&Yh>Sr>ϟʾ3T@> >j=жj >5E@>% =x;;>eԀ=ý53ߛV +tx <?dL=;z>8¾=F }L{)2>>k>/“l>u9>BEv>x6^T>'h>H~nhb?O&A(E='>RF0:.9?3"===U7=Bh>oľYCͽ/>>]K5Gp@xz$XE=jlҽof >>=[`>`>6}ƾ8h83: p?04ȾSe=߽=c$>a眾+Ul,=8F<ˆ>>|¼:`>GhRG >t\>ΆwlZ>vϽk<<~=>t>*R>!о{W;ϾA=1>ּ8$>jE>,>Wh>w=ahO0E>0= D>.$j=14?Ɠܾ&=!.?6?UؽH dغ=H"\>=N= u></>V>y>E1&>S=4> !o"?Z=>s>c+Xx̦>>[ E>ѹ16>>>>1]$e@<ỏt׽yL>?7N>Ϡ x b, ־<>jB2>t,;qν>8= =Qp>eL7>={=*>{X~&>,1>< o=>íg=>OӉC>X7>==^>`?> x'?>E(>%z~#z=@=8P=ۼɞK>Ls>=˛;սL־r;6><==>6ܽ\]߽:=>>ﺾa<,=r=ksZ=Y灾(`=5|K+>>e]>ɾM>->`fྡྷ?DO9hBs*=JtI,^P>yK4=ICmRy>< gN?Bc^la>Z鸽HB>?)&"*͉'Q=s>Sb'>2'`U=CSMKy<>?7p]>·ꊏ>#>'o@<>"Už=@o>Őm= sԹc<νq}< {=e^/S=<>:?_>==>I>o><=v> K =>wL=OcD>d>P>=>„RHF=?LS>O>X=U "`=_ ?=Q.8H{+@=s>%t>=X:#C|5;h?8=^#,=wGP,n/Ӳ>nh=bLǘ>gQ Yآ>O<;|>-\w!?CM=qeJ~y>*;A2dZt>6T{zM>50a>__ >KCGҾH\"7qd{j>yP<=A>Ek:n>=#>8^Ukb>uɾ>̖N=־ թ%>)H) m6 >6't=Z)=Ƣ>3Eֽ?S3^> > 8"ZoJzU B> U>>Od>8mK= Q:7N>8=/]%sc>6$>X=5a~>`0>􇤾[c>V>EཬؽF%̽]>>N$G8r=AP<Rt8=鐖jd>>*7o=?=%d>> ״>6>տ#`K0B><5>T>G-=ymYaH>$>/ BN j៾7>QY5 >hY ,>A>y>B%U>(=/!V=9)ir =&a='> >C9Tׯ>O/= cr>¾x%&">+{Ja3;ߣStV)`H>j^>WI=vN>4=5=f>ࡂ;`>蘾sm n>w =Cרv=PA?{ǰO;">>KN>)RN<>ACF焆K$>yo q>ZӽD>F>R=3);t=ϊ=о3>迼,p>FJJQ>̽[h\2X$]S)n>跾ˠQ4P w^g{Ӿk>:7h>7>0/>ӗP:8I>h=;%nsn=^H">uZ|>p=c՝=B=>Q>* >nIS=^VG='@>^>&'9 >rҶ`73  >n=\s=QսIi ?`e< F>ZJT=蓾ǚ= 3=G:.=j }< #<Nj =^>G]U>8ho=`Jx?k>cr>^}r=R6Qȡ

    B: >_?fsh>^:jLټ=g(>=镾=OSͽNUM=\>=`r< =~6>=U==9M>wiվ]D)@?D1e=7/Rp =r;>>=~a=y&><9>| < >Ѽ0\'v[>W*=T:>gt+)T=x󾌟=m*톽v0?J=N̾\p=h>/=&>=>H=^H0|O~'оD<e(Dc>7:=E(xTx=Ǽ]8=Ƽn J]=>=a*>>b#\> 7=q U>޼_>ý5־'46-y=p<:ϼ+T*#۾*>6< =y}i>'~0=Nz>1=U;>SȰl>F<\&=r>X;F>B>xzB>0/>U_z=bB=$=&[">V=^3>UA=&ۍ=DǼSK>h\>ŃT>);3>HV>q-%Ь~>h>{/<gR>꾎NG>?J<(. cxD&{|!>>v{ Z )l?h>5P;c>??'`>/-<)>ᱬ>z~AS⩾o$>u>|>wl=(d-A)D9ȟƇ?u^>>*ˏ>GK>*>4gU>/1ٕ> >06=gM>Bjtz:U=M->>D4=o`>2n@L>۾Aeb:Lz"ݽ>{ 4$>2&{=sV>=6@-;}>?`gB>>Vcwٞ=>\u>= ӏ=@<>/>0/䧾 =('>V1>-y>p8?W;6?+~>8^k9>=D=LɾGAMBj>EH =2޽ => >VeH>q+־<-; >[Y]l=`sپ`תmʭ>!Ӗ%>z=AZ>*Zs>3>ꖠ=KȾMo>* piK>(@w%=:̾<>ǡQ>O2Fa>Oؽi8=NѼ)zF?26h=u:2\=fMB=*3=;I;~>BѸ>a)>qeC޼ i=!=>c >^8ɇ>Uc77<=Wji=N>VN=hg>;2>+J=J?>ف >uֽɘWk})>ue#(L}7>%蝽8C=c^;衕=.5=+*S*> >%~>ڳ޴Ǿ1=¡wd۾2<>zFr{=H<>t6d/=Sk]+NSW ̏>֫8 K3 >0>!=5=\~C>'J?=+& `jBQTq^ʼ+>´缚˾ .X F> _r>*y!>eq>TG=5<[ǹH*.r=p=޽>:P3>YOʽs|p >AV~>+>ҰU>zd><4>̲վa=w6 >#H>N=80wֻG {>%7=&z>Q:=s>KD;$]=>ś5t&=0sPz>Q=c2GtiJ=ԽuN=߽L>ӾE}=Vw >#<C>>j'=g>Y=\>}G`>dq>io%pڽ[>O@> u Iսk>>g>rnotzϤgKGX=h>y>A8>L½q?>vN>vp%ƽV>Z>حD=1Ͻ7>Scb=3> =! =6Ͻf?8u>E>x G= %\!E@ts!>W>=M>p>ޖ{>>%rپP>¾X<=Ey<+= {y>e>ξ"V=>]u\>.>=>(~4zsV=O>S+Lշmx+=U")yP コ"=O:B=0M=ɾ ?>a>>qn;ʼCW>`F=l>~<>{\m`.9^co>'ʼ|ZY>EŸ='=p<(_4> ?K^z|N8[>%7>#==HiRsu>IE?=Ɋ>ה>>ݽ{5;*=P>^<'>Y=NE><"s>e>$>>gC~r> u>. ʐ/\ b@=!qN>Mj>2|=tXwT`=<{Ծr>oL>v7> =>fx9>m>C<3!?̽ξ;hYC3ӟɾd>2>v>i=y>3=# *>̳[|e>=>\>= j\3=eg>s2" jI>:nEb>Ny0>%\;آPZ@oѾ 9BL; O[=DHN~>hc1C>>g>w=B=xݾ&Ά _}g=gQU>]a0>\>J>/=0>~q>r9>z<]=z==l>? 
Ľr`>LD\=w=a=&8:V2x|=Li è霮RV>S><5ѭBn>z^=W>B[>`j,%2'˾V>#>0?dUF=w=ػB+>=Vmm>g>+62u>X۽oo>t\۷v>R.9>.Dʽ񀉾)=󇏾eP>/{=6>>J>>={<-;$Г><:U2>-QY"晾_=f`>\H;~*:=t{X%>-=R=$8߿>rЁނq>+ ޾chZ5qS>">!9q> :Ͻۄ=|:><4> ҽ(/"wy%Ҿm4=q=:>s코>G7Dh;[_$=:[c=h=->M3>?qǁ>>=an<'Jw<غS\$>;P>g\>TGK <8L$>Cn=m>( $**={&HsL9#O?=?&W;l e=3>$=5?(v>2w=vr>þٳ>u he%?vro͸> YC"k};ZFdn)[,=+T=œ=>xڽ"U=9>*:i5>Խj/L> ?.>C8Ph3YFR>8?ӄ>U=쑽mD>`>y=+tÔ>;`-:s<&IU=:>*8=g\nB۽=ELڳ>y><=s=  >6 > v>&>>%]"|>"Fb>l0ü'>&>y">9kc>%t4x>0H=_?>|^>ξ#Gx`<χX=\2>3=U꽩0=ll; 2wkPy];SP>=r/<&>`>bP>>,>HFMb)tkb=?Dc)ip<T=&=DQ iJ<\8T>^ >`<_S ?b3U >"+=fG5#<'>4y>93=w=ʽBȾf=tM\=WM>>>tR>'fͽÕ=S>ԉ7=A*x>>~;2X(ɒ>\= ?!߽鋄>P>A>4>W>|I+w>yɽl>mOP>LǾ>:>֬<:2>b;c>zm>q><>&Hf<'8=EB>ѽE>:վzo]m=w>բrZ>J~>+==(mb+Kg߽GN > >Ѿ)7=;sq>>=-]8>>#lԾ>DV>t^V=YHļƭRR>Q? >Ob5>}$>{ ]wֽ^D)==>᭾?%l(=zSžٛ=yg=td ? =;+K<=̳F=ۄ>9z-9=^!a>]-=&{R>3;>z3>/<ሀ.=R9==->h1c> =2A;T>oR&:*#1zgн2J> [^=[A<qL>mڨIV6m=ho9>4T>E|; =?<X~=OܩaʾRt>=a?ǎT=7 ھT>W>SѾӅ=g&¾ӽ#U=i}F=9<<@h۰DM%d>30>E> Bg=>[>^Xɾ-=9$YL=y+#= ܽY<{Fnk>w9O> н`) |M=Z'<ȽRW$67g>d=N> t ;>^:3pYJwP> >:e>͝&?ci=#>6d>噾(>2 0m>SHk=nؾxF*'g6ĸ>nB=1?x>j~>|> >g>o ?[=ϗ>w~G{?ˇvY>^ѽ7Z3=HR4#̾>od=Ҽʾϳ>08> ]ũ>aҾVd>Al>@qOP>3G=b'>1=YO==״ >.d>Ѓ==?# waDL=eH|A4O>-!⻜":m>Q%?Q>0>?W=Ʊ:U;o7>:><>%&? c 5I%o1y2>!> F4U`1U>=v0=BZޝ>@콕u>7n >0>E>lo>.k>=>]`= ɾ3P=iPtS,`=.> 7=_.> >0z>w>opbe>KH2<- ~0s&>߾[>"F`=L>hL=K<=5TeB>cMؤ=\=%P6|>?^Y>P=tkXyn>x>a=+u%3,>9=}l>8>5?P2=q,>ޡ]a>.pM=G'?6uݭR=>qR>LN8>C>nQ>O9>K=>>|gWc)]>V=]8~ӽ2(Ս>Ŋ=B>V/v=0>q>">0!ʾH]= ҽ+=s>m<>&?0>1>I>>t|>/> Z=>ƾ·~>>=z>tҘ=P;|>S!= K>">aTi/>=vҚ>Z=> P>>R0<>Fnt9g/y#߾>>7N>0Q?ntۃS>> =ud>ZB=mͣ>Cg־-=6N=4=,i>pF^=j#>Z Ҿt="{=nf<Y0>npta>= ?)};/hv>weϼbhplU IѾV>>#?@>,>ekȭ#/νJCYLE>f a)Ie=5<:=r7>N>{H=]v=-=] >KOTK=[==:=m=r=W>=>L|=,?J!>dBw=!>{+O> վ` m)S>l{>ӳ4>>{>_V0u8'W*±=p,ʽ:-:׽ƾP>>e@=> xi0΋ٶ=|.:+>-Xy==>x}榾X> ;y!E=M䩇=X>>x>ɽ)<$>8>@m=2.5Gu]%=xv>P𾗇ki0nm=uѽ&[s/>Ž>> F"뽲>]*`nP=O>j4>'M.|==>t>r~>8r3%|>p=.>=%+>$K!=>a=wa@<7SNȾa+>w`C=4 Q=]e8> ξ!ґ>T:9v;=5%k>E>0fy<1<^>f=G)5?Ӷ>8>asN >˨!] 
/^>pV>\?)f#={̡>T>˗当;#D<~=l0ǃu> />ySwCX>=R*pq=X+ <>k>(z A=f}Rk%qZ '`>KFӽ=C#>Wk=o| 13S>\<>,i>m>ӽȭ͡hi>8>\i5?=4 꽀li,>,=\P)>|q=Z6Ǽ4>!>[T,o>@>vHP>#ī>4C>S=5@4T<ʘ>va<ʢ׾Yl^ >~=mֽF6 #>}IW \x<$>,F<>aG>E~\Q]hNũ>?]=mLv#G\ =eR = 6ڽMV=%ͽ<>i'<$3ýL=E=S&0M> s>T:=,""f0>== Žt&F=/p>=hk6d$=*>G>=:z%+"=yxҼ)Fk= b,>cl4=T@>^?T6=X5Fœ<:=i= I>ċb>wԽI>> d} >gy, >l>m=Sp5 >mx>j νꜾC>S<׾sܵ=9lɽ >S ><D=B[=4=/x>ZY=޺;e^J=a5>qm>˽𡆾b̾EU>/=;Il%>~̴>5>=0/нo+ս51K=>ɽaB=/C8=H_">6{,v=1>)rj>^WZX>I>x>rF=|0š47 2>cfr~=x>CT >aS5>= UL-*ThuVv>,=Č\>"F4#oe>BSe^>ǨP>&>HO=MJ=AvF6s>rxxd:4)>ꕧ>嫟sQ"J9=r_,>D"?>>󼽼X>#{3&=ѾH0 w>n:Lwn>|-iᾒ\i|>^*Z=?N>I|bćDmB_2> y;efʹ=Y =x>>>v 1 3vI>O>n7s>>[b>S=+J=U5=ftP3b+>Uf>dͿ=寁uK:d]==1> cg/>=nb>jg>vq޽ (?j>H~>,m<Ծ72->,=ݾ Q]=͔=,= fy켉2N>5M֞־ oRX;>'>9< >Zf㽘2>R #=^0Gk? >  =ᕜ>A>Jf= ˽X<DX>>DT2>{*L 3u>l O_Ph>bl&§ՕU;z=>':' DpM[>47U־>2=Cp>Z>c>~p=/ͭ=ZaOf=iսF375=Ց>By>7v+,?az=]=w49=Y ;M>B> ҝ>˪=\=w==>ไOԠ>{.D=?Gؾ <>)=͎ž%gY*>L7i=E=>j> =%>Hk>#>=RN>&>MZ>Ԯm=e载< >T.>6!=}=#>۾ٞJ$-=~,=?F>eZ>#=52 [G$ȽA>m;< U>?Uý c>

    ?+!>$>9>Ѝ.>W+昼3$mUor?p/0=n=̐>> >ʲrH]ڽZm>گщ==>ˮ>.q*2>6>V&>H=OAžoѽkE貾bD>AyQ->p=cO>)`aF=g2fb=pu3S.=ɷGC@r>= v>B0Lc>x=1}+9JYH>s< ~M;!޽0>nؽP>=SɾfSZ<SʳH>iQ |<"=S F>=2P6 #{mpB=S>ߐܽ&>`E=s=\(>NȨ=K>n>wS<<<=>?䃽4@D>₽V{hI=.5=*P; >Ug>(>)dc)>*| =T>_ @>ZP>Oc_hэ =潾NҾ^_S;|><[> >_6=_F{ `=5=>$r =t½6T=:=e>h6>RA~jCRѶ=Lɽn>`)>Jzj}p¾g:7>Vp@;Pc>|J>>N>gz $9q=l<`Ͽf>R~&tr@=EJ>f+ >cvnz|νlT ܇=@xg;I#ݽw:>̽Շ=>A=S@u==[ѽH>=Cy>虑==7&=h˾=2O:=~jn9+Ӳ>xEt">8>dXI>I<5"] :1ׯ 50z==_= Rkcʀ=Roc ]sbg>'M)#k0R pu= =E◽+5t;!>=}=~9>N@,|]T=i>N=>7 ,I=]~K2>sѭ3=T>>>޽A">Wfý?%>Ut7Ӿ缚^j>=8t=H{ >z0O>HV]3;=V<@?MɾŔ=*>ѽ(]> =$3]=,>=A>2=2bֻûcmš=>7r=~*E><^>9.n=$t/Ծj9U >Ծ#>=>:3L*4„=>ҸU<2m>&tþ ~삽 UN`H; |%D>Ɉ >R_=U6m>E篽]=_=Ϻ&ҽ^o==uAA<0<"w=&= ><4]O>< >/u~ì=E}=Xf>4Ἱ'=Ku >r>?=wC)> mfսO:(=4ͽr=@! ľ ⼅>Xϓ=A:=k׽>ʹ>>d;#|n=;g8+> ~ co̾0gڽl>"<нt>7Vl<k>gR>y>=!d #I} >z'־ >lQXƾ@>69=8IYҾBIF&fw>Xb>HTW#vdb>K+cv>q/d>==eRb=B `='U>;\> >>Y>y<'=>oVu>ӌq>΀s=˲ >+=R+>>>>#G>᪊Q[Iʽ=r;gҽ|Ta 侚%yw%i>eP>dLg=!پ>*>>q?1.}=4ݾ|c/ ;>#Q X=yþ>3>7@ 3TҾ>`]b=<;16>>['?J=AJcJMKQb6;N4^I>2r>ཛ=Ŷżu=”s>=>j>I |Xc8S>G^C>-C.

    38N <7vm>,6>|}{sZ=D.aǾ>A&>9>(? >g>N/=/>f@C>&dﳽ>5)q`>;?򕾻h>Yy>Ÿ> þ>(;(>h%r72>>|϶#ný>mˀrX=o=.=">3D>V>SzS=^">V7dV >Y>fP|HFĺa~=`J;O>aB6>._p#>뒽"=L:<:i>fnY?>>IlNi)ճdM~mƛKJZ2>=WzDzK>og̾6%˽R1>r<>4=!;r +=>v>q>i*CT$=U>?]>F>\ WԸ>&Z%>ǽe>6>`!V@>=RD&P>W׼AL=O=>:%˾\Q諒ڄn>a >*ke+m>pS=T꾚^j.O>{Sq>pq>a=a=JAb=V>|z߾R?l'>r;_" >F>t>Z D7̗>]&>2emNjͯ>U=<*>w#> J=>Z='G*=;>Cl -4>NZ>usX>xq?)R4,<>nG˵>};u{=0h=G^=##-@eR >Eɽ%qB>H-R>̜5>vF?K=Q$u8=Ca=Cߨ>>qa'>#̽~?>œ=(? ӽ; >:s 5n>kK>>ZYƄ.>F8=v++oP ɰ>>| \>x=fܾBnz>޵E>>LX>9>>">>,r >ӽz="I./"O>\Ό>a<6#> -p =+I,c>p&UȽ+tv=[ ̽U `}> ti>y0{}ҽ<_dW(`(즾R>z,7(=>}:Z>oB0>b1=8h_> . =>ャ>~<?A=x>%؆>%s= J>޽o,= Ҽty>?E=&>8ؾ$E,y?E[=_@>l=ъ>ϓ /=($>,=8+4ü7¸:<J; 2=vq߈<>_ ">g<^0dҾW=ȾزTTt>>>Q=T&->? uron}sg'=a}>uq>g7=k>sP>(>,8>>='޽P).>*>4>f >S}վY> >=R$:=sH>=?<,AI;.}=%>TM:8(h; =8?ĉTr>O >N[>h퟽!a=fڼl>U*>#*.f+>?h>!7D5J?>U0>ྎĒgmZ>VGvB==3p=DT ē>:U>Z%tPlJHC>>7?Ǻ,=p=Y>]>o z_>p7=><> Ó=sb>'<=Qd>Ra<2N>TP*³=+nB<'^PCRqmٽl։>J>Ɏ>8">{ >qq=W)> 9> z<a>6ͽbu ME[M=ץ>彏>)놽-3=>=>bz=* ?={T>A>́>e=Iw>5=m>RDo=꡾2PL><1iʨHZ%>~yƾ:,K >>q->B.kY:J0+d>U>q=>=F”^>D<r>=ce-=(ެ[==9)\hvQo?V>>AϽ{ڽ/e?]%%>'A8O= s= }&=l읾S>=K<!.؇>Rw=Ld/z{==QcЊE=[<]i^>qUFHYPH-F>T=GDZȃ> >;hN=v 8@mxX>$;(-V=哾3M=[!>t=<@>D9m=#Uu=r{>ǀo{IM ;GA>+k=* =6>= X=Nk5<>]?S>#'=>h5]ʼֽh6=Lb%ľ2n>LhQ\H >ӵK><>>+=G/%} Xv=v<)퍽P4k9>(9[T2 ;=RT= >_a>{= 3<ISsO1>0NI<=ؽ\ͅ= 1UЬH:=LH0,m >`뺾hr;m >>ޮ=R>>#>ܽCӽFQ1>28+==ܬ>-=g?=X>#=?RkZS1 ?=4>>~޾o>u:=t<.޾C݋K=0 <̩5{$t>E卽xߢ==/>>*>ڬH#A3u͊>,d{Nx̾ɺA= =G۽8= >J>1≯=6CIYj< =w>(=*k=\(>>\>˦R!Qۼ1D<,K=Е>X=9<+Žn$U>KJ#>oì>|V#s-=. g,Z>5+ռPϫ>A>.g=ʾ>±=xa><>:K % =;= ֽ+k;=Ny{>ZL/>==A6>齽нxJP>>Ȅn;<=>K> Xq>kχ=_:qQɻR> (A0'<e>VK=½>>otQe>=<>h=6W> $>=jn?/޽gA>zmBV1+ >\>ϣ;SGi &1#~l|FdVjC:>3cV2K*Rs=njYf@>"t "-i=]Žf>\Z>}>%Ƚ>=d]=>ռ=%(U=)H>=[5Gw4ξ>V>BE>3d<#=>iP-=GLJV /-dtݾY= b<տ=KO>+?&?+()>N=h^>UK=Ú&ڽ{2;nE> (>a>I꽅o="$2>-޽4=iS=;=)K>``=6B<@kٚ>~8hf=I’C(s00=>d=qp>;>U$5> Ǡ=yRK>>w"x 5܄>!?9׈;1>~<`h($ǽ-IJ2{=(Y>%Dra,3>b6>>=k.E>B>B.^J>!XG);l$J=?U >y=[ >JF>JǾV׽)Ǿoy>gR> \;%U>_>=p?(`=E>qBe%>=*1E> `>Wd>B]w$`p=V搽'?> {>1. 
n>&l=EQ>.X==<G>we17>{'->Zg"[q翻eV=7P J>e1 ;>9f>x>=V"*=WPϾuͽ'Uȗ=nݼh /=&i=Z>З>I]<@>3c=*LM&{ȕFǾ<%1>6>5I+q=^4=7NЪ|[>< > fb>[Ԃ=\'My>!*Ke(>xb>5B@Ǻl=E9B ۅ=V,4=CLq>8$}=N`>Ft5f=T>l!>4$"-qC=H6>wx+X5O=0>m>؊\;O==S\=?;>/ؽ?`" upf=>h >nU>[˽R=3 < Y)F= ]=>?[R">?C>}v{((N=^Q>Ch!BH=DN½Z>Y Ow>P>7>[?$>9wU k=?M˾X&i6AE I?$"=k=X5 8>#̅>"F{`V>>s>=^m x0J~>d>?>QثyO=]>y>2 K=a12:Z>=BFAΫ(`C:>^N==ؽ`> P>þBpJd;]@03s>#>Sh>[z>0">>ʼz>&^`>~;̽Є >Q;Z<';EMז<*yhj!'9>% >@>>7>A+>߽F'Z>nA=>$>|UGھ*%>t>>l⪑z,>n>)=}m̾0>?) 3"拙=M0,>߰\S> >$o>ͫ[=@L=8O= .N>׼]>=iA_L$L>'>><>4nӽ c^=A>E>|Bẅ>?+E"T>+G-">gQpk콧6U=kAw>Xo==*7<=>} Oe>[Hp=R_qm>Y>R>y=Э=d8>6_=q>;>'.+P>>o8)>^I„=gfX>2r>A >=}^`}8S.>nŽ-sa>{H!=l?<@@>zP>X>6 ˽=u=_6> >>xrJ=θ> =a >t\X=n=>>5>Fٙ><DUHq>*h醼3>⌾󢾻p]n&)>USC!= r /k9Ͽ,?9n=۱> = *>3Г=&!>, =J:9=.`ݽ/>p=3e2>*¾mRm8]VԾ!=$xm>ѬwToL<'Y>K#>{=G>d:MWp>ǾHѡ<!k>:>@ѼH>z{>`<`Ui=A9զàv?E>Υ>,~>e=SG 8G=bh=֚=۽BܰI>=n">6|; 3>b=cBR> ׾푽PLZPs &>λ>"#=N]>Vд

    F>S=44> Z,o= 㽡7ў<=e>=p>1~> 6 E>gY=G4>j9`+hTd;=N L^ i0C>y>>/=1S9>2c߽?>­6=?l\c.>^ ?okh'[$>c`׽E^[7>0mⵝ===!/?ѽn 匔>==r*1=}9 ޽T~JuQ>޾ۼk ӣͮ:>9>@0=  'Drp_,>W 2uF=#ӽ=R24:=vϼ/>^<-T>B">n ;\&n>yu%="݄<>*Iapc><+e_ŃL>OC>;>26W=S=SzHb>6{k=?n>=eeAsc>=m>/@>ۘ>TIݓ<>?HN>D3u̽w a=0>1++ܾ<=o'LD[%> 7>g"q>X] _ 5WƼ>>==D=B>`< r P=6=Zξ<>`VR=urTLV<Ӿxka>̾d>Z>=^K>)_*>9̖>U&>LODۡC>M=Gu/>jk ೻X>=!=U>scikit-bio-0.6.2/skbio/io/format/tests/data/embl_constructed000066400000000000000000000021211464262511300241140ustar00rootroot00000000000000ID LT357133; SV 1; linear; genomic DNA; CON; INV; 140 BP. XX AC LT357133; XX PR Project:PRJEB12116; XX DT 18-SEP-2016 (Rel. 130, Created) DT 18-SEP-2016 (Rel. 130, Last updated, Version 1) XX DE Spodoptera frugiperda genome assembly, scaffold: C262451 XX KW . XX OS Spodoptera frugiperda (fall armyworm) OC Eukaryota; Metazoa; Ecdysozoa; Arthropoda; Hexapoda; Insecta; Pterygota; OC Neoptera; Endopterygota; Lepidoptera; Glossata; Ditrysia; Noctuoidea; OC Noctuidae; Amphipyrinae; Spodoptera. XX RN [1] RA Landry J.; RT ; RL Submitted (16-MAR-2016) to the INSDC. RL EMBL, Genemics Core Facility, Meyerhofstrasse 1,, 69117 Heidelberg, Germany XX DR MD5; 90c05497c7cde972dc52b8d06097f88c. DR ENA; FJUZ01000000; SET. DR ENA; FJUZ00000000; SET. DR BioSample; SAMEA3707213. XX FH Key Location/Qualifiers FH FT source 1..140 FT /organism="Spodoptera frugiperda" FT /mol_type="genomic DNA" FT /cell_line="SF21" FT /db_xref="taxon:7108" XX CO join(FJUZ01138823.1:1..140) // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_feature_level_record000066400000000000000000000075231464262511300257520ustar00rootroot00000000000000ID LK021130.1:74067..75610:rRNA; SV 1; linear; genomic DNA; STD; PRO; 1544 BP. XX PA LK021130.1 XX PR Project:PRJEB5701; XX DT 02-JUN-2014 (Rel. 121, Created) DT 04-FEB-2016 (Rel. 127, Last updated, Version 5) XX DE Vibrio anguillarum 16S rRNA XX KW . XX OS Vibrio anguillarum OC Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; OC Vibrio. 
XX RN [1] RA Holm K.; RT ; RL Submitted (26-MAR-2014) to the INSDC. RL Norstruct, Dept of Chemistry, University of Tromso, Science Park 3, NO-9037 RL Tromso, NORWAY. XX RN [2] RX DOI; 10.1186/s40793-015-0060-7. RX PUBMED; 26380645. RA Holm K.O., Nilsson K., Hjerde E., Willassen N.P., Milton D.L.; RT "Complete genome sequence of Vibrio anguillarum strain NB10, a virulent RT isolate from the Gulf of Bothnia"; RL Stand Genomic Sci. 10:60-60(2015). XX DR SILVA-LSU; LK021130. DR SILVA-SSU; LK021130. DR MD5; afd116bf2c1a13acbf40d63d82f0218c. DR BioSample; SAMEA3865288. XX FH Key Location/Qualifiers FH FT source 1..1544 FT /organism="Vibrio anguillarum" FT /chromosome="1" FT /host="Rainbow trout" FT /strain="NB10" FT /mol_type="genomic DNA" FT /country="Sweden:Baltic Sea, Norrbyn Umeaa" FT /isolation_source="clinical isolate, Rainbow trout" FT /serovar="O1" FT /db_xref="taxon:55601" FT rRNA LK021130.1:74067..75610 FT /gene="16S" FT /product="16S rRNA" FT /note="16S rRNA subunit (checked and believed to be FT right,based on 454- and PacBio-sequencing)" XX SQ Sequence 1544 BP; 386 A; 344 C; 493 G; 321 T; 0 other; aattgaagag tttgatcatg gctcagattg aacgctggcg gcaggcctaa cacatgcaag 60 tcgagcggca gcacagagga acttgttcct tgggtggcga gcggcggacg ggtgagtaat 120 gcctaggaaa ttgccctgat gtgggggata accattggaa acgatggcta ataccgcatg 180 atgcctacgg gccaaagagg gggaccttct ggcctctcgc gtcaggatat gcctaggtgg 240 gattagctag ttggtgaggt aatggctcac caaggcgacg atccctagct ggtctgagag 300 gatgatcagc cacactggaa ctgagacacg gtccagactc ctacgggagg cagcagtggg 360 gaatattgca caatgggcgc aagcctgatg cagccatgcc gcgtgtatga agaaggcctt 420 cgggttgtaa agtactttca gtcgtgagga aggtggtgtt gttaatagca gcatcatttg 480 acgttagcga cagaagaagc accggctaac tccgtgccag cagccgcggt aatacggagg 540 gtgcgagcgt taatcggaat tactgggcgt aaagcgcatg caggtggtgg attaagtcag 600 atgtgaaagc ccggggctca acctcggaac cgcatttgaa actggttcac tagagtactg 660 tagagggggg tagaatttca ggtgtagcgg tgaaatgcgt agagatctga aggaataccg 720 gtggcgaagg cggccccctg gacagatact 
gacactcaga tgcgaaagcg tggggagcaa 780 acaggattag ataccctggt agtccacgcc gtaaacgatg tctacttgga ggttgtggcc 840 ttgagccgtg gctttcggag ctaacgcgtt aagtagaccg cctggggagt acggtcgcaa 900 gattaaaact caaatgaatt gacgggggcc cgcacaagcg gtggagcatg tggtttaatt 960 cgatgcaacg cgaagaacct tacctactct tgacatccag agaagccagc ggagacgcag 1020 gtgtgccttc gggagctctg agacaggtgc tgcatggctg tcgtcagctc gtgttgtgaa 1080 atgttgggtt aagtcccgca acgagcgcaa cccttatcct tgtttgccag cgagtcatgt 1140 cgggaactcc agggagactg ccggtgataa accggaggaa ggtggggacg acgtcaagtc 1200 atcatggccc ttacgagtag ggctacacac gtgctacaat ggcgcataca gagggcagca 1260 agctagcgat agtgagcgaa tcccaaaaag tgcgtcgtag tccggattgg agtctgcaac 1320 tcgactccat gaagtcggaa tcgctagtaa tcgtagatca gaatgctacg gtgaatacgt 1380 tcccgggcct tgtacacacc gcccgtcaca ccatgggagt gggctgcaaa agaagtgggt 1440 agtttaacct ttcggggagg acgctcacca ctttgtggtt catgactggg gtgaagtcgt 1500 aacaaggtag cgctagggga acctggcgct ggatcacctc ctta 1544 //scikit-bio-0.6.2/skbio/io/format/tests/data/embl_feature_level_record_no_FT000066400000000000000000000061241464262511300270330ustar00rootroot00000000000000ID LK021130.1:74067..75610:rRNA; SV 1; linear; genomic DNA; STD; PRO; 1544 BP. XX PA LK021130.1 XX PR Project:PRJEB5701; XX DT 02-JUN-2014 (Rel. 121, Created) DT 04-FEB-2016 (Rel. 127, Last updated, Version 5) XX DE Vibrio anguillarum 16S rRNA XX KW . XX OS Vibrio anguillarum OC Bacteria; Proteobacteria; Gammaproteobacteria; Vibrionales; Vibrionaceae; OC Vibrio. XX RN [1] RA Holm K.; RT ; RL Submitted (26-MAR-2014) to the INSDC. Norstruct, Dept of Chemistry, RL University of Tromso, Science Park 3, NO-9037 Tromso, NORWAY. XX RN [2] RX DOI; 10.1186/s40793-015-0060-7. RX PUBMED; 26380645. RA Holm K.O., Nilsson K., Hjerde E., Willassen N.P., Milton D.L.; RT "Complete genome sequence of Vibrio anguillarum strain NB10, a virulent RT isolate from the Gulf of Bothnia"; RL Stand Genomic Sci. 10:60-60(2015). XX DR SILVA-LSU; LK021130. DR SILVA-SSU; LK021130. 
DR MD5; afd116bf2c1a13acbf40d63d82f0218c. DR BioSample; SAMEA3865288. XX SQ Sequence 1544 BP; 386 A; 344 C; 493 G; 321 T; 0 other; aattgaagag tttgatcatg gctcagattg aacgctggcg gcaggcctaa cacatgcaag 60 tcgagcggca gcacagagga acttgttcct tgggtggcga gcggcggacg ggtgagtaat 120 gcctaggaaa ttgccctgat gtgggggata accattggaa acgatggcta ataccgcatg 180 atgcctacgg gccaaagagg gggaccttct ggcctctcgc gtcaggatat gcctaggtgg 240 gattagctag ttggtgaggt aatggctcac caaggcgacg atccctagct ggtctgagag 300 gatgatcagc cacactggaa ctgagacacg gtccagactc ctacgggagg cagcagtggg 360 gaatattgca caatgggcgc aagcctgatg cagccatgcc gcgtgtatga agaaggcctt 420 cgggttgtaa agtactttca gtcgtgagga aggtggtgtt gttaatagca gcatcatttg 480 acgttagcga cagaagaagc accggctaac tccgtgccag cagccgcggt aatacggagg 540 gtgcgagcgt taatcggaat tactgggcgt aaagcgcatg caggtggtgg attaagtcag 600 atgtgaaagc ccggggctca acctcggaac cgcatttgaa actggttcac tagagtactg 660 tagagggggg tagaatttca ggtgtagcgg tgaaatgcgt agagatctga aggaataccg 720 gtggcgaagg cggccccctg gacagatact gacactcaga tgcgaaagcg tggggagcaa 780 acaggattag ataccctggt agtccacgcc gtaaacgatg tctacttgga ggttgtggcc 840 ttgagccgtg gctttcggag ctaacgcgtt aagtagaccg cctggggagt acggtcgcaa 900 gattaaaact caaatgaatt gacgggggcc cgcacaagcg gtggagcatg tggtttaatt 960 cgatgcaacg cgaagaacct tacctactct tgacatccag agaagccagc ggagacgcag 1020 gtgtgccttc gggagctctg agacaggtgc tgcatggctg tcgtcagctc gtgttgtgaa 1080 atgttgggtt aagtcccgca acgagcgcaa cccttatcct tgtttgccag cgagtcatgt 1140 cgggaactcc agggagactg ccggtgataa accggaggaa ggtggggacg acgtcaagtc 1200 atcatggccc ttacgagtag ggctacacac gtgctacaat ggcgcataca gagggcagca 1260 agctagcgat agtgagcgaa tcccaaaaag tgcgtcgtag tccggattgg agtctgcaac 1320 tcgactccat gaagtcggaa tcgctagtaa tcgtagatca gaatgctacg gtgaatacgt 1380 tcccgggcct tgtacacacc gcccgtcaca ccatgggagt gggctgcaaa agaagtgggt 1440 agtttaacct ttcggggagg acgctcacca ctttgtggtt catgactggg gtgaagtcgt 1500 aacaaggtag cgctagggga acctggcgct ggatcacctc ctta 1544 // 
scikit-bio-0.6.2/skbio/io/format/tests/data/embl_multi_records000066400000000000000000000151351464262511300244430ustar00rootroot00000000000000ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. XX AC M14399; XX DT 16-JUL-1988 (Rel. 16, Created) DT 02-SEP-1999 (Rel. 60, Last updated, Version 3) XX DE E.coli alkaline phosphatase signal mRNA, 5' end. XX KW alkaline phosphatase; signal peptide. XX OS Escherichia coli OC Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; OC Enterobacteriaceae; Escherichia. XX RN [1] RP 1-63 RX DOI; 10.1016/0378-1119(85)90319-1. RX PUBMED; 3912261. RA Gray G.L., Baldridge J.S., McKeown K.S., Heyneker H.L., Chang C.N.; RT "Periplasmic production of correctly processed human growth hormone in RT Escherichia coli: natural and bacterial signal sequences are RT interchangeable"; RL Gene 39(2-3):247-254(1985). XX DR MD5; c9b40131b8622946b5aafdf5473b3d43. XX FH Key Location/Qualifiers FH FT source 1..63 FT /db_xref="taxon:562" FT /mol_type="mRNA" FT /organism="Escherichia coli" FT CDS 1..>63 FT /db_xref="GOA:P00634" FT /db_xref="InterPro:IPR001952" FT /db_xref="InterPro:IPR017849" FT /db_xref="InterPro:IPR017850" FT /db_xref="InterPro:IPR018299" FT /db_xref="PDB:1AJA" FT /db_xref="PDB:1AJB" FT /db_xref="PDB:1AJC" FT /db_xref="PDB:1AJD" FT /db_xref="PDB:1ALH" FT /db_xref="PDB:1ALI" FT /db_xref="PDB:1ALJ" FT /db_xref="PDB:1ALK" FT /db_xref="PDB:1ANI" FT /db_xref="PDB:1ANJ" FT /db_xref="PDB:1B8J" FT /db_xref="PDB:1ED8" FT /db_xref="PDB:1ED9" FT /db_xref="PDB:1ELX" FT /db_xref="PDB:1ELY" FT /db_xref="PDB:1ELZ" FT /db_xref="PDB:1EW8" FT /db_xref="PDB:1EW9" FT /db_xref="PDB:1HJK" FT /db_xref="PDB:1HQA" FT /db_xref="PDB:1KH4" FT /db_xref="PDB:1KH5" FT /db_xref="PDB:1KH7" FT /db_xref="PDB:1KH9" FT /db_xref="PDB:1KHJ" FT /db_xref="PDB:1KHK" FT /db_xref="PDB:1KHL" FT /db_xref="PDB:1KHN" FT /db_xref="PDB:1URA" FT /db_xref="PDB:1URB" FT /db_xref="PDB:1Y6V" FT /db_xref="PDB:1Y7A" FT /db_xref="PDB:2ANH" FT /db_xref="PDB:2G9Y" FT /db_xref="PDB:2GA3" 
FT /db_xref="PDB:2MLX" FT /db_xref="PDB:2MLY" FT /db_xref="PDB:2MLZ" FT /db_xref="PDB:3BDF" FT /db_xref="PDB:3BDG" FT /db_xref="PDB:3BDH" FT /db_xref="PDB:3CMR" FT /db_xref="PDB:3DPC" FT /db_xref="PDB:3DYC" FT /db_xref="PDB:3TG0" FT /db_xref="PDB:4KM4" FT /db_xref="PDB:4YR1" FT /db_xref="PDB:5C66" FT /db_xref="PDB:5GAD" FT /db_xref="PDB:5GAF" FT /db_xref="PDB:5GAG" FT /db_xref="PDB:5GAH" FT /db_xref="PDB:5JTL" FT /db_xref="PDB:5JTM" FT /db_xref="PDB:5JTN" FT /db_xref="PDB:5JTO" FT /db_xref="PDB:5JTP" FT /db_xref="UniProtKB/Swiss-Prot:P00634" FT /note="alkaline phosphatase signal peptide" FT /codon_start=1 FT /protein_id="AAA23431.1" FT /transl_table=11 FT /translation="MKQSTIALAVLPLLFTPVTKA" XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 60 gcc 63 // ID KX454487; SV 1; linear; mRNA; STD; INV; 743 BP. XX AC KX454487; XX DT 02-FEB-2017 (Rel. 131, Created) DT 02-FEB-2017 (Rel. 131, Last updated, Version 1) XX DE Ruditapes philippinarum histone mRNA, complete cds. XX KW . XX OS Ruditapes philippinarum OC Eukaryota; Metazoa; Lophotrochozoa; Mollusca; Bivalvia; Heteroconchia; OC Euheterodonta; Veneroida; Veneroidea; Veneridae; Ruditapes. XX RN [1] RP 1-743 RA Yang D., Zhao J., Wang Q.; RT ; RL Submitted (27-JUN-2016) to the INSDC. Key Laboratory of Coastal Zone RL Environment Processes and Ecological Remediation, Yantai Institute of RL Coastal Zone Research (YIC), Chinese Academy of Sciences (CAS), 17 Chunhui RL Road, Laishan District, Yantai, Shandong 264003, China XX DR MD5; cbc730cf7a8d694b50fb7dd6b993ae0d. 
XX CC ##Assembly-Data-START## CC Sequencing Technology :: Sanger dideoxy sequencing CC ##Assembly-Data-END## XX FH Key Location/Qualifiers FH FT source 1..743 FT /db_xref="taxon:129788" FT /mol_type="mRNA" FT /organism="Ruditapes philippinarum" FT CDS 58..444 FT /codon_start=1 FT /product="histone" FT /protein_id="APY18893.1" FT /translation="MPGGKAGKDSGKAKAKAVSRSARAGLQFPVGRIHRHLKNRTTSHG FT RVGATAAVYSAAILEYLTAEVLELAGNASKDLKVKRITPRHLQLAIRGDEELDSLIKAT FT IAGGGVIPHIHKSLIGKKGGQQAK" XX SQ Sequence 743 BP; 241 A; 147 C; 145 G; 210 T; 0 other; tgtgcacagt ctacgcgtca tcttgaaaga aagaactaca ctactccaaa aataatcatg 60 cctggtggaa aagctggtaa agattccgga aaggccaagg ctaaggcagt gtcaaggtcc 120 gcaagagctg gcttacagtt tccagtcgga cgtattcaca ggcatttgaa gaacagaacc 180 actagccacg gtcgtgttgg agctacagca gccgtttaca gtgcagcaat ccttgaatac 240 ctgaccgccg aagtgcttga gttggctgga aacgcaagta aagatctcaa agtaaagaga 300 atcaccccac gtcacttgca gttggcaatc agaggagatg aagagttgga ttccctaatt 360 aaagccacaa tcgctggtgg tggtgttatt ccacatatcc acaagtcact tattggcaag 420 aagggaggtc agcaagccaa ataaattgga catactcatt catcagggaa caatgtgtag 480 tgaatgtgtt aaaaagaaca atctcattgt gtagctcttt agttttatat gaatgtgtta 540 acatggtcat tcacatcgta tgactcatag aatcatctgt gtatcatttc atcctctcat 600 tttatagctc ctcattttcc ttagactcat taaaattttt atctcggaaa aatgtttttt 660 ctacaatttt agcattcatt tatcttcatc ttgcttttat gtttaataaa acgaacttat 720 aataccaaaa aaaaaaaaaa aaa 743 // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_single_record000066400000000000000000000100501464262511300243760ustar00rootroot00000000000000ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. XX AC M14399; XX DT 16-JUL-1988 (Rel. 16, Created) DT 02-SEP-1999 (Rel. 60, Last updated, Version 3) XX DE E.coli alkaline phosphatase signal mRNA, 5' end. XX KW alkaline phosphatase; signal peptide. XX OS Escherichia coli OC Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; OC Enterobacteriaceae; Escherichia. XX RN [1] RP 1-63 RX DOI; 10.1016/0378-1119(85)90319-1. 
RX PUBMED; 3912261. RA Gray G.L., Baldridge J.S., McKeown K.S., Heyneker H.L., Chang C.N.; RT "Periplasmic production of correctly processed human growth hormone in RT Escherichia coli: natural and bacterial signal sequences are RT interchangeable"; RL Gene 39(2-3):247-254(1985). XX DR MD5; c9b40131b8622946b5aafdf5473b3d43. XX FH Key Location/Qualifiers FH FT source 1..63 FT /db_xref="taxon:562" FT /mol_type="mRNA" FT /organism="Escherichia coli" FT CDS 1..>63 FT /db_xref="GOA:P00634" FT /db_xref="InterPro:IPR001952" FT /db_xref="InterPro:IPR017849" FT /db_xref="InterPro:IPR017850" FT /db_xref="InterPro:IPR018299" FT /db_xref="PDB:1AJA" FT /db_xref="PDB:1AJB" FT /db_xref="PDB:1AJC" FT /db_xref="PDB:1AJD" FT /db_xref="PDB:1ALH" FT /db_xref="PDB:1ALI" FT /db_xref="PDB:1ALJ" FT /db_xref="PDB:1ALK" FT /db_xref="PDB:1ANI" FT /db_xref="PDB:1ANJ" FT /db_xref="PDB:1B8J" FT /db_xref="PDB:1ED8" FT /db_xref="PDB:1ED9" FT /db_xref="PDB:1ELX" FT /db_xref="PDB:1ELY" FT /db_xref="PDB:1ELZ" FT /db_xref="PDB:1EW8" FT /db_xref="PDB:1EW9" FT /db_xref="PDB:1HJK" FT /db_xref="PDB:1HQA" FT /db_xref="PDB:1KH4" FT /db_xref="PDB:1KH5" FT /db_xref="PDB:1KH7" FT /db_xref="PDB:1KH9" FT /db_xref="PDB:1KHJ" FT /db_xref="PDB:1KHK" FT /db_xref="PDB:1KHL" FT /db_xref="PDB:1KHN" FT /db_xref="PDB:1URA" FT /db_xref="PDB:1URB" FT /db_xref="PDB:1Y6V" FT /db_xref="PDB:1Y7A" FT /db_xref="PDB:2ANH" FT /db_xref="PDB:2G9Y" FT /db_xref="PDB:2GA3" FT /db_xref="PDB:2MLX" FT /db_xref="PDB:2MLY" FT /db_xref="PDB:2MLZ" FT /db_xref="PDB:3BDF" FT /db_xref="PDB:3BDG" FT /db_xref="PDB:3BDH" FT /db_xref="PDB:3CMR" FT /db_xref="PDB:3DPC" FT /db_xref="PDB:3DYC" FT /db_xref="PDB:3TG0" FT /db_xref="PDB:4KM4" FT /db_xref="PDB:4YR1" FT /db_xref="PDB:5C66" FT /db_xref="PDB:5GAD" FT /db_xref="PDB:5GAF" FT /db_xref="PDB:5GAG" FT /db_xref="PDB:5GAH" FT /db_xref="PDB:5JTL" FT /db_xref="PDB:5JTM" FT /db_xref="PDB:5JTN" FT /db_xref="PDB:5JTO" FT /db_xref="PDB:5JTP" FT /db_xref="UniProtKB/Swiss-Prot:P00634" FT /note="alkaline 
phosphatase signal peptide" FT /codon_start=1 FT /protein_id="AAA23431.1" FT /transl_table=11 FT /translation="MKQSTIALAVLPLLFTPVTKA" XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 60 gcc 63 // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_single_record_lower000066400000000000000000000004201464262511300256060ustar00rootroot00000000000000ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 60 gcc 63 // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_single_record_simple000066400000000000000000000076661464262511300257720ustar00rootroot00000000000000ID M14399; SV 1; linear; mRNA; ; PRO; 63 BP. XX AC M14399; XX DT 02-SEP-1999 XX DE E.coli alkaline phosphatase signal mRNA, 5' end. XX KW alkaline phosphatase; signal peptide. XX OS Escherichia coli OC Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacterales; OC Enterobacteriaceae; Escherichia. XX RN [1] RP 1-63 RX PUBMED; 3912261. RA Gray G.L., Baldridge J.S., McKeown K.S., Heyneker H.L., Chang C.N.; RT "Periplasmic production of correctly processed human growth hormone in RT Escherichia coli: natural and bacterial signal sequences are RT interchangeable"; RL Gene 39(2-3):247-254(1985). XX DR MD5; c9b40131b8622946b5aafdf5473b3d43. 
XX FH Key Location/Qualifiers FH FT source 1..63 FT /db_xref="taxon:562" FT /mol_type="mRNA" FT /organism="Escherichia coli" FT CDS 1..>63 FT /db_xref="GOA:P00634" FT /db_xref="InterPro:IPR001952" FT /db_xref="InterPro:IPR017849" FT /db_xref="InterPro:IPR017850" FT /db_xref="InterPro:IPR018299" FT /db_xref="PDB:1AJA" FT /db_xref="PDB:1AJB" FT /db_xref="PDB:1AJC" FT /db_xref="PDB:1AJD" FT /db_xref="PDB:1ALH" FT /db_xref="PDB:1ALI" FT /db_xref="PDB:1ALJ" FT /db_xref="PDB:1ALK" FT /db_xref="PDB:1ANI" FT /db_xref="PDB:1ANJ" FT /db_xref="PDB:1B8J" FT /db_xref="PDB:1ED8" FT /db_xref="PDB:1ED9" FT /db_xref="PDB:1ELX" FT /db_xref="PDB:1ELY" FT /db_xref="PDB:1ELZ" FT /db_xref="PDB:1EW8" FT /db_xref="PDB:1EW9" FT /db_xref="PDB:1HJK" FT /db_xref="PDB:1HQA" FT /db_xref="PDB:1KH4" FT /db_xref="PDB:1KH5" FT /db_xref="PDB:1KH7" FT /db_xref="PDB:1KH9" FT /db_xref="PDB:1KHJ" FT /db_xref="PDB:1KHK" FT /db_xref="PDB:1KHL" FT /db_xref="PDB:1KHN" FT /db_xref="PDB:1URA" FT /db_xref="PDB:1URB" FT /db_xref="PDB:1Y6V" FT /db_xref="PDB:1Y7A" FT /db_xref="PDB:2ANH" FT /db_xref="PDB:2G9Y" FT /db_xref="PDB:2GA3" FT /db_xref="PDB:2MLX" FT /db_xref="PDB:2MLY" FT /db_xref="PDB:2MLZ" FT /db_xref="PDB:3BDF" FT /db_xref="PDB:3BDG" FT /db_xref="PDB:3BDH" FT /db_xref="PDB:3CMR" FT /db_xref="PDB:3DPC" FT /db_xref="PDB:3DYC" FT /db_xref="PDB:3TG0" FT /db_xref="PDB:4KM4" FT /db_xref="PDB:4YR1" FT /db_xref="PDB:5C66" FT /db_xref="PDB:5GAD" FT /db_xref="PDB:5GAF" FT /db_xref="PDB:5GAG" FT /db_xref="PDB:5GAH" FT /db_xref="PDB:5JTL" FT /db_xref="PDB:5JTM" FT /db_xref="PDB:5JTN" FT /db_xref="PDB:5JTO" FT /db_xref="PDB:5JTP" FT /db_xref="UniProtKB/Swiss-Prot:P00634" FT /note="alkaline phosphatase signal peptide" FT /codon_start=1 FT /protein_id="AAA23431.1" FT /transl_table=11 FT /translation="MKQSTIALAVLPLLFTPVTKA" XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 60 gcc 63 // 
scikit-bio-0.6.2/skbio/io/format/tests/data/embl_single_record_upper000066400000000000000000000005301464262511300256130ustar00rootroot00000000000000ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; GTGAAACAAA GCACTATTGC ACTGGCTGTC TTACCGTTAC TGTTTACCCC TGTGACAAAA 60 GCC 63 63 // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_uniprot_record000066400000000000000000000044351464262511300246270ustar00rootroot00000000000000ID 001R_FRG3G Reviewed; 256 AA. AC Q6GZX4; DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. DT 19-JUL-2004, sequence version 1. DT 17-FEB-2016, entry version 31. DE RecName: Full=Putative transcription factor 001R; GN ORFNames=FV3-001R; OS Frog virus 3 (isolate Goorha) (FV-3). OC Viruses; dsDNA viruses, no RNA stage; Iridoviridae; Ranavirus. OX NCBI_TaxID=654924; OH NCBI_TaxID=8295; Ambystoma (mole salamanders). OH NCBI_TaxID=30343; Hyla versicolor (chameleon treefrog). OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). OH NCBI_TaxID=45438; Rana sylvatica (Wood frog). RN [1] RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; RT "Comparative genomic analyses of frog virus 3, type species of the RT genus Ranavirus (family Iridoviridae)."; RL Virology 323:70-84(2004). CC -!- FUNCTION: Transcription activation. {ECO:0000305}. DR EMBL; AY548484; AAT09660.1; -; Genomic_DNA. DR RefSeq; YP_031579.1; NC_005946.1. DR ProteinModelPortal; Q6GZX4; -. DR SwissPalm; Q6GZX4; -. DR GeneID; 2947773; -. DR KEGG; vg:2947773; -. DR Proteomes; UP000008770; Genome. DR GO; GO:0006355; P:regulation of transcription, DNA-templated; IEA:UniProtKB-KW. DR GO; GO:0046782; P:regulation of viral transcription; IEA:InterPro. DR GO; GO:0006351; P:transcription, DNA-templated; IEA:UniProtKB-KW. 
DR InterPro; IPR007031; Poxvirus_VLTF3. DR Pfam; PF04947; Pox_VLTF3; 1. PE 4: Predicted; KW Activator; Complete proteome; Reference proteome; Transcription; KW Transcription regulation. FT CHAIN 1 256 Putative transcription factor 001R. FT /FTId=PRO_0000410512. FT COMPBIAS 14 17 Poly-Arg. SQ SEQUENCE 256 AA; 29735 MW; B4840739BF7D4121 CRC64; MAFSAEDVLK EYDRRRRMEA LLLSLYYPND RKLLDYKEWS PPRVQVECPK APVEWNNPPS EKGLIVGHFS GIKYKGEKAQ ASEVDVNKMC CWVSKFKDAM RRYQGIQTCK IPGKVLSDLD AKIKAYNLTV EGVEGFVRYS RVTKQHVAAF LKELRHSKQY ENVNLIHYIL TDKRVDIQHL EKDLVKDFKA LVESAHRMRQ GHMINVKYIL YQLLKKHGHG PDGPDILTVK TGSKGVLYDD SFRKIYTDLG WKFTPL // scikit-bio-0.6.2/skbio/io/format/tests/data/embl_w_beginning_whitespace000066400000000000000000000004221464262511300262630ustar00rootroot00000000000000 ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. XX SQ Sequence 63 BP; 17 A; 17 C; 12 G; 17 T; 0 other; gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 60 gcc 63 // scikit-bio-0.6.2/skbio/io/format/tests/data/empty000066400000000000000000000000001464262511300217100ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/format/tests/data/error_diff_ids.fastq000066400000000000000000000011401464262511300246550ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_124 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ 
scikit-bio-0.6.2/skbio/io/format/tests/data/error_double_qual.fastq000066400000000000000000000012351464262511300254070ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_double_seq.fastq000066400000000000000000000012351464262511300252350ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_long_qual.fastq000066400000000000000000000011411464262511300250700ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 
YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWYY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_no_qual.fastq000066400000000000000000000007421464262511300245530ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGC +SLXA-B3_649_FC8437_R1_1_1_850_123 @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_del.fastq000066400000000000000000000011401464262511300246740ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_escape.fastq000066400000000000000000000011401464262511300253700ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA 
+SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_null.fastq000066400000000000000000000011421464262511300251040ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_space.fastq000066400000000000000000000011411464262511300252240ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYY WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA 
+SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_tab.fastq000066400000000000000000000011411464262511300246770ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYY YYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_unit_sep.fastq000066400000000000000000000011401464262511300257560ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_qual_vtab.fastq000066400000000000000000000011401464262511300250640ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYY YYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 
GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_short_qual.fastq000066400000000000000000000011371464262511300252750ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYS @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_spaces.fastq000066400000000000000000000011641464262511300243720ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAA TACCTTTGTA GAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYY YYYYYYYYYW YWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGA AAGAGAAATG AGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYY WYYYYWWYYY WYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTT GATCATGATG ATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYY YYYYWYYWYY SYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAA GTTTTTCTCA ACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYY YYYYYYYYYW WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTT AATGGCATAC ACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYY YWYYYYWYWW UWWWQQ 
scikit-bio-0.6.2/skbio/io/format/tests/data/error_tabs.fastq000066400000000000000000000011651464262511300240460ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAA TACCTTTGTA GAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYY YYYYYYYYYW YWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGA AAGAGAAATG AGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYY WYYYYWWYYY WYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTT GATCATGATG ATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYY YYYYWYYWYY SYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAA GTTTTTCTCA ACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYY YYYYYYYYYW WWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTT AATGGCATAC ACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYY YWYYYYWYWW UWWWQQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_at_plus.fastq000066400000000000000000000010441464262511300257730ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA scikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_at_qual.fastq000066400000000000000000000011061464262511300257510ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY 
@SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 scikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_at_seq.fastq000066400000000000000000000010121464262511300255730ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 scikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_in_plus.fastq000066400000000000000000000010621464262511300257750ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FCscikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_in_qual.fastq000066400000000000000000000011371464262511300257570ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW 
@SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGCATACACTCAA +SLXA-B3_649_FC8437_R1_1_1_183_714 YYYYYYYYYYWYYYYWYWWUWWWQ scikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_in_seq.fastq000066400000000000000000000010271464262511300256030ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_183_714 GTATTATTTAATGGscikit-bio-0.6.2/skbio/io/format/tests/data/error_trunc_in_title.fastq000066400000000000000000000010011464262511300261240ustar00rootroot00000000000000@SLXA-B3_649_FC8437_R1_1_1_610_79 GATGTGCAATACCTTTGTAGAGGAA +SLXA-B3_649_FC8437_R1_1_1_610_79 YYYYYYYYYYYYYYYYYYWYWYYSU @SLXA-B3_649_FC8437_R1_1_1_397_389 GGTTTGAGAAAGAGAAATGAGATAA +SLXA-B3_649_FC8437_R1_1_1_397_389 YYYYYYYYYWYYYYWWYYYWYWYWW @SLXA-B3_649_FC8437_R1_1_1_850_123 GAGGGTGTTGATCATGATGATGGCG +SLXA-B3_649_FC8437_R1_1_1_850_123 YYYYYYYYYYYYYWYYWYYSYYYSY @SLXA-B3_649_FC8437_R1_1_1_362_549 GGAAACAAAGTTTTTCTCAACATAG +SLXA-B3_649_FC8437_R1_1_1_362_549 YYYYYYYYYYYYYYYYYYWWWWYWY @SLXA-B3_649_FC8437_R1_1_1_scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_10_seqs000066400000000000000000000002561464262511300232200ustar00rootroot00000000000000>seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 A C GT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 
ACGT >seq1 desc1 AC G T scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_3_seqs_defaults000066400000000000000000000000771464262511300250320ustar00rootroot00000000000000>s_e_q_1 desc 1 UUUU >s_e_q_2 desc 2 CATC >s_e_q_3 desc 3 sits scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_3_seqs_non_defaults000066400000000000000000000001021464262511300256710ustar00rootroot00000000000000>s*e*q*1 desc+1 UUU U >s*e*q*2 desc+2 CAT C >s*e*q*3 desc+3 sit s scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_5_blanks_start_of_file000066400000000000000000000002261464262511300263400ustar00rootroot00000000000000 >seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_5_ws_lines_start_of_file000066400000000000000000000003151464262511300267100ustar00rootroot00000000000000 >seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_6_blanks_start_of_file000066400000000000000000000002271464262511300263420ustar00rootroot00000000000000 >seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_6_ws_lines_start_of_file000066400000000000000000000003311464262511300267070ustar00rootroot00000000000000 >seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_blank_lines_between_records000066400000000000000000000002351464262511300274550ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. 
>_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_blanks_end_of_file000066400000000000000000000002301464262511300255200ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_description_newline_replacement_empty_str000066400000000000000000000001131464262511300324660ustar00rootroot00000000000000>proteinseq detaileddescription with newlines pQqqqPPQQQ >foo AGGAGAATA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_description_newline_replacement_multi_char000066400000000000000000000001551464262511300325750ustar00rootroot00000000000000>proteinseq :-)detailed:-)description with new:-):-)lines:-):-):-) pQqqqPPQQQ >foo :-):-):-):-) AGGAGAATA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_description_newline_replacement_none000066400000000000000000000001271464262511300314040ustar00rootroot00000000000000>proteinseq detailed description with new lines pQqqqPPQQQ >foo AGGAGAATA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_id_whitespace_replacement_empty_str000066400000000000000000000000211464262511300312300ustar00rootroot00000000000000>seq2 A > a b UA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_id_whitespace_replacement_multi_char000066400000000000000000000000651464262511300313410ustar00rootroot00000000000000>>:o>:o>:o>:o>:oseq>:o>:o2>:o A >>:o>:o>:o>:o a b UA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_id_whitespace_replacement_none000066400000000000000000000000351464262511300301460ustar00rootroot00000000000000> seq 2 A > a b UA scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_after_10_seqs000066400000000000000000000002721464262511300261050ustar00rootroot00000000000000>seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 A C GT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 ACGT 
>seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 ACGT >seq1 desc1 AC G T >seq1 desc1 scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_blank_line_after_header000066400000000000000000000000641464262511300302370ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 AAAAA >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_blank_line_within_sequence000066400000000000000000000001001464262511300310070ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 AAA AA >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_blank_sequence000066400000000000000000000000561464262511300264300ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_legacy_format000066400000000000000000000000501464262511300262570ustar00rootroot00000000000000; legacy-seq-id legacy description ACGT scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_missing_header000066400000000000000000000000201464262511300264210ustar00rootroot00000000000000seq1 desc1 ACGT scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_missing_seq_data_first000066400000000000000000000000561464262511300301720ustar00rootroot00000000000000>seq1 desc1 >seq2 desc2 AAAAA >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_missing_seq_data_last000066400000000000000000000000571464262511300300070ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 AAAAA >seq3 desc3 scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_missing_seq_data_middle000066400000000000000000000000551464262511300303000ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_whitespace_line_after_header000066400000000000000000000000771464262511300313100ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 AAAAA >seq3 desc3 CCC 
scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_whitespace_only_line_within_sequence000066400000000000000000000001051464262511300331220ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 AAAAA AAAAA >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_invalid_whitespace_only_sequence000066400000000000000000000000711464262511300305330ustar00rootroot00000000000000>seq1 desc1 ACGT >seq2 desc2 >seq3 desc3 CCC scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_max_width_1000066400000000000000000000000401464262511300241400ustar00rootroot00000000000000>seq1 desc1 A C G T - a c g t . scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_max_width_5000066400000000000000000000002261464262511300241520ustar00rootroot00000000000000>seq1 desc1 ACGT- acgt. >_____seq__2_ A > desc3 AACGG uA > ACGTT GCAcc GG > ACGUU >proteinseq detailed description with new lines pQqqq PPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_mixed_qual_scores000066400000000000000000000000631464262511300254470ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. >da,dadadada 10 hours AAAAT scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_multi_seq000066400000000000000000000002211464262511300237370ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. 
>_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_multi_seq_roundtrip000066400000000000000000000001621464262511300260510ustar00rootroot00000000000000>seq-a a's description ACATAGGTA >seq-b b's description TAGATAGATAGA >seq-c c's description CATCATCATCATCATCATCAT scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_prot_seqs_odd_labels000066400000000000000000000001201464262511300261220ustar00rootroot00000000000000> DEFQ fp > skbio S K B I scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_bio_seq_defaults000066400000000000000000000000221464262511300266050ustar00rootroot00000000000000>f_o_o b a r ACgt scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_bio_seq_non_defaults000066400000000000000000000000251464262511300274620ustar00rootroot00000000000000>f-o-o b_a_r A C g t scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_dna_seq_defaults000066400000000000000000000000221464262511300265760ustar00rootroot00000000000000>f_o_o b a r TAcg scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_dna_seq_non_defaults000066400000000000000000000000251464262511300274530ustar00rootroot00000000000000>f-o-o b_a_r T A c g scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_prot_seq_defaults000066400000000000000000000000211464262511300270170ustar00rootroot00000000000000>f_o_o b a r PqQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_prot_seq_non_defaults000066400000000000000000000000231464262511300276730ustar00rootroot00000000000000>f-o-o b_a_r P q Q scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_rna_seq_defaults000066400000000000000000000000221464262511300266140ustar00rootroot00000000000000>f_o_o b a r uaCG scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_rna_seq_non_defaults000066400000000000000000000000251464262511300274710ustar00rootroot00000000000000>f-o-o b_a_r u a C G 
scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_single_seq000066400000000000000000000000271464262511300240720ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_tabular_msa_different_type000066400000000000000000000001101464262511300273130ustar00rootroot00000000000000> aUG >rnaseq-1 rnaseq desc 1 AuC >rnaseq-2 rnaseq desc 2 AUg scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_ws_lines_between_records000066400000000000000000000004251464262511300270200ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fasta_ws_lines_end_of_file000066400000000000000000000003511464262511300260750ustar00rootroot00000000000000>seq1 desc1 ACGT-acgt. >_____seq__2_ A > desc3 AACGGuA > ACGTTGCAccGG > ACGUU >proteinseq detailed description with new lines pQqqqPPQQQ scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_5_blanks_start_of_file000066400000000000000000000001341464262511300263560ustar00rootroot00000000000000 @foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_5_ws_lines_start_of_file000066400000000000000000000002231464262511300267260ustar00rootroot00000000000000 @foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_blank_lines000066400000000000000000000001321464262511300242370ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_after_header000066400000000000000000000001301464262511300272420ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 
scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_after_plus000066400000000000000000000001301464262511300267750ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_after_seq000066400000000000000000000001301464262511300266020ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_in_seq_at_symbol000066400000000000000000000001311464262511300301610ustar00rootroot00000000000000@foo bar baz AACCGG + MNO @QR @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_within_qual000066400000000000000000000001311464262511300271560ustar00rootroot00000000000000@foo bar baz AACCGG + MNO PQR @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_blank_within_seq000066400000000000000000000001311464262511300270040ustar00rootroot00000000000000@foo bar baz AAC CGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_missing_header000066400000000000000000000000271464262511300264500ustar00rootroot00000000000000seq1 desc1 ACGT + 1234 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_missing_seq_data000066400000000000000000000001471464262511300270040ustar00rootroot00000000000000@seq-1 first sequence ACG T + [[[[ @seq-2 second sequence T GCAC + [[[[[ @seq-3 third sequence + [[[[[ scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_ws_line_after_header000066400000000000000000000001431464262511300276170ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 
scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_ws_line_after_plus000066400000000000000000000001431464262511300273520ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_ws_line_after_seq000066400000000000000000000001431464262511300271570ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_ws_line_within_qual000066400000000000000000000001441464262511300275330ustar00rootroot00000000000000@foo bar baz AACCGG + MNO PQR @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_invalid_ws_line_within_seq000066400000000000000000000001441464262511300273610ustar00rootroot00000000000000@foo bar baz AAC CGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_blank_between_records000066400000000000000000000001371464262511300275160ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_blank_end_of_file000066400000000000000000000001361464262511300265740ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_blank_start_of_file000066400000000000000000000001351464262511300271620ustar00rootroot00000000000000 @foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_seq_sanger000066400000000000000000000001271464262511300253230ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 
scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_whitespace_stripping000066400000000000000000000005131464262511300274260ustar00rootroot00000000000000 @foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_ws_lines_between_records000066400000000000000000000002371464262511300302530ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_ws_lines_end_of_file000066400000000000000000000002671464262511300273350ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_multi_ws_lines_start_of_file000066400000000000000000000002371464262511300277210ustar00rootroot00000000000000 @foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_single_seq_illumina1.3000066400000000000000000000000411464262511300262220ustar00rootroot00000000000000@ bar baz a C G T + a b c d scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_single_seq_illumina1.8000066400000000000000000000001661464262511300262370ustar00rootroot00000000000000@EAS139:136:FC706VJ:2:2104:15343:197393 1:Y:18:ATCACG CCGCCGGTCATCCAATTCATTGCGAAAGGT + n%OiP]XgBh]u:9jO|KN4Op~7Ry;-&. 
scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_whitespace_only_lines000066400000000000000000000001731464262511300263520ustar00rootroot00000000000000@foo bar baz AACCGG + 123456 @bar baz foo TTGGCC + 876543 @baz foo bar GATTTC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_wrapping_as_illumina_no_description000066400000000000000000000015651464262511300312740ustar00rootroot00000000000000@SRR014849.50939 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S @SRR014849.110027 CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG + \aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\ @SRR014849.203935 AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT + `Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_wrapping_as_sanger_no_description000066400000000000000000000015651464262511300307410ustar00rootroot00000000000000@SRR014849.50939 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + ;C?-EA/=60,)%"<=:5<@85@7@8+8<<3;=4:DA3%<;=8-9.A=):B=* scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_wrapping_original_sanger_no_description000066400000000000000000000016021464262511300321320ustar00rootroot00000000000000@SRR014849.50939 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + 
;C?-EA/=60,)%"<=:5< @85@7@8+8< <3;=4:DA3%<;=8-9.A=):B=* scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_writer_illumina1.3_defaults000066400000000000000000000001361464262511300273010ustar00rootroot00000000000000@f_o__o bar baz AaCcGg + PQRSTU @bar baz foo TtGgCc + WVUTSR @ba___z foo bar gAtTtC + TUVWXR scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_writer_sanger_defaults000066400000000000000000000001361464262511300265240ustar00rootroot00000000000000@f_o__o bar baz AaCcGg + 123456 @bar baz foo TtGgCc + 876543 @ba___z foo bar gAtTtC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/fastq_writer_sanger_non_defaults000066400000000000000000000001361464262511300273760ustar00rootroot00000000000000@f%o%%o bar^^baz AaCcGg + 123456 @bar baz foo TtGgCc + 876543 @ba%%%z foo bar gAtTtC + 567893 scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_5_blanks_start_of_file000066400000000000000000000001251464262511300266450ustar00rootroot00000000000000 LOCUS NC_000932 154478 bp DNA circular PLN 15-APR-2009 scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_6_blanks_start_of_file000066400000000000000000000001261464262511300266470ustar00rootroot00000000000000 LOCUS NC_000932 154478 bp DNA circular PLN 15-APR-2009 scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_missing_locus_name000066400000000000000000000001071464262511300261250ustar00rootroot00000000000000LOCUS 154478 bp DNA circular PLN 15-APR-2009 scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_multi_records000066400000000000000000000030761464262511300251320ustar00rootroot00000000000000LOCUS AAB29917 9 aa linear BCT 23-SEP-1994 DEFINITION L-carnitine amidase {N-terminal} ACCESSION AAB29917 VERSION AAB29917.1 GI:545426 DBSOURCE accession AAB29917.1 KEYWORDS . SOURCE Bacteria ORGANISM Bacteria Unclassified. REFERENCE 1 (residues 1 to 9) AUTHORS Joeres,U. and Kula,M.R. TITLE a microbial L-carnitine amidase JOURNAL AMB 40 (5), 606-610 (1994) PUBMED 7764422 REMARK from the original journal article. 
REFERENCE 1 (residues 1 to 9) AUTHORS Joeres,U. and Kula,M.R. TITLE a microbial L-carnitine amidase JOURNAL AMB 40 (5), 606-610 (1994) PUBMED 7764422 COMMENT Method: direct peptide sequencing. FEATURES Location/Qualifiers source 1..9 /organism="Bacteria" Protein 1..>9 /product="L-carnitine amidase" ORIGIN 1 gsreildfk // LOCUS HQ018078 9 bp DNA linear ENV 29-AUG-2010 DEFINITION Uncultured Xylanimonas sp.16S, partial ACCESSION HQ018078 VERSION HQ018078.1 GI:304421728 KEYWORDS ENV. SOURCE uncultured Xylanimonas sp. ORGANISM uncultured Xylanimonas sp. Bacteria; Actinobacteria; Micrococcales; Promicromonosporaceae; Xylanimonas; environmental samples. FEATURES Location/Qualifiers source 1..9 /country="Brazil: Parana, Paranavai" /environmental_sample rRNA complement(<2..>8) /product="16S ribosomal RNA" ORIGIN 1 catgcaggc // scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_single_record000066400000000000000000000020761464262511300250750ustar00rootroot00000000000000LOCUS ECOALKP 63 bp mRNA linear BCT 26-APR-1993 DEFINITION alkaline phosphatase signal mRNA, 5' end. ACCESSION M14399 VERSION M14399.1 KEYWORDS alkaline phosphatase; signal peptide. SOURCE Escherichia coli ORGANISM Escherichia coli Bacteria; Proteobacteria; Gammaproteobacteria; Enterobacteriales; Enterobacteriaceae; Escherichia. COMMENT Original source text: E.coli, cDNA to mRNA. 
FEATURES Location/Qualifiers source 1..63 /db_xref="taxon:562" /mol_type="mRNA" /organism="Escherichia coli" CDS 1..>63 /db_xref="taxon:562" /db_xref="taxon:561" /note="alkaline phosphatase signal peptide" /codon_start=1 /protein_id="AAA23431.1" /transl_table=11 /translation="MKQSTIALAVLPLLFTPVTKA" ORIGIN 1 gtgaaacaaa gcactattgc actggctgtc ttaccgttac tgtttacccc tgtgacaaaa 61 gcc // scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_single_record_lower000066400000000000000000000001321464262511300262740ustar00rootroot00000000000000LOCUS AAB29917 9 aa linear BCT 23-SEP-1994 ORIGIN 1 gsreildfk // scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_single_record_upper000066400000000000000000000001321464262511300262770ustar00rootroot00000000000000LOCUS AAB29917 9 aa linear BCT 23-SEP-1994 ORIGIN 1 GSREILDFK // scikit-bio-0.6.2/skbio/io/format/tests/data/genbank_w_beginning_whitespace000066400000000000000000000001221464262511300267460ustar00rootroot00000000000000 LOCUS NC_000932 154478 bp DNA circular PLN 15-APR-2009 scikit-bio-0.6.2/skbio/io/format/tests/data/gff3_bad_missing_directive000066400000000000000000000003161464262511300260060ustar00rootroot00000000000000##sequence-region ctg123 1 1497228 ctg123 . gene 1000 9000 . + . ID=gene00001;Name=EDEN ctg123 . TF_binding_site 1000 1012 . + . Parent=gene00001 ctg123 . mRNA 1050 9000 . + . ID=mRNA00001;Parent=gene00001 scikit-bio-0.6.2/skbio/io/format/tests/data/gff3_bad_wrong_columns000066400000000000000000000007131464262511300251740ustar00rootroot00000000000000##gff-version 3 ##sequence-region Chromosome 1 4641652 #!genome-build European Nucleotide Archive ASM584v2 #!genome-version ASM584v2 #!genome-date 2014-08 #!genome-build-accession GCA_000005845.2 #!genebuild-last-updated 2014-08 Chromosome chromosome 1 4641652 . . . ID=chromosome:Chromosome;Alias=U00096.3;Is_circular=true ### Chromosome biological_region 148 148 . + . 
external_name=Promoter thrLp (RegulonDB:ECK120010236);logic_name=regulondb_promoter scikit-bio-0.6.2/skbio/io/format/tests/data/gff3_dna000066400000000000000000000001521464262511300222310ustar00rootroot00000000000000##gff-version 3 NC_1 Prodigal_v2.60 gene 1 9 . + 0 ID=gene1;Name=FXR ##FASTA >NC_1 species X ATGCATGCATGC scikit-bio-0.6.2/skbio/io/format/tests/data/gff3_multi_record000066400000000000000000000015611464262511300241640ustar00rootroot00000000000000##gff-version 3 ##sequence-region Chromosome 1 4641652 #!genome-build European Nucleotide Archive ASM584v2 #!genome-version ASM584v2 #!genome-date 2014-08 #!genome-build-accession GCA_000005845.2 #!genebuild-last-updated 2014-08 Chromosome European Nucleotide Archive chromosome 1 4641652 . . . Alias=U00096.3;ID=chromosome:Chromosome;Is_circular=true ### Chromosome regulondb_feature biological_region 148 148 . + . external_name=Promoter thrLp (RegulonDB:ECK120010236);logic_name=regulondb_promoter gi|556503834|ref|NC_000913.3| Prodigal_v2.60 gene 337 2799 1.8 + 0 ID=1_1;gc_cont=0.427 gi|556503834|ref|NC_000913.3| Prodigal_v2.60 CDS 337 2799 333.8 + 0 ID=1_2;Parent=1_1;rbs_motif=GGAG/GAGG;rbs_spacer=5-10bp NC 7 Prodigal_v2.60 gene 1 100 1.8 + 0 ID=1_1;gene=FXR receptor NC 7 Prodigal_v2.60 exon 1 50 1.8 + 0 Parent=1_1 NC 7 Prodigal_v2.60 exon 56 100 1.8 + 0 Parent=1_1 scikit-bio-0.6.2/skbio/io/format/tests/data/gff3_single_record000066400000000000000000000007711464262511300243150ustar00rootroot00000000000000##gff-version 3 ##sequence-region Chromosome 1 4641652 #!genome-build European Nucleotide Archive ASM584v2 #!genome-version ASM584v2 #!genome-date 2014-08 #!genome-build-accession GCA_000005845.2 #!genebuild-last-updated 2014-08 Chromosome European Nucleotide Archive chromosome 1 4641652 . . . Alias=U00096.3;ID=chromosome:Chromosome;Is_circular=true ### Chromosome regulondb_feature biological_region 148 148 . + . 
external_name=Promoter thrLp (RegulonDB:ECK120010236);logic_name=regulondb_promoter scikit-bio-0.6.2/skbio/io/format/tests/data/illumina_full_range_as_illumina.fastq000066400000000000000000000006521464262511300302710ustar00rootroot00000000000000@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ scikit-bio-0.6.2/skbio/io/format/tests/data/illumina_full_range_as_sanger.fastq000066400000000000000000000006521464262511300277360ustar00rootroot00000000000000@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + _^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! 
scikit-bio-0.6.2/skbio/io/format/tests/data/illumina_full_range_original_illumina.fastq000066400000000000000000000006521464262511300314720ustar00rootroot00000000000000@FAKE0005 Original version has PHRED scores from 0 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACG + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0006 Original version has PHRED scores from 62 to 0 inclusive (in that order) GCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_and_blast7_default000066400000000000000000000007711464262511300257300ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query2 # Subject: subject2 # Fields: query id, subject id, % identity, alignment length, mismatches, gap opens, q. start, q. end, s. start, s. end, evalue, bit score # 1 hits found query2 subject2 100.00 8 0 1 0 9 3 10 2e-05 9.8 # BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: query2 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap opens,q. start,q. end,s. start,s. end,evalue,bit score query2 subject1 70.00 9 1 0 1 8 4 9 0.025 11.7 scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_invalid_differing_fields000066400000000000000000000006241464262511300271740ustar00rootroot00000000000000# BLASTP 2.2.31+ # Query: query1 # Subject: subject1 # 0 hits found # BLASTP 2.2.31+ # Query: query1 # Subject: subject3 # 0 hits found # BLASTP 2.2.31+ # Query: query2 # Subject: subject1 # Fields: query id, subject id, q. 
start # 1 hits found query1 subject1 4 # BLASTP 2.2.3 [May-13-2002] # Database: other_vertebrate # Subject: subject1 # Fields: Query id,Subject id,subject ids query2 subject1 N/A scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_invalid_too_many_columns000066400000000000000000000004061464262511300272740ustar00rootroot00000000000000# BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: AF178033 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap openings,q. start,q. end,s. start,s. end,e-value,bit score query1 subject1 80.00 7 2 0 0 9 4 9 0.023 14.4 0.145 scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_mixed_nans000066400000000000000000000004501464262511300243250ustar00rootroot00000000000000# BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: query2 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap opens,q. start,q. end,s. start,s. end,evalue,bit score N/A subject1 N/A 7 1 0 N/A 8 4 10 N/A 15.5 query2 subject1 90.00 8 N/A 0 0 8 N/A 9 1e-05 N/A scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_multi_line000066400000000000000000000005311464262511300243410ustar00rootroot00000000000000# BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: query1 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap opens,q. start,q. end,s. start,s. end,evalue,bit score query1 subject1 90.00 7 1 0 0 8 4 10 1e-05 15.5 query1 subject1 70.00 8 0 1 0 9 5 7 0.231 7.8 query1 subject1 90.00 5 1 1 0 0 2 10 0.022 13.0 scikit-bio-0.6.2/skbio/io/format/tests/data/legacy9_single_line000066400000000000000000000003731464262511300244740ustar00rootroot00000000000000# BLASTN 2.2.3 [May-13-2002] # Database: other_vertebrate # Query: query1 # Fields: Query id,Subject id,% identity,alignment length,mismatches,gap opens,q. start,q. end,s. start,s. 
end,evalue,bit score query1 subject1 90.00 7 1 0 0 8 4 10 1e-05 15.5 scikit-bio-0.6.2/skbio/io/format/tests/data/longreads_as_illumina.fastq000066400000000000000000000203041464262511300262330ustar00rootroot00000000000000@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn + eeeccccccc`UUU^UWWeegffhhhhhhhhhhhhhhhhhhggghhhhhhhhhfgfeeeee\\\\ceeeeeeeeeeeeeec^^^YRPOSNVU\YTMMMSMRKKKRUUNNNNS[`aa```\bbeccccccccYUUUbceeee\[`a`\ZYRRRPPP[\\\XXZaWWXeeeeeeccacaccc\WWSSQRPMMKKKLKKKKKKKKPPRRMMLLLPVPPPKKKKKQQTTTPRPPQPMLLMKRRRPPKMKKRLLKKMKKLLKRTPPPQRMMLL@KKKKLLKLLLLXKKKKW\KKLKKKLKKKKLLLQUYXYTLMMPKKKKPPNNKKKK@KKPXPVLLKKKKLRMKLLKKPVKKKKLLLJPPPPRMOROOOOKKKOSSSOOORUZXUUUQMNNZV][Z@@@ @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn + eeeeeeeeecccceeeefecccca`````\[SSSS__a\TTTYaaaaa__^WYW[^[WXWXW[WSSSQZ\\RKKKTPSKKKPPKKKMKKQPVVVTTTTPRKMMLLPPPTVTWMNNRSSWW][[ZZZZXXSSN@NSKKKTVWTT@@ @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn + 
hhhhbbbbh^^UUUhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUh`hhhhh^^^hhhhbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhUURRRdhbdYYRRW\NLLLLKW\]]^^YQLNNNNV]bddhdhggghhhhhhhhhdZZXXPPPXXa^^^habghhhhhhggghhggghhhhhhhhhhhhhhhhhhaabbhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhc^\\\chhhggghhhhhhhhhggghhhhhhhhhhggghggghhhhhhhhhhhhhhhhhhhhhh^]ZXXWW\\TLLLLM__`dfhhhhhhhhhgg^^^^dhhheeXXXZdhhaa@@ @FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn + hhhh^^^^^hhhhhhhhhhhhhhggghhhhhhhhhhhhhggghhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhggghhhhhhhhhhh````hh]]]]hhhhhhhhhhhhhhhhhhhhhhhhhhddhddZRRRRRcVhhhhhhhhhhhhhhhhhhhhhbb__gghhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhaaaahgbcbghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhggbbchhhhhhggghhbbbg\bbhhhhhhhhhfffhhhhhhgggggghhhhhhhhhhhhhhhggghhggd^^]]^dedd^NNNNNZYWOLL@@ @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn + 
eee`__eeeeeeeeeeggaYYY_aeeeeffghghhhhhhhhhhhhhhhhhhhhhhheeeeeeeee^\a`_PPPWWOPP[[WWZ^``accb^^^cc````c`UUUc^ccc\\\\\``]^]][[[\[PPPWW[[^^^``^XTTT\`aaa__^\]]^__PPPSQYYcc`^^^ceeeeeeeeeeeeea````[[OOOOMQQ\NNNNWKLLPPPPPP@QRLLNQS@RVYUUUU[ZWQQNMMS@SUTQPPVVTTRMLMQR@QRPPQPPPQKKLKKQPP\\TLLLLLLKPQKKKKKKLKKKLPKKKKLKKPTTLLKKKKPRPPPMKKKKKKKKJJPPPMMPPMMPKKKKKKKKKJRKKKKKLLQQLLLLLNNLLLLTTNNIINLLQQLLIIKKKKIIKKKKKKMPMKIKKKKIIIKKKKKKKKKKKKKKKKKKKKKKKHKKLKKKKKKHKKKKKIINNMHKKKNNNKKKKKKKKKKKMHHRRLLLKKKKKKKKKKGOKKK@M@@@@@@@@@@@@ @FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn + eee[WYY_ceeeeeeeffecb`````a__OOOOSU[ZUURQQRUUVUQQSRRSW[[\^^SSSTYY]`a```_[[\\a\YTTTYaac^^\acccceeebbbbbbbeebccceeeeeca``\\WWWWOOOS][[[XXUWWZWWX[WWX^aaaa`\^^^ccaaa__^^WWWWXLLLQRVVVPKKKKKKKKLLPPTQ[[OOPTW`_][[[[[SRQVVVPPKKKLLRV\\\VTKLLLLRSUUU@@@@@@@@@@@@@@@@@@@@ @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn + 
hhhhhbb]]UUUhhhhbbbhhhhhhhhggghhhhhfUUUhhhhhhhhhhggghhhhhhhhbbbhhhhhhhhhhhhhhhhhh____hhhhhhhhhhhhhggghhhh^^^\ZhhddhYYNNNNNVTSSY^haaVQQSSdWddbdab\_______gghhhhhhhhhhaaahhhhhhhhhggghhhhhhhhhhhhhbbbbhhhhhhhhhhhhhhhhhhhhhhhhhhhhUUUUcdhhgda^^c_VVVVVQQQQcWXddhhhhhhhggghhhhhhhhggghhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhggghhhhhhhhhhhhhhh\\^\\hhhhh^^^\ZhURcccWQLLKKKRW\\YYLLLLKKLLLJJJRROUUZ_URWOOOWNYWWX[Yafhhhhhhhhhed[^eTTOOLLLLLTYZZZY]^_b[[VXXXdddddd____ddddd@ @FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn + hhh^UUU^^ggghhhhhhhhhfffhhhhhhhhhhhfffggghhhhhhhhhhhhhhhhhhhhfffhhhhhhhhhhggghhh____hhhhdhdPPPPOOLLLLQQ^\WLLLYLLLLLLLKKKKRRLLLTYRKLLLLYPaadddghhhhhhhhhhha^^`PQQOOOMMMY^\OQSfhhhhhhhhhhhhhhhhhhdbbgga\NNLKKQP^^[TLOOQ\Ueaa^YX[\PPNSSSSNNLNNVV^^fdhddgh`bbhhhggghhhhhhhbbb`hhhgggggghhhhhhhhhhhhhhhhhhhhhhddPNNLLWQQZLLLLMVVV_dhhhhhh^^^hhhhhhhhhhhggghhhhhhhhhhhhhhhhhhhhXXSQQVVVTTTT`dZhdddddhhhhh^^XVTT]_\\YRKKKKKRRRRU@@ @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn + 
hhhhbbbbhZZZbbbbhhh^^^ggghhhhggghhhhhhhhhhggghhhggghhhhhhh____hehbbbhb``ZZZZdc^a__cUUSSTTTT[[[fhh]]``hhhhhhhhZZZYYhhh^^^bbbhhhZZZZheehhhhhbbbahahddcbSSSS^Saaad^dhhhbgghhZZZghhhhhhggZZZgghhhhhZZZhhhhggghhhhhh]]^^]hddaffYYPPPPNSUeaeaa^\Z\`^XVVVPPPXYd```ccacVVVV\NPPPPQQc`__aUWZZZhWgghhhhhZZZZ^]hdbbbaNNNNNZVST\@ @FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn + hh`XSSSTddhh\\\]hhhhhhhhhbbbbhhghhhbbZZZZhhhhhhhhhhhhhhhhhhhhhhhhheZZUUUcchhhhhhhhhhhhhhhhhhhddXSSSQQSS__UUUbb[[acc`\LLLLLQ[KKKKUTXNNOO\\\WbhhhZ]]\\ggZZhhhhhhbb__^^^hhh____hb^UUUghccbh^a^^bb[ddPPPPPaSaccbaZ\_aVVV]NNNNL\RQR^SQRKKKN\PKKKKLYSdZ^^dhhhhhbbbbh]ZZZhhhhhhh[[__^\NNNNV\`XXXWW[[SSTThdddhhhhhhhhhhhhh[XXXghhhhhhhhhhh^^^^^hhhhhhhhhhhb`bZTTTRXdhhhhhhhhhhhhhhhhggXXXgggh`\`ddee_\MMMMM`c___ccddddehhhZZZXVVeebbb_QSSSX^ecc@ scikit-bio-0.6.2/skbio/io/format/tests/data/longreads_as_sanger.fastq000066400000000000000000000203041464262511300257000ustar00rootroot00000000000000@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTATGCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaactttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtcatttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaattaaaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn + 
FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFFD???:3104/76=:5...4.3,,,366////4<;!!! @FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtttaatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn + FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTTGGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATAGGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTGCCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTGTTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn + IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHIIIIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIIIIIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! 
@FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGACGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTGCTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCATTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGCTTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn + IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7IIIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIBBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIIIGGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATTCCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTATACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttancaacaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgttttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaacgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaaggggggggaagtaggngnnnnnnnnnnnn + FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<<88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@11142::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,,,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1.,*,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,,,,,,,,,,(0,,,!.!!!!!!!!!!!! 
@FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACTTCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATGAGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnnnnnnnnnnnnnnnnnnnn + FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:555:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBBB@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!!!!!!!!!!!!!!!!!!!! @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGTGAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCAGAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGACCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATAGTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTCAGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn + IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIIII@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIIIIIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@777772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGIIIIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! 
@FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGATTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAAATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATCTCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAGCACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacagggggataggnn + III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII@@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??GEIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EIIIIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3,,,,,33336!! @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAATTTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATATTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATTTTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn + IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B@@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE?EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! 
@FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTACATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATTTTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTATTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACATATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagactgccaaggcacacaggggataggn + IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIIIIIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC@@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIIIII?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFIII;;;977FFCCC@24449?FDD! scikit-bio-0.6.2/skbio/io/format/tests/data/longreads_original_sanger.fastq000066400000000000000000000223721464262511300271100ustar00rootroot00000000000000@FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] tcagTTAAGATGGGATAATATCCTCAGATTGCGTGATGAACTTTGTTCTGGTGGAGGAGAAGGAAGTGCATTCGACGTAT GCCCGTTTGTCGATATTTGtatttaaagtaatccgtcacaaatcagtgacataaatattatttagatttcgggagcaact ttatttattccacaagcaggtttaaattttaaatttaaattattgcagaagactttaaattaacctcgttgtcggagtca tttgttcggttattggtcgaaagtaaccncgggaagtgccgaaaactaacaaacaaaagaagatagtgaaattttaatta aaanaaatagccaaacgtaactaactaaaacggacccgtcgaggaactgccaacggacgacacagggagtagnnn +FSRRS4401BE7HA [length=395] [gc=36.46] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=95] FFFDDDDDDDA666?688FFHGGIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIGHGFFFFF====DFFFFFFFFFFFFFF D???:3104/76=:5...4.3,,,366////4<;!!! 
@FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] tcagCCAGCAATTCCGACTTAATTGTTCTTCTTCCATCATTCATCTCGACTAACAGTTCTACGATTAATGAGTTTGGCtt taatttgttgttcattattgtcacaattacactactgagactgccaaggcacncagggataggnn +FSRRS4401BRRTC [length=145] [gc=38.62] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=74] FFFFFFFFFDDDDFFFFGFDDDDBAAAAA=<4444@@B=555:BBBBB@@?8:8<<;;;;9944/!/4,,,57855!! @FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] tcagTTTTCTTAAATTACTTGAATCTGTTGAAGTGGATGTCCACTTTTGTATGCCAAATATGCCCAGCGTATACGATCTT GGCCACATCTCCACATAATCATCAGTCGGATGCAAAAAGCGATTAAACTAAAAATGAATGCGTTTTTAGATGAGTAAATA GGTAATACTTTGTTTAAATAATAAATGTCACAAACAGAACGCGGATTACAGTACCTGAAAATAGTTGTACTGTATCTGTG CCGGCACTTCCTCGGCCCTGAGAAGTTGTCCCGTTGTTTCCATTCGCACCATCCAATGGCCAAAGTTTGCGAAGAATCTG TTCCGTTCCATTACCAATTGTTTTTCCATGctgagactgccaaggcacacaggggataggnn +FSRRS4401B64ST [length=382] [gc=40.58] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=346] IIIICCCCI??666IIIIIIIIIIIIIIIIIIIIIIIIIIIIII6666IAIIIII???IIIICCCIIIIIIIIIIIIIII IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII66333EICE::338=/----,8=>>??:2-////7>CEEIEIHHHII IIIIIIIE;;9911199B???IBCHIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIBBCCIIIIIIIIIIIIIIIIIII IIIIIIIIIIIIIIIGGGIIIIIIIIID?===DIIIHHHIIIIIIIIIHHHIIIIIIIIIIHHHIHHHIIIIIIIIIIII IIIIIIIIII?>;9988==5----.@@AEGIIIIIIIIIHH????EIIIFF999;EIIBB!! 
@FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] tcagTTTTTGGAGAATTCCGTCAGGGACGGCATGGCATATTTGTGGGTTCGGCACGGCGTCCTGGCCAAGAAGAAGAAGA CGAATTAGCCCGTTAATTTAATGACACCTTCCCCAATTTTGCAGCAATGATTGGTTCATTCTTGGCGGTGCGTTTTTGTG CTTCGTCGAATTGTTGGCCATTTTGGTCCACCGGCCATCATCTTTACGCTATCCGACTGATTGGAAATCACCGCCTAGCA TTTTGCCGAAGATTGTTGCGTTGTACGGCCATGTGCTGATTGTTTACATTGGCATTCTTGGCAATTTGTCCTTGGTCGGC TTTGACGGCAAATTTGCGGTGTTAAGTctgagactgccaaggcacacagggggatagggnn +FSRRS4401EJ0YH [length=381] [gc=48.29] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=343] IIII?????IIIIIIIIIIIIIIHHHIIIIIIIIIIIIIHHHIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII IIIIIIIIHHHIIIIIHHHIIIIIIIIIIIAAAAII>>>>IIIIIIIIIIIIIIIIIIIIIIIIIIEEIEE;33333D7I IIIIIIIIIIIIIIIIIIIICC@@HHIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIHHHIIIIIIIIIIIII BBBBIHCDCHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIHHCCDIIIIIIHHHIICCCH=CCIIIIIIIII GGGIIIIIIHHHHHHIIIIIIIIIIIIIIIHHHIIHHE??>>?EFEE?/////;:80--!! @FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] tcagTTGACCGGCGTTGTGTAACAATAATTCATTATTCTGAGACGATGCCAATGTAATCGACGGTTTATGCCCAATTATT CCCATCTATGCTTAACTGATCAAATACTATTTGCATTACGTCACGAAATTGCGCGAACACCGCCGGCCGACAATAATTTA TACCGGACATACCGGAGTTGATGGTAATCGGTAAAGAGTTTTATTTAATTATntattatcnctattaattattgttanca acaatgtgcacgctntgccgcccgccgccgccgtgtcggtaggaccccggacggacccggacccggttcgggtacccgtt ttcgggttcccggaaccgtttttcgggtacccggttttttcggggggccccccggtaaaaaaccggggaaccccctaaaa cgggtaaacgtaccgtaagggaccccctaaacgggggccccgaaaaaccgggacccaaaccggggggaaacggttaaagg ggggggaagtaggngnnnnnnnnnnnn +FSRRS4401BK0IB [length=507] [gc=49.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=208] FFFA@@FFFFFFFFFFHHB:::@BFFFFGGHIHIIIIIIIIIIIIIIIIIIIIIIIFFFFFFFFF?=BA@11188011<< 88;?AABDDC???DDAAAADA666D?DDD=====AA>?>><<<=<11188<>?@@1114 2::DDA???DFFFFFFFFFFFFFBAAAA<<0000.22=////8,--111111!23--/24!37:6666<;822/..4!46 521177553.-.23!231121112,,-,,211==5------,12,,,,,,-,,,-1,,,,-,,155--,,,,13111.,, 
,,,,,,++111..11..1,,,,,,,,,+3,,,,,--22-----//----55//**/--22--**,,,,**,,,,,,.1., *,,,,***,,,,,,,,,,,,,,,,,,,,,,,),,-,,,,,,),,,,,**//.),,,///,,,,,,,,,,,.))33---,, ,,,,,,,,(0,,,!.!!!!!!!!!!!! @FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] tcagTTATTGCAGTCGTTCCGCGCCATCGCCGGTAACCGTCCGCGTGTTATTCTGTGTATCGGCCAACCTTCGTATAACT TCGTATAATGTATGCTATACGAAGTTATTACGATCTATACCGGCGAAACTCAGCCGAAAGGTCTCGCGGTAGAGCCTATG AGCTGCCCGACCGATGCATTTAAATTTCCGGGGATCGtcgctgatctgagactgccaaaggcacactagggggataggnn nnnnnnnnnnnnnnnnnn +FSRRS4401ARCCB [length=258] [gc=46.90] [flows=800] [phred_min=0] [phred_max=38] [trimmed_length=193] FFF<8::@DFFFFFFFGGFDCAAAAAB@@000046<;66322366762243348<<=??4445::>ABAAA@<<==B=:5 55:BBD??=BDDDDFFFCCCCCCCFFCDDDFFFFFDBAA==88880004><<<99688;889<889?BBBBA=???DDBB B@@??88889---237771,,,,,,,,--1152<<00158A@><<<<<43277711,,,--37===75,----34666!! !!!!!!!!!!!!!!!!!! @FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] tcagGTTTTAAATCGCTTTCCAAGGAATTTGAGTCTAAATCCGGTGGATCCCATCAGTACAAATGCGGCGACAAGGCCGT GAAAACACTGCTTAATTCTTTGCACTTTTTGGCCACCTTTTTGGAAATGTTGTTTTGTGTTCTCAAAATTTTCCATCTCA GAACAAACATTCCATCGGGCTGATGTTGTGGCTTTTGGCGCGCGAAGTGCTGCTACTGCGCGGCAAAATCAGTCGCCAGA CCGGTTTTGTTGTGGACGACAAAGTGATCATGCCTGACTTGTACTTCTACCGCGATCCGCAAGCGCGAATTGGTCACATA GTTATAGAATTTTTGAGCCTTTTTCTTGACATAAAAAGTGTGGTTTTAAAAATTTCCTGGCAGGACCCACGCCAACGTTC AGGAATAATATCTTTTAAAAAGctgagactgccaaggcacacaggggataggn +FSRRS4401CM938 [length=453] [gc=44.15] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=418] IIIIICC>>666IIIICCCIIIIIIIIHHHIIIIIG666IIIIIIIIIIHHHIIIIIIIICCCIIIIIIIIIIIIIIIII I@@@@IIIIIIIIIIIIIHHHIIII???=;IIEEI:://///7544:?IBB72244E8EECEBC=@@@@@@@HHIIIIII IIIIBBBIIIIIIIIIHHHIIIIIIIIIIIIICCCCIIIIIIIIIIIIIIIIIIIIIIIIIIII6666DEIIHEB??D@7 77772222D89EEIIIIIIIHHHIIIIIIIIHHHIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHHIIIIII IIIIIIIII==?==IIIII???=;I63DDD82--,,,38==::----,,---+++33066;@6380008/:889<:BGII 
IIIIIIIFE?@C<<7999EEEEEE@@@@EEEEE! @FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] tcagTTTAATTTGGTGCTTCCTTTCAATTCCTTAGTTTAAACTTGGCACTGAAGTCTCGCATTTATAACTAGAGCCCGGA TTTTAGAGGCTAAAAAGTTTTCCAGATTTCAAAATTTATTTCGAAACTATTTTTCTGATTGTGATGTGACGGATTTCTAA ATTAAATCGAAATGATGTGTATTGAACTTAACAAGTGATTTTTATCAGATTTTGTCAATGAATAAATTTTAATTTAAATC TCTTTCTAACACTTTCATGATTAAAATCTAACAAAGCGCGACCAGTATGTGAGAAGAGCAAAAACAACAAAAAGTGCTAG CACTAAAGAAGGTTCGAACCCAACACATAACGTAAGAGTTACCGGGAAGAAAACCACTctgagactgccaaggcacacag ggggataggnn +FSRRS4401EQLIK [length=411] [gc=34.31] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=374] III?666??HHHIIIIIIIIIGGGIIIIIIIIIIIGGGHHHIIIIIIIIIIIIIIIIIIIIGGGIIIIIIIIIIHHHIII @@@@IIIIEIE111100----22?=8---:-------,,,,33---5:3,----:1BBEEEHIIIIIIIIIIIB??A122 000...:?=024GIIIIIIIIIIIIIIIIIIECCHHB=//-,,21??<5-002=6FBB?:9<=11/4444//-//77??G EIEEHIACCIIIHHHIIIIIIICCCAIIIHHHHHHIIIIIIIIIIIIIIIIIIIIIIEE1//--822;----.777@EII IIII???IIIIIIIIIIIHHHIIIIIIIIIIIIIIIIIIII994227775555AE;IEEEEEIIIII??9755>@==:3, ,,,,33336!! @FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] tcagTTTTCAAATTTTCCGAAATTTGCTGTTTGGTAGAAGGCAAATTATTTGATTGAATTTTGTATTTATTTAAAACAAT TTATTTTAAAATAATAATTTTCCATTGACTTTTTACATTTAATTGATTTTATTATGCATTTTATATTTGTTTTCTAAATA TTCGTTTGCAAACTCACGTTGAAATTGTATTAAACTCGAAATTAGAGTTTTTGAAATTAATTTTTATGTAGCATAATATT TTAAACATATTGGAATTTTATAAAACATTATATTTTTctgagactgccaaggcacacagggggataggn +FSRRS4401AOV6A [length=309] [gc=22.98] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=273] IIIICCCCI;;;CCCCIII???HHHIIIIHHHIIIIIIIIIIHHHIIIHHHIIIIIII@@@@IFICCCICAA;;;;ED?B @@D66445555<<>AAIIIIIIII;;;::III???CCCIII;;;;IFFIIIIICCCBIBIEEDC4444?4BBBE? EIIICHHII;;;HIIIIIIHH;;;HHIIIII;;;IIIIHHHIIIIII>>??>IEEBGG::1111/46FBFBB?=;=A?97 771119:EAAADDBD7777=/111122DA@@B68;;;I8HHIIIII;;;;?>IECCCB/////;745=! 
@FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] tcagTTTTGATCTTTTAATAATGAATTTTAATGTGTTAAAATGATTGCATTGATGGCATAACCGCATTTAAATTAATTAC ATGAAGTGTAAGTATGAAATTTTCCTTTCCAAATTGCAAAAACTAAAATTTAAAATTTATCGTAAAAATTAACATATATT TTAAACGATTTTAAGAAACATTTGTAAATTATATTTTTGTGAAGCGTTCAAACAAAAATAAACAATAAAATATTTTTCTA TTTAATAGCAAAACATTTGACGATGAAAAGGAAAATGCGGGTTTGAAAATGGGCTTTGCCATGCTATTTTCATAATAACA TATTTTTATTATGAATAATAAATTTACATACAATATATACAGTCTTAAATTTATTCATAATATTTTTGAGAATctgagac tgccaaggcacacaggggataggn +FSRRS4401EG0ZW [length=424] [gc=23.82] [flows=800] [phred_min=0] [phred_max=40] [trimmed_length=389] IIA94445EEII===>IIIIIIIIICCCCIIHIIICC;;;;IIIIIIIIIIIIIIIIIIIIIIIIIF;;666DDIIIIII IIIIIIIIIIIIIEE94442244@@666CC<>==HH;;IIIIIICC @@???III@@@@IC?666HIDDCI?B??CC////-=323?423,,,/=1,,,,-:4E ;??EIIIIICCCCI>;;;IIIIIII<<@@?=////7=A99988<<4455IEEEIIIIIIIIIIIII<999HIIIIIIIII II?????IIIIIIIIIIICAC;55539EIIIIIIIIIIIIIIIIHH999HHHIA=AEEFF@=.....AD@@@DDEEEEFI II;;;977FFCCC@24449?FDD! 
scikit-bio-0.6.2/skbio/io/format/tests/data/misc_dna_as_illumina.fastq000066400000000000000000000013771464262511300260430ustar00rootroot00000000000000@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + h^TJh^TJh^TJh^TJh^TJh^TJh^TJh^ scikit-bio-0.6.2/skbio/io/format/tests/data/misc_dna_as_sanger.fastq000066400000000000000000000013771464262511300255100ustar00rootroot00000000000000@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? 
scikit-bio-0.6.2/skbio/io/format/tests/data/misc_dna_original_sanger.fastq000066400000000000000000000013771464262511300267110ustar00rootroot00000000000000@FAKE0007 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0008 Original version has mixed case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) gTcatAGcgTcatAGcgTcatAGcgTcatAGcgTcatAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0009 Original version has lower case unambiguous DNA with PHRED scores from 0 to 40 inclusive (in that order) tcagtcagtcagtcagtcagtcagtcagtcagtcagtcagt + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0010 Original version has mixed case ambiguous DNA and PHRED scores of 40, 30, 20, 10 (cycled) gatcrywsmkhbvdnGATCRYWSMKHBVDN + I?5+I?5+I?5+I?5+I?5+I?5+I?5+I? scikit-bio-0.6.2/skbio/io/format/tests/data/misc_rna_as_illumina.fastq000066400000000000000000000014061464262511300260520ustar00rootroot00000000000000@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefgh @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + cdefghcdefghcdefghcdefghcdefgh 
scikit-bio-0.6.2/skbio/io/format/tests/data/misc_rna_as_sanger.fastq000066400000000000000000000014061464262511300255170ustar00rootroot00000000000000@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI scikit-bio-0.6.2/skbio/io/format/tests/data/misc_rna_original_sanger.fastq000066400000000000000000000014061464262511300267200ustar00rootroot00000000000000@FAKE0011 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ACGUACGUACGUACGUACGUACGUACGUACGUACGUACGUA + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0012 Original version has mixed case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) gUcauAGcgUcauAGcgUcauAGcgUcauAGcgUcauAGcg + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0013 Original version has lower case unambiguous RNA with PHRED scores from 0 to 40 inclusive (in that order) ucagucagucagucagucagucagucagucagucagucagu + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHI @FAKE0014 Original version has mixed case ambiguous RNA with PHRED scores from 35 to 40 inclusive (cycled) gaucrywsmkhbvdnGAUCRYWSMKHBVDN + DEFGHIDEFGHIDEFGHIDEFGHIDEFGHI 
scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_L&L_CA_data_scores000066400000000000000000000005371464262511300263060ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 Species 3 2 Species1 0.408869425742 0.0695518116298 Species2 -0.1153860437 -0.299767683538 Species3 -0.309967102571 0.187391917117 Site 3 2 Site1 -0.848956053187 0.882764759014 Site2 -0.220458650578 -1.34482000302 Site3 1.66697179591 0.470324389808 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_PCoA_sample_data_3_scores000066400000000000000000000030601464262511300277250ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.15704469605 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 
0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error1000066400000000000000000000107311464262511300242260ustar00rootroot000000000000000.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 
-1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 
-1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error10000066400000000000000000000004621464262511300243060ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 Species 3 2 Species1 0.408869425742 0.0695518116298 Species2 -0.1153860437 -0.299767683538 Species3 -0.309967102571 0.187391917117 Site 3 2 Site1 -0.848956053187 Site2 -0.220458650578 Site3 1.66697179591 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error11000066400000000000000000000107421464262511300243110ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 
0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 
-0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Bro2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 
Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error12000066400000000000000000000030411464262511300243040ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.15704469605 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error13000066400000000000000000000030411464262511300243050ustar00rootroot00000000000000Eigvals 9 
0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.15704469605 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error14000066400000000000000000000030601464262511300243070ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.15704469605 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 0 
PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error15000066400000000000000000000030071464262511300243110ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.15704469605 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 PC.356 
0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 Biplot Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error16000066400000000000000000000107431464262511300243170ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 
0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 
2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 0 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error17000066400000000000000000000106631464262511300243210ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion 
explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 
-0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 -0.994016563505 0.0609533148724 0.184352565909 -0.974867543612 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 
-0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error18000066400000000000000000000030421464262511300243130ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 8 0.267573832777 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 -0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 
-0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error19000066400000000000000000000004601464262511300243150ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 Species 3 1 Species1 0.408869425742 Species2 -0.1153860437 Species3 -0.309967102571 Site 3 2 Site1 -0.848956053187 0.882764759014 Site2 -0.220458650578 -1.34482000302 Site3 1.66697179591 0.470324389808 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error2000066400000000000000000000107131464262511300242270ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 
-0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 
0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error20000066400000000000000000000004621464262511300243070ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 Species 3 2 Species1 0.408869425742 0.0695518116298 Species2 -0.1153860437 -0.299767683538 Species3 -0.309967102571 0.187391917117 Site 3 1 Site1 -0.848956053187 Site2 -0.220458650578 Site3 1.66697179591 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error21000066400000000000000000000000121464262511300242770ustar00rootroot00000000000000Eigvals 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error22000066400000000000000000000000221464262511300243010ustar00rootroot00000000000000Eigvals 1 0.12345 
scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error23000066400000000000000000000000521464262511300243050ustar00rootroot00000000000000Eigvals 1 0.12345 Proportion explained 1 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error24000066400000000000000000000001131464262511300243040ustar00rootroot00000000000000Eigvals 1 0.12345 Proportion explained 1 0.6789 Species 2 1 foo 0.987654 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error3000066400000000000000000000107271464262511300242350ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 
-0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 
-3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error4000066400000000000000000000107311464262511300242310ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 
-0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 
0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error5000066400000000000000000000107311464262511300242320ustar00rootroot00000000000000Eigvals 9 0.366135830393 
0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 
-1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 
0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error6000066400000000000000000000107161464262511300242360ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 
0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 
-3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error7000066400000000000000000000001011464262511300242220ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error8000066400000000000000000000030421464262511300242320ustar00rootroot00000000000000Eigvals 9 0.512367260461 0.300719094427 0.267912066004 0.208988681078 0.19169895326 0.16054234528 0.15017695712 0.122457748167 0.0 Proportion explained 9 0.267573832777 0.139911863774 0.109140272454 0.100111048503 0.0838401161912 0.0784269939011 0.0639511763509 0.0 Species 0 0 Site 9 9 PC.636 -0.258465461183 0.173999546883 0.0382875792552 -0.19447750562 0.0831176020844 0.262430333201 
-0.0231636392235 -0.0184794039581 0.0 PC.635 -0.271001135391 -0.0185951319063 -0.0864841926349 0.118064245315 -0.198808358437 -0.0211723599535 -0.191024027565 0.155646592377 0.0 PC.356 0.235077898175 0.0962519254489 -0.345792726714 -0.00320862577619 -0.0963777675519 0.0457025386953 0.185472813286 0.0404093971793 0.0 PC.481 0.0261407664325 -0.0111459676533 0.147660603015 0.29087660853 0.203945472801 0.0619712384758 0.101641328709 0.105690998719 0.0 PC.354 0.285007552283 -0.0192549888483 0.0623263375385 0.138126799852 -0.104798602423 0.0951720730628 -0.129636097542 -0.220687170372 0.0 PC.593 0.204636326241 -0.139361150932 0.291513819623 -0.181566786821 -0.159580132715 -0.0246412130162 0.0866252404441 0.0996221476871 0.0 PC.355 0.233482403212 0.225257974068 -0.0188623096268 -0.107729981831 0.177108999572 -0.192905835151 -0.149819471408 0.0383549037465 0.0 PC.607 -0.0949631911323 -0.420974802495 -0.154869454869 -0.0898427509281 0.152618194488 -0.0334232691501 -0.0251224777303 -0.0508988536409 0.0 PC.634 -0.359915158638 0.113822595435 0.0662203444138 0.0297579972788 -0.0572254078183 -0.193133506163 0.145026331031 -0.149658611738 0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_error9000066400000000000000000000004601464262511300242340ustar00rootroot00000000000000Eigvals 2 0.0961330159181 0.0409418140138 Proportion explained 0 Species 3 2 Species1 0.408869425742 Species2 -0.1153860437 Species3 -0.309967102571 Site 3 2 Site1 -0.848956053187 0.882764759014 Site2 -0.220458650578 -1.34482000302 Site3 1.66697179591 0.470324389808 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_example2_scores000066400000000000000000000065041464262511300261120ustar00rootroot00000000000000Eigvals 7 25.8979540892 14.9825779819 8.93784077262 6.13995623072 1.68070536498 0.57735026919 0.275983624351 Proportion explained 0 Species 6 7 Species0 1.38198713901 -1.71496426179 0.632272455288 0.00712898231575 0.120512431133 
-0.0723104306179 -0.00815886062344 Species1 0.919178380672 -1.25430767906 -1.1787426896 -0.00712898231576 -0.120512431133 -0.0723104306179 0.00815886062344 Species2 3.39897234869 0.446168315515 0.406691610423 0.749336668014 0.0793892812781 7.37971401683e-17 0.0329418170936 Species3 2.52353261895 0.446932822723 -0.413412046583 -0.639449029945 -0.0640330006084 3.40602185392e-17 0.0335491330226 Species4 -0.53155341411 -1.34263985744 0.464155649196 -0.412041388665 0.198336195195 7.37971401683e-17 0.00604836743485 Species5 -0.288618167117 -0.571491852197 -0.406527290424 0.206020694333 -0.0991680975973 -1.13534061797e-17 -0.00302418371743 Site 10 7 Site0 -1.48848983495 2.12675623514 0.727805340002 -0.227234564008 -3.8413042049 -2.30487725273 0.260061682644 Site1 -1.5541678384 2.37027298265 0.475523558326 1.58712629997e-16 1.39853499536e-15 4.60975450547 -1.41948353841e-14 Site2 -1.51048450796 2.19216727329 0.00519576944216 0.227234564008 3.8413042049 -2.30487725273 -0.260061682644 Site3 -0.872786591764 -2.6271708553 2.68871897067 -1.97005774092 0.948287641474 -2.0356145959e-14 0.0289185344306 Site4 2.97228673755 0.322310666722 2.50294580667 3.50264153009 0.489477682536 -1.25529566747e-14 2.11938273809 Site5 -0.879968888341 -2.19620098193 0.710888524695 -0.656685913639 0.316095880491 -4.47835211098e-15 0.00963951147681 Site6 2.64194948913 0.390104638861 -0.086230363198 -0.211189359785 -0.298609965083 -3.88762243221e-15 -4.5952222736 Site7 -0.887151184918 -1.76523110855 -1.26694192128 0.656685913639 -0.316095880491 1.21458337555e-14 -0.00963951147698 Site8 2.47314610115 0.521252384288 -2.51313331808 -3.29145217031 -0.190867717454 1.65563320466e-14 2.4758395355 Site9 -0.894333481495 -1.33426123517 -3.24477236725 1.97005774092 -0.948287641474 3.0262803659e-14 -0.0289185344308 Biplot 4 3 0.422650019179 -0.559142585857 -0.713250678211 0.988495963777 0.150787422017 -0.0117848614073 -0.556516618887 0.817599992718 0.147714267459 -0.404079676685 -0.9058434809 -0.127150316558 Site 
constraints 10 7 Site0 -1.48131076339 2.07063239013 1.42061063192 -0.227234564008 -3.8413042049 -2.30487725273 0.260061682644 Site1 -1.51771406044 2.22973216369 0.402841555923 1.58712629997e-16 1.39853499536e-15 4.60975450547 -1.41948353841e-14 Site2 -1.55411735749 2.38883193726 -0.61492752007 0.227234564008 3.8413042049 -2.30487725273 -0.260061682644 Site3 -0.774350145471 -2.45801536594 2.77528052969 -1.97005774092 0.948287641474 -2.0356145959e-14 0.0289185344306 Site4 2.76860070338 0.0930230161545 2.00339886045 3.50264153009 0.489477682536 -1.25529566747e-14 2.11938273809 Site5 -0.847156739577 -2.13981581881 0.739742377702 -0.656685913639 0.316095880491 -4.47835211098e-15 0.00963951147681 Site6 2.69579410928 0.41122256329 -0.0321392915344 -0.211189359785 -0.298609965083 -3.88762243221e-15 -4.5952222736 Site7 -0.919963333683 -1.82161627167 -1.29579577429 0.656685913639 -0.316095880491 1.21458337555e-14 -0.00963951147698 Site8 2.62298751517 0.729422110426 -2.06767744352 -3.29145217031 -0.190867717454 1.65563320466e-14 2.4758395355 Site9 -0.992769927788 -1.50341672453 -3.33133392627 1.97005774092 -0.948287641474 3.0262803659e-14 -0.0289185344308 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_example3_scores000066400000000000000000000107431464262511300261130ustar00rootroot00000000000000Eigvals 9 0.366135830393 0.186887643052 0.0788466514249 0.082287840501 0.0351348475787 0.0233265839374 0.0099048981912 0.00122461669234 0.000417454724117 Proportion explained 0 Species 9 9 Species0 0.110350890177 0.282399990052 -0.203028976154 -0.00192462284409 -0.082232863384 0.0857314258364 -0.0122038907184 -0.0425198793666 0.00466719926338 Species1 0.141359038961 0.303495645402 0.395441211576 -0.14126625534 -0.0268859204718 0.143253061936 0.0430260301697 0.0476377655759 -0.00228172378295 Species2 -1.01552204222 0.0958317865043 -0.198262718034 -0.104801030067 0.130025239749 0.0244045261332 0.0464701211285 0.0269279200532 0.0350103177576 Species3 -1.03620650502 0.109624974112 
0.220984718362 0.223640072997 -0.243745876054 -0.0259064859794 -0.0534088909011 -0.0315611195993 0.0256448427552 Species4 1.05371722248 0.537178749104 -0.438075060322 0.223480553581 -0.323948461806 0.124644870822 -0.119275907223 0.0416254660329 -0.0381955235096 Species5 0.998558655 0.573960582723 0.679918103399 -0.389963380717 0.299077945999 0.328451006171 0.21215881857 -0.0829871883001 -0.0439653996462 Species6 0.255245719879 -0.178168259149 -0.204127155429 0.433397565801 0.0707099230629 -0.18817306522 0.126908756045 0.0044937289123 -0.0122511718244 Species7 0.146555872394 -0.857362497037 -0.0152499051659 0.0527604990862 0.354475793915 -0.0416813697787 -0.199011239586 -0.00213723187073 -0.00782946141667 Species8 0.413705102117 -0.707948964322 0.21569736034 -0.690314241725 -0.148431001217 -0.334251868558 -0.00628707445028 -0.00364123416731 -0.0122722164511 Site 10 9 Site0 0.710587311248 -3.08166800613 0.219651379947 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.584771352278 -3.00669301091 -0.947448656768 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.762734278287 -3.15258603503 2.13924426714 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.11230735331 1.07150585141 -1.87527740873 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.979116769996 -0.0603144289026 -0.696277367656 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.04322560423 0.459426970165 -0.639802790578 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -0.954490118162 -0.0847021660539 0.132509124411 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.947268764751 -0.108370567311 0.526107182587 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 
-0.154459216567 0.778139732463 Site8 -1.14808173207 0.490449274267 0.478353666755 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 1.03291557934 1.0350490304 2.74691777314 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 Biplot 3 3 -0.169746767979 0.63069090084 0.760769036049 -0.994016563505 0.0609533148724 -0.0449369418179 0.184352565909 -0.974867543612 0.0309865007541 Site constraints 10 9 Site0 0.692138797603 -3.08053663489 -0.328747278055 -1.24528801163 -1.07293546227 -0.506241907472 0.244126652455 -3.63164833508 1.16311896657 Site1 0.664559513865 -3.06214571808 0.230249303805 2.69965142856 2.13682885838 0.813520011254 0.471530333182 0.908423015086 -1.34724387844 Site2 0.636980230127 -3.04375480127 0.789245885666 -3.1162748358 -2.30660936925 -0.698929858809 -1.39062619586 4.84117591747 0.562102984837 Site3 1.10887578995 0.500396915484 -1.55606822404 0.666370241998 -1.10153224699 1.43517552491 -1.10619960297 0.0137029328454 -0.0371803939101 Site4 -0.970016224052 0.0654867737684 -1.1206070781 -0.612646703308 0.983006619615 0.315662442163 0.574110232297 0.328630035672 0.868027697443 Site5 1.05371722248 0.537178749104 -0.438075060322 0.287156643872 -0.573935423877 -1.44980634943 1.70166994063 0.306164261447 -0.442115969758 Site6 -1.02517479153 0.102268607388 -0.00261391438256 -0.42143341064 -0.111552348931 -0.394242454835 -0.673963982894 -0.379018566362 -1.7472502885 Site7 0.998558655 0.573960582723 0.679918103399 -0.00565282365567 1.26272400228 -1.06565692165 -1.46326596729 -0.154459216567 0.778139732463 Site8 -1.080333359 0.139050441007 1.11537924934 1.17015870919 -1.00599224074 0.0735071441404 0.0860462673715 0.0417647558417 0.935819560428 Site9 0.943400087524 0.610742416342 1.79791126712 -1.28083971649 0.363002636972 1.98647950015 1.05356145232 -0.24813142226 -0.463165215106 
scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_CCA_site000066400000000000000000000043651464262511300274760ustar00rootroot000000000000007.105873112480000087e-01 -3.081668006130000137e+00 2.196513799469999861e-01 -1.245288011630000025e+00 -1.072935462270000029e+00 -5.062419074720000411e-01 2.441266524550000094e-01 -3.631648335079999956e+00 1.163118966569999913e+00 5.847713522780000339e-01 -3.006693010909999852e+00 -9.474486567680000526e-01 2.699651428560000177e+00 2.136828858379999918e+00 8.135200112539999751e-01 4.715303331819999944e-01 9.084230150859999853e-01 -1.347243878440000042e+00 7.627342782870000226e-01 -3.152586035030000122e+00 2.139244267140000044e+00 -3.116274835800000087e+00 -2.306609369249999819e+00 -6.989298588090000486e-01 -1.390626195859999914e+00 4.841175917470000201e+00 5.621029848370000526e-01 1.112307353310000080e+00 1.071505851409999988e+00 -1.875277408729999928e+00 6.663702419979999902e-01 -1.101532246989999964e+00 1.435175524910000000e+00 -1.106199602970000084e+00 1.370293284539999927e-02 -3.718039391010000139e-02 -9.791167699959999471e-01 -6.031442890259999801e-02 -6.962773676560000125e-01 -6.126467033079999736e-01 9.830066196150000213e-01 3.156624421629999899e-01 5.741102322969999783e-01 3.286300356719999982e-01 8.680276974430000125e-01 1.043225604230000103e+00 4.594269701650000037e-01 -6.398027905780000468e-01 2.871566438720000169e-01 -5.739354238770000283e-01 -1.449806349429999974e+00 1.701669940629999989e+00 3.061642614469999857e-01 -4.421159697579999937e-01 -9.544901181620000230e-01 -8.470216605390000486e-02 1.325091244109999900e-01 -4.214334106400000057e-01 -1.115523489309999949e-01 -3.942424548349999780e-01 -6.739639828939999466e-01 -3.790185663619999867e-01 -1.747250288500000082e+00 9.472687647509999698e-01 -1.083705673110000045e-01 5.261071825869999552e-01 -5.652823655669999593e-03 1.262724002279999924e+00 -1.065656921650000033e+00 -1.463265967289999914e+00 -1.544592165670000128e-01 7.781397324629999446e-01 
-1.148081732070000083e+00 4.904492742670000238e-01 4.783536667550000177e-01 1.170158709190000090e+00 -1.005992240739999932e+00 7.350714414039999367e-02 8.604626737149999316e-02 4.176475584170000199e-02 9.358195604279999635e-01 1.032915579340000001e+00 1.035049030399999959e+00 2.746917773139999852e+00 -1.280839716490000058e+00 3.630026369720000257e-01 1.986479500149999966e+00 1.053561452320000091e+00 -2.481314222600000119e-01 -4.631652151060000056e-01 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_CCA_site_constraints000066400000000000000000000043631464262511300321230ustar00rootroot000000000000006.921387976030000111e-01 -3.080536634890000069e+00 -3.287472780549999807e-01 -1.245288011630000025e+00 -1.072935462270000029e+00 -5.062419074720000411e-01 2.441266524550000094e-01 -3.631648335079999956e+00 1.163118966569999913e+00 6.645595138650000067e-01 -3.062145718080000023e+00 2.302493038049999996e-01 2.699651428560000177e+00 2.136828858379999918e+00 8.135200112539999751e-01 4.715303331819999944e-01 9.084230150859999853e-01 -1.347243878440000042e+00 6.369802301270000022e-01 -3.043754801269999977e+00 7.892458856659999578e-01 -3.116274835800000087e+00 -2.306609369249999819e+00 -6.989298588090000486e-01 -1.390626195859999914e+00 4.841175917470000201e+00 5.621029848370000526e-01 1.108875789949999913e+00 5.003969154840000044e-01 -1.556068224039999892e+00 6.663702419979999902e-01 -1.101532246989999964e+00 1.435175524910000000e+00 -1.106199602970000084e+00 1.370293284539999927e-02 -3.718039391010000139e-02 -9.700162240520000534e-01 6.548677376839999453e-02 -1.120607078099999931e+00 -6.126467033079999736e-01 9.830066196150000213e-01 3.156624421629999899e-01 5.741102322969999783e-01 3.286300356719999982e-01 8.680276974430000125e-01 1.053717222479999993e+00 5.371787491039999862e-01 -4.380750603219999983e-01 2.871566438720000169e-01 -5.739354238770000283e-01 -1.449806349429999974e+00 1.701669940629999989e+00 3.061642614469999857e-01 -4.421159697579999937e-01 
-1.025174791530000018e+00 1.022686073880000046e-01 -2.613914382559999906e-03 -4.214334106400000057e-01 -1.115523489309999949e-01 -3.942424548349999780e-01 -6.739639828939999466e-01 -3.790185663619999867e-01 -1.747250288500000082e+00 9.985586549999999617e-01 5.739605827229999901e-01 6.799181033990000511e-01 -5.652823655669999593e-03 1.262724002279999924e+00 -1.065656921650000033e+00 -1.463265967289999914e+00 -1.544592165670000128e-01 7.781397324629999446e-01 -1.080333358999999938e+00 1.390504410070000085e-01 1.115379249340000101e+00 1.170158709190000090e+00 -1.005992240739999932e+00 7.350714414039999367e-02 8.604626737149999316e-02 4.176475584170000199e-02 9.358195604279999635e-01 9.434000875239999528e-01 6.107424163419999941e-01 1.797911267119999934e+00 -1.280839716490000058e+00 3.630026369720000257e-01 1.986479500149999966e+00 1.053561452320000091e+00 -2.481314222600000119e-01 -4.631652151060000056e-01 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_CCA_species000066400000000000000000000040211464262511300301520ustar00rootroot000000000000001.103508901770000050e-01 2.823999900520000050e-01 -2.030289761540000071e-01 -1.924622844089999901e-03 -8.223286338400000173e-02 8.573142583640000480e-02 -1.220389071839999943e-02 -4.251987936659999689e-02 4.667199263379999581e-03 1.413590389609999942e-01 3.034956454020000249e-01 3.954412115759999846e-01 -1.412662553400000076e-01 -2.688592047179999903e-02 1.432530619359999868e-01 4.302603016969999694e-02 4.763776557589999988e-02 -2.281723782949999797e-03 -1.015522042219999976e+00 9.583178650430000667e-02 -1.982627180340000017e-01 -1.048010300670000061e-01 1.300252397489999878e-01 2.440452613319999894e-02 4.647012112849999849e-02 2.692792005320000098e-02 3.501031775760000270e-02 -1.036206505020000002e+00 1.096249741120000065e-01 2.209847183620000100e-01 2.236400729970000056e-01 -2.437458760539999991e-01 -2.590648597939999853e-02 -5.340889090110000070e-02 -3.156111959929999711e-02 2.564484275520000078e-02 
1.053717222479999993e+00 5.371787491039999862e-01 -4.380750603219999983e-01 2.234805535809999966e-01 -3.239484618060000098e-01 1.246448708220000007e-01 -1.192759072229999939e-01 4.162546603290000202e-02 -3.819552350960000314e-02 9.985586549999999617e-01 5.739605827229999901e-01 6.799181033990000511e-01 -3.899633807169999855e-01 2.990779459990000055e-01 3.284510061709999817e-01 2.121588185699999907e-01 -8.298718830010000380e-02 -4.396539964619999852e-02 2.552457198790000215e-01 -1.781682591490000023e-01 -2.041271554290000068e-01 4.333975658009999732e-01 7.070992306289999862e-02 -1.881730652199999909e-01 1.269087560449999874e-01 4.493728912300000086e-03 -1.225117182439999959e-02 1.465558723940000030e-01 -8.573624970369999598e-01 -1.524990516589999970e-02 5.276049908620000090e-02 3.544757939150000037e-01 -4.168136977870000315e-02 -1.990112395860000050e-01 -2.137231870730000108e-03 -7.829461416669999951e-03 4.137051021170000120e-01 -7.079489643220000517e-01 2.156973603400000050e-01 -6.903142417249999996e-01 -1.484310012170000082e-01 -3.342518685579999871e-01 -6.287074450280000053e-03 -3.641234167310000193e-03 -1.227221645110000009e-02 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_PCoA_site000066400000000000000000000040161464262511300276630ustar00rootroot00000000000000-2.584654611830000115e-01 1.739995468830000114e-01 3.828757925520000216e-02 -1.944775056199999907e-01 8.311760208439999964e-02 2.624303332010000100e-01 -2.316363922349999951e-02 -1.847940395810000053e-02 0.000000000000000000e+00 -2.710011353910000143e-01 -1.859513190630000118e-02 -8.648419263489999509e-02 1.180642453149999965e-01 -1.988083584370000101e-01 -2.117235995349999941e-02 -1.910240275650000041e-01 1.556465923769999871e-01 0.000000000000000000e+00 2.350778981749999941e-01 9.625192544890000257e-02 -3.457927267140000205e-01 -3.208625776189999966e-03 -9.637776755190000100e-02 4.570253869529999902e-02 1.854728132859999878e-01 4.040939717929999814e-02 0.000000000000000000e+00 
2.614076643250000040e-02 -1.114596765330000058e-02 1.476606030150000026e-01 2.908766085300000248e-01 2.039454728010000051e-01 6.197123847580000150e-02 1.016413287090000006e-01 1.056909987190000066e-01 0.000000000000000000e+00 2.850075522830000097e-01 -1.925498884830000035e-02 6.232633753849999708e-02 1.381267998519999929e-01 -1.047986024230000007e-01 9.517207306279999723e-02 -1.296360975420000061e-01 -2.206871703720000022e-01 0.000000000000000000e+00 2.046363262410000050e-01 -1.393611509319999942e-01 2.915138196229999923e-01 -1.815667868209999980e-01 -1.595801327149999893e-01 -2.464121301619999829e-02 8.662524044409999902e-02 9.962214768709999613e-02 0.000000000000000000e+00 2.334824032120000059e-01 2.252579740679999942e-01 -1.886230962680000151e-02 -1.077299818309999935e-01 1.771089995719999921e-01 -1.929058351509999880e-01 -1.498194714080000045e-01 3.835490374650000339e-02 0.000000000000000000e+00 -9.496319113230000664e-02 -4.209748024950000223e-01 -1.548694548690000006e-01 -8.984275092809999863e-02 1.526181944879999863e-01 -3.342326915010000038e-02 -2.512247773030000172e-02 -5.089885364090000058e-02 0.000000000000000000e+00 -3.599151586379999990e-01 1.138225954350000069e-01 6.622034441379999470e-02 2.975799727879999829e-02 -5.722540781830000312e-02 -1.931335061630000127e-01 1.450263310309999887e-01 -1.496586117379999914e-01 0.000000000000000000e+00 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_RDA_site000066400000000000000000000033721464262511300275130ustar00rootroot00000000000000-1.488489834949999979e+00 2.126756235139999784e+00 7.278053400020000341e-01 -2.272345640079999896e-01 -3.841304204900000130e+00 -2.304877252729999881e+00 2.600616826439999807e-01 -1.554167838399999946e+00 2.370272982649999971e+00 4.755235583260000176e-01 1.587126299970000006e-16 1.398534995359999983e-15 4.609754505469999764e+00 -1.419483538409999981e-14 -1.510484507960000000e+00 2.192167273289999940e+00 5.195769442159999842e-03 2.272345640079999896e-01 
3.841304204900000130e+00 -2.304877252729999881e+00 -2.600616826439999807e-01 -8.727865917639999749e-01 -2.627170855300000163e+00 2.688718970670000097e+00 -1.970057740919999967e+00 9.482876414740000204e-01 -2.035614595899999968e-14 2.891853443059999931e-02 2.972286737550000169e+00 3.223106667219999832e-01 2.502945806670000106e+00 3.502641530090000010e+00 4.894776825359999917e-01 -1.255295667470000031e-14 2.119382738090000107e+00 -8.799688883409999551e-01 -2.196200981930000129e+00 7.108885246949999681e-01 -6.566859136389999740e-01 3.160958804910000142e-01 -4.478352110979999834e-15 9.639511476809999382e-03 2.641949489129999940e+00 3.901046388609999971e-01 -8.623036319799999572e-02 -2.111893597850000026e-01 -2.986099650829999730e-01 -3.887622432210000342e-15 -4.595222273600000129e+00 -8.871511849180000464e-01 -1.765231108550000094e+00 -1.266941921279999939e+00 6.566859136389999740e-01 -3.160958804910000142e-01 1.214583375549999947e-14 -9.639511476980000548e-03 2.473146101150000220e+00 5.212523842880000258e-01 -2.513133318079999956e+00 -3.291452170309999925e+00 -1.908677174539999966e-01 1.655633204660000023e-14 2.475839535500000022e+00 -8.943334814950000267e-01 -1.334261235170000059e+00 -3.244772367249999956e+00 1.970057740919999967e+00 -9.482876414740000204e-01 3.026280365900000082e-14 -2.891853443079999905e-02 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_RDA_site_constraints000066400000000000000000000033731464262511300321430ustar00rootroot00000000000000-1.481310763390000007e+00 2.070632390130000111e+00 1.420610631920000033e+00 -2.272345640079999896e-01 -3.841304204900000130e+00 -2.304877252729999881e+00 2.600616826439999807e-01 -1.517714060439999901e+00 2.229732163690000046e+00 4.028415559230000276e-01 1.587126299970000006e-16 1.398534995359999983e-15 4.609754505469999764e+00 -1.419483538409999981e-14 -1.554117357490000018e+00 2.388831937259999982e+00 -6.149275200699999555e-01 2.272345640079999896e-01 3.841304204900000130e+00 
-2.304877252729999881e+00 -2.600616826439999807e-01 -7.743501454710000065e-01 -2.458015365940000141e+00 2.775280529689999831e+00 -1.970057740919999967e+00 9.482876414740000204e-01 -2.035614595899999968e-14 2.891853443059999931e-02 2.768600703380000194e+00 9.302301615449999617e-02 2.003398860449999930e+00 3.502641530090000010e+00 4.894776825359999917e-01 -1.255295667470000031e-14 2.119382738090000107e+00 -8.471567395769999953e-01 -2.139815818809999826e+00 7.397423777020000202e-01 -6.566859136389999740e-01 3.160958804910000142e-01 -4.478352110979999834e-15 9.639511476809999382e-03 2.695794109279999962e+00 4.112225632900000094e-01 -3.213929153439999686e-02 -2.111893597850000026e-01 -2.986099650829999730e-01 -3.887622432210000342e-15 -4.595222273600000129e+00 -9.199633336829999841e-01 -1.821616271669999954e+00 -1.295795774289999924e+00 6.566859136389999740e-01 -3.160958804910000142e-01 1.214583375549999947e-14 -9.639511476980000548e-03 2.622987515170000172e+00 7.294221104260000255e-01 -2.067677443520000047e+00 -3.291452170309999925e+00 -1.908677174539999966e-01 1.655633204660000023e-14 2.475839535500000022e+00 -9.927699277879999951e-01 -1.503416724530000081e+00 -3.331333926270000134e+00 1.970057740919999967e+00 -9.482876414740000204e-01 3.026280365900000082e-14 -2.891853443079999905e-02 scikit-bio-0.6.2/skbio/io/format/tests/data/ordination_exp_Ordination_RDA_species000066400000000000000000000020561464262511300302000ustar00rootroot000000000000001.381987139010000032e+00 -1.714964261790000055e+00 6.322724552880000237e-01 7.128982315749999785e-03 1.205124311330000031e-01 -7.231043061790000392e-02 -8.158860623439999696e-03 9.191783806719999728e-01 -1.254307679060000069e+00 -1.178742689599999949e+00 -7.128982315759999598e-03 -1.205124311330000031e-01 -7.231043061790000392e-02 8.158860623439999696e-03 3.398972348690000089e+00 4.461683155150000246e-01 4.066916104230000051e-01 7.493366680140000069e-01 7.938928127809999347e-02 7.379714016829999409e-17 3.294181709360000099e-02 
2.523532618950000028e+00 4.469328227230000006e-01 -4.134120465830000146e-01 -6.394490299449999693e-01 -6.403300060840000363e-02 3.406021853920000005e-17 3.354913302260000008e-02 -5.315534141100000110e-01 -1.342639857440000029e+00 4.641556491959999975e-01 -4.120413886650000079e-01 1.983361951950000091e-01 7.379714016829999409e-17 6.048367434850000432e-03 -2.886181671170000018e-01 -5.714918521970000498e-01 -4.065272904239999874e-01 2.060206943329999929e-01 -9.916809759729999785e-02 -1.135340617969999987e-17 -3.024183717430000123e-03 scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_dna_3_seqs000066400000000000000000000001201464262511300241610ustar00rootroot000000000000003 13 d1 ..ACC-GTTG G.. d2 TTACCGGT-G GCC d3 .-ACC-GTTG C-- scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_empty_line_after_header000066400000000000000000000001061464262511300305120ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_empty_line_before_header000066400000000000000000000001061464262511300306530ustar00rootroot00000000000000 2 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_empty_line_between_seqs000066400000000000000000000001061464262511300305650ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_header_too_long000066400000000000000000000001201464262511300270000ustar00rootroot000000000000002 20 extra_text foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_header_too_short000066400000000000000000000001041464262511300272020ustar00rootroot00000000000000 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_no_header000066400000000000000000000001001464262511300255720ustar00rootroot00000000000000foo ..ACC-GTTG 
G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_seq_too_long000066400000000000000000000001071464262511300263450ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.CGA bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_seq_too_short000066400000000000000000000001031464262511300265410ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_too_few_seqs000066400000000000000000000000451464262511300263530ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.C scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_too_many_seqs000066400000000000000000000001051464262511300265330ustar00rootroot000000000000001 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_zero_seq_len000066400000000000000000000000171464262511300263420ustar00rootroot000000000000001 0 scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_invalid_zero_seqs000066400000000000000000000000041464262511300256630ustar00rootroot000000000000000 1 scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_single_seq_long000066400000000000000000000000521464262511300253160ustar00rootroot000000000000001 24 foo ..ACC-GTTG G..AATGC.C ---- scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_single_seq_short000066400000000000000000000000201464262511300255110ustar00rootroot000000000000001 1 - scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_two_chunks000066400000000000000000000001051464262511300243310ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_variable_length_ids000066400000000000000000000001521464262511300261340ustar00rootroot000000000000006 6 .-ACGT a TGCA-. bb .ACGT- 1 TGCA-. 
abcdefghijAAAAAA ab def42ijGGGGGG scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_varied_whitespace_in_seqs000066400000000000000000000001211464262511300273520ustar00rootroot000000000000002 20 foo ..ACC -GTTG G..AA TGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_whitespace_in_header_1000066400000000000000000000001521464262511300265210ustar00rootroot00000000000000 2 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_whitespace_in_header_2000066400000000000000000000001261464262511300265230ustar00rootroot00000000000000 2 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/phylip_whitespace_in_header_3000066400000000000000000000001461464262511300265260ustar00rootroot000000000000002 20 foo ..ACC-GTTG G..AATGC.C bar TTACCGGT-G GCCTA-GCAT scikit-bio-0.6.2/skbio/io/format/tests/data/prot.emb000066400000000000000000010214541464262511300223210ustar00rootroot00000000000000HDF  ,#`TREE0HEAPX(idptridembedding0 xSNOD  TREE0"> >TREE4">(>  xTREE ,# ,C ,c , , , , , ,# ,C ,c , , , , , ,# ,C ,c , (, (, (, (, 0,# 0,C 0,c 0, 8, 8, 8, 8,@>IGKEEIQQRLAQFVDHWKELKQLAAARGQRLEESLEYQQFVANVEEEEAWINEKMTLVASEDqYV'>:J/W0*t@8w>#>Bb>M= Ѿ~>ș^>륽\UNZL%بR>Mڽ&߽`߽ #2g>P=ݣ>8G[=ػf>E߱iľɃ @==~=><>ja}M> >Ž7`q2=vkV>+a==W>T2= aݦ-/(>Xo$K=@N@>]Y8uu=8xF %=)==A>R>"JP>Lz=>MaP>[HE>c[=J2:Rt;ۤ]=o>~ξU 0^ >/;=r1S>M>t=Ұ=\>9枾=> tʰ́ˉ=o =-Ⲥ>E>:<U=4&>|{>H~ri><>Ct=6ݻP=XF=9>ASu>P;`>w>*ھ 9ؼ-;Ÿ: )E>y==#>Ja w >Sh?&Y!7Ifʽ(v>s=>{/9 >/ >^>'=!&?gbͽ<¢=Ccȋ  kye>jN:p|>< (=h=h _x>MH<wų=,r=@>>%AlwO=iVf =1ǽc>L\>LW>픦=Ƽ>_mM𡬾ƾ:ܻ>nGRmNb G>m>4">HOr͢=uǽ>< m_=>>6>fj*Ѿ2WQƄ'LUc>+e=TŻjX>>1FNC>j<}P/>.h;ཕ˽=7Y=6?<'*9=/>O>L>h4u=!?q R=C4A^>f`1>"a =I=].Ջ>8o p>{L=HD+ճW;/9<[ڽ֩co>XDI>F2>һbIo慼=۽o\k=V>>xVҾzOY>Cu%>+>w=*{}>1=$:L>G?kI=j[H;ܠO> 8=bA} ~w<\=9> /=*=T>t?~>F=Ҩ=̼94>{"Ups>Wd;>T'@=5=>BW>_0=CҼ=߯O>J>PK>zߏ=$>T>&7~Y-_b;ڽS=F0޽l>V>d#>"B:=>*<);=fc,5=>>sZ>-G=/fJ(o5=v^> w); IF$=Cb==^Hоa%qjva⍽?A>}č^h/>e%>= `~=ۣick??Y"^/>u/M0 
I=X;:rb?{y=ai=Y= ƾKؽk^=Feڡ]/>H>d>U?>i79ʽ#N.Db*>)P>#>=E79NT&rF],">eɾ|>>3Wz=>>>a=VV(=Kzj=&Jd=4ݱWO>c=y3'z>@0*>[>ⳑ0[=/>)愘<=A׺>>V~j>!3Z[UM^f*>h?%sPӽ@?>J~}>{>E>@>^>{>>=_"1ý=X%%,"Qu 4#=M=Xn >*6nȄ>EѽV<0:> F轼H >|X<ý*1= (>\<>=,E=q#@>ܾx_[9Vkv =.립C<>%=^>s>=~=bZ=g>b8bf>l1>3>ѓ>>:9 >MY=e&ܾ>%l>bX =0>BB S>;𙾆k)4Wu=##]<OA?>^=D>>zafX۾]=P>HO>PR >0=5p.8>ޕ>>~>8P,=f>Ït>nl>nX.=>6`>ͪ>ZF}^y7=ׄ w>?3"=Ftl>ʦ>\1ɒq6y>)=> b>\ 0k;=ٲ>S>Ι>E>n">pc=s~ ʾc$=]=н<ݽ> #k>pP>Z]=>>b*?ѽMRսG֫>%Y>IC"ļijDX="W>&>;6<׻ڀ>2+׼P=ur񭈾 >SoX6? =#~ѡK>3VWq>!}<.!C8H(>xȾGC+O=ꋂ"J]>a>e<" M =0)M=lv= >>,[,u)yJ*:>>">d=~<(=fp><>PbN 7 _Ce̽ؽԼ>>>&_>;$=6=8.=rjQm>JA=¨==6<+jVA>t=ҽ&o遾n=M1hDV=Cv_=߾R*'=$žV$1>韈> >'<> C= l=}=^> 3{Ĵ>__>ju>juu>_"F罻v>_=~u4|X>;`=k>>Y=+B[OCA=Ľ>G&%=!9p d>:I=߼'#>>8t35<;Ѿ;7be:u>ag>+J})>T1*>Y2滺8;1b>hg禎>&O)>s>MA X?0=p@={=ݨ)l>}A<+;Y*>/H7l>k.h(>A*>]z[u><>hm<=P="K49>ϝP=e=DV>x{1=h>侘>>>M ᓽ@H=;&s>*y6,I >3` mmrXM%q-=l=yl>=uŽV K>l(Ǿl >q>e3>&Oڬþn==0=i3dZ=+$<>mX81'8>>B۽ @ =&hMt"9O)34ؽU<^"=T<=b u>p>A>Rfp=c->L콽̌˾/q=>>k=f/b>-a5>֓~2= % |Y+=W)>־X}ýżA=I=뽵-5>"h=@カ_>u mFMf> v">E>悔p~>>#Yxxua^stɩ1ώ =ܦ#=mY`p"=฾9=zgǴQ[>i?0B=p =K촼kQvHYg=^V7==C>4lн>>w>{q<=w$>Q3=ub^,l=M<b:Nԕ<҂> > &f>+r>kI>B# cTKrk,`p> =>j=k>?.y>Z=Rk 5S>'jiz>PE)6E>qeO>>t ޾Jq>>u͖=+>i"+='ND ==Њ=bK>!=H=O:8?}8Ƽ>ma>e>>~2= $h<_ڼU>t1J&Q y*/0<4ػ{>=Ydyr#W+>zE>k#>9) 2=7eOJ<0Z&=C[nf5?LpkN=S2&>mJ֟> [սIn<{>撆>SUG1+#=['=b=T徝Hw`>=#1>5>J)߻ !'(>{>Y>4>4}=[&y>ܢػ%99g=:>. 
[>G =9 >/>.>A:5>9<= >T>aCoh=a /A=T*>.,=GYϾ===IHYA iUC0>r>Bm|>,=>N|qn^K0>kd9/޾k>D4'ui>oDщnH>懾~>$$J :kvоYXРDznS+=>"B >@0=Xe{޾ OnKlt3>>>==X*~>< =?>>|O.>g< pn>F H}[ê!=l<ς>g;4e<=1]P|GTR>CJl=V>>Q=ڕV>>K>9pN8=>8;54)IT<ǎ-0>H>iq>c-->D=>Q+>u,5>Q>F,=yib >'<@@?=':>*G0>&=J>=>F_Л={ =}>+U+Ʃ>P>>Z>>?I=>S=:>>Ҷp5>%>̙=}=bi>y%t>X>Ѽo>L?W<+˽ =Nt>V]M>fbhy>+C=D|Lk~V̀> 9Za:>=,{>,nΓ jI̽,=Y+pG󻷏>seV*<>߽Z=`߄C=<= =a y>><^|C8> ˨A=A>Al07=Ww k ־>e>hʽ~<"s=!?ӯwl&o>gPv>w.>=q[ߍ>̽Ӿ=踆>?<5߸w>Cپ;%uDM ?1@ؽcR>=k>7>f,Ƣ =sQ1Hc*=t %>6Md!>[}@K>y>JZ!"[=@`(=?k?h,( >n<#=?r3>J֪>#!>IKtij<.$>ȪP>ۙC=r1>Q<ܽF?=[>>ts'sz,> =+>.>+_>W>.C ս=❾==J>a>==khJS;$>=|< /> >&о}E7Us>g>5>9->q7>>͞>75gq>>T5>{m=<̩=}k> @f>$>̂6Oq9y#S>A$Ƚ@"oi/~4uw9>_%D">)Q=\_(=>~;>C\VZ&>eޤ>H|=a)<$=K>@=yb#>tN>r= =S#н :>H>`.s>L=Pfe1l=7<3?"=ŸA>>1;E+>=<H=n$>U߽X$?UD>,E==#VGźz%&>FN0>3[!|`5>_3=Z>aj@>=Nr=`ʟ6>R=+⥾={>#>ͷ4"U>,.qB]A^i`>tL%uTʽCY=|+=g ܽaq ?1>o)w>%{=E]>$4z\ -=\=>tg+>=l>ޛny`>y>>>'>ߠ')e^T>ke9>Ľ+<9n  *v>VA>!=|> T?,>J9=F2| r>=듏=߈>\;>4cT>ľG-+SI'=t $9B放\(pۼ>gϻT|پ}>%K1t>t>=y>l V>jD>޽w9=րӾ}U=s]>4߾>Ɇ>|Y˾L> =Ŕ"ʾT><6e=q=*˫uU/=;S>diTf>⭁B{<8|>=!>Җ5d3=V]^>>xZ>=>T\><>A=u< $v龲>lv=L> > ZG>.>h$=( N=f=_Ɖ= 9rc)>`UJ>$*u=Kݾ对ɪ=A0>G6= G=|l>a>g==>j=>==&9ӗ>#.>Q!>><7̽=d>uy>ڬ=ck9.>>}= ⨽=o½'=GY<U>_=w> =S[B@>F=ޏ>XT=/>zݽX\=T=w|>34N<<2II0)Axe>8>ǍH>>|2>=+U=nkp>> >ٽA>}>>8{n > ؜Evn>b=(>x`zm2{]1iWL*aB `=Ƚ+<{==,̾Bp>ۛM>tDCm=h>S>;ocM~ kr)=8>͚)l>u?F>^$>Cؾu1>H<=K=iI>G{=P𒼳%+,=d== \ Ŋ> Z=-o[6>kɜ2Ґ>c=;S>;ʺ<ުWGŒ>=[-=|d *@O< ܖ= )>ս1y*=<">LM7R(>65>ە+>6ܴ">N0$ =B==a1]X>>M$`=WI;ڶ&[>*y= 8=T>q=a\c 5r9z)>g>>`<$>'>ͽ{=." 6><>oVs˿(=FLՂ׾ =ѭ=a@>{t=3Cp<ͷJ>]T<>SwC%->? =I>7>!E/p>8>WL>՘E> ^A{T>C=d8f=Iq-=Q.w>] )Wj>x?PfԽO~b>y:uu0>Ce>M> ƾoZg>Ї>)>'8>#=˼>D}>څ>7,>!=pNþEqo< a\=v4>*Ao!&=JIYvqre#8>*4=:7P#-tdǽ<$Y=I_A<>5=*R! ygC:={T< =Vw(Vk.>С=ņZiɅ7 >gۨ$콭6=V=m[v>}>W<ۺ=|= >.>=(=[YZV>=^wW< pI0>۽Ҵ>R *C?<">2.bS>Ju=j˽R/> >_=(Es==ԃ;`N=l@>.ؾ3=>+a=>&d>h=&=K/mK>T5>!pI=B[>/ٕ,aҽ>E; R<7=Ro+>@gv9j>l>ش-o6ʲ!$ھAf =T-t >J=<߸>t=z@f>xC=F>>?E<8E>8}ݲ ƽ=A83w;a>.sSm>=jWfuԽ5H)>sOb޽c!k þȽe޽ʾi=>@F>ggվn&j$ݠ><>'i83>΋=Ώ>=m-RtuEfbt>7>-jr=? 
> |b>>iu> +Rȑ=(:1 >iݳ>3>L>n ?z[> ?wx=lUJ>U%Sfm=>$|&W=>"l=Gi .<:#N'=, m<e2>[ 5Ě >GdC >><'($=9u>7NF>S/>+2==(u|;9/ۯ!~}B0>E{=~`>-d>>R&l6q:=S\z}a Ȕ>oBD=?J=Ţ4a :^ k[J>=4b6>L=9B>m*=.>ߖѽs"?5)=F'v<>f;&>m>ɜ[C<$>.>l"l:>sHt>xܼSm=w=;>Tr=%>T˾)=$8a==Ҧ0T.>y&='/>@#r>֠&<>J4:Y =υ;(T>k>1> ,sMgzw$>"D2.=5XW=@a>mIe>$tM>>e=oC>e;K<|*LI>!o=)v.ȏ>Kebн'/7QŮ=R1>آvPAd= 51վ>[=P8=>A=-%l>=R[/=J՟Ȣ% =姥=cg>=gbˈ=tb>T(>"K=_ɬ=Ĵ>y㠽fSyZǔ>˟2=Q>c>΄>>8>=JgV)\L.=̎1*;>>r=;TCafe>G2=->< l창>@/'>~$>>ؤ]7"K=n J>3>u>nP9kR`su?>/^?,Hc/x7J>f5 >[<<0d[$,Jy>xiC!ܴt> <>:t&=X-u-|Ĺ$>*> G W|=kz;~3>L|΅=ӻX%-|u>vn>+@q?>N>>Պ>Q2=~s=u<=>/xĜ=z.1=;[LFQu=ս2f5 dL0>9=g`6=>Ti^>X<H>=8l?P=*m7>U>x`M>ʴ>烾$ b>y|m9>oX7ƛ>s>N==r > v:5c \we>n"Yaʾ_3H(>=8g1x=R=掾H>,Q^>q>1*X>w>=:ˏېraM־roUn~P> guyq[T=>A*mT>qZ侟=2 = Ep>L'`6܂y=bR=>=PDOpc¾ؓTO>B>J=wؽO=>d<==Wѽɪ>r^ii?/=RWT=x2>̼V=~=?<a=ae<e=7>ut=yf2}8<ȇ,;gC>Vʸ:^= G 0> >>o:=_>̽=2|3>;<Y;ȵ.>^ѽߏ>z4>rZNm>="q, -rk><>NVsE<6齃=R‰>x[>'P+wHQ(>ƽp>JX=d>2=a׾"에%8=>je>$ }>\S>*l<ɽc$ @-EN׼<>b=!Ҁ tv>Ծ=ɽ8:elM;T=!۽+l]>2<h=6eI>[bԽ {=ʜ;>^ûl1|>[1>C=#>"9ǵ<挾=>` pd64%D>D2ؽ#=2^>rnP(x Z=Qa2֘> oY=’+> =&=&n=>ii)8>f>r&)>aa;>Tb>=sT=!S*> ">P:>r>t~O=μj>!>>0<ೀcN>}[λ#=n4Rr!cP> ;i>  =Sau> v (~>pY>ྍ1Zޕ>]¼뉾N0hfQU>XM>0OBD׼>;4F^a:ٰz>,"Y4?mp<v[r>T-u0>I=Y!-ǾMN>܎#&>`Y>>p=6ܛyQD=q>P"P8re=ENُd1n>S{ռiнm\<A=֤_ߏR,\4b=jew=P>1g1G?2? ='V=mq=^=џ 'ú>h <4ct>7e㙾. =X<Ӿt }!E>ѲIP;=盽51VyG>1 =d-U<Kt;>dJ>?>q%>Zս F>=W;(ek> G UN>(?gy>/Xɫ>==@ |>>=@  ݈=9= mT/=5G?LɼYľB>uN̽0=N>h/P0>"$>%>vm&t[+a< r*>1K>yN&=g=d1>-½<)1p<16>!>>.1A=[2余cfV>ɿ6>o=ʡ2>Pޅ>f5ýeU>4{F5k={S>K>UԾ*_>6~>y>z(> U7=򼶾ɾG>=#|%>lW>x=FoNX"=&S>_?^tgRt ?XO=B<0an=!䵽^>ދ?a=yFѽtz$au=Mb$6 =[=ɴ O>7D]=*h>}LɽZiLhj묻O =>n@ _Xo=Ej2IqPѤ= 2b=Хޱ>>>ɺ=3U%7o РCTYȣqIM.>F%>򷅻V>>oqľ/-νG0n>>-a=;-i>=zf= I>U0OyAUWFCCd=BW!3=L ?b >>`=2Q>n֧E->+=G >q1˾ڽԔMԖT&=ձ> hM >{1t=>)=w> >>m־PW>&:=}$9?S=>G70>Gj-w==uoe\>or>=>=sQ>Kڽ)>&v>N>1;t>ﲽm{MDL!>h7޿pFAw>(x9=aa>>*刟t>F6>:F_R1=ؑAL-2༟e=> >|;=BDpd7=Σ=GnƽU'甬82=J%Ԏ}>sTBA=;Vg>B<<}0=U>*@>X3-)9==Q}MV21>@#M4M)i>ھam9W=2=H>e='K򞾁Vͽ>t>^fE>I>q>=/J j5#>=u1=D?! 
?P$=Z>8<3> ѻ==̻ 湾VK y ߽&r.=7/3< o>2F':= =>$^供9=o>DrsR>?a<> jx>og=kOֽ<]>l߽&FJzvq:獡>o:?^jS>[A> y>"r>x? =2LT]^1>^ݨ>/J<ч?*m=#~S$;b=j>2h>>C<-pW>0$>uT=t'>aD߾:>9BVr8>m>=$=%>Sh>Mg2>īqFW>1&GS<==b 䜺Ɗ<>Z =d=6Xsjm>Npՠ=֣ >nq=>]>X-kiP=L{>fMn}< s >R<<}K= >L ]V'=d1'>=T>=xa؜ɽ>'= >d>,=eJ7^=R!>95݁>4k=7:"j~jZWa=풽uZ*=<=fN/ jPA1?>xTC=N<:>`=>=V8Y=҄=R|=m=罺|UK>Nwi= >|uR3e>>>ڈ "<F;>ɟ4"6;>b>>JV)=ai1r>K=e;a3M>9={:=S>>耔=UD<=!K&sghs"=>Z=>??>…֖#>f:;=ͩ>k&il'ͳ=@>,> E>uffWgK=ަ>ռHW<r7B>={4^I9_>+`:>n>6,|=@>i!B>|Ҽh > (>P>S&=[>UQ>= =+޽3Kd>pA9>fT>=_}dY&݊> BQpq>(=Aἱ >kؽu>k9>* ->ν>"'Fsv >s =;&0DC>=>UYɼ z60>½=]_>؁5>' xv*ž>= =p3>;7>T>><ֽ>*$}=IxZ=aý>s *=: />3~IhNa>9=q/>z>=<85=L󻿕=\V'UWj>={o}>"ʁ> ѽZJL$ג;ގ[>;Lg7$>+>%߽-q=w䓾kz]==.;Bq)>U">WvP*= )>* мܧ8sgﳾVB2,뽮=#>@>;#>ʽ Ev= H 禼Κ=Y>40=xu=ة=8?L=W>>j|m">l=7'>= 6>x<)ۉ왋>=T>6>Fͽ:y]J 2=;W=!$?9FH>>z=3 >3=XVN,F>k m2sĠ_:=*W>2>b֗>\>d`U:'=r->նC,Z+"->ॾf ;0<*>=X?$uc=0ѽ$I>tt">j$=_F>3l6=<ݖ9zκ=;vᵢȖC>vx1>X;a>{ƙs=h=<>`GW=7]!A>a S? *tQ=Ӿ 6 >aJq,o =WG( _^P=iIQ i:=(N>)e>]ԄpH"G߽JY[=J9ۼEˀBf9 q= 5%>i%=>P,F==CXTPCfϼhAk=]~<|U>>=,Ӆ=Y 1C=x='=ļ'O==I%R@1>cDcw >GjPy}Yq> ~<=[>1=g*O=G <,YR=k=-2>'->6ٻB 8>~J־S"kN>c}ug=Ks|x8=;>/ 6|-P>v>t-!=Z9?*,pNJ9>A!X'H>ض<+?(K"րy>>G=b>=k>E==_=:'𘽈9nMXEH\><>8>SW m3=*$M=ټP3YQ3<$> `=s%侷E%>DQ5%|=ߨ=: ?횼di<뺼ċp4=c=&n[)XreK=#R=@wk>LzѼk;4;|q>HÚ=B֗=@>,Q>\gIӵ5<.=NCH2='菉G->$=}G9 ݽ>>(>:3EUhٽ3=0=N>C8==>pľZm>g=$<>Ko>$ս:}ɽC&Hx!> ='i>Z/>"Fdu A<>EL>Q_U>>/=E> >B>=$>T>>D侩 <z!>>7>#">S=űu< P/>"d͆8ElW^N>~ u[t=>01è=- ]<1:QtЇ>zx=p ?~M_"Z>c6P >~;^>hTo>g>J=a>M=CS=*c >D5>1i Fw5=bs=nC>>pgB>dp >@@Ӽ==2c< >)#i $FؾkFU [ d;@<ެ|D>6r>0#p=돾%=P-=>Q=>ƒX>Ա3k6>"O>l8<̾LL>S]O=w=;o'?Tv?=̽>F½,>GKC=RR!=v>yΛ?bvͽ?>G>kݼJ{=i걎Ntd$>+[4>«h; ߾i'>/L*]D.}Ge{¯>\>r,>!Ѿ* >{:>bv,y=<yoqcY[>K=:=xAyY>= E|>i>Igљ=>=G#<~)ht>9>A~z?=7=SZ~[=6<6>"d>U >Y1Y>̾ =1>=<)=\>=}Vavk=.F:>9˽(>4ZzS.\J=o=c }>泛i!3'k;&"'i>)=Pk>O{ =r><=Qtk\==>9J>>>RQzٽE=X7=u@>.>8=m?lYfL=ν=V=><Kݰ9>) I6>Y1 >S8>*>,kX%>*M>w9>Q̫> =jks󤿽.{>L>d1>P->i>_=@$<vQ>yx>=Y(~=LQ`0>=i4>(;f 5(V^4>wټa>>>š;L4>G4b=| h![Y[>@=p1õQ=Eþ">&=o.=o-1>x>=>Q _ļtNQ6t>.7z>=1>db>3>.MP= 
?C=.>&=*ݾ{Vۛa+C=kH4->}qȑ>X}D9f<>64L̾@U>})!ٽ>YG==;#> /= .+> ;#G=Cd/="9="z(>/9ڼ׾N% >=>[=3{= ]>vqcʾS=ƼAP>+?0߻prdv[ŵ ?Ȣ=v=SԽ=.M5=> J=*νpX2>u`K>8z1=&= ='2Ly>8K<¾r<^x<%<>̼wQGT=k>%pӽu>n`_S˽j>[t>/:U=@=PV>*<բ>9d=mA;[ ͹wn=݉=Y>r= 3;лt>f'm7)'d>tw$ʱL> e_;S=j_>8ڱ=oe>)9<ھ[c>".+Yf>>y=TN0ܟRQ=wU9>= >$YW>6얾=E]u? i>^=nV>=YS9.=ۥ>ge>>>?Ma>L= 3=;2Ff>=@X=2O ix;/?2>Φ(= ǎ\@ŽNcԎMy.ujp>'=X(=>=>' j=p>RA??r#=-B ^3¾2=r"}QJ`\"43$X8H>7>>T=Ł>?%=.;#=<쾐m=u\\jst<] >.= wn.=kƦ  s=Za; >,_F=̙V(>P=/o=t'V?3­D>ҷ=(ξ0D3ݰM>jR>"^.`=s>=Ì>"=>Ih;1H%݀>>'?=ry p@l\Nj=,M `,>>IhRw*>Rʾ;#?e=W=mlO3u-O>r >n6^ ~8>~8 ?٢ oC j4>_{ WW>|j=,̥X<㽔s>2:Ev>ࠅX]8-3b>t/Xw>bξýH=q$>9==ޖ<"g:/>X?۽?{==>' dV*ϽÙpF=-:>3ۮ>|>B}>B>Ѿ[%>qݽ ={(=rzkǾ~>_6`u*>̾>>R?6S>NK<zv=qM^>0>t[ս E=x==hZ^>M=B¿>δ<zڶL2=Y""6=<=g^WJ=:=ɍUn;ٖit>=H?>><0>$꽯対n>>WGI^=35<<7<RVNMy>>=`&9><="}ܽ}m&ﭾSS6=RԺU>h>g>h#8\Xd=>NG>s뤽j◼[=%hW*nf>U>/Do>6;>1'

    a^tw=H๽ R¾>HR =6>I@> *+Ŵ¶+y>U>˪E>w>sľд%>5>^raŧsL< Dq>w7oip>?z>Fɽzc=܍>4>Rz?>$jC7B;?j >[x a{>u߽>x=>AZ==a=:=>=j;A<ak=wD>Ҽ=>3 X b!-}ҙU>A=?i9>oO4>H>о긾"\="<7= 13Ӥ>G)势>>=ýK>c5>Ɓ>#><̄>^JR}%}f_ w?TOk=ه=KNb=RսrL>6#>\>X=ȻgO>9h>a=A>Gi> *=Q|>e߭v=> ?`=w=Il㾣}=Ǭr>n<;{?>Ⱦ$">zo >Og>:=r^C >0>붾1>ˊ<=GyH5lU>=V}>=Fa5pD= >><>=D,;S\x>AZɧ=H'>[><oܼ0O40 C.>*>ҮPL >A83M">Af>UO<|h>q<x>nn*6/q>EX?QQ=I>pMX,YľK=ܾCY= b=a.>Z=y^>hj=0W<>qԭf>w潉m==ž/[?vy=︩:W nϾ޽T >nN0Fɽ+eRg6=ZFܔ=B>J=S>VUgcd+=$>%=gs >==P=C >2$]>Vep0`>W1?>V>395)3< >[m]d޾F=5>DY ˎ>o;NJ=6?=h['=گsX>,=>0B =4K> >}w<& 17W5 =E\JʽxH>b#=pu%>8۽&ƾ:d>>fH>fU=>8!>='>kmh]G>'=E8}=˛p5>xf< >m=>>yc>x4>X$:߽ע A=wI>R>7> . J>~&#+wX֗<]٤Q<'>p<2wt>UAA=s 2>uf{#>`&{>c}>u~=l6g,s>>=2-;-=}ϾF)=+!v=ù=n;=2$==>6>6}1CϾA7*~>%6>=35=HȄž Q >В5cվF>K~Խ%3:>d_I">+OB43Y<؈]3> h>\@Ƚo*>-J=(N>i=Xsܽ=y_>}*==j/2>\=dI$=y6B:t4>:=̉>@Z>͎>D*]==n=k/5E񰢽YZ=9k=>A2\?vI#>.A:叾j=>==Oq=+QahFB>P>fa$>ڤ>~뽹սh>>3=L+(>Hž =нP:â=ID>M7S> V,׽!0>{=ۼ8>H=5i׽?_2x=:>_ae>ڗ0? X>V>R;"=H;<7S=^+@?@[V18= =Ŏ>+f>}v!8>]sI`}Ғid<:F>_!}&=G J|@9>;f>[ѽ;>MHx> >Wk=>S?*[>. 0.=<#}FN=L%旾>w<';'jv<[ݽ=&i=7K>0>;͉>> H>l%t>ӟœr>=[リ<~>(4~7R=y󻖎'=ϐjLh; PwiF<)єB >ґWOS̽==L :;8>9̊>~=nD> >73=@>O 7ek辽V==8>;:>y=fýc]Q= g>/=>mw>=2>y=h=?d?oF={ez>R>iG$z>b<=>0@=`= >Tڽ'CIӽwEsʽ9=S;>:=BK>@>H>ɲP\=8 >~t!h=>AN>闽=z=i һq=s`=>T=h>F==ʶ<:q);b=p>a¥;=©=M'@uÜ>Xt=<;. 
&'y4ڌ;S!Խ >(7>>ʊxA>5 Ew>c/=2?>w>&A=@[ >B>35<>Z=껾`3<;t>/ڼb>sὋZꚀ̬Y>*˾ׯwC='>2@潗YӽQ> >ye>52>2>QG>Fx/>iSh>,{H=>Sm4#e >x`> ٜv/>ɾ@DV?I>'S<'Sdžq=a>n>ً>z=C&>V>]|,<$^:7h>nх ;>k3N)c>D) >Wb>2A>O4<`>P]q,H=[HCZ=K>,a,"opy(>lR jg=㣼q=)>@>v>~zFw"/CM򞾬w~ fι!>Z=IO`-&>JP>be5M=~@:+>Ck=oƽzyݽ05pn .Ձ<|><*нКZ=K>AK>վ=& /=E=0T>6nŭ=r=dY_>89>T)>_&>*+ex>g< AB=e>Ғ=>)>l^b>/:3>Խԣ> <+5>%<7ļ =">UH>Y}ܾW>>(QU"?~q->> >B>>N>d>ez9BAC{w,><*>E=Hj8#%3sҾ;mC>J>Tx&8b>$#Xg@=и>̊>A> >X\:W=;ͽYyhE $S\ !1( X%`-iN,=wl-m==<]ۀW: ԽJPN?>Z?*h>ϐM>$>: >Q<>/GWs>)=Un}i6>K=mrZ߽A>$?vYF;=M򇽣N>=q > n>3럽 ǎ>x=ęZԐ>M0Z%_=#)==hJ+B<>vm=9>74>2n5㢦 qν=vĽ$> >pO?TI=><& z=>%⽕*y=a{,nN>iԱ3Ka>>}?/L>c" o};,AN?ؒk=h>*nn>0 :>5}ʼn;pݖ;>:E=!y>=ٯ]9G?=X;>j< >E>>7F>.k :=9Hz=X=y>> =i="h; 1>Iw>nP< р=T>%,rMhc?Q20===-%61=.>#=Po˂>N8>[[>p8N:G] Km=EOq>뼬QqX=yMy);c=v>>KU>d2U%@Wx< >1=;>% ,>nf>'@p>Rp2S>h=\ <L%IXN>@i$?y$xp==-{==Ã=U>˟U`m>^>˴>f>]+oQ== ,6=i >NN>krxJ y6Z=S>~laZ*鋉Wꤧ=WG>~,{beܑ\X3M>;<ycr;H<?MP3>Ct;>ܸ=͊] @>uoxD)>>(m̾>ό:VM<2$ ==)_T"ZɏcX=":>$u =2Z <@/K =vP= IekD>ci/F>|=Xl>]=_ =;E!QGI>]$ZL>M:An=?I=J"Ǿ=) Tݸ9ͮ> A=>o>C&Q?p= > <6~@5]/x+!>_xA=Vo,KUy6!>LJz!OU=?>b(%D/i t>zҽ}s<$5X= ?>==f,>=Ł=>{JE>>KYo,,<)W[=ՠ>HLS^Y=6>>'ܙ1ٽ ; =z?Y\}4=?=DUؼvmÑDs~&@:>ԛ>=IN>/E=Xu>V =O;>Ҏ ,S>R rB# ?}>3="`~=>R4~>y<潠Խ=&=+T> >Lm@"Kގ~SbPj;=#D>0?j>\*B|ݼ0#G>_%??>{}>ψս>M== y9>A[=?\=*G>`[>Ы=>4x=`1Ab3^=|n>S׾ZL>8le 5>Pts8R5=njxW <8>N$]=:'m-8>Tۼ ?V'? a=>6=Ӿm|=Ai<ò=q<;^轈=o%==7a>H'! 
>CF.?;O:V={=M=M<=!#f ?ǞTM>T-1(rR.mI=Ĕz6>m<#{,}Hl>;n=秽J>5x><=Zj־+M2=/= >"~>Ǩ=LgJR9*,J>BU=!/6=E4t>*!><8<0޽ ,=^a>u> >H=W_=aFM pW>n=Rvv|(>f=wfva<4 0>IoE8>D5g.P>1 =>G=>>%/:{>@@;"Z&~F=׭ov =>Q?= 1.=V== [{>󛽊=G>UȾ>>d>)g*>&J>a$> ?jF> n}[>>spǿ>])=O>d<>Cd>1=ˉI{h<>ֽjj徳 ־ %V I>>㥐I>>==%y>MIzn=ؽ=xc>>6>NF"О=>w=?;6!4<քvc>>iD>݊=݊> o1gd=ODD =I?H="T,P=Fg ױt=l}z=3>Nh=@,/>i> =qq`:Н;\!ᠾKIE%=H 4> <>T+D=]>O= >t9ͼp]=>{C==ԇ=Ap?σ⽿@>C=qi>CN>Xr߯x/ Ҿ,wl򽣤=қc>#7=>$2)?w=`4j.>Mjs>2/?>*<#-D>=ZjTB3>>S"-=xнH|ܾv#l=C{>l`=q;C2= N=BLL$`>4>0">T{u2 ->Ѿpf=r!=-f=?̐>>C R>%>j2|U-a>s>ü'~>@<]*PN>PM>P>Ճ=lBž`gnnZ>; z떢5a /ry#=N̾f;=B=f{=v>(l<"=^=/]==`=҃?̝8S<=k=YߚϽr9> ?U$=>){V >'=;>i_=;=4>n; S '>8H=AO=X]>;Y"=LK@>>U=d-U!0>=;>A^D>Q^=+=֘QtDr> q>v\>Z;D=,]>M=!UE=+iݾ+==BR?=!9=+4:4K=nAj>!=>f?/he־T=f`þ#N>1P=, >ˈ|=V>褾kF=lpѼ` >5f9|;{괾׽ZIl>>C5G=PA*>ޕO=젾ȥK>,[Q#2.>C>0Ⱦ[>kw+J=؟=>ۘ>X==Kױ6>߶z7=SN=>+5ĩ?\oX޽vⱱ,3>J<ʛ>; q=;mĆ =>p>%cg>9>' $>y=''Vj==h gWi<  ;x˾?8\)[y>8;Ǽ\=Ir%=F=wlR=?'q׎+=#몾Ǹ\=)>o+=u>ͽ/9>F><ȾJ=J?;<^(==)Z3g>ߛ=+><_=!R>>!==Z0=.U=;x< =}~5>dþbA< <'%ʐ>\q=޿>!=ZB^t2>k*>ӂɽA1`=9>= 4*>-=>ky5>9畾8>cD>]#>c=}>֯>]wA?>\$>m=,/><]뒾>\>3nI_>T>y %CCB=-D>-kAfV=OHLH ?tO=2I}=ꍥ>!M>^3s> i ;q;>[m =N芾=՛j=ؙ;) pXѾN= )9zt̙=ҾR=ͮUҾ'|qY r>N>P==ӽ!KU <@u>͐=%չ?Œ=W">=)7σ=3޽z|=W~>\ #H=轤G@=i<=iXrP>=>'*>D0=0p=wp=4c>B\n%-+::S=x(>4>6/>U>;>l"> :;&d=q}žv=C>dwv-#<>>\sO3><v].E}=l Ӿ5g轷=/G>)>B~^o~pt丼E>9KP>O댽*o=m=2ga=6ÄE6>>V#u-`0 VkFf< =q>Ӳ>$<%>(;QA>t 08&Mb֊:ľν913ֽ1v1>t7yjXK}yF='>" :!%=!4̼p=U};>՘'B½_0H=H>]y=U=8 }=tdz>(E>`f褾.=%>&d0=F4Z>I蟾M\b؁=D[=64>i2T==?hU>0>N=-n>M = F^=뤾.W>7y>"=j_a*T >@ T>$>?a>xl>l >Us=ӈEd\T2J>eS=Ej>ҽ"=m|`=戰]S=2Ce+>9ш q>S,<݉m'QdP{>[I=$w>=Ƚk>+p=Ae<7X;r >p꾊Gͯ> ;D>8>)>½k =ڒ=Ѿ->-+>9ahk eW>ؗ=s"=Dm׳<¸=3<fҽ}=/a=>?CU =k2p>wf9;=O}<ؗB6:ӵ >>>7%>s 0;b>5> >-Ic!AwvQ>ր輂肽m=_oƽ>L5t}>>KBf>H @Z侾>>7#=X47c2oǼ}F==>Ճ>&Y6qx=k='5>>hQO>L;<BU>>bWZսByʽSc= ?zߵ>>̊$<>,>pi>H; =rci >lK=PU=3V>#A>QWtDN=>6:Z R=>!&=jju>N~=~: <_ЃʽŽ 8=|۽ q

    a=T<>Ѽ>K{;>G=;EQ !>&>6_C=`o;U°?8<>==#C8>ݚ=g\Dsv>WQ^<=u'=[ ?f=>P>6>=Y_.)<0=D^a?>=jK=(>N>->@>laFJ=V>==#qXu35>4_7># =;h͍&>%;#>a;o!Mm0>Oeꋽ?>7x5>M5~WOվ qU6O=O=t~&U>?=x>þD<>8߽=Ň㛍 [)ڼK"==zx!ͽ>>W=VNHy#h)ۻNS>}HG>P>>ሾ^۽>=O=,>9D:{ѽ2=Q\;.=`>i@V2þK=J!>R><1>(>rf>.1r >=T<8ʛ>Dy>=|aj>WA@vQ(r:g݇~|=2G_7">*>k = >A˽г=+]\9hg=\r=oYo> 6"!qK< =/>!½Dؠ#M= ?{ͽaS,>MI((nm='*;. e =XX!%0=!=[0=A=+Y>pC>)f sXS<[YC>0@ò=PI>"> ==i= Aּ]=B=>4[?~>Jku=|D><w>>B:= nh˽G8.>y>7=>;>|7 qPb.$>=!;.>2: 1јT>O@s%=_>Y)_>pU>Z%>)\=#Κ+HP>ہJV娾 >n=4{᭰rd=Ⱦ*?zu>|=l= ;>u>W===>=;=WUm6><0Gv=yLP_Ѫ&+)>JݽS>' +3?Zf>LYȾw \]UJ<=O>+2{=*=,tI==N=K>L>7Yrt=;[K=bHּ= =e6cJy==Sv=T=r=>>7V^Q̼=(H/>>\8~۽>hL>=p3:>c(:4?>RIkE>C$ \zAr<ن=7 >b`!g>=X=:>Rt.>= ?[҄m,>ϽG>Hӭ=Vu|P]=S;=?f><ġ#2^=Uv*'>ZQrU>J_L7>;T< d ?v1Y,O9= o"*=.=žlو>߹=v<M#e>Bi~O>,+< TC⾽X<="5E u>2e<;=>LF1L^>;<> Hݽ<ܸ=>X=r>:=QfR@=~2:CT'<#="ag=4Ͻjм{|$ǽn*%>^ޜ<XG-`Ig=G5=0A1>V;b=1^<H@a¢ت8 {U{R>Dj=D\>D=%W`=~D>W>H=g=ﻖ=>[=0d@>%=ᇑB=9<<=]<_>@&!=u9l=q>J=Y,V>ގ>˾h:> >_> =x=Oٽ˱<ȁ>0,q6[>̏|߽6^z==_f囖=ҽ_b8o>PD)U;7>Of>S > W=%=}>[Y r8[Ƚ[Ȁ+>"G>&2ի@>s=:<=<&>L <<>7\=$.>1]=T>q9| >h%>.> #>a?>)=6%T>:^=hνwX>- >赽T>=|Gbl>s>/lP>j=a={,=O$>*)R>uϼ=i?s䔡=bl<-8C|d׾|I=^R<0v=h!v>.h=8]W >J򕾦x>ۭO;x 4Q=@#= >S@=p}=\Z0>)v=]b>j1cN= UlK=,i>VX>;o\>SQ7=l<\Jo'<m=yJ<w>o>3 0Ta>p̄>u=i=>W5>->^u>~t=q4=>}f'>m=a>/ >J}>=R->>!#U_wfo`N=oj=YG=BI>wn==J 0=#m7i187=UԽ>LM=b=><ij<>}'.¾qF>T>,)j֊A=YJp@>E=):}꾠ᅾ X44=W>B6>EZ=Ćp>A>ѱ<Ĭ>n0>{#jW 1Ty~= >ݳXD=s't=|z>)>즾<6 )0>=>@=w"=-໾P6=4E\X>*>| >NB<.]>^⯽MľE'gJܾ q>n=P>e}8JμԘ4>98>jFms\=UD5>,!==i=@Bm>֝D>$;7O/?Yh=t>R>Q>Uվ'>|H;r='|L:&ED0>G>$ͼXe6W)惾?>=-I=> #WFT>#<T>K=н I5!Bh/82>/y8=z~"6=:[@=KktF?Zi>h=E>l;?Iq˼r2>T=D>u3 >"rýN6=<0X'J%hƉۥm+>>ie>>ɍ=Z(==K[<-09#N1ą>&ڽv+>UGA;>Y/|=-/<>WaA>ר;<== >xFP<=gFx>o]= =w=0Gw>> W>J>x :A@rk=; Oڗ=&= 򠽕[=.=V$=0{)>gJ)ڽӭG+=ͽѾ 6>N>(=tl>뇪a>mV>^8v>=ԽB$(>ᆬ>0qnj,,>=Iw=;}1,>Xڽ*>qP<^' Yi 4 2sU>Sd>u;=ԹH= }6B½  e=Pv>< z-ͽ@=q"B*>E?z6?^{>S[nb=}]lk>;iF>u!B'!*2=4%>~ZK׺J>ѩc=>>A P>w <חn>Y L>$<30>>k2#6>ڇ>={秼qY?T5>W*t=CdȾ|'޾yap$c%<+׽TBHX>-3>U><=&>`W,> 3<>>rgT R=>o{=Jž?Мe$;Z?>B>`{'Y>,=}|] vnsd =Ae D\C>N{= >Y< 
f>ν2 ͽ;ׁ(ÕC+7<`k>eVA=>Wid T=Ks)<;>(P>g>F(b:P#'={ =x9vV;ͼ넾G >k>8z$=H羷 z$]=0wC=>ߎ=E?&?>2d>uD>ԩp=f*=(<=hӓ> t=05">42<-Q`ߡ==J64=>?F_=G)_>=a>o=2bL>bm@y\f:VQW8<O]>zT2=1>(>-|8"`^5tT䡾I_=f\[==2> -w>V̺͚=t>>>a>iWF+ñf=q۽X<>Δ-I>eサ35ݍ>/Z=L">ᴾ`.>lwKŲ&>H\=&EbRV> X>R=<<ꎘ>Q|==8=q>,.==t >xn)>Q=lF<>+>>d[=j(?c>s>+>=PG>ԡ+=V>ﴼw=q=U -=u=j>۽%>~Rs>ʝ>Ƒ>Qyˁ=~dߙR><ͣXKAr ./`[> f=I4>[݉=XzO<ѽ =7ccV'> ==7<:>wև>Ⱦ,3Wvp>I> _ɖmވU>+<镽e>%<-0-<&>%>F>ѱ>ُTH9~ɃK>©$ԾKY<>= >} Z>_Bc=SAu>Ẕ=\f+½>X>2@=j|.îZc>&> 1ͼ=_cZכY=^[B>Mr>VbV>|>>e>0vv>*reN߽= >3>,>ġ=-=WݖεڽUat<]Z{M˚n>c>|>A>:gJv;.DD>ܽ<v|YAT= Q b<>RHLƞ㼴f> ٠$==aV#MV$a>:>/>x>lp= ;yRs>D>OȾI˽V>tG1=X/">=7!\?HG=E9:Mp.=\<&HҾ0iR{ ]z+>N>O=u>/=/7'=c!==Tp=r>@ܽJO= L=:o-Z>U~нgU%χ]N=><>Ӡܧ?' >^{=<@ =C??`w>c>ݵ=nW' ;>Q>,>>4ߢ=ze>e=f>U>~>; =g>@B _-="UQ=̴qWJ߾2Z>#=">26=Aύ 7>4=J>h]xG>wG=抽/jv=#>>Êh^=JWh>-׽= > }7f'?9`0 >V0Rn3R>ٕXUGEi%,>MϽ2 HM>N>7=wrռ$т>>|ݾjIuܖM<nW)=R1`>e=t=$ >]d=c6W>W|x>#?GQV=Y h|>=): ;ڏ=w3%/>H9C=:iQȽ{<;eL=ü/BG=X{Pɞپ?tv=C>WN>17Y">z=yQ#x+ > < >9=\C=9><<15 J>蜾Bw73s??D>*=P=m>>>TJ>V=`v͊N@>>N>>5=?JCc=/7m>=ɾiP T=>Lm< $>-r>[ou\G< >_ 65=C$>;x=͸>?>Oɺ1 >kN$y̮>5ѽ", =O j}=W:.Dhչ1<=,\=l?.>B>q>тvfJi_ C>ۿ0?=<=ҷ ?>@>/>(5>U"WZ=qվF3[.<nx. 
x޼~= M/i>Iws>Ie[߻ yO^=F>@e'y{=5y ӊ<`=]K]W6>oaռzk >[MEy>;p̼ ?!iNeg>JIͦ ={mE &]==X>>N>ȯV=?֘=HTƅ=E-<>!M1?r Tđ*np>g >Er<3=LyS >=}6z>M2z=_q>=k>Q~>=Gy=,_=j>2+N=k8T>)>Fo>+>I?=4#Zv>>e>(&0=4>^Qsx(ͱ >}p}=RY>U=7Ž=2b>DzI>~?>ۈs>=#U!hnd>@>*>W{W=@@><쏼'F>O>nN>&z>iX=$%G=W>yX >I<ٽn>M>f=J6 zd>>:>HҾN=CCg>p(>F{U)=w2>{.=ˤ>5Wv>Do=tR g;3> =pto>s!b=E<`X> vȣʽr=+>߾:>?>%=pӌy=}ؾ 3;(>NݼE=>2=j5=0ǎ<,eϽܙY>W=,kM>yOὭ`@([=t>z-b?#Q>%:O=Y>;\>^>퍔$UQ>wg`a 4y>傾= =ǽ=7Sܾ^ܠ>r> AuИ>=>*~HD>y^t >ud=!HϾbR=H1j͑>6ʽ[NL~ܽ>5=5 >'-s=T>?@>l_=YAs=+^.>&s̶7D\ns=j>b>Rq=08?8Ȯ=ސb>ךk`>==Jɪ>~6(V>Ϧ=+f%g>ab_j6M>>>s>=Nxڼ1\>1>>Kyi>콰9=>̍=*c>;%pk> }=@FS>BX><`>#d4>R<3->.>s>%*J#>U= q<&uQ>X[>{>H>jdĥ>XJ ソ&{EZP:>` }w/e>T$>}>S C s>9H?>C=U>Ղ#>$B!:->~>SaIH41j?~=V%4:s9>b[e>Ku)?E]g>H^=>d=/>68>/%}=d ==hO>\~t>G1< *$=^ >|;T=_P>jl\S>.-oYc=;Ա.>[p>X'̽>z+\>%>>`?S?t=BT=Ha>;YZ.>F ȾA>=>IG>s=,>W I;p>wW>d=bp>?5>oW =r;/=[:-=r=pA>XFzۏ=<>c7 >ə=I 0>ˆ>_X";F?#U4lD᭾]`>DԽ"=>!r><+> o=)<=f8=n=ht>88>>4&;>x vǽV>I ]#O).)=!HǼ,V>Vg=>Ͼ=L=j ]s=ť=l~=~=%[>n=H ,=膬=Qn'>E н[>"=c**\io^O6=9¾PS0+9lnL>dŸӲ<-=KO%S>ޫ =Jc q\Ul"=ȼ`3,}qD%m>愡;>N8ă>+<4$]>'>ϗhie>"ɕY>Lf8?0+<`>ZP=!tkc> rľ>8=S2>:=$=O=xdAY[;>J{T?+>!&[;u㰽a V}=e&`5==P/+==^엾qf'=ŋ>3 W?I/dh,>Ug>'=-S>E g> >U= =WG#F> >$>%>Eww=l$>/'|>ޝ䨽D>m9>?̷<64*l#>/n>sW8)x(==5Q>vkM0>C@zJKt!Mv|$==:!X.kVm>B>=o>R# rB*<].>>'ֽ$/c>)/>V>[n>7<چOEB>#.T9%R !L?< "=#u>[K= *\=>.Y=.dRP=aI} <cپ )>I>MO=vfcD>=(S=c=J]Я倾E=&=K>= ;Ƚ̵`>/=?>D/ɽ> >w]>yj>&v<=c2*=nC">"lz0>o=i>K;$7>Ǹt> K>0>=FD&>8ZYì>%mS>Zm/хI5# >mUC8?`;k>`<>Hp$}>-<><ýf=w=>>=o=a>HɻPB=q>x= ٳ4>(=;j@pS0=vr>/M;b ?=Яľ6?=SL=f=b=p>ꓽ~>H=O= >e>URp>>V.ybR>r=Ι>Aݽ?3!>{=Sn.b.>:=O<;X<L8>K>tDнυ> R ~˽t> yW=>Oh;pO4W5r4>>|.>*>0Ž==TTM>پ=Q>>q=5='>wl>%B3=>sP_=%>>T>*w޼ꎇm`4>YQ=-Xx==ڜ>meν6 ,>>eI@$=.* I6=?<->rVw>}[;>e>p=7===AʼB54=j=4=$#>===68+>Z߾ֽi)IE\=ý3N I>T+>K{p>޾X=!>P#?:Ӿ*-=^*">[<=e>b>^?p.M`>>>SR>(<{zW>wn>W ,>:=(=⪚==A͇ />o9>׀.>\&>UzM.Ͻv >l!!3={Ҟ>J,>n 7<]L=Bp=lJ~>ӽzrU>>[&<=%N=rL>M){<>4>0)>Jc&ټ+?>5C =n/=o=.>vgXz= L>_l=­>1%=2= z _>5~;! 
e>E>;ܦ]/^3뽲C>>=>}t >^=B;>`>OWX5MdO2NO380dX?>S>{>e*>?ӽ>=3>|H<&=Q_f>{Fklz >a=̔I>6i4w%=``>] 9=J>X63>QܻV>:>k+;:8f>=Kweer=>5wE=Ҕ=;i7j>iĽɋ4>ZT>SMa=տ=lWxn>>Ί̊d'Y>f>>(>=2?h`P=G=<= [>i:;v>=B` 3JQ᥼XI=*>z=>n^P>6-=tͶi>>2>=3Ȏ>2&@D!=?y>==/>> F@ӱ<N>5=>{> =C:=)>ɽ(a>z6>S>VZ> 9>.;>=wp=r>}L2>[vԽqW>YqH~>:=| i=XƽpE=`>J`;{m>^}|> !C=G޽Wnzbn>AW==*=?T- >{= &h=>0F>mþ=ȘU$オ> *P}pϾ >Qͼ >.:>:5>!==8}<)>B7&S>?L> ZM>ž̋{>LJ>QC"޾ܻI=wh)= E=>豼*T>s >J=cǽk=]%b?` @<$:%Ś>Xϡ>1> ='8D=mDs/=.>Ȃ >(w {>0ǽ7T ߽`>&>os<][=4=><6 >Үd=ۼj>o6Xƽy͝>eԾ;:D?'> =ɉxpq+@>8Xe#K>rx>-b>(>ff>u߻\v<Ǚ==+,:7)=w=GW,>ュ=kn>G ?5o=[ҽ>>>s #[K\P>6>3i=v>=> =q=s;^}4 &,Z='n=0>Λ=j=x+CG=5e>>#=>4QA>=Z" g=sĭH>IˈP> yY` =^pOXe=$>7>Qvͽ *"}9>+=\I>55e>=CU> u==d>X9*z>WAhqc #* >&H;=۞=d>>8>*p<7ӽz =Mb>߀}+jXC>FM>B=%=0a=1g1>6 v==WyC8Q fe>1>gV>lD=f >!>==Rƾ>|;>!=8_>=_<>[<|Vj >=8qC>ѽ^5sni=(=\>me=:l=E#3PVL>>S&>s>=0<cs=QB>3>:.ݽk>=~>hl>=@^qjͽ=z<˓>CQSXbg>%Q=:O;=+ѳ~>ʔa>OĽ|a/>48><==A;9\_?=4 =zk! >9=ɻ'8O,D <>j>;=T3>n񽯮7Yf*A>1,uvKc>l8\>2?پd>Ʃ==L =̇a~$iP]>5t\>W8X3O>fcν7==@޽p vs9,==ah>!NV0Md;>m=*娾~Ȼ7>)>co>Sm=qU=c>ϝ>:&>ěS>U:>jv-G/=eZ>T? 
>!(<>z>PB=Y=9==EfM<6=l,L =sq=;E>Y7_=VM=>Bkb[p>>93>yy>>(>{,j>D=mW<2=t>>˽M=uXٽ>[*> =s>ɠ=.eދ>]2>yS½╀F|>rNh+=>t1n퍾y#> 征i~>V)2>yw>_>>ZD>ߏ<;=~'>)|> >rC>Q>\I>5>M>5>XgJF>D==7=҉g,s=>w=f3 >Nq]v_=T#Ľ@!>N>>=X<:kإ>-v>I̽d?>@>ѽ/>*+n>D>4K8[>`F% > >;=/C4>V&= =H>6?>0M>(; >-=ZF~x>@;h>J=ʼBe?Uj>>dVyX$ؽN>x𽷝80 >6$7キ =õ >R_n;M%Īν+>2ZB=4>|:liE-PM=Ȉ}=o=\t[o==D&<~ȁ!>(N 3˾-=>}'> (C=~w@ļ`=Rx!gSs >B=A+=W,>[f==_>(U>B>Sy>%WOn>]罴<%P>w>$>yS>Jj6]>]xG>~Ķ Ǣv#=.>$<P><2A>㶯{|hY`)d%>F|=>f< =C>]=Jt>۞M >cO9=SGw: <=[ ( =[>XΥ<=-@%U®C =0S¾>|=˽?_>{=5?>۴0uZ>+r>=3=">Zհ ǽp:) 0=SL$>φ;uw>VֱI]>> >ۙ7>PyC@%M/<´DÄ>)cS=*xN:)$<< I\<7<{v=/d@>I[D>"y4ݼ9<.1̟=*<'f=&իZVX>/eث !=J>><>ư=5><0=ںʝ̾ٴ(7aоa>l\>F=_Y/es>>ڤz>bپ؍=I{=1_>k<]=/<;=jͽI:~ n>ex=ko==|оذG\r,P>*38=c&>iGi>,Ǽ=>Aj{s=MF>,=`>pG>E>OBIOPyX=ME>ↄ>K>0 Y;Og>>Fa=|=2uQi>מ=:;>D>{>}"V OV>.>*;>T< l>p=彽>iY>=7>E=b>WSe.=K>/>;<=>;y>d3==>[vB>ޙub>wz=)>>>ݐM.&D>!zU>.Ww=k<>; b3<\ػ:>EOK<=2+=+aB=l<>:Yv>g>>=ݽ19lx=썾m>P==`[UI퐩=&>v;罹O4 a0k>ɟ=Kb`=&/i>kɼchnN,>#I>f-oP>X=pց>.`}>^>]-R+b=-FM=N-> =Ӵ>{i>n=ƾ:,\x?`/ ;=ӽXJGzl |=$ٛ=Z2>2+Ľ;$>Ƥ>X/f>Nx+8;)[=:=2T= R` U=7Π?Y>x=АX>Ihg^d=>0_=>1 +>|P.cR,>=賾:0=xC >vرy>>2Ӿ1½ȼ=M Ͻ{>6f=n,'>L^uJ" >,9=p=%pb+ٽ7=+Mc')nƾ6= t;KA>5>[=+S2==>]O>ܥ5s=܈=߾3=(>o=>;m}A>\/>F>l<*L=Ɍ@w<<>W>>h;,b>zI= >'ض[=JZ=jNl< -;*G>y=H;=q> kJ\sľlA8o{>hz?)v=j>Pj)<>ջ55>->z=d پTExW<-y*T>=`[>Q=ͯ=4>XX{,?j>ǹ>o=_ɋ>y&ٮeUD=|>Z;? 
B>ɓð9(d>v`¾~{<"Y@Ww>>༁w=s=v] =e=<]J<OW>~5V>:=_(Z)'2=!u=">x2P>I =@<9ED 1"#>x+*(5ڢ!> =Nb,cn9z]>٬=G>-b<=7E OC@"Gqӯ<ڌ> 3^>m<>"O=Y۸V3o r=C𥽱l>I(=q>= ]R!\"Ͼ->'$>23۽Ɨ>\*=9;]<a>L[>T'ü1k=qN<1&J!=73:>7@f/>b'L>8Y_: ǚZ>)Ii=+"oA<#E f!> (K<\8]~>e3;>l.Kv7X(}>LӽK="oE>pi>3Tw@\\>35>>D @o_Ծu<> B&> 4ݽ5 > R׾z=i,==K===jb;V]=Y&XLr>A{#Y>x3u߽F;D+>=D/=>/>=>>1=;;=(xI1q Kj߽g>`=>o'g=SȤdǵ~h-d:޽ >_=6C>i7> } ?|==:}>c[l>fhS=+:6ؾī1W<>9k>;=bxJ6)b <-:>U$>v=T;H)=>.@4=ھL>΅=?gȾ?>EQ=RѾɍ=(=< >=pS>n=j6=I̲mie7H%> i>FL>cռ ,jϽ Wu*' Q>= O=>4=n>C?,}*{>>o#>QI>>=D.>4H=WX= ȼ|۬=͉6= Wཿ R Guý'^->\'V>,a>3b=X>@=fv<&=8M[=>H)=>?>[o-O?|̼O<sٽC_6>V9Z= h%>^G;j!=>>2>Q.=P}1=>0zPp1<=yL>G|8=6j>\d==?$ C=l>Z%="#zH>i=A I+m=[~A>ԟE=1m>ɃF=#ϽM&=:3o+ >2=qb >I@>">dǡQ>DKq_>?0 >^G;l=:֧W>b=RE8<=Pv,>5n*MY>_BͺX>֠=`>B>׋>= >L0=ے <;ƾMD＀> 彯l> Ǿ @1'{>҈Nj;N\ K&kB>9F)t> r%>ڽ< >P>f>N E,Eڻ-ǡW|=?q0 V_>=3&}=hZM$g_>?a=ypS=k=gb2=u =[E<fyѾ1ϽY>m> )7z=%ټiv>j;= Z= #֤>|Й>W>Z>>L@?2%>ra<,?2Ӿ=oW>_MϠ[t|>G>g'Kr(nHIֽS=fVq- >:F`PX=(=aW:8=~#>{>|"B⢾$-> <\=IE=i>2*[8=M=;>f= !>V'" }'p'l=ҿg\=F! ] >gU5}޽=6߾s"=Q?A$> #=X6=J>}w 9:>79&=}%z>@н6>,>ؙdR>^=dUGok>C\&=> 7h>;>9:ϣa>14=u=6>ü!<=\>,)>ѽ.Ĭ7#>^eҞ(>P>漡}e;>,=x=o+<>-(<վczR>ξ 1j>%[?u.$n=l',KU9>>Z>H>=a!x>0>Z8C=OE>)U=}U>uJ=Fn/"<ⰽa,><=+c>1ϰ4>lY$>I=>,?;AO4D@>LI?BH>QV>Ib޾6 ?x7>LP=tD>*~Ss8뾂f> 7wr}>B<>ɰ;i> @> PO#>>L(>}|S>=,=%V{ۇS>>X޻z>><,=>s1UoT>g=3=4=]:=թ>O4.lY&u<>{<\<2G}D>ҧk>:=$K~̟0]>B=Lbr2r8J">NBe˻u}ν-?=ח>ꁬ?>1-_M=񠽚H2>KJ>#5>K@>>Y> +=x;< @U >WO&D>->pgAK> q%|K|8 A=s\= ~>j1A>cM>R,> *mz~5;k<}W>. LuH<8<ۤ ^R->.>}`>d=`oPh>>3AQؾo-kAHH=^w>Z3=Wf;Ɏ=1>Nj位N=ľW|( >9y@ll>>4? 
Hˎ>{%>f>na>AC^/}TN >vq)>˩=Ͻ6N7>JM C<>-pXL= >֋=v<+< U>ĉITd =|Ș>%="z{>[>M2=` $閾 _<>=4~뽂#=*A#= 4ؽ%)N[*>sB^>5)ŵ@`3>^YO>J =İ2={?/m=HV>ceT>Rk;Hܼ^>塽=Jc>mP,>z]=U(<҂=})Y< F<}B>)x=M=ULӽ} >>>FySٽ w ZlF> "_ݙ=p=04׋6]>0=þM(cml8;>rjy'¼\>}:xK$8YiH8>&6w0f<ͨnFo1> U<ٞ=Pnf:޾G=-7:>c> > O=Ds>f>Q=E_vL= >9zDJ>ᠽo=>h[,*>\.=Xر=IнT=?>bGm$>;>(=İ>cգ>i=>y=즋>=_羧Fk>b=tE>zxر-Ҝ&=v=0>hĎ1܍렽>j=,;!= ƈ=u/$v>%Yʼ8.>V=8Ǖ=^Mҽg=iAgB>;ilW#=&GDU>G='02<&=feT=->L7G>~z= M+>=;抽Aɺ6=>>>B=jH>[4>å=/ TsB|D%X>&%ɋμ^>uD) 5ͼ\A{>s^w<>aD>OԒ>i8'9bPKN =Di>>r\>v>>ML=G86x-.5>pupʼܖ>H,)gn>`̎>ޜo3<{>dM=ڃ\>FF9>"= =[8>B=>K>>a~=v=oU1Z>q>ǧ>c݂:=%>0|>6 0-ؾB!_>Zw>E>tŽO=A/>wsBHWs>Z>_u`$RI=x*Ƚw>"c=fVh>K2sH>76<1%<0=._=' <"E`5=2}=A=v=n>:mӼˌg;Q=>֊3ؾӾU>S/ʼXujNhiz0Ұ2=KaY8c>x\>t dc >G %b=x =6:9 >`>20:Wf?/&>8>`ٽ@n5{:vH=~=Ti=[>Ѧ=q k=~:֬\p̾k=.6߽yS>4=7.;6Bi='^=̼>u>R_hNWx缰]=q>o_ֽ0=>AT>ɘǻyb=I T9=;>=-nn[>g;|>4q=,097=ARdo<2=!<> > nE>rh *=*h>>u==uu(==/s>s>GG=?>܉M>t=?;=-p>t]<_x>>@Hr=T<=:).~q>'ɽ?B>5%ʖ>X5=ʽ0?b>E>1?7]%=?sY>=>Y6>挾;/>S@>(=F=ee>t>Na4= w?!rE<Һq<>n=~5=>=z ?>a>\=0>?3D8.н>` >0h 3>p{=n=]J w>Ta>1> @S==>GI=>g >Q'4<ӾrY,>h>s[SZ;p>K j>]?y5=S\:>_>r=I ~~=6>YR=Rϲ߾- ҽIb=A᫽=6l> 6>9?>c<&>1YyԾX>O{)A'֯USldǗ,>. 
͉fm>}-1,CR"=tPͼi>|=nĽB> ?ٮ`>FV."|۽]G=:>Y]qT =*>;)2 .%6Y>옺+f<3q~><.Q4c%=<==N9)V/>螆>nJ7=V2>yК=o>C>wbG=YHMc0&3>t>c >^'̽=>Ђ2X=(b%>*浽k/>C>>JP+s:)=ؽ;;5j><>^ =8[>E־8>=]>׭/==ں-;1">gjl&7;>B=N!W)>u*>yHF|=>\>:}ؽz6>R]W>$=l=Z=׽,’8u>z=-줾( F=[fP=0?)>n->2yĆW >LOG2DS޾oa?>_>N>ŽuR 5>$C=`>/y=aa>jS =v= >?=k=ܽk$ >YJW=þ!Q> y>(#⡮yݾiĄ>>2 >kzQ=bf=]=rޓ>Y>= G>\=G=+ܽ /9.>;ͼ'>)}ҵZ±]>L:}qNؾV.=,q>k`= q4=z=1> ?=4=-u<9 ߣ>P&U X>?=v~>>waGU.?t;HP>b>2x=ʕqsB>:eA=$<=FI<[><Ҿ^={e=ʘ"(H>f,>+=(KKg>B;>'[žy<_X>=J;6s> Pֲ>xT>=4>t8?>RsdÉUD==0>4Ѿ*=*>=<>tY>LM>_m=i1KV[O=?+` )`d>ۭ=社F$>=F><#p3>+=Y_>=k>Yb>!w|>>>UHjչʝ>R_7>PE(j=M>ͦ=,^&>@qn >M2=4ȇ-(.> &>4s<>ެ^ >Y>s=F9 >=:־z>y=&V>3<`>*>l⽢㽎>R5ͽ"%<1 q>J>|"ϽI>0MLG =& >-9=sf>K ɓ> uX>*Ս=NUձo>UւQL +>(>i>M3>r*z>* Gtc=<?-+>r,:>m=~>l9$>WrüAD>&r&>)<޽m=Ks;>&>I>isv#`_>pL>{n>=m>T>m;\:)=ѣ>?1f=8=}']c =-=0>񐥼=j:4^=-=3V1ڀf(é=y>Th?Urk>2P>Mtx=kA꾿>v<' 1ٽA {J:J-;gpF/>=2>>=j vk=Ge'=ǿO>lĻ+> AUc;>#ذ<]>I2N ZDq =l̽:Ti>_0>xX=ƪ">սhn >_lN^>{ŵ<=ҽu3%3= =.Y3>>\8Ə8Ծ* ?+B=et=sj23,;i?==(l>^GO>-8>8ɐ<i4>{p>X=@=&H=͂=>ZR=ý0}즎>5p#{̽-tk my>/>I\ڑ6>`IS>$c< HB7P=H=L=o2=<[ >jrn>g>q>-Q>_k5 (2gD&͉= ~}쾏!Ľ 5>~Z>E=ih >%M=|t/qv>=`ν`BX=CMҽ>p;4yAF>)I?=3d>H<h<,'=9+!=<=g >dJ=+֐\0 r==)/=oA,y>%V1>SVWmi>F>0i=a+=j@=U=FcBFʯ3>.>_yP4'C >6r/OCiY<"G=̙We&l=4MJQ0Q%צ֔=ffRûT=eR݁=N>Ӌ=R >N>םXD<`>fwwc{Ѹy\?=aըJ>*g>Қ$==˭)O-y")2${>k '>Q>*4/N+4qt>B>D@ݞ s{ڣ+*>l&^PZ̽=7ľ [XOjf=i/k=;\=k꽰wk->T:<_< =m=$v>Nۻ>윅g>|XU<İ>~?> 9þ>=ULɾt=sH>!=;C >~+ >=p؍#+I$\ gQ>m=^O>P>>R 5H֗>ѻp> CD==Y(>Qbg.>O+=Hͽ !>,.i!(oQEr&rw>8q8b==n>0ν>Y5ⰾ r">&A>J 5I!C/=>hm>RE===v>nFղ`]O="6s=*>Ǚcdt}`$sL>Y/>='ֽ YgL>5q=J=xӄ>l3nfW->h>x%.">*=콶 =;Lrgn=۽rz=3(=.>-;D=%B=r8fս=$P6=w=>@1qd=vC=Tp2fDھ>t>`TC>R=f_> gW >L> wN9 ?2z=aЙ%|=\㾾k=<>v=*mվePdL=\u>*f2>zUlt⾻6>ؕ= ,;Tgƾ?h!|þu=>L=u>e9l>>=ڕ6o=Mq>vc >q=='҄Yo\ hz>rt>Y6]=gT$(}?=P;ͼo!>lr,|>.cL>^%Z>ע=|=2ν~>}>B!X=%>0砾vZT E> <^'=:Sۥ<4>]/#>c<>@O߼U/ƶ~7=O>Ƚ>K>,l';饽j=kf->C>'>?:U>=͚T=%"D$==5.H> ᏾^=Q<$ ;e=6>^>>0C$>=qs=&HJ>.Px˾Kkq 5<<@Ona >m.pI=!>|>Ht1=)#>^=&u>Zo>-=m,,H>aȞ>~x /!>>ߋçNΤ=$ T={>1ܾӕ>>B9B> s <-=#>!"=Ո>JA3= ѽt=ߨKǽ#C=Z#>=&=T5J z!Pڇ~1DÆ;Ըn>bνxiw3>H89=jH 
?2>w=S>6k=e{[|du֊!?3>3ֽ1%r<4x)K=>@> >=OLoDAiߺo>!ʇ ,>[Ľc> 'm>' >w`L>p>(c1>"J>*eټ ս=)!=\Oh>FYܚ;|>\>XBhF=%B@ >JŽ<>  =iq={ >4\DY=d>kL>x>>J=,wվ^ez="=9c6<=zpJ>XB#Nbt5SX1@&l>'->>O=p=62> KQ=Xɹ=QY[>;e-젾l>ּ.4=ʼR=$#S4b-=)=3y<ʡ`>ľ =t>=gqܻ/>揲m/>{B ӜjI( >c>'ѤY>є=Sڇ=xs>[=>_k=Y >`>b>_e;=hQQ>3z>V>>>B>Lly>u>/<]=IFV>>o>Dg4;Ď>P>%>J>˴O>`z9>ݯS>%>=`>:b=._>1>%+׽6>f >EK-؎>JFQ7UE#RkJ; > >[ҽӅ>bB齎Ⱦ4/>9=R[E>l=KEfJ<)ZEve<@*։=5t>Y>'>0[NT®}>E1>)߼<ۺ>"!Ɏ=kH>S/>pa&9=>uT>ho!1D=;d>ؽAs=Ն >6HVK6>ڽNw.>kȾkU$=|mQ=kE=p]'>Dvhi!f=>\X>$>;E4W==%= tQɽ n>$;˗>\>:vH/Ь>9飾7L0)>lp˽~Ћi/d>L>; =(뺉=?GI~==݀ >|PS=>m=.;N>)>r)N(j=mH>59<F(PZg<0Է>nJ>쾾α>{6=T MPԼ]=n>Ů[0 O ::>AW B)=p,>aQq(s>`ǾA=.>#FX2T#<7*e=6^=Z՞>>< >>pA/; 9=^>F=0 >>ol<$g>|ֽuó:E>HA>G=9>e=vDs{Y+>Ըm#J>,=R|)>z@ٶ>YLBȾ]>N>~> =MFS >=CE^AGc={ҷ-< =Q>4>>Z4KA<>Ճ>N@=Z޽~>G >R[/ /C+ :6>>a> =ս7>y>퇾,% eUë72>r 忩\K^=& >=̦=X/׾@< =4?"> jn>҈F=>x<g贽** .>_>=&="!>O=Ii>¾FI><'B=Yj>+<)|O>7¼<"[l >?˽o>G=r}.;k:9+BC = 0>{ت>(`,>Pa< )н}=o)<@΃>皅%>Kߎ=u_>ܽM>9Pq>2\a+p+>#'ԏ hHL0T,f}=L@>r"l=w^ɜ;)>Kc>l>> ߽\د>2XWLKz\=t{f }<=[d97%/>ڟ=fy?4>!;5ƹ>hO{==f&>s>`>[>8> g=>r~=ƔxѐLn>4>i߽"}=ĻWg< >ɼӽU_*>Ӽ'=1۽EB>.>l^胾z<== ?>[h>x2v<57<` d;ڽ->>"톻~>g@U>5>/\=Wǐb%`i>g{tS>_ٽvYR>q&>ȧ=>o=I 'Qʼ/X=}>Gk >>>ʽylR<&xL_>ս%&> 4=J,> =t=1b>)3>֞=Uof>dv9Q *== Fs>Q>>Xۉ>+ b>jW9ݽ~ow==:>=k?g>jy?7;>w.ɴ"iJ>U5>5>*(Vͨϼdgr %xvZ}#̋+=;DRv =Z f>|߾&=0&jh=Ǡ>u>hT;=¾w>nV><Yc>=ƐսU=,>>9 ?_A>2߽ٛu>Ⴞ^=b=;ű|p>Ȟ>)=~!!F[=sM>ջ8\o>`>!Ὕ&= [>k"݌=ҾWŽaԩ<đ὏> >?=AFP彧>x'>3؁L}<&&Ů<ak=e>)>FM>;vfOR/\G>ח?`=-N ?/`E#IIy=8',R~D޽ ;h;>>A0j;-buW=)~>N>;q=ht_=K>fjp<=> >e=?9>覽N>-==v:Js>=G>c4><|86 < >l\If_Gq;GXD<Æ> mp>gݽqӜ==[>9j=6&>{h'+rUi>!>pƋB>*f7> ?V>/>QGJ>p>~Laz*N򽽢>?UH?|F::2=%=lczr>U*9>OW>L%2=ֽv>D?<1_=̔|./ )Q0 V==cטa=o>!=؊^>tq=H>Z7|)&6C(+n=z>w>Ȓ=QX>s7 =|.> Z=D>j_c=:T.?;=U=5=*:BY=&^>3>?> _<_|=>d=H=I>=,>>|={^{q=>IG >[>R>n)u \wc>/=,> >>|- d^!1i=ɇ== ^aU>>G0= =XAY=C{/1ڹ>Bk> e> 6K*fx|IOP7/5=T2z2^=l.=>ͽ">ľ >]ֽͪ=<7>n>N=]>ki=A7<4.>>q=YK)E>`o<=w恾E $=[8=l> SY&ľKN,!>>HbJH=3=SP>5=TV9>Ӏ~=g=t=ESS<,6'X>S >[=ܯ=Iz6I>3l{A)=Iqn>8A4z>>F}=URy>=e=43l=S%E<;D< >W2-ށI>޹V(2U<kj2A> ưm==?^t. 
p=>X+=`>> h~=DMs">=;}=>Y<>dE?=|7\>IƾK>=쌾XҽJ>Ѣ<=8= Z6%u=O=ڼDT Bнb r>?"NJؽН`>^ ;̽K> ]" D:>>1\Kn>i>>>!q=>>-P >==.8=d| =>:=l}g4k/=wbAB >>~S>l`>'NLQ-=V>~(>G-?rX=Q=jm~>3>}+>P>=Y{(>V>Gxd>@BǢ< =>͌erg<~ >s >>% I2Y= 1?5`>q)Ӽ:?E=AĽA =v>u=E^j=z-Ӑ0#+ >{t?ٽǮ>SfzAZۋȾ=Q>P4>j7>ށ>[NuNbKu>޽Q!6F_s>=fzB=zG]p=[m }e=n꽱%I)y>?C#=~:оI >Τ<f>ó-=W腾=Jς>>=) =%es>kz=Y>_ϽmLR>P=%N>NBf<'m>϶X䚾1p,u/?rp5L=7@U>Խf8<߈4ѽ 8Fe=d:> ;Az=>[ǽE½E(*u>P?>! =%e+|.?>Ya=^=x[ґF=h|t>p]pb,2d>6Q}>>(6׾[<'v=kΟ=Pp>l]Z_=7¾Ǿo\=gtỼW=w=a=Wf>s\ʽ :y +w$74c_rg۵I>#v>+btr=<'{=f{11Kd>s_>>=֨gt"P>7mV>LI>CI}:Nʾ@콽<_'>jPgmDU<> > >_G<=">9=1?PD=m=ů K>(y>v(>S=h\'b*;}c>R <-> =-A6=+M\><潓=j>o>> μ&>86ZQ>Ԇh9=G4>=k1H>]̤ƽlx/ƾ5>A<)-;!@,˽"4@ܳ5+>=|>¯:!WQt=^G\9p**җ B8=*=>$W5F=R1>۫ǻ>>#QV'gDM>:#>%%5-=.!=tδLTr=֬TK6{ h(]4=/>lc>"ký=ͽa׽x|W>M ?> l;M>P7<45cCyҼ>Ys<ci==hlm<>A]x>=ݽ 烾'=ړ K%(G@ӕ,>y =>3rGȽ i(pG>aq>.<뾫$==*58ν4>q>/_ <>U=v>$ >+4 ׅY!S =v>3%%wȾ .>v}7tW_>poe;LT>WZ>]?n*>3y=_a‚!{>O߼sM}Vǟ)>F.GSQ=Hibv8>#>O0нx6LGi)==d/t8ֽ >۽UD˽k.<,>h}>Jگ>X&M>= 4===xl>gL>{=<>E߾Z>Ez 鄾8> >0\@>a=" >A_=5n=ˊ=Nx)|qD> =ӰR<:Ѿ?>c >C<>]ֽ(>l' >q#D`>uO<.;EZw l>0K凾3S>=ת=w/=2<=ޱ*νǼ3bü1f=y';w>̓r;do>=i>]½b[6IZr&Dʾ#>T== >FBoaK>?>e=OESmE:q=8оU3(=R >ì|;8s=n4=7G~g>3X=MAE@ZMO->K}\p9\Ὅ#>4.-WTP13>Xf뜘=>tXT>q{ B=֣½ݗ6.'=Ζ=+Yl<'>>]%I=R>~+R%i=F<3*M0i>=>lw# =;i >q=l=D¼r@@>TRƫ7O=0>}zE>pϾ2<<ۑ/>:Z>x1>=n<̾us[>Y{g=#AX=>[4>D>h<,e5҄>ѥ>4+>m0>,==8>Z\`\$S=+>%WP>h=;.PQ=OV yƢ˞*>CѽmHm2>??E<;=N='ӟ=Q>+RI,>d]-+N>.Kv=wmY5>Ɖ==碙>7QቾD=<} r˾{

    C?U)r@>->g{a|i==N >>z=&1>&>=ʵTnY<a>J>=>?oLJ> ,"7"\=qJ=ix&׼|C# >#rf=K;eI>xQh>wjev=CL>} >š>QZZ=5>G=~`>.ʽ"$ >6 Se=e={>Aqf>CֽK'BCcro= >K{a<>q@۾IY>4 h=N dآ=="2>Z j>>>/^=v@>6lC>cCRD=I=u >yR㥾*=(U=U=I2̾>=}:M=֋D澆 h<>2%>plQ>W<>⼠ <=ڏ$9)>:}zzɧG=aJмc޼)t> >>N*2?f 9Hヽz(½.X; =Y>`oO>oa6}nܽ콇D><yͼU. *>aIknzU(]}->OI>,wZSsč6>)p}=։j=>)E E` ,>yyP>=Q㾂{lu{>?0=՞D>e1<XLg>S6v=#Ա'վ*c=FJT>iQ>V73>.=kSH>H=\>>s>/*x=?D =X;LY>:<q>=.7P>3ն=7iȽgh=W>N9>=2S"<=TK=ڔ%2L=V2z>O>/4=rƽ\+þƕ>}E[u>QF>JZ ;i>>n;Vr3=E=u:=b?= 5=oļ>7 ni=iw<A>S)>&>E>:i=+ӼL|F=h>O>l#)C)xL>ؗ=W¾Jx㭽_>fJǾ}N>^>==>_ڜ>ɳ>nVZVQ %S>ǜ2h9 =d;z>*=5EaǢ=|3@= :Xt>09"IFdB>/<, ;>W<`轄،=3)z=:`X>hY>>:h!Q=Ý>UeZ]jq>Bv=,P\XU>6>^\lԑ J>s= ㊾V%;%U>l>՞z7@ɽJ= b=8?;<>==+˖>2P>Ơ#u(=;+>a>|޾4>cn>I({<=-و>>}:>H>ξ2=C= =z>\>H>j#8JƇ=^=iL={(# U>FΝ;+E=qE=BgBqQL>#3䕽ABL]ic)IS>i>ý <^Y> > ]"R+R*>r4a>=*z-=>,>/3>Q>0=8f>(=>i>/q= N>y=v6¾[v})v0>" >^h>\s>tE2>4N}+&,=6x=蹽<,>@ ~4wfu>PB>QrKľI >l@3 >0?hvK7޽KY<>a̫>:>Lڞb7 ==F=->;U=RH>]X>(7dw==:a>'>0=S- <">{p^=|/ec9uz>X>Mfkؠ=h!=Ž{ι>nTUU>qSݾi=wYξ,j½j#AZ>-s>Ä=Wd^,5qK^nӡ|=vm>0ƽ݀=c,w?ޝtYJ.>^>ǽ (/yW"0<:=El=L '3 qc57<>5h$;ޖb>'M=0ǃ"I >"d='UCñU>=XGB>t0;*Mz_zᙢG5>p쵽> >= Ms>;+P=g>SXM?f=I;=?5>r==ri_F'T.䊸<=I=wb˖:*M=m>>6|pݽ&ľcI>D>6=ۑ;-B!j+>FN~jx@3w`ý>㢾%>ErþT﬽pc!u7=v/\Lr=VM>`>g7 zݥ>&1=Co>?>e>4}>>Q(1?Z7=XʾFh>Q&G>7=gG4^ɾW8T>_=P"OE>^>=1z^S  <]oJՕù1|Cü*=uk>p@_:='=V!!df ۟9>ˬe=W=H;\;v+9= 彾7=ȷ=BBѽr>4=$*=> />[vb%Lc{f=A,_8>GO>j6ZG\a>6ƽm`AvN=m>3>2=f>uj t >*I=Z=]>x>y; ʽ>3)=|^#<~U>Y'k8=d7Z%x=gɨ=p>4(57;^>%< ===9)!>ąe7M><5>O=nõཕ\>^>>=r ~>FY,>=8>+cyCX>)R ?Ľ!n==h="<>ٹ꽧G>O>>Pt :>ݾkwF=2@=ٞ >$>G<֜u$4>>>v-E b>q^>K\1>F1uU>. <Hr=ֱ> FĽ˾I>W(m=ުNBԾ\X<?p=N>qY=ؾL = >8ƾU=/>?=O=6bo N>`QH>e==.kث,e>xCႽRO>2=2܃>{7SuŽ%j^?8嬻i Tc㾴E>YX>Y=W=ƞ>:~< >3Xy>旼Mue5>;9#O@N7>_>`=;(HC> ?B?e?a=VV;Ro˾L.<2מE>}ch(>Oͽ>8e!? 
>6r)>c>=Cz<0# R->+>F΄=[Sn>M->p->vu>4m=3P==iXT>j3>Ja>>Iʍ/Qn4>W^=Os>F7==>z>>=>+>=?>&Y>Q=W'0-xIɽ>f J0>GY}>;>%= P>>>%)x]r|7ĽMC>y}=u= 'j>[="ݾk= >SF>X5>Lڸ>y鼎>>k ?-J>]0>I.=Mg>ZӻB>f>&?T}>(=>5~3>ŏt7M=_=>d۷սWlD?b>@5u7>Q;T>=XJB=<=y@e]`p(o.N-> ɜ,>2ۆ=]> ],>^{/7f> {?n֤hL>ɒ|+>=<'=t)rŽs>;`gkqU4>D> ~,})>\=P=>͏>=b́>lmI>^r`&<*ƭ$>A?>0f>="=Z+=tC=sX>-5V'R>C/QT8ǫ>5份JG=N> \=t"<΀>F>=(=H>S+<&aļML>W($x]b.*v>c>i= =I< *>tuB=<(e<щ`t>p*>"~ =Cf>;> u">}Ƚ `:>>t4>Qx>IXH=W|Y T>}}>zܻeNWIޢ==X:1>B=TYW ֽi+mj =G; 5>R!Is>>%~Z>@]L\Z=uY=0=/b潨k>E;E=w7S=0̣=DU=C>/=ƽLɭuk8v* J>KCeJ>j`<5;Kӷq>>]M=ilL?姂8E-Ž|>v>==lY>I={e><1>:v\=}_> ֨<="l騽ȾT!3v>׾'=Ojae%h->)y=]06=ӓDTvx侇>:%>+>kB>c|>_2>`y7,>6=;pw= :m8;o7B:>ᕽb>R>\=vBy?лK)=A)[%W#1O=Z&>dI>lz6-vpfw=d={O > vӠx>Š>= -< -=>%ڽ!9>4>=>Z>o>O1=7>l;!ћw 8=} >ds] +=,<~UZ 7=櫒/н>HDe=辒>꘼ǽi=)v>$[>}&l[?N<)>Z>/A*= qS> $>:/=֏Kq=uV= .E> I>;>~>F=x9l3-h >^8>=\>ɯɔ=w4< @_>iړgP^> >q~=!V Q>:>Q>f'=j{bC>D#-@໎mQ>US > <'>򚾥L>Y<} a $ /~+6=6[ݳ> ey}=>uh>d==#=F>sem#_4=!J>( K֯?;cRj Ǿ>W:R= >O}>楖=E=臽}>1!L;v>F9s!Cq=_MO9d= =+޽D=5D->m l=<>pQ s>)Mň=5t(lýu=akXD=˚>.4x"`ɽ1ӾFƾ>1Ksܛ>us>?;=T.K<6P@>ܱ=fA>>Tui^C!>o= c>ymN9,>Qen<cI6=6!C>щn= Hx>Wpc>y>>8>#w={I3& 1=a;7X>H.>hӼ;k>1C=7 >W>\{ B>g/=b>T/F7>7=?=8uw <=_}=S=}<=NxQ 6sJ>Ԏ>3a>h=I>F=H|Žܽ E4O; %nU`5$%=$j>,>u>cc<=hV>*b=>~׻XT>{=Lz;>y5>,{>i۽ڌ=!Fƽ*HE>Q> =žg,pF!>W@>T+ ¾*0>~,>e΂?o >hy;yj>EHy"=*\=E>cxIc<&>u=wVf>\B=:=$d=J7㼥p$=a!*>>5_b>:֓;ӾN ;8> ۪>!D>(=T(|>a> v>'=MN!6v(>~q":=t:,+>s1=>: W>ߒ"c> Aq=ry>Z=(>\b==սq('pK=>+Tܽ>>`>4)>z=Zc>Bo{>t=Va'Oc>d=G߽/pA=<7>Ƽ+v>A(={Ra=3q=>M =>=>(RwGǽ"> ݽU?c=&#=C>N; E8)>#L>D>=@.y>C `=>Fg=.=&^]۞= >ٿ<> qYF>u>I;>u.YpZ=Y0;wi=v=b<{i>s==cDs4$bMP7n[L~ ;!:Ṻ>˵k>f꼽["Z_W>zs>$=>Vek>ͯ< vM=G7m@2峨?>8b>z=p=4f>:TSm=m\ZH==>E;R7V>Ζ>!*쓾7.ۧ=GT=i>#=Zt>fo;d(>KrɽD =6&^QEӽ=6>_T7>o,G>r>)]ͽ#vvg!>]>A]N۸U="x[|>Aˡ=mk>Ē04N>$n>mG>$Y>\%+>80u j==u>C@;nxϺs=: =) >ZvI=3O>Oz?i־qD횾$t>߽R>=p^< >b= G>e'E=Z=^>=9>ҽN1>=K>ėh~=)=/ =7ɭ~1=T>Fp{>t=8erL 9<@>ļat{=sh}>o=֟ h5$<*> Uv>=mJg>>Ͼ\>H->ƺ#b>I[e2.~=d$t>YD=oB@tȝtc ;,+<ѾY+ ՉT>ƀ>*>>UǽzUB<0T3=]Z F>>Ԁ=gK#_ɾ=*|=v}=0=@=\^aY-TL߽C= 
>nb=kjY/t>m>d<_BE-'Ƽ>1==pj{k2>G=r>>Z< =x~|a%۽áW<<[]=MqVz>9̓>HNFzZ=;T :ݼu=V>r 6h&<~>"`9==kc=q8 ʃ>=A:>rQ=g=`=Bխ=o7VƇ(>>> ,> >y>ٜ;= >y={bo>;6ٺi͏y%x>{m_[Yř=R=۴Y=d#<ϑ>'Ƨq{|V0_v=q]͘8>ځ?l{#_=p=ǽ} jheVƎKXհ$=k~=`>k<|bX> =߀=8>NsD@<-dJP>y3>E|= =⁾=἟T >6=~C5>׎ت>~J^=ʅp=v=.X9aǽ =:>@=0>7>zG< > =sL=/Fo*>Z=oƹ1>>N,=Fu6_=n=5=ڊ>O=b=E">a>P=%>bK>UZ>r=D=¼== )O>'v=Sh<#< == ?f>з<&Ļ3ZE>^t({6F EW>h`]=ѣ>- p>|c|R',>9d=}Ļ= d=ZEQ>fޑ=[> G;T0m+AJ;$%>l6=. <*6>(F=|>+=1qڑ>ZtI=<>=B[4=j;轓qμ[Y=BO>줕0cҫE d6>,6>>켺x>aK>>74K=W*=h췾ؽ9>B> 7>UqA>M>6?E6>J>?u克>4<^=(F5& >)6b'{ >W6<.A^>}DY cGT=;L ع8>ڽR>[^F_V=,=*:p\ãD=~2伃a<XT>r>^VQbbIa/фȽ=4<&^2e%(ܼe =ѱ~>@O'>:h˿*!}RG>TGw>h#>d/(=ȩh=1=wE>=88R>J;>6Gף=l#=@J=^>(=>=s)q,.$=a(>Q=;ZS;P6>,|]?<,>.=?=,d/z=="5=v=r=J=c=2">ǟ>]<)@W= >ʍ>3Rx>7>^@>;V&\6^U3/t#*b0> M|pX>(sf>2=]=> \+2e:ۉ> !=7ޞ>C:hywJ>q>!r$>T3"ɾ8>$1n=j}+<ڈh߃E>0`8=k>:P^zg>>gU>L N<;=?¹x >FЕ>y>^<Q>Ծŗ7+VU>/x=9si.e>`=юw?1?>D9=t<h>#> >佾,fqVB>N<[!5m薾oF=^>3a?>u6>2=>>c<́=X>+ES{m$>>熏>9/u>Nlr=|a2lr5>20>=R餾֙>>ӼJ=$}F=R$Y= >/>Qʻ;5>Һ1>1PO>U%'ԭ=>|S=P=˺=J1cL0i>:=>Ǯfԟ=>*i*;>s.>䡐'^>=%>`W>8G=]`E>n>Ph>Dok>S$N=\St> ;ܽLw-)GU>Vr>o>+;/=>gʀ]d:>>Z>G==]*aX=b[j*qW[v^/=C>.8ܸS=L=鞭?=&>Iֿ>q===L >f>:.n>W&g>,)>OھAI<[=Ht=s=7E@>VD=y֥>F>ZK>/>J*>+뾤2G>>Íg<rl@=_,xx>JBq>@>=3D~=:/>-ĞW=[PTHlU>>z=>> >*=Wn!8R=]y)?Խ b)w>>Ns3D[=uv0>6 [>>VE9,(Ǿd=>fι]>%>>#EѾCm-w\ͽqm>a(a#?6=ih:TYo>FB >#+ >ɾ >pwb;9X>D?|Շ>~cCξHݾgQ>>! 
c>$R9MУ=^*c3cKb>h>t"@w?=N۽4} ,s=88=Z" lJ==7 =־m6H $ҾHf+m}='`=Q> .> `e=Z3o>R5>F>a>i]=?򪟽HǽyXM=Cu w>!?#>l=+齀I!mc>݊->F;s+.>̰%>E=,>"=ԅ>DKJ jLɕoѪ>:?JS;]>Խ.X?e] >mS<[> 1=>:dA >$M>&un~>9R]h +gtR=1v=ԉ#<!>n>Ṡ>{ _+ R>1>蟽& ?W(T<`ϔ> <Ψ>ZfM]wU#;q>f(>=.>&>T=Y<=eR; SWt>sU'>=i5)>B >$>s33YܽjSվ@?\>=~u>{ܕ/>> =s'>]=f?h=rSV=5ܽ_!W>㽵VXy>b l>>B:>tȾ{0AP0qm ӫV=Hhߓ>|_>8U\>&>Nb;=>|[q=  <=>c8=:a=+x=In5k$K[ >&{=I=0> {ON>=Ak>RH>a@QA$=p"ʼh<{>1~`N=-=V=/>t9}(=(Wj=>>'>I$%=,oI=m 0=l߾c3 >*f >@AX$>>ÃKR*=T=1<=)^Z!O>y,(>,;[k>Ƚ\ >=}>=60= 0a/i[>f)iE=g\>=D=9a>2_=3SN=H>>#&=}{HB5=Y>E?#>I>%ڽ>:BN[>EY=dyY>Pm~2l>=95;é<ج.>4+r& C;@VЙ8= +>¾4u>=>o>R[>yx> B>>X n;潮>=-܂=`)Ua>eYj>)^2aO><%>sl>A>=*L> Ŀl=þ_>Jd =o?|h.о@‘=5=q>h̽v593S=(f>C>^ =Ï=>[>,BR=̩\> _E>].ˋl=->>Ahg >k}2hԼx'!+M=|>\H=F>ϜVU=}\(5 -<5 G>/> =L=/>Qqip;ž>^l>M>oI >훒B;L=L;q=^Xk= >FP >(C=ա%y[>ξ/=Q8;Ɵ>(2>u>S<=j>Cգ=lr⾤6v>ex O>##=;=$;x3T%6`dߦ=ZkuEm=c[M=l*2_>[<ˡv>Z+'>l h/X>콚<_jofimcy0f]im>rnB=l=P>e>QW<&>38>GS>=B>H¾8p㵾1= .ѾƔ=_\Ͻ,,>=]>u%PG?R>hժ>">e z%(w==He=";>/n>H>(P>ļv>oʽ>D=\i>X=BDƦP6><w=e}>9l=^.>86;bJ̵h>^vZ>PuǾLPO > =nI=Yv># D4>/~d>jK :<=@=Uҽ6=:3T=SUC=5 >-V>;=C=zEN_.>|>;A=kCw>x>Îi|+Ǥ+>e@{>;þ`>$=w>QٽE>++h>lo=>a>Atljl4=&Vs \)>Pc):[_9^=ʕ>X`GȼĶ>N<+kp>E=;z=(k|>pAN>PTZڍ>1vQaRjJLlHS>K">iJý V<>߽dYaiX:7˾Ͼ>=c ->/`-\>v`M2IJ=:>w>~e 7=\޽褽J>>o0>a0ߧ==ai+뻤3=`{"=vbM:2ݽF=b.ŵyB#>lvjPP]=y⾡M=<} zrGH=5Ͻ ,>v=>=c.>֨qhZӜDADV=>UKɱr=7松% <Twĕ`=i=5-=Ub=p͑; W=̥Y=8"|j>H=d=f<۝=4]۽Ag>Պ=r3>=B=G_?>0=R>`r7bž+% ?d<,u׽n Ğ>rw>9=Խy='BԽ?>^ýʰ>me.C>=x8f7?:0>I>:=6UByW.4\>i>(=>D~﬏=SLec=> 1>F=hT]uRRa>5Q'־0>ſu>H@a<;=?$> v>>p,.< d=4>/=>7d=#9P: )G>ܹOm>.<(={B1h=iN>G0=2 }>/cHτ>mG|3 f Xe~ؾ>1ɾ>>=&ٕ.=P=5>.q> j7T_=Yf>0]'<(f]1>78>=> |a)jgQێA X>p> /=;P.Is >t轶>8>9=aڗ>= 5nVO=b1F=R>qG$-W=?V>W;/ܼg>|>r2R_k轸V=冞>ZekSսx>KY=m>>';f`0+5=$vWz<:f(>e`G>j`G;f(gӽ >w>;A=$ս`0ҽ>E=3==_E=t=>>h=-O^VY=ż>>=4>ͱN<:>GC/>@@\X%?7E>a=c=(G=?=c= <=B>jDݙ>Br>m*>1>T=g)>Alm;2>7=>4-V AW>o "2="j =P>/>*m=;0s<iھg<`'l=E=>{ >ӽ"}<$6x_,L=->)b=+;;>轏>˼W=\p=<ؽ=^=7*>jFjϽn>U5zʻ&=R>r2N">w,5==kj< >N.=h=%>x;I>>]gk6>}86Fެ<1@r(>/=;BeLR;)>[=!KvwH׼@>f>e@d=rT>kSki=Y> 
=yK=0>;>}# ,?>H8P=߽>;PFd;μ\>5>B=,v>!Ѽȕ==M.b<<`r>673Z>έw">xx Yg>36<94x>à>H>":=&=46f>,=idg=˰L7WH>N/0BQ<Ɉ>S2=r=4q>D<8>0'{=H#4=sy.>WTʾT9È/ >>1A=6=/=NB>e}HI*>bJ=v]@>!>GjGEͽVh=j>/^>>ڹ=fYWE>-+>;%Ӽy<;=p#Tw=C@= O}=b#Y>pC<_zB‹+H=Gcj%>\ }墠=W=~3HE<)Oo>Y==.=>'>Ko DmA>\rQ>нa.h>aʳ5b:Ro:Ҿ Dƻ 6>HϼL=> +7ݣ3NjTR>>V=Zҽv>_+<>>y= ޠ> 3=~*>nK#$I>R>s ^)1q=;$L=l\=_HIzb= <>'Q >=U>H_> fd󢽊63(2M7Ӌ{G=T}>86e٘,)>?7*>L ku}?Ž ~@RB_vp>f=;A^hgW=AU9>R:-ܲàx<>;s4'= t=Qȷ;])* 89ϼ~Խ7>ᢽa\= uzfp>UpŽm>&^ڼ=?>+>dj >>U>^x>]f>)k~#>WV>p-/>a;'4{U̽.ݮt]sU> V?\= Ɏ>>b^x; ;=Lڛ2j0=໐F>=OK>=Mq=❃1>jU·>7̃1wg=Бt>g "gG=%u⽺K>y=A>F01=j=N짾5=ḱMw=ټ c8L9b3ZO=XJ>F/>H]ڼԃ>D>>7=!Aߥ=z=_>_=a;D轗s+ٽI~ =żtX6>> q-HE>,=>*%> >z=W<(B=/= 껾:>g< -(=#);g;DH>t>tиr'=2>$5oB\>8yS>c%>.0V>= &=5>e->>n=u0=">+%?bh7a;ZԮ՝>3=<=q\^{!v9=b>3X=#G>P=q=zIKQ_C> >R$ᚽ*>-;\`=>uZ>g$=|>9>_.MD+ H >Ɗ'>Ck*=)=Bqy'$>>YJ>==ྒ>14=F$>A>^Bv7K Y*o>H,?=j=c:<^hl>DƾlА=A|=Ҿu>z=(=l"=x(>8K6oq5>=> >M?e `>x:=`>LG<䋽>&;С=Pvӽ5K.>q>>8=v,>j~b=՝LC=No=E>GYDE>%mz>E7<3>}= ]>̩=%U>Z=e",R<Ź=w>|=d/>Ӝ>Bvy>=u>~(q?ℶf)<><_$>(=-IY>=K>p$=:ķ_`=4mБ̾>/>mN=OnK/=2<#(>\½L>Tq7n?i~ۂ4 *>>;G=pFUC.9.>>>=濽%.d>w>NjD=2d>ҹ" 7=e>TOKI!=C?>ǽb|<q>L*>#c;>󳬾lC>ֽօ=̛{>=@>3cѝ #E>?0J3颎wc8.>W,=А> pH>홽 iz[>',{f]]=>z >V!>m-I*@> =G>ړq>f>p$.>͹ ?O>n-=]⳾h>RpH==>%R-/=>*^F>쎾Wf>@[=>-&>R Š#+> }>>bBwg> %B>FjDV#ykUѻD\OC i)i2>~ =qνDhu+8=.j>`=ZF=>%Fu$>9v>9>jh=(q>>o+$r9>V=x?">ix=ط=/;,ȽO> sR=@p*>g>(w0Ùo>d=dq='za=Ao1]J,2>'nkU3">N^> >C->P==5>KP>G|>-a> y5F=ןHsi.?c& Dt=)Dͤ>Ǒ=  >by-=J}>Դ/H>B<\>vnK=?:Xf>qU>>$ci>8jA6 >!VR(v#}W>L ?D>wPs=IH6>@풾 >8,>;k=;>v:ՙ> >m OO=>ޥ$;e(l5`6>Ưg "ϝ>_ <E?> f>>XQ!k>xƾUSۿ>֗>\2<> > ?BB<2?e>>>t8>>R1^=X*!i=3S½$=` 8>xN|gUL>AT$EiD}8=C==B=x>L >{k W>q>"=y=R5Bݼ(N4>{Q'>EP>\ʰ=G< !-&{>\ŽdT/#>uTޞ=:?>*V=j>ݶ>,l=Խ>E>W=jr8>>>3=!e d=Uj>Q8tw=? 
!cE>=c4<> >[>9 =Րة4>>)=#C<Ý>>E==n=> _=?qsH>m>q`)>ng5ކiRV̽x>= rrJνrqM= =G>>>:i=a!>W><#> Y>tBOD"=8+<==oR6M-䵾\ -h?=>&>=𽵷?=wQ|' =' ڋ=^>Ov=:+!=D`f>Yhdnk= ʽK->Պ>.r%,ua<Nc=>)Tv>nGK=a|=<>c/>g>O=5@;@>*=;?ؾ >#׽Y=Fc?I=/~v0a=+3pj+6>>E>G >$Ѿ=Z">}==_=uE G 411uH>V>j,f >It>=K FBu/=k=i>1=gh<1mY>>c>Y= #b=0?g@ڇƽlݺL>S}1Ȇ==9@U>>7=C>zʝ=2s=$8U >;]MIM=~A{><(]>i=r.>L8pWiV>g_=e>ߺ>=};+[{=;=f>$Ǿ Ց=J^Q->g=򊁾Zn=\=լ=>f|]>v>ϻ>ؽɽO(>K>== ?1={1>6-gĢ=S c6bO߫=i>^fY4^w>M&0>&ݽ Z>εB>f˻BE*J>ܒ(@>;c8=->=浽@\=@>i_0MvҼg&H>x>&>=j>W{=ܲӼG5u%Zn`>ݧ> Ĵ=| >'^0>;;Ҋ>>kagy[>7`= .5>3>XLoh;wj1ze=O80><>(>ШSV>#;_=yYz>2gV>HѷI>w;$=o;'پq-=7o<7:J># M>>̬d䢾p?>7>& E$a(>ud=jٖET =ux[< 7ݜp%HJ-a<:U f>F>jc5фc><>٧32g<=4~ A>D>:=ԅV ?J=Y=t<ƽ<=ϼI̽BMN>ٳi>Gx<Ҿ(j<꨾㲓>?Oa?d>B9 >-7>AS(-W)>/6>_='ᔢ>p>Xm= ͽ=x&;>6?k>P?*+>(=N5)H<==@)5;ܼk=D>:Z> b=')=ɽ h>ai=S=>]̽>BeX;`A=[<^>0ؽ 4˻퓅>o"> $=!){:!>XXjQ>#w&>=$eB>~=;?YGY->( мM3VAK%M=\N<ʼԾ`;G=՟߽!Ct=>>u>=H<|;>- >(p=ľOǂͬ=>Rx= U=bFIAj>>Y9>> >3=_<4>ʔI4>e"u=Jm0Tnc>͹y>&=Q\s u$=R=b>Wo>I=?ս5$>-= =8>v!pv1=&D=둾r=,;+9/Y==m׬=Lb>= ~u mἌ>î>Z>Ԯ>V2Fc"̽rcּ<; %^Q=`tv= #>EٓW;aIVzi{>b?a=*ο=x2üvc/=">A{>,<$=v(սξz`s)L='>;m:=˖> < ҽ{Իdg>j >ϱzki,W0>so=}=qk=x>Fؤ zJ> =L2<=%нrί=\=h='&P=gow>y9>>'-٣ :=t/1>ÏоI(>&L=W=ҏߺh=ڏ >` ?=ܷ l=ȷ=*>v> ާ<>ѽ=>hJ>") =?>e>Y <=[ >Wի>Л?.>s g=q;!1>_K) C> 0>!+H1>b=Dg<>ͦ= k= P5H=x á!>=@F = F$6/ j>(>f]>K2 S]>#L> >ڝsX,I ?Y=3νnի=!2QfgP>K^>C[<,=]㽵y>i,>2==wkB>!D&l> ȼ Q>E>'>==]0=ݼC=]>.Jм>b<Q=/=FG>2ZR>G7x_i>Vu=BI+g潒>>e.2>#{><8=U C>9֖1Aƞ>4YE|^Ⱦ?<>@5ۭLc>)ɺ7=r)Ὤ7P>{==DҸP>,ٽC==1ΟpžSg>v YA=}tg=F.yPl%~,=>WH>>!ޫ==v>3-7>% )=gUo>_ȼ><˲!iLs?">ٽ ڽñk> =fn<%=(K>R>!ýb齂r> '>,>;02=k >e>-˽&>=$qƍh>sAm!=c ~ 5>BVݽ>'=5y>7 >E-b>E#>e=Ӭֽ=SD7\Nb=5> dc=c6 ƽ>tw=v>=@J=K=Ǚb>e9> Z')UR>s ҽIs^O"=!Y>'6(>6T=6ʵ=3V;\> Bg>˽=ּJ>V< `=W=q>3f@?>_5<>-s^B<=H>T'y>ү>>VZgľI>>1s>L/d>|_PúElm>ry9q,= Ľ,蝾m=40=#>6=!s2O*Eom=ߌ>W =T>:ۀ\>f=i!߽գ=6%ؼ4M5 rkǾ2k?u=4Y3>־%)b[L#'$w=׼!ھ#r>J4ѽa2.  g້*?3{8kՇTk[2OmW>}=JKGþBH(>"a=ϊ/>UJ3iR*>Хu١R,. "!ӽսNI>(>uu0>1A =?=j&>(f>:`=:нsF4=#(N>>dN#>.\=PnV>G#Cǃ =V^(iW>n`>\Z>R!m=v=>Gؐb )=O/н9>JiK>F= 6EWo@g=/! 
=w>[P>6>Ӣ19<9=)0{1!&;==3>3>烉>Py> !C>z>$=MQ h*>c5>>=B~\=O>( >m=.>HS-[le ̈́>$u >Yk>W>><>|>',H>Ȍdl>a$=O=st>bHf=F+>tHX=>i ξ̽p>x= WQrk=, ==H/=F>>*=>C=l=k0p>9ܜ,'Aٽǽ<ü0==>F=wo>x̾Z== ˢ>`׾U=w=<r%>nM)sK3> ,%!4-< Ot`CVY>.>qx=&4>m;Pu=bAB=z=Y[=z/=o:==:!<ĠZx=Ymv>e>:³ؠ==%> 0>N=t_?>:>甜;&W=gN?ˌ>PTz:>V(>񻾮L>u=7B<н -t<>2N.z&SlҨJ9=<#?O =hvX>˟=J> =Yeq=w> =ߚ8R ;>$ ;rr_=񀽫=>">JKdW-==tmd<\0>YŮ>;=:Kk>iQ`e>A*emXVT>6# S=c̽\>̶>R4!<>K#9&LvS/>-y=G<=Rƾiٌ9ܽ|b4Y=p=@=y\>6=W=t>=Rs uĉ>|dL>w@==4=)>SW쵻ޏ=+=ª>=/jj=ャkɾw>dZ=܄>"=@l>N=La5^&>*Ͼ=8BK!o> q<7>>? >m \<>2=bz5>8EG=rWa">}=/O7$%>S>н=">T׋=9|}K=g>݃>?,=W;>]`=ǕAuڽa+ξZ=-n>N>>L4TH}-I=}$ {< ^1>_=c>BzoҰmH>L4>u9>>Z=23>8=%La/K>f=e>[< tX=Pav<1=fY=S$>,m;8 0s=]ZL={!V>ԝ)mh>uʂ&F>_H+n>]}>nZL>hxbq䮽>ɟ=lC>-)sr>s>I#ݦ=+·rҽ%ͮ>?و>%qIRI>dwAؾ~~rLCS>mL>L>2ms08>=\2ƾNޕ G;=.>ILT&%.OM364t>:?潐آ=ő>L;(f,>s&>wfq=C>l>mԀ=n> R>yݧ)=H'*l>&!&>iԾPbj>o+>YS4 :<#3=>>', a>5=< Յ>D><;< |>kK1=|A=>X{>j=#=>ZSʾ@Sʻ}=A9=־':g>>=x>>ֽ&xc/>Ô>q=7j=1>:>OJ"K= ~vpF#;>P`=_>z=X9=~>,ļs+ُli$=iU]^l=^?T̡o>E:H&X̣3)U @>8ý{o>H>ZD=2)߾ҁ=R>Db>ΓzKRT>A L.>;zEp3>}'>r><*L@c;UA+~l.=E4p<~{2G>w/'m:%sQ=D=#Q|(u > Z6[D=>,>|B۾׵>O6#8r>/ĺ=FSd G=<=5==Z>%D8T>OP=gI8 ==Cpi>S^Oٝ~>D>=UB>=?[~pd>;c=cCgA >0;=Z"=A=]M=V =>9=Q=Խ$=V1=hJg d=Wr;2=^ڊ>όȽN>cLM )X~[RU,Ƚ)>v( >5:l>O'<+j4>Z\>>k=`W>شڕG=|h[ ?9=%=Aۻ٦gU^go<->蛾h̽^6>3]Oq>#,U=m?>n#M=b>>3,E81-)E=8Y>+N`>K>m̾yl>K=Xf>楽T&"*XR >:ًR+hQ#=~u==>0L >Rt(-4,L#-> >kqJ%>&T=ɽ=$>tsɽzU> /Xs>v(>y?b SK(@->阊T=9k=$a=ZYP>P^ >ɑ>X7=OZ$3|ăgX@6P>^Q=*>X>Y>)yk~HS>X:x 0ϓ>+5=>>">4ߝm`k==T=U= Y=Owz[i< 5X.='nV.ڔZ=uSbD=(վs>\|gPC>>q > >@)e<=,j>7j=[ρ<U(=q>M=>N=̋==><>㺥>E=>2A<&]TIA="=|`>!k 4{{=\=!>=`H^U&=b[=꺴w轼* ;ԝQ+>Α= >B[<i &L>/">Z)=?>(:>Q>K>XQd2>xi^=0돾k>-ٽvĻ=R==3Ѩ=t>7U׽Si!>m =U<Ŵ=>* /V<=j>趾=6gվj<\+=`cfOE>an>d_ H>=!>b=;>L>=hW%9irkp>}JH:=q0>!>ي==>>ry>u1>4DD]*:Iɲ-(c>0"W齅S|Ǹ&g>Ê4=*t+>%==,dh>V=%˽=UC;:v=?q>a# &='úYI!'ܿ=:>pU=.4}c=>&."8Ã>yx>/!>摞=b/y=I>#_>( j>?0v|MY ?>#>P=,=C< $<+>@w />>S>`] ?=-A==W䂽F+Q>f&3=|ff;;H>*᳾}׻=d>ܦH=p>,>d=ld$=G>/=t=ZFf\ _=Ѝш;X>d>Bd#>xǽ>>>5L>L>I-P >6۸==T\=}wmEI;>W>R=>=p+zTB:=q>Xi;=0>"w>vTi>%̀ 
E=_ulu;G&=;+=\1*ֹͧ>&>_DI>̀=@ݒ>MBf:ְ>۪R;hZb)8|=A\>C C;!sVOo<>j>&יT}#=Lt> @ȾLOx=z ='ș1">ڲkJ~(3v+۰>I=r >(ǽ k{ЍL<8=&1g=% ;h ?<ژ>rK>6><>OAc>E>F=O>2 =6N== Ƚ2=Ko>\=~>ϻp >vş>T󄾌 A>8=!-">bŽ5"8>$> *>m%=m=z@P=J.> 5i?d>TT]>M,yj9Q,q^;mS<ִ=\=:1>jR=G>}>ZR2`?p/m>鐾¾C39 ;O)E~%58o>eFrv=d>A>ϕ>NJq'>xUY> hfEq>=n>}.^.;pӁ=-1ٹRi==[.[((y=> =6e==y_3?>>ֽ!>=f&W n<)\I>0 $;P=t~=5 ;>=X.==@罵5)>>y%Rz/)=jРJ=L=Z>8=Dd>^x=yWyժ3>=}^'t==k"=>*>Uͽ=?/>Pvu :=d_>JG>=9=OW=ߧĎ>{?D>kھn5[J͒>; >đv=%=u=tL5>y>Qd=ͽձ=€E.P2Ľ{>C>>UdK=")eFS>~>ٚ د;zS=a>0= >lD>=Sb=]>j=o9D62TZ/>y ,>)=^=81سe>ks>ŷb;>Q=WW>Q&:>$==K݀>=D>ٓeגe*=~yգ4= #Zu*%1;6>.; c Ӱ ҽ>>b>/ƫn>y%3K=5> BBlD=Aρ=>=c>^;[?->B>q7>>==[>n< fj,>DU>0>;}͸l(/='fֽW?*>=+c}Ծy <˖>9>L7X%7xf=> 4Ž/e ŧR<& h*=r,>ق=*N>xer{|U;';I>gAҾ=3-0">5 tDK=\oh; eֶI:>?=?jJ >`:f˜>s;ؽz=k6>Ӕ> ;]9K>I tʘ<тH> =>c=60>}>=&=J>=:=&Ƽj&B>j>J =)nטN&> >.Y>C=.I!=g=ԴS-b=>¢ԓ; {'=Y%y&$<j> 1,JC>,g#=q >tz]l> >A={>Az>a0 >=ѼV]> {>a @=:>QP3BLV>>BK1>qP=U\=M>(f>,o>/:C˾>>Xz=>U 6,")lп<+>r>hB=}':&+%HXW`&$>`n$!>?Ig=J>=;> *>7 2= )mm>&=<+.= >4W< =8b>U> _̼>*o>j=g[ck3>h=ysH==D>ѓ>d^ >}=N6CpG>8>5"tYрu|PH4/sz>b>[ >e<c>=-$>c;P#> >a >n>6rvh5)=]'>;i!>]BD==D>=Nd=}=@u xƽjrq>Y<>1>I\9>š.L>hվ>C~Ib> >8Et+V>;ZK>r ">TI>!d(c">=A΂Z$S>v>}bf=Oi>M > }>Tᆽ羼<]޽=1_=V>Õ>Fm=;r#><,>=X= >H7C>Nܽ=k==ʷ=Wcv>=ci >o~/zn@#>* >+[R5>{> z(CA>0C=T:=<x q&L '6 =RAGu:6k?!>,/.پnJ>B$W~q{w<7XC1`4;L+]hм==G@=h!>* ?=yM+>Q?=>8>01<=]=<[B#>l^C===*',= P-N>x@?k@*>i(>>t(>Tc6W־;=lb=><>M?%=% Q=u>L=ZǾAka> >T8>/&pfO=x ޽$t>hھ1M="<>'*;>(>=L=~)½k`n$靾V>߆I}>  <=5>>=<[=F<Բ>Z>={/λq=m>]`h={J+E>>oȽ>>ۧ`,=H9<~>WL_`=^Um>> >1$\?xNq>ekxAPK)Tc>ټ-;V0Q=rNa>p=@r[L>8>?|0=wн<|=d{ W}XSf=# $轪8QUz(9>u=> >Ԅ>Xw=ԁ&=@@qU>tA>eA;2>Vm]>Ʒݽ>&<>Q;>w;q&=߱=7~;<=V%>หL#|J/4>/ؽ=F2>\ڽC=f=͖/z8>H>"4>mLx[[bvxX>=U߾ھTP>,R ?2rE>7-;H=kƝ;bZ 0<=;>=`>ciν>C@{C>=I=wr]>' =ȹ85Bb>ȇg f_a=?u=Vu >/)>%.>BI>v>ss׭>6>ŽK8n;%wuv>ڻ)񼆱t>ӻ=4fX<|1g[/OݽJ 8=Q.>j8(>Yy>P>u>֔(yC>}q>Hy',:VԢ= 31>r>(Pf>w*%;_=5>GuR=L=擾>>j%<6k*Y"MĪJ=o=Qz3{=W#=7[qݓ=L= 7:v^U=U{½>7) >Kƾ/eQ>/<)">1,=!pkἛ1i==ܪ*N%9>=?# MeDs= Ⱦ-q>8+>S== >Ô>jս?46A=> ?χ!m4z>W>[ '8ܼ=e=Wg>@=6JT 
=<5C..=[09"A:c>aX>U|56>nl4>R=瓜.Dh=rR^U~1⭙W>çl~'̫=dPM>~v'{ݼg>,Q=N^XO>0̻:-E]F=O[$=Jt >]>ܦ>J>U>'c;[s>>vQj>Ez>X =>C ӌQH%> ov2#9M<7b;/`:Nf >u->뮸,aOjh0>置x>{>"R˔A>p=L'dپ!/(P>L"=F=!k;>h=EKu*=j><j90>=f!2>վ)c95^B=<د;,zF>C:=h7< !C=wi=oR<=8sD6>:>4c[Zョv¿a.;V>n=I.<6žEZW>B>CW%>] >B>R=C܏~>V>|ΆqM+!V @Eg=)=na>& t=Db;'/jh=?)=-;[ g|B^>xiy'μ6c=d=hD>2=5C>tPn ;IE=[<>\rjx>@e>&X=巜<<-H==>*'S/Gtɽ/L1ZmĽ\9W/a>\>=a='t`p 3g;{7妒[/%>H?J=m?i<=]=/k<'Z=y>*=(9G[>>M94ʽg=@~n=0Qv!= 9=Q-=jߒأ=W'j>ƒ<7.>_I: ,=(`-=<>Y`= x>ݼA+ > 䌾%=u= cw/>آ?tP[Ҏ=eOj]>؜= J=>_<yT;T> u>=*˽}о{=A*j$(=5BBT>f9k>}>Ʉn+XR>t >=d=>a=> =U4T>KD)>Hޡge+RDQ>&?>|>Me [Ҽp m=!>M=c1=^^\z=6<%ܸNw>|>s<0y>P{)|P7庽+V<-ql!T=,U}/m3Ǽse7^զ!>d#f[CeP/L>[[=Vw:P>a=J>=#I(>.$tha!1=+fγ.n>!px؛i02Z#!󩽝8Q<ژX;< W&ִ2^=1&D&>-=lXfQ;F!>ْH<3> Q½oB=E=e3=!v=bF-_ۋxmjzW"ߴk 釾0.{ѧ={t1c>>6")}$><>‰Ĕ>A=Ӷh>9j= \ٛd¤;>P9=+)gP>@{>= =Ly9=]㲾A۽wCn>2|>>d=Z{o ><5=Y=V>j>H=0FoR>2 =L!i>T>m޽>s=MerVęSG}4HUlR>Me=)W WjW>8m=Y&>|ν?f(>Sl~\K]$ؽ/=l==gxQ `$2>8:l='>jq>a=]Wӽ`8>D=L4>m=t!n<+==>U=>o>j>WBP/f>\>,?> =mδ!=}%!>̲`==:NK1=g>n> >> 4>V=zgR瘽Q!żE{ ɽ =^=>9np=6<^ ۾d)>eн9 좼=B_^xK>bG5Jx3lo#=!>z3#H#>%>_t(%9]1=(a>p]4 =tr>-쓾uY=b:=% >@>d>h?1Ҳ,>,}PT>=^t>L=Ƚ-7o࿽ a>څ>=M==;~?'<7>gܾ!Pt>H̒ <6=@>5 w5<>Y\9=i~ ==׼>2 nHi= >҇ ?ZW\e>}(t=6=ђ=^=N}=`Xm(6=(ty_>͠<[C,펪=g-=@=>4=ڜFrMCz=>()>L8@vw>E>d>5V= > n=>QI#>*>;/f>\սk =FL_<v?*>R8>5R{=S<'(0X>>$ZcP{haž<^e@ܾ bnhڽ8<͒> >בd><,MTD4>==hE>8=i{2t;> mQ=[({_> `1k?"/ >=z=3Q>U>>=H> Rps>ﱾO=N[x>G='z ;+=읠;I;= 9p>ڼo>X⯾a>H=HrĴ= xaaR=]f@~6Cվ>ں=Аj C=R=E-ll>շ>c=C*>=;l>־ʾ>>T#3.۾z>>Q>>Y3N > P[ >?ܽ'нv>> fv=b>=>&f==1R>ͼV=E'B>zy˽u^ml3,_+>>z&= >͛Nf=랾:a ܾ:>>4}><~Dqmkc>=W=du>E>+ߪ A>a@>uWA=?e>Fm>l^>!W`y>3?>,z=!6{=ZeT<`H\?L=ǘv}P޾P<&=(IލU=\gʾI=y><>~%8=ܾȻZʽ:<'O>c>>\ZNʽRnnQ˨>\%ݏb>S<>=Kx>h Rqbv>+> >I 4>.7vN >t.><)>ê >iͽ=DGݤ=-/Vf=g%m>m>Yٽ=|_C>Kx=n=OȽsW9d=I=q=c <,>5=0Hܽca5EW*>H"Z=Zx!R>½>a>=$TcW>=uhm >&>=gv7*>>>c>y|1V<輾~b>>Ǿ}=F<{F=+=WNԼ>O=h>_.qLE.q> ӽhuwɾ>(!W8ýP>qbm=>Iħu׼t<=!]>L}BU=_Y=x> DS̼g=J5`jz=13=WN}GN>_9齼Az2>P0 @6Wp̃=ܜ>R>t =QT=o(؃MѽF>[n[󽶸>ɭwVu>[ *0it~  G -=V>Ã>AD>|c(=l=3"Z+>D=!:= >ms  =2= =zV<>ܩ=Ƿ<}=tI+ظZ=S= 
A>D=9Q=~Y =^TZ+,C=SݼAH>>K=dL#Z>KU+=^*>q'p>ȹz>PXX>">0tຽjU>_+=Q^D=},Η+6:=R>D>9C>| J;T+L<>|>l)~>2)&>Y>r>2.<7\Ԑ">=ѩ#>uY=tT-1;>"kmW#> $Fi =:+沷y|=Q?>uPIt=սo>M\ ޴=G=7iξ>WZa;M33> >t8>=x5=>{Ͼ$>O>=OK>6.>>gHT&ԽL>='7b *=e'K>[>>0=<R9Z3څ>i'҉}»>zH=Pe>oXR=d=9> l>e"6=Zj;0<ۨ=7~Ldh>=xf[QB>)=o}">Mty>G>-0I>ؤ=sA_"=!diYaپ^y:ҕ>T.E8>Ƒ[ /޷P?ʹ>hbվ=_>l;fVG>OJ$=<‹r%X2D!jqv>~;=k>dz|>0=%WA=Ғ>ҽg<-ʎ>і2ϵ>2 > ͠=5o>+?>\&> =N>=<" >ꐻ[n>!̝=; ^ >>jE3Ս^>bĽC>H8>C.׽&\=a=YyI< >y6=Y|>ғ= ?9=HO:ܻ?86ɮԍc=Ž$ >>>/A>_|>M>.>^}c?#Y>VgZ>UX0;Խ>T= 3(a)>HbFT=.>t=`>+D=̭^N=n=j>%!ԪS*==>䰳sG,>\=(<r><7[˾Ec}L=9e==W)^];k=FJI> ~Q=5=zD"'JǾ >fLT+=(>n_ >'k0q=zm>=¤+~=0=j>I="==ײҽluZ1=*?=QN;;<~߉>BŤBu\=Q^|^iL>P\>Sc>=>۽6Y=> 郾@>D$>Bor>Q>s6(>.X>H>hy]>===*$l=c#9x:Z"ꣾꂾ#WFƼϽ}|=a>u=>Xnړރ>T,>z =̾wvbr1j ?=U>>2?J!ߙJ$Hxx>= H>C>3>-rP> 1=P=,1L5r>>~;W>"C> ͽ)s@ >4.>Aח=e]=sJKbq= T|<=M=剾 Y;Y6=Dݠ=<>_ց>EZ=wW_>`2w\t>%ؾ+9>5Y[ǒ#eP=0m-|j}N((=w~=en"1$N>Ik>${)<~>\cʽNg&1K >\}dl>i>Ut >g>>P_j&K]==Oh<|=F\Q g>Hr>=7=+>s=w˾4>__]P Jz> >h><=Wf\ ܺb:>˸=%)Ͻ=ٔ >n<]$>dџ=e=Ck,ٜ!oJk(>74>1w<|>|>M>i7C[>>Sc>}>EE>==H> z;)->0>J񼜾_nI[e` >z>@U=s&!>K>F=Ϫn '6-P>wXBEUU^XΎp'J-=>^趾:Ƚ/>Av)r*S( 7>!= >R>$蛾Ni>$=SH>>>{>rKFt>fDs=T0f]= q>W0>(7>J(F.>3!';g>Yh=7i˾by5Y>ޤa;<݋>0l=yϽ ہ+9>i:@]͒=>&jH>IE=ŒG6^˳̾a=n=/>==={>>o#>>Q>}}>˽, >"=->%=puGàAϟ>pTP>-C֑&N=yI  YU_*ԘE==>[=(=Gh=O:=e5A>B>T!>v=AQ>Gj>Y.>sBD =>Er=O<yJ6=c>b胾q>lh [Y;&== V<~>݃>GK =gg[5l= %=¾c>,'=:8i>|/>=7Ǩ8)1>D$d>8{GQ>>as?=󸫽<"=DUʽX!R;=> +=3V%,==r`>ʑd$ie/>; S8_ڽKR A >=4q8`>X8qW3=*(z=g=y>c-?P8B=$>tR<~>I=`4>?ܼ@ň=/$=c^pn9\򷴽MP Q>0FM'p;=46=xq=L44>,Q>k>_=;&7:^73ѽ;@G>5=sq==hY=Hr>T=>J^=1=/<=ʽpK}`A> ټ 'Q ;= tJ0ɝxM=,>>@: =<49?3B=ʑ=G)=.>>`=3k&o)>߽s":=3R>=,�S>>E)>K==x=>y>4=Ҁ>ɼr7>Za=N>d>]>S;N=f=J>=D=Vsl> ;ּ'=!3<^a>^>< ^\0T׎➽X,0>TΟ>X`nB>< o#0y>!>۴I>:s>T9W0=d=8G[l~OA>i=lrJmCvj^!>f*z &*'1> >av> (>=c=/Ak>PX>XS݌=߿h= *D5rhKf]$ha>fQ~0=C?@׽0#r=)[~M&8D&`g>NE <D>ƒS.=X慬==߀l>տ[@~Z[r>÷o={2̼3H<ċ=:@= 4>f)(I>+^>GJT>>|GoD8=d;>:<֪?oCU=<&><Ž2>Se<=U ppXUW_l>:˾ý?>ܼ ܽ~P8>h]>=xc==YYǽg<>*Y=aϽow:sF6 >Ŭ辉Ve> S>@>tY=+>EHGY;[5>2='L>h%O>O͵;˾ʣ^R{>E#=2O<@v> >>0w< ({ǣ=90^ 4EϽJ=v<3;7Jƨ>^ 
j{=7ɔ=n#!>t`ʾSe=()>* ׽]?u&H>x_i¾ v>DRڀbq==گ=0=)X=r>WNG&N<.ƽ= <>]4P">͘=5*>==>R{:ܧ=iž-o#0eAR a2=Bý2"=R7>x=)5>qW=Rr޽6vVM>~= <_SO>y3̽G-<2=gS=y0˗=>BƎ=H>6'b<`l>r`>t]>.$'>* 67Q9a>u=ӏ=æ@@Žڢ=.=QgNjZ=-=ր>91Ei!>av=x>o>gk>_i߀9 ʆv)?/>WSp(ܽN>'S)t׽Z>iU{=*=tu)>JW=:Fs N^ TU,5)(2=*>ksǐ<=SH=vL >PA=l4<r<>q;Ͼ2_;=6e>x=S1>6YN=Et=uyā= 8R<>3!핈=[sN=IUZxnMؼ[LPn>W<TCi6z=Ȁ>R=(= >x>:=NMXNaV׼V>o./%>I۾_U; =Q>^!=+ J>fB[_L5*w I>y*0><?j$;L=gjl&g>==;ϫ=P>MI'6<->:IB>}o=nApW ,<\G# 佔 v:| ޲=?< yxk+3myy (l\_kC>??/>iA.2վ ̾&ɻ>S >aH9QϾW-%l b6Ea=^2>(o==Te=<bx=%>(sW7B.s>K=X=[j=9=6cD=G9=#4N>нF> OIr}HG>>Aya=<=yEg3> >(>>f=bv==Ɯg"wx=3-~ >^b~>~=@>>%8<i x>w> >^T8>{}cɼ>5>;n> >܅>zw½ȳK;mnsx|<<#uS=p=fùs> b7=G?Q,==>>Z=o>, =P[>FA;={=JNV)<ܾP>ǁ=%Cp>p0𼮐`X>]= @  <<]>8r>5{.";S=ZUm><ʒ/g>|=u-I>pGH>c}0MN=$?N!?]!>d> =i Sv7>^۽>?j*~=j='St ?#^Z>'N? =>"=G=%0;?N><薽ܧ{!=gl=FQ#>eI>?a?@;F=džؽ>&u><3>H>+<]㥍=N=fP7==B:e<^ =I =@h=mF~8 b\/c EPm;>:P=KGFL4ZQ>~Z `>ʅT=nI=>߽^''<> ̆Tp=8{)ޑ>">Y3GɼL>[=fw'Z=@=mC>BI=>|6=^g+i>fI?H$ȳ7nl[1='!Ø>剨<O辡-(㽜(>lR=>tv6^G;>'=>G>>=FཏN=F%n=x>f1 ގ=۾ޏ>X>ꗒ>l[)>=->S%X2*>h}\<K={|t>x>N>HSӉ>=ĉ>"׽hPaJkskR>P>#=bb=L>vM^=<@1>['>h >άsO>N4-BZ=>=}G>9AΞlb 0>=6*>|MU<<ˑcPc=w>h޾;P$Ύ)Y>=+`bĨ>\z=?V-1dش9~>\P=_;M>%n=0>#'< Ϡ>'/>>=ذ>U=HNq>ߨ3 Ͼ]o>4ܩ>d>Ɔ =j>{i>/c>(H>;>9&m+~7r=3L>stqvm0 >> >_st>P>~<7>f> ->B߽I;u=/PX>F=y=qMl>3=o;N>4\ZR"dWb== =ǽ" ^0 G~>zQ >UuB-=,6<N< >*;5KؼDp>z6M}嗽$&Lc>͛2E5>Gm;=b%P <彣;ô>ך>$>.[=d^>=P=`$މ>[>u=%+>bDM>yk=T߁>Â>& W6]C>=#>-N^VHO>`>3#>X薾̅MJXJ=>_>W<>B;>wkIwTn㊼ =>f^>:x +>0=Y='>-Q>`CJ=0=l{%o>%þDI>ǽtࡼƍ:!ӽE}>=.0X ;K=iռK=rR n<,K˻a>"=Z,E}<Їw(O>w=B=:݅o>5 >\=F3B/⽧{>]s="-X߽4FJ=x9>* csF<>-d&Dν3?#=4\=d@%m`"߾>*>K=D7>0 w kSV> WaM;Y>vr]'K]> =-m=/Ot> 7Y={Ý C>==QR<@MU>2X^[䒽ӭ>youG=iېՄ>(՞2< q=~T s-=jY4:O꒾U>V=ڮ==7 [>ξ= X>Y=+>m>x)=Hn EА2=X>`_Ϳ3>"? ;fV^H˭Y5=dgǽ oɴ!Mt=~gfzP>o<7?>dҧj:0=fm >;Qn&K%>^p>V:齧1)6dKgU>D,>Fvy~fnB;>:>Ra>qSIA6<>3޷#>>~-~3ln:'!=n<2<t>,B̾Uׯ;>,L>q=f>収> >^=TIvJ>U<>=N\=8?c>5#>Ը M= @ls==`&;=p=7<,ݐmѽOke]>\2>L=e7+~<=]\=`=>͇<_?>س(ba>)k>.U>.="=]>;fS=򓌽s=g>=~=t >~h>V%R`z4<w̽a8

    ȼ =L>%LnپN=CA=v =;WCܬ=S]=<:мo;>V=;>*> =&$Dl=9 =uSx$ B=>)w0=F~=6>"&[Ѯ꾭+:T>P=e6=ͽԘ=U>L>~쑼6|>m>rr<&=~uŽ?[ƻYu>};=GDM&vs{O=C= EQ)8>P ^>=l'=02b=uBeɽLl<-C%=)=D=^;- 𿽊PX=XԼ˄+Pِ>xI=棼<>$= e ELM>Y>315\> ˽lx> 9=Q>G>=>0NJS>O'>>ƽ촽Y\>䦾5dֻ>t ͐>fݽdo}RlRl&_6Z=p><ξdh=,<ý_n>Y#>x>#@>+ p0 H >ּR^G>Q>?k`ݽ䥾  +,<ʼ2g\RK>;y=3tf=/>yb<081>f>!=O :>xӱ<7F>>x>j%'D>`˽>z > S0G;j>9M=-X=3o>=χS>=iea>*=8{HT= '>0=ӟ)Q=_|k پD`o:w>8==Ki==at=.q>I>_9⾼' vH=3,i۵ý"f*kz>ƒ$|>41g#>wx=OQ἗G߽D/#۽;>}7[<䖾m>&j#w>UT>8>]֌ P>wDgDx&32>{=ؤ>*¬>>۶=Z= =ܫ_>&]ԥe>t< lm|o[ >3u9>̮>j<`Żj,>?rw=LսR:]4==ξ.>rSmp=o>̳λ;<>&i!nma:==KN?go<=1n=>P>UϽ9Їݦ=\=ƽG:=8:{M>= 0 =Op >'Y>+ Hy־$=L)GB=5(>[)s>=xܾ>H{DuٿEbn8>>==k{K=_t<m>O>!W=:t>(= y>=|d.WC=W=' ѽe=5{gQl>Hx̽8>*=o)[>Q#>R<Óc>ޢ=L 9>?<Θ>p{>ɥJ>̟o"O KW>I6\>J>ob=CT>g?k>[%>*ᚾ`=i==e:=~:>==(̽ɾlDi8~~>=3>PbM;>L3>n=J>cg(X9= +=rw>t`b[89o=Z岼^ Vc= d=E0&Ax>I)z׷9K>9ݽ<>$U3v;=Q%>1W$'9|t=<_@,L.>>’=J4}g'{O:=5g>^Ӥ)D>i>Fy>Nn40=P57;;n>aK=׿=p$:ah;W=<">־eg<=2=Z=eC:)AEH>;>r:7=>䘽S=?>&Zž =oO;m>=̽ 2s>u<#{ؘ>y>/ `=K m>y $b>+뼻 S=q>fOu8h+>䗽Ūju=P=>oHĕ5R!!U>ϫ>sPARy< <$=t`>aa=ڸ>>jдm((>6\V>.=߃=kHzK/>o&=,ʰQ<#>ɦ1>0V>ѹ`>k=~Ro:=R>l[j$4c>I>ɽOCg=B >.3=y<>]>.1=s=/G>S93ӽKv>EHz7&z>pK>Iɽ&Е=>Q!@>Q_?О=X>۽=\}݀=ƽSJ?ܾ݄_7<>㐌 = lf8p=2>"> =cg>]= , :=N=7۽e>v>ekUC8 >:5Ox>(oY>;d P>mk>V>aW>CE9AohY>iL>:>xVgy+aO:s;> ?>E->6F.}=^ <8z>!yY[&,$={>>->=ͽf>F/>"̳5=*v2K>{<,,>v)p4R>i@>X>?նԽ0;WA>u0>({Ͻd>o<#$>g=Ľ8^K˾xc=G9>k&&=2+_> <ߎ>Z?>  QGĢ PYtAL>}h>N>%> ܑ>O R>>6w=iBhp½–/I>V+ް=5>;-ie>f>W.>ګ<4]R^>ϡ<8>c>@ؽtNM[OGѸ=]s'>>Qi>~7>! ;E.x>'>CJ<=|a=1^pr>{gf=E=*=w>'L,=H@= =ug>s`>z\}>v=BX(ֽֽTF )=eXսzFE=>ɢ>ɳj^d? 
>[>AY6=!iN[Ń>>'A=Jɏ>X8=*ɼd$s=2/>lھ%S=>w =ɮCZA!=>z^>a&=N>^s4n4>uQ<#>MUK >F׮*>t)>񞂾#r]=g>5c<[r\=b=8==ԕj6!"zbX&.nF;1:Rm< >k>u̽m3T9Ws<:`=<=A ܢ^*=_Խn48>ydL nQ;j==cZ?v>< N۽>X,,wY]>Ҫ>= >H<}*l; w=s>Dc>B.<~ǽp=1>6>rڙ8R6=.н ==Lk=!$S >7ɻ<>e<=#ȾhnC^Y>B>φ>߾uܽƢ'ƽ> /E鼸oه;%Ri$HR=rY=>T> (>{긂洽VZ=s" >5=;;> >d&#<=>>,8bK$^((b=(n=k|>=p>|St>.dh>d<l=uh>4FtP->@J>3$fD> p>]>@=GW1 ?G*M-F>|ֽW˕4=Ll?=ǽu D\{>^_L=a!j> > j;>IF>T?<w8;>Ѿlg5zyp">ס>=)<$kvq=8dY>nD =>9vn=>`i"!|MͽfT>\=nl> V>&ymhv/== AC~'>&v>">^/>i=F_K&= w=G5>U>ݭ=h?i\־><|>2S'hwS[>:F>P>T=HeB>/DpMJW\<43 v#(I>޽L=eUȓ=>P"=8[{0_:";Z:>neh>S=8>={/.= =n#xS>6SUv;<߽-7<D=2^1zC 転 @>Uf)Dp*>tiG=Q3&>G4>a=<蒶>x>d⽘C|0 Կly䱰=ܳ9(=$B7><>֖><*'ɽ(?b->of|[=j((X.>.;(=Hdi"ӾU;|+?.i->ͽZ=>*>>;ij>Hu={>yBe>E>rib;w_+=t>LI?>ܼKS>0{rCxxK>jCȾA=naײ>c>Cs8Խ?)<[jآ>>LFbd.s<*={0#G=|u>:>ѠK;>\'V [DwĽS^|8> :=꼦 ;=$>kFk[[Z\>] >=:=23=c>Y>UJ>2Þ==<9Ž|DG>x5>)g==>;N6>3<!>)?rL=>2YI=@+>)Z=S(Yԏ>>r?:=#-w='B q3;_\=ƣ < ; =}=6#%>wF]o`VU۾2>֖wO=AHa<[J>n]½ 3( =V=Z<=ի>kb|]˼P>Ž?ų=S >*>Eڅ>G} >ƖH==bZF7<}coz>q<ݻUƓ>&I[>O`=į >=!>7=g>Bl>*=+`=2-d>^`:uD ⧚>g>zBaL7>uS)<ǽ =x=d!>dbŽt)Ľ=6<UΪ>>^>ko=V=hL=!">;<^̼lžux蕰h6>h=>D9>Zd>+Bn>e=@ھ;1)>{>D}<5`ո1D<ǧT=b=o=)>+>r^>o=^Dh=It=ʧwN>o?qW>b=#>Я '/{={=A>S;ͽ>,G8= >$`>M܁>t=x>N˱>}=j}\G2>6*><1>MOȄ>g [9pv|n[;8g=-̆>཯ >L>m>=Oq9=!R=>F>ay >I"W>ik\>vy>A<^?.>= >aTѽK>W%ɽ2/>=7.>>O>@a@\Ծ>'`;0ʍ=q1=Om ~<֓>/i Z:='䖾==+G><=%=4Z6coT> 8>Qt>S=QI>rœAN[. =V>҃ս\ < >XB޽U}>Ot]59JĽ&: >k>5:|RDl]= Ϲ="/Jt-=Eg>L.>u+㽈=x .=Aq@u=œ> jkγNx=6B=P Ёo]œ"c#0=r0w\F>b=k[S=YX#<.=L>D XϽM}vk>Dz<<3@-i.=OUKfK=}w >=դ8ֽs9n;Zm;͓>Zj "`]A=: v64nսsN>>ӽR&$=a< GE$d$ƍ>{ +>хpAٽD7> >sc >^>z?=0]>>5K=,>,OGj;>ob>ͽ T9ǣ<>=:>T=W8D>%>`v=Qgb;Ԯ=[R>,9f>UCr>BZT&>,>T >>e=!H=AcId>o2LYtWS@sN=t?==D㉾iiy>NU)½ > &ؾG>% >`=> =|>=& ->||F;>f)gDi;C '$Sߕ>Jn=>v?!9n>}绾kF=t¾A>>ν^=6=o>==>(vὑr>8NͽלXܤ=s>5]>xt5>=>w> X}> NE7ݬB>Ÿ>F><`i%<3Y3<>c=6@Rn))y>Gv>`o=)\+;FU=fu>k<,K] &Z>"=h >k8U>"Yɋ<[`bme/[idL=׾ =>rz>я>< MX>:ɽrH=^3<ɽdxsҨ>=t=ز!<'U輓8\N=G˽S<#>Z>=y<=#x%>>>*A#Őm));S6=. 
>RoÞ-=!F<=p">si>=evp{>Rs>8C潺e{˶ >3/tvIRԻTX> >*Ͻ^E>T>-/4U>>>xTZKG7=T$᫽)S'>{½H=MK<ض'=SZR>ҕ=[9ʫ=Ma>8h\ >)ys>ꉵ=/A>Fi=->R{*> y=T3w=>> JT><>6/<2̐7>R{19I=I>Kཷ^=IK9BS=>)=8 >Y W*>0=- ᅾ >=dI>V4> clj_^+t1L6>&O>=ю<濮>>l>ʗ$>g B1>wξһ#̆,> _,3=M>͓d.ؽdD=t=->)v=K=3 =XPK^־-iwD== W2Mj>}894>y>*!>t=65>={>+==e{0Z~%> =5>t<>ܞpgw<>;ҋ=~mo=WW>WvL@;e~>>ܴ>46g=$>qJ >-3罓c>R=x%K)><0e>k=lm >>̽`:m>; >=S6F,>uW;Z[͘b>U6̽===P >Z@ND>"hl7e>`B?oCnj#yf=ڠ:I@+ֽ彖b=8 >&|ז=b^j\ =z'=𦡾>SgY=(M>=b=`+dW==sm>A _bm>N>۽|B"=$Ψ=P6HMvoZ ˺F &|>Y2>'ŽG,> >VoNF=J=]w'򟽳`>c|HԽO<<{[Ma=Q%x|>K]>h =?=07m=(6>ΣB>ONH{¾s>/h{HNc>9RJ >9BT>mq42 >xS<:}ۥý0>%:S,>=u?=>Ľ_>D>$(>Ew> 6!Q>y>- u=/-S=ރG> S=t>vYJ>W>\:>=69h4=;=];+A\ =;u`C]5>2=JRzR>;5|>Rv;#[6MC>_ c>jh]ЛNZ::>A.꺝<=O>>t=}K>Ayt1+`ǽQc>܇1=c=[Al*>ߞ=;<EIƾ'zu=:=m>K<"zpB>s<~=g=]$4'4<$^>(Wo"ܽ'B%;)>2'uP>6tJ9>X=:XC=#>-N u->qm>~=AԢܾﯾO;:8^ =9*H=p9>½@I"=;ѬS Cgí=F!ug#=|?>%#38 >'== >Q,-c>KM>\E9{>d >/ݫ>󇾫=ObݼYZM>+>>J4=z\:U;=w<֝``Zj2z>ֻnN>f{Ⱦ&1a^X; ͖=?;=KgR=mֽ2>/ c* Ӈ>}>mI>C>V>><}=8oƼd=j:X==>L_r>W<ԟ7׸ume\PB''>>$GP^> ƾGA'@>$nǽCɷ>7R3нZބ=$,Z>G >c=z>֚>s=~>.RQ%=^MW>98=r>{=ʭ>Py=/ע=.1zšW&#W=>=ł=J>ʾ$@<~>57#_V(>'Nꞽv>E|%=ßyj>4<KC`l>*/>8rq~>yģ;Jx>&f>>_Ă=0x>>n>b>cW?l#^g/=†R<ڗ_>[> >÷=l4:\6~ނ>$clގ>?R>H=>#U>ƽ>@u=!=G- ]>ڏx ub=R~K, S>'>ƒ>.=e= =АN=E >=[>Js +>7=ˀt=|˽=檾`=1!<9f=~>B"i!b=0eY<}=>I>M^$bϼ=>b=暴i ٲ>= 8\!Kξ{ 8(5>߽St!=_<P<yd>&=dR=}{>a=`=S,> #I1&=@(Ǯt>kļ;ļt77m=ƺIh-6>+7"?>H8?}#$mge>Ra[>X[ a> k>Z{O=S*@轤>``=q6V='`c>2?i4-@VӅd@W=]=f*=`m;H=ZQ=J=l>^=)F>lp5>$N`8=$-j{>{>"Q9_&M̙ܽ>ٽ)>FU&=k>."$=v>p.>ѽO'=A8=K<'N>%L@H≾; 6/jgq2?4;9>5>'T>|>YߔjMf= ѽm<+ C~ >=Pn=<@=m=޴5W >,=g6>ZƼ^Gq%f=q=22!AZ,?s,ƻqҴ=K!T 02DZ=2E=7=#>39Râ>ٽ3Qo>>+jPgƽB>=?Dkb>}S(W^>p/3>}E<V"=/>"BĽBs>"='1<(d=bE>DvL?72^Ⱦ~ƚ>l>=ݽv &ˆ׻s>%=O(>>)Nh/u]uAd >Yf!q->s齩>@>(%f>}E>L;;%ѶE>" jLcŝᡡ? 
"V>av=Xn<m;=ri>>p=/>u5v=O= >#TƽK>F18=0=b=f=>=4:MZ4=CU>E>U=hg0I/=m(>IVt,=Ȉ=2>x#G=q%2w־4=`=s>o=ZE;9u[ ><=} )>m%=ϝ> %SRr]c=>ұw===B=\{\}>8>ٽ@$>LW>IJ=+ :=k= o>Rf=HR> k>x%뾸 r+dC=aN>yor?I>!?=B3u=SC>jG>QC>7o=m z>>%zq8}ѽ.D>a>W*>~qA,׾?= H>+1>ˑU'ygBva=Zx=\mP*>Bhn=M}=| :!`E='[L>ԥWn>y{>%'>,a=Sdܫ٧Ky>= =;=K[E =Io=YSʾ,hϽ kM(>@p>kK;9=hd>6>#Z>JV* >%=5>V W#=c>d<>?0=y۽Q׽ >\j<>顠G>ۂ>Vg=>=P1A> A=Ļg>0+ݼh>n>S;¼ =׷^=_> >>AK>=ta>t JEݾ>T@>۾#>ͦ1X+뽴=>~]dx> ,ʽ=4ڜ=ue=Pq<Wx?$T><9BT>=2>lN:}Lu!=Ŷl>C>> >T@Ǎg>c>fE>ؼ˵+>05o->n;k==n*>=c>j><Ϋ3Z> 唾F=|/^no=àvz=T2=U=<"V=L (_7>>ab=_c =Ec;/E_ڙ=;W>$v]c;xGἹX&2)}*NBoսIqڽ*AHK>>b #>>2X>֌==/v ( >n`It>zG2=ܘ=T#> =H)z> FFxGZ( q.d>?ὦ> K6$g'eV>i:'z>eN>څ=C"V,v4=0>~=ف>,:ֳ=/ZҐA**uW>֌= ;9=OSAD=&/>w='e='Z;w=)=ĵ=ów~\>|z95%>>?\w >P>޽_lb=X >U>H>4$7>| Na>K>成N$==쇽;PC8ҽVK8s>===,ѽ @>.>g oN > <̽>~%= >1>=A=fӪ=s!@>=r,>.>CG>`=Lۉ==l|9zV>U3&*Ӿ Qy>ԯ^>R/=F=i*z9mv)> q9=>o=P//>=}%ª]W>:=MT1O>M>=?= u=Qۜ>[]=q>=ވS0>S:=d.ʼxA60ϫ= ]<5=Y>Z>WX=9K(>b>(ICK>pө>!>(A=_^=R5νѓ=n=VTK\=>?/>Jb>ڽ1>ȓ>=p^=[>X=s=yYlPDQ^_.*S>*߀=~7 >S +齈揽:>ZM<ľI)kl2h>a=_&I{ށ>=LutT>=wJ>`Q3PF>c=*_>= =v5r胾Le$3>N[X׽r>Ң<2=5f> >x$@> XDINоe=R#=})>n;<=g?!<F{Dk]<н>>\@.>$L>҈{̫>>">KZ=:;<Un| EOh\d?fپ5>}4O> 󦽑Ž&}=W>>ڔ>*k(M?H$>==mw=⟾N)Cȏ=f-4[}>N.=^N>F^U< =y޷l[>U& W?Q=Ks>dߐ=g;}>~NX=1[=)]Lok=f.zF>Z >FB> >|(>$᪼ )W{A@>8=ýt32a=<A>D)>Ծ>N>?/ =-Drٰ>Ṅv>4soM&>N|==7>xl!>,[c>> 7 >;zx3%$>u^>n% 9=v>F^> rO>CϼS=t,>;>V\h=c> };o?eA>;m=ډ=۽>o27=s?=>@hM;c׎J5=_ؽ^B>V K'>4tȽ=>{W>>==c>$ɭ=վ$پF=۽>YXƽ=7~c>K!=(;&>xp?D=]]=<[<}vV= 4 '0>n**DO~3Fo#>>H>B>7"?JUX=,d\br>C>3@=୾]O!=nN5=̙>\ubZz9>K=* =Tx/>| e>=R>GDP̾=|&(=a<bjƾ~C>=MwȼF >@N½f绾-׾D<40> f<*߽>JU> i=z?>] =R>aHݿ=,3n-`ӾY a8r>O?>5M+> += >ak0@< >tǾr5=@d>6U>kP>< >&Mr>~h&?kL>,&S=;c\|C=Ψl}=̾$5>ݽBm=$->X7a/F>Ľ=c3>=n>OJ=Ӕ>$g>sx>lvr=P=k5S(B<7$<Bܓf>>ݔÉ>oX> !x!=&r>F.,>/3>Fjd>ъ7:&Ѿۀ{=UR>>AU>QK>oGj=;M[SEg>M@WQڧ>U$&=IL>8Ht>ڇ>>8 !>O ޒ&>!!>uz >&'>˯P>Yt >ْ=T<ɲ#oH w7Z>H# t90@=;S>Z̈J>L>$+;>`Z<5oB{d\O"ͽp>ѾAӽ=M<[4[.>ܭ͌>>iBg=p>Y1M~=:>X+x>.4,>(a(sLC">/UDeN>~<}>->>:'=>UPAgXSQAJI>l`叼8Ƚ޽/;7R>vh<ҽU?Mq <K>G==>K8ɶ L>n덽^  u ~fu>q?\LM̞|>_т =3"=T׋sl=1=HRVaA 
=YԓD>k=c@4>U >;>.̝=4ޘ> `7j==TϾ5=rWz[>M>gE>?=r=ZI=77/#'> >Y΋> >?PX>>Rk;ѽ݋$=/F>l=ŷ>Zn>Scq"=;<|}=u\>p=I=lBrɹ=lA =o'Ϩzv>8<(V/>1f=>?>:=V>{|=p#>,l{>QwՔq>5Bh$%>YǾR$'%؍>>=p@>`vf=.{*>U =a Lw_=s]=v=; w>8=f=P%QὊV=>T1=P>>y \>dlRDڗIF>h>͚]>ٝ>5UuUͽ,o/=7:>=pР+ֽP>yZ>[>@8L=9I2H =:ýPUu>;zx(G>[5>r[UU>Ndf=x(<!⼉e՜=y>=u=-r>3 $P =ἶ#ZU=̲>=e =Z >x=G"c=D#v>m>I`:>آ7=0>^0->kj>=J-=b = >>ڹl>b>I>^߽퍾~K==@c7a:>X>ܛ䮜=KZ>W3ԅ=>"e>= =⣲=^-ɪX==rb>wQ=>J`lkx.a-=Q=>2`WQڽӾo: >/> .=Qʿ> >!yUO<=01=Yt>K\1>2x>5u<2DT2Jp=ѩ_> =U`n齃v;"5O> > d]]սtȊ>L=+0E5>P#=6B= *o)<2e=ǽ>0J=9PC//tx`>|';LjFbf&>1{=G>,o&*>>Nu)}:^>T>Z'U>>r=9C>u< Sҽ7;`[ء~ycž]X0K>3=03>Yf>>ÿ-B<&U,z ?(>u=Uf>j]=USe $>*>a>vh=*JOf=<.l7?-<=.i\W>Ȭ>cd>[.>VQ ;!)>E;$f=[c=:v>ؾ]>#z7R;?=<>_X1F<9r6c=:{8 k=(=-,k5>Z=`'?=> (>k" --Q>> >#2>n?Ƥ>=稾rɽVy>K>GV=/>,/9W 4xޣU4>loZzsbRUͻQ/>>ݾ]➽&>=H>>خ u>S=j->< d>b̓< y>-=Eg>68>76k=7BMp>0>6p=\ҡ <x>caHБ!><ʫ^>=bV{^>u|DV>EF>lh]Py>Ër=mܼJb>lTn>S <Żk D<>K:=P<ýp 0=&>}m= =),*p=f8i>=PҼA<@<ľCn=7sR~=.=sm=S=>l%>ã>֢B=6[>=1yF><=F~i0`=5=qrM/u>ĸ= f_>j<>>LRa=+>ѻZ :E=ܽ =J6=a/Ӿ[>8$֟=7>A& P:>N=,D>r;I=HC>>=O>߇=,t`%i(P=Ō=#ܤ58Y,>xB>=\>Eὓ n>!ֽ>C{q>Ga4 T=>->=v/;ݽO >>~bӽ=~=7MY>= 9>$>wXay?M4e$Q]=^1C F=>=o>BmLF6>4$NNC>s(=ɹ\>+;q X͊)ʱ)\T>"*=J>a?QGW>#K;x>{)+UJɉڧX=Nvؑt>%Qw>?>7={^姱gC҇b==W.>ۼa(>d(<ߌ ;`@:>A1&R꼞C8>VI>1?>;}TH!ǽ9h>ˌ>>໢d;d>jŢ;>=c!>M[ƫ>m@]ls=5v>;^=7@W=>z>>O;< >lW,Nv1˽=l;>_> >5ÍC׽IJ^\<͖>{;7J̾/컽=]=q >ᠾ<#=)x ⯜={-=rUֽkPȽ۾,A>fd=f>DX`>9O>'Ϟ>=E|8=="=.5=[s<օ1{udﺸu.>=ҋ%&| V =U>p Ɵ=ۣ>׼>;;<=Ea=ہ=g%n&c~<23Q>˟=Rz>f=8='T?>&;R-"K=U > se"=)x>l K>(HNm>(>>M>D>>ߔ=D>GyUB/K> Fn}9=B#zNR_>iə[>m=jf>j< X>v>?f=$hTվʊ@&/>?/=_]^;>ͽi,`>c)>7=Ľ 4>PQ>Z'p+ =Vu2u~=W=Ss=ou<s,>]">{>p*> =>ax&i=Jl9= =(2=HJ4 >'6=e.= =8>cn=Ey?6\!=+=liF=*>>84=\=e-;@=O>>u>A> z#}Lʼa=z ;>T~Ž=Jr=\,Գ= #>W=-)%x<@t,W;s<'fiLnٽ5%_=)t>*=9`A;<HN b=8 ==iB>x(6F(> ي<)蕽S孽m>5 &>IC>>(u#>)=>9=}ɲ=/Y8U <>:=bJ=w>=D=h=>Dmee^=ϥ=vUiKz=z+V^<n)nn>&-_$>tɽӼr =:>9 <>z ޲><u=lW>#>ى>Q@qz>Ė~>0B: 5>o‹c9N>}ɽO\=&S{$p >L?42׼..`>L>>=y5=2YJ0V< t:L3!=Y˂?T[>h>X<=#>Y ={>ZA>/=Q_="]ӽ>*ٯ8>$J >c>=Tb=d >)A=sn:x>ꖽ,& ==-Ͻ 
۽.<=T>sP4;,#>>>c~>J>|t=03ft>vJ>A=ϥt$F>;s]>>"6%pM1e>']>+v=X>>FY1wfH>X>gȠ>l=;JX >s=;>71dic; ~#{>=CgY >#=>`>0F>0l=7MBdCǑ>++S)=~:D=%?h>(=謢&iYU=5=S'8i>ዛ>Ӎ=ذsH<èPD<";;eSRpy;We>}x<#Z=F==k==&>CC\L\94e*>%~==o| >r>6#iľ佺<(9=|F(Qy>o=>e.=.c>=,>>(Bq>.t=h=m>Udp > >`{)b5z+> 3'*c%@L!uS*+>f>t== p>%n={ e>s^=Gz>l=M=ӷ2>TIk=Ծ9>ڡS!0=(\t=t==2Ux=4>p=Cy=ݺ.a>a >5Z,D>=vMO=O<Շ>f]?>i=B4u=}>{M˾;MA0U,>s^>-:w/ag=9> =0d,=$%=4y>vL =dpo>?.>n=[7||>ߜ=~wн-> >"=3,>:*><_=$=͸E=={+=l=} =G*[~a->x=1#>D>>󼿾%M=L=(> r=&`y > d4>U V>u4@4>d>ʜ-u/?;;=i+> %=>4 >U>=J=(=K6 ,$^l>쁽 pFk ;ŽTi>='ƾԨ˽\ ?KA>gZ=Ɂj>">8c=~t=nf>Üс>{=;=1'=&%>+G/HʣvӼeXr=a(>^R=Tɾb;=9E-ϓg>bgZ?>쐠b>>u0Љ0[v:(a>\ = ɼ.ž}>={G .Dg9=9LK>΄۽>U>x>_ =@>,bT= >~=FI>-K¾4~ s0sl6B=w-ׄWy=K=<ӽ{>0;BHe!=G>^yi м89]eI꽶Ʀ>zU>3Ig~X>]=rF=>p7(<$>G.aIhĸE!0<H>О䤍CeN>-"G=VKPU=>>=]S>eJ1޾R=@u=I]{t>Q>'=}TZE>[=sR>O>N`>u&o9=,+=;V F>0?>j:roǾk?GU>F>Rn>&N=9D3k'>н >C=5>0a>G|!>20z=X*\Z׽O=P*A>p.ͽS =c>7)C=J:>A@=$ngӾ5j=ڍ=hk%Ͼ>AӚ>f}=>[y>6@dN=0W=_>N;>x wl=۸7f=jaܽ? >jy>B79 3bfc8>aw=˘`y>tu0>L>۟>&=Xٷ"fbRxQ\>Pg>>+V>S"{=.(/d>gur>h|>t@٠B v<|>1z!=R==6cX;?y ">`l,8e >re> >=qbVͼ=j1+.B r1KB>/̽w#}vSU=P!>X=>Y-P=|#=h> kX=n4T^e>cx71vա<ĝg>2==|L>z >R==G˽J =j$:$gW߀ow><}>dkWn:xb-.p=vL+^l)>PԤ>2Y>F|>a@=1ן|3ýz>9;>06=CƽBy߾*l>_ >۽t(e{5E=U Tc=]>>|> V >M;<~kc]>r.>D=I>od>ղ۽ >MXGΦ vJ Z8>2jG=-1Y>N$=J>n U'>rz=W^o=97r>FЌe>~-=Mmt_>L_ȾzH#< Y=ߚ>p>B=wv%1l=G,=>$>q>1Ǭ>bx*=`KV=jܽg>85(~=lv`<><=t@ jgfIT=vf=0 T0j3 >=}Z>vT$>>j>=ѽ;uc><"===q=1>."9<ԏUa?>;=ő>"&>*=v n R>^u<1 q>S^L"b"V7ǟ~F=j>7>ǃ'>vO8$oz=z>'4_ms=="a@A>X> oHGs<>r>X#=ʦ=ZPֽGQܽzUI&U3>+VsP3>bUrl[.S>ż&mxs,E;ٽ=}K=;>K̽nk5L,|%bw=ɆB>, ?a=>J&>k*I;0>Ss&' PºI)=>hN=hO=ʺ=H%= >= Q{>Dl?̾=JH=iݽ̷;<>N=k>DWFN;'>dƇ8C)=NvK>$NkV=!'^A>ʽ4C >lyƾ>>|̾;U8eR=.= 6>/,x>Ռdu>Vμ%>!E>G<3>9Vښ=>?f>E?Ry<>;>Q>EH>l˽nKv,q>@A==L==)&>f=Ƀ?U=>ߔv> >mSXf FX< =e;:h(34>_>[žpF>\B t@1>3ǚ=,>y >/?c>a<>>ͩ=6K>\>=N|:V)̻ؽ0==P<9:c>ajJf=ܿƾhȾS>V2>i>&_:L41>r>Lc><->a>M5=h=J>oz>$T>e5v]O˼շ>VUPѽ崇Iڽ~C>F`^5P=v?`>>\ HB8'UV)\>tL^1hɽ"i_D l>zBTCnm>`ξr>@6=uν-~$GR==ʀR?Y+>\r>c>(Ҽ>+ϓ8>=>U=f;J](d>=->U>o+8q>>H?>"g=.=i=qҽy+R=;-0<=x>^>]>!> =W$ qxS>$P>>=hƻ 
}e<<&d) !>Rټ þ!>G>`=") =A=Q{c(>gpF>՘=i=g8_׾ =MUwӾa9=g= ߾qJ23 >Hg>Yh=!y<X=;ߡU>lh==QY=e=apb>\j]1>I>b.8w[ fC>W=@V(=A>Ӿ֔> Y>½iW=Լ =*d>>JaK0=>37!7={

    :>L >vD>ٙ'>5)=M=3 >=\:Ͻ>٭6iþ(>>T>׀O>&&>DO>[^=> .=謁G$>AN=H>=Y +5X.C>>gkB<.2< > L=>W_>—+u.>D>}>;sb=JQ=>[4}?<Br=7w<{;\=Hm.̽U0E9=fq>0>iK\>=A<̾ &>\>-T>r)UZ 1= =E.>R=̺= ==)>u>g%XV>椬ѷ }1p=ʽ">g>Y{+^uD6":cېh-=tw=3K=A-gok>8>mK c=v#=HK%{= kV>B+̽!2b=;+v:.=K;OpKq ~>caE>G@䐽Co;b>qf;>kZA)_Efd^3gȴgSb>=>)]<>?<>s">K8u Z;Ϗ]m$>j<=^'G<Ͻmz1c >ȳ=Or>‹>C6>4C2yX==uF@68=K;Y;[ߪ=}!@Iǽui=&I1>}{¾{P?}> 4>+9fPj>nW=G=Cwِc;/%er>P?<%G>pܼ%(>#1;I;_>%=?Ϙ><> >3jR>ӱ@!=.T=̲-D>>=TKٽ](>><=%=P+q$;K߾==#>j=]4>Ɓ>>$>4K=V=mE= ==P{~>i,>$<}W[:==>P@T?===ڽHt\=û(ʱ=HrB(̊:F5(ǰ% ͙> f~=Cb=[L% )*)W=>[ eož A=j<W>>==!c=྅gVb)#::ш'% >}>d, r`>Iv>tR׻r7>c ?=0'=Ms>qXVO=YF> Vɂ _>>D<Ȅ='O >)>5=ؔ=k#^=4S>.iM>0>'=*/=X! ><9=>>tZ>n(=K>>cNQ%X>lӅ>1>>uҼ=7,F=پ>u!cP> Sc>ֻ>i=?A6>_;ФA>[* > 3>k=t>+7Dy>C4|>n=i<3'>FCo>F<=d>gL{սݯ->2q=$`>>你/|"='(k8ѻZ%=0B>wC>a>>)==#=K$g淾-lڻD=1o/U>4~=PR=LqU=w.iƤ>As> x >Y{=wtѽ$(>>)Q>ƌ>w=Ÿ<4+=|29YI=,/F 0z,>c=!w=wV>rY<_;A>JB>a>>w >3s,j᯽'P<|>>m9=<1>y2:);،r=>;>ZýO_>_>܉<^g=iH<тfyP;V>Pe>>?=+M=O0YF 9i7>S=)=2a =XaLV}Y%;v==}Id F>2+^DxZ ۽F>Eh=p=]e<.> >P?aVIpXýLp!<=R]9F=.J-(= ;>P{>M=Z><ɠ_=>Fr̟= ]P׽l .6=u;(=>6MA;7)-Y`>iŽxd>/j'"> y=ʗ->@.=#w.z> =XRIz8 @=q%+/>*;V]=^=xD~*-=Y==]7FQ~ >-Ľ=: T"=>; <(?j>::˔qi=a^>n09?,5>Yj> =1 =I=c. 
ʸ*X=*+)>[h>@I>=jeݽǾh>w /n>4_uMR><׼Z=_=ݫ>0m; Ѽ{<6>u=C5=;.@=;i̼=>ɾo>J BUʽ="d=j==h0e96>|=4O>=Yiw><˽w>п 0=&I>ޕÝ޽===>c@=⺠u;=x=|z"ϣSֽNR_>>@G>D=P} R>dp+g>@#>c;=WS>>PGtF=#>‰>gFl}ٔ=D">Vn:>S>Մ$>AL>_þӶ=H=V92ܹiNՄ?=?Ω=Jkƽ Q啾w݁=] ۽@>sS*vkJ{>j:b?yK= ivW5x>':PO>婾ك>WWPU>(a<>:mE;Gr=r>-=d>-Z=q=7y>8ࣽ/0> >%gw+>vSҫj>>)>5־3></=|=''#<~;z^x1#>6=?=J6 >7]`۽|/>ü VRs%?m+<⽉+=L=(뱽6XU>$?-C7T41Q2Ғ=䂻>F>=v4{drVEs!eW>=ֽ W=>^< >> >h Y'3=*>Io|!Z>^> ѼP&P V=u֒3>e:Mh)>\H=<*=I=4J?4x.= ׼5~+=׾{y>iS<>;ؑ>.P>œkH-,N>_= Zȳ[# +>ͺ>̼H>͛J=plܽ`#8>Ⱦ4 k{=8>Eg9>=ʂw>̨uX񄽧=>>ɪ?Y%#>0Q6mD>uNc3>t=7?¾ +DW>n="~B޼Q>yw3f>>ЎG<aW[qM=s===?>:,>8"f< >bC>>;>=硨<~]Vk=P"S& ,j<>' FzLd=[Ӿ&;[K\l%=섰k=.>U7U'=_ 8>(=ܘw>=W\>=VV=ʒ>Lb<_mCkYJ=;?=k>>=I*}=d=;/F=1=νŮ>gY>u>hr=AD>]G_=J 6)Ξ?G=cPF<*(>{W,ss=9]y]>o> 2-ǫB@><ႻT>W>>rj,7<?+=HI ":[;>3>O./ro=xZ#'>;S1Q=S:=++o>h>s {L Υ /@<(9f/QJ >@>È,^>.ɬ;>fqѾC>G(zI =8=^ӏ>R> ?0=pk9>P8B|lLdP3><0 >Eh4Z> >F5>@ؽ4m>ǒ=I >Ӌ=Wr>p>km<,!>{@ "$Ľ̽v>=s=+>'>\r>'K|>=+=%[a<퐽7轷=~/k8=1d>~=2 ͼH :1y<7i>>PN轨ml8O>BsQ>kﯽ1Q?>Oh'_C>G>ݽ+=0~>ah>EL@ֽNpP;̽"H=,>@5m֡;y,<>۽}i>׾s >y0=J=vy?8>p>%.Hov0+!йnT>,=t>o(>$Ľ><4$>}>>>k=ᚯ>4h>>uK>L>ma>2{=9=X@G>I _*R``f!e$>j6>G>P=_k=>tO>1kAG> >'A>9 EZx`W>Ma<,xPJ">f=:K>1Ei>{ֻ릚4>"s=MA=PƼzq@ݼj=2>Vj{=F> >EБ>mm佾p=p={lԽk>I}.VR#i꼡#=[Q> O]O Ҕ=ؙ= 戼9dDbF=fU½]ؤ>~׾o=l>kDx=Eýf*=< >C=*]>wM}_>y۫Ƚ=o>婼=>=C-V>LM>O=S*;j=)Z:!x5:Q>;cC7>">_%ǽ>{U=1>2KA C >c>y=׮ۻ<8>I~ϼ Z>&=K^>pm7K&=jR;~~>w=[ُ; m=oQ=#&>VYFYl$>J i>_ A >zFK>][( >=oP=Z(-L;GӼꖻw#LvT>ʥ`C>'6xq^e7Dpt= >xB>6Zp==bk5s'6>=A+ >@ĽE1%>y=P\ f3醾%>\>>箒ppz=kG>G>$=O=H&=$c>)~<>o= wY==m>AbRo<^<½pZ>->.W;8>),>6># >84v9=n=_?Z<[jZ==7C=8k?ֻ&>yJz==Q= >^sk+-=L>Og>=i+˂m=g=\5>cPb>WM+1@ܽ == Ӿd=\:*>;<&>6;DL䝽#== =Y&O>+>&dǾڽIv=qO,=/:w*=#C F᤿|[->[I&;-&>朽3Q<"d>蜾B;>LC>O=>>xfiMa?>HQQ.< 9_"?!ݼS%G>=a=>]!:>>>n^Yq=۽W>>fs?>=u,;>c< C$mS=r>١>>+==>dHRO>}Stľb&gU=*.`b6,4$,\q(>d>n53r>|#pzN>r ?: hŭ>2z>+==?>y 5ؾ]-?=ڼ*H>>OgX=@Z>9`;=_ k=aw=Co>#&F<+=Uw5Ȋ>>>c>3 ? 
>$W=dE;=׽6i/5==Lo~ٽb V>0\`>uH%=>-kA>>ŇU|<;*=L\=K4=3 > ?>DHڐ,>=g!=cfK#= ̤K>=is >H >xUmϻc3>e<;>>>ZI:=Խ|=y<.줽/ľs4<^j<]°= ;<)e3=>Q=y<:/+gB\Cײ.=kL=Rls&t>>Fg=)>E>(HO/=dսb=.w>喼>3a>o[C='">,.>~?THq&-Z>!iM?W>/$lR G>s >cwC">=¾F/߽rfgZپs=s+9s=B>>ŻH; 6I>EK¾d>>v=ٞb>hw!>أ=>PD ;R= =\=LNѾC>D>ےDӉֽk>qGy=a>ËV$>┾ >d7M<Ї>8>f=e>8@<:=R>PK~C*=&޽{1:;+*>݁=K>iHҋ=H5>F gm=ߙۑ>]r>O >ҾN0:="epM!=Ŀ>7>7ھUU3;QԀ>F>#ZM>:=V>w$X)}=>YF=qy(=nj==><s>`Yh ;=ףT8n=e> ?=>ӽ AK˽lN`v?>&0+xٽ\KZ >.%1Ȑ>Z$>lL=3S<>^=I6.> )>WLc>Ar1<!;Ͷw>&-}>X >IځI=s=$> O=f>ӌxN>¡y=;~(yh`֪CV/>eagž>s"_=Dt;R;?@kg>hー;gs>kKɄ@>-= ^.c% ʽ =:iu>>=CҲE<K=Y]O=WC>*ֺ =Nb=h־ >V>}< R>-̼p)>ͅ>=˾amN ol?a(=u*ѽ7=.>C> \:Ҡ>j> 5>9U yȾ h=P=f;>?O>=0V>:<S<䠽FA ߒ]=!?}>s7=<=jJ=> w>(9/ќ:n%5 :>>@U=?Ҳ;HN]ҍ>=<= >64>5=ŽJGJ}>U[gE6u[-瘢yS:XT>lu >gg>'B :; X,Bu<->kװb\~>6BtO=fϽ ׭0T[="0%2B><{;`c":V>=5LǽAՅ>x&¶r>]Гѽ*\pK=>;>5Uy=>}%>0È5<,y-@^HI>4MԽCy=g">qqDs>k>!&穽^T> =>̝L'꫽]C>b(~=*>V==zu(>·=y>(>ATw>lμp==U=;ZO=;v>=?w_>A>d⎽q];~U> =~AC>,V(=ſN>jTg>G><m#95>߼ƽk!G>q>G+>">u=h>=ǽsc>>\=^ƽ?߽G?xJf<7PK= 1c=@s.bt=4 >},8>OW=g*==`@?m/=3[=Z?>8Y>A29 >\;0>d=V=8d)=zH>=|q&Ya>e>Oj;8y>d?>,C>ݽ:=\= )oBսƻ W=_>~<`cU>l7R(léC('>gX ξ>? 
uʉ>`,==80>cW=>V =g><>ݙ>W3>䶤>)&>ɤh1=*#ǽөu=-R< Q^:#6 >jm<ܶ>3A@=PD=O=լ=f >='SK>/} W8ܛ<7>5=Cޝ>½=Gȼ}='>"> -:ͷ>d;f=U=ثM>B=j>ۀi+dHM>;ko>^>> =N=$ >$>H!N~=@==Ji<5 f{h>^j:A־Œ=ԄQ>>Sy=lJ>ee>>Y r>Ǘ<=4==>ٗ=# 'Na5G>D#=bK>VC}%1>Ļ8>' o> >Y=EKJ:ͽj@=Ɩ;>ы;㽝׼6Kܽs?9=Tp>ʂJn^H$7>9ݽr=؛gn6>C'>I싾BK>jvc=3=-1>*@=s><~х>f[><`l>-V=Q7>/>7J=%ཽ'^>w!Cg~<=6\tԾ4%>'Ж<2;8 x>ȸ>0r=2%` =(=S>ۢ=ys>` ==oPO=A"W>31;J eh>ϾӂtqK:6>tI=_u;rq=ap=(j>z>)JOtf=ֽhZ>=;>VTz4瑽L 8>硽;?;]z(,n>>hX=#>xC,==1=,=aB==߽=D_]=u>M@Q{>E>p$>F=Wz=k=bFP4SYJ}^ 0 >YϽz;=_=OO==H@g3r>X3= G>Ҙ>3ٔ>jѽ# >l֩8>uS\=>‚>(Њٶ=e;򹯽J>?NCi$b>÷>i~>Gx=zl=zE:K_)\=>J F=}>Ah><TT 0>Q˼iUK=X >6٠p(<^x½m=RY a7P >!bW\==Q>)_>庼py>`B>)>L`q:(=$w>9&>罻>5RNPrŒU.2~>sýў==Q_>ڿ=8\W氾V'= =~fP>}>@q ,=n(ly=O6>~_=Ct>s>ž6C/z?>̾;6+Æ=צ=T¾%>"n=>'b?b_>S5>KP=+==6w~'>8ŧ>|:VC=k8>W=->U=Eɲn(ʧ=M=>Ό>LTp>C=8>v.\O>wX;>1VZ>Rzx>׽:rz>A=>rUC=78Rȼf<h=}C> z=PG`k,&=9>!u>$>5g=q>m=:5&7ov)g#ݰ=Ӹ=&8,>n=ݐc9p;)>N-=Z$?>X>4<,}=x= VڽŇ=|1>n>u3j&?HH 2>:>/Hc>KN>Z=kaa=,᧾5=h7>^.N {=|g¬ݲCv M=UR>]=Kξ->bl+hO>b>߽`=S>t;X>6>g>|=D>ID=|>ٟkm!@< h>9ie<:> z UG>:&= >2D= P=t>>Mݣ>/u'6!=&>=/>콝µ=>꨽lt]>R^=ݗڕU͍8>Иh>^ ?, >R>V˻O ?tK6?k>R=æH6C=s|թT?.>_=U$W>Bv>b=w"=~d>74<>=Ӿ >f"l\==>`Av=s=LC>~73Mb==1 Ug>Iլ+^eѠ.>y=Q>vҽŒ>崊<[=>Mh>9|b=ƴ=;eG>cb=!i>@a?P>w?7=G&d;-{e:׽{=~DC`0=6G뭽jԤ> mvCj=%?h<Á ze#Is=lM<>G[> Kl+>r=!>`>#='{>|w4>ѾĎQ>=ٓ *~&iEim==CŻ>Ba=R >u+0=Jr=v%肔$><:<ޛ>-K}%>sz'ܽ<俽X[j&=25|=gY/-j^<좘Y9<ʾNU>>it&ZeSJ>J'^]Nq=Q>{/=e^>8W$ 2JAiA: U")< Q|,W׾ώ)ۜ=P>*>+>jTDt">yA>>j\$= >d=d>e>)-=^aw=Y\(O>>Z=\AŽ>F=e X$>{Xm>1"<2=j"T= +Q>,>[H@>>y:G>0L>_>_,>rp41k5o<>!>E@9PP?e=Hb&.ؼV]a=Tayx>_L>t˷7:>`ƾEsc>>='=[揆=8T`>C=aw="EA<T >y>C K7,j_t=_>A^=9U=^>#>݌>|=۽6[z>&4'n>g N>/8>dP=Ec>'=Gv&>'>-@e<*s4=ŗ>j ?L8&=2>>>> ]4;=ؼ>=;ؽ84Gm=JĞ4#3˽nf>DWn>\">L>'K@=}<d{ZӾ= L>H=*N>}=>>*>}( >ĵ>U">1>.QLۥ=἖>]hQd }uwy> Խ*> y /D=.@=uOƒ/^1>* l;>t>F'Ǧg>x9 3v<3d>s>R=FREq>G \>i.f>Ӓ`>>Vb`8&Ɣn>";vpξP>@>o 1O0S/=s21\b)"A=&rA|>4;>Q>޶*> *>=,㽷$<: PM>l9==Pe>nE>Eth"4>c=*mN"I==[;<'=tE =>>⪽;_e<B==fL6=đؽ`=f5=^>Qp>x>8L>>f Ysj>^9ۼ푽D(=ګ<`lkH=1NJqqݒaJM#E} >ν+ ..>F/>&M6{>H=?X>ICo>/>V.==2>>6k[.\ѽ.<P޽;= J㽝[Pj5j=,XнH'=vFHL&ZLK> 
۟d(>[T(=[vSV4`ξyC>] ;E=HCc=b;|ct9=nཿ=q=|~<< >ES=M26< $H>> =}2i>X ޾b%ٽ4w3>*=!>낾u-=%̽.%>{:>?[>g2==-@/>lh >@=T=u;>\>[4<2U>f=*f=ԁ>Xͽq NJ04=_5=S>`>s>1d=,ʽB+>>_Za>jp=dp.ӏ,<* =x>tfׅ>{>6$}ѽ9ɹ=%=L0 }@\Lg>+_u>=a.>#YId&7p>xݽk2_>uۆ>d[~s-==* rhT>hZC<: I>u=] ^>NM@=PD>X= P=c>,f,F><@S>hU㽢ӀK=o=>O< !>:=0Y>[ >bI>&K>jB >=ʾ >Me>$T}=5'﫻~gֽS޽r1P۞/@>L=}=R?! '>C>)C=TC$XedD>۽>L8>7{r=m5Bs> ٬ũ=]=^= &\>G_FW=oD=`n=RAs=p>P=S(`=D6>> >j>=*4>\bȽ&=>u=+=l>p=9>Af>4=.>U㽦=M>Z>?>=vX!=Q >oϾ,<ソ==t$>>B+>:ý3ѕ##♾*= ھz(g:wѾbBto'B_=>׭R |ֽ[%|>I=h=Y0>k=ҋ=^$=0?U72hإ==$o =3ž<CϾ,>h>_S>8 %cV>M>aj=7u>>Ҿ<[ >q= =t">= 2٭=«w=>Z>t>u>[t>G=0J >‘><,<0@ n>gl; JQϣ)@>8>Z= =Д>W!$ >Q>B>|` >E}>s脻Yǽ />Id=֫S>{ľrN=*R>M\>z>/bPH >x5>p;]q>cp> =G"b> S6P=FRǼL>8=k =FP`.~ p̭>oC8 vK=ͽLټk? p.>1|>4N׆=H۽"=Kf.>8?&=K>dK !OP=ꃾoŽkM>B2=VU>9н^->{=t> k> Z=LVLi;t>>%v <@>ȣ9jc?"󭽽"=24<{o̩4=8/>O< >k>q0>5V=u:>Ld:>^7?<9 v>K)>4>6>~6>+kV˾ Vٽز;YԼ{tN>>=`&> P=>~$4=΢U&Y- MfztRǽ}=|=ʏ`>V=h8F=E.MY=Q=[^>P@ɼsC>}ؗ>kay߽WpI=P"=g>=q|=Mz>{l+>Ž{3 >x U+ǛF>-Ƚգe==O>}Խ ) ޾>d;+m=g[=}y[>3<@2<\'M>XMuȼ|Th=O]yP%mVk>3&"8Q[>侇W==΍>=M>=&>&z>\Y&=qadCs>56=$Լ._t=E>W ;=`=fݽͣ7>C=lE0-sqx2> prxwƽ-={;F5^?6g>)s p={<ؽ <!#=λ ,*<ٻl,x;ՑLd8=+W>=)7u=Ũ>C>: 7>=>_=i>D@4S> c=x3%g`i>\7M= p>A/ı=ŹJ;Om oe&=!Ѽ%:Ͼ;>BKSiO>C[}a#9>p7>>>Y#ER :=KTx־M=Hl^<>'T -A==%V <=0 q^>>.v>D]h<=eɽ˪=g:∏%auٲ =p<<>:2h@~=¾P>{{s=>Tk>lоt>>Ž!=j9p)>Ωx0=xl>nO=>t=4>">2>LK=zBZ>s? _؃=6=>Ѝg꘾=U/>[V>Pfk>ٕн>J;_=`>ϽWjpw=j=*=/Ϯ׾}>-=D=^ͽְ=<^',w¾)錽ښq0=U>=P>+P;`7:>=;_p@\y`ǽ>&PP>|,=˙=g'=go>Gcq'+>$hh >>u]/=@zCW > ? ;ԧa=ɸ= <0W>؃2={/"b𳄽=^>pw~oSM=c+ȍ:)b ?@">q='f=Zc턫,>3h>0>,á;xe'>ς⍏>W=$d4 ]C^Jžp$>,˾y|3 Œk'>J\<5w/Y8ů4;=)3>JAJ=$qa@T>VԀ=,>X9>(~GD=aрj\=C>>>>c>>%"虛p >S&JV>dx=RS==L><>E!n=8>BZ\;=6=S$>lҾ@w><9gl=)>zj#>z鸑XO<r V+=<Fu<0>ci>>)=B=fI=E߲1=3= >T%&?C=ы=a=載:ľz=o >>6)>.>j~>>_َLŽ+=Xێ>|BMC>G7nSv=.&>#$Qvnͦ> >=0@->64>jq<3=. 
-6?>y$ d&>]<8̽͹=~`"D`HB=@N=b麍I6Myp>U>d]m>JH1fX6q>k~=eD<$vR>q{=>ހ><Ғ,lL>H>k-h=vI>2>D Ǹ>G>\>=V\=dꭾ9{6vi>Y>!]оx>ͽQ)>㋽>&Q=.;8[kͼ6ݝ>̂+>i](,엽@Q@>-׌/>y>6e/ 5=iY+PgcK3j4>\OEp{=]ttp=g!T=.7->؏>>N >>$=o$ >N>0MX4>vz >>Q@>I>=AȾK+ZG>=(ID'';)>7슽a <4>Ph>ߍ<>hb>L?<̡>"s I>2؀2 >͸<"6v?/b;ke:=#uS<\BDmtEsjj><>ʼpZ>V= cT->&>mO=6 > f +kO`h+Ǽ6j=Eg>f>vf>#I>(MݽX=_̌!+>n8>C>=<&{f =u:5=!AȽ;z!=N=U:`fie?•=52>a٬==WE>z>ievKT=1A:>̟/SR=ϧgžR=2zr>k&^b2=D<1;u=ksxڣc=1;n=OPQ>?>\ErV< f#=]:0U>ay >惋ۿ=36>)=D>y>_=\p ֽτཏ4>O$.>}u>g>t`k>>?5r[QS>ҽa=u0ܘ>;[Sg;!7f>zXq>` >K%齗>k霽c> h>V?=f6:콿==uFΙ;D>xy >H&֌>=[D>=G sT>a4LB>̝ =07>ŐrⅾN3>Ti>OPP =xH=c)9>[U=}2J ۡ>L tIԽ ~>8>d_:>.% 澠Zvɽ=`w<>Q> -罈b9~vF>j0o½Nw:/saJ@>w>-\ݻWc@be{ľwC=R̽FL< K6]>>=.%yhJ<S>s!/6BGTF\C>Ec-|ھbs_>7Y>t>#<ͺ= [{=S=>)3=6X#>=$=<{m>g>Pa>bž&3=]uz)S˻=9 "=&b=)4;ͻ½1Xv S=<>GX"@IKMV=:Rн>]>8BWܥ= O<0=<>9Xm:46BּΕX->p>=B7L>rY>V>^>>vlt`u>"u>c=q75ʾ-k7o=2G5AEY\=r=ߖ>dq>8^T>&M@Ʈfmn'݁=B5$>H P w=C>&Ž<"BFlϊ>毙=>ML>ܽ>K8Q<ؽ?4=z]= f>3{D2upQ=HJ==$͂\z*[! >VtB>Fҽ) =&;>A<>>D>;{88>qýYO=;j7=:>kʹ="=p( >y=7>k>:>z=2L<;->b0)xh=ۏ.>Ul*j>Ib=]3G>.Gw>`(8G=cW=N=3?-w=I>㒽|A=-2e >TL]=0>e@X2+os >>Wt>+yA{>:{=Cl=/;;e=O ԍ%=15>K >Cv Lf>q="`O">UA#ɝ>pR?^]S{hi1$翼!52>Ӿ]<~<ظ$U?=3=<>iU;OtZFQxb<=t<0꾷=d;4i==.="=R4={vԽHpV>XT>0GZR颀;:>;ʆ>5yԽ&>@=A<3~>MU=Z>8>0s=Mze>q>t>Wm[9kFP=4>;#>Ⱦ=Z>w,=>t*=X'>_O>L=q>6=ZL==4p"cp=Xֽ* 5td>Z>~?_I>S=(*OU>"R2=K$ >7ɽa1>5=|{mIݾ]=<=6=w=܌M"m>tseŽb=X=r=X<ƛF>2J̾e>R> 6:X==.f>#;q`ƨQ=8G =?a><,½ TG}'>G>>J9ﻞՈ䚽>Ec> =!=L"JP˰>JS>8>"W> >1מּ=jg@x 3:=nH>뽱޾:b>~3=߾2ܷkezӽ½N>x/K>=k>>c>]Xt# D}H>jǾZOh(u񙑾C2>d=z<@4۽{ >q>M>iiЉH!<$=N^¥>(QT><>iu8_>=ƾ:+۽Q;>7ng3=\W>r%>:t}8 ?und=/=Yi⾱ɔ>C$>fwM> _>\++t1I:KA>D>{$6Xn/j-=D>-X쾯=_W=P>%¦񽐉X>>0\>ě>=9U ?R'>;=>D >|J=%>uW=ްٽI,V>s>KrDt=|뙽Į(: >_>ߒ=+R9>٩> }+ق yW:FEXʾ^>> G>=z@Uʾ?Ɵ;P= x 6m=%F߾D= >}V=HP>!fN>׭)<>Eb;>U }'=N=7p=7>yK1󏾎)=G=?x[> i^]/>.">E#E1>kFvG^a>l2D>#u=-,0>6> ?>휽lAF=EK{ 1 ?S'dɽ`>GkՑ>&]Uً>(=>L=v>|`1Om=@-< 2Mƽ>Z־z֋> 3= <{_ >*L@TW;<==^k=d >ᅾjm>o>)=Bh> 
"r3<ȇ=C=?FQ@n>K='g?ԧ3[V.,a$?bȑ^,=W`>럽)9>4;N=ý9&>ΰ;>m=z<[@w@=tCJ->!D>+>j`V5^p"NF=k@>qR7g1>yM=7)j=x=L==d=D>`>ݏ<*o>b=2K,>=3C>>=?L/R=[>s@=Mjf>.Mkx5|Ң=[>̇5j>rrܽLVT,>J/>]="[>- F=Z F>K]>؄*=V =+I>Ͻ>M=yO=[>oifj>$Xz=ۍ=>dBV@=:>=韾Aa|BM>>vXvX>[8=KU&Ÿ_>͊>Kǽ&NҾ==}=A:OP?ɽ_O =fYF43Sk3==t<<5u##i>~K,>xڤoU=8!ϡ>}罸=q\=T>~=M>H l꽯>z<5 ׾J>Kd=!" -B6>IKqp>tټ >*Y6>*<^C<,Ζ޽ұ>LO_<㽒>>!R:>z >lӽg;}>ý=×>s=Ӊ'+sF>H<(+9>;Ϟ>PDt1 +=&Q,׼8$1K:/<|@w=t=k=M=>C9Ռ>0>~ Eҽb=B7P=d=/X=,5$ɦ̞=Ų`8>U@@><\t>ny5~ =_ -=[G~9Dн0 ;Lꄾ8>9V>~5% >PFA;hr徰PV>>=,8diߍ2DU<,1=ٻ֔=p4> >&|4$=u<`=N.J=d:=( W=`؛=b;}cm߽mR5>p>=[йPD>0CN= ?l =5>,=,B|>ͱ@Qε>:4>uнW@tq=b=X]x>vj׽=)|Äܯ6='>~Wm!=(cV.=Yp;FEڽƾ,PH~`&ZEUʼ S5n#=]V> ;>}=4=3 &>羢Wd=5 >=q->u2<=uH"B=3>m6<="7>ZM>Ѽ>}s >IF*4>=ľ"\&c>OvR>`cL>Ә3q:! >y 0<]U>k~==gp=#^ i(I"&<<@ZcM>V>M,+=J}X=!c=B=[;=)=a>08>:y3>^gƃއE>e>T< N>==cA^ s=doz=f=N>'T#>J=G=!>fDؽLu.%}qu\<̏*g<N>3cv4<=NmO}=M=<)hv.=C=h>g=kk>g>>ip/>=e1\>ǽ=յ<]Q>sIDw;>~>rH:ou)|Q=0=>OcҘϽ޽aL=$M=L9Қj >w/GK9S҃=>~wԽB>.>,=>˼>_>dk +>z5Gp`ϭ>],>m=@lB@"pe>z68B>a>> ;@k=͜Ͼ\wtx>[$pZ>;c6ǽ;>u\]:8>\q=3>wz>|>+OS>݉=Ԕ=><8-pA>轾?N7>ž ng2?2j@/=<`>0=7_=5^>ҽK><̼2=CwqM?>&< _ b[6Z==Bk<>04<"}0B=<6_>uE*>т>ft,ؾbj>o>cm >Z?<4Z=]y.x=L$>Wk=as>b~^<@y=Jؽ m#><[>C=G <<ڶ;5>'ugؘJ3r#>f;-v> >08>8y9;=⦅ l퟽=PJ3\>r=\~>"x>Yf>Td!={ǯy<,=J߫=@2M8,>5>~=M<2+H>Xq'>X=漈A>5S>L3=#|=%;>B=8A=(3]TzPa>=A<<>Jʘ`͘Ǽ-+<>Y>!>Mx->w0>>tՕy =8'>5=s=PU>r>喽(U>@,>v=>O==&"#(>}@=[b;>U8Kq>Mx<ҽI>Dž=w=V>=a>\!q6½ֻK<ؾ(=HG>k>֮l= ,~:w$})=m=^wO 8>ځ>3ZMA>4kKԗ8Yn=>;g=|)>-Q> ?qc<=C=x%>=u>>t>?=(,cq9M½O_7>/="} 3>+<9}㾧s<,pCO?z>Γ>^=S>},<ƃ>Փ=ϖ*B5 ?M=#Ⱦ#.9s>jO]oscLؔR==߷O>&dE `I5ꈽ4i==]1C= ? 
>Tܽ@|=qM>LY=F71AtI>']/>8*>_ >>6TT>dT>!Ȳ2e=Oo>=44ɽO>>=(ۼa=$;5we >'>;gZ>=K==>G>¾>;>i =>>9>R=>ҥ.]= ߳<6=dS>h>Igg>$=K.z<>}<]p;n= mZ3C>ySν,ǃ%=XY>ѻܱ=H|=7D;o;Q"~ =!n<`U>ҁx%>>!|LQ>?9> gA=6j>@<>S>: x>( D>:[@>>gǹ >Pb=~jsuw=>ͽeNn=-=$=U5>ƱFE>Z>#i_=Kb`=,?[>)l=`G>,o>>I)%'>hƣ=Bpl`>J{>U>Cي>8J=üy컽#;o=v@F&u5>z>`O3k6ǽс<>w=W>r9gGMŽ>u/=Ik&ݾ+IԼ-sGjmP;>YK$>2}=;>{,и:>6xK=tn~rFA>S.I=Q34( <>'=n=><>I>1>Q֢Jn!~=M=h>qb>-*> %D$v td.> ݏ>`?Ў=im:>fh,>S=F=i> ݽ< >>G=ke6=)ýϥ1>Z n->-~W A׽= :>ӾoF=Xe@s=f=HTЉ0>Gvz#!>`/ýTb=XՄݽ:ݾ(ŵsi>Z֫=j>>ʞ>tu>k1$.>OǾі+>f n#[>e>ه:k(g>??N>PbƝ>xQJ<[9>m>gĞE>񌾿 8gͽMDt0?>7<+=|<=">i_<&:>лN>1^l>|{n6C=>_jN>SwN>̾*`=>l:Q>:gm)ľDo-QXF=;ɽ8=>R>=l>⼏ȾYL=bd;=bι>zEdQcԋc<]#=~\' ']oE=%>وV<<>@S>rG>h=Sr>Ⱦ8;x>Shy=bO@9>\V=M?<;7wվ6l==6q>>+z>N%>*>j20Mp냾|̾5$Cn=sê=E(ؽU>6<9.=$Jg>3;o۠<)>R=ܱ%>T>M6>=#{  FmJS}=d''`u>nS?=N%XR0˕>r_[>=7W>1Y@DTov@=@>x)8>=J=Z>>>2l@QC>!t>ҿD>=Z*=F0p">+>$w ~=-སZ=f\=A>=s"ʽ^{8]=ѧ+><лyٷ=/bN㵽W o>>e<(̽[`>$9[Q=`N=F t#>w2>F"]8mY>R =>Y>22>=CʽRz>ю>,>=Ҿ߼7w>EQC^^M>ϑ؀Ͼ%>=4>,+*Ⱦn=`>DG;=L=Pϼ؏F=>|쁽ÉC=y:>.[wnHh=T>_t̜_l:=S= Q\==+i>R=?hA:70?w>J_>)xJon>>_>[!9u;2P=mwT/>=*3=$t> Z5i+E[=Jq=&9ǃ @" =>>At=VR#>ΣG=tϽb06J5>p5>d>Tj>s>Ͼ~=v=T>/8>Bڏ>GHj>sI>̩=JZC<^/&]C>RX-mD}W<:h@=-罸׽mr=X=׾L?=szIL>TЪ+q>^j/>T>3c>rνu\0==>-7< ڽ 4>;95>(o>z={aPy>cj=yw9Vg\> 3T= DƼƆ=ǔ>iJ> r<)>ûoOS=埾 ^>䐒><?=eWYj>+U>ok%Ȋ!> Ӽp"=tq1>m=0d!>= =3>oiO;j^=>} 4==P()<-=O=NS=.>ܮ>q=Y2.==["&3>+Z=>1 ;^ۍZ 6 >Ԕ-j4q>~>>=B=]&gli> >vVP{d=H >ͣO=ri>#<)ͼ]ϐ$ټ;2g>=&iž,>><.yu=VO҇>>k>7ǭ<9]-c>N[*>7Éa=VԽ>6>}>B3=J=H[ҝ=FD!ZԼ=?<*"<'@^V@,?=v˼5E>5D>>=B>dt=$== ->Z>oоo>16 =3{?] >>m>9*=?=ˋ< ]>K);S>hB=">E> <(3<*;!>>1>?#>m>=99V ྅?=~?dB?/=ٽZjR>m=̽hiWw;>sI>t˽ ]V>>j҉>]R@:=~T=tɐ=c<&>3>nv`#4=1>C=iF>/ >a{fLG=q>e)>Eھ3>DH<2=1>P>8{;%9{+ ?2d>Q}>qOjKٝ>Pծ4>3r=J=D%={ =Ƥa>]YLvB b?轣Y1/$< >{=!>~Z9> B>kx=a>%c<>]E=E=ҵ>iq>y>U:ℽDf= Ԡ>DU>42߷č=K>:>NɃ>N>?># Q>^O8_/ؾÄW0ɽ9>)c=Q]=>nR=>At6T(,0>$1퐜<1wS/>*@>>jʽWF=>O$;VH>͙>%Ct2<>r>#>xZ]X><=_=[}=5Z> !=7>.k2?ٻ^}=<(=>S= o 'u.4ELEX>>,%*&þ>>2i<;Y=-FsKŽM#& tq}?׽> ><A`>a~;/H;+=\3k_9\T">wHQ=1=YJL6Ͼ>Ԯ;:Cl>>n=>1#K?`S0ʼ&8>ؾ W>V=뚏>X=z=ز2. 
?%=ӽ>=[>ڽPZ?ś bBAm{3=(l)>pSE/k>= > >t/&ԽG?h>X(z={;P>>: =Yн8\hm|"s1>^x=s= ϩ 䳎>{=/O=0k>= =q= &6T>袒=j D fkv9m>u>:>xIei0[M=&MJ>Hd W \>Rƒ>>M3,>ҽDV>+|<ݦ==G;=zDk=헆=$2QX+>v>W>k#>kCih=Y#V>7'`BgC>:==>O=|=*>쎽oBQR3h]=kT$>:>+@=H>"[E׽2=^̳l؀=DoM>:Zu>A ΍u᝾mWk/O7ȏ> =Ba-J>X ¹=aY>a S=EU>d$>,m'>6}>4#144=4J<=hi=J=d=N =v8Pݾ/=%>==5=Ƞ >+-%B>fU>Xǫ=B/y=5<*>>Ng+ܩ=N)>=Bнד> =G@U>$*8䓽)^qq(>=>X=wV(>t)><(B=|3 >Hqa{B=P3>ݙ=Qu;LK=Tk>U{==%G>.C= wA =/;Ȏb :e8ξ⃾4Ոev>?Ⱦѽl=̛; =>tF>&!>f">|>=˽Zx>a#"=hA@D<Ť>j&> S=Ľ="=Ǻ=Y~M%5=qmRC@=R軫XTG>>'Ab>2Yj<#ϽÕ=>">V6z  k=i;F< %n|<|>suK=?=>9=d=f0et;"n5PȽYD$>UO'(i:>H=G½gu\5=Uj=6=Ki=<ܽ6=L*ui=/l8>J]=qN>4ip>GԨݼ4(#ȟW2>E[>au7>\RK9=QV\(>Q[4q<%<ȕI>h`7'#7[5*M/|>1sF=L>=0?<$=o=;B >$ٽ_ܽ=[½*c2>~}>̧콱=ǽ{=>=(F,X?bбӚ`x>U{Z= f=e3>,Tֻ{Y4>լP혁>1,=]><{=:P=k5SI=X=ٷ2u>=e{"=%rF>A-D= >z:><7>;{q3罠 i)>>!p={>꽷==оBK>Ƿ6?cat<#>2vNjl=b>>M>yC L &![$a>=4 ܍=#@,>S,ὓo]=;S;f#B>f~=7;CzzӇ>g=1-Z:,[3Ύ>4OؽJk<б+L<S@>; A=u>hV>,C= <<$=>OVU&@bNn>&i<H$==>N>@! =n>!J:w= L0f3DieyN>u{=d >Z̳>I>1M>MT=~>>ǽW=!>pA>Q$Jü=gr>)Է= >S=M#нդ[6=N>2%=MFR>bJ>7T"#>1Yo[>v='߽i=};D>l[%=Q(>@==/Ԟ>~=ݚ~.=Gm19[(i;^hH_>uN> =Tb>7;> C<=p>F6>PT;>Hg23=5=uwX=葾٦=<ʅidWz<>d=t{%R>+KхW̅W۽,HAǾ}7>鶓>,F֚Ѽ{t=A2=ɾR>8 (٥=`A¾]L#]lvڽ;>C Fs[=[!vD>oj>n5L>NWx,=V=f׭=O |=dP=%aST=叜w(>@=5~>M=i=yy꽪x3>!;qGX#$&]ӑӾ7⳽-I>ݺZ=`=ܞ>B~>>1ჼ˽,vӼkRG<k=E= '* %H>퍾j_'==>˯5g=K>>4b0 R=v>[(<tM=| rU)>;>ľ޾Sgjo<,>A=0=  3sYJ0!>,>}KY >Z=N <(2>YG׃=8*>bI<So3)># >,ͼԽ㵗=>RIH=J =9yw@]D>ϻ[G=j=M>k>=8#=z=5 5>V>R[(^b>\>Yl>)>=2YV}%=< >(3C=!2 E> >beA>* =1GWg=[ < <'̰=ලQ>SZs=hDy =WUMP=!U゚a/@t>z?)%R>M=R:2nR

    MA=B>Xp>3>>l$>@=,>8>>3ތ>߄(=>.==C;>K=+(=jGn2L:=-<\从(>Ï ϽlP6 x6>:>el<=ڏX'<̽6>=H>Qa>P>444|@=?n>de=D8;ʸǼ_[>}P>g9> +}=>*x|=.B ۾g>s ]h>p=K=>o=Y]# ^>/wW">I==>^b->*#>=h {7tMgMU&qt>vKiB>d+t)*B>S">7Ž侧ES>>[=۳>ژ޼)>rE޽1 -Cu{==>d kΥ>V>Đ (zӼ0Zu><>~T>־o=RĽa>|:;Ao>_>^>~װI굾ƶkCY޼Uμ!jiNs S$GR= k>k&>||=*:i=G=>>N> o>>jT> 4>ϗ7>sD>͘= >B<սcϽފ|>N{w>?Y$Ӽ_ŋ*=^m>ua*=-<}=%/<=h-=ApN>}>o=-Zv>MZJ>RTBA>.>>= J$.=g>q9Dd|=ܼ1>θe>|t.>> *>2嫾}>Xi=iLBEn@=b؁ý >\>S/'>z=;᩾o{5܁N3>K>nx>=h=7!:>F>7Y3o >#j>89]>(n=@V>v>Cľd #(>lBMt[J;T Mhz b=⿮>X8>'齵`=>a=>=b_Ww>@=d=;XG>2뽰x>]>ԟA>%>G> u>҄1s>W>tUdRirKA;a>_&ĽFJ> .>Ar6of>>KPZ>սEcо(bU Vr>4>v> =J6=A>X;=`X=>m!x>Y=,!L[;Np&>W@t7->o-31|=Z{^h\>c=>ɣ B>U= >=Qq=i3:>@ē=jȶ==_>>Ёtf=blE<(> nRe/>=? =qB>@TG>ӟ>T>%Ň>ˉ1ɂzpƎ>zG))>=)lS>1ȿ'K;f=7=|1cN>Z>>-] m>̎-W>tǡ=Tp=VOQSXe0"=m>U>t'R>>C١{.y£z JM[p2=TN2 >@9AWИZJ<BW 7]> o>䆛0ˆ~=Ƥ>@SL=i=K>vPN>&2>@*5½ >ُ=]7>:;Z21>"=g=>{>n>>k>-x>n߆X>3_Hxξ±6 6(UJ!U?)=; Pl,>Y>I>r>=rυCc~z>s=>><7ݞ>~l>TԽ*mE;>b34uZ *)Ӽi<W=sDK>û lL >IX>&W?<<ͼ!&>f14 hF>c>:u"<7U;=8`;>t>;)ý".> =uш,@D^\:Z^D>Ov<#$ߚH>*=Y>=-f>-<&Q=a?'|>#>a=n#>>Q;8=>k?>`Cf>fjtHN;r=i> ]Lҽ/믾[}w09|M>k3>w1A>Ӽ>s>k >=٭=!MP>[0=8p>.;Suy|>>Lܽ$=$f>;\>Cm:YN;=cJib&}Yٗ>˫[8ֽS=>_>E,<>:M8*>Τ=@>+* A>s;>}4ٞ$ G\$b'.>H.W1;Ks˽>x!> >=#>+ҽbmMZ9#>Mz=྽K(XH>TNU)>j<+z4ł͂=H=G==327W=<1>Q>2`Y>]A:2eb>5)9,#>>;h&6=P~\<Ɋޡ[&>c=x>ożW?_;>;+>X3>1QԽ ̭72܊׺Kps(<_=U>R>ʗ(0>@Ӯ. &=.--=M>jqDyd=|}彳 =P#?LJ>B1>tL*>:==߾5 >^L>G0=vf!-o >Y1̾>R ?]+j >V=9>/dͽRH-vY>a>jv>_?VľF9=,߽0jK=Bxe$$ʞ b> ;9*l==q'=֑>V4}>\wY]S>ളM5>>i>8=i\ꣾa=QL>!SN>>>A f>].K\Q!G_=uDվD=S\7G=3f==>n>񭷽OBˆ>=qwf/M>W h>\31\P_< =1>MRQ>tP=-{H>>3ZB =>=e[>i (>K>6>◾wPveb+?<+p,=AW]=☰={lN=XKo ?ȧ3=ԿH>z>§=W>Q ?/G>!> =O= N`8=o);g<" +<+i>\>;7V>[>J?(Ψ=O;[_s1jVH;į\:!Ѿ7i>4!> >ν=siw>)~=`>XΓ=޽`> >QU=ZTŬ=B>x>q8>Cƃ=]<{>; 軽"!Y<[< p t<5ᄉ2>bc=¾Xpy$#>`a>0E9'G>j> 1= Ȁ>L= x>D_>Q;>=M*PgȠr>=Ev羡{2>l~>=0= ǽ>_=&%>s> =G=>=p>vMc8? 
*?>j>,a?T=෈0> (D>(@>֧#O<> |;n@>Z0d ?87m0>@}ƾzM8>>Mf>|}GZ>I=W18=fu]=^U>``>?>u=@[?A>QXY>0>=>>O?$>R$K]xG$ܽiB?>ءA=p>=haK >b> >*=ե7<R>.\n4ig<y>+>аּ&<;VϾb<>`=J82>ᐦ=܆X>7c>V=eQ= ==y ޻rd4k=aN>1>i=ld>i>&aN<9;:>s>=2]ʽ< SMX>ν:~ O<> J=3h,ۧ@=c5Kl> =۝g>lo᜽kyf&c;!>v>5Խy>ؙE>->'ip+7QԼp>K=Y={0>tz=<<˸/>A潋 >a>L=_>X>(9o@==fMU=8+3>>{>?kk> 'a>߇AϽǽU=8y`>ոw=@㾗nP)K =W=zi<9V%l>n-=׻>&T>gD=?^=E?ޏ<̾C˾=A>i>㉌\=ghy>Z==!d3> ^>rE> j38Յ=`^=>W\>)b;XEO(- =@ k=ٌ[>:W'=g>r̼ྺ<3W=J=5]=>7{>>|=^>LmJ⼴==}@%K>t-_4=="]ƾT=>Ԯ>\>Z]{H>Rž쌾#Ё>.t>C>\u"}=z6m_>aR>^>>*8> Y<^ri}< >n>Q=&KV>swR%q>\6>=S`>'xۚ=U#A= R\综e<!: -Kb=j>̽q WȾP->Q}xr=[=0L> >e>њu[<{nH>ԇ>dyH><֢==q= ~=> I >|r<;>z<>.:=_r>֤>a>UǼN>&}R<J:O=Ci=0c>nv%>vKּQz$}Sݐ=H=E>H =]sżR=ND=٧>n> Vx--~>L>\0`u>=>DP>x= )>n&=jm8MM s=&h3Pм>o\GL>LԽ7H=;.?;ٳ>SA;D; >"qҺpRĒ=`z5>?<нu>U>x=a!%4Q=><9=Q<::=+=m>wQ -QC=#W-8==z= b> u=gx=>>>g |>=]>>&V>'` Y>\帽Yܽ_v8yq= o>P=F>[=l>O,$U\>;FS>[>!i= %Y A`>yYy=J=9>3^="~Q>>nk=dkν/~='>_(1~=<ѵ==У=Zfd>>[,>T=Ƭ+X/6/TT4>T<Ν > >/=UQ= U>}ؽu>>81ʙB=p2m?8 Xƣe=,,>&tm_2hpx>J žŀ0>2[/Lْ㿽$;Rľ|Z&:DWF>a(.ϽGZ!>nԽ>XӾA>;=%`ћ =[<2ľ =pͤ><_5%=3:=q"=?=+=G,>;d>%z=P]q >l2;=?̥=i>=ts{=ma?>={(=~(խ'=\1~=*.G>.$>,ӽQ;u*x$>E] >p >>Pz= ?D *t>M,6L : M׿Sؽ>lߝ<]h>"c9>4Q>`>?>=4=>}Y@>He&Oh2Er>ػ4fY`>>Y(>^>)>N>pYO>Nf=<:1>TE=U]뻴KWj>2r=֡כ=NϽ_P>>=>ͫ #'<f. ~ʽd=TsrmRh⽳bP<d.(WJy=U>(=cu>v>ּ<.d>彞Z>*G<ؽ>'>)D!>qN&=&1 #A}=:׽>=;">)/=>"= ! ?*==ܭ\kk>x>4a>^h<I>W>=D:>{=Tq-=Vo>Rgw>>f=$=k~`=)=㮾,}-jļB+>;A< >*>P=T<< vR>໓]\Cwý=_4=dP>>8t==mN)>L `m]qjA |<(=?ǾG>_? m=&}`?L.WP^N>1<\a>>;ҋ='= =)p5>7ì =A?>>3y >Bf>=mR=qe>*?e=]V>mҒ>EP־>=@T+>>b9= >= a%7Nv=D=4=Q=g'>/ؚ>0>]|>>>ȍ%|>pSz<+>1%><< >8*Y<><$/ F<W>Я>8=Ƽ6>v17=mّI>ѽ'A>>iyp>9,uL= l,z,j=ӑ=P>16p'󌒾 ?=oK=~=U2EG>Jgi}7w<[>>s<o=T\۝ݽ51y> +=fE=|>%==н#T=Ui㾦Z>tm#>+"(XGqwٽwX C=^0?7μ W>kF>~=;Z搽ꭽk>? >+ɾ̽ c>vaA>\D>UjL=p=~9K>>ċ<Ͻw@n#=`-=^X W>͝;>Л>Y;s<=Et>OGD*=wr=?I>s[w]<@%>==k>}%Y=+vCaml=x>9G?>|= B3>yCY>I>W>c)YTK&ZU-,Ns.=@)=NU< S4>]=Z>!߽5>Nh̒==:D5* i&fl=BNtv'/ H%n{ս >#R=P=)I"G=>,=S>@?=Lkо7XV>>Aq>`?uGb#@s=׽+\. 
j+>Ε<=a)=N޼?%=;(L4x<D\>b>^˷=#p>.>="e}>Yy="]6Fž!ɛG=A;ECw=6=Z>:=};=_c='@l *=v즽=Ħy=>N D.D`K<bdy2>֕in >LC=I{cO=1̕=)e:W>==V>q==V㐄>$B5>,=A:9q=(Ryt)F===>,i >8?K<0(2P>6X=N>ji]Ľi=;̾=na=Ҷ>>>o92< %B>3>ֽ0ۂU>ؾ$C%CM>=L l`-!-=: -<㌓>=VFά<,͜=㺩<*gzc>LZ>M>K>Q=ZY=>=սݑ>=O40^>Z=̀9>V>>j>=J{DHhc=> >k{=e,=PfO[5kaӽ@7>J-PZo"dUiۼ.{JK=g]o#ͤ>ul >^oA!w= ̄&B>O A=M$p==13->E>wH>k;M==0<䨑v=t82Bq=wHz8I9%`CۼPOoBlbr >S3>0=_>>=:Vo~`D>?>J"gĽ>N>Ҍ`<,%>=(^> >ӣ==(>G>v o=F>Y<׽ ={$=㑽2{<&=t=8ZA)==!6Xaۨi(>?><>=0 >> h=0>l&>KP> Λ> :8>AM5>o>NA><ѼJ>Pm >yQWp|j=$=Ɏ>5Y,>pr=th=@>nD0=>9>bn>y>}>)x=^uO(> >W=u1 F T > @c[>V1=>f6/BN>@71=sa=պHʢۼ9>b_6?;Mߜ=<ѽg>ʹ=g/P*&n /l>Nl$G=f=)=/;X=?QZ>'Z>ij>G4D#9>>w6" *>^^>pjDvg=0">4b >ׁ<7SоŽǼ25+a>?>2=<>⻸Pm8k=Rʪy|ZDT>yo>w>l7Mžpuɗ>`b=maR >"Y/>>/Y>^>,p>Ьw=>dpdK%M>'N=oW2=^$>9_>_R=ɑ>w->W̽W>= =6p->#=8W=& =D36? >oo> m<^=;m^=Ph'M>t>>Z=}V<>ɅK.xB>N'`>o/??ԽEz>O=%>> S)=.+?|>߾urI5?>WƀƎټs>:yls>%Ǚ=MLM>̾ (x>|>E=b>3ߣ>wƾ6r>95(>p>2>6)>8>>>>;!ѕ>2?x>l>.>_>n=rսCE=ߊz(=6(ƚ蕎>Ѩ >䐸=>(&aJ=%'Ѿ4>w?"> 6:#n1ʔAzha>)ԁ>5s m9=V}?;Z>&('4 ``Mb>g!>=d\<˼)=>; X?4=dȽD">==>Yq={>U*>iO=>*Ӫ<8?7 t>x0Ha>VK1>o>{4>P;9нpt==x7=} 辕0\> =>eA孱>Y>=Vl>ī aDqC>63x===5=$=8ܯ"Zr=&_E9iTx vD6ԽHV>?2S=Yu>Ӭ>=%) 9[j>0v]= >Dש;p:>uyS>?>R>LP%Ό ?t> >>">L=B/:>3jZ g&>IgH>E#ݭ> >5a>m=>T=l`=xd߾ʍEG~Ž%4UU>niڣ=9,1>,>DtK*?-W Bg!]>sI>@%;>1ԧ=-=zC,1f.W3=ZQrmV?l>o<#%C` =/T>#&60>K)>X ?t?۔*>e܂>)>$="==*o>?=ui:8:Ol厾%>0BHW=%ޭh> >e>fc7>1值0>Ǽ!bs<0)=Ϸ=Wő>>;3(H^$Ⱦ\d?;O>'>Ft>oZ'*=)>}GO#==)>LAƼP=sb>Pɽv<J>{~>8;4Re>`Uh=#C[þ4%=\?PX>==jv>y>a8ϊX=y|y='U c t=V߰u'><$~>t>/= > >|ƾ5>TP>Y>O6=j#Fr=Ua=q=̄~{e->ŰtrYw>}L==}?S>:`c><ΰ">(f=(>P>>c]׈*Ƽ= ưܽ?{Y>Z< =/~<>}HmjUҽ>:`=>F==`ս]=.N+>r=,݁=z>͇3>Mm>ȞŇ>X޽E,Z:=ݽ>߄>v*>Z>Я>?VM>ZK=]lHS:>qGqoD=3/>F wף>HCs>_¼cfνh[01 D=\w>CA>L"Z >>~M3;U]<7yfU=b8s>1j\>=xBIW=_D>B= ;=%=c==+=g*CP> uRw=>սxIn>7:M_{'L=|>r=H3>_<*.=_= 1z޽1KE>K*A<ʁ.=@=E9<0v>^l>qo>J=3=޶p:>wM>k5ѽ>9=Кw>ҽ->>=5~f YOsh=8=;'*8>~"w.π=V!P -=?>Y==_^>5do>{>u1X>Ѹ%ͼp>+d=(դOs>17>caDoٽ^=<;f;=\Y ͔>|^=&>Z<X&h~=:w<=C> X Tf>Q=Ȃ=N۔>o>ؾ۽+=R>再<᜽>maO+=Уj= ==cSIGhT7r<>U8a>$> s&޲ݼ4ȫ>]q߾ =>?=voO=kvs>J <>L>g[> 8=>=݆T>\lB׷>!#>9=~4= 
?<>{/BN."М=JfVg=T:;{,>Ƚm\> u#>1SU>jF>Z>1>{E=iM>T6RV=/&HǾDAݽ/fkM:w=;=Q >%>mV>IDr'? = P=nTBm>WRE= /k IZ=軈8=@nKc* (Ag>ķ:߾@ O`|=ߥ>\*씙g >DP\>SPym0A->-->6=>>orF<{zC/<$==(>ȓY>RM0x2=T= =cAS=`==>Yuxl#3=- 1;}ȽMڽ{حr] =xb>C:=߲=B'nn> ߛB=zy #U>'q=6/=4ý =>苣=;=c>O̘>gнs^Y=k=a^>C_&==s*=(=JE; >Z+/ʲ>= I>;=T='YHw>H6vz>t>=QC>=AVNrg==FeX>PV<~Ԫ²5{>-$"뚲<^=j>fÜ4x7T >Q==8<=<,,e=NICc:+=H|>B<=wѽTٽaAaּ=߱7J1%>z.=Ͻs>w;ݘ>cqѝ.a=l:`%ʾ2Ɨ>Rm>6`> ˽=n =ԹlB> /> ryC">Uj>s&?F+6>^/=@վz^=*M*GƽGzqL>[| ,>dg*|4<5҃>ܽGė==?dc?\S>'~2>E5ct.yҭm.?)=?ȅþSҽDzM=>>Ʌ:?3dz3>?~;>+B>CNýP=NOV>>>ODI q!ʣ=Iɾ(=鞞>)A>ݜ>8x%B&n?5>o(jq}q=TO?@>">->۾>;23 Ī=pY=O?|~L?l6C=".>\ >MpK F=\e>~=Ѓ>&ֽC>O =>@(>G w>bC=.>w@U>„?q`Tq> C->}>dm;J=\^>Dc>4ə> >a柁@X>܂ᙟ>:? D=jv< D_U0݉L@c?6>`7H9= vI>[ݯ>Ne=bc=K1>MoeuO>>D>=)1kU5#=sz=LI>o>aWjǽUޚM;ĽTI뾿<4ྕ_< dFo>hR=6>~= >R==fZ<:.?Y:/i Uo <_\>9=\EHa>uȒsTCJ/ؾ>t٪=8Ű= 1;>m>Ƚ] 1E>W¾D=>j?sxg>$>S=H=6>1=GĿ=lr>T->νnA>fWbC%'>P=dֽ >~i9=ճu>R[X=L?j=Sm>}L=]V`ؾ##>F/1$ >im*֠0b(1%JȆƐ > ??|=)7e##=ӽ\>R}:>>M'>>|#5>/>(G>>PE9>r>ע=f>hE[g3>&(=P=پ>-MT>"=:AB>˽H>͘稈 헽Ek=uRCr6SXY|E=Rr =_F>k!*>45>d=kh(㽣ì5R5j>泟=!}> IP>~Q>=7{6>`=!o =ᕽUIuuW<=VE{<ǾMW>n)D>e>M^=l>ܾ>ABӥ=}>ySc>b9>dg/sIPl'.S#n 3 $#>-U&gqtѽ=b>3:$e=i !D->91{mȾ{ǾE>=eNP=e>>YK>4?8==nޝ=e"mUx>v5B>)=G~ s=n􃾤Jý[?_]櫽>=>> >D'T>==+Ƞ"Lg>D>p狾\3ٝ2s!$

    )B@~A>3W=*>dVX=׬ ]DߛY:u_E0A>33p>i=4=<>feJ>[|o1=\==r>1>е>K>CE ޻7dD>>>cRd7}z J*+_>>$w= $.3{ V>!=um"h>}=>z!댾}=$w佃=ӽt?ZA>%ٽFu>Оܴl`>wW8=W >Y4n>#=R!Y=>c>ዼȇ96=ю>e{]=Gl= >ÿ+$! [>=A)>=V=l-;*r=FA3==?r>u>]r;ͽ><|J>^7:gm==ݫ}A?=bQ'-h^I >X־2ey>0Mn={<"((r=6.Ů>"7>|J[d=A>e<(h >DL;=2O>  =P>t0> ϼV==üK->zr8>%`p5>WۦFAi>=ڴ9>¤==|D=0om=:xK>ik򕧾x hm:n>P9y /ɽEC>a(>!<`SRHD;ݙ>מG+fսE9#n^3\@= =/;Me>=_ zɸL= ZR=2>vR> ">|=R>|b*=P4=W޺=@g>>>;9TG>lw6]>@`ǾI=4!M=s>E>x>E=)m;9e=>5WP׼Q_I= ۻUxN4>;vC=7> |="|'=jY.(f<^gɖ>=z>y=>UL>>Km=;=$>hʝ4=c۬A>sr<V>JwbKx Os9->\>-=EE@3D+L>=Mņ=>]O:Fo<>.=E[={}x0>j>xe>bF>>ܽf=2MBF=' &A>܃>k=>?=vM\JT==VK%Tg>G=9={7> =osV=OnᲽA'Em=M:B=O01={3ʽ@Eh\>p [SӾRl=@ Y*>P =✄>GB=Aq%ڽ5r; 3>?. =!Wv>>s=w>>Ž]̾= >1آ> ysJ6V& >=owҽN >Rn=q>A'=j>np>&5>*Nf>4*ݾFuKa@K/mƆ>Cޢ_E=<#>>ts S4W\>~?}>@>"r<=aGԽg>ͽ/=F, (>E>D?q =V=82l={x >ӡ>}><Ͻ+Ђ=8C&=(QiB9V<-[>ſ=mQ̔ 4;>dtI:>UNfSޅ7k=55= ==>t>>>"?T>cۭ=ix:`.x3=*轘gK'9>B = >捾m=av>>@W>R\hԽ]>*c>|$ {ے>'E: ;O֍!al ST]<8 ]HdJ< #M><4>;|>DCl >Y*=~*`>(>)Y>+=Q׹*?{>=YY>!ݾ4N̽ 6>V>V;)ѾUlL;JN$E>I>$i>]:>>=(#ٽ!U)=2>l=-OdR=֏ټ@@>{[Qh؆ebp@$en `f==f>r=t2wǾn=&>`=Lj>64B k;3ZۋE> e >>>=>%?S>g=ȼ2>|j==߼ld1[>L!<<L>xݾL=q1;X`)O䆾񽓈<ӽ[>>…$=E=ȅ{o =cB }=fȔx ZٽSj =nr9&A=9L> 0>>`d(>{+>|رd>ȅ>H>A>q{~C>~=$B>X2%|>=r޾[!?ǗQ]֮Vo>e̒>=@;Yy+;Λd]=~ Uo*=`<=s>HuIeپOr,o=P19 =[jN0>~Xl V>o=w>K>->{<(o.,r1>$=I=E=_<"O=`=76>?do־˺Dg)d\=g;O=L] JQ=Ի->l, >C8=lp =A:վF<>>"I=E83W.%v i=K .>>>z<=lo1=f>>+s>T ==`>4@o>>{Z w~B>rP>ԖX=B.>돟û2{ ^`*U=yq>Vtq"0iw'=yL=,>C~=2`ݽ*=>ݷ>rSp= _>Ɛc>Rrݓ>s({>\ڄ<+-_=VGĻXj!5>"15^=c0=Z< s=7cKJ>؄Ӽ;sNJn==u<٥ۃ>D<9d>f1?=j><k!& Ծg菉Gqa>y=>>>r#>Y7<w$㙽:f= y= `?;I=D.rG{>?IػW\>ɨM(>#>*HB=*ax<>WܽKv=?=jýQz*=ۧYi>Zj>d>v!9:>3 =M\=.=H==0> f<^ۂg?J>5=g=Ƌ#p=g=TFG=LvCp>Ч\=>>=>10c >G˾Z=>Dt>!r7[q!>eD[ U>଼==}D> =ϗ=)frI0>Xx>=iP=7Sa>xP>>SB>ލ>/>͂<>M_[>>Nn>!7wR].=#=d:򽉱xG!X+ 9u=&R*>B=8bwnCC[=H,VG&5оs-X}Ʊb=͋bb2HҾ+=҅=>A=˽\=>Л= |꾰=>tHn| ҤC>׹>ٔ >jg=I>=1cx>0DS%>ǓBޚ=RmW=?)==j>a>LK=B\zj=ڽ~jTa=hսd}VP=!X>R?v"j>Ri۽1\7H =ٽZ2=-E=P` {M>=V].-=.>`H>= ּ:=k<3&[g>&^ S}]S!>a<5< Eq/b&N"=:q"GG>v=A4;@>6b.>X=_ѓ=r;%=>=l "ͽ%55[羬8<ì>HM2;<[m3 >20UW=)/<ι-u:\>$ 
=ѻ\wJB?=?2= А j=vɾ :=v>=A3E> K">W=>׷+<0 =Q$>)p>f=_E5>?ƽ,>++N(M!=*¼=㙗j=[6kV31d?=ۖ=:+p<u>#GQ\OϽƝav2QR'س<ϓd> ; M>G;p=qMG~GD=#-q&>P>?A?=¼ vڽl>ڽc~Kn!RѽǬ={Q< >>9=EO>I=+PΑ<P X < ĭy!@_=ܞK*̼xAuq7@ >f<"9>< <+XR)l]hc>  >,>)'±i;= >U|'>c %]F+>@T>pck1T8<7> ,9P>fӽ(L+㕯>?,<4i>8>ҍ@9Go[6!?_э=ж[T>)=1=y=>>kƽFI,>]֪o=.=Tk=:f1='<=R]1ڽ~ijTx> <k^>==S>ż̽nw{ >sG> >̀(>wٽ=>ѽ>@=Di8|}[Ľw!tOܼ4>Jɺ\>Sν=L_iB녕>hA׾(fɇ<T%>J>޶>;V>b彽pҼ{$>S\P4=">* K>->/r>* xL[/>L<&7=4 X]>_>L;1 u=ޖA==ḾhԽ{-q=<|Z>82=\V>A> ?ƪ=>=K>#> >+>fӽ᜽ZVSZ>Ӂ=U`H2 p%cę>9=> N3@>n6Uy5>R=B=H-;c>W>RYaI>ݽ,> L!=ƾlr K>-Bs:?X>#d+D=V/׽XMC>z3=p P>qo<>Rf/x>ƞ>ڻRSw=):=jF$>8"l=>=!>:1뾑N>b&:dmE >%}J9m>GŐ>=<t 9=i]U >TþA]Zþv9 W-l疁˘ĉľ¶q>\=>&սS/bd=/>&=FL;M@>% ; K?f׉>i%8<'О>ߕ=S۽>ns> >X>+>|= Oւ>ژ=~=>WhGp>#=mGO>;>I;pD.5g]<\ٿ>>QT)9=#a>F>1n7>4e><,;0Ľn>=3=9\˾3, >MLJs=P=gykr>]=Y*?ysL>hsҽg>f5oQ>w\~,?[>N=aRGν?>KO>/ӺgEd&>͚>4h<N=A> rNi=ѾCW>B>,Ĩ;> VsнC*(=T==l>*>> =4cT=%:fK>MBY>>;HIA>󣔽L7h=?Ľbh!#=߼-=㓽=(RP0\<!>fT;e>.=+w]==~D>@)">=)D>˾2C=uJ>%3Y}\9T|WR<>xaW=gD%=,%t u">6\=q>i'd#b3>Ux=qc<ҕ;/HNEqv=>޽CÍ>1G <7X*(JO=< ds՟O>y>hڼ@gX= >o>`\8˟< X=̓:M=\$K2vƘn_>ʾh=K>I.>Z}-Vj)1弙ֶ=T+=u/(>Ԇ> T<5>_8>-e>ʲ99a=e̽i c>D>a>+.>ڸg=~G>f@>s`=e ҽ!<)ެ>->I0>>ORþS{OQ>>uF=o=$_e;E)> ü \=@T#=Mk٭d>Iu={Gk@bg\>k1;}qp>FPT9C(;$N~aw>=菠=q=<n=SNO;Q;B>_^Ͻŵ=h0=O,]0>E=|> <=a)n+~->>cKjH =ag־^5<\=2 o=ɘ>>dF<:>jQg=Q.a> {p=6=9_+]N1~>gZlݽ>G>:>1R"#-kb=ƽ\|>i==[(=f 8ŽS;9̽LO>"ΑD->!뻲ф=o=~ƍS-߾fI<Žvj>D;sR¼7=,B4㙁!V>+)Yw<F>K>7>u ?F=qfP>/?{z>d3v3(,D>sq d&=>٘=G>ݽnjrbzNB2>߫=$E}i=뵽3T|̌%S >F>W:[=>F-=׫X=^= >˵ꧾ򨽡>=ka=ü/%>SKxiCapLɆ=5>D>@׻c9<>VҼ>N>jVT4s=Z~T=]q<:U;B>%~=GS>R>$νe7*=de=d>ˤ/f׽*>">@*=Ӄ>Z$G>R_p: O!:B>hua===D>Ԥ<" a ><_&'vD6zR,E=Rw==FP<x!>T=ݚ </>u_kRY Y!,kþ=}X>?rQн>vR@U==e>;jYD ,÷ cW=MC>-'=> !#)<n>x>=TG)D=.XT{MK=>GD>s)/WYƄ8>ޥ>Kܽ3Q2<pLR9YW=(77z >r =mK=0ս&^MK=?*=-r>qq,k6>/U>m=8F> g>>|>uJ>>3B=+D>t}u# K=;ӗ=n=Z>Sҽ;>vN=2>Nct =rؿJ>1鍾>>D>x_=+8iVUQ>Mrq>>1J>u7оA!|>ڰͼ7?]hA>\&=T=ٰk>i=iw>M0b=T=[ =-s>\³=׮ռ 4C> =~]=<=<Q=ʌݽj==>>=)ӗ>轟|$lE=A>=l=Fa>̍I>Tþ;,iw>7.>hT=_>ML>`Esʾ:>>vN9I)>;~i=\===@<&>|ݽ6> ȼ;DTƾB(> +UqT><=ݘ>y py>ބ=> -ov^>o3 P >V =NPK>9; = 
v=N7QX= ,+>OY>hu=Z>7K>|ܽ_P=~=3뽫 =MV(>㸪߶!mfv T=\#ҽj?;Tlтqq)*=BP=>찾a<:|<)>b~FggM= ='=Z/+鰾ccmr>rk,;.|r>nME<=U;H Iu>zyщ=̾ĕJt jb$`='WK><;zG'3>:`=H__{⽇];S%>=%`꾃CAU=N==9=lH㚾?=y>*>>%҆>p d<<]{=pUIy;ս'j>Ok ۽S}N>3=MF>>Nc>[wWZŅ2?ߐT=?SL2>뽽6>~|k;_촾FRٽ>n=gF>`" 7>^ٷc>Jf >>u,G<⼝=䥠۾=c=fB/>)u`j!h ?:+=2VfVI>35>ٱ>r>ХS1<$>:<:i> ׽bN>u~=Ǣ2߽ޞ>Nc=>=۷=g>:^><&5Ŀ^=pZ>);tμ%f_Խ㝨MuU[>C;M{>`.L*=O)>b>?=J\<=+Oqٚ>g]5>PM>A  >nH>>XZ|=i%ž=c>ƓE99 >2Zн>>*~=PĽE{&Q> !<;4>t>doF_>==6c=1>{">uP=1>("(mC& >jآ'оR a0t4Adɽ7 NAz=Ђ>`q=>>Щ>Tq<`h}җhĻrZ*=d8>=<ݾH> Q,>4۽Ur]O>A'MQH9=t3yEz3`>,}= =F)=NO =&ɾk ԼLf齀z=P\=ij>=pN컀"F}=`>==P<>s`=ֽMDR>m=R@p2>e<DҼW>Mی5>/|8&=3(ݱ e=+=R o=Ιk`->w S>X}eD=l|DD<=7}S )K=i_'qU<#d$<1>>xhG>۲ <=5W?Vǭ=~];; <^=X0uȟc=&&&д!;zh.#==PASqc=}=3]Ss$!\ڊw> fQ:WP;7=T> &> >`v< :<ý5-VL<Sڽ ?=н.8=(ӽk3%NF2>GsGrږV=>X>=y\=[ _>(>=;=< =;S<|,=+/=8v=>Ȁ_v>Gd>-޽.?V6Zp=h>8:{!=ݾ0=6ESgS>G>3ML`=/Q=83Bc/4փ-<)ڽnfɽ>=?< S>]>>=>T=<*ܬn 6HS>9@׽3>J= :!>>ds5Ǿw㽴kQNC=m>:k=B y=DǤ< :A۾ekU:=ѻU̽F>v+>;`<=Vy=eR=G$>H뼜O[hA>>X "\>s>?VFV<\Z};><1_`<ŷ)u֔==<9/i=&YX=0=|*%Dn<3ֽcJHEx >;jTx=>=H>%\<>6:=nBZS_=eg\}>[}+>_>9>( =ĒϽK|U==۾@>S<*v>(p;P=|o6.h=h(ܴ=`hC臓>L_= ==l9=n|-D > +w=(#m4-! >_',V ۽,=9<!=zCá2_;/>D>6~#>j;|>5,R;o>=6n@>=r=x˽' {l=E=<]; ܽ=59=K=qQn-Xu*=D=<6Wq>q2>WC\=`>`ͻ\aߖ<)C=q<燾 <==aoFh&e;b0>p=B=u=T%S=_W.7U>Pq=lk(ӆ<>>vV+HG=; +>!M>{ܽw=y>u=Y܅o<==`76>Q]:Yǽߐ&\<|½X=Cj>%au}oٽL>Ҽ=u>z=][>A | >B-=ܯ=<I. j<%>=E=i&;x]>ٗQν >>?M}=|౽-O>rr=i>]Jes%58A?MӾ #ҥ>;9=b4>i>>N$p=Xr=?w> ̼VɾU#J3q=K>3% > >R=> z>~s>K(eڽ=1^g==A=64>$~0N,N=혻5>ç(<(V<9<clu>+=">H cJ>$ýjU>+>y=h] ҽ5=Aj=>>(A>-i>t>W>Lqzu=Od,׾y/o$#>/\MKY=퀫vQ ,>a>G>V>z>]OkMP8U>>;=9PҼT>kkAq** 7>˽~Sk;Vě>^jƅƽm>*˼V=;c&=!Cb<=8еP>7vd.>h4/>P2Te=(=@LA>>N 5廴U88<={9 .>C>`{>x>K =Swg== c>O>`#>=:=Y?>=>>k1<賨)?tpC.>=g>~=|>I>ms>{ьȽ]& 0<]2>M̵=<0s>RF=S0E:Е6f>a=GK}=LB>D1>;f>%X\zn>8/F=ā >A>u<6Od2s~n=й >Fܽw> [ R0>6)=.>ӻm=nɽ!>jnM~>W>O>Y_P?>S~ji>>>g!yG=^ĥ>G>;=>r_xeYA2=% =>?9b>Ͻl><9#g> >a-H=Ya=\><>/tN>H>ؾBȾ>>6eI>d=<=Ʉ>G>۾7}B> >!jS줼$f>4> (=,>QbN=fϽ>i< +?ӞҾs$>k>=>C >A>>& >&/F=0>ɂ=]B)?}=M 6ƾeY2>Y5 ޒ=<=v>K= ;?7=8y>6΍)h>C~ 6=#>*8^ i=Ņ>|>l`.=5¾ =`? 
D>->D!=B=F>66լ=C(y>sB :>U+@W捬 =3:>=g2OU.?|E=:=!>[דý_>x>>.u>9>?>:x╾0Լ cQ ?SL</5>tKA>W%>;,!wK|CT.?2J=m<;l>5ы=̟)`>=>Qx=i/=>X >\𽵗0?jn<-= > >dýW">/3>y>3S}c2>iSt=؏ :P>:=ӣ^;E>F'>*)>>?>DJ=XξvQ>,C>L ^ Y;=cgRa=m(ԾJf]dl>!/J?/kνe˽.b+>8<=:Y LʽK>?>漝<^/R>ゲ彇i+=' ׽.ǯp>!>먼1A =pD#= s> >Qۋ>!?*ýI>-,Lb^u= =8ݢ?>xOtK<;->o/>ӛ<|={=eݗ= MLֽR<_u-漂>!;ڽ(cӽ>^>ډ*>Ӵ >bM?q/>=,@>S=?$?fEнN&R4;=yr=x2=Z>ƕ;l!>;1>=m=^>So'k>T=_.>w>25>Wk=*} *u>%p= =&=!:3\"vE͔>m6=뱁<4M|;״h8>ƽJs"3L>q>X>FU =[սS=/8=%8>%r>,ڣdMAV:/~k> =S?S>q$4(oV>H&qƛz=]=v?ʔ=.3= ލ>'>>yp >½=6Q>R=>&a]ܽs>/N.M> X=0e=/E,/:#>=.<9ݼE9=">}>Hdž>3>cm>=eq=A1+q=ubz>?8=& 6σ<>)>JCZXj<u&q>¾TQ>i8aL >6=,>c=8='>X%M>fDN=鼉IVL=d ><1;urf>[g>%&u

    ^񼢡*;=btW>,ܿͰ8>m=>ν >9=UiA>>uz>T =ǕJ>JнZk ^(>=⁅=j=Mn3^Wj(3>X9<%=aRǾ=}5Li>q; >ƙ=ችe8ǰ<~U<3T>w7\ ?2;\>V9u4<罭N>5A=Jz^)ǽ&.5!AνxU=>je YF B*>c 5[ϑ==9Y~>5@WW}=Jz=a[">AՒ$@>Zs>j6>_{U>x{=R>+O_)8>S<(x>O@6[Y=lO=DAA>Phw|ǎS>=" = > m>D>/ż]Ƿ=4="=nw>S<2>>=4=ߐkȼT4vJ'V=%;>½4>WZ= =v>Ӛn=Sxvm = 佀[<:>m'>DVRýl q>I=-=Ǯ>>.h >}7= >>;2Y߾|<;>^=uz=2ԼE]Pr:Q{[qZ= 2=%4뽊 =.B=lDWZWd$>j=̓Ok#>bg<,`:ػ*<-(>پX{1d(>=)>N=?;%C4Φ;pI==P=R=ҷ :ORY;ah=K>=>i}_E=eb>!f= Yn>=E/7>\[<&O@vZz>lm>^Ӿǹ8=%N>Q>\@1>Ly=WMŽd=&>=Ŧs7 ]>=O齩<> N=@< U='h3t[JO`)>u 4RK>f>E >h~[P=t$=:\a {~X>2>R=lm>UN˽Vþ;qD+<:߽w0-= ?M.|=p>l׽ݬ7> >l.Q20I< >6?D֌þP=oy>߿>jxq=G==?8뻉h)~5ʱ>x>#`i=/ޫsELG7,gh}:-.!Cn>Ul=m߼>Fկ>6>==-jJ>0Y>fǾ*c%><{>N47鿽n>8<<<>~T=Tu=8W=l$>1()=w<c>y>4=mY*=eh= >>=>F>Mϼ_\>HD >`>=~ޚr/h[nC >C>3s9%;zJ=ʈ>jуY'\vL>') D;O-⾫n<9Ǚ=#>Bz>PN=[j%X= >OL_>#S=g!Y ľM>8>E>e_< f<=<~=_L > V>#H`(>ĽS6d=af\=м>>m<=C d䨽2 >!r9򡰾;@x>% b ? 8AOnBU==>Ƅnma ==n><>姽L~#_o>@;= ҽ{=9!LC=v>뽗W)ǽ-񵊼"L?$ \'=YV=z9=l/=&,U =/Ӛ&ɾb<1>ɆZ>$5a!z=>-uݼZvf& 㾑!z>/=id>,x/>(X`>eR=<>dŀ=?=n"!W>h< ;xm%>K >3=#>gV>;>|$>t>C>w#&=̊Q>j.A뽅>RT=:恽1+ٽ<5l=>1 =Н< >}=VHo|= l9>Z)Q>4E=>]@? 
WFڇ;)=->>WbX$սƶA= t=M+V>M <6=o6=='}@D>e<\>;=W?>66D>=t<=>A`3=>{&e<T>{>A>-o=p>AV>k~>n; DhΓ=}=I>(I=hm=Zlr>gYۼI j9=>!_& C>4#+ļBE>ݾߎO6e=x)=5>/V&5A>1X/yI.?`=-.P[.a(9aw&>>żŢ>e=B=rL ?1T>>!?=> M=O>3>𭧾|>n=Da>ʋƽX=v> #@>(`=`DӴ>G澞:ki>W='>Ϡ*>j:>}s>R=L㉽" 1><x>:U=8b>*>o"Dx4=z;<9t8=L);`,t<2#2Fj>L=n.ƽa@En=ސ>/=D>.LT=a4s/=A5 >˅̼cF<Sn97 i>%>%M=VZ> >:[Z=6a>B> 0>V^>a= ?=XQ:=t?_>r :>=S5>(+ɥ=]Ƚ">(v{ӽe=W>}5`N',#>D=>*;>U=Q8= !*ol;@Y >Z|z耾SN=c*>DG=zu=b=x=G Z82>==;.=S>Y.%>X=Yk>l]VB/Pp>aUy_>F=>錨h=U(Ʉ=|>DR=!>7<.}=n#ZP>lgLfrsg>p>x/>"!*RU1~)0; =t1hE>R!;Ŧ'>&5==9fxE<)̥cϽPb^>ӽɅ9j5>!sv>)G6p>֩ܽ6*)>XŽ>1>WMD>b<ۡ*==-V=Ez=E=m=vH8< S=D[9>izO =>jW<ǝO>'=>>G>?sN^==J>]=ɽyd>2O>K5>>Do>Wʊ9.>ȼ\M==k=";J9˽$d>誽j>v=X >,N>t-=x>xyl>¾Mپ֢H='a.7>!>Vf=Ϋ/ =N%|h؁=$l [{%N.=DNdG>'5?+Dr< f>~bnؽs8Y| U5 鶒`X۽>|[=bݖ=} dʫ=Fx"=G$\5>W= l6G= >i< >Nj=sv\O=`K6G=86ɾ)>T0ơ5Tȼ,>qK,Cy| =݀dn;kC;BͽgOcph>pa==\LE7Wx?ڽo:Y=MV=<>ս-幽<9>_c>rѽ*;>qSuOKu`t۽>޽*>>ۦ<%kRS<c[>,3=+v ָ;n>#V>Ȥ^9V>B=mxT>NEhHd6=ソ}>TPh=5 >fNܼ̿=߻=j>vCV5v> |>(=(#׼E<:vb=f$+>[ZG%[& G>HC<>Nx>>S!/~Þ>۴ Oʽ8ŽV>0}=#>-59>=Gaa}RP|(.H8輸,U+#q9(>M> >9=>*S>.>o=e=2>;1> > w! 
ۼYqI=V' U= tUh ={4=`[<=1>=󗀾>.M>Q;>Bq=Ծg+<qU>3=03ýr>H=8 >%kH=;ۭ=ўj0dNJ>=f=mGJ۽~^>b\=> &56=h#_=-܍,= -ߎ>H>^̽=q =_=Kv>׃>U<^=c<=>\>=9=a=4U=1j=-} ==9٩">|i=>;F (KbaK=>J_=3P>\>t tw=ca=i%c  +|>WӾ>#<{qS=DA<5H>j>uX<><=x>")> !>AϾj_>S݈>3=.Xf3>x={>M P>F>]>>2= @-> K Cs{3B?Mý̧=۾Y>j>&=|>SiWľꍷ=I7>bT>I@>E=V9 h{=5M= m>E[>ӽ2==djsqW{a>hʾ=Ͼ³0+b=%i+>>>(k>l_>RU=ZQW:W3M>>`P;>=D+F=T?+>,[Q>yjj<`>;<=>)̲> t=C>->L$h=ғBH}"Z>w\fξF9 {SB0G>ba{Jd>R=C>k*>X=U >9ֿⶶ='>,q"f ">ݮP]\bST>">B0>x==&uƽto:¾===*n->I>>=_7=#j*=(L=Ŋ=#(Ծ }}=Bh>>q>L2Z<>Hp={ ~=4z=ǻX=>.>}Խl>1?k= "?Yz˾=4>?=B2x> n9>:Ù>R1(>]K>>.r=ȼd=IN> мKyԆ>@>K܉?=|©> M;( ʙ=&g왚=ݭ<==)<6=s|d>K=Pq >qFF?z <>>cke>ػnbCF>\<60`yu?6U".>ljo>f=̂=fP >um`>;>#@`X>h>'=6='3p=HB=$>޾]<%h{>m봼Ӈ::=HsFXk=^Jýjʳ=,>2=Rg})LB=!>ԓ=x>J >>K=n=Oh>t%>}1>l=p~=P4>&վDZVBK^t>O>V=8dQ O.G>q >J2,2_=\Կ=J&>= [>+2{;>x{;v:8=v(9U پQn>JTº<}zӪ=\=фԽs9p[>-" T}=I!,G4(r >^H=#1 Bono=7">ͼԣ>Q]I'% ;FN>fX K=;7>=B>H>x\SU=^湽!= dB=A>R|>?)_d=Y=6$>o=˻M>ɽ' v>7[< >r峽lN-fcٽ߼>3;l+>dT>bW=rҾ;ɽy =u>ב{> 'a>h={hr>iݲ=% EJ[-I/W=V =/1>'>̽{S>4;>hbPBm>>=ҙ>g<݅iA=nGI>]>jZs> m7>#>M==A='=ZU,I>=v[5E54| = =5eJ;\>jR,>ZE>67=ElgpE>:.>p%=e+l7>\q=8> u (Wqm >N;pʽD=ߴb#>=Xo=r}>>>>=ܔнI|=?'>׮>,p0>y+}<Lm>8>BQ9.<?>z|>_c=}!)>%==ao8ֽS"=/>`{RN>5>ؽ1='\|6">>3 >g>~vZ> Ua=?2ѽ^<>x!ZG>9c-S;X"C>{+^=w=A6ʷFCc>(]>낄ͭa;ұPڽθU=9^_Ts >] >I5S>>Ctm=N.z_%=нZ>t=_= ra>I> Y9zX @>.n0=WB- OȽw{"=V5q=:X3û=d-<>"=>N>@2[=k>i==d>;vW>q=)>;@dpH8H>4e==>n8e=<HӼ'L=4|b9I>)4n%ЯB;=dnm=qK=Ȍ=`l*H4^X>#=TAv=h},x+`=lEHEa>9>^1m>n>A>v<ھ>"Nyǘ>s?6>%>ե>Hi[z= W4Z=+.>8ؾ]}{>4o[>>!=bJ=Y!=YNڑ<~CS5>R>xNy쌼M >tt SL>]9nt> &=xѻ\>t= uuQt\x>ʽBp=Q>ڽ 挾*j>ha9 >=F> >Ѿ._=sP½J>%<݃>{;-9<=< >4W+2=^ͽXp>}b=峾٭=(;b>JĴ>y>\X^.>>O!= *>:!|>н=6>x_>Ɉ{rp>c=Od==xj=#=P m>p=aVY='=)ioV>A+=KU>Zoֽ\HhdI>9rR;lv=都t!C1Ļ>s]=oT> >Ϯ=#>茱>X |ꮒ_Z'Tsq+/Y!"8n=OhHLB<Խ=]Zj>ߞY>Yn=kþ>v>{<>A}>Z>6W>`E K>si>Sei5ƈ>N1>  >1<>*2=f>#ɦwӧi8> PI=}\5>L=ؼn'C S#$y!<(rDѽ=+\=9">޼s~e>q!ξU߽EQma >=,)>/>tߔ=>>w=+>B>dA8 >B w^ay6=af>fJx=;sO>2(]1>h >Ox>v2>H#>z=;>!Vܲ=V5>  .;MK^>> l$ /4d<[_,==2a/>'|5lp=р-O ><:7a=i.L<׽ xF+>[>^G8> +9 ˽V]݀>[=Fo=>˼s]>w<=!>r6>XW>1Ĩ 
e=\Ľ%=PW=W§=ιhډ>q`Z>=;><Wbz6Q_=8n=!=fle>g>Ynx=n>gb>㶌ivA=φ=s Q>M>8xͩe|>ͻR?="w=Ց>7%<2>< >H&G>w=_瘾 5f= =>$=#PM<q>Z=g> ? >.N=-qa0>&=ӽ˻Ȧ#ûi>ZܾUi>轡)cE>6ʺ=F>H=ψ>V?n%)>w!1/>-K̽D\=tEH>mKzD>Y]˽3NCGw; >ּY=<]/SChO=0>î>t=i!>::y>wMdK,:>&ը =ā=ad>;@y9>*Q#]z >nCc+yA>O>Ļ=..B>1* H>0f9>1W ȋо6ܻ=%QY>->6kF<'D* y1>]a>߆&!N[>U >>Y>#H4r>轤:m< P={騺v4>lY5>c0>kɒ>=T1Ge>Q=7R'=Q(>lvѐ`,c=H)<,>o=\<»D>A^i t[ν< ';3=c^>=k}=M`,=`=>X=t{">q!H>=w >0etyQ<9< =Z+轏pi>ys>"=i=6Ұ>¥>J^=%>~><)$=mЛA~i_=d=?>.O<뢾^3]\S=l s>> %[ǽ >7$=>"+e2o쎾'Ӽycp>=X埽=)=78PF>'>4Bȭ=#.=ێ>囑RDr=P 򽿙O>J>㽸>=sK_y=D5=_RJ0?6>Õ/ԽaX>~)=T=W=%r=+ $<&D]ڟ>=0*>@|= h#>˧=jXq齂*=|d=̹E|><*+:>-!>Ÿ=`Խ;#Aʈ>j%G>a*>9=!9=Mk`>R;3B=J ?ݽP:>ۖ*gc=0F#a>==E >@9e>ͱ=(5&Z$>Mnaܑ>=1`W3>=}ڻƬO=">Ƚպ .>/>X>םaŻ=I=㍼,PO!B`Y?ǽ />k$`^fe>bu<;,;v>z<=bѽx:,=Q,=X= >p=]5z7K;GC>=q=.]F=#<3>ݦ=R]O=Q>GOn((<>pdnPdQ=csMtzk=Q=ue `>yd#sw_g>e>cVKn=f=A(F)>,<31>L=5ͅ>*>_5q=7>;Y"> (<=z"=EV9>Z='0> ;:/<ݡ0=&c Vc=_1f0Pb;< &s>CW<&\=>>!ZK>|>r=I=b,//>OG*>=ʌ=T{֫ >=/>e9>JճI<1&r. >'o㭾mܽ ɾFC>H>{.tȾTD<׾QAR>X=fS]I>^'C<(gBRK=q5Soچe`n;=ҖKN==g5|N*(Z`v>O=gq=(ǽ`=)>{ڧ}Rq>']4 a>s>Ҕ/> >:>=Ѓ̽>Z==u!1+>%Ͻ1=+>~2H<߽[=,,=[>X >؃>a=yK6,*>B1z=o=/Ê<>Wd= wH0)->d\>>@> 2ɛ>>xk= 7Jǽc>s5:ބ>\[ZS=44aQ'<>k=V?_=ּ<iӮP ik=q¼y~=.=ACU>< ==>V²>[c v.þiaI=Ixk>f!=Iiq=9H>\KN>@$;=| >iƫ=Dϒy=C?;r;o+Z<8׶㽼S)=]7">̣3>U; O>u+ǽVqn=0)}~<:>ˊO} 0=شK>=BP=t=(O> OEB>'V;=\>J>=oTK.>l =/<8=E<>z=Q=F5K=>=Jێ:OgOaS={=?=@6Y:нQ|3=10>> JYz/G!3Y>W!X>>f=X=V>/5<=VRUֽ=:=o= &a>Xj<3_=W >cҵ>H굽KA=C=yXW[>r >e)B_B>3ە==cg>Yw= m9ձ>U5< >k=$Nq">>g1e=b>:_=u><[Hw>W!=ZӞ= ==p= 1V*+>Wv.:׽Ն&ksT>Pü=@;D=#ς;a]VGn>0<r3Ͻ1h >>.߽ p=.>~9O>~þ`4'=!N= 6>==~>h-=4=jrjuVd> =vJ=%)-g>d 4Jw=Ƽƺ{(~=|cJ>>V%7)Q= }>2w %.>Т>w>K.>fg={=2=dUw>n<[=~&=P-^1+ؼp&;ֽ8<[ }2>ELg%=';Ag!>ߐg>-+>K$<=C= ~>b=E됽\=3 >3>r&<)-<8=.y>r[=9=U=ھ0V02U= /=e";mH3>)>=n9 *t&=/,Xs= ߑss곱(>0'> =; qή=l=X=3Yf]-nfrƽYcFԺ=܂Ƚb)=L/7?h>S ==ն>;:2Y>S=O3h>VDT >$z>3_SL6=9ؽ >ST>n(#p[=s< >$wf|^پ^gSսKv$ O*%D>%>ݽKD.>|=k+X3(;h >ː=^=Թ>Kmq11 W?b>=>SPip>>\lc׽mIeٽUF=kvi=њ&\>ƕI>R"<&=;?Obsdnwڸ= b='>n>Ộ=oJC>t>8<圥n= 
I>Xb==EmURнla>Rt뽌Ca%*M쑽b_43g+=v>>==G=}üUJ<$n@x>=|T=(1=?z=ݴ=lE.?1>@k]>=0=_$JpW=f˽ѐ==pXNW=E>k=n%륯Jߙ< mW)>P==3"D>MX+#^.}<>=N=-g>]=ހýXIK>2=` ㊽@=}m.<=K6>r)_JḬ̈h@6<>tE7*DG=[=Ɵ-=`[vAk4 > VSkIi=<.igr2Li>C1> Tp*$>m=UǾ5 =0;=jI>pCsO>`>cS>>g>'RS(vr@><\t=w=PW=fۥ-L>*phR<1Ȁ0=Z2 15 ?=xyY=bx=z*>I2> }Q<҉|#A;.>n6@>/;P<k==*·<0=!d< yY<}<>H3HDa=?IW(|W><&p=MJ5>޻Cx=>\|嵽==Cn=zz>]<k/>>Hۄ>R$>::'ur>#@-D&W*=4>p=7Aڽ]%>g >H>f==t==!ѽ! 0xN:c=yb>StEfW=t> @>hS-Ԗ6t>>>=4zϾJo+=}. =a9>|*=Eu=]#>u=Mm;rbBŅ>Z?; Dx)J#=J>* \>$膾 x=b1>ǧwӽp~[*>Y{l{>3/7=Y>N@(C[<`(]ѻ =_H=tqYI+E<>=eėi=ؾ޼Xjr1:R*=tޏ<,C|z=Roa{04==k8 2>V@ $,=3=]o+=ñz >KI>Խ3==ȇ{=\=ȀIbd9>,1U<> '>mKL5L==(dx=L0dB#Z>S >Ǡ=k"=v|56>&>K 8ftE=u\TFb>U\hE>)g8iC?XF:X1[a[:!'s=H<,=.8=댽=f<}Vx(=<?>>6C=۰|.+<*B==J,w>u<>W=%Z>X=xaT<>Z F9 .b#r=VlR>/>"]=8><ǽ@@>`<^=/>ƼVX=Q>a 7=|1^|>q=+(G>LP;C{AXȼ2[:Fȗ=,>X>b=f(f >fp-># = <o|>BMM<}>pv>r>a=nC)B~<=BCS>>=iv[M:=a85=_]>^0=K<:__>;9۬- >Զ+K>1UUқr!>(쟾)>`>u7A:>I.׸\o<~=F =5\f`>sN= <;{ҽGj=͕z$=.==χ=oK`h^Q>R;=$7O<>=JP{>lX羫d>>-Н=ҽRsEiO+H>OfAU\El=Eif<(=§]=y>9Պ=h0>$>UP>|>!MZ,>v<=>o콘2A>4艸V湷=\i>V]`eܱŻ*<8<><μg>e=;ݩ=q=S(9Cj2/ƽ=3>4/>m>T3=hNSM7+<>n߾8.%=G>֕Ue=Ne#VX6>Bȵ(=0uUNǾd=-ރ1<0a[>iđ\=i1i=Yս~ H…4a>D1 =$GҞ=|*~ =ǟ>ܙ;j=E0;k SFP"={@>A=C>G1V>\ 2ԄJ^>>"G6R=8a]y$=! 
Ec=F`>޼ >O>t6>ynT>%E=,>=~=J G*>' ae=$=>)>g+x>'@x=puh EPP> >kbnԸ<ݚ3>Vʾ=D2PS>?!>M^ x~>TbZ2D]]9㢽HоN;Ҿ x۲D%8>)>R>HՇ>>>)&A꽲`mC]ݽG$"!{T#:0%!,>΁뽓11>\~H>#>>4c՟> M==H뮾>l=P>"U>ڽX _Nv:`=9 E 07= >@>곱>>7!=q$>:F`=޾<,qļQ>,>"8= >^b>>J=[j=ȍ>U> >@J>~/C:ѾY>#<#>Cּ3wWC>:J=-@>\=>tF=|3>-.=I>"ѿ=>v>+>?(>>eK=F<{g>ZTK ֵGpU&琾0Խ>_2= >j=4=S=ON.!ӽB=1K 0>pB>׽ .>[=4>=ؘ'Y㽉;>2>6>!˽#=s{ܫU<*s>᡾(H>ihýW󞾏%=c>(d5>GV=(ǽm˺ ӹCgb=ܳH7.=}V=P T=jc}>9=@x=] Y̾^y1,=4w=>=Ƽb>dM=GM=q,M׍s`i>\2J> 0>JԽ7c}I==*>T U=щ<:bϪ>;ɋ%Ͻ'U>SQظ$>GF3>M>>VFZIʡZ>= N<-)z=>a;ekj>[*=*xT=bk=n\9!ЃnM=;|>>vH1>陽@TNVч>9>@">QLp7=3>L!<D>ECbhn>c=;>4[>w3>Mƶ )/PE92q >JR=oN2=iDf^>><>ҙn+ % <CF=<]j>5=.uJ=E~C+9f1<<>$ >tDàڼC= =ɥ<7}==n!8o/>},2(#>0;]c<->{۽ _+ؽqSh<2X<rvN>= >x=2>10b>k => 2 c=Y=>]>S)!(==m>y=[>*⽽=Vɽk ؼ잽ۘ=MӐ= ɒ=z%;U;=?=0ҼOU= z6ݖKPo>n<=F>6꯼d˳Yͽu==ij>w=}=U&<>=j7= Ar=⮱=eHA>:>㹾\мʈ!>f!D=yJMd>!4=R=>pp1-> YI <_}q=;߼Fm =>LK=q>9eNz$HVf Y<;Q+=v.>S=פA=f".=*$a нƂ="=Q-{->\|s=*>dS>.N>;+=2hQR=ڽ >cR=#L=(J=/~>7^=)d :r==>>==&"->{q%_=j>z/JL(>yH>>)CmRpP>;|xb:;+52InY={,>z{c>+>t >Æn=-p>fד5l,=Y \Q_|0>0|74\,> "O=(5Q>쯽t뒼JJՕ=o8H轪M<:<=?>m>z|Z4="X=곧==gSE<䫽qc,=Q9>3<>_?=|}>H i>Y8> 5X=?=M﮽Z᧾m;̽߰o=$^ =?Xt<|>r>6=薾ڠx?Vtٻu>:6w$>#)iN1=T5dE>"=?;b===.:%N5Ծ"<6!=x0f=漅U 9;><5>;r=daľ!N>@I| =`J>~ >.kdsB=.bEk>;>罨\C`;!o>RF>T==P< <=J#i{S7HB8ӝ3:=c=Vv*;2b(<<5o>)H0О>)=E3=G=桽w=$1K>5yܱ;>="cA=7}R=FZ>Rꍽ(R+cȽ=Bjm>i5h|)2=\ =\nK*HV (=]»<4#>,d<< ><9r=R[+=F½97 ?=гl8$;>=.=o޽p@>X =_]=kнN=z=o>q1=3>Iy4!`F>` >`D=؞,=%>EŽيjh ka%a!>X>q>>q~CY=bBM=W=$<4.>t>?X`FY$E= Y<Ҳ$SE=_ {a=>CG> (E޽ƈ<=>l5B="Ԟ;Qb=qk5=Gt==@Ce6>G *#6Z)U\>1==07;Vct>D>N.Z>a=+WI>К[Dz𽈼==^@>g༥,lcE<4&͉I>+j=Mi<ߓ0?O44>a=k=ܭC=Ƚu'R=b <`~=^*"`08ռѽ08=I̻2(<*=~@e<h!="=|J >hQK½O}Y=t#+¼2=AK,.;6sܘ=5/d>I><`eA*=ZœDCy :$<

    bWaν<~c4S˷( >&g0OuPP=}=8>r0=T;>z+ja߽=MF=ٞ;K췽ʻ=u>=ѽ=\;uh<3N<{=QNNy]>ԽU; <44q?蕇==`Ӿ;0 >AHcL'NgG2>8_=92rc>ٯ{4>gg=L>= <ƶ==Qv=a< ߟi>^>eL>v-Z|AC<ó>m1f=,>><%G>N<ɖ=nL>%]d<#.=)>ңڼ>>>dH=_W=jKWG=>)jtrC>.=>,$>J>s s<ĵK<=';-~ý=c=H>z'=8i)=vw>¡(|齀}tǽ=h1=# [>n ;=s"%X=u P݉EW>TY5;.!v=޿/<췛ê>>g>%f]aU'=o2\>B>mϽh==|Q<6r~LW=kҼv>=Ўǻj>tO4>d=AN= 5)f>KE_>NL̽U<ѝ0p'[}<^tGh>5=Z >喥[=4F>> L>|Q=9N<ờ+>a>i%W=/96>) >z= 2k^S=UUFe;Jnxv{s G>=jý7x>;C=,><=;w>(cl5 >9o>M;>ײUA]>ҽEoU?W=#>鰒>i:'{7g^ν%K;  Jn ݽ=AK=L=W=~}ݥ>7?=f>e$61!">g.10ʶ9= =}t=3>ˬ=~,>a20=@z&>=Pdi=/x=&=O=9K}[e$>3<߾q >=~8H=Z=aq=Lࢽ^`=HUuZ;Ʒm,2$/֯=| =U$[>n&=ɾ>4Y >*OY>=בr=o>)hq=,e>(W=v_iP> ?{#XejE)A>hT;KAV>ѽ(g(e:of)=9>x>8=%x[z0=8zw./1>Id=Q*2> 3>+BX nFLAS<>|~>XPmid== !>4[9G+>>H=n>bg%>gѽ" =9;7Xh;E<3Ͷi>퀸t>S0C->NͽUH(6Y40=>ds>`,q=O=/>d D3>a81>Wi=%j>>2h>#B +Dc>{h>!MiD=H&>c]N>>[>nN>J>#(>s5=B>>$>]#BOA>3Wu\=1"<{_[ )=2'>8qp?>Ekϣ*m} 2x< i=7A=&%ʽ>=[<_ݪT>W-=s r3=R=5<(i>&;Q->0a>+>Ai>*2lJI&X0= >y<{]2>뽐~s}2 >#¾!CApQ=x3>Ĺ=l>4>sPnK>T> ^R==u'˾P>q= V>p>3e>&U X>C*y>MvT=~w>'/\>dEB0->x(',׾|:a>=U=rwB =pٮxQ>1>qa6 M >y=rՆ3o>l do+3T=>km(2=w<D۽.ْ=p=k҆Y޽C04M>ܐ= î=^wŽ0FS>۬=T4Da$U==Ƴ5m#=ݼ}O[k켃p:=%>j=t)>h?~_֣OK;! 
=%=l=6m=f1""v0chf~ý!=8>]>K>9~>t-=k5J>ڲ>dӽ==[/>(~" x\=s@?W=dr~l<<.@'PI.dD>L=wkv2>i2=w O4]q1W=OCݴ>kUQBDMk={t%)CcPu= /j<Ό>>V=y=;_>r*_ֽx5>&:>ڌ,x4>5P;%%߽h> =AֽY#=@>& x=Q-V>rV %=eH=UȽ8 \n=诪0R(QH=脉>O5Oդ=/k=֠>H7/.>Ľ}=?>A;2v x=ڼvRBļJ% >A[<7_=~=->6E"y>Y=0'݀=;N'DGX> =Q>1}n>褠=l`bR>H>H:Ҷ=1͇>|=ֽ_P)>wJT=7 +mK(K(='>Io=> 0 i:ҽ!>F/>K<>d>a>s :c`<FT>y2>0=/`>F=!}9>k=+_p>ʮ|=|yy; >:>=>w>I>I=kDJw{=e=K\>gfcI>6/>J=`=d\>,=S}ȋ=е=43 ?<3E].=ZuQթ>#y;0p=0=|>Rq㽐< rd!Ȗ==K{buԻ>*=ѮCrs=я1>⼠CK{>7=܅=8>]>>2ޫͰ=0Ǽ=\_߽YDos={>|@#G> @><D= PF>5Q.>s4=rˤ= n;: }Cw;ȊŽ!4=4=[r=)I>5=+>'D2>Y[1b0ξJpz>Ѿ =i~ l=b=j=4"3f<),ٺNw >ŢYqmqY= 諾Ѹ=y=& {>> >JX=Hr=1伖i=]xͬ>n/=u<*> UʼbS>^_nYOD+@}ս5|=ԏ=ז-:3>=_½.N`>q-9=p&>=֍>@ d޼ɷ_Y~!>HK*=?~ُ>}ɬ >.9><6衺=-I) $>ѱ)pB>-:08o=._zr Ue>A=]==_B: >bLGJy( _z&\>_)f$=[3>\eAuٷd¼ट=G>;#h=˙= M-;Gn?>I ;=7<=</<>p 3=ԽE+=#>oJ$>Zy>6%^>Z=t5j&L>f-P=Z=ˆ2.=7)[<90=$;fJzY@>NX-=򺢽t^=زY=-=[$11 ֽl=vYIxS >Qj5m>Wk<|~&>騽'>׹=:L==<=;(>WG>vq<<7B()==YS=>8 ;3IN=\, (g)s=V=Us>Bj5z>= ýW>=>]>CM+=t:A=bI4y*>KX~=5@=I =}>/BWZHm서<+U=u=|{> =G>V= >4u>C뽧=݄fdi>R.=>㽕^=$Ià EE{)sXA/=A=2O߉<:ۭBxg嫈Jr>T/ =4Z>[=<=hiQw5=jvjq>d>h0R7Ǽ-2=6[Oc>=B{>ƴ=a>ώ>;7o=ꚧC>Ag =蹦>v'T_>6<06YarD ýG;hA9>FI==΁ Q)gF)>m]׉hj>?;IL]ZeMػ=om[N=Cw=AJ}$>> F:ӛ;Er<8F;lnş_|="==߭< ?W=:ns/==B=r=;)>XI=mTz>Ej>Feú}a>=?5=n .$`_>>սDp<xbY)>HM۾'=DP2/I>![E=G.ɼ8>F=Z>[[tT>$<{^1[+>gM =E5=ӽQt>0>ON=i[DO^~>5T|\Yf>5>ּӼX>p[=2d>X>5s<4}d<.w2=}P>V'>NUPD UÆpD>1>9=hτ>r=5.e=M&i=DvH>_=M[<ጼj&>Ty=/6N!lL<'>>"LQ><ʽ$C>*=qI\78ۼ(;Q$6O7_=nu ^=ȅ>h>6J;>p*!4 >=j=6=Dك ͼ'p!DB}\>P׽P4><\=Wxϊ4J>{HtJ7;Ex3dNdT8>"<>=\=C%w=ェ/>Rν >`S=č =],}= >Yy>5༆V=f >Xg=OB>q>;H=(.2QB=4oq=K=WWcu>@8=&ԼK0>ߘ H>I>&=lt>Sٶq'=F]=Ʒ$>:ɻtz8R>UZ~MύqʼĶLS<6P./,>g[czX>'= >!jܼ2YJ0xƼ3yLP0,=Dtqw3>ߑ;ؾF;>c$&=FD>Mζ=n%[>#؝_H v>~-ew=qU,' =>bl6f&>͝ټDH>L=.9oA>=;+>QԾ=<u<[N=Azϼ->7%r>-9i<$쓚>T"#>齗Zν?av>\=~־?DqUwdx=p>=.>@ a>=<, '~b=,>+<>Ɛehcؽjz~K*!Y=&;M9M=-V=2>I>aGӰ=P[8>\=F= +Խ_J= >/ 0>⤿;4;9&=ˑ 6M >Ӄ=!=[.7>Y';=s8pȚ;_ љ4 (̵KVwm2<]>74:`<ept=lp;q>@%غl8=c2="kc潾>);Y7¾<ő==鬘RjA>=-6ּC=>Τݼ{d==yT>>= mlOԾȇm=6]<)k 66.==r>D>ŽVe<=߹=ʤ?=nq=Ƅc?Ban.p6<>;;`N14H=\JͽdU=="hؽ=b=А= 
)%>=۟=/2+9h<r,><>yЍv>?fu>>y>t=?>ץ=ֆS)<3>>Y u>|;>0=+S>y#Ka>8r3JF5>*Jve\=ՇRxR>[=!L0>H7V>A<)=E4>,>[.ߺ쾌<="$=4&e<̾ǯ<Y+rK䊵:Y 'qβ>[?믩<銒=AA>#>9>ι=rY; Clr>I&3>kB >z> յw/>FpW=TzZ=<>0S9>r>i2>7=.;^ړ >?Op zx:>!=a436=HvdN=fTA裀=F= gbӽ>Ӂ=>~>=X=?=>ބ R=P>ټݮ@>K=:=4`n>}e> =I}9L⺉37[R=bY6;T>e[; %>O==őʾ >Tw>Z%>2bƭ= ==22޼S<#1J=>V+>ҷ=<=">;E=s=M7<Ӌf>6̔=*>9>8vB}=I/1R>yJ$ý>a-> >(<'^sRSMuܘm˽,r!טgaFT=R=BN=>Y|;KfܽԼۃk>% <8=8=5ZY>>HZa;O`z|?=( p=.>[9>En>PpW><0<_>y7RJ| 14n򽍽1>= 0>P[>=T ='O_>m>-p C/ `66L b;=B¾ښ>D>"&>yq=(<U>l>>Ľ0X; = x>λxν Q=G|[9JH_=c JVS>>ݽ0=/I='%zp]%4v>.>}w=ex=/x=D] տ>4qű=#'>b];T@{ Vcu>.>f=д=z=d{9==_ =Zs> N=>_**$@= x=!폽Op=fUԽ@,,;~.=7>1~+>ֻV=w=*>!=/[B<3EmE='u>==`\1D>=g7.>;TY1A|c=fߊFH_,˜<>>Ifu=% >Bu>]>];=`<AFˤ=>Q)GT08>P> ~>ٶ{>N;U~Ɵ4^8әe>f=1~>>pg>'ՅzٱP{x^z{O=#Up\#׃r==[;-/>t^>+QS>/;.Yq;#< Ͻ=V=[>&fy#,=[wn^>^0vG,нA~ >t=>Y3xӾ",c=?<9=%WM |m==pu=leG>z=һ=SNC5=ɦ[>! ?>E=JSѾ(9u%>>eO{<\<佀=i [P1ް|E>fOaG6Zؽȕ XFu(=wKս>I>j>Ӱ<9=ܦ>ľ?&=mQ̽0<'>S~=j=:ޯ>fo&r=<4T}=틁r$>;='>ޟ=W5 >nj*>"Y=->B>5("OA= X=X =4M=4,9:#;1=⽢x>" >=!*=o-^1=J= ;>hϚfǮ=q=+=M>ޑ>4µ(>3 >$"뮖=I< =yd0==k^\پ@n֐+>1~/X>5>nZ=ܣ>r=z.j4 >lq/=Yf> >=Gþ&>=M1#e.t>0 =51ݕl"@>-(y=>nq+ׁ >~wA=09yM>a+<=w !>J<:;<(?KR7w>!lX=C="=|C$Ca=f㽴>93>=q=R I>O~>Hr>_y <5="^>a_=6>늽}_ͽxkdl=t8F;n= >=R ]l=G`>G쁾ݽ5,>7>;->FO==(==*`ݮ"5=?bŽ;`爇=4%Q>7n=W/ cxxb> >*_><]SG/V"R=b>O#>T5qӽ==x>=dQ>5\ c=*>޽8<@{ Bl=1pq>Cs=>4?D=R>Yz>eϽj#Z#=-;-}Z0콵]\ ]/.0~>>=r9»"L5>H<=/M>*f>=Y>Ru=W۽_ӽv(<6<2=>G%>! 
=d %FK=>n3>}mD+<# vr>=r6=+<7MfV=Jf9z>= '=>g=GH =(>Y=-c%,=mx=S= oZ y=r>sۏ;h+=a %;i b8>\>b"==X;0u;C(cO<̽>1{B>3 Y >3פ>7v=lͼj_= G<]LS; wD>blUm!>ܰ+> 7# >OJX߽qi>1<^1==):8;D> >=s==rP%ɨ=#!Z Ɨ>OAF>xf@=!.=,Ɛ<=>F׀=Ղ=*;z->lܬ]{=(L3 3G>ٵ(Ep=Δ=h>}a>l=/f9 R@>T%\랼Y>Q׽`>C>O=(׼'>g8Js}=*>A=\ X>°=џ3>.<ݦ=k1;e >%"=BIO=FRݾm$^=˪=djuAӹ̐ݽ{=3Q> tpG01='dGeᾒ^ xN:s<ʕ}H-'5>Ζ"mr|aTS0>=8ؼ+I=O˾uG'>%>|1 <2-|$yn fA#skD>Sn;r>gĽR@B>0E24>0(w>̫/6l>ZEn=W= ql===~@7 zn>T=WDh|=Ͻui@ =3 *c=W%yx|`)=:<=>ӿ:3Լϛ5=,/;.<x=/= 8,=b)>'=dLr->,=_˽ D>NCa>D=%Ƚi=;3ؚ##m >>Q>+>e=FbA=0}=wOu,>\qK=~>JИ%O>=]41>Sҽ [>X@a>)2=qν>r=ILk*0>ܾ\=譾$c*:,b ک=>Mm1o9eG$>,p=p"f ^R>k =U>Q˽'Z>2 ==cn=^=>"O龩<;ofܾ]<6³>r,ċ> [`B <@l=\=>t=ѲYi<=>ۑo'x>J>ȅ=}&>k>g!=4>a;u;X;)KA(e=b'O9@;Q`7+'=Z >^m&>0^ې>:vb~r<{>ώ >V "3HL=u<#n>@=#=ՌT@5=]=>ֲ ('=:V2=JZ>'$Jpi=LCl><$CTU;ٽw>I.ͽoƽ5˽}>ݼ4>.>񠍾Y<>oJr2:j@Q@\T=<_4M8Ob> >1%y>|==0=~?X=0uʼ4)Y>O:0>Xx=۽ Ujݽ:>شN:j=X>~=l^E)Y>2]]=Ċh>'(]0==<,>^zns> p˝?1ؽ >Lټ<>toHM==2>==[=ɽ =k>]>= /=!ko>%i=^>M }tmW?`L<>m>KD6@!.{ tdϟ=`y>C>_=U>v޽=>̶<ձo">9w=(> =`׼Ė>#˚㱽 =N=f=NX>F@_=^.eeS>xT>W><8~мSIA>YC>@BGN>4{>bE5>ѪW,ž!<\>;>ii>#Ez;>X>3_>MA>$,aY=>6^,稽YиtLC>P{w;>%?l i ==Bgp퐼|d>ة=7< L>c>n}>UB뵽 J>< =Aw;]x=W=>~"e= A&4>P=>vhܱ=L=ؿ*'ƽyg>彎P&!o#<h<5=Y=(U=ې=FpV=#/ >iQ<j&׽ >|<? 
콊8="31>}$GS>,>y6>~J=i>=䫱6M36>lo>=="=sbT=N|>ǫ<=8I>}ɽ+vBj=Y@*ZP=d;=e+e<(>Q=fXV>y_kw> `BA==~Ne@>Wڷ=k>'>)=kR>ᕊ:|=scikit-bio-0.6.2/skbio/io/format/tests/data/prot_vec.emb000066400000000000000000000365201464262511300231550ustar00rootroot00000000000000HDF  P=` `TREExHEAPX8idptridembedding_ptrembedding  @format GCOL embedding1.0float64 Hformat-version @dtype 0 dim@   deflatehSNOD(p10(hTREE> deflate >hTREE=<> @)TREE(  ?@4 4 deflate2 PTREE[< x^ Q ѳ-ڐ(Hf>pҤP\g ~pm;,x^P%]0޿f ̴@Cy?,yG?>Lnd .]Έֿ)f?Sj{̿uu)9scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_filter000066400000000000000000000003311464262511300246050ustar00rootroot00000000000000sanger 1 3 34 -30 30 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_lane000066400000000000000000000003311464262511300242370ustar00rootroot00000000000000sanger 1 -3 34 12 -2 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_read000066400000000000000000000003311464262511300242330ustar00rootroot00000000000000sanger 1 3 34 -30 30 0 4 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_tile000066400000000000000000000003301464262511300242540ustar00rootroot00000000000000sanger 1 5 -4 12 -2 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 
scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_x000066400000000000000000000003521464262511300235720ustar00rootroot00000000000000sanger 1 3 34 if_you're_a_shepherd 42 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_invalid_y000066400000000000000000000003731464262511300235760ustar00rootroot00000000000000sanger 1 3 34 42 and_a_snake_is_killing_all_your_sheep 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_multi_seq_illumina1.3000066400000000000000000000003421464262511300257320ustar00rootroot00000000000000illumina 1 3 34 -30 30 0 1 ACG....ACGTAC ruBBBBrBCEFGH 1 illumina 1 3 34 30 -30 0 1 CGGGCATTGCA CGGGCasdGCA 0 illumina 1 3 35 -30 30 0 2 ACGTA.AATAAAC geTaAafhwqAAf 1 illumina 1 3 35 30 -30 0 3 CATTTAGGA.TGCA tjflkAFnkKghvM 0 scikit-bio-0.6.2/skbio/io/format/tests/data/qseq_single_seq_sanger000066400000000000000000000003311464262511300253020ustar00rootroot00000000000000sanger 1 3 34 -30 30 0 2 ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC ;;>@BCEFGHJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_2_seqs_defaults000066400000000000000000000001051464262511300246650ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_defaults000066400000000000000000000001441464262511300246710ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 
scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_defaults_desc_mismatch000066400000000000000000000001311464262511300275500ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 42 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_defaults_extra000066400000000000000000000001741464262511300260770ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 42 >s_e_q_4 desc 4 42 42 42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_defaults_id_mismatch000066400000000000000000000001311464262511300272260ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_42 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_defaults_length_mismatch000066400000000000000000000001411464262511300301140ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_3_seqs_non_defaults000066400000000000000000000001511464262511300255410ustar00rootroot00000000000000>s*e*q*1 desc+1 1234 0 0 2 >s*e*q*2 desc+2 1 11 111 11112 >s*e*q*3 desc+3 12345 678909 999999 4242424242 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_5_blanks_start_of_file000066400000000000000000000003321464262511300262020ustar00rootroot00000000000000 >seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_5_ws_lines_start_of_file000066400000000000000000000004211464262511300265520ustar00rootroot00000000000000 >seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 
scikit-bio-0.6.2/skbio/io/format/tests/data/qual_6_blanks_start_of_file000066400000000000000000000003331464262511300262040ustar00rootroot00000000000000 >seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_6_ws_lines_start_of_file000066400000000000000000000004351464262511300265600ustar00rootroot00000000000000 >seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_blank_lines_between_records000066400000000000000000000003371464262511300273240ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_blanks_end_of_file000066400000000000000000000003341464262511300253710ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_description_newline_replacement_empty_str000066400000000000000000000001501464262511300323330ustar00rootroot00000000000000>proteinseq detaileddescription with newlines 42 42 255 255 42 42 42 42 42 43 >foo 0 1 2 3 4 5 6 7 8 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_description_newline_replacement_multi_char000066400000000000000000000002121464262511300324330ustar00rootroot00000000000000>proteinseq :-)detailed:-)description with new:-):-)lines:-):-):-) 42 42 255 255 42 42 42 42 42 43 >foo :-):-):-):-) 0 1 2 3 4 5 
6 7 8 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_description_newline_replacement_none000066400000000000000000000001641464262511300312510ustar00rootroot00000000000000>proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 >foo 0 1 2 3 4 5 6 7 8 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_id_whitespace_replacement_empty_str000066400000000000000000000000261464262511300311010ustar00rootroot00000000000000>seq2 42 > a b 1000 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_id_whitespace_replacement_multi_char000066400000000000000000000000721464262511300312030ustar00rootroot00000000000000>>:o>:o>:o>:o>:oseq>:o>:o2>:o 42 >>:o>:o>:o>:o a b 1000 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_id_whitespace_replacement_none000066400000000000000000000000421464262511300300100ustar00rootroot00000000000000> seq 2 42 > a b 1000 1 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_blank_line_after_header000066400000000000000000000001451464262511300301030ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_blank_line_within_seq000066400000000000000000000001461464262511300276450ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_blank_sequence000066400000000000000000000001241464262511300262700ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_legacy_format000066400000000000000000000000571464262511300261320ustar00rootroot00000000000000; legacy-seq-id legacy description 40 30 20 10 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_missing_header000066400000000000000000000000231464262511300262700ustar00rootroot00000000000000seq1 desc1 1 2 3 4 
scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_missing_qual_scores_first000066400000000000000000000001341464262511300305720ustar00rootroot00000000000000>s_e_q_1 desc 1 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_missing_qual_scores_last000066400000000000000000000001251464262511300304060ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_missing_qual_scores_middle000066400000000000000000000001231464262511300306770ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_qual_scores_float000066400000000000000000000001461464262511300270220ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41.0 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_qual_scores_negative000066400000000000000000000001441464262511300275150ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_qual_scores_over_255000066400000000000000000000001441464262511300272610ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 256 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_qual_scores_string000066400000000000000000000001451464262511300272220ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1a -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_whitespace_line_in_seq000066400000000000000000000001611464262511300300130ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 
scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_whitespace_only_sequence000066400000000000000000000001371464262511300304020ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_invalid_ws_line_after_header000066400000000000000000000001601464262511300274420ustar00rootroot00000000000000>s_e_q_1 desc 1 1 2 3 4 >s_e_q_2 desc 2 42 41 39 40 >s_e_q_3 desc 3 100 0 1 -42 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_max_width_1000066400000000000000000000000501464262511300240050ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_max_width_5000066400000000000000000000003251464262511300240160ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_multi_seq000066400000000000000000000003251464262511300236100ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_multi_seq_roundtrip000066400000000000000000000002461464262511300257200ustar00rootroot00000000000000>seq-a a's description 0 1 2 3 4 5 6 7 8 >seq-b b's description 0 1 2 3 4 5 6 7 8 9 10 11 >seq-c c's description 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_prot_seqs_odd_labels000066400000000000000000000002031464262511300257700ustar00rootroot00000000000000> -0000000 0000 01 5 00044 -0 > skbio 1 2 33 123 
scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_bio_seq_non_defaults000066400000000000000000000000251464262511300273260ustar00rootroot00000000000000>f-o-o b_a_r 1 2 3 4 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_dna_seq_non_defaults000066400000000000000000000000251464262511300273170ustar00rootroot00000000000000>f-o-o b_a_r 0 1 2 3 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_nuc_seq_non_defaults000066400000000000000000000000271464262511300273440ustar00rootroot00000000000000>f-o-o b_a_r 0 1 2 3 4 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_prot_seq_non_defaults000066400000000000000000000000261464262511300275420ustar00rootroot00000000000000>f-o-o b_a_r 42 41 40 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_rna_seq_non_defaults000066400000000000000000000000251464262511300273350ustar00rootroot00000000000000>f-o-o b_a_r 2 3 4 5 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_single_seq000066400000000000000000000000501464262511300237320ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_tabular_msa_different_type000066400000000000000000000001251464262511300271650ustar00rootroot00000000000000> 20 20 21 >rnaseq-1 rnaseq desc 1 10 9 10 >rnaseq-2 rnaseq desc 2 9 99 99 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_ws_lines_between_records000066400000000000000000000005151464262511300266640ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/qual_ws_lines_end_of_file000066400000000000000000000004651464262511300257470ustar00rootroot00000000000000>seq1 desc1 10 20 30 10 0 0 0 255 1 255 >_____seq__2_ 42 > desc3 0 0 0 0 0 0 0 > 55 10 0 99 1 1 8 77 40 10 10 0 > 10 9 8 7 6 >proteinseq detailed description with new lines 42 42 255 255 42 42 
42 42 42 43 scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-comments-comment-char-id.tsv000066400000000000000000000010551464262511300312270ustar00rootroot00000000000000# pre-header # comment #SampleID col1 col2 col3 # post-header # comment id1 1 a foo id2 2 b bar # intra-data comment with another # sign # ## # comment with leading whitespace is still a comment. # comment with tab characters is also a comment! "# if the first cell is quoted, the parsing rules first process and strip double quotes, then check if the first cell begins with a pound sign" " # same rule applies if the de-quoted cell has leading whitespace (leading/trailing whitespace is *always* ignored)" id3 3 c 42 # trailing # comment scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-comments-mixed-case.tsv000066400000000000000000000010541464262511300302760ustar00rootroot00000000000000# pre-header # comment sAmpLEid col1 col2 col3 # post-header # comment id1 1 a foo id2 2 b bar # intra-data comment with another # sign # ## # comment with leading whitespace is still a comment. # comment with tab characters is also a comment! "# if the first cell is quoted, the parsing rules first process and strip double quotes, then check if the first cell begins with a pound sign" " # same rule applies if the de-quoted cell has leading whitespace (leading/trailing whitespace is *always* ignored)" id3 3 c 42 # trailing # comment scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-comments.tsv000066400000000000000000000010461464262511300262620ustar00rootroot00000000000000# pre-header # comment id col1 col2 col3 # post-header # comment id1 1 a foo id2 2 b bar # intra-data comment with another # sign # ## # comment with leading whitespace is still a comment. # comment with tab characters is also a comment! 
"# if the first cell is quoted, the parsing rules first process and strip double quotes, then check if the first cell begins with a pound sign" " # same rule applies if the de-quoted cell has leading whitespace (leading/trailing whitespace is *always* ignored)" id3 3 c 42 # trailing # comment scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-complete-types-directive.tsv000066400000000000000000000001431464262511300313600ustar00rootroot00000000000000id col1 col2 col3 #q2:types categorical categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-empty-rows.tsv000066400000000000000000000002611464262511300265610ustar00rootroot00000000000000 id col1 col2 col3 id1 1 a foo id2 2 b bar " " " " " " " " " " " id3 3 c 42 scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-leading-trailing-whitespace-split-id.tsv000066400000000000000000000006351464262511300335270ustar00rootroot00000000000000 # Leading/trailing whitespace is ignored in *any* type of cell, including # comments, empty rows, headers, directives, and data cells. # Double-quotes are always processed prior to stripping leading/trailing # whitespace within the cell. sample id "col1 " " col2" col3 #q2:types " numeric " categorical " categorical " id1 " 1 " a foo " " " id2 " 2 b "bar " id3 3 "c " 42 scikit-bio-0.6.2/skbio/io/format/tests/data/sample-metadata-leading-trailing-whitespace.tsv000066400000000000000000000006651464262511300320070ustar00rootroot00000000000000 # Leading/trailing whitespace is ignored in *any* type of cell, including # comments, empty rows, headers, directives, and data cells. # Double-quotes are always processed prior to stripping leading/trailing # whitespace within the cell. 
id "col1 " " col2" col3 #q2:types " numeric " categorical " categorical categorical " id1 " 1 " a foo " " " id2 " 2 b "bar " id3 3 "c " 42 scikit-bio-0.6.2/skbio/io/format/tests/data/sanger_full_range_as_illumina.fastq000066400000000000000000000010461464262511300277340ustar00rootroot00000000000000@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + @ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@ scikit-bio-0.6.2/skbio/io/format/tests/data/sanger_full_range_as_sanger.fastq000066400000000000000000000010461464262511300274010ustar00rootroot00000000000000@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! 
scikit-bio-0.6.2/skbio/io/format/tests/data/sanger_full_range_original_sanger.fastq000066400000000000000000000010461464262511300306020ustar00rootroot00000000000000@FAKE0001 Original version has PHRED scores from 0 to 93 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0002 Original version has PHRED scores from 93 to 0 inclusive (in that order) CATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<;:9876543210/.-,+*)('&%$#"! scikit-bio-0.6.2/skbio/io/format/tests/data/solexa_full_range_original_solexa.fastq000066400000000000000000000007021464262511300306300ustar00rootroot00000000000000@FAKE0003 Original version has Solexa scores from -5 to 62 inclusive (in that order) ACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT + ;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ @FAKE0004 Original version has Solexa scores from 62 to -5 inclusive (in that order) TGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCATGCA + ~}|{zyxwvutsrqponmlkjihgfedcba`_^]\[ZYXWVUTSRQPONMLKJIHGFEDCBA@?>=<; scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_all_data_types000066400000000000000000000006241464262511300256360ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF NM Kestrel Gorlick #=GF DT February 11, 2016 #=GF FN Writer test file #=GS seq1 DT February 1, 2016 #=GS seq1 NM Unknown #=GS seq3 DT Unknown seq1 GAGGCCATGCCCAGGTGAAG seq2 ACCTGAGCCACAGTAGAAGT seq3 CCCTTCGCTGGAAATGTATG #=GR seq3 AS CCGAAAGTCGTTCGAAAATG #=GR seq3 SS GGCGAGTCGTTCGAGCTGGC #=GC AS_cons CGTTCGTTCTAACAATTCCA #=GC SS_cons GGCGCTACGACCTACGACCG // 
scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_blank_lines000066400000000000000000000000571464262511300251320ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AL ABCD #=GF NM 1234 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_data_only000066400000000000000000000001221464262511300246140ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 ACUCCGACAUGCUCC seq2 UAGUGCCGAACGCUG seq3 GUGUGGGCGUGAUUC // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_different_padding000066400000000000000000000002161464262511300263020ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RC A Runon #=GF RC Comment Without Whitespace #=GF RN [2] #=GF RC A Runon #=GF RC Comment With Whitespace //scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_differing_gc_data_length000066400000000000000000000001061464262511300276040ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 ACGCUUGCAA #=GC AT_cons UGCUUGGCGCAU // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_differing_gr_data_length000066400000000000000000000000761464262511300276310ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 AUCGCCUG #=GR seq1 AL UCGCUUG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_differing_seq_lengths000066400000000000000000000001061464262511300271750ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 AUCCGCUUAC seq2 UGCCUGGAGCG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_duplicate_gc000066400000000000000000000002641464262511300252740ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF ID RCR2 OSF22 TCCTCCCAGTGTCGCCCGGT OSF33 TTTTTTGGTCAAATTAAAGG #=GC SS_cons CAAGGGAAATACTACGGGAC #=GC SS_cons CGGGACTCGTGCGTTGTAGG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_duplicate_gr000066400000000000000000000003031464262511300253050ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF ID CBS #=GF AC PF00571 LFDR2 GGCCTCAGGACGAAGCACGG LFDR3 AATTGTGATCATCTTACAGG #=GR LFDR3 OS TTTTACCAATTGCTGACAGA #=GR LFDR3 OS CCTGGACATCCCCCGCACGG // 
scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_duplicate_sequence_names000066400000000000000000000002371464262511300276760ustar00rootroot00000000000000# STOCKHOLM 1.0 ASR132 GGCGUUCCGUUCAGUGCUGG RTF112 GGGCGGUGCUAUGAAAAUAC ASR132 GGCGUUCCGUUCAGUGCUGG TTL879 AGCUAUCUCCGGGCCCUUGG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_duplicate_tree_ids000066400000000000000000000001111464262511300264700ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF TN tree1 #=GF NH ABCD #=GF TN tree1 #=GF NH EFGH // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_extensive000066400000000000000000000012141464262511300246570ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF ID CBS #=GF AC PF00571 #=GF AU Bateman A #=GF SQ 67 #=GS O31698/88-139 OS Bacillus subtilis O83071/192-246 MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRVPVYERS #=GR O83071/192-246 SA 999887756453524252..55152525....36463774777 O31698/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE #=GR O31698/88-139 SS CCCCCCCHHHHHHHHHHH..HEEEEEEE....EEEEEEEEEEH O31699/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE #=GR O31699/88-139 AS ________________*__________________________ #=GR O31699/88-139 IN ____________1______________2__________0____ #=GC SS_cons CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEH // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_extensive_mixed000066400000000000000000000012241464262511300260460ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GR O31699/88-139 AS ________________*__________________________ O83071/192-246 MTCRAQLIAVPRASSLAE..AIACAQKM....RVSRVPVYERS #=GF ID CBS #=GR O31698/88-139 SS CCCCCCCHHHHHHHHHHH..HEEEEEEE....EEEEEEEEEEH #=GF AC PF00571 O31698/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE #=GC SS_cons CCCCCHHHHHHHHHHHHH..EEEEEEEE....EEEEEEEEEEH #=GR O31699/88-139 IN ____________1______________2__________0____ #=GS O31698/88-139 OS Bacillus subtilis #=GR O83071/192-246 SA 999887756453524252..55152525....36463774777 O31699/88-139 EVMLTDIPRLHINDPIMK..GFGMVINN......GFVCVENDE 
#=GF AU Bateman A #=GF SQ 67 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_invalid_data_type000066400000000000000000000000551464262511300263270ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AL ABCD #=GZ NM 1234 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_invalid_nonexistent_gr000066400000000000000000000002671464262511300274300ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF ID ARD RL1255 AAGGGUUAUUUAUAUACUUU RL1332 UGCUAAGAGUGGGGAUGAUU RL1232 GCCACAACCGAUUAGAUAGA #=GR RL1355 AS ACUAUAUACAUAGCUAUAUU // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_invalid_nonexistent_gs000066400000000000000000000001561464262511300274260ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GS AC14 ID ARD2 AC12 AAGGGUUAUUUAUAUACUUU AC13 UGCUAAGAGUGGGGAUGAUU // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_malformed_data_line000066400000000000000000000000301464262511300266060ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_malformed_gc_line000066400000000000000000000000751464262511300262770ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 ATCCGCT #=GC AT_cons // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_malformed_gf_line000066400000000000000000000000331464262511300262740ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AL // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_malformed_gr_line000066400000000000000000000000661464262511300263160ustar00rootroot00000000000000# STOCKHOLM 1.0 seq1 ACGGTCG #=GR seq1 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_malformed_gs_line000066400000000000000000000000531464262511300263130ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GS seq1 AL seq1 ACCTG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_metadata_only000066400000000000000000000001061464262511300254650ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF NM Kestrel Gorlick #=GF DT February 5th, 2016 // 
scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_minimal000066400000000000000000000000601464262511300242710ustar00rootroot00000000000000# STOCKHOLM 1.0 0235244 TGTGTCGCAGTTGTCGTTTG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_missing_footer000066400000000000000000000001711464262511300256750ustar00rootroot00000000000000# STOCKHOLM 1.0 0232455 TTATCTTAGCCTCTCTAAGT 0234323 ATCCCACGGAAACAGATGGC 0235244 TGTGTCGCAGTTGTCGTTTG scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_missing_header000066400000000000000000000001541464262511300256300ustar00rootroot000000000000000232455 TTATCTTAGCCTCTCTAAGT 0234323 ATCCCACGGAAACAGATGGC 0235244 TGTGTCGCAGTTGTCGTTTG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_missing_reference_items000066400000000000000000000001021464262511300275300ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RT A Title #=GF RA The Author // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_missing_rn_tag000066400000000000000000000000551464262511300256520ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RL Flagstaff Arizona // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multi_line_tree_no_id000066400000000000000000000000551464262511300271770ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF NH ABCD #=GF NH EFGH // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multi_line_tree_with_id000066400000000000000000000000731464262511300275360ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF TN tree1 #=GF NH ABCD #=GF NH EFGH // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multiple_msa000066400000000000000000000007461464262511300253510ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AC G2134T23 #=GF ID ARD RTC2231 AAGGGUUAUUUAUAUACUUU RTF2124 UGCUAAGAGUGGGGAUGAUU RTH3322 GCCACAACCGAUUAGAUAGA RTB1512 UUAGAAACCGAUGGACCGAA #=GC AC_cons GGGACUGGACAUCUAUUCAG // # STOCKHOLM 1.0 #=GF AC G2134T23 #=GF ID ARD RTC2231 AAGGGUUAUUUAUAUACUUU RTF2124 UGCUAAGAGUGGGGAUGAUU RTH3322 GCCACAACCGAUUAGAUAGA RTB1512 
UUAGAAACCGAUGGACCGAA #=GC AC_cons GGGACUGGACAUCUAUUCAG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multiple_multi_line_trees000066400000000000000000000001431464262511300301230ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF TN tree1 #=GF NH ABCD #=GF NH EFGH #=GF TN tree2 #=GF NH IJKL #=GF NH MNOP // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multiple_references000066400000000000000000000004771464262511300267130ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RM 123456789 #=GF RT Title 1 #=GF RA Author 1 #=GF RL Location 1 #=GF RC Comment 1 #=GF RN [2] #=GF RM 987654321 #=GF RT Title 2 #=GF RA Author 2 #=GF RL Location 2 #=GF RC Comment 2 #=GF RN [3] #=GF RM 132465879 #=GF RT Title 3 #=GF RA Author 3 #=GF RL Location 3 #=GF RC Comment 3 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_multiple_trees000066400000000000000000000001441464262511300257030ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF TN tree1 #=GF NH ABCD #=GF TN tree2 #=GF NH EFGH #=GF TN tree3 #=GF NH IJKL // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_no_data000066400000000000000000000000231464262511300242470ustar00rootroot00000000000000# STOCKHOLM 1.0 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_nonstring_labels000066400000000000000000000001561464262511300262140ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF 1.3 2857 #=GS 11214 8 123 11214 ACTG #=GR 11214 1.0 1234 #=GC 25 4321 // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_rna000066400000000000000000000003631464262511300234310ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AC G2134T23 #=GF ID ARD RTC2231 AAGGGUUAUUUAUAUACUUU RTF2124 UGCUAAGAGUGGGGAUGAUU RTH3322 GCCACAACCGAUUAGAUAGA RTB1512 UUAGAAACCGAUGGACCGAA #=GC AC_cons GGGACUGGACAUCUAUUCAG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_gf_no_whitespace000066400000000000000000000002341464262511300273730ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF CC CBS domains are small intracellular modules 
mostly found #=GF CC in 2 or four copies within a protein. GG1344 ACTGGTTCAATG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_gf_with_whitespace000066400000000000000000000002351464262511300277330ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF CC CBS domains are small intracellular modules mostly found #=GF CC in 2 or four copies within a protein. GG1344 ACTGGTTCAATG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_gs_no_whitespace000066400000000000000000000001271464262511300274110ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GS seq1 LN This is a runon #=GS seq1 LN GS line. seq1 ATCGTTCAGTG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_gs_with_whitespace000066400000000000000000000001301464262511300277420ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GS seq1 LN This is a runon #=GS seq1 LN GS line. seq1 ATCGTTCAGTG // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_references000066400000000000000000000002261464262511300262110ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RM 123456789 #=GF RT A Runon #=GF RT Title #=GF RA The Author #=GF RL A Location #=GF RC A Runon #=GF RC Comment // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_runon_references_mixed000066400000000000000000000002261464262511300273770ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RC A Runon #=GF RM 123456789 #=GF RT A Runon #=GF RA The Author #=GF RT Title #=GF RL A Location #=GF RC Comment // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_single_reference000066400000000000000000000001671464262511300261520ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF RN [1] #=GF RM 123456789 #=GF RT A Title #=GF RA The Author #=GF RL A Location #=GF RC Comment // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_single_tree_with_id000066400000000000000000000000561464262511300266570ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF TN tree1 #=GF NH ABCD // 
scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_single_tree_without_id000066400000000000000000000000401464262511300274000ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF NH ABCD // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_two_of_each_metadata000066400000000000000000000003661464262511300267710ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF NM Kestrel Gorlick #=GF DT February 5th, 2016 #=GS seq1 AL ABCD #=GS seq1 NS 1234 seq1 ACTGACCATGTTCA #=GR seq1 SS CACTACTTGTGACG #=GR seq1 AS TCACATCGGCGATG #=GC SS_cons ______1____2__ #=GC AS_cons __1___1___3___ // scikit-bio-0.6.2/skbio/io/format/tests/data/stockholm_whitespace_only_lines000066400000000000000000000001121464262511300272300ustar00rootroot00000000000000# STOCKHOLM 1.0 #=GF AL ABCD #=GF NM 1234 // scikit-bio-0.6.2/skbio/io/format/tests/data/taxdump_names.dmp000066400000000000000000000022071464262511300242110ustar00rootroot000000000000001 | root | | scientific name | 2 | Bacteria | Bacteria | scientific name | 2 | eubacteria | | genbank common name | 543 | Enterobacteriaceae | | scientific name | 548 | Klebsiella aerogenes | | scientific name | 561 | Escherichia | | scientific name | 562 | "Bacillus coli" Migula 1895 | | authority | 562 | Escherichia coli | | scientific name | 562 | Escherichia/Shigella coli | | equivalent name | 570 | Donovania | | synonym | 570 | Klebsiella | | scientific name | 620 | Shigella | | scientific name | 622 | Shigella dysenteriae | | scientific name | 766 | Rickettsiales | | scientific name | 1224 | Proteobacteria | | scientific name | 1236 | Gammaproteobacteria | | scientific name | 28211 | Alphaproteobacteria | | scientific name | 91347 | Enterobacterales | | scientific name | 118884 | unclassified Gammaproteobacteria | | scientific name | 126792 | Plasmid pPY113 | | scientific name | 131567 | cellular organisms | | scientific name | 585056 | Escherichia coli UMN026 | | scientific name | 1038927 | Escherichia coli O104:H4 | | scientific name | 2580236 | 
synthetic Escherichia coli Syn61 | | scientific name | scikit-bio-0.6.2/skbio/io/format/tests/data/taxdump_nodes.dmp000066400000000000000000000023411464262511300242150ustar00rootroot000000000000001 | 1 | no rank | | 8 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | | 2 | 131567 | superkingdom | | 0 | 0 | 11 | 0 | 0 | 0 | 0 | 0 | | 543 | 91347 | family | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 548 | 570 | species | KA | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 561 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 562 | 561 | species | EC | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 570 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 620 | 543 | genus | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 622 | 620 | species | SD | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 766 | 28211 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 1224 | 2 | phylum | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 1236 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 28211 | 1224 | class | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 91347 | 1236 | order | | 0 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | 118884 | 1236 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 126792 | 36549 | species | PP | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 131567 | 1 | no rank | | 8 | 1 | 1 | 1 | 0 | 1 | 1 | 0 | | 585056 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 1038927 | 562 | no rank | | 0 | 1 | 11 | 1 | 0 | 1 | 1 | 0 | | 2580236 | 488338 | species | SE | 7 | 1 | 11 | 1 | 0 | 1 | 0 | 0 | | scikit-bio-0.6.2/skbio/io/format/tests/data/tsv_10_fields000066400000000000000000000002401464262511300232220ustar00rootroot00000000000000buh na de nuh buh KRRGRHRGRBAWAOMPWAMPOWAMP na de nuh de 1 1 1 1 1 1 1 1 1 1 3 3 3 3 3 3 $ 3 3 3 4 4 4 4 4 4 4 % 4 4 4 4 4 4 4 4 4 4 % 4 5 5 5 5 5 5 5 wat 6 ^ scikit-bio-0.6.2/skbio/io/format/tests/data/tsv_8_fields000066400000000000000000000002051464262511300231520ustar00rootroot00000000000000buh na de nuh buh KRRGRHRGRBAWAOMPWAMPOWAMP na de 1 1 1 1 1 1 1 1 3 3 3 3 3 3 $ 3 4 4 4 4 4 4 4 % 4 4 4 4 4 4 4 4 5 5 5 5 5 5 5 wat 
scikit-bio-0.6.2/skbio/io/format/tests/data/whitespace_only000066400000000000000000000000571464262511300237630ustar00rootroot00000000000000 scikit-bio-0.6.2/skbio/io/format/tests/data/wrapping_as_illumina.fastq000066400000000000000000000017031464262511300261060ustar00rootroot00000000000000@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + Zb^Ld`N\[d`NaZ[aZc]UOKHDA[\YT[_W[aZ\aZ[Zd`SF_WeaUI[Y\[[\\\[\Z\aY`X[[aZ\aZ\d`OY[aY[[\[[e`WPJC^UZ[`X\[R]T_V_W[`[Ga\I`\H[[Q^TVa\Ia\Ic^LY\S @SRR014849.110027 EIXKN4201APUB0 length=131 CTTCAAATGATTCCGGGACTGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTTCGGTTCCAACTCGCCGTCCGAATAATCCGTTCAAAATCTTGGCCTGTCAAAACGACTTTACGACCAGAACGATCCG + \aYY_[FY\T`X^Vd`OY\[[^U_V[R^T[_ZDc^La\HYYO\S[c^Ld`Nc_QAZaZaYaY`XZZ\[aZZ[aZ[aZ[aZY`Z[`ZWeaVJ\[aZaY`X[PY\eaUG[\[[d`OXTUZ[Q\\`W\\\Y_W\ @SRR014849.203935 EIXKN4201B4HU6 length=144 AACCCGTCCCATCAAAGATTTTGGTTGGAACCCGAAAGGGTTTTGAATTCAAACCCCTTTCGGTTCCAACTATTCAATTGTTTAACTTTTTTTAAATTGATGGTCTGTTGGACCATTTGTAATAATCCCCATCGGAATTTCTTT + `Z_ZDVT^YB[[Xd`PZ\d`RDaZaZ`ZaZ_ZDXd`Pd`Pd`RD[aZ`ZWd`Oc_RCd`P\aZ`ZaZaZY\YaZYaY`XYd`O`X[e`WPJEAc^LaZS[YYN[Z\Y`XWLT^U\b]JW[[RZ\SYc`RD[Z\WLXM`\HYa\I scikit-bio-0.6.2/skbio/io/format/tests/data/wrapping_as_sanger.fastq000066400000000000000000000017031464262511300255530ustar00rootroot00000000000000@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + ;C?-EA/=60,)%"<=:5<@85@7@8+8<<3;=4:DA3%<;=8-9.A=):B=* scikit-bio-0.6.2/skbio/io/format/tests/data/wrapping_original_sanger.fastq000066400000000000000000000017171464262511300267610ustar00rootroot00000000000000@SRR014849.50939 EIXKN4201BA2EC length=135 GAAATTTCAGGGCCACCTTTTTTTTGATAGAATAATGGAGAAAATTAAAAGCTGTACATATACCAATGAACAATAAATCAATACATAAAAAAGGAGAAGTTGGAACCGAAAGGGTTTGAATTCAAACCCTTTCGG + ;C?-EA/=60,)%"<=:5< 
@85@7@8+8< <3;=4:DA3%<;=8-9.A=):B=* scikit-bio-0.6.2/skbio/io/format/tests/test_base.py000066400000000000000000000321761464262511300222640ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import numpy.testing as npt import numpy as np from skbio import Sequence, DNA, RNA from skbio.io.format._base import (_decode_qual_to_phred, _encode_phred_to_qual, _get_nth_sequence, _parse_fasta_like_header, _format_fasta_like_records) class PhredDecoderTests(unittest.TestCase): def test_missing_variant_and_phred_offset(self): with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('abcd') self.assertIn('`variant`', str(cm.exception)) self.assertIn('`phred_offset`', str(cm.exception)) self.assertIn('decode', str(cm.exception)) def test_variant_and_phred_offset_provided(self): with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('abcd', variant='sanger', phred_offset=64) self.assertIn('both', str(cm.exception)) self.assertIn('`variant`', str(cm.exception)) self.assertIn('`phred_offset`', str(cm.exception)) def test_solexa_variant(self): with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('abcd', variant='solexa') self.assertIn('719', str(cm.exception)) def test_unrecognized_variant(self): with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('abcd', variant='illumina') self.assertIn('variant', str(cm.exception)) self.assertIn("'illumina'", str(cm.exception)) def test_empty_qual_str(self): npt.assert_equal(_decode_qual_to_phred('', variant='sanger'), np.array([], dtype=np.uint8)) def test_sanger_variant(self): # test entire range of possible ascii chars for sanger all_sanger_ascii = 
('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP' 'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~') obs = _decode_qual_to_phred(all_sanger_ascii, variant='sanger') npt.assert_equal(obs, np.arange(94)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('a b', variant='sanger') self.assertIn('[0, 93]', str(cm.exception)) def test_illumina13_variant(self): # test entire range of possible ascii chars for illumina1.3 all_illumina13_ascii = ('@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk' 'lmnopqrstuvwxyz{|}~') obs = _decode_qual_to_phred(all_illumina13_ascii, variant='illumina1.3') npt.assert_equal(obs, np.arange(63)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('a!b', variant='illumina1.3') self.assertIn('[0, 62]', str(cm.exception)) def test_illumina18_variant(self): # test entire range of possible ascii chars for illumina1.8 all_illumina18_ascii = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL' 'MNOPQRSTUVWXYZ[\\]^_') obs = _decode_qual_to_phred(all_illumina18_ascii, variant='illumina1.8') npt.assert_equal(obs, np.arange(63)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred('AaB', variant='illumina1.8') self.assertIn('[0, 62]', str(cm.exception)) def test_custom_phred_offset(self): ascii_chars = '*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\' obs = _decode_qual_to_phred(ascii_chars, phred_offset=42) npt.assert_equal(obs, np.arange(51)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred(ascii_chars, phred_offset=43) self.assertIn('[0, 83]', str(cm.exception)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred(ascii_chars, phred_offset=0) self.assertIn('`phred_offset`', str(cm.exception)) self.assertIn('printable', str(cm.exception)) with self.assertRaises(ValueError) as cm: _decode_qual_to_phred(ascii_chars, phred_offset=127) self.assertIn('`phred_offset`', str(cm.exception)) self.assertIn('printable', str(cm.exception)) class PhredEncoderTests(unittest.TestCase): def 
test_missing_variant_and_phred_offset(self): with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([1, 2, 3]) self.assertIn('`variant`', str(cm.exception)) self.assertIn('`phred_offset`', str(cm.exception)) self.assertIn('encode', str(cm.exception)) def test_variant_and_phred_offset_provided(self): with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([1, 2, 3], variant='sanger', phred_offset=64) self.assertIn('both', str(cm.exception)) self.assertIn('`variant`', str(cm.exception)) self.assertIn('`phred_offset`', str(cm.exception)) def test_solexa_variant(self): with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([1, 2, 3], variant='solexa') self.assertIn('719', str(cm.exception)) def test_unrecognized_variant(self): with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([1, 2, 3], variant='illumina') self.assertIn('variant', str(cm.exception)) self.assertIn("'illumina'", str(cm.exception)) def test_no_phred_scores(self): self.assertEqual(_encode_phred_to_qual([], variant='sanger'), '') def test_sanger_variant(self): # test entire range of possible ascii chars for sanger all_sanger_ascii = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOP' 'QRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~') obs = _encode_phred_to_qual(list(range(94)), variant='sanger') self.assertEqual(obs, all_sanger_ascii) with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([42, -1, 33], variant='sanger') self.assertIn('-1', str(cm.exception)) self.assertIn('[0, 93]', str(cm.exception)) obs = npt.assert_warns(UserWarning, _encode_phred_to_qual, [42, 94, 33], variant='sanger') self.assertEqual(obs, 'K~B') def test_illumina13_variant(self): # test entire range of possible ascii chars for illumina1.3 all_illumina13_ascii = ('@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijk' 'lmnopqrstuvwxyz{|}~') obs = _encode_phred_to_qual(list(range(63)), variant='illumina1.3') self.assertEqual(obs, all_illumina13_ascii) with self.assertRaises(ValueError) as cm: 
_encode_phred_to_qual([42, -1, 33], variant='illumina1.3') self.assertIn('-1', str(cm.exception)) self.assertIn('[0, 62]', str(cm.exception)) obs = npt.assert_warns(UserWarning, _encode_phred_to_qual, [42, 63, 33], variant='illumina1.3') self.assertEqual(obs, 'j~a') def test_illumina18_variant(self): # test entire range of possible ascii chars for illumina1.8 all_illumina18_ascii = ('!"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKL' 'MNOPQRSTUVWXYZ[\\]^_') obs = _encode_phred_to_qual(list(range(63)), variant='illumina1.8') self.assertEqual(obs, all_illumina18_ascii) with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([42, -1, 33], variant='illumina1.8') self.assertIn('-1', str(cm.exception)) self.assertIn('[0, 62]', str(cm.exception)) obs = npt.assert_warns(UserWarning, _encode_phred_to_qual, [42, 63, 33], variant='illumina1.8') self.assertEqual(obs, 'K_B') def test_custom_phred_offset(self): ascii_chars = '*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\' obs = _encode_phred_to_qual(list(range(51)), phred_offset=42) self.assertEqual(obs, ascii_chars) with self.assertRaises(ValueError) as cm: _encode_phred_to_qual([42, -1, 33], phred_offset=42) self.assertIn('-1', str(cm.exception)) self.assertIn('[0, 84]', str(cm.exception)) obs = npt.assert_warns(UserWarning, _encode_phred_to_qual, [42, 255, 33], phred_offset=42) self.assertEqual(obs, 'T~K') class TestGetNthSequence(unittest.TestCase): def setUp(self): def generator(): for i in range(1, 6): yield 'goldilocks: ' + str(i) self.gen = generator() def test_seq_num_too_small(self): with self.assertRaises(ValueError) as cm: _get_nth_sequence(self.gen, 0) self.assertIn('between 1 and', str(cm.exception)) self.assertIn('0', str(cm.exception)) def test_seq_num_too_big(self): with self.assertRaises(ValueError) as cm: _get_nth_sequence(self.gen, 6) self.assertIn('end of file', str(cm.exception)) self.assertIn('6th', str(cm.exception)) def test_seq_num_just_right(self): value = _get_nth_sequence(self.gen, 3) 
self.assertEqual(value, 'goldilocks: 3') class TestParseFASTALikeHeader(unittest.TestCase): def test_no_id_or_description(self): obs = _parse_fasta_like_header('> \t\t \n') self.assertEqual(obs, ('', '')) def test_id_only(self): obs = _parse_fasta_like_header('>suht! \t\t \n') self.assertEqual(obs, ('suht!', '')) def test_description_only(self): obs = _parse_fasta_like_header('> suht! \t\t \n') self.assertEqual(obs, ('', 'suht!')) def test_id_and_description(self): obs = _parse_fasta_like_header('>!thus suht! \t\t \n') self.assertEqual(obs, ('!thus', 'suht!')) class TestFormatFASTALikeRecords(unittest.TestCase): def setUp(self): def generator(): yield Sequence('ACGT', metadata={'id': '', 'description': ''}, positional_metadata={'quality': range(4)}) yield RNA('GAU', metadata={'id': ' foo \t\t bar ', 'description': ''}) yield DNA('TAG', metadata={'id': '', 'description': 'foo\n\n bar\n'}) yield Sequence('A', metadata={'id': 'foo', 'description': 'bar baz'}, positional_metadata={'quality': [42]}) self.gen = generator() def test_no_replacement(self): exp = [ ('', 'ACGT', range(4)), (' foo \t\t bar ', 'GAU', None), (' foo\n\n bar\n', 'TAG', None), ('foo bar baz', 'A', [42]) ] obs = list(_format_fasta_like_records(self.gen, None, None, False)) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): npt.assert_equal(o, e) def test_empty_str_replacement(self): exp = [ ('', 'ACGT', range(4)), ('foobar', 'GAU', None), (' foo bar', 'TAG', None), ('foo bar baz', 'A', [42]) ] obs = list(_format_fasta_like_records(self.gen, '', '', False)) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): npt.assert_equal(o, e) def test_multi_char_replacement(self): exp = [ ('', 'ACGT', range(4)), ('-.--.-foo-.--.--.--.-bar-.-', 'GAU', None), (' foo_-__-_ bar_-_', 'TAG', None), ('foo bar baz', 'A', [42]) ] obs = list(_format_fasta_like_records(self.gen, '-.-', '_-_', False)) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): npt.assert_equal(o, e) def 
test_newline_character_in_id_whitespace_replacement(self): with self.assertRaisesRegex(ValueError, r'Newline character'): list(_format_fasta_like_records(self.gen, '-\n--', ' ', False)) def test_newline_character_in_description_newline_replacement(self): with self.assertRaisesRegex(ValueError, r'Newline character'): list(_format_fasta_like_records(self.gen, None, 'a\nb', False)) def test_empty_sequence(self): def blank_seq_gen(): yield from (DNA('A'), Sequence(''), RNA('GG')) with self.assertRaisesRegex(ValueError, r'2nd.*empty'): list(_format_fasta_like_records(blank_seq_gen(), None, None, False)) def test_missing_quality_scores(self): def missing_qual_gen(): yield from (RNA('A', positional_metadata={'quality': [42]}), Sequence('AG'), DNA('GG', positional_metadata={'quality': [41, 40]})) with self.assertRaisesRegex(ValueError, r'2nd sequence.*quality scores'): list(_format_fasta_like_records(missing_qual_gen(), '-', '-', True)) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_binary_dm.py000066400000000000000000000113051464262511300233050ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import tempfile import shutil import os import numpy as np import numpy.testing as npt import h5py from skbio import DistanceMatrix from skbio.io.format.binary_dm import (_h5py_mat_to_skbio_mat, _skbio_mat_to_h5py_mat, _get_header, _parse_ids, _verify_dimensions, _bytes_decoder, _passthrough_decoder, _set_header, _vlen_dtype, _binary_dm_sniffer) class BinaryMatrixTests(unittest.TestCase): def setUp(self): self.mat = np.array([[0, 0.1, 0.2], [0.1, 0, 0.3], [0.2, 0.3, 0]]) self.ids = ['a', 'b', 'c'] self.tempdir = tempfile.TemporaryDirectory() self.basic_fname = os.path.join(self.tempdir.name, 'basic') self.basic = h5py.File(self.basic_fname, 'a') ids = self.basic.create_dataset('order', shape=(3, ), dtype=_vlen_dtype) ids[:] = self.ids self.basic.create_dataset('matrix', data=self.mat) _set_header(self.basic) self.basic.close() self.badids_fname = os.path.join(self.tempdir.name, 'badids') self.badids = h5py.File(self.badids_fname, 'a') ids = self.badids.create_dataset('order', shape=(2, ), dtype=_vlen_dtype) ids[:] = ['a', 'b'] self.badids.create_dataset('matrix', data=self.mat) _set_header(self.badids) self.badids.close() self.noheader_fname = os.path.join(self.tempdir.name, 'noheader') self.noheader = h5py.File(self.noheader_fname, 'a') ids = self.noheader.create_dataset('order', shape=(3, ), dtype=_vlen_dtype) ids[:] = self.ids self.noheader.create_dataset('matrix', data=self.mat) self.noheader.close() def tearDown(self): shutil.rmtree(self.tempdir.name) def test_binary_dm_sniffer(self): self.assertEqual((True, {}), _binary_dm_sniffer(open(self.basic_fname, 'rb'))) self.assertEqual((False, {}), _binary_dm_sniffer(open(self.badids_fname, 'rb'))) self.assertEqual((False, {}), _binary_dm_sniffer(open(self.noheader_fname, 'rb'))) def test_h5py_mat_to_skbio_mat(self): exp = DistanceMatrix(self.mat, self.ids) obs = _h5py_mat_to_skbio_mat(DistanceMatrix, 
h5py.File(self.basic_fname, 'r')) self.assertEqual(obs, exp) def test_skbio_mat_to_h5py_mat(self): fh1 = h5py.File('f1', 'a', driver='core', backing_store=False) mat = DistanceMatrix(self.mat, self.ids) _skbio_mat_to_h5py_mat(mat, fh1) npt.assert_equal(np.asarray(fh1['order'][:], dtype=str), mat.ids) npt.assert_equal(fh1['matrix'], mat.data) def test_get_header(self): self.assertEqual(_get_header(h5py.File(self.basic_fname, 'r')), {'format': b'BDSM', 'version': b'2020.06'}) self.assertEqual(_get_header(h5py.File(self.noheader_fname, 'r')), None) def test_parse_ids(self): tests = [(['a', 'b', 'c'], ['a', 'b', 'c']), ([b'a', b'b', b'\xc3\xa9\xc3\xb8asd'], ['a', 'b', 'éøasd'])] for test, exp in tests: self.assertEqual(_parse_ids(test), exp) def test_verify_dimensions(self): self.assertTrue(_verify_dimensions(h5py.File(self.basic_fname, 'r'))) self.assertFalse(_verify_dimensions(h5py.File(self.badids_fname, 'r'))) def test_bytes_decoder(self): test = [b'', b'a', b'\xc3\xa9\xc3\xb8asd'] exp = ['', 'a', 'éøasd'] self.assertEqual(_bytes_decoder(test), exp) def test_passthrough_decoder(self): tests = [('', ''), ('a', 'a'), ('éøasd', 'éøasd')] for test, expected in tests: self.assertEqual(_passthrough_decoder(test), expected) def test_set_header(self): def mock(): obj = h5py.File('bung', 'a', driver='core', backing_store=False) return obj m = mock() _set_header(m) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_biom.py000066400000000000000000000045031464262511300222710ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import io import unittest import tempfile from pathlib import Path import h5py from skbio.table import Table, example_table from skbio.io.format.biom import ( _biom_to_table, _table_to_biom, _biom_sniffer) class BIOMFormatTests(unittest.TestCase): def setUp(self): self.tempdir = tempfile.TemporaryDirectory() tempdir = Path(self.tempdir.name) self.valid_biom_path = str(tempdir / Path('valid.biom')) self.invalid_biom_path = str(tempdir / Path('invalid')) self.writable_biom_path = str(tempdir / Path('write.biom')) self.nonbiom_hdf5_path = str(tempdir / Path('other.hdf5')) self.difbiomver_path = str(tempdir / Path('otherver.biom')) self.table = example_table.copy() with h5py.File(self.valid_biom_path, 'w') as fp: self.table.to_hdf5(fp, 'unit-test') with open(self.invalid_biom_path, 'wb') as fp: fp.write(b'this is not a biom file') with h5py.File(self.nonbiom_hdf5_path, 'w') as fp: fp['stuff'] = [1, 2, 3] self.table = example_table.copy() with h5py.File(self.difbiomver_path, 'w') as fp: self.table.to_hdf5(fp, 'unit-test') fp.attrs['format-version'] = [3, 0] def tearDown(self): self.tempdir.cleanup() def test_sniffer(self): self.assertEqual(_biom_sniffer(self.valid_biom_path), (True, {})) self.assertEqual(_biom_sniffer(self.invalid_biom_path), (False, {})) self.assertEqual(_biom_sniffer(self.nonbiom_hdf5_path), (False, {})) self.assertEqual(_biom_sniffer(self.difbiomver_path), (False, {})) def test_biom_to_table(self): tab = _biom_to_table(self.valid_biom_path) self.assertEqual(tab, self.table) def test_table_to_biom(self): _table_to_biom(self.table, self.writable_biom_path) roundtrip = _biom_to_table(self.writable_biom_path) self.assertEqual(roundtrip, self.table) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_blast6.py000066400000000000000000000140431464262511300225360ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import pandas as pd import numpy as np from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.io.format.blast6 import _blast6_to_data_frame class TestBlast6Reader(unittest.TestCase): def test_default_valid_single_line(self): fp = get_data_path('blast6_default_single_line') df = _blast6_to_data_frame(fp, default_columns=True) exp = pd.DataFrame([['query1', 'subject2', 75.0, 8.0, 2.0, 0.0, 1.0, 8.0, 2.0, 9.0, 0.06, 11.5]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_default_valid_multi_line(self): fp = get_data_path('blast6_default_multi_line') df = _blast6_to_data_frame(fp, default_columns=True) exp = pd.DataFrame([['query1', 'subject2', 100.00, 8.0, 0.0, 0.0, 1.0, 8.0, 3.0, 10.0, 9e-05, 16.9], ['query1', 'subject2', 75.00, 8.0, 2.0, 0.0, 1.0, 8.0, 2.0, 9.0, 0.060, 11.5], ['query2', 'subject1', 71.43, 7.0, 2.0, 0.0, 1.0, 7.0, 1.0, 7.0, 0.044, 11.9]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_single_line(self): fp = get_data_path('blast6_custom_single_line') df = _blast6_to_data_frame(fp, columns=['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive', 'gaps']) exp = pd.DataFrame([['query1', 'PAAWWWWW', 8.0, 1.0, 100.00, 8.0, 0.0]], columns=['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive', 'gaps']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_multi_line(self): fp = 
get_data_path('blast6_custom_multi_line') df = _blast6_to_data_frame(fp, columns=['sacc', 'score', 'gapopen', 'qcovs', 'sblastnames', 'sallacc', 'qaccver']) exp = pd.DataFrame([['subject2', 32.0, 0.0, 100.0, np.nan, 'subject2', 'query1'], ['subject2', 18.0, 0.0, 100.0, np.nan, 'subject2', 'query1'], ['subject1', 19.0, 0.0, 70.0, np.nan, 'subject1', 'query2']], columns=['sacc', 'score', 'gapopen', 'qcovs', 'sblastnames', 'sallacc', 'qaccver']) exp['sblastnames'] = exp['sblastnames'].astype(object) assert_data_frame_almost_equal(df, exp) def test_valid_nan_handling(self): fp = get_data_path('blast6_custom_mixed_nans') df = _blast6_to_data_frame(fp, columns=['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive', 'gaps']) exp = pd.DataFrame([[np.nan, 'PAAWWWWW', 8.0, 1.0, 100.00, np.nan, 0.0], ['query1', np.nan, 8.0, 1.0, np.nan, 8.0, 0.0]], columns=['qacc', 'qseq', 'btop', 'sframe', 'ppos', 'positive', 'gaps']) assert_data_frame_almost_equal(df, exp) def test_valid_minimal(self): fp = get_data_path('blast6_custom_minimal') df = _blast6_to_data_frame(fp, columns=['sacc']) exp = pd.DataFrame([['subject2']], columns=['sacc']) assert_data_frame_almost_equal(df, exp) def test_custom_and_default_passed_error(self): fp = get_data_path('blast6_default_single_line') with self.assertRaisesRegex(ValueError, r"`columns` and `default_columns`"): _blast6_to_data_frame(fp, columns=['qseqid'], default_columns=True) def test_no_columns_passed_error(self): fp = get_data_path('blast6_default_single_line') with self.assertRaisesRegex(ValueError, r"Either `columns` or `default_columns`"): _blast6_to_data_frame(fp) def test_wrong_amount_of_columns_error(self): fp = get_data_path('blast6_invalid_number_of_columns') with self.assertRaisesRegex( ValueError, r"Specified number of columns \(12\).*\(10\)"): _blast6_to_data_frame(fp, default_columns=True) def test_different_data_in_same_column(self): fp = get_data_path('blast6_invalid_type_in_column') with self.assertRaises(ValueError): 
_blast6_to_data_frame(fp, default_columns=True) def test_wrong_column_name_error(self): fp = get_data_path('blast6_default_single_line') with self.assertRaisesRegex(ValueError, r"Unrecognized column.*'abcd'"): _blast6_to_data_frame(fp, columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'abcd', 'bitscore']) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_blast7.py000066400000000000000000000233601464262511300225410ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import pandas as pd import numpy as np from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.io import BLAST7FormatError from skbio.io.format.blast7 import _blast7_to_data_frame, _blast7_sniffer class TestBLAST7Sniffer(unittest.TestCase): def setUp(self): self.positives = [get_data_path(e) for e in [ 'blast7_default_single_line', 'blast7_default_multi_line', 'blast7_custom_minimal', 'blast7_custom_single_line', 'blast7_custom_multi_line', 'blast7_custom_mixed_nans', 'blast7_invalid_differing_fields', 'blast7_invalid_no_data', 'blast7_invalid_too_many_columns', 'legacy9_and_blast7_default', 'legacy9_invalid_too_many_columns', 'legacy9_mixed_nans', 'legacy9_multi_line', 'legacy9_single_line']] self.negatives = [get_data_path(e) for e in [ 'blast7_invalid_gibberish', 'blast7_invalid_for_sniffer', 'blast7_invalid_for_sniffer_2', 'empty']] def test_positives(self): for fp in self.positives: self.assertEqual(_blast7_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negatives: self.assertEqual(_blast7_sniffer(fp), (False, {})) class 
TestBlast7Reader(unittest.TestCase): def test_default_valid_single_line(self): fp = get_data_path('blast7_default_single_line') df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1', 'subject2', 100.00, 8.0, 0.0, 0.0, 1.0, 8.0, 3.0, 10.0, 9e-05, 16.9]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) fp = get_data_path('legacy9_single_line') df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0, 10.0, 1e-05, 15.5]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_default_valid_multi_line(self): fp = get_data_path('blast7_default_multi_line') df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1', 'subject2', 70.00, 5.0, 0.0, 0.0, 7.0, 60.0, 3.0, 100.0, 9e-05, 10.5], ['query1', 'subject2', 30.00, 8.0, 0.0, 0.0, 6.0, 15.0, 1.0, 100.0, 0.053, 12.0], ['query1', 'subject2', 90.00, 2.0, 0.0, 0.0, 9.0, 35.0, 2.0, 100.0, 0.002, 8.3]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) fp = get_data_path('legacy9_multi_line') df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1', 'subject1', 90.00, 7.0, 1.0, 0.0, 0.0, 8.0, 4.0, 10.0, 1e-05, 15.5], ['query1', 'subject1', 70.00, 8.0, 0.0, 1.0, 0.0, 9.0, 5.0, 7.0, 0.231, 7.8], ['query1', 'subject1', 90.00, 5.0, 1.0, 1.0, 0.0, 0.0, 2.0, 10.0, 0.022, 13.0]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_default_valid_mixed_output(self): fp = get_data_path('legacy9_and_blast7_default') df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query2', 
'subject2', 100.00, 8.0, 0.0, 1.0, 0.0, 9.0, 3.0, 10.0, 2e-05, 9.8], ['query2', 'subject1', 70.00, 9.0, 1.0, 0.0, 1.0, 8.0, 4.0, 9.0, 0.025, 11.7]], columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_minimal(self): fp = get_data_path("blast7_custom_minimal") df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1']], columns=['qseqid']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_single_line(self): fp = get_data_path("blast7_custom_single_line") df = _blast7_to_data_frame(fp) exp = pd.DataFrame([['query1', 100.00, 100.00, 8.0, 0.0, 16.9, 8.0, 'PAAWWWWW']], columns=['qseqid', 'ppos', 'pident', 'length', 'sgi', 'bitscore', 'qend', 'qseq']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_multi_line(self): fp = get_data_path("blast7_custom_multi_line") df = _blast7_to_data_frame(fp) exp = pd.DataFrame([[1.0, 8.0, 3.0, 10.0, 8.0, 0.0, 1.0, 'query1', 'subject2'], [2.0, 5.0, 2.0, 15.0, 8.0, 0.0, 2.0, 'query1', 'subject2'], [1.0, 6.0, 2.0, 12.0, 8.0, 0.0, 1.0, 'query1', 'subject2']], columns=['qstart', 'qend', 'sstart', 'send', 'nident', 'mismatch', 'sframe', 'qaccver', 'saccver']) assert_data_frame_almost_equal(df, exp) def test_custom_valid_mixed_nans(self): fp = get_data_path("blast7_custom_mixed_nans") df = _blast7_to_data_frame(fp) exp = pd.DataFrame([[0.0, np.nan, 8.0, 13.0, 1.0, 1.0, np.nan, 'subject2'], [np.nan, 0.0, 8.0, np.nan, 1.0, 1.0, 'query1', np.nan]], columns=['qgi', 'sgi', 'qlen', 'slen', 'qframe', 'sframe', 'qseqid', 'sseqid']) assert_data_frame_almost_equal(df, exp) def test_legacy9_valid_mixed_nans(self): fp = get_data_path("legacy9_mixed_nans") df = _blast7_to_data_frame(fp) exp = pd.DataFrame([[np.nan, 'subject1', np.nan, 7.0, 1.0, 0.0, np.nan, 8.0, 4.0, 10.0, np.nan, 15.5], ['query2', 'subject1', 90.00, 8.0, np.nan, 0.0, 0.0, 8.0, np.nan, 9.0, 1e-05, np.nan]], 
columns=['qseqid', 'sseqid', 'pident', 'length', 'mismatch', 'gapopen', 'qstart', 'qend', 'sstart', 'send', 'evalue', 'bitscore']) assert_data_frame_almost_equal(df, exp) def test_differing_fields_error(self): fp = get_data_path("blast7_invalid_differing_fields") with self.assertRaisesRegex( BLAST7FormatError, r"Fields \[.*'qseqid', .*'sseqid', .*'qstart'\]" r" do.*\[.*'qseqid', .*'sseqid', .*'score'\]"): _blast7_to_data_frame(fp) fp = get_data_path("legacy9_invalid_differing_fields") with self.assertRaisesRegex( BLAST7FormatError, r"Fields \[.*'qseqid', .*'sseqid', .*'qstart'\]" r" do.*\[.*'qseqid', .*'sseqid', " r".*'sallseqid'\]"): _blast7_to_data_frame(fp) def test_no_data_error(self): fp = get_data_path("blast7_invalid_gibberish") with self.assertRaisesRegex(BLAST7FormatError, r"File contains no"): _blast7_to_data_frame(fp) fp = get_data_path("blast7_invalid_no_data") with self.assertRaisesRegex(BLAST7FormatError, r"File contains no"): _blast7_to_data_frame(fp) fp = get_data_path("empty") with self.assertRaisesRegex(BLAST7FormatError, r"File contains no"): _blast7_to_data_frame(fp) def test_wrong_amount_of_columns_error(self): fp = get_data_path("blast7_invalid_too_many_columns") with self.assertRaisesRegex(BLAST7FormatError, r"Number of fields.*\(2\)"): _blast7_to_data_frame(fp) fp = get_data_path("legacy9_invalid_too_many_columns") with self.assertRaisesRegex(BLAST7FormatError, r"Number of fields.*\(12\)"): _blast7_to_data_frame(fp) def test_unrecognized_field_error(self): fp = get_data_path("blast7_invalid_unrecognized_field") with self.assertRaisesRegex(BLAST7FormatError, r"Unrecognized field \(.*'sallid'\)"): _blast7_to_data_frame(fp) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_clustal.py000066400000000000000000000267031464262511300230200ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. 
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import string from io import StringIO from unittest import TestCase, main from skbio import TabularMSA from skbio.sequence import GrammaredSequence from skbio.util import classproperty from skbio.util._decorator import overrides from skbio.io.format.clustal import ( _clustal_to_tabular_msa, _tabular_msa_to_clustal, _clustal_sniffer, _is_clustal_seq_line, _delete_trailing_number, _check_length, _label_line_parser) from skbio.io import ClustalFormatError class CustomSequence(GrammaredSequence): @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set('-.') @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return '-' @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set(string.ascii_letters) @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return {} class ClustalHelperTests(TestCase): def test_label_line_parser(self): self.assertEqual(_label_line_parser(StringIO('abc\tucag')), ({"abc": ["ucag"]}, ['abc'])) with self.assertRaises(ClustalFormatError): _label_line_parser(StringIO('abctucag')) def test_is_clustal_seq_line(self): ic = _is_clustal_seq_line self.assertTrue(ic('abc')) self.assertTrue(ic('abc def')) self.assertFalse(ic('CLUSTAL')) self.assertFalse(ic('CLUSTAL W fsdhicjkjsdk')) self.assertFalse(ic(' * *')) self.assertFalse(ic(' abc def')) self.assertFalse(ic('MUSCLE (3.41) multiple sequence alignment')) def test_delete_trailing_number(self): dtn = _delete_trailing_number self.assertEqual(dtn('abc'), 'abc') self.assertEqual(dtn('a b c'), 'a b c') self.assertEqual(dtn('a \t b \t c'), 'a \t b \t c') self.assertEqual(dtn('a b 3'), 'a b') self.assertEqual(dtn('a b c \t 345'), 'a b c') def test_check_lengh(self): self.assertEqual(False, _check_length({'abc': 
['adjfkadfjaksdlfadskfda'], 'xyz': ['adjfkadfjaksdlfadsk']}, ['abc', 'xyz'])), self.assertEqual(True, _check_length({'abc': ['adjfkadfjaksdlfadskfda'], 'xyz': ['adjfkadfjaksdlfadsksdf']}, ['abc', 'xyz'])) self.assertEqual(True, _check_length({'abc': ['adjfkadfjaksdlfadskfda', 'adjfkadfjaksdlfadskfda'], 'xyz': ['adjfkadfjaksdlfadsksdf', 'adjfkadfjaksdlfadsksdf']}, ['abc', 'xyz'])) self.assertEqual(False, _check_length({'abc': ['adjfkadfjaksdlfadskfd', 'adjfkadfjaksdlfadskfda'], 'xyz': ['adjfkadfjaksdlfadsksdf', 'adjfkadfjaksdlfadsksdf']}, ['abc', 'xyz'])) self.assertEqual(False, _check_length({'abc': ['adjfkadfjaksdlfadskfda', 'adjfkadfjaksdlfadskfda'], 'xyz': ['adjfkadfjaksdlfadsksdf', 'adjfkadfjaksdlfadsksd']}, ['abc', 'xyz'])) class ClustalIOTests(TestCase): def setUp(self): self.valid_clustal_out = [ StringIO('CLUSTAL\n\nabc\tucag'), StringIO('CLUSTAL\n\nabc\tuuu\ndef\tccc\n\n ***\n\ndef ggg\nab' 'c\taaa\n'), StringIO('\n'.join(['CLUSTAL\n', 'abc uca', 'def ggg ccc'])), StringIO('\n'.join(['CLUSTAL\n', 'abc uca ggg', 'def ggg ccc'])), StringIO("""CLUSTAL abc GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA def ------------------------------------------------------------ xyz ------------------------------------------------------------ """), StringIO("""CLUSTAL abc GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA def ------------------------------------------------------------ xyz ------------------------------------------------------------ abc GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC def -----------------------------------------CGCGAUGCAUGCAU-CGAU xyz -------------------------------------CAUGCAUCGUACGUACGCAUGAC """), StringIO("""CLUSTAL W (1.82) multiple sequence alignment abc GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA def ------------------------------------------------------------ xyz ------------------------------------------------------------ abc 
GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC def -----------------------------------------CGCGAUGCAUGCAU-CGAU xyz -------------------------------------CAUGCAUCGUACGUACGCAUGAC abc UGACUAGUCAGCUAGCAUCGAUCAGU def CGAUCAGUCAGUCGAU---------- xyz UGCUGCAUCA----------------"""), StringIO("""CLUSTAL W (1.74) multiple sequence alignment abc GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA 60 def ------------------------------------------------------------ xyz ------------------------------------------------------------ abc GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC 11 def -----------------------------------------CGCGAUGCAUGCAU-CGAU 18 xyz -------------------------------------CAUGCAUCGUACGUACGCAUGAC 23 : * * * * ** abc UGACUAGUCAGCUAGCAUCGAUCAGU 145 def CGAUCAGUCAGUCGAU---------- 34 xyz UGCUGCAUCA---------------- 33 * ***""") ] self.invalid_clustal_out = [ StringIO('\n'.join(['dshfjsdfhdfsj', 'hfsdjksdfhjsdf'])), StringIO('\n'.join(['hfsdjksdfhjsdf'])), StringIO('\n'.join(['dshfjsdfhdfsj', 'dshfjsdfhdfsj', 'hfsdjksdfhjsdf'])), StringIO('\n'.join(['dshfjsdfhdfsj', '\t', 'hfsdjksdfhjsdf'])), StringIO('\n'.join(['dshfj\tdfhdfsj', 'hfsdjksdfhjsdf'])), StringIO('\n'.join(['dshfjsdfhdfsj', 'hfsdjk\tdfhjsdf'])), StringIO("""CLUSTAL W (1.74) multiple sequence alignment adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA ------------------------------------------------------------ adk -----GGGGGGG------------------------------------------------ """), StringIO("""CLUSTAL W (1.74) multiple sequence alignment adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA adk -----GGGGGGG------------------------------------------------ adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA adk -----GGGGGGG--------------------------------------------- """), StringIO("""CLUSTAL W (1.74) multiple sequence alignment adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA adk 
-----GGGGGGG--------------------------------------------- adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCA adk -----GGGGGGG--------------------------------------------- """), StringIO("""CLUSTAL W (1.74) multiple sequence alignment adj GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA ------------------------------------------------------------ adk -----GGGGGGG------------------------------------------------ """), StringIO("""CLUSTAL W (1.74) multiple sequence alignment GCAUGCAUGCAUGAUCGUACGUCAGCAUGCUAGACUGCAUACGUACGUACGCAUGCAUCA ------------------------------------------------------------ ------------------------------------------------------------ GUCGAUACGUACGUCAGUCAGUACGUCAGCAUGCAUACGUACGUCGUACGUACGU-CGAC -----------------------------------------CGCGAUGCAUGCAU-CGAU ------------------------------------------------------------ : * * * * ** UGACUAGUCAGCUAGCAUCGAUCAGU 145 CGAUCAGUCAGUCGAU---------- 34 UGCUGCAUCA---------------- 33 * ***""")] def test_tabular_msa_to_clustal_with_empty_input(self): result = _clustal_to_tabular_msa(StringIO(), constructor=CustomSequence) self.assertEqual(dict(result), {}) def test_tabular_msa_to_clustal_with_bad_input(self): BAD = StringIO('\n'.join(['dshfjsdfhdfsj', 'hfsdjksdfhjsdf'])) with self.assertRaises(ClustalFormatError): dict(_clustal_to_tabular_msa(BAD, constructor=CustomSequence)) def test_valid_tabular_msa_to_clustal_and_clustal_to_tabular_msa(self): for valid_out in self.valid_clustal_out: result_before = _clustal_to_tabular_msa( valid_out, constructor=CustomSequence) with StringIO() as fh: _tabular_msa_to_clustal(result_before, fh) fh.seek(0) result_after = _clustal_to_tabular_msa( fh, constructor=CustomSequence) self.assertEqual(result_before, result_after) def test_invalid_tabular_msa_to_clustal_and_clustal_to_tabular_msa(self): for invalid_out in self.invalid_clustal_out: with self.assertRaises(ClustalFormatError): dict(_clustal_to_tabular_msa(invalid_out, constructor=CustomSequence)) def 
test_clustal_sniffer_valid_files(self): for valid_out in self.valid_clustal_out: self.assertEqual(_clustal_sniffer(valid_out), (True, {})) def test_clustal_sniffer_invalid_files(self): for invalid_out in self.invalid_clustal_out: self.assertEqual(_clustal_sniffer(invalid_out), (False, {})) # sniffer should return False on empty file (which isn't contained # in self.invalid_clustal_out since an empty file is a valid output) self.assertEqual(_clustal_sniffer(StringIO()), (False, {})) def test_no_constructor(self): with self.assertRaisesRegex(ValueError, r"`constructor`"): _clustal_to_tabular_msa(self.valid_clustal_out[0]) def test_duplicate_labels(self): msa = TabularMSA([CustomSequence('foo'), CustomSequence('bar')], index=['a', 'a']) with self.assertRaisesRegex(ClustalFormatError, r"index.*unique"): with StringIO() as fh: _tabular_msa_to_clustal(msa, fh) def test_invalid_lengths(self): fh = StringIO( "CLUSTAL\n" "\n\n" "abc GCAU\n" "def -----\n") with self.assertRaisesRegex(ClustalFormatError, r"not aligned"): _clustal_to_tabular_msa(fh, constructor=CustomSequence) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_embed.py000066400000000000000000000172631464262511300224260ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main from pathlib import Path import tempfile import h5py import numpy as np import skbio from skbio import Protein from skbio.util import get_data_path from skbio.embedding._protein import ProteinEmbedding from skbio.embedding._protein import ProteinVector from skbio.io.format.embed import ( _embed_sniffer, _embed_to_generator, _embed_to_object, _generator_to_embed, _objects_to_embed, _embed_to_protein, _protein_to_embed, _protein_to_vector, _vector_to_protein ) class EmbedTests(TestCase): def setUp(self): # single sequence rk = 5 # latent dimension of residues self.sequences = ( [ ( np.load(get_data_path('embed1.txt.npy')), Protein(('IGKEEIQQRLAQFVDHWKELKQLAAARGQRL' 'EESLEYQQFVANVEEEEAWINEKMTLVASED'), metadata={"id": "seq1"}) ), ( np.load(get_data_path('embed2.txt.npy')), Protein(('QQNKELNFKLREKQNEIFELKKIAETLRSKL' 'EKYVDITKKLEDQNLNLQIKISDLEKKLSDA'), metadata={"id": "seq2"}) ) ] ) self.tempdir = tempfile.TemporaryDirectory() tempdir = Path(self.tempdir.name) self.writable_emb_path = str(tempdir / Path('test.emb')) self.writable_emb_path2 = str(tempdir / Path('test2.emb')) self.valid_embed_path = get_data_path('prot.emb') self.invalid_embed_path = str(tempdir / Path('invalid')) self.nonembed_hdf5_path = str(tempdir / Path('other.hdf5')) with open(self.invalid_embed_path, 'wb') as fp: fp.write(b'this is not a embed file') with h5py.File(self.nonembed_hdf5_path, 'w') as fp: fp['stuff'] = [1, 2, 3] def test_sniffer(self): self.assertEqual(_embed_sniffer(self.valid_embed_path), (True, {})) self.assertEqual(_embed_sniffer(self.invalid_embed_path), (False, {})) self.assertEqual(_embed_sniffer(self.nonembed_hdf5_path), (False, {})) def test_read_write_single(self): for emb, seq in self.sequences: fh = self.writable_emb_path obj = ProteinEmbedding(emb, seq) _protein_to_embed(obj, fh) emb2 = _embed_to_protein(fh) np.testing.assert_array_equal(emb, emb2.embedding) 
self.assertEqual(str(seq), str(emb2)) def test_read_write_generator(self): writable_emb_path2 = 'test2.emb' objs1 = [ProteinEmbedding(emb, seq) for emb, seq in self.sequences] _generator_to_embed(objs1, self.writable_emb_path2) objs2 = _embed_to_generator(self.writable_emb_path2) for obj1, obj2 in zip(objs1, objs2): np.testing.assert_array_equal(obj1.embedding, obj2.embedding) self.assertEqual(str(obj1), str(obj2)) def test_write_generator(self): sequences = [ ( np.load(get_data_path('embed1.txt.npy')), Protein(('IGKEEIQQRLAQFVDHWKELKQLAAARGQRL' 'EESLEYQQFVANVEEEEAWINEKMTLVASED'), metadata={"id": "seq1"}) ), ( np.load(get_data_path('embed2.txt.npy')), Protein(('QQNKELNFKLREKQNEIFELKKIAETLRSKL' 'EKYVDITKKLEDQNLNLQIKISDLEKKLSDA'), metadata={"id": "seq2"}) ) ] f = lambda x: ProteinEmbedding(*x) objs1 = (x for x in map(f, sequences)) tempdir = Path(tempfile.mkdtemp()) writable_emb_path = str(tempdir / Path('test.emb')) skbio.io.write(objs1, format='embed', into=writable_emb_path) objs2 = iter(skbio.io.read(writable_emb_path, format='embed', constructor=ProteinEmbedding)) for obj1, obj2 in zip(objs1, objs2): np.testing.assert_array_equal(obj1.embedding, obj2.embedding) self.assertEqual(str(obj1), str(obj2)) class VectorTests(TestCase): def setUp(self): # single sequence rk = 10 # latent dimension of residues self.sequences = ( [ ( np.random.randn(rk), Protein(('IGKEEIQQRLAQFVDHWKELKQLAAARGQRL' 'EESLEYQQFVANVEEEEAWINEKMTLVASED'), metadata={"id": "seq1"}) ), ( np.random.randn(rk), Protein(('QQNKELNFKLREKQNEIFELKKIAETLRSKL' 'EKYVDITKKLEDQNLNLQIKISDLEKKLSDA'), metadata={"id": "seq2"}) ) ] ) self.tempdir = tempfile.TemporaryDirectory() tempdir = Path(self.tempdir.name) self.writable_emb_path = str(tempdir / Path('test.emb')) self.writable_emb_path2 = str(tempdir / Path('test2.emb')) self.valid_embed_path = get_data_path('prot_vec.emb') self.invalid_embed_path = str(tempdir / Path('invalid')) self.nonembed_hdf5_path = str(tempdir / Path('other.hdf5')) with 
open(self.invalid_embed_path, 'wb') as fp: fp.write(b'this is not a embed file') with h5py.File(self.nonembed_hdf5_path, 'w') as fp: fp['stuff'] = [1, 2, 3] def test_sniffer(self): # make sure that the sniffer throws errors as expected self.assertEqual(_embed_sniffer(self.valid_embed_path), (True, {})) self.assertEqual(_embed_sniffer(self.invalid_embed_path), (False, {})) self.assertEqual(_embed_sniffer(self.nonembed_hdf5_path), (False, {})) emb, seq = self.sequences[0] obj = ProteinVector(emb, seq) _protein_to_vector(obj, str(Path(self.tempdir.name) / Path("prot_vec.emb"))) def test_read_write_single(self): for emb, seq in self.sequences: fh = self.writable_emb_path obj = ProteinVector(emb, seq) _protein_to_vector(obj, fh) emb2 = _vector_to_protein(fh) np.testing.assert_array_equal( emb, emb2.embedding.ravel()) self.assertEqual(str(seq), str(emb2)) def test_read_write_generator(self): writable_emb_path2 = 'test2.emb' objs1 = [ProteinVector(emb, seq) for emb, seq in self.sequences] _generator_to_embed(objs1, self.writable_emb_path2) objs2 = _embed_to_generator(self.writable_emb_path2, constructor=ProteinVector) for obj1, obj2 in zip(objs1, objs2): np.testing.assert_array_equal(obj1.embedding, obj2.embedding) self.assertEqual(str(obj1), str(obj2)) def test_write_generator(self): sequences = self.sequences f = lambda x: ProteinVector(*x) objs1 = (x for x in map(f, sequences)) tempdir = Path(tempfile.mkdtemp()) writable_emb_path = str(tempdir / Path('test.emb')) skbio.io.write(objs1, format='embed', into=writable_emb_path) objs2 = iter(skbio.io.read(writable_emb_path, format='embed', constructor=ProteinVector)) for obj1, obj2 in zip(objs1, objs2): np.testing.assert_array_equal(obj1.embedding, obj2.embedding) self.assertEqual(str(obj1), str(obj2)) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_embl.py000066400000000000000000001067311464262511300222700ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main import skbio.io from skbio import DNA, RNA, Sequence, Protein from skbio.metadata import IntervalMetadata from skbio.util import get_data_path # Module specific execption and functions from skbio.io import EMBLFormatError from skbio.io.format.embl import ( _embl_sniffer, _parse_id, _parse_reference, _embl_to_generator, _get_embl_section, _embl_to_sequence, _embl_to_dna, _embl_to_rna, _embl_to_protein, _generator_to_embl, _sequence_to_embl, _protein_to_embl, _rna_to_embl, _dna_to_embl, _serialize_id, _parse_assembly, _embl_parse_section_default, _serialize_dbsource) class SnifferTests(TestCase): def setUp(self): self.positive_fps = list(map(get_data_path, [ 'embl_single_record', 'embl_multi_records'])) self.negative_fps = list(map(get_data_path, [ 'empty', 'whitespace_only', 'embl_uniprot_record', 'embl_w_beginning_whitespace'])) def test_positives(self): for fp in self.positive_fps: self.assertEqual(_embl_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negative_fps: self.assertEqual(_embl_sniffer(fp), (False, {})) # Boilerplate for EMBL IO tests # TODO: implements all setUp needed class EMBLIOTests(TestCase): def setUp(self): # to test ID line self.id = ( # This is a derived record (non-coding, rRNA and spacer records) # (feature level record: # http://www.ebi.ac.uk/ena/browse/feature-level-products # TODO: a Uniprot record? 
(['ID AB000684.1:1..275:rRNA; SV 1; linear; ' 'genomic DNA; STD; ENV; 275 BP.'], {'division': 'ENV', 'mol_type': 'genomic DNA', 'shape': 'linear', 'locus_name': 'AB000684.1:1..275:rRNA', 'unit': 'bp', 'size': 275, 'version': 1, 'class': 'STD', 'date': None}), # A standard record (['ID M14399; SV 1; linear; mRNA; STD; PRO; 63 BP.'], {'division': 'PRO', 'mol_type': 'mRNA', 'shape': 'linear', 'locus_name': 'M14399', 'unit': 'bp', 'size': 63, 'version': 1, 'class': 'STD', 'date': None})) # define a single DNA record (with no interval metadata) # M14399; SV 1; linear; mRNA; STD; PRO; 63 BP. self.single = ( 'gtgaaacaaagcactattgcactggctgtcttaccgttactgtttacccctgtgacaaaagcc', {'LOCUS': {'locus_name': 'M14399', 'class': 'STD', 'division': 'PRO', 'mol_type': 'mRNA', 'shape': 'linear', 'size': 63, 'unit': 'bp', 'version': 1, 'date': None}}, None, DNA) # define a single protein record (uniprot) self.protein = ( 'MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPSEKG' 'LIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLDAKIKAY' 'NLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHLEKDLVKDFK' 'ALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDDSFRKIYTDLGWK' 'FTPL', {'LOCUS': {'locus_name': '001R_FRG3G', 'status': 'Reviewed', 'size': 256, 'unit': 'aa'}}, None, Protein) # define a single DNA record uppercase (filepath) self.single_upper_fp = get_data_path('embl_single_record_upper') # define a single RNA record lower self.single_lower_fp = get_data_path('embl_single_record_lower') # define a single RNA record file path self.single_rna_fp = get_data_path('embl_single_record') # define a http://www.ebi.ac.uk/ena/browse/feature-level-products self.feature_level_fp = get_data_path("embl_feature_level_record") # define a interval metadata (see skbio.metadata.IntervalMetadata) imd = IntervalMetadata(63) # then add interval object to interval metadata. 
Add source imd.add([(0, 63)], [(False, False)], {'db_xref': '"taxon:562"', 'mol_type': '"mRNA"', 'organism': '"Escherichia coli"', 'type': 'source', 'strand': '+', '__location': '1..63'}) imd.add([(0, 63)], # the second True is beacause exact location is not known [(False, True)], {'phase': 0, 'db_xref': ['"GOA:P00634"', '"InterPro:IPR001952"', '"InterPro:IPR017849"', '"InterPro:IPR017850"', '"InterPro:IPR018299"', '"PDB:1AJA"', '"PDB:1AJB"', '"PDB:1AJC"', '"PDB:1AJD"', '"PDB:1ALH"', '"PDB:1ALI"', '"PDB:1ALJ"', '"PDB:1ALK"', '"PDB:1ANI"', '"PDB:1ANJ"', '"PDB:1B8J"', '"PDB:1ED8"', '"PDB:1ED9"', '"PDB:1ELX"', '"PDB:1ELY"', '"PDB:1ELZ"', '"PDB:1EW8"', '"PDB:1EW9"', '"PDB:1HJK"', '"PDB:1HQA"', '"PDB:1KH4"', '"PDB:1KH5"', '"PDB:1KH7"', '"PDB:1KH9"', '"PDB:1KHJ"', '"PDB:1KHK"', '"PDB:1KHL"', '"PDB:1KHN"', '"PDB:1URA"', '"PDB:1URB"', '"PDB:1Y6V"', '"PDB:1Y7A"', '"PDB:2ANH"', '"PDB:2G9Y"', '"PDB:2GA3"', '"PDB:2MLX"', '"PDB:2MLY"', '"PDB:2MLZ"', '"PDB:3BDF"', '"PDB:3BDG"', '"PDB:3BDH"', '"PDB:3CMR"', '"PDB:3DPC"', '"PDB:3DYC"', '"PDB:3TG0"', '"PDB:4KM4"', '"PDB:4YR1"', '"PDB:5C66"', '"PDB:5GAD"', '"PDB:5GAF"', '"PDB:5GAG"', '"PDB:5GAH"', '"PDB:5JTL"', '"PDB:5JTM"', '"PDB:5JTN"', '"PDB:5JTO"', '"PDB:5JTP"', '"UniProtKB/Swiss-Prot:P00634"'], '__location': '1..>63', 'strand': '+', 'note': '"alkaline phosphatase signal peptide"', 'protein_id': '"AAA23431.1"', 'transl_table': '11', 'translation': '"MKQSTIALAVLPLLFTPVTKA"', 'type': 'CDS'}) self.single_rna = ( 'gugaaacaaagcacuauugcacuggcugucuuaccguuacuguuuaccccugugacaaaagcc', {'LOCUS': {'locus_name': 'M14399', 'class': 'STD', 'division': 'PRO', 'mol_type': 'mRNA', 'shape': 'linear', 'size': 63, 'unit': 'bp', 'version': 1, 'date': '02-SEP-1999'}, 'ACCESSION': 'M14399;', # accessions (could be more than one) 'VERSION': 'M14399.1', # a genbank like version 'DATE': ["16-JUL-1988 (Rel. 16, Created)", "02-SEP-1999 (Rel. 
60, Last updated, Version 3)"], 'DBSOURCE': 'MD5; c9b40131b8622946b5aafdf5473b3d43.', 'DEFINITION': "E.coli alkaline phosphatase signal mRNA, 5' end.", 'KEYWORDS': "alkaline phosphatase; signal peptide.", 'SOURCE': {"ORGANISM": "Escherichia coli", 'taxonomy': "Bacteria; Proteobacteria; " "Gammaproteobacteria; Enterobacterales; " "Enterobacteriaceae; Escherichia."}, 'REFERENCE': [{'AUTHORS': 'Gray G.L., Baldridge J.S., ' 'McKeown K.S., Heyneker H.L., ' 'Chang C.N.;', 'JOURNAL': 'Gene 39(2-3):247-254(1985).', 'REFERENCE': '1 (bases 1 to 63)', 'TITLE': '"Periplasmic production of correctly ' 'processed human growth hormone in ' 'Escherichia coli: natural and bacterial ' 'signal sequences are ' 'interchangeable";', 'PUBMED': '3912261'}], 'CROSS_REFERENCE': ['DOI; 10.1016/0378-1119(85)' '90319-1. PUBMED; 3912261.']}, imd, RNA) # define a multi record. File path self.multi_fp = get_data_path('embl_multi_records') # define interval metadata (as single metadata) imd1 = imd # define interal metadata for multi 2 imd2 = IntervalMetadata(743) # then add interval object to interval metadata. 
Add source imd2.add([(0, 743)], [(False, False)], {'organism': '"Ruditapes philippinarum"', 'type': 'source', '__location': '1..743', 'strand': '+', 'mol_type': '"mRNA"', 'db_xref': '"taxon:129788"'}) imd2.add([(57, 444)], [(False, False)], {'translation': '"MPGGKAGKDSGKAKAKAVSRSARAGLQFPVGRIHRHLKNRT' 'TSHG RVGATAAVYSAAILEYLTAEVLELAGNASKDLKVKRI' 'TPRHLQLAIRGDEELDSLIKAT IAGGGVIPHIHKSLIGKKG' 'GQQAK"', 'type': 'CDS', '__location': '58..444', 'protein_id': '"APY18893.1"', 'strand': '+', 'phase': 0, 'product': '"histone"'}) # multi object self.multi = ( ('GTGAAACAAAGCACTATTGCACTGGCTGTCTTACCGTTACTGTTTACCCCTGTGACAAAAGCC', {'LOCUS': {'locus_name': 'M14399', 'class': 'STD', 'division': 'PRO', 'mol_type': 'mRNA', 'shape': 'linear', 'size': 63, 'unit': 'bp', 'version': 1, 'date': '02-SEP-1999'}, 'ACCESSION': 'M14399;', # accessions (could be more than one) 'VERSION': 'M14399.1', # a genbank like version 'DATE': ["16-JUL-1988 (Rel. 16, Created)", "02-SEP-1999 (Rel. 60, Last updated, Version 3)"], 'DBSOURCE': 'MD5; c9b40131b8622946b5aafdf5473b3d43.', 'DEFINITION': "E.coli alkaline phosphatase signal mRNA, 5' end.", 'KEYWORDS': "alkaline phosphatase; signal peptide.", 'SOURCE': {"ORGANISM": "Escherichia coli", 'taxonomy': "Bacteria; Proteobacteria; " "Gammaproteobacteria; Enterobacterales; " "Enterobacteriaceae; Escherichia."}, 'REFERENCE': [{'AUTHORS': 'Gray G.L., Baldridge J.S., ' 'McKeown K.S., Heyneker H.L., ' 'Chang C.N.;', 'JOURNAL': 'Gene 39(2-3):247-254(1985).', 'REFERENCE': '1 (bases 1 to 63)', 'TITLE': '"Periplasmic production of correctly ' 'processed human growth hormone in ' 'Escherichia coli: natural and ' 'bacterial signal sequences are ' 'interchangeable";', 'PUBMED': '3912261'}], 'CROSS_REFERENCE': ['DOI; 10.1016/0378-1119(85)' '90319-1. 
PUBMED; 3912261.']}, imd1, DNA), ('TGTGCACAGTCTACGCGTCATCTTGAAAGAAAGAACTACACTACTCCAAAAATAATCATGCC' 'TGGTGGAAAAGCTGGTAAAGATTCCGGAAAGGCCAAGGCTAAGGCAGTGTCAAGGTCCGCAA' 'GAGCTGGCTTACAGTTTCCAGTCGGACGTATTCACAGGCATTTGAAGAACAGAACCACTAGC' 'CACGGTCGTGTTGGAGCTACAGCAGCCGTTTACAGTGCAGCAATCCTTGAATACCTGACCGC' 'CGAAGTGCTTGAGTTGGCTGGAAACGCAAGTAAAGATCTCAAAGTAAAGAGAATCACCCCAC' 'GTCACTTGCAGTTGGCAATCAGAGGAGATGAAGAGTTGGATTCCCTAATTAAAGCCACAATC' 'GCTGGTGGTGGTGTTATTCCACATATCCACAAGTCACTTATTGGCAAGAAGGGAGGTCAGCA' 'AGCCAAATAAATTGGACATACTCATTCATCAGGGAACAATGTGTAGTGAATGTGTTAAAAAG' 'AACAATCTCATTGTGTAGCTCTTTAGTTTTATATGAATGTGTTAACATGGTCATTCACATCG' 'TATGACTCATAGAATCATCTGTGTATCATTTCATCCTCTCATTTTATAGCTCCTCATTTTCC' 'TTAGACTCATTAAAATTTTTATCTCGGAAAAATGTTTTTTCTACAATTTTAGCATTCATTTA' 'TCTTCATCTTGCTTTTATGTTTAATAAAACGAACTTATAATACCAAAAAAAAAAAAAAAAA', {'ACCESSION': 'KX454487;', 'VERSION': 'KX454487.1', 'COMMENT': '##Assembly-Data-START##\nSequencing Technology ' ':: Sanger dideoxy sequencing\n##Assembly-Data-END##', 'DATE': ['02-FEB-2017 (Rel. 131, Created)', '02-FEB-2017 (Rel. 131, Last updated, Version 1)'], 'DBSOURCE': 'MD5; cbc730cf7a8d694b50fb7dd6b993ae0d.', 'DEFINITION': 'Ruditapes philippinarum histone mRNA, ' 'complete cds.', 'KEYWORDS': '.', 'LOCUS': {'locus_name': 'KX454487', 'class': 'STD', 'division': 'INV', 'mol_type': 'mRNA', 'shape': 'linear', 'size': 743, 'unit': 'bp', 'version': 1, 'date': '02-FEB-2017'}, 'REFERENCE': [ {'AUTHORS': 'Yang D., Zhao J., Wang Q.;', 'JOURNAL': 'Submitted (27-JUN-2016) to the INSDC. 
Key ' 'Laboratory of Coastal Zone Environment Processes ' 'and Ecological Remediation, Yantai Institute ' 'of Coastal Zone Research (YIC), Chinese Academy ' 'of Sciences (CAS), 17 Chunhui Road, Laishan ' 'District, Yantai, Shandong 264003, China', 'REFERENCE': '1 (bases 1 to 743)', 'TITLE': ';'}], 'CROSS_REFERENCE': [None], 'SOURCE': { 'ORGANISM': 'Ruditapes philippinarum', 'taxonomy': 'Eukaryota; Metazoa; Lophotrochozoa; Mollusca; ' 'Bivalvia; Heteroconchia; Euheterodonta; ' 'Veneroida; Veneroidea; Veneridae; Ruditapes.'}}, imd2, DNA)) # define the feature level product obj self.feature_level = ( 'AAUUGAAGAGUUUGAUCAUGGCUCAGAUUGAACGCUGGCGGCAGGCCUAACACAUGCAAGUC' 'GAGCGGCAGCACAGAGGAACUUGUUCCUUGGGUGGCGAGCGGCGGACGGGUGAGUAAUGCCU' 'AGGAAAUUGCCCUGAUGUGGGGGAUAACCAUUGGAAACGAUGGCUAAUACCGCAUGAUGCCU' 'ACGGGCCAAAGAGGGGGACCUUCUGGCCUCUCGCGUCAGGAUAUGCCUAGGUGGGAUUAGCU' 'AGUUGGUGAGGUAAUGGCUCACCAAGGCGACGAUCCCUAGCUGGUCUGAGAGGAUGAUCAGC' 'CACACUGGAACUGAGACACGGUCCAGACUCCUACGGGAGGCAGCAGUGGGGAAUAUUGCACA' 'AUGGGCGCAAGCCUGAUGCAGCCAUGCCGCGUGUAUGAAGAAGGCCUUCGGGUUGUAAAGUA' 'CUUUCAGUCGUGAGGAAGGUGGUGUUGUUAAUAGCAGCAUCAUUUGACGUUAGCGACAGAAG' 'AAGCACCGGCUAACUCCGUGCCAGCAGCCGCGGUAAUACGGAGGGUGCGAGCGUUAAUCGGA' 'AUUACUGGGCGUAAAGCGCAUGCAGGUGGUGGAUUAAGUCAGAUGUGAAAGCCCGGGGCUCA' 'ACCUCGGAACCGCAUUUGAAACUGGUUCACUAGAGUACUGUAGAGGGGGGUAGAAUUUCAGG' 'UGUAGCGGUGAAAUGCGUAGAGAUCUGAAGGAAUACCGGUGGCGAAGGCGGCCCCCUGGACA' 'GAUACUGACACUCAGAUGCGAAAGCGUGGGGAGCAAACAGGAUUAGAUACCCUGGUAGUCCA' 'CGCCGUAAACGAUGUCUACUUGGAGGUUGUGGCCUUGAGCCGUGGCUUUCGGAGCUAACGCG' 'UUAAGUAGACCGCCUGGGGAGUACGGUCGCAAGAUUAAAACUCAAAUGAAUUGACGGGGGCC' 'CGCACAAGCGGUGGAGCAUGUGGUUUAAUUCGAUGCAACGCGAAGAACCUUACCUACUCUUG' 'ACAUCCAGAGAAGCCAGCGGAGACGCAGGUGUGCCUUCGGGAGCUCUGAGACAGGUGCUGCA' 'UGGCUGUCGUCAGCUCGUGUUGUGAAAUGUUGGGUUAAGUCCCGCAACGAGCGCAACCCUUA' 'UCCUUGUUUGCCAGCGAGUCAUGUCGGGAACUCCAGGGAGACUGCCGGUGAUAAACCGGAGG' 'AAGGUGGGGACGACGUCAAGUCAUCAUGGCCCUUACGAGUAGGGCUACACACGUGCUACAAU' 'GGCGCAUACAGAGGGCAGCAAGCUAGCGAUAGUGAGCGAAUCCCAAAAAGUGCGUCGUAGUC' 
'CGGAUUGGAGUCUGCAACUCGACUCCAUGAAGUCGGAAUCGCUAGUAAUCGUAGAUCAGAAU' 'GCUACGGUGAAUACGUUCCCGGGCCUUGUACACACCGCCCGUCACACCAUGGGAGUGGGCUG' 'CAAAAGAAGUGGGUAGUUUAACCUUUCGGGGAGGACGCUCACCACUUUGUGGUUCAUGACUG' 'GGGUGAAGUCGUAACAAGGUAGCGCUAGGGGAACCUGGCGCUGGAUCACCUCCUUA', {'DATE': ['02-JUN-2014 (Rel. 121, Created)', '04-FEB-2016 (Rel. 127, Last updated, Version 5)'], 'DBSOURCE': 'SILVA-LSU; LK021130. SILVA-SSU; LK021130. MD5; ' 'afd116bf2c1a13acbf40d63d82f0218c. BioSample; ' 'SAMEA3865288.', 'DEFINITION': 'Vibrio anguillarum 16S rRNA', 'KEYWORDS': '.', 'LOCUS': {'locus_name': 'LK021130.1:74067..75610:rRNA', 'class': 'STD', 'division': 'PRO', 'mol_type': 'genomic DNA', 'shape': 'linear', 'size': 1544, 'unit': 'bp', 'version': 1, 'date': '04-FEB-2016'}, 'PARENT_ACCESSION': 'LK021130.1', 'VERSION': 'LK021130.1', 'PROJECT_IDENTIFIER': 'Project:PRJEB5701;', 'REFERENCE': [ {'AUTHORS': 'Holm K.;', 'JOURNAL': 'Submitted (26-MAR-2014) to the INSDC. ' 'Norstruct, Dept of Chemistry, University of ' 'Tromso, Science Park 3, NO-9037 Tromso, NORWAY.', 'TITLE': ';', 'REFERENCE': '1'}, {'AUTHORS': 'Holm K.O., Nilsson K., Hjerde E., Willassen ' 'N.P., Milton D.L.;', 'JOURNAL': 'Stand Genomic Sci. 10:60-60(2015).', 'TITLE': '"Complete genome sequence of Vibrio anguillarum ' 'strain NB10, a virulent isolate from the Gulf ' 'of Bothnia";', 'REFERENCE': '2', 'PUBMED': '26380645'}], 'CROSS_REFERENCE': [ None, 'DOI; 10.1186/s40793-015-0060-7. 
PUBMED; 26380645.'], 'SOURCE': { 'ORGANISM': 'Vibrio anguillarum', 'taxonomy': 'Bacteria; Proteobacteria; Gammaproteobacteria; ' 'Vibrionales; Vibrionaceae; Vibrio.'}}, None, RNA) # get the feature level file without FT self.feature_level_fp = get_data_path( "embl_feature_level_record_no_FT") # get a genbank file in order to to file conversion self.genbank_fp = get_data_path('genbank_single_record') # a embl constructed sequence file path self.embl_constructed_fp = get_data_path("embl_constructed") # a simple embl version to perform embl->gb->embl conversion self.single_rna_simple_fp = get_data_path( "embl_single_record_simple") class ReaderTests(EMBLIOTests): """Implements test for reading EMBL data""" def test_parse_id(self): """Parse ID record (first line of embl format)""" for serialized, parsed in self.id: self.assertEqual(_parse_id(serialized), parsed) def test_parse_id_invalid(self): lines = [ # uniprot line (should this module handle it?) ['ID 001R_FRG3G Reviewed; 256 AA.'], # missing unit ['ID M14399; SV 1; linear; mRNA; STD; PRO; 63'], # missing division ['ID M14399; SV 1; linear; mRNA; STD; 63 BP.']] for line in lines: with self.assertRaisesRegex(EMBLFormatError, r'Could not parse the ID line:.*'): _parse_id(line) # current status of protein support is described in issue-1499 # https://github.com/scikit-bio/scikit-bio/issues/1499 def test_no_protein_support(self): """Testing no protein support for embl""" # TODO: add protein support # a fake protein line. handle = io.StringIO('ID M14399; SV 1; linear; mRNA; STD; ' 'PRO; 63 AA.\n//\n') with self.assertRaisesRegex(EMBLFormatError, r"There's no protein support for EMBL " "record"): # read a protein record Protein.read(handle) # return to 0 handle.seek(0) with self.assertRaisesRegex(EMBLFormatError, r"There's no protein support for EMBL " "record"): # read a generic record skbio.io.read(handle, format='embl') def test_parse_reference(self): lines = ''' RP 1-63 RX DOI; 10.1016/0378-1119(85)90319-1. 
RX PUBMED; 3912261. RA Gray G.L., Baldridge J.S., McKeown K.S., Heyneker H.L., Chang C.N.; RT "Periplasmic production of correctly processed human growth hormone in RT Escherichia coli: natural and bacterial signal sequences are RT interchangeable"; RL Gene 39(2-3):247-254(1985).'''.split('\n') # DNA, Sequence and RNA data contain newlines lines = [line+"\n" for line in lines if line != ''] exp = {'AUTHORS': 'Gray G.L., Baldridge J.S., ' 'McKeown K.S., Heyneker H.L., Chang C.N.;', 'JOURNAL': 'Gene 39(2-3):247-254(1985).', 'CROSS_REFERENCE': 'DOI; 10.1016/0378-1119(85)90319-1. ' 'PUBMED; 3912261.', 'REFERENCE': '(bases 1 to 63)', 'TITLE': '"Periplasmic production of correctly processed ' 'human growth hormone in Escherichia coli: ' 'natural and bacterial signal sequences are ' 'interchangeable";', 'PUBMED': '3912261' } # read reference obs = _parse_reference(lines) # See all differences self.maxDiff = None self.assertEqual(obs, exp) def test_parse_assembly(self): lines = """ AH LOCAL_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP AS 1-426 AC004528.1 18665-19090 AS 427-526 AC001234.2 1-100 c AS 527-1000 TI55475028 not_available """.split('\n') # DNA, Sequence and RNA data contain newlines lines = [line+"\n" for line in lines if line != ''] exp = [ { 'local_span': '1-426', 'primary_identifier': 'AC004528.1', 'primary_span': '18665-19090', 'comp': '' }, { 'local_span': '427-526', 'primary_identifier': 'AC001234.2', 'primary_span': '1-100', 'comp': 'c' }, { 'local_span': '527-1000', 'primary_identifier': 'TI55475028', 'primary_span': 'not_available', 'comp': '' } ] # read reference obs = _parse_assembly(lines) # See all differences self.maxDiff = None self.assertEqual(obs, exp) def test_parse_bad_assembly(self): """test for a wrong assembly line""" lines = """ AH LOCAL_SPAN PRIMARY_IDENTIFIER PRIMARY_SPAN COMP AS 1-426 AC004528.1 """.split("\n") # DNA, Sequence and RNA data contain newlines lines = [line+"\n" for line in lines if line != ''] with 
self.assertRaisesRegex(EMBLFormatError, r"Can't parse assembly line"): # read a malformed assembly record _parse_assembly(lines) def test_embl_to_generator_single(self): # test single record and uppercase sequence for c in [Sequence, DNA]: obs = next(_embl_to_generator( self.single_upper_fp, constructor=c)) exp = c(self.single[0], metadata=self.single[1], positional_metadata=self.single[2], lowercase=True) self.assertEqual(exp, obs) def test_get_embl_section(self): """Verify to have a section for each embl ID""" with open(self.single_rna_fp) as fh: for line in fh: if line.startswith("//"): continue # test that this function doesn't raise exceptions try: _get_embl_section(line) except KeyError as err: raise EMBLFormatError("Key {0} isn't defined in embl." "KEYS_2_SECTIONS".format(err)) def test_embl_to_generator(self): for i, obs in enumerate(_embl_to_generator(self.multi_fp)): seq, md, imd, constructor = self.multi[i] exp = constructor(seq, metadata=md, lowercase=True, interval_metadata=imd) self.assertEqual(exp, obs) def test_embl_to_sequence(self): for i, exp in enumerate(self.multi): obs = _embl_to_sequence(self.multi_fp, seq_num=i+1) exp = Sequence(exp[0], metadata=exp[1], lowercase=True, interval_metadata=exp[2]) self.assertEqual(exp, obs) def test_embl_to_rna(self): seq, md, imd, constructor = self.single_rna obs = _embl_to_rna(self.single_rna_fp) exp = constructor(seq, metadata=md, lowercase=True, interval_metadata=imd) self.assertEqual(exp, obs) def test_embl_to_dna(self): i = 1 exp = self.multi[i] obs = _embl_to_dna(self.multi_fp, seq_num=i+1) exp = DNA(exp[0], metadata=exp[1], lowercase=True, interval_metadata=exp[2]) self.assertEqual(exp, obs) # current status of protein support is described in issue-1499 # https://github.com/scikit-bio/scikit-bio/issues/1499 def test_embl_to_protein(self): # TODO: add protein support i = 0 # there is no support for protein at the moment # when protein support will be added, this code must work # exp = self.multi[i] # 
obs = _embl_to_protein(self.multi_fp, seq_num=i+1) # exp = Protein(exp[0], metadata=exp[1], # lowercase=True, interval_metadata=exp[2]) # self.assertEqual(exp, obs) with self.assertRaisesRegex(EMBLFormatError, r"There's no protein support for EMBL " "record"): # read a generic record _embl_to_protein(self.multi_fp, seq_num=i+1) # deal with feature-level-products: ignore feature table def test_feature_level_products(self): seq, md, imd, constructor = self.feature_level obs = _embl_to_rna(self.feature_level_fp) exp = constructor(seq, metadata=md, lowercase=True, interval_metadata=imd) self.assertEqual(obs, exp) # deal with constructed sequences: ignore interval metadata def test_constructed_sequences(self): with self.assertRaisesRegex( EMBLFormatError, r"There's no support for embl CON record"): _embl_to_dna(self.embl_constructed_fp) class WriterTests(EMBLIOTests): def test_serialize_id(self): for serialized, parsed in self.id: self.assertEqual( _serialize_id('ID', parsed), serialized[0] + '\n') def test_serialize_dbsource(self): """Serialize a complex dbsource entry""" # test with a complex uniprot dbsource exp = """DR EMBL; AY548484; AAT09660.1; -; Genomic_DNA. DR RefSeq; YP_031579.1; NC_005946.1. DR ProteinModelPortal; Q6GZX4; -. DR SwissPalm; Q6GZX4; -. DR GeneID; 2947773; -. DR KEGG; vg:2947773; -. DR Proteomes; UP000008770; Genome. 
""" # split by lines lines = [line+"\n" for line in exp.split("\n") if line != ''] # parse objects parsed = _embl_parse_section_default(lines) # now serialize them obs = _serialize_dbsource("DR", parsed) # test objects self.assertEqual(obs, exp) def test_generator_to_embl(self): seq, md, imd, constructor = self.single obj = constructor(seq, md, interval_metadata=imd, lowercase=True) with io.StringIO() as fh: _generator_to_embl([obj], fh) obs = fh.getvalue() with open(self.single_lower_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_sequence_to_embl(self): with io.StringIO() as fh: for i, (seq, md, imd, constructor) in enumerate(self.multi): obj = Sequence(seq, md, interval_metadata=imd) _sequence_to_embl(obj, fh) obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_dna_to_embl(self): with io.StringIO() as fh: for i, (seq, md, imd, constructor) in enumerate(self.multi): obj = constructor( seq, md, interval_metadata=imd, lowercase=True) _dna_to_embl(obj, fh) # read all records written obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) # TODO: add support for protein # current status of protein support is described in issue-1499 # https://github.com/scikit-bio/scikit-bio/issues/1499 def test_protein_to_embl(self): seq, md, imd, constructor = self.protein obj = constructor(seq, md, interval_metadata=imd) with io.StringIO() as fh: self.assertRaisesRegex(EMBLFormatError, r"There's no protein support for EMBL " "record", _protein_to_embl, [obj], fh) def test_rna_to_embl(self): with io.StringIO() as fh: seq, md, imd, constructor = self.single_rna obj = constructor(seq, md, interval_metadata=imd, lowercase=True) _rna_to_embl(obj, fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_rna_to_embl_flp(self): """Test writing feature level products""" with io.StringIO() as fh: seq, md, imd, constructor = 
self.feature_level obj = constructor(seq, md, interval_metadata=imd, lowercase=True) _rna_to_embl(obj, fh) obs = fh.getvalue() with open(self.feature_level_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) class RoundtripTests(EMBLIOTests): def test_roundtrip_generator(self): with io.StringIO() as fh: _generator_to_embl(_embl_to_generator(self.multi_fp), fh) obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_rna(self): with io.StringIO() as fh: _rna_to_embl(_embl_to_rna(self.single_rna_fp), fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_dna(self): with io.StringIO() as fh: _dna_to_embl(_embl_to_dna(self.single_rna_fp), fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) # TODO: test_roundtrip_protein # current status of protein support is described in issue-1499 # https://github.com/scikit-bio/scikit-bio/issues/1499 def test_roundtrip_sequence(self): with io.StringIO() as fh: _sequence_to_embl(_embl_to_sequence(self.single_rna_fp), fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) class Convertertest(EMBLIOTests): def test_gb_to_embl(self): genbank = DNA.read(self.genbank_fp, format="genbank") with io.StringIO() as fh: DNA.write(genbank, format="embl", file=fh) # EMBL can't deal with genbank version (ie M14399.1 GI:145229) # read embl data and write to gb fh.seek(0) embl = DNA.read(fh, format="embl") with io.StringIO() as fh: DNA.write(embl, format="genbank", file=fh) # read gb data obs = fh.getvalue() with open(self.genbank_fp) as fh: exp = fh.read() self.assertEqual(exp, obs) def test_embl_to_gb(self): # EMBL records have more features than genbank, (ex more than one date, # embl class, DOI cross references) so I can't convert an embl to gb # and then to embl keeping all those data. 
But I can start from # genbank record # do embl file -> embl object -> gb file -> gb object -> # embl file. Ensure that first and last files are identical embl = DNA.read(self.single_rna_simple_fp, format="embl") # "write" genbank record in a embl file with io.StringIO() as fh: DNA.write(embl, format="genbank", file=fh) # read genbank file fh.seek(0) genbank = DNA.read(fh, format="genbank") # "write" genbank record in a embl file with io.StringIO() as fh: DNA.write(genbank, format="embl", file=fh) # read file object obs = fh.getvalue() # test objects with open(self.single_rna_simple_fp) as fh: exp = fh.read() self.assertEqual(exp, obs) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_emptyfile.py000066400000000000000000000021111464262511300233320ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import io from skbio.io.format.emptyfile import _empty_file_sniffer class TestEmptyFile(unittest.TestCase): def test_empty(self): res, kw = _empty_file_sniffer(io.StringIO()) self.assertTrue(res) self.assertEqual({}, kw) res, kw = _empty_file_sniffer(io.StringIO(" \n \t ")) self.assertTrue(res) self.assertEqual({}, kw) def test_not_empty(self): res, kw = _empty_file_sniffer(io.StringIO("a")) self.assertFalse(res) self.assertEqual({}, kw) res, kw = _empty_file_sniffer(io.StringIO(" \n \ta")) self.assertFalse(res) self.assertEqual({}, kw) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_fasta.py000066400000000000000000001350561464262511300224510ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import copy import io import string from unittest import TestCase, main from functools import partial import numpy as np from skbio import Sequence, DNA, RNA, Protein, TabularMSA from skbio.io import FASTAFormatError, QUALFormatError from skbio.io.format.fasta import ( _fasta_sniffer, _fasta_to_generator, _fasta_to_sequence, _fasta_to_dna, _fasta_to_rna, _fasta_to_protein, _fasta_to_tabular_msa, _generator_to_fasta, _sequence_to_fasta, _dna_to_fasta, _rna_to_fasta, _protein_to_fasta, _tabular_msa_to_fasta) from skbio.sequence import GrammaredSequence from skbio.util import get_data_path from skbio.util import classproperty from skbio.util._decorator import overrides class CustomSequence(GrammaredSequence): @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set('-.') @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return '-' @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set(string.ascii_letters) @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return {} class SnifferTests(TestCase): def setUp(self): self.positive_fps = list(map(get_data_path, [ 'fasta_5_blanks_start_of_file', 'fasta_5_ws_lines_start_of_file', 'fasta_blanks_end_of_file', 'fasta_ws_lines_end_of_file', 'fasta_blank_lines_between_records', 'fasta_3_seqs_defaults', 'fasta_max_width_1', 'fasta_single_bio_seq_non_defaults', 'fasta_single_prot_seq_non_defaults', 'fasta_3_seqs_non_defaults', 'fasta_max_width_5', 'fasta_single_dna_seq_defaults', 'fasta_single_rna_seq_defaults', 'fasta_description_newline_replacement_empty_str', 'fasta_multi_seq', 'fasta_single_dna_seq_non_defaults', 'fasta_single_rna_seq_non_defaults', 'fasta_description_newline_replacement_multi_char', 'fasta_prot_seqs_odd_labels', 'fasta_single_seq', 'fasta_id_whitespace_replacement_empty_str', 'fasta_tabular_msa_different_type', 
'fasta_id_whitespace_replacement_multi_char', 'fasta_single_bio_seq_defaults', 'fasta_single_prot_seq_defaults', 'fasta_10_seqs', 'fasta_invalid_after_10_seqs', 'fasta_mixed_qual_scores', 'qual_3_seqs_non_defaults' ])) self.negative_fps = list(map(get_data_path, [ 'empty', 'whitespace_only', 'fasta_invalid_missing_header', 'fasta_invalid_blank_line_after_header', 'fasta_invalid_blank_sequence', 'fasta_invalid_blank_line_within_sequence', 'fasta_invalid_whitespace_only_line_within_sequence', 'fasta_invalid_whitespace_line_after_header', 'fasta_invalid_missing_seq_data_first', 'fasta_invalid_missing_seq_data_middle', 'fasta_invalid_missing_seq_data_last', 'fasta_invalid_legacy_format', 'fasta_invalid_whitespace_only_sequence', 'fasta_id_whitespace_replacement_none', 'fasta_description_newline_replacement_none', 'fasta_6_blanks_start_of_file', 'fasta_6_ws_lines_start_of_file', 'qual_2_seqs_defaults', 'qual_3_seqs_defaults', 'qual_3_seqs_defaults_desc_mismatch', 'qual_3_seqs_defaults_extra', 'qual_3_seqs_defaults_id_mismatch', 'qual_3_seqs_defaults_length_mismatch', 'qual_description_newline_replacement_empty_str', 'qual_description_newline_replacement_multi_char', 'qual_description_newline_replacement_none', 'qual_id_whitespace_replacement_empty_str', 'qual_id_whitespace_replacement_multi_char', 'qual_id_whitespace_replacement_none', 'qual_invalid_blank_line_within_seq', 'qual_invalid_legacy_format', 'qual_invalid_missing_header', 'qual_invalid_missing_qual_scores_first', 'qual_invalid_missing_qual_scores_last', 'qual_invalid_missing_qual_scores_middle', 'qual_invalid_whitespace_line_in_seq', 'qual_invalid_blank_line_after_header', 'qual_invalid_blank_sequence', 'qual_invalid_whitespace_only_sequence', 'qual_invalid_ws_line_after_header', 'qual_invalid_qual_scores_float', 'qual_invalid_qual_scores_string', 'qual_max_width_1', 'qual_max_width_5', 'qual_multi_seq', 'qual_multi_seq_roundtrip', 'qual_prot_seqs_odd_labels', 'qual_tabular_msa_different_type', 
'qual_single_bio_seq_non_defaults', 'qual_single_dna_seq_non_defaults', 'qual_single_prot_seq_non_defaults', 'qual_single_rna_seq_non_defaults', 'qual_single_seq', 'qual_ws_lines_between_records', 'qual_blank_lines_between_records', 'qual_5_blanks_start_of_file', 'qual_5_ws_lines_start_of_file', 'qual_6_blanks_start_of_file', 'qual_6_ws_lines_start_of_file', 'qual_blanks_end_of_file', 'qual_ws_lines_end_of_file' ])) def test_positives(self): for fp in self.positive_fps: self.assertEqual(_fasta_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negative_fps: self.assertEqual(_fasta_sniffer(fp), (False, {})) class ReaderTests(TestCase): def setUp(self): # each structure stores the sequence generator results (expanded into a # list) that we expect to obtain from reading, matched with kwargs to # pass to the reader, and fasta and qual filepaths that should # deserialize into the expected generator results # empty file shouldn't yield sequences self.empty = ([], {}, list(map(get_data_path, ['empty', 'whitespace_only'])), list(map(get_data_path, ['empty', 'whitespace_only']))) # single sequence self.single = ( [Sequence( 'ACGT-acgt.', metadata={'id': 'seq1', 'description': 'desc1'}, positional_metadata={'quality': np.asarray([10, 20, 30, 10, 0, 0, 0, 255, 1, 255], dtype=np.uint8)})], {}, list(map(get_data_path, ['fasta_single_seq', 'fasta_max_width_1'])), list(map(get_data_path, ['qual_single_seq', 'qual_max_width_1'])) ) # multiple sequences self.multi = ( [Sequence( 'ACGT-acgt.', metadata={'id': 'seq1', 'description': 'desc1'}, positional_metadata={'quality': np.asarray([10, 20, 30, 10, 0, 0, 0, 255, 1, 255], dtype=np.uint8)}), Sequence('A', metadata={'id': '_____seq__2_', 'description': ''}, positional_metadata={'quality': np.asarray([42], dtype=np.uint8)}), Sequence( 'AACGGuA', metadata={'id': '', 'description': 'desc3'}, positional_metadata={'quality': np.asarray([0, 0, 0, 0, 0, 0, 0], dtype=np.uint8)}), Sequence( 'ACGTTGCAccGG', metadata={'id': '', 
'description': ''}, positional_metadata={'quality': np.asarray([55, 10, 0, 99, 1, 1, 8, 77, 40, 10, 10, 0], dtype=np.uint8)}), Sequence('ACGUU', metadata={'id': '', 'description': ''}, positional_metadata={'quality': np.asarray([10, 9, 8, 7, 6], dtype=np.uint8)}), Sequence( 'pQqqqPPQQQ', metadata={'id': 'proteinseq', 'description': 'detailed description \t\twith new lines'}, positional_metadata={'quality': np.asarray([42, 42, 255, 255, 42, 42, 42, 42, 42, 43], dtype=np.uint8)})], {}, list(map(get_data_path, ['fasta_multi_seq', 'fasta_max_width_5', 'fasta_blank_lines_between_records', 'fasta_ws_lines_between_records', 'fasta_5_blanks_start_of_file', 'fasta_5_ws_lines_start_of_file', 'fasta_6_blanks_start_of_file', 'fasta_6_ws_lines_start_of_file', 'fasta_blanks_end_of_file', 'fasta_ws_lines_end_of_file'])), list(map(get_data_path, ['qual_multi_seq', 'qual_max_width_5', 'qual_blank_lines_between_records', 'qual_ws_lines_between_records', 'qual_5_blanks_start_of_file', 'qual_5_ws_lines_start_of_file', 'qual_6_blanks_start_of_file', 'qual_6_ws_lines_start_of_file', 'qual_blanks_end_of_file', 'qual_ws_lines_end_of_file'])) ) # test constructor parameter, as well as odd labels (label only # containing whitespace, label description preceded by multiple spaces, # no id) and leading/trailing whitespace on sequence data. for qual # files, in addition to the odd labels, test leading/trailing # whitespace on qual scores, as well as strange number formatting. 
# also test that fasta and qual headers do not need to match # exactly, only that they need to match exactly after parsing (e.g., # after stripping leading/trailing whitespace from descriptions) self.odd_labels_different_type = ( [Protein('DEFQfp', metadata={'id': '', 'description': ''}, positional_metadata={'quality': np.asarray([0, 0, 1, 5, 44, 0], dtype=np.uint8)}, validate=False), Protein( 'SKBI', metadata={'id': '', 'description': 'skbio'}, positional_metadata={'quality': np.asarray([1, 2, 33, 123], dtype=np.uint8)})], {'constructor': partial(Protein, validate=False)}, list(map(get_data_path, ['fasta_prot_seqs_odd_labels'])), list(map(get_data_path, ['qual_prot_seqs_odd_labels'])) ) # sequences that can be loaded into a TabularMSA self.tabular_msa_different_type = ( [RNA('aUG', metadata={'id': '', 'description': ''}, positional_metadata={'quality': np.asarray([20, 20, 21], dtype=np.uint8)}, lowercase='introns'), RNA('AuC', metadata={'id': 'rnaseq-1', 'description': 'rnaseq desc 1'}, positional_metadata={'quality': np.asarray([10, 9, 10], dtype=np.uint8)}, lowercase='introns'), RNA('AUg', metadata={'id': 'rnaseq-2', 'description': 'rnaseq desc 2'}, positional_metadata={'quality': np.asarray([9, 99, 99], dtype=np.uint8)}, lowercase='introns')], {'constructor': partial(RNA, lowercase='introns')}, list(map(get_data_path, ['fasta_tabular_msa_different_type'])), list(map(get_data_path, ['qual_tabular_msa_different_type'])) ) self.lowercase_seqs = ( [DNA('TAcg', metadata={'id': 'f-o-o', 'description': 'b_a_r'}, positional_metadata={'quality': np.asarray([0, 1, 2, 3], dtype=np.uint8)}, lowercase='introns')], {'constructor': DNA, 'lowercase': 'introns'}, list(map(get_data_path, ['fasta_single_dna_seq_non_defaults'])), list(map(get_data_path, ['qual_single_dna_seq_non_defaults'])) ) # store fasta filepath, kwargs, error type, and expected error message # for invalid input. 
# # note: there is some duplication in testing that fasta and qual # parsers raise expected errors. even though the parsers share the same # underlying logic, these tests are here as a safeguard in case the # code is refactored in the future such that fasta and qual have # different implementations (e.g., if qual is written in cython while # fasta remains in python) self.invalid_fps = list(map(lambda e: (get_data_path(e[0]), e[1], e[2], e[3]), [ # fasta and qual missing header ('fasta_invalid_missing_header', {}, FASTAFormatError, r'non-header.*1st'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_missing_header')}, QUALFormatError, r'non-header.*1st'), # fasta and qual with blank line within sequence ('fasta_invalid_blank_line_within_sequence', {}, FASTAFormatError, r'whitespace-only'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_blank_line_within_seq')}, QUALFormatError, r'whitespace-only'), # fasta and qual with blank after header ('fasta_invalid_blank_sequence', {}, FASTAFormatError, r'without sequence data'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_blank_sequence')}, QUALFormatError, r'without quality scores'), # fasta and qual with whitespace only sequence ('fasta_invalid_whitespace_only_sequence', {}, FASTAFormatError, r'without sequence data'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_whitespace_only_sequence')}, QUALFormatError, r'without quality scores'), # fasta and qual with blank line within sequence ('fasta_invalid_blank_line_after_header', {}, FASTAFormatError, r'whitespace-only'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_blank_line_after_header')}, QUALFormatError, r'whitespace-only'), # fasta and qual with whitespace-only line within sequence ('fasta_invalid_whitespace_only_line_within_sequence', {}, FASTAFormatError, r'whitespace-only'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_whitespace_line_in_seq')}, QUALFormatError, 
r'whitespace-only'), # fasta and qual with whitespace-only line after header ('fasta_invalid_whitespace_line_after_header', {}, FASTAFormatError, r'whitespace-only'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_ws_line_after_header')}, QUALFormatError, r'whitespace-only'), # fasta and qual missing record data (first record) ('fasta_invalid_missing_seq_data_first', {}, FASTAFormatError, r'without sequence data'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_missing_qual_scores_first')}, QUALFormatError, r'without quality scores'), # fasta and qual missing record data (middle record) ('fasta_invalid_missing_seq_data_middle', {}, FASTAFormatError, r'without sequence data'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_missing_qual_scores_middle')}, QUALFormatError, r'without quality scores'), # fasta and qual missing record data (last record) ('fasta_invalid_missing_seq_data_last', {}, FASTAFormatError, r'without sequence data'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_missing_qual_scores_last')}, QUALFormatError, r'without quality scores'), # fasta and qual in legacy format (;) ('fasta_invalid_legacy_format', {}, FASTAFormatError, r'non-header.*1st'), ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_legacy_format')}, QUALFormatError, r'non-header.*1st'), # qual file with an extra record ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_3_seqs_defaults_extra')}, FASTAFormatError, r'QUAL file has more'), # fasta file with an extra record ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_2_seqs_defaults')}, FASTAFormatError, r'FASTA file has more'), # id mismatch between fasta and qual ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_3_seqs_defaults_id_mismatch')}, FASTAFormatError, r'IDs do not match.*\'s_e_q_2\' != \'s_e_q_42\''), # description mismatch between fasta and qual ('fasta_3_seqs_defaults', {'qual': 
get_data_path('qual_3_seqs_defaults_desc_mismatch')}, FASTAFormatError, r'Descriptions do not match.*\'desc 2\' != \'desc 42\''), # sequence and quality score length mismatch between fasta and qual ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_3_seqs_defaults_length_mismatch')}, ValueError, r'Number of positional metadata values \(3\) must match the ' r'positional metadata axis length \(4\)\.'), # invalid qual scores (string value can't be converted to integer) ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_qual_scores_string')}, QUALFormatError, r'quality scores to integers:\n100 0 1a -42'), # invalid qual scores (float value can't be converted to integer) ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_qual_scores_float')}, QUALFormatError, r'quality scores to integers:\n42 41.0 39 40'), # invalid qual scores (negative integer) ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_qual_scores_negative')}, QUALFormatError, r'Quality scores must be greater than or equal to zero\.'), # invalid qual scores (over 255) ('fasta_3_seqs_defaults', {'qual': get_data_path('qual_invalid_qual_scores_over_255')}, QUALFormatError, r'quality score\(s\) greater than 255'), # misc. invalid files used elsewhere in the tests ('fasta_invalid_after_10_seqs', {}, FASTAFormatError, r'without sequence data'), ('fasta_id_whitespace_replacement_none', {}, FASTAFormatError, r'whitespace-only'), ('fasta_description_newline_replacement_none', {}, FASTAFormatError, r'whitespace-only') ])) # extensive tests for fasta -> generator reader since it is used by all # other fasta -> object readers def test_fasta_to_generator_valid_files(self): test_cases = (self.empty, self.single, self.multi, self.odd_labels_different_type, self.tabular_msa_different_type, self.lowercase_seqs) # Strategy: # for each fasta file, read it without its corresponding qual file, # and ensure observed vs. expected match, ignoring quality scores in # expected. 
next, parse the current fasta file with each # corresponding quality file and ensure that observed vs. expected # match, this time taking quality scores into account. this # sufficiently exercises parsing a standalone fasta file and paired # fasta/qual files for exp, kwargs, fasta_fps, qual_fps in test_cases: for fasta_fp in fasta_fps: obs = list(_fasta_to_generator(fasta_fp, **kwargs)) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): e = copy.copy(e) del e.positional_metadata['quality'] self.assertEqual(o, e) for qual_fp in qual_fps: obs = list(_fasta_to_generator(fasta_fp, qual=qual_fp, **kwargs)) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): self.assertEqual(o, e) def test_fasta_to_generator_invalid_files(self): for fp, kwargs, error_type, error_msg_regex in self.invalid_fps: with self.assertRaisesRegex(error_type, error_msg_regex): list(_fasta_to_generator(fp, **kwargs)) # light testing of fasta -> object readers to ensure interface is present # and kwargs are passed through. extensive testing of underlying reader is # performed above def test_fasta_to_any_sequence(self): for constructor, reader_fn in ((Sequence, _fasta_to_sequence), (partial(DNA, validate=False, lowercase='introns'), partial(_fasta_to_dna, validate=False, lowercase='introns')), (partial(RNA, validate=False, lowercase='introns'), partial(_fasta_to_rna, validate=False, lowercase='introns')), (partial(Protein, lowercase='introns'), partial(_fasta_to_protein, validate=False, lowercase='introns'))): # empty file empty_fp = get_data_path('empty') with self.assertRaisesRegex(ValueError, r'1st sequence'): reader_fn(empty_fp) with self.assertRaisesRegex(ValueError, r'1st sequence'): reader_fn(empty_fp, qual=empty_fp) # the sequences in the following files don't necessarily make sense # for each of the sequence object types that they're read into # (e.g., reading a protein sequence into a dna sequence object). 
# however, for the purposes of testing the various # fasta -> sequence readers, this works out okay as it is valid to # construct a sequence object with invalid characters. we're # interested in testing the reading logic here, and don't care so # much about constructing semantically-meaningful/valid sequence # objects # file with only 1 seq, get first fasta_fps = list(map(get_data_path, ['fasta_single_seq', 'fasta_max_width_1'])) for fasta_fp in fasta_fps: exp = constructor( 'ACGT-acgt.', metadata={'id': 'seq1', 'description': 'desc1'}) obs = reader_fn(fasta_fp) self.assertEqual(obs, exp) exp.positional_metadata.insert( 0, 'quality', np.asarray([10, 20, 30, 10, 0, 0, 0, 255, 1, 255], dtype=np.uint8)) qual_fps = list(map(get_data_path, ['qual_single_seq', 'qual_max_width_1'])) for qual_fp in qual_fps: obs = reader_fn(fasta_fp, qual=qual_fp) self.assertEqual(obs, exp) # file with multiple seqs fasta_fps = list(map(get_data_path, ['fasta_multi_seq', 'fasta_max_width_5'])) qual_fps = list(map(get_data_path, ['qual_multi_seq', 'qual_max_width_5'])) for fasta_fp in fasta_fps: # get first exp = constructor( 'ACGT-acgt.', metadata={'id': 'seq1', 'description': 'desc1'}) obs = reader_fn(fasta_fp) self.assertEqual(obs, exp) exp.positional_metadata.insert( 0, 'quality', np.asarray([10, 20, 30, 10, 0, 0, 0, 255, 1, 255], dtype=np.uint8)) for qual_fp in qual_fps: obs = reader_fn(fasta_fp, qual=qual_fp) self.assertEqual(obs, exp) # get middle exp = constructor('ACGTTGCAccGG', metadata={'id': '', 'description': ''}) obs = reader_fn(fasta_fp, seq_num=4) self.assertEqual(obs, exp) exp.positional_metadata.insert( 0, 'quality', np.asarray([55, 10, 0, 99, 1, 1, 8, 77, 40, 10, 10, 0], dtype=np.uint8)) for qual_fp in qual_fps: obs = reader_fn(fasta_fp, seq_num=4, qual=qual_fp) self.assertEqual(obs, exp) # get last exp = constructor( 'pQqqqPPQQQ', metadata={'id': 'proteinseq', 'description': 'detailed description \t\twith new lines'}) obs = reader_fn(fasta_fp, seq_num=6) 
self.assertEqual(obs, exp) exp.positional_metadata.insert( 0, 'quality', np.asarray([42, 42, 255, 255, 42, 42, 42, 42, 42, 43], dtype=np.uint8)) for qual_fp in qual_fps: obs = reader_fn(fasta_fp, seq_num=6, qual=qual_fp) self.assertEqual(obs, exp) # seq_num too large with self.assertRaisesRegex(ValueError, r'8th sequence'): reader_fn(fasta_fp, seq_num=8) for qual_fp in qual_fps: with self.assertRaisesRegex(ValueError, r'8th sequence'): reader_fn(fasta_fp, seq_num=8, qual=qual_fp) # seq_num too small with self.assertRaisesRegex(ValueError, r'`seq_num`=0'): reader_fn(fasta_fp, seq_num=0) for qual_fp in qual_fps: with self.assertRaisesRegex(ValueError, r'`seq_num`=0'): reader_fn(fasta_fp, seq_num=0, qual=qual_fp) def test_fasta_to_tabular_msa(self): test_cases = (self.empty, self.single, self.tabular_msa_different_type, self.lowercase_seqs) # see comment in test_fasta_to_generator_valid_files (above) for # testing strategy for exp_list, kwargs, fasta_fps, qual_fps in test_cases: if 'constructor' not in kwargs: kwargs['constructor'] = CustomSequence exp_list = [CustomSequence(seq) for seq in exp_list] exp = TabularMSA(exp_list) for fasta_fp in fasta_fps: obs = _fasta_to_tabular_msa(fasta_fp, **kwargs) self.assertEqual(len(obs), len(exp)) for o, e in zip(obs, exp): e = copy.copy(e) del e.positional_metadata['quality'] self.assertEqual(o, e) for qual_fp in qual_fps: obs = _fasta_to_tabular_msa(fasta_fp, qual=qual_fp, **kwargs) self.assertEqual(obs, exp) def test_fasta_to_tabular_msa_no_constructor(self): with self.assertRaisesRegex(ValueError, r'`constructor`'): _fasta_to_tabular_msa(get_data_path('fasta_single_seq')) class WriterTests(TestCase): def setUp(self): self.bio_seq1 = DNA( 'ACGT-acgt.', metadata={'id': 'seq1', 'description': 'desc1'}, positional_metadata={'quality': [10, 20, 30, 10, 0, 0, 0, 255, 1, 255]}, lowercase='introns') self.bio_seq2 = DNA( 'A', metadata={'id': ' \n \nseq \t2 '}, positional_metadata={'quality': [42]}, lowercase='introns') self.bio_seq3 
= RNA( 'AACGGuA', metadata={'description': 'desc3'}, positional_metadata={'quality': [0, 0, 0, 0, 0, 0, 0]}, lowercase='introns') self.dna_seq = DNA( 'ACGTTGCAccGG', positional_metadata={'quality': [55, 10, 0, 99, 1, 1, 8, 77, 40, 10, 10, 0]}, lowercase='introns') self.rna_seq = RNA('ACGUU', positional_metadata={'quality': [10, 9, 8, 7, 6]}, lowercase='introns') self.prot_seq = Protein( 'pQqqqPPQQQ', metadata={'id': 'proteinseq', 'description': "\ndetailed\ndescription \t\twith " " new\n\nlines\n\n\n"}, positional_metadata={'quality': [42, 42, 255, 255, 42, 42, 42, 42, 42, 43]}, lowercase='introns') seqs = [ CustomSequence( 'UUUU', metadata={'id': 's\te\tq\t1', 'description': 'desc\n1'}, positional_metadata={'quality': [1234, 0, 0, 2]}, lowercase='introns'), CustomSequence( 'CATC', metadata={'id': 's\te\tq\t2', 'description': 'desc\n2'}, positional_metadata={'quality': [1, 11, 111, 11112]}), CustomSequence( 'sits', metadata={'id': 's\te\tq\t3', 'description': 'desc\n3'}, positional_metadata={'quality': [12345, 678909, 999999, 4242424242]}) ] self.msa = TabularMSA(seqs) def empty_gen(): yield from () def single_seq_gen(): yield self.bio_seq1 # generate sequences with descriptions containing newlines (to test # description_newline_replacement) def newline_description_gen(): yield self.prot_seq yield DNA('AGGAGAATA', metadata={'id': 'foo', 'description': '\n\n\n\n'}, positional_metadata={'quality': range(9)}, lowercase='introns') # generate sequences with ids containing whitespace (to test # id_whitespace_replacement) def whitespace_id_gen(): yield self.bio_seq2 yield RNA('UA', metadata={'id': '\n\t \t', 'description': 'a\nb'}, positional_metadata={'quality': [1000, 1]}) # multiple sequences of mixed types, lengths, and metadata. lengths are # chosen to exercise various splitting cases when testing max_width, # including exercising the different splitting algorithms used for # sequence data vs. 
quality scores def multi_seq_gen(): yield from (self.bio_seq1, self.bio_seq2, self.bio_seq3, self.dna_seq, self.rna_seq, self.prot_seq) # can be serialized if no qual file is provided, else it should raise # an error because one seq has qual scores and the other doesn't def mixed_qual_score_gen(): yield self.bio_seq1 yield DNA('AAAAT', metadata={'id': 'da,dadadada', 'description': '10 hours'}, lowercase='introns') self.mixed_qual_score_gen = mixed_qual_score_gen() # store sequence generator to serialize, writer kwargs (if any), and # fasta and qual filepaths of expected results self.objs_fps = list(map(lambda e: (e[0], e[1], get_data_path(e[2]), get_data_path(e[3])), [ (empty_gen(), {}, 'empty', 'empty'), (single_seq_gen(), {'lowercase': 'introns'}, 'fasta_single_seq', 'qual_single_seq'), # no splitting of sequence or qual data across lines b/c max_width # is sufficiently large (single_seq_gen(), {'max_width': 32, 'lowercase': 'introns'}, 'fasta_single_seq', 'qual_single_seq'), # splitting algorithm for sequence and qual scores is different; # make sure individual qual scores aren't split across lines even # if they exceed max_width (single_seq_gen(), {'max_width': 1, 'lowercase': 'introns'}, 'fasta_max_width_1', 'qual_max_width_1'), (multi_seq_gen(), {'lowercase': 'introns'}, 'fasta_multi_seq', 'qual_multi_seq'), (multi_seq_gen(), {'max_width': 5, 'lowercase': 'introns'}, 'fasta_max_width_5', 'qual_max_width_5'), (newline_description_gen(), {'description_newline_replacement': ':-)', 'lowercase': 'introns'}, 'fasta_description_newline_replacement_multi_char', 'qual_description_newline_replacement_multi_char'), (newline_description_gen(), {'description_newline_replacement': '', 'lowercase': 'introns'}, 'fasta_description_newline_replacement_empty_str', 'qual_description_newline_replacement_empty_str',), (newline_description_gen(), {'description_newline_replacement': None, 'lowercase': 'introns'}, 'fasta_description_newline_replacement_none', 
'qual_description_newline_replacement_none'), (whitespace_id_gen(), {'id_whitespace_replacement': '>:o'}, 'fasta_id_whitespace_replacement_multi_char', 'qual_id_whitespace_replacement_multi_char'), (whitespace_id_gen(), {'id_whitespace_replacement': ''}, 'fasta_id_whitespace_replacement_empty_str', 'qual_id_whitespace_replacement_empty_str'), (whitespace_id_gen(), {'id_whitespace_replacement': None}, 'fasta_id_whitespace_replacement_none', 'qual_id_whitespace_replacement_none'), ])) def blank_seq_gen(): yield from (self.bio_seq1, Sequence('')) # generators or parameter combos that cannot be written in fasta # format, paired with kwargs (if any), error type, and expected error # message regexp self.invalid_objs = [ (blank_seq_gen(), {}, ValueError, r'2nd.*empty'), (single_seq_gen(), {'max_width': 0}, ValueError, r'max_width=0'), (multi_seq_gen(), {'id_whitespace_replacement': '-\n_'}, ValueError, r'Newline character'), (multi_seq_gen(), {'description_newline_replacement': '-.-\n'}, ValueError, r'Newline character'), (mixed_qual_score_gen(), {'qual': io.StringIO()}, ValueError, r'2nd sequence.*does not have quality scores') ] # extensive tests for generator -> fasta writer since it is used by all # other object -> fasta writers def test_generator_to_fasta_no_qual(self): # test writing standalone fasta (i.e., without a qual file) for obj, kwargs, fp, _ in self.objs_fps: fh = io.StringIO() _generator_to_fasta(obj, fh, **kwargs) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_generator_to_fasta_mixed_qual_scores(self): # test writing some sequences with qual scores and some without is # possible if no qual output file is specified fh = io.StringIO() _generator_to_fasta(self.mixed_qual_score_gen, fh, lowercase='introns') obs = fh.getvalue() fh.close() with io.open(get_data_path('fasta_mixed_qual_scores')) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_generator_to_fasta_with_qual(self): # test writing 
fasta and qual files for obj, kwargs, fasta_fp, qual_fp in self.objs_fps: if qual_fp is not None: fasta_fh = io.StringIO() qual_fh = io.StringIO() _generator_to_fasta(obj, fasta_fh, qual=qual_fh, **kwargs) obs_fasta = fasta_fh.getvalue() obs_qual = qual_fh.getvalue() fasta_fh.close() qual_fh.close() with io.open(fasta_fp) as fh: exp_fasta = fh.read() with io.open(qual_fp) as fh: exp_qual = fh.read() self.assertEqual(obs_fasta, exp_fasta) self.assertEqual(obs_qual, exp_qual) def test_generator_to_fasta_invalid_input(self): for obj, kwargs, error_type, error_msg_regexp in self.invalid_objs: fh = io.StringIO() with self.assertRaisesRegex(error_type, error_msg_regexp): _generator_to_fasta(obj, fh, **kwargs) fh.close() # light testing of object -> fasta writers to ensure interface is present # and kwargs are passed through. extensive testing of underlying writer is # performed above def test_any_sequence_to_fasta(self): # store writer function, sequence object to write, expected # fasta filepath for default parameters, expected fasta filepath for # non-defaults, and expected qual filepath for non-defaults id_ = 'f o o' desc = 'b\na\nr' test_data = ( (partial(_sequence_to_fasta, lowercase='introns'), Sequence('ACgt', metadata={'id': id_, 'description': desc}, positional_metadata={'quality': range(1, 5)}, lowercase='introns'), ('fasta_single_bio_seq_defaults', 'fasta_single_bio_seq_non_defaults', 'qual_single_bio_seq_non_defaults')), (partial(_dna_to_fasta, lowercase='introns'), DNA('TAcg', metadata={'id': id_, 'description': desc}, positional_metadata={'quality': range(4)}, lowercase='introns'), ('fasta_single_dna_seq_defaults', 'fasta_single_dna_seq_non_defaults', 'qual_single_dna_seq_non_defaults')), (partial(_rna_to_fasta, lowercase='introns'), RNA('uaCG', metadata={'id': id_, 'description': desc}, positional_metadata={'quality': range(2, 6)}, lowercase='introns'), ('fasta_single_rna_seq_defaults', 'fasta_single_rna_seq_non_defaults', 
'qual_single_rna_seq_non_defaults')), (partial(_protein_to_fasta, lowercase='introns'), Protein('PqQ', metadata={'id': id_, 'description': desc}, positional_metadata={'quality': [42, 41, 40]}, lowercase='introns'), ('fasta_single_prot_seq_defaults', 'fasta_single_prot_seq_non_defaults', 'qual_single_prot_seq_non_defaults'))) for fn, obj, fps in test_data: defaults_fp, non_defaults_fasta_fp, non_defaults_qual_fp = fps # test writing with default parameters fh = io.StringIO() fn(obj, fh) obs = fh.getvalue() fh.close() with io.open(get_data_path(defaults_fp)) as fh: exp = fh.read() self.assertEqual(obs, exp) # test writing with non-defaults fasta_fh = io.StringIO() qual_fh = io.StringIO() fn(obj, fasta_fh, id_whitespace_replacement='-', description_newline_replacement='_', max_width=1, qual=qual_fh) obs_fasta = fasta_fh.getvalue() obs_qual = qual_fh.getvalue() fasta_fh.close() qual_fh.close() with io.open(get_data_path(non_defaults_fasta_fp)) as fh: exp_fasta = fh.read() with io.open(get_data_path(non_defaults_qual_fp)) as fh: exp_qual = fh.read() self.assertEqual(obs_fasta, exp_fasta) self.assertEqual(obs_qual, exp_qual) def test_any_sequences_to_fasta(self): # test writing with default parameters fh = io.StringIO() _tabular_msa_to_fasta(self.msa, fh) obs = fh.getvalue() fh.close() with io.open(get_data_path('fasta_3_seqs_defaults')) as fh: exp = fh.read() self.assertEqual(obs, exp) # test writing with non-defaults fasta_fh = io.StringIO() qual_fh = io.StringIO() _tabular_msa_to_fasta(self.msa, fasta_fh, id_whitespace_replacement='*', description_newline_replacement='+', max_width=3, qual=qual_fh) obs_fasta = fasta_fh.getvalue() obs_qual = qual_fh.getvalue() fasta_fh.close() qual_fh.close() with io.open(get_data_path('fasta_3_seqs_non_defaults')) as fh: exp_fasta = fh.read() with io.open(get_data_path('qual_3_seqs_non_defaults')) as fh: exp_qual = fh.read() self.assertEqual(obs_fasta, exp_fasta) self.assertEqual(obs_qual, exp_qual) class RoundtripTests(TestCase): def 
test_roundtrip_generators(self): # test that fasta and qual files can be streamed into memory and back # out to disk using generator reader and writer fps = list(map(lambda e: list(map(get_data_path, e)), [('empty', 'empty'), ('fasta_multi_seq_roundtrip', 'qual_multi_seq_roundtrip')])) for fasta_fp, qual_fp in fps: with io.open(fasta_fp) as fh: exp_fasta = fh.read() with io.open(qual_fp) as fh: exp_qual = fh.read() fasta_fh = io.StringIO() qual_fh = io.StringIO() _generator_to_fasta(_fasta_to_generator(fasta_fp, qual=qual_fp), fasta_fh, qual=qual_fh) obs_fasta = fasta_fh.getvalue() obs_qual = qual_fh.getvalue() fasta_fh.close() qual_fh.close() self.assertEqual(obs_fasta, exp_fasta) self.assertEqual(obs_qual, exp_qual) def test_roundtrip_tabular_msa(self): fps = list(map(lambda e: list(map(get_data_path, e)), [('empty', 'empty'), ('fasta_tabular_msa_different_type', 'qual_tabular_msa_different_type')])) reader = partial(_fasta_to_tabular_msa, constructor=CustomSequence) writer = _tabular_msa_to_fasta for fasta_fp, qual_fp in fps: # read obj1 = reader(fasta_fp, qual=qual_fp) # write fasta_fh = io.StringIO() qual_fh = io.StringIO() writer(obj1, fasta_fh, qual=qual_fh) fasta_fh.seek(0) qual_fh.seek(0) # read obj2 = reader(fasta_fh, qual=qual_fh) fasta_fh.close() qual_fh.close() self.assertEqual(obj1, obj2) def test_roundtrip_biological_sequences(self): fps = list(map(lambda e: list(map(get_data_path, e)), [('fasta_multi_seq_roundtrip', 'qual_multi_seq_roundtrip'), ('fasta_tabular_msa_different_type', 'qual_tabular_msa_different_type')])) for reader, writer in ((_fasta_to_sequence, _sequence_to_fasta), (partial(_fasta_to_dna, validate=False), _dna_to_fasta), (partial(_fasta_to_rna, validate=False), _rna_to_fasta), (partial(_fasta_to_protein, validate=False), _protein_to_fasta)): for fasta_fp, qual_fp in fps: # read obj1 = reader(fasta_fp, qual=qual_fp) # write fasta_fh = io.StringIO() qual_fh = io.StringIO() writer(obj1, fasta_fh, qual=qual_fh) fasta_fh.seek(0) 
qual_fh.seek(0) # read obj2 = reader(fasta_fh, qual=qual_fh) fasta_fh.close() qual_fh.close() self.assertEqual(obj1, obj2) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_fastq.py000066400000000000000000000702101464262511300224570ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import string import unittest import warnings from functools import partial from skbio import read, write, Sequence, DNA, RNA, Protein, TabularMSA from skbio.io import FASTQFormatError from skbio.io.format.fastq import ( _fastq_sniffer, _fastq_to_generator, _fastq_to_tabular_msa, _generator_to_fastq, _tabular_msa_to_fastq) from skbio.sequence import GrammaredSequence from skbio.util import get_data_path from skbio.util import classproperty from skbio.util._decorator import overrides import numpy as np # Note: the example FASTQ files with file extension .fastq are taken from the # following open-access publication's supplementary data: # # P.J.A. Cock, C.J. Fields, N. Goto, M.L. Heuer and P.M. Rice (2009). The # Sanger FASTQ file format for sequences with quality scores, and the # Solexa/Illumina FASTQ variants. # # See licenses/fastq-example-files-readme.txt for the original README that # accompanied these files, which includes the terms of use and detailed # description of the files. # # The example files bearing the original filenames have not been modified from # their original form. 
def _drop_kwargs(kwargs, *args): for arg in args: if arg in kwargs: kwargs.pop(arg) class TestSniffer(unittest.TestCase): def setUp(self): self.positives = [get_data_path(e) for e in [ 'fastq_multi_seq_sanger', 'fastq_multi_blank_between_records', 'fastq_multi_ws_lines_between_records', 'fastq_multi_blank_end_of_file', 'fastq_multi_ws_lines_end_of_file', 'fastq_multi_whitespace_stripping', 'fastq_blank_lines', 'fastq_whitespace_only_lines', 'fastq_single_seq_illumina1.3', 'fastq_wrapping_as_illumina_no_description', 'fastq_wrapping_as_sanger_no_description', 'fastq_wrapping_original_sanger_no_description', 'fastq_writer_illumina1.3_defaults', 'fastq_writer_sanger_defaults', 'fastq_writer_sanger_non_defaults', 'fastq_5_blanks_start_of_file', 'fastq_5_ws_lines_start_of_file', 'illumina_full_range_as_illumina.fastq', 'illumina_full_range_as_sanger.fastq', 'illumina_full_range_original_illumina.fastq', 'longreads_as_illumina.fastq', 'longreads_as_sanger.fastq', 'longreads_original_sanger.fastq', 'misc_dna_as_illumina.fastq', 'misc_dna_as_sanger.fastq', 'misc_dna_original_sanger.fastq', 'misc_rna_as_illumina.fastq', 'misc_rna_as_sanger.fastq', 'misc_rna_original_sanger.fastq', 'sanger_full_range_as_illumina.fastq', 'sanger_full_range_as_sanger.fastq', 'sanger_full_range_original_sanger.fastq', 'solexa_full_range_original_solexa.fastq', 'wrapping_as_illumina.fastq', 'wrapping_as_sanger.fastq', 'wrapping_original_sanger.fastq' ]] self.negatives = [get_data_path(e) for e in [ 'empty', 'whitespace_only', 'fastq_multi_blank_start_of_file', 'fastq_multi_ws_lines_start_of_file', 'fastq_invalid_blank_after_header', 'fastq_invalid_blank_after_seq', 'fastq_invalid_blank_after_plus', 'fastq_invalid_blank_within_seq', 'fastq_invalid_blank_within_qual', 'fastq_invalid_ws_line_after_header', 'fastq_invalid_ws_line_after_seq', 'fastq_invalid_ws_line_after_plus', 'fastq_invalid_ws_line_within_seq', 'fastq_invalid_ws_line_within_qual', 'fastq_invalid_missing_header', 
'fastq_invalid_missing_seq_data', 'error_diff_ids.fastq', 'error_double_qual.fastq', 'error_double_seq.fastq', 'error_long_qual.fastq', 'error_no_qual.fastq', 'error_qual_del.fastq', 'error_qual_escape.fastq', 'error_qual_null.fastq', 'error_qual_space.fastq', 'error_qual_tab.fastq', 'error_qual_unit_sep.fastq', 'error_qual_vtab.fastq', 'error_short_qual.fastq', 'error_spaces.fastq', 'error_tabs.fastq', 'error_trunc_at_seq.fastq', 'error_trunc_at_plus.fastq', 'error_trunc_at_qual.fastq', 'error_trunc_in_title.fastq', 'error_trunc_in_seq.fastq', 'error_trunc_in_plus.fastq', 'error_trunc_in_qual.fastq', ]] def test_positives(self): for fp in self.positives: self.assertEqual(_fastq_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negatives: self.assertEqual(_fastq_sniffer(fp), (False, {})) def test_illumina_sniffed(self): fp = get_data_path('fastq_single_seq_illumina1.8') self.assertEqual(_fastq_sniffer(fp), (True, {'variant': 'illumina1.8'})) class TestReaders(unittest.TestCase): def setUp(self): self.valid_configurations = [ ([get_data_path('empty'), get_data_path('whitespace_only')], [{}, {'variant': 'illumina1.8'}, {'phred_offset': 33, 'constructor': DNA}], []), ([get_data_path('fastq_single_seq_illumina1.3')], [ {'variant': 'illumina1.3'}, {'phred_offset': 64}, {'variant': 'illumina1.3', 'constructor': Protein}, ], [ ('', 'bar\t baz', 'aCGT', [33, 34, 35, 36]) ]), ([get_data_path('fastq_multi_seq_sanger'), get_data_path('fastq_whitespace_only_lines'), get_data_path('fastq_blank_lines'), get_data_path('fastq_multi_blank_between_records'), get_data_path('fastq_multi_ws_lines_between_records'), get_data_path('fastq_multi_blank_end_of_file'), get_data_path('fastq_multi_ws_lines_end_of_file'), get_data_path('fastq_multi_blank_start_of_file'), get_data_path('fastq_multi_ws_lines_start_of_file'), get_data_path('fastq_multi_whitespace_stripping')], [ {'variant': 'sanger'}, {'phred_offset': 33, 'seq_num': 2}, {'variant': 'sanger', 'constructor': 
partial(RNA, validate=False), 'seq_num': 3}, ], [ ('foo', 'bar baz', 'AACCGG', [16, 17, 18, 19, 20, 21]), ('bar', 'baz foo', 'TTGGCC', [23, 22, 21, 20, 19, 18]), ('baz', 'foo bar', 'GATTTC', [20, 21, 22, 23, 24, 18]) ]), ] self.invalid_files = [(get_data_path(e[0]), e[1], e[2]) for e in [ ('fastq_invalid_blank_after_header', FASTQFormatError, r'blank or whitespace-only line.*after header.*in FASTQ'), ('fastq_invalid_blank_after_seq', FASTQFormatError, r"blank or whitespace-only line.*before '\+' in FASTQ"), ('fastq_invalid_blank_after_plus', FASTQFormatError, r"blank or whitespace-only line.*after '\+'.*in FASTQ"), ('fastq_invalid_blank_within_seq', FASTQFormatError, r'blank or whitespace-only line.*within sequence.*FASTQ'), ('fastq_invalid_blank_within_qual', FASTQFormatError, r"blank or whitespace-only line.*within quality scores.*in " "FASTQ"), ('fastq_invalid_ws_line_after_header', FASTQFormatError, r'blank or whitespace-only line.*after header.*in FASTQ'), ('fastq_invalid_ws_line_after_seq', FASTQFormatError, r"blank or whitespace-only line.*before '\+' in FASTQ"), ('fastq_invalid_ws_line_after_plus', FASTQFormatError, r"blank or whitespace-only line.*after '\+'.*in FASTQ"), ('fastq_invalid_ws_line_within_seq', FASTQFormatError, r'blank or whitespace-only line.*within sequence.*FASTQ'), ('fastq_invalid_ws_line_within_qual', FASTQFormatError, r"blank or whitespace-only line.*within quality scores.*in " "FASTQ"), ('fastq_invalid_missing_header', FASTQFormatError, r"sequence.*header.*start of file: 'seq1 desc1'"), ('fastq_invalid_missing_seq_data', FASTQFormatError, r'without sequence data'), ('error_diff_ids.fastq', FASTQFormatError, r"header lines do not match: " "'SLXA-B3_649_FC8437_R1_1_1_850_123' != " "'SLXA-B3_649_FC8437_R1_1_1_850_124'"), ('error_double_qual.fastq', FASTQFormatError, r"Extra quality.*'\+SLXA-B3_649_FC8437_R1_1_1_850_123'"), ('error_double_seq.fastq', FASTQFormatError, r'FASTQ record that is missing a quality \(\+\) header line'), 
('error_long_qual.fastq', FASTQFormatError, r"Extra quality.*'Y'"), ('error_no_qual.fastq', FASTQFormatError, r"blank or whitespace-only line.*after '\+'.*in FASTQ"), ('error_qual_del.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_escape.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_null.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_space.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_tab.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_unit_sep.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_qual_vtab.fastq', ValueError, r'Decoded Phred score.*out of range'), ('error_short_qual.fastq', FASTQFormatError, r"Extra quality.*'SLXA-B3_649_FC8437_R1_1_1_362_549'"), ('error_spaces.fastq', FASTQFormatError, r"whitespace.*sequence data: 'GATGTGCAA TACCTTTGTA GAGGAA'"), ('error_tabs.fastq', FASTQFormatError, r"whitespace.*sequence data: 'GATGTGCAA\\tTACCTTTGTA\\tGAGGAA'"), ('error_trunc_at_seq.fastq', FASTQFormatError, r'incomplete/truncated.*FASTQ'), ('error_trunc_at_plus.fastq', FASTQFormatError, r'incomplete/truncated.*FASTQ'), ('error_trunc_at_qual.fastq', FASTQFormatError, r'incomplete/truncated.*end of file'), ('error_trunc_in_title.fastq', FASTQFormatError, r'incomplete/truncated.*end of file'), ('error_trunc_in_seq.fastq', FASTQFormatError, r'incomplete/truncated.*end of file'), ('error_trunc_in_plus.fastq', FASTQFormatError, r"header lines do not match: " "'SLXA-B3_649_FC8437_R1_1_1_183_714' != 'SLXA-B3_649_FC'"), ('error_trunc_in_qual.fastq', FASTQFormatError, r'incomplete/truncated.*end of file') ]] def test_fastq_to_generator_valid_files(self): for valid_files, kwargs, components in self.valid_configurations: for valid in valid_files: for observed_kwargs in kwargs: _drop_kwargs(observed_kwargs, 'seq_num') constructor = observed_kwargs.get('constructor', Sequence) expected_kwargs = {} expected_kwargs['lowercase'] = 
'introns' observed_kwargs['lowercase'] = 'introns' expected = [constructor(c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': np.array(c[3], dtype=np.uint8)}, **expected_kwargs) for c in components] observed = list(_fastq_to_generator(valid, **observed_kwargs)) self.assertEqual(len(expected), len(observed)) for o, e in zip(observed, expected): self.assertEqual(o, e) def test_fastq_to_generator_invalid_files_all_variants(self): # files that should be invalid for all variants, as well as custom # phred offsets for fp, error_type, error_msg_regex in self.invalid_files: for variant in 'sanger', 'illumina1.3', 'illumina1.8': with self.assertRaisesRegex(error_type, error_msg_regex): list(_fastq_to_generator(fp, variant=variant)) for offset in 33, 64, 40, 77: with self.assertRaisesRegex(error_type, error_msg_regex): list(_fastq_to_generator(fp, phred_offset=offset)) def test_fastq_to_generator_invalid_files_illumina(self): # files that should be invalid for illumina1.3 and illumina1.8 variants fps = [get_data_path(fp) for fp in ['sanger_full_range_original_sanger.fastq', 'solexa_full_range_original_solexa.fastq']] for fp in fps: with self.assertRaisesRegex(ValueError, r'out of range \[0, 62\]'): list(_fastq_to_generator(fp, variant='illumina1.3')) with self.assertRaisesRegex(ValueError, r'out of range \[0, 62\]'): list(_fastq_to_generator(fp, variant='illumina1.8')) def test_fastq_to_generator_solexa(self): # solexa support isn't implemented yet. 
should raise error even with # valid solexa file with self.assertRaisesRegex(ValueError, r'Solexa'): list(_fastq_to_generator( get_data_path('solexa_full_range_original_solexa.fastq'), variant='solexa')) def test_fastq_to_sequence(self): for constructor in [Sequence, DNA, RNA, Protein]: for valid_files, kwargs, components in self.valid_configurations: for valid in valid_files: # skip empty file case since we cannot read a specific # sequencefrom an empty file if len(components) == 0: continue for observed_kwargs in kwargs: expected_kwargs = {} # TODO: # some of the test files contain characters which are # invalid for RNA, so don't validate for now. Need to # fix this if constructor is RNA: observed_kwargs['validate'] = False expected_kwargs['validate'] = False _drop_kwargs(observed_kwargs, 'constructor') expected_kwargs['lowercase'] = 'introns' observed_kwargs['lowercase'] = 'introns' seq_num = observed_kwargs.get('seq_num', 1) c = components[seq_num - 1] expected = \ constructor( c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': np.array(c[3], dtype=np.uint8)}, **expected_kwargs) observed = read(valid, into=constructor, format='fastq', verify=False, **observed_kwargs) self.assertEqual(observed, expected) def test_fastq_to_tabular_msa(self): class CustomSequence(GrammaredSequence): @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set('-.') @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return '-' @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set(string.ascii_letters) @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return {} for valid_files, kwargs, components in self.valid_configurations: for valid in valid_files: for observed_kwargs in kwargs: _drop_kwargs(observed_kwargs, 'seq_num') if 'constructor' not in observed_kwargs: observed_kwargs['constructor'] = CustomSequence constructor = observed_kwargs['constructor'] expected_kwargs = 
{} expected_kwargs['lowercase'] = 'introns' observed_kwargs['lowercase'] = 'introns' expected = TabularMSA( [constructor( c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': np.array(c[3], dtype=np.uint8)}, **expected_kwargs) for c in components]) observed = _fastq_to_tabular_msa(valid, **observed_kwargs) self.assertEqual(observed, expected) def test_fastq_to_tabular_msa_no_constructor(self): with self.assertRaisesRegex(ValueError, r'`constructor`'): _fastq_to_tabular_msa(get_data_path('fastq_multi_seq_sanger')) class TestWriters(unittest.TestCase): def setUp(self): self.valid_files = [ ([ ('f o o', 'bar\n\nbaz', 'AaCcGg', [16, 17, 18, 19, 20, 21]), ('bar', 'baz foo', 'TtGgCc', [23, 22, 21, 20, 19, 18]), ('ba\n\t\tz', 'foo bar', 'gAtTtC', [20, 21, 22, 23, 24, 18]) ], [ ({'variant': 'sanger'}, get_data_path('fastq_writer_sanger_defaults')), ({'phred_offset': 33}, get_data_path('fastq_writer_sanger_defaults')), ({'variant': 'illumina1.8'}, get_data_path('fastq_writer_sanger_defaults')), ({'variant': 'illumina1.3'}, get_data_path('fastq_writer_illumina1.3_defaults')), ({'variant': 'sanger', 'id_whitespace_replacement': '%', 'description_newline_replacement': '^'}, get_data_path('fastq_writer_sanger_non_defaults')) ]), ] def test_generator_to_fastq_kwargs_passed(self): for components, kwargs_expected_fp in self.valid_files: for kwargs, expected_fp in kwargs_expected_fp: def gen(): for c in components: yield Sequence( c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': c[3]}) fh = io.StringIO() _generator_to_fastq(gen(), fh, **kwargs) observed = fh.getvalue() fh.close() with io.open(expected_fp) as f: expected = f.read() self.assertEqual(observed, expected) def test_sequence_to_fastq_kwargs_passed(self): for constructor in [Sequence, DNA, RNA, Protein]: for components, kwargs_expected_fp in self.valid_files: for expected_kwargs, expected_fp in kwargs_expected_fp: observed_kwargs = {} # TODO: # some of the test 
files contain characters which are # invalid for RNA, so don't validate for now. Need to # fix this if constructor is RNA: observed_kwargs['validate'] = False expected_kwargs['lowercase'] = 'introns' observed_kwargs['lowercase'] = 'introns' fh = io.StringIO() for c in components: obj = constructor( c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': c[3]}, **observed_kwargs) write(obj, into=fh, format='fastq', **expected_kwargs) observed = fh.getvalue() fh.close() with io.open(expected_fp) as f: expected = f.read() self.assertEqual(observed, expected) def test_tabular_msa_to_fastq_kwargs_passed(self): for components, kwargs_expected_fp in self.valid_files: for kwargs, expected_fp in kwargs_expected_fp: obj = TabularMSA([ Protein(c[2], metadata={'id': c[0], 'description': c[1]}, positional_metadata={'quality': c[3]}, lowercase='introns') for c in components]) fh = io.StringIO() kwargs['lowercase'] = 'introns' _tabular_msa_to_fastq(obj, fh, **kwargs) observed = fh.getvalue() fh.close() with io.open(expected_fp) as f: expected = f.read() self.assertEqual(observed, expected) def test_generator_to_fastq_no_qual(self): def gen(): yield Sequence('ACGT', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': range(4)}) yield Sequence('ACG', metadata={'id': 'foo', 'description': 'bar'}) with self.assertRaisesRegex(ValueError, r'2nd.*quality scores'): _generator_to_fastq(gen(), io.StringIO(), variant='illumina1.8') class TestConversions(unittest.TestCase): def setUp(self): self.conversions = [ (get_data_path('empty'), get_data_path('empty'), [ ({'variant': 'sanger'}, {'phred_offset': 42}), ]), (get_data_path('longreads_original_sanger.fastq'), get_data_path('longreads_as_sanger.fastq'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, {'phred_offset': 33}) ]), (get_data_path('longreads_original_sanger.fastq'), 
get_data_path('longreads_as_illumina.fastq'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), (get_data_path('wrapping_original_sanger.fastq'), get_data_path('wrapping_as_sanger.fastq'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, {'phred_offset': 33}) ]), (get_data_path('wrapping_original_sanger.fastq'), get_data_path('wrapping_as_illumina.fastq'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), (get_data_path('sanger_full_range_original_sanger.fastq'), get_data_path('sanger_full_range_as_sanger.fastq'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, {'phred_offset': 33}) ]), (get_data_path('sanger_full_range_original_sanger.fastq'), get_data_path('sanger_full_range_as_illumina.fastq'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), (get_data_path('illumina_full_range_original_illumina.fastq'), get_data_path('illumina_full_range_as_illumina.fastq'), [ ({'variant': 'illumina1.3'}, {'variant': 'illumina1.3'}), ({'phred_offset': 64}, {'variant': 'illumina1.3'}), ({'variant': 'illumina1.3'}, {'phred_offset': 64}) ]), (get_data_path('illumina_full_range_original_illumina.fastq'), get_data_path('illumina_full_range_as_sanger.fastq'), [ ({'variant': 'illumina1.3'}, {'variant': 'sanger'}), ({'phred_offset': 64}, {'variant': 'sanger'}), ({'variant': 'illumina1.3'}, {'phred_offset': 33}) ]), (get_data_path('misc_dna_original_sanger.fastq'), get_data_path('misc_dna_as_sanger.fastq'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, 
{'phred_offset': 33}) ]), (get_data_path('misc_dna_original_sanger.fastq'), get_data_path('misc_dna_as_illumina.fastq'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), (get_data_path('misc_rna_original_sanger.fastq'), get_data_path('misc_rna_as_sanger.fastq'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, {'phred_offset': 33}) ]), (get_data_path('misc_rna_original_sanger.fastq'), get_data_path('misc_rna_as_illumina.fastq'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), (get_data_path('fastq_wrapping_original_sanger_no_description'), get_data_path('fastq_wrapping_as_sanger_no_description'), [ ({'variant': 'sanger'}, {'variant': 'sanger'}), ({'phred_offset': 33}, {'variant': 'sanger'}), ({'variant': 'sanger'}, {'phred_offset': 33}) ]), (get_data_path('fastq_wrapping_original_sanger_no_description'), get_data_path('fastq_wrapping_as_illumina_no_description'), [ ({'variant': 'sanger'}, {'variant': 'illumina1.3'}), ({'phred_offset': 33}, {'variant': 'illumina1.3'}), ({'variant': 'sanger'}, {'phred_offset': 64}) ]), ] def test_conversion(self): for from_fp, to_fp, kwargs in self.conversions: for from_kwargs, to_kwargs in kwargs: read_gen = _fastq_to_generator(from_fp, **from_kwargs) fh = io.StringIO() # will issue warning when truncating quality scores with warnings.catch_warnings(record=True): warnings.simplefilter("ignore") _generator_to_fastq(read_gen, fh, **to_kwargs) obs = fh.getvalue() fh.close() with io.open(to_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_genbank.py000066400000000000000000000363431464262511300227570ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main from skbio import Protein, DNA, RNA, Sequence from skbio.metadata import IntervalMetadata from skbio.util import get_data_path from skbio.io import GenBankFormatError from skbio.io.format.genbank import ( _genbank_sniffer, _genbank_to_generator, _genbank_to_sequence, _genbank_to_dna, _genbank_to_rna, _genbank_to_protein, _parse_locus, _parse_reference, _generator_to_genbank, _sequence_to_genbank, _protein_to_genbank, _rna_to_genbank, _dna_to_genbank, _serialize_locus) class SnifferTests(TestCase): def setUp(self): self.positive_fps = list(map(get_data_path, [ 'genbank_5_blanks_start_of_file', 'genbank_single_record_upper', 'genbank_single_record_lower', 'genbank_multi_records'])) self.negative_fps = list(map(get_data_path, [ 'empty', 'whitespace_only', 'genbank_6_blanks_start_of_file', 'genbank_w_beginning_whitespace', 'genbank_missing_locus_name'])) def test_positives(self): for fp in self.positive_fps: self.assertEqual(_genbank_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negative_fps: self.assertEqual(_genbank_sniffer(fp), (False, {})) class GenBankIOTests(TestCase): # parent class to set up test data for the child class def setUp(self): # test locus line self.locus = ( (['LOCUS NC_005816 9609 bp ' 'DNA circular CON 07-FEB-2015'], {'division': 'CON', 'mol_type': 'DNA', 'shape': 'circular', 'locus_name': 'NC_005816', 'date': '07-FEB-2015', 'unit': 'bp', 'size': 9609}), (['LOCUS SCU49845 5028 bp ' 'DNA PLN 21-JUN-1999'], {'division': 'PLN', 'mol_type': 'DNA', 'shape': None, 'locus_name': 'SCU49845', 'date': '21-JUN-1999', 'unit': 'bp', 'size': 5028}), 
(['LOCUS NP_001832 360 aa ' 'linear PRI 18-DEC-2001'], {'division': 'PRI', 'mol_type': None, 'shape': 'linear', 'locus_name': 'NP_001832', 'date': '18-DEC-2001', 'unit': 'aa', 'size': 360})) # test single record and read uppercase sequence self.single_upper_fp = get_data_path('genbank_single_record_upper') self.single_lower_fp = get_data_path('genbank_single_record_lower') self.single = ( 'GSREILDFK', {'LOCUS': {'date': '23-SEP-1994', 'division': 'BCT', 'locus_name': 'AAB29917', 'mol_type': None, 'shape': 'linear', 'size': 9, 'unit': 'aa'}}, None, Protein) self.single_rna_fp = get_data_path('genbank_single_record') imd = IntervalMetadata(63) imd.add([(0, 63)], [(False, False)], {'db_xref': '"taxon:562"', 'mol_type': '"mRNA"', 'organism': '"Escherichia coli"', 'type': 'source', 'strand': '+', '__location': '1..63'}) imd.add([(0, 63)], [(False, True)], {'phase': 0, 'db_xref': ['"taxon:562"', '"taxon:561"'], '__location': '1..>63', 'strand': '+', 'note': '"alkaline phosphatase signal peptide"', 'protein_id': '"AAA23431.1"', 'transl_table': '11', 'translation': '"MKQSTIALAVLPLLFTPVTKA"', 'type': 'CDS'}) self.single_rna = ( 'gugaaacaaagcacuauugcacuggcugucuuaccguuacuguuuaccccugugacaaaagcc', {'ACCESSION': 'M14399', 'COMMENT': 'Original source text: E.coli, cDNA to mRNA.', 'DEFINITION': "alkaline phosphatase signal mRNA, 5' end.", 'KEYWORDS': 'alkaline phosphatase; signal peptide.', 'LOCUS': {'date': '26-APR-1993', 'division': 'BCT', 'locus_name': 'ECOALKP', 'mol_type': 'mRNA', 'shape': 'linear', 'size': 63, 'unit': 'bp'}, 'SOURCE': {'ORGANISM': 'Escherichia coli', 'taxonomy': 'Bacteria; Proteobacteria; ' 'Gammaproteobacteria; Enterobacteriales; ' 'Enterobacteriaceae; Escherichia.'}, 'VERSION': 'M14399.1'}, imd, RNA) # test: # 1. multiple records in one file # 2. lowercase sequence # 3. DNA, RNA, Protein type # 4. 
variation of formats self.multi_fp = get_data_path('genbank_multi_records') imd_pro = IntervalMetadata(9) imd_pro.add([(0, 9)], [(False, False)], {'organism': '"Bacteria"', 'type': 'source', 'strand': '+', '__location': '1..9'},) imd_pro.add([(0, 9)], [(False, True)], {'__location': '1..>9', 'product': '"L-carnitine amidase"', 'strand': '+', 'type': 'Protein'}) imd_dna = IntervalMetadata(9) imd_dna.add([(0, 9)], [(False, False)], {'country': '"Brazil: Parana, Paranavai"', 'type': 'source', 'strand': '+', '__location': '1..9', 'environmental_sample': ''}) imd_dna.add([(1, 8)], [(True, True)], {'__location': 'complement(<2..>8)', 'product': '"16S ribosomal RNA"', 'strand': '-', 'type': 'rRNA'}) self.multi = ( ('gsreildfk', {'ACCESSION': 'AAB29917', 'COMMENT': 'Method: direct peptide sequencing.', 'DBSOURCE': 'accession AAB29917.1', 'DEFINITION': 'L-carnitine amidase {N-terminal}', 'KEYWORDS': '.', 'LOCUS': {'date': '23-SEP-1994', 'division': 'BCT', 'locus_name': 'AAB29917', 'mol_type': None, 'shape': 'linear', 'size': 9, 'unit': 'aa'}, 'REFERENCE': [{'AUTHORS': 'Joeres,U. and Kula,M.R.', 'JOURNAL': 'AMB 40 (5), 606-610 (1994)', 'PUBMED': '7764422', 'REFERENCE': '1 (residues 1 to 9)', 'REMARK': 'from the original journal article.', 'TITLE': 'a microbial L-carnitine amidase'}, {'AUTHORS': 'Joeres,U. 
and Kula,M.R.', 'JOURNAL': 'AMB 40 (5), 606-610 (1994)', 'PUBMED': '7764422', 'REFERENCE': '1 (residues 1 to 9)', 'TITLE': 'a microbial L-carnitine amidase'}], 'SOURCE': {'ORGANISM': 'Bacteria', 'taxonomy': 'Unclassified.'}, 'VERSION': 'AAB29917.1 GI:545426'}, imd_pro, Protein), ('catgcaggc', {'ACCESSION': 'HQ018078', 'DEFINITION': 'Uncultured Xylanimonas sp.16S, partial', 'KEYWORDS': 'ENV.', 'LOCUS': {'date': '29-AUG-2010', 'division': 'ENV', 'locus_name': 'HQ018078', 'mol_type': 'DNA', 'shape': 'linear', 'size': 9, 'unit': 'bp'}, 'SOURCE': {'ORGANISM': 'uncultured Xylanimonas sp.', 'taxonomy': 'Bacteria; Actinobacteria; ' 'Micrococcales; Promicromonosporaceae; ' 'Xylanimonas; environmental samples.'}, 'VERSION': 'HQ018078.1 GI:304421728'}, imd_dna, DNA)) class ReaderTests(GenBankIOTests): def test_parse_reference(self): lines = ''' REFERENCE 1 (bases 1 to 154478) AUTHORS Sato,S., Nakamura,Y., Kaneko,T., and Tabata,S. TITLE Complete structure of the chloroplast genome of Arabidopsis thaliana JOURNAL DNA Res. 6 (5), 283-290 (1999) PUBMED 10574454'''.split('\n') exp = {'AUTHORS': 'Sato,S., Nakamura,Y., Kaneko,T., and Tabata,S.', 'JOURNAL': 'DNA Res. 
6 (5), 283-290 (1999)', 'PUBMED': '10574454', 'REFERENCE': '1 (bases 1 to 154478)', 'TITLE': ('Complete structure of the chloroplast genome of' ' Arabidopsis thaliana')} self.assertEqual(_parse_reference(lines), exp) def test_parse_locus(self): for serialized, parsed in self.locus: self.assertEqual(_parse_locus(serialized), parsed) def test_parse_locus_invalid(self): lines = [ # missing unit ['LOCUS NC_005816 9609 ' ' DNA circular CON 07-FEB-2015'], # missing division ['LOCUS SCU49845 5028 bp' ' DNA 21-JUN-1999'], # wrong date format ['LOCUS NP_001832 360 aa' ' linear PRI 2001-12-18']] for line in lines: with self.assertRaisesRegex(GenBankFormatError, r'Could not parse the LOCUS line:.*'): _parse_locus(line) def test_genbank_to_generator_single(self): # test single record and uppercase sequence for c in [Sequence, Protein]: obs = next(_genbank_to_generator( self.single_upper_fp, constructor=c)) exp = c(self.single[0], metadata=self.single[1], positional_metadata=self.single[2]) self.assertEqual(exp, obs) def test_genbank_to_generator(self): for i, obs in enumerate(_genbank_to_generator(self.multi_fp)): seq, md, imd, constructor = self.multi[i] exp = constructor(seq, metadata=md, lowercase=True, interval_metadata=imd) self.assertEqual(exp, obs) def test_genbank_to_sequence(self): for i, exp in enumerate(self.multi): obs = _genbank_to_sequence(self.multi_fp, seq_num=i+1) exp = Sequence(exp[0], metadata=exp[1], lowercase=True, interval_metadata=exp[2]) self.assertEqual(exp, obs) def test_genbank_to_rna(self): seq, md, imd, constructor = self.single_rna obs = _genbank_to_rna(self.single_rna_fp) exp = constructor(seq, metadata=md, lowercase=True, interval_metadata=imd) self.assertEqual(exp, obs) def test_genbank_to_dna(self): i = 1 exp = self.multi[i] obs = _genbank_to_dna(self.multi_fp, seq_num=i+1) exp = DNA(exp[0], metadata=exp[1], lowercase=True, interval_metadata=exp[2]) self.assertEqual(exp, obs) def test_genbank_to_protein(self): i = 0 exp = self.multi[i] obs = 
_genbank_to_protein(self.multi_fp, seq_num=i+1) exp = Protein(exp[0], metadata=exp[1], lowercase=True, interval_metadata=exp[2]) self.assertEqual(exp, obs) class WriterTests(GenBankIOTests): def test_serialize_locus(self): for serialized, parsed in self.locus: self.assertEqual( _serialize_locus('LOCUS', parsed), serialized[0] + '\n') def test_generator_to_genbank(self): seq, md, imd, constructor = self.single obj = constructor(seq, md, interval_metadata=imd) with io.StringIO() as fh: _generator_to_genbank([obj], fh) obs = fh.getvalue() with open(self.single_lower_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_sequence_to_genbank(self): with io.StringIO() as fh: for i, (seq, md, imd, constructor) in enumerate(self.multi): obj = Sequence(seq, md, interval_metadata=imd, lowercase=True) _sequence_to_genbank(obj, fh) obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_dna_protein_to_genbank(self): writers = [_protein_to_genbank, _dna_to_genbank] with io.StringIO() as fh: for i, (seq, md, imd, constructor) in enumerate(self.multi): obj = constructor( seq, md, interval_metadata=imd, lowercase=True) writers[i](obj, fh) obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_rna_to_genbank(self): with io.StringIO() as fh: seq, md, imd, constructor = self.single_rna obj = constructor(seq, md, interval_metadata=imd, lowercase=True) _rna_to_genbank(obj, fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) class RoundtripTests(GenBankIOTests): def test_roundtrip_generator(self): with io.StringIO() as fh: _generator_to_genbank(_genbank_to_generator(self.multi_fp), fh) obs = fh.getvalue() with open(self.multi_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_rna(self): with io.StringIO() as fh: _rna_to_genbank(_genbank_to_rna(self.single_rna_fp), fh) obs = fh.getvalue() with 
open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_dna(self): with io.StringIO() as fh: _dna_to_genbank(_genbank_to_dna(self.single_rna_fp), fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_protein(self): with io.StringIO() as fh: _protein_to_genbank(_genbank_to_protein(self.single_lower_fp), fh) obs = fh.getvalue() with open(self.single_lower_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_roundtrip_sequence(self): with io.StringIO() as fh: _sequence_to_genbank(_genbank_to_sequence(self.single_rna_fp), fh) obs = fh.getvalue() with open(self.single_rna_fp) as fh: exp = fh.read() self.assertEqual(obs, exp) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_gff3.py000066400000000000000000000321361464262511300221730ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import io from skbio.util import get_data_path from skbio.metadata import IntervalMetadata from skbio import DNA, Sequence from skbio.io import GFF3FormatError from skbio.io.format.gff3 import (_yield_record, _parse_record, _parse_attr, _gff3_sniffer, _gff3_to_interval_metadata, _interval_metadata_to_gff3, _gff3_to_generator, _generator_to_gff3, _gff3_to_sequence, _sequence_to_gff3, _gff3_to_dna, _dna_to_gff3, _serialize_interval_metadata) class GFF3IOTests(TestCase): def setUp(self): self.multi_fp = get_data_path('gff3_multi_record') self.single_fp = get_data_path('gff3_single_record') intvls = [{'bounds': [(0, 4641652)], 'metadata': {'source': 'European Nucleotide Archive', 'type': 'chromosome', 'score': '.', 'strand': '.', 'ID': 'chromosome:Chromosome', 'Alias': 'U00096.3', 'Is_circular': 'true'}}, {'bounds': [(147, 148)], 'metadata': {'source': 'regulondb_feature', 'type': 'biological_region', 'score': '.', 'strand': '+', 'external_name': 'Promoter thrLp (RegulonDB:ECK120010236)', 'logic_name': 'regulondb_promoter'}}, {'bounds': [(336, 2799)], 'metadata': {'source': 'Prodigal_v2.60', 'type': 'gene', 'score': '1.8', 'strand': '+', 'phase': 0, 'ID': '1_1', 'gc_cont': '0.427'}}, {'bounds': [(336, 2799)], 'metadata': {'source': 'Prodigal_v2.60', 'type': 'CDS', 'score': '333.8', 'strand': '+', 'phase': 0, 'ID': '1_2', 'Parent': '1_1', 'rbs_motif': 'GGAG/GAGG', 'rbs_spacer': '5-10bp'}}, {'bounds': [(0, 50), (55, 100)], 'metadata': {'source': 'Prodigal_v2.60', 'type': 'gene', 'score': '1.8', 'strand': '+', 'phase': 0, 'ID': '1_1', 'gene': 'FXR receptor'}}] self.upper_bound = 4641652 self.imd1 = IntervalMetadata(self.upper_bound) self.imd1.add(**intvls[0]) self.imd1.add(**intvls[1]) self.imd2 = IntervalMetadata(None) self.imd2.add(**intvls[2]) self.imd2.add(**intvls[3]) self.imd3 = IntervalMetadata(None) self.imd3.add(**intvls[4]) self.seq_fp = 
get_data_path('gff3_dna') self.seq = Sequence('ATGCATGCATGC', metadata={'id': 'NC_1', 'description': 'species X'}) self.seq.interval_metadata.add( [(0, 9)], metadata={'source': 'Prodigal_v2.60', 'type': 'gene', 'score': '.', 'strand': '+', 'phase': 0, 'ID': 'gene1', 'Name': 'FXR'}) self.dna = DNA(self.seq) class SnifferTests(TestCase): def setUp(self): self.positive_fps = map(get_data_path, [ 'gff3_multi_record', 'gff3_single_record', 'gff3_dna']) self.negative_fps = map(get_data_path, [ 'empty', 'whitespace_only', 'gff3_bad_missing_directive']) def test_positive(self): for fp in self.positive_fps: self.assertEqual(_gff3_sniffer(fp), (True, {})) def test_negative(self): for fp in self.negative_fps: self.assertEqual(_gff3_sniffer(fp), (False, {})) class ReaderTests(GFF3IOTests): def test_parse_attr(self): s = 'Dbxref=GO:000152,GO:001234;Note=fooo' obs = _parse_attr(s) exp = {'db_xref': 'GO:000152,GO:001234', 'note': 'fooo'} self.assertEqual(exp, obs) def test_yield_record(self): obs = [('data', 'seqid1', ['seqid1\txxx', 'seqid1\tyyy']), ('data', 'seqid2', ['seqid2\tzzz'])] s = ('seqid1\txxx\n' 'seqid1\tyyy\n' 'seqid2\tzzz\n') fh = io.StringIO(s) for i, j in zip(_yield_record(fh), obs): self.assertEqual(i, j) def test_parse_record_raise(self): chars = 'abc?!' for char in chars: lines = [ 'ctg123\t.\tgene\t1000\t9000\t.\t+\t%s\tID=gene00001' % char] with self.assertRaisesRegex( GFF3FormatError, r"unknown value for phase column: '%s'" % char): _parse_record(lines, 10000) def test_yield_record_raise(self): s = '##gff-version 3\nseq_1 . gene 1 3 . + . 
ID=gene01\n' with io.StringIO(s) as fh: with self.assertRaises(GFF3FormatError): list(_yield_record(fh)) def test_gff3_to_interval_metadata(self): obs = _gff3_to_interval_metadata( self.single_fp, seq_id='Chromosome') self.assertEqual(obs, self.imd1) def test_gff3_to_interval_metadata_empty(self): exp = IntervalMetadata(None) obs = _gff3_to_interval_metadata( # the seq id does not exist self.single_fp, seq_id='foo') self.assertEqual(obs, exp) def test_gff3_to_interval_metadata_bad(self): with self.assertRaisesRegex(GFF3FormatError, r'do not have 9 columns in this line'): _gff3_to_interval_metadata( get_data_path('gff3_bad_wrong_columns'), seq_id='Chromosome') def test_gff3_to_generator(self): exps = [('Chromosome', self.imd1), ('gi|556503834|ref|NC_000913.3|', self.imd2)] obss = _gff3_to_generator(self.multi_fp) for obs, exp in zip(obss, exps): self.assertEqual(obs, exp) def test_gff3_to_generator_empty(self): empty_fps = map(get_data_path, ['empty', 'whitespace_only']) for empty_fp in empty_fps: obs = list(_gff3_to_generator(empty_fp)) self.assertEqual(obs, []) def test_gff3_to_sequence(self): obs = _gff3_to_sequence(self.seq_fp) self.assertEqual(obs, self.seq) def test_gff3_to_dna(self): obs = _gff3_to_dna(self.seq_fp) self.assertEqual(obs, self.dna) class WriterTests(GFF3IOTests): def test_interval_metadata_to_gff3(self): with io.StringIO() as fh: _interval_metadata_to_gff3(self.imd1, fh, seq_id='Chromosome') # only compare the uncommented lines because the comments are not # stored in IntervalMetadata obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] with open(self.single_fp) as f: exp = [i.rstrip() for i in f.readlines() if not i.startswith('#')] self.assertEqual(obs, exp) def test_interval_metadata_to_gff3_missing_field(self): exp = 'ctg123\t.\tgene\t1\t9\t.\t.\t.\tID=gene00001;Name=EDEN' imd = IntervalMetadata(9) imd.add([(0, 9)], metadata={ 'type': 'gene', 'ID': 'gene00001', 'Name': 'EDEN'}) with io.StringIO() as fh: 
_interval_metadata_to_gff3(imd, fh, seq_id='ctg123') # only compare the uncommented lines because the comments are not # stored in IntervalMetadata obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] self.assertEqual([exp], obs) def test_interval_metadata_to_gff3_escape(self): # test escape of reserved char in GFF3 exp = 'ctg123\t.\tgene\t1\t9\t.\t.\t.\tID=a%3B%3D%26%2Cb' imd = IntervalMetadata(9) imd.add([(0, 9)], metadata={ 'type': 'gene', 'ID': 'a;=&,b'}) with io.StringIO() as fh: _interval_metadata_to_gff3(imd, fh, seq_id='ctg123') # only compare the uncommented lines because the comments are not # stored in IntervalMetadata obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] self.assertEqual([exp], obs) def test_interval_metadata_to_gff3_multiple_values(self): # test multiple values of db_xref are correctly serialized exp = 'ctg123\t.\tgene\t1\t9\t.\t.\t.\tDbxref=GO:000152,GO:001234' imd = IntervalMetadata(9) imd.add([(0, 9)], metadata={ 'type': 'gene', 'db_xref': ['GO:000152', 'GO:001234']}) with io.StringIO() as fh: _interval_metadata_to_gff3(imd, fh, seq_id='ctg123') # only compare the uncommented lines because the comments are not # stored in IntervalMetadata obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] self.assertEqual([exp], obs) def test_interval_metadata_to_gff3_empty(self): imd = IntervalMetadata(None) with io.StringIO() as fh: _interval_metadata_to_gff3(imd, fh, seq_id='foo') obs = fh.getvalue() self.assertEqual(obs, '##gff-version 3\n') def test_interval_metadata_to_gff3_sub_region(self): seq_id = 'NC 7' with open(self.multi_fp) as f: exp = [i.strip() for i in f if i.startswith(seq_id)] with io.StringIO() as fh: _serialize_interval_metadata( self.imd3, seq_id=seq_id, fh=fh, skip_subregion=False) obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] self.assertEqual(exp, obs) with io.StringIO() as fh: _serialize_interval_metadata(self.imd3, seq_id=seq_id, fh=fh) obs = [i 
for i in fh.getvalue().splitlines() if not i.startswith('#')] # all the rest lines except the 1st are sub-region lines, so only # compare the first line from exp self.assertEqual(exp[:1], obs) def test_sequence_to_gff3(self): with io.StringIO() as fh: _sequence_to_gff3(self.seq, fh) obs = fh.getvalue() with open(self.seq_fp) as fh: exp = fh.read() self.assertEqual(exp, obs) def test_dna_to_gff3(self): with io.StringIO() as fh: _dna_to_gff3(self.dna, fh) obs = fh.getvalue() with open(self.seq_fp) as fh: exp = fh.read() self.assertEqual(exp, obs) def test_raise_subregion(self): im = IntervalMetadata(None) im.add([(0, 3), (7, 9)], metadata={'type': 'gene'}) with io.StringIO() as fh: with self.assertRaises(GFF3FormatError): _serialize_interval_metadata( im, seq_id='a', fh=fh, skip_subregion=False) class RoundtripTests(GFF3IOTests): def test_roundtrip_interval_metadata(self): '''''' with io.StringIO() as fh: _interval_metadata_to_gff3( _gff3_to_interval_metadata( self.single_fp, seq_id='Chromosome'), fh, seq_id='Chromosome') obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] with open(self.single_fp) as f: exp = [i.rstrip() for i in f.readlines() if not i.startswith('#')] self.assertEqual(obs, exp) def test_roundtrip_interval_metadata_generator(self): with io.StringIO() as fh: _generator_to_gff3( _gff3_to_generator(self.multi_fp), fh, skip_subregion=False) obs = [i for i in fh.getvalue().splitlines() if not i.startswith('#')] with open(self.multi_fp) as f: exp = [i.rstrip() for i in f.readlines() if not i.startswith('#')] self.assertEqual(obs, exp) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_lsmat.py000066400000000000000000000237601464262511300224710ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main from skbio import DistanceMatrix from skbio.io import LSMatFormatError from skbio.io.format.lsmat import ( _lsmat_to_dissimilarity_matrix, _lsmat_to_distance_matrix, _dissimilarity_matrix_to_lsmat, _distance_matrix_to_lsmat, _lsmat_sniffer) from skbio.stats.distance import DissimilarityMatrix, DistanceMatrixError class LSMatTestData(TestCase): def setUp(self): self.lsmat_1x1_fh = io.StringIO(LSMat_1x1) self.lsmat_2x2_fh = io.StringIO(LSMat_2x2) self.lsmat_2x2_asym_fh = io.StringIO(LSMat_2x2_ASYM) self.lsmat_3x3_fh = io.StringIO(LSMat_3x3) self.lsmat_3x3_whitespace_fh = io.StringIO(LSMat_3x3_WHITESPACE) self.lsmat_3x3_csv_fh = io.StringIO(LSMat_3x3_CSV) self.lsmat_3x3_fw_fh = io.StringIO(LSMat_3x3_FW) self.valid_fhs = [ self.lsmat_1x1_fh, self.lsmat_2x2_fh, self.lsmat_2x2_asym_fh, self.lsmat_3x3_fh, self.lsmat_3x3_whitespace_fh ] self.empty_fh = io.StringIO() self.invalid_1_fh = io.StringIO(INVALID_1) self.invalid_2_fh = io.StringIO(INVALID_2) self.invalid_3_fh = io.StringIO(INVALID_3) self.invalid_4_fh = io.StringIO(INVALID_4) self.invalid_5_fh = io.StringIO(INVALID_5) self.invalid_6_fh = io.StringIO(INVALID_6) self.invalid_fhs = [ (self.empty_fh, r'empty'), (self.invalid_1_fh, r'1 value\(s\).*2.*\(2\)'), (self.invalid_2_fh, r"'b'.*'a'"), (self.invalid_3_fh, r'extra row\(s\)'), (self.invalid_4_fh, r'2 row\(s\).*found 1'), (self.invalid_5_fh, r'2 row\(s\).*found 0'), (self.invalid_6_fh, r"delimiter '\\t'") ] class DissimilarityAndDistanceMatrixReaderWriterTests(LSMatTestData): def setUp(self): super(DissimilarityAndDistanceMatrixReaderWriterTests, self).setUp() self.lsmat_1x1_data = [[0.0]] self.lsmat_2x2_data = [[0.0, 0.123], [0.123, 0.0]] self.lsmat_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]] self.lsmat_3x3_data = [[0.0, 0.01, 4.2], [0.01, 0.0, 12.0], [4.2, 12.0, 
0.0]] # We repeat the 3x3 example because there are two file format # representations of it, one that is messy and one that is not. Both # should be read into an equivalent object and written to an equivalent # format though, which is why we duplicate the 3x3 objects and strings. self.dissim_objs = [ DissimilarityMatrix(self.lsmat_1x1_data, ['a']), DissimilarityMatrix(self.lsmat_2x2_data, ['a', 'b']), DissimilarityMatrix(self.lsmat_2x2_asym_data, ['a', 'b']), DissimilarityMatrix(self.lsmat_3x3_data, ['a', 'b', 'c']), DissimilarityMatrix(self.lsmat_3x3_data, ['a', 'b', 'c']) ] self.dissim_strs = [LSMat_1x1, LSMat_2x2, LSMat_2x2_ASYM, LSMat_3x3, LSMat_3x3] self.dissim_fhs = [self.lsmat_1x1_fh, self.lsmat_2x2_fh, self.lsmat_2x2_asym_fh, self.lsmat_3x3_fh, self.lsmat_3x3_whitespace_fh] self.dist_objs = [ DistanceMatrix(self.lsmat_1x1_data, ['a']), DistanceMatrix(self.lsmat_2x2_data, ['a', 'b']), DistanceMatrix(self.lsmat_3x3_data, ['a', 'b', 'c']), DistanceMatrix(self.lsmat_3x3_data, ['a', 'b', 'c']) ] self.dist_strs = [LSMat_1x1, LSMat_2x2, LSMat_3x3, LSMat_3x3] self.dist_fhs = [self.lsmat_1x1_fh, self.lsmat_2x2_fh, self.lsmat_3x3_fh, self.lsmat_3x3_whitespace_fh] def test_read_valid_files(self): for fn, cls, objs, fhs in ((_lsmat_to_dissimilarity_matrix, DissimilarityMatrix, self.dissim_objs, self.dissim_fhs), (_lsmat_to_distance_matrix, DistanceMatrix, self.dist_objs, self.dist_fhs)): for fh, obj in zip(fhs, objs): fh.seek(0) obs = fn(fh) self.assertEqual(obs, obj) self.assertIsInstance(obs, cls) # Above files are TSV (default delimiter). Test that CSV works too. for fn, cls in ((_lsmat_to_dissimilarity_matrix, DissimilarityMatrix), (_lsmat_to_distance_matrix, DistanceMatrix)): exp = cls(self.lsmat_3x3_data, ['a', 'b', 'c']) self.lsmat_3x3_csv_fh.seek(0) obs = fn(self.lsmat_3x3_csv_fh, delimiter=',') self.assertEqual(obs, exp) self.assertIsInstance(obs, cls) # Test that fixed-width works too. 
for fn, cls in ((_lsmat_to_dissimilarity_matrix, DissimilarityMatrix), (_lsmat_to_distance_matrix, DistanceMatrix)): exp = cls(self.lsmat_3x3_data, ['a', 'b', 'c']) self.lsmat_3x3_fw_fh.seek(0) obs = fn(self.lsmat_3x3_fw_fh, delimiter=None) self.assertEqual(obs, exp) self.assertIsInstance(obs, cls) def test_read_invalid_files(self): for fn in _lsmat_to_dissimilarity_matrix, _lsmat_to_distance_matrix: for invalid_fh, error_msg_regexp in self.invalid_fhs: with self.assertRaisesRegex(LSMatFormatError, error_msg_regexp): invalid_fh.seek(0) fn(invalid_fh) # Asymmetric data only raises an error for DistanceMatrix. with self.assertRaises(DistanceMatrixError): _lsmat_to_distance_matrix(self.lsmat_2x2_asym_fh) def test_write(self): for fn, objs, strs in ((_dissimilarity_matrix_to_lsmat, self.dissim_objs, self.dissim_strs), (_distance_matrix_to_lsmat, self.dist_objs, self.dist_strs)): for obj, str_ in zip(objs, strs): fh = io.StringIO() fn(obj, fh) obs = fh.getvalue() fh.close() self.assertEqual(obs, str_) # Test writing CSV (TSV is written above). for fn, cls in ((_dissimilarity_matrix_to_lsmat, DissimilarityMatrix), (_distance_matrix_to_lsmat, DistanceMatrix)): obj = cls(self.lsmat_3x3_data, ['a', 'b', 'c']) fh = io.StringIO() fn(obj, fh, delimiter=',') obs = fh.getvalue() fh.close() self.assertEqual(obs, LSMat_3x3_CSV) def test_roundtrip_read_write(self): for reader_fn, writer_fn, fhs in ((_lsmat_to_dissimilarity_matrix, _dissimilarity_matrix_to_lsmat, self.dissim_fhs), (_lsmat_to_distance_matrix, _distance_matrix_to_lsmat, self.dist_fhs)): for fh in fhs: # Read. fh.seek(0) lsmat1 = reader_fn(fh) # Write. out_fh = io.StringIO() writer_fn(lsmat1, out_fh) out_fh.seek(0) # Read. 
lsmat2 = reader_fn(out_fh) out_fh.close() self.assertEqual(lsmat1, lsmat2) class SnifferTests(LSMatTestData): def setUp(self): super(SnifferTests, self).setUp() def test_match_tsv(self): # Sniffer should match all valid files, and will match some invalid # ones too because it doesn't exhaustively check the entire file. fhs = self.valid_fhs + [self.invalid_1_fh, self.invalid_3_fh, self.invalid_4_fh] for fh in fhs: self.assertEqual(_lsmat_sniffer(fh), (True, {'delimiter': '\t'})) def test_match_csv(self): self.assertEqual(_lsmat_sniffer(self.lsmat_3x3_csv_fh), (True, {'delimiter': ','})) def test_no_match(self): for fh in (self.empty_fh, self.invalid_2_fh, self.invalid_5_fh, self.invalid_6_fh): self.assertEqual(_lsmat_sniffer(fh), (False, {})) LSMat_1x1 = ( '\ta\n' 'a\t0.0\n') LSMat_2x2 = ( '\ta\tb\n' 'a\t0.0\t0.123\n' 'b\t0.123\t0.0\n') LSMat_2x2_ASYM = ( '\ta\tb\n' 'a\t0.0\t1.0\n' 'b\t-2.0\t0.0\n') LSMat_3x3 = ( '\ta\tb\tc\n' 'a\t0.0\t0.01\t4.2\n' 'b\t0.01\t0.0\t12.0\n' 'c\t4.2\t12.0\t0.0\n') # Extra whitespace-only lines throughout. Also has comments before the header. LSMat_3x3_WHITESPACE = '\n'.join([ '# foo', ' \t \t ', ' #bar', '', '', '\ta\t b \tc', 'a \t0.0\t0.01\t4.2', ' \t', 'b\t0.01\t0.0\t12.0', '', '\t \t', '', 'c\t4.2\t12.0\t0.0', '', ' \t ', '\t\t\t', ' ']) # Same matrix as above, but delimited by commas instead of tabs. LSMat_3x3_CSV = ( ',a,b,c\n' 'a,0.0,0.01,4.2\n' 'b,0.01,0.0,12.0\n' 'c,4.2,12.0,0.0\n') # Same matrix as above, but delimited by whitespaces instead of tabs. 
LSMat_3x3_FW = ( ' a b c \n' 'a 0.0 0.01 4.2 \n' 'b 0.01 0.0 12.0 \n' 'c 4.2 12.0 0.0 \n') # missing data INVALID_1 = '\ta\tb\na\t0\t1\nb\t1' # mismatched IDs INVALID_2 = '\ta\tb\nb\t0\t1\na\t1\t0' # extra data lines INVALID_3 = '\ta\tb\na\t0\t1\nb\t1\t0\n \nfoo\n\n\n' # missing data lines INVALID_4 = '\ta\tb\na\t0\t1\n \n' # no data lines INVALID_5 = '\ta\tb\n' # missing leading delimiter in header INVALID_6 = "a\tb\na\t0.0\t0.123\nb\t0.123\t0.0\n" if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_newick.py000066400000000000000000000304621464262511300226260ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import unittest from skbio import TreeNode from skbio.io import NewickFormatError from skbio.io.format.newick import ( _newick_to_tree_node, _tree_node_to_newick, _newick_sniffer) class TestNewick(unittest.TestCase): def _assert_node_equal(self, n1, n2): self.assertEqual(n1.name, n2.name) self.assertEqual(n1.length, n2.length) self.assertEqual(len(n1.children), len(n2.children)) def _assert_equal(self, n1, n2): def name(x): return (str(x.name), float(x.length) if x.length is not None else 0, len(x.children)) self._assert_node_equal(n1, n2) for c1, c2 in zip(sorted(n1.children, key=name), sorted(n2.children, key=name)): self.assertTrue(c1.parent is n1) self.assertTrue(c2.parent is n2) self._assert_equal(c1, c2) def _setup_tree(self, kwargs_list): trees = [] for kwargs in kwargs_list: trees.append(TreeNode(**kwargs)) trees[4].extend([trees[2], trees[3]]) trees[5].extend([trees[0], trees[1], trees[4]]) return trees[5] def _setup_linked_list(self, kwargs_list): last_node = None for idx, kwargs in 
enumerate(kwargs_list): new_node = TreeNode(**kwargs) if last_node is not None: new_node.append(last_node) last_node = new_node return last_node def _setup_balanced_binary(self, kwargs_list): trees = [] for kwargs in kwargs_list: trees.append(TreeNode(**kwargs)) trees[0].extend([trees[2], trees[3]]) trees[1].extend([trees[4], trees[5]]) trees[6].extend([trees[0], trees[1]]) return trees[6] def setUp(self): # Using the factory functions above, we will construct different tree # instances. Each tree is expected to serialize to the first newick # string in the list. Each string in the list is expected to # deserialize into an equivilent rotation of the constructed instance. tree_blank = (self._setup_tree([ {}, {}, {}, {}, {}, {} ]), [ "(,,(,));\n", "(,(,),);", "((,),,);", " ((,[ this is a comment ]) , , ) ; ", "((,[ i_can_do_this[0] or escape unmatched '[ ]),[more words],);", ]) tree_leaves_named = (self._setup_tree([ {'name': 'a_'}, {'name': 'b'}, {'name': 'c'}, {'name': 'd'}, {}, {} ]), [ "('a_',b,(c,d));\n", "(b,(c,d),'a_');", "(b\n,'a_'\n ,(d \t,c) ) ;", ]) tree_all_named = (self._setup_tree([ {'name': 'a'}, {'name': 'b'}, {'name': 'c'}, {'name': '[whaaat!\']'}, {'name': 'e'}, {'name': 'f'} ]), [ "(a,b,(c,'[whaaat!'']')e)f;\n", "(b,(c,'[whaaat!'']')e,a)f;", "(b,[comment] \na,('[whaaat!'']',c)e)f;", ]) tree_all_but_root_distances = (self._setup_tree([ {'length': 0.1}, {'length': 0.2}, {'length': 0.3}, {'length': 0.4}, {'length': 0.5}, {} ]), [ "(:0.1,:0.2,(:0.3,:0.4):0.5);\n", "(:0.2,(:0.3,:0.4):0.5,:0.1);", "(:0.2,:0.1,(:0.4,:0.3):0.5);", ]) tree_all_distances = (self._setup_tree([ {'length': 0.1}, {'length': 0.2}, {'length': 0.3}, {'length': 0.4}, {'length': 0.5}, {'length': 0.0} ]), [ "(:0.1,:0.2,(:0.3,:0.4):0.5):0.0;\n", "(:0.2,(:0.3,:0.4):0.5,:0.1):0.0;", "(:0.2,\n:0.1,(:0.4,\n:0.3):0.5)\n:0.0;", ]) tree_all_leaves_named_with_distances = (self._setup_tree([ {'name': 'a', 'length': 0.1}, {'name': 'b_a\'', 'length': 0.2}, {'name': 'c', 'length': 0.3}, {'name': 
'de d', 'length': 0.4}, {'length': 0.5}, {'length': 0.0} ]), [ "(a:0.1,'b_a''':0.2,(c:0.3,de_d:0.4):0.5):0.0;\n", "('b_a''':0.2,(c:0.3,'de d':0.4):0.5,a:0.1):0.0;", "('b_a''':0.2,a:0.1,('de d'[why not]:0.4,c:0.3):0.5):0.0;", ]) tree_all_leaves_named_with_distances_no_root = (self._setup_tree([ {'name': 'a', 'length': 0.1}, {'name': 'b_a\'', 'length': 0.2}, {'name': 'c', 'length': 0.3}, {'name': 'de d', 'length': 0.4}, {'length': 0.5}, {} ]), [ "(a:0.1,'b_a''':0.2,(c:0.3,de__d:0.4):0.5);\n", "('b_a''':0.2\n[comment ahoy]\n,(c:0.3,'de d':0.4):0.5,a:0.1);", "('b_a''':0.2,a:0.1,(de__d:0.4,c:0.3):0.5);" ]) tree_all = (self._setup_tree([ {'name': 'a', 'length': 0.1}, {'name': 'b_a\'', 'length': 0.2}, {'name': 'c', 'length': 0.3}, {'name': 'de\' d', 'length': 0.4}, {'name': 'e', 'length': 0.5}, {'name': 'f', 'length': 0.0} ]), [ "(a:0.1,'b_a''':0.2,(c:0.3,de''_d:0.4)e:0.5)f:0.0;\n", "('b_a''':0.2,(c:0.3,de''_d:0.4)e:0.5,a:0.1)f:0.0;", "((de''_d:0.4, c:0.3)e:0.5, 'b_a''':0.2, a:0.1)f:0.0;" ]) balanced_blank = (self._setup_balanced_binary([ {}, {}, {}, {}, {}, {}, {} ]), [ "((,),(,));\n", ]) balanced_named = (self._setup_balanced_binary([ {'name': 'a'}, {'name': 'b'}, {'name': 'c'}, {'name': 'd'}, {'name': 'e'}, {'name': 'f'}, {'name': 'g'} ]), [ "((c,d)a,(e,f)b)g;\n", ]) balanced_distances = (self._setup_balanced_binary([ {'length': 1.0}, {'length': 2.0}, {'length': 3.0}, {'length': 4.0}, {'length': 5.0}, {'length': 6.0}, {'length': 0.0} ]), [ "((:3.0,:4.0):1.0,(:5.0,:6.0):2.0):0.0;\n", ]) blanaced_all = (self._setup_balanced_binary([ {'name': 'a', 'length': 1.0}, {'name': 'b', 'length': 2.0}, {'name': 'c', 'length': 3.0}, {'name': 'd', 'length': 4.0}, {'name': 'e', 'length': 5.0}, {'name': 'f:f\'f', 'length': 6.0}, {'name': 'g', 'length': 0.0} ]), [ "((c:3.0,d:4.0)a:1.0,(e:5.0,'f:f''f':6.0)b:2.0)g:0.0;\n", ]) linked_list_blank = (self._setup_linked_list([ {}, {}, {}, {}, {} ]), [ "(((())));\n", "[(((())));](((())));", "[[(((())));](((())));](((())));\t\t\n" ]) 
linked_list_named = (self._setup_linked_list([ {'name': 'aaa'}, {'name': 'b_a\''}, {'name': 'c'}, {'name': 'de d'}, {'name': 'e'}, ]), [ "((((aaa)'b_a''')c)de_d)e;\n" ]) inked_list_distances = (self._setup_linked_list([ {'length': 0.4}, {'length': 0.3}, {'length': 0.2}, {'length': 0.1}, {'length': 0.0}, ]), [ "((((:0.4):0.3):0.2):0.1):0.0;\n", "((((:0.4)[not a label]:0.3):0.2):0.1):0.0;\t\t\n" ]) linked_list_all = (self._setup_linked_list([ {'name': 'a', 'length': 0.4}, {'name': 'b_a\'', 'length': 0.3}, {'name': 'c', 'length': 0.2}, {'name': 'de d', 'length': 0.1}, {'name': 'eee', 'length': 0.0}, ]), [ "((((a:0.4)'b_a''':0.3)c:0.2)de_d:0.1)eee:0.0;\n" ]) single_empty = (TreeNode(), [";\n", "[comment about the root" " and its properties];"]) single_named = (TreeNode(name='athing'), ["athing;\n"]) single_distance = (TreeNode(length=200.0), [":200.0;\n"]) single_all = (TreeNode(name='[a]', length=200.0), ["'[a]':200.0;\n"]) self.trees_newick_lists = [ tree_blank, tree_leaves_named, tree_all_named, tree_all_but_root_distances, tree_all_distances, tree_all_leaves_named_with_distances, tree_all_leaves_named_with_distances_no_root, tree_all, balanced_blank, balanced_named, balanced_distances, blanaced_all, linked_list_blank, linked_list_named, inked_list_distances, linked_list_all, single_empty, single_named, single_distance, single_all ] # Invalid newick strings and list of error fragments that should be # a part of the error message when read. 
self.invalid_newicks = [ ("", ['root']), ("This is not a newick file.", ['whitespace', 'label']), ("((();", ['Parenthesis', 'unbalanced']), ("(,,,)(,);\n", ['unnested', 'children']), ("(()());", ['unnested', 'children']), ("(():,,)", ['length']), ("[][[]('comment is the gotcha':0.2,,);", ['unbalanced', 'root']), ("#SampleID\tHeaderA\tHeaderB\n0\t'yellow'\t0.45;", ['whitespace', 'label']), ("))();", ['Parenthesis', 'unbalanced']), ("((,,),((,,));", ['Parenthesis', 'unbalanced']), ("\n".join([",".join(str(i) for i in range(100)) for _ in range(100)]), ['whitespace', 'label']) ] def test_newick_to_tree_node_valid_files(self): for tree, newicks in self.trees_newick_lists: for newick in newicks: fh = io.StringIO(newick) read_tree = _newick_to_tree_node(fh) self._assert_equal(tree, read_tree) fh.close() def test_newick_to_tree_node_invalid_files(self): for invalid, error_fragments in self.invalid_newicks: fh = io.StringIO(invalid) with self.assertRaises(NewickFormatError) as cm: _newick_to_tree_node(fh) for frag in error_fragments: self.assertIn(frag, str(cm.exception)) fh.close() def test_tree_node_to_newick(self): for tree, newicks in self.trees_newick_lists: newick = newicks[0] fh = io.StringIO() _tree_node_to_newick(tree, fh) self.assertEqual(newick, fh.getvalue()) fh.close() def test_roundtrip(self): for tree, newicks in self.trees_newick_lists: newick = newicks[0] fh = io.StringIO(newick) tree = _newick_to_tree_node(fh) fh2 = io.StringIO() _tree_node_to_newick(tree, fh2) fh2.seek(0) tree2 = _newick_to_tree_node(fh2) self.assertEqual(newick, fh2.getvalue()) self._assert_equal(tree, tree2) fh.close() fh2.close() def test_newick_to_tree_node_convert_underscores(self): fh = io.StringIO('(_:0.1, _a, _b)__;') tree = _newick_to_tree_node(fh, convert_underscores=False) fh2 = io.StringIO() _tree_node_to_newick(tree, fh2) self.assertEqual(fh2.getvalue(), "('_':0.1,'_a','_b')'__';\n") fh2.close() fh.close() def test_newick_sniffer_valid_files(self): for _, newicks in 
self.trees_newick_lists: for newick in newicks: fh = io.StringIO(newick) self.assertEqual(_newick_sniffer(fh), (True, {})) fh.close() def test_newick_sniffer_invalid_files(self): for invalid, _ in self.invalid_newicks: fh = io.StringIO(invalid) self.assertEqual(_newick_sniffer(fh), (False, {})) fh.close() if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_ordination.py000066400000000000000000000267131464262511300235200ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main import numpy as np import pandas as pd import numpy.testing as npt from skbio import OrdinationResults from skbio.io import OrdinationFormatError from skbio.io.format.ordination import ( _ordination_to_ordination_results, _ordination_results_to_ordination, _ordination_sniffer) from skbio.util import get_data_path, assert_ordination_results_equal class OrdinationTestData(TestCase): def setUp(self): self.valid_fps = map( get_data_path, ['ordination_L&L_CA_data_scores', 'ordination_example3_scores', 'ordination_PCoA_sample_data_3_scores', 'ordination_example2_scores']) # Store filepath, regex for matching the error message that should be # raised when reading the file, and whether the file should be matched # by the sniffer (True) or not (False). 
self.invalid_fps = map(lambda e: (get_data_path(e[0]), e[1], e[2]), [ ('empty', r'end of file.*Eigvals header', False), ('whitespace_only', r'Eigvals header not found', False), ('ordination_error1', r'Eigvals header not found', False), ('ordination_error2', r'Proportion explained header not found', False), ('ordination_error3', r'Species header not found', True), ('ordination_error4', r'Site header not found', True), ('ordination_error5', r'Biplot header not found', True), ('ordination_error6', r'Site constraints header not found', True), ('ordination_error7', r'empty line', False), ('ordination_error8', r'9.*Proportion explained.*8', True), ('ordination_error9', r'2 values.*1 in row 1', True), ('ordination_error10', r'2 values.*1 in row 1', True), ('ordination_error11', r'Site constraints ids and site ids', True), ('ordination_error12', r'9.*Eigvals.*8', True), ('ordination_error13', r'9.*Proportion explained.*8', True), ('ordination_error14', r'Site is 0: 9 x 0', True), ('ordination_error15', r'9 values.*8 in row 1', True), ('ordination_error16', r'Biplot is 0: 3 x 0', True), ('ordination_error17', r'3 values.*2 in row 1', True), ('ordination_error18', r'proportion explained.*eigvals: 8 != 9', True), ('ordination_error19', r'coordinates.*species.*eigvals: 1 != 2', True), ('ordination_error20', r'coordinates.*site.*eigvals: 1 != 2', True), ('ordination_error21', r'one eigval', False), ('ordination_error22', r'end of file.*blank line', False), ('ordination_error23', r'end of file.*Proportion explained section', True), ('ordination_error24', r'end of file.*row 2.*Species section', True) ]) class OrdinationResultsReaderWriterTests(OrdinationTestData): def setUp(self): super(OrdinationResultsReaderWriterTests, self).setUp() # define in-memory results, one for each of the valid files in # self.valid_fps # CA results axes_ids = ['CA1', 'CA2'] species_ids = ['Species1', 'Species2', 'Species3'] site_ids = ['Site1', 'Site2', 'Site3'] eigvals = pd.Series([0.0961330159181, 
0.0409418140138], axes_ids) species = pd.DataFrame([[0.408869425742, 0.0695518116298], [-0.1153860437, -0.299767683538], [-0.309967102571, 0.187391917117]], index=species_ids, columns=axes_ids) site = pd.DataFrame([[-0.848956053187, 0.882764759014], [-0.220458650578, -1.34482000302], [1.66697179591, 0.470324389808]], index=site_ids, columns=axes_ids) biplot = None site_constraints = None prop_explained = None ca_scores = OrdinationResults( 'CA', 'Correspondence Analysis', eigvals=eigvals, features=species, samples=site, biplot_scores=biplot, sample_constraints=site_constraints, proportion_explained=prop_explained) # CCA results axes_ids = ['CCA%d' % i for i in range(1, 10)] species_ids = ['Species0', 'Species1', 'Species2', 'Species3', 'Species4', 'Species5', 'Species6', 'Species7', 'Species8'] site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5', 'Site6', 'Site7', 'Site8', 'Site9'] eigvals = pd.Series([0.366135830393, 0.186887643052, 0.0788466514249, 0.082287840501, 0.0351348475787, 0.0233265839374, 0.0099048981912, 0.00122461669234, 0.000417454724117], axes_ids) species = pd.DataFrame(np.loadtxt( get_data_path('ordination_exp_Ordination_CCA_species')), index=species_ids, columns=axes_ids) site = pd.DataFrame( np.loadtxt(get_data_path('ordination_exp_Ordination_CCA_site')), index=site_ids, columns=axes_ids) biplot = pd.DataFrame( [[-0.169746767979, 0.63069090084, 0.760769036049], [-0.994016563505, 0.0609533148724, -0.0449369418179], [0.184352565909, -0.974867543612, 0.0309865007541]], columns=axes_ids[:3]) site_constraints = pd.DataFrame(np.loadtxt( get_data_path('ordination_exp_Ordination_CCA_site_constraints')), index=site_ids, columns=axes_ids) prop_explained = None cca_scores = OrdinationResults('CCA', 'Canonical Correspondence Analysis', eigvals=eigvals, features=species, samples=site, biplot_scores=biplot, sample_constraints=site_constraints, proportion_explained=prop_explained) # PCoA results axes_ids = ['PC%d' % i for i in range(1, 10)] 
species_ids = None site_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593', 'PC.355', 'PC.607', 'PC.634'] eigvals = pd.Series([0.512367260461, 0.300719094427, 0.267912066004, 0.208988681078, 0.19169895326, 0.16054234528, 0.15017695712, 0.122457748167, 0.0], axes_ids) species = None site = pd.DataFrame( np.loadtxt(get_data_path('ordination_exp_Ordination_PCoA_site')), index=site_ids, columns=axes_ids) biplot = None site_constraints = None prop_explained = pd.Series([0.267573832777, 0.15704469605, 0.139911863774, 0.109140272454, 0.100111048503, 0.0838401161912, 0.0784269939011, 0.0639511763509, 0.0], axes_ids) pcoa_scores = OrdinationResults('PCoA', 'Principal Coordinate Analysis', eigvals=eigvals, features=species, samples=site, biplot_scores=biplot, sample_constraints=site_constraints, proportion_explained=prop_explained) # RDA results axes_ids = ['RDA%d' % i for i in range(1, 8)] species_ids = ['Species0', 'Species1', 'Species2', 'Species3', 'Species4', 'Species5'] site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5', 'Site6', 'Site7', 'Site8', 'Site9'] eigvals = pd.Series([25.8979540892, 14.9825779819, 8.93784077262, 6.13995623072, 1.68070536498, 0.57735026919, 0.275983624351], axes_ids) species = pd.DataFrame(np.loadtxt( get_data_path('ordination_exp_Ordination_RDA_species')), index=species_ids, columns=axes_ids) site = pd.DataFrame( np.loadtxt(get_data_path('ordination_exp_Ordination_RDA_site')), index=site_ids, columns=axes_ids) biplot = pd.DataFrame( [[0.422650019179, -0.559142585857, -0.713250678211], [0.988495963777, 0.150787422017, -0.0117848614073], [-0.556516618887, 0.817599992718, 0.147714267459], [-0.404079676685, -0.9058434809, -0.127150316558]], columns=axes_ids[:3]) site_constraints = pd.DataFrame(np.loadtxt( get_data_path('ordination_exp_Ordination_RDA_site_constraints')), index=site_ids, columns=axes_ids) prop_explained = None rda_scores = OrdinationResults( 'RDA', 'Redundancy Analysis', eigvals=eigvals, 
features=species, samples=site, biplot_scores=biplot, sample_constraints=site_constraints, proportion_explained=prop_explained) self.ordination_results_objs = [ca_scores, cca_scores, pcoa_scores, rda_scores] def test_read_valid_files(self): for fp, obj in zip(self.valid_fps, self.ordination_results_objs): obs = _ordination_to_ordination_results(fp) assert_ordination_results_equal( obs, obj, ignore_method_names=True, ignore_axis_labels=True) def test_read_invalid_files(self): for invalid_fp, error_msg_regexp, _ in self.invalid_fps: with self.assertRaisesRegex(OrdinationFormatError, error_msg_regexp): _ordination_to_ordination_results(invalid_fp) def test_write(self): for fp, obj in zip(self.valid_fps, self.ordination_results_objs): fh = io.StringIO() _ordination_results_to_ordination(obj, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() npt.assert_equal(obs, exp) def test_roundtrip_read_write(self): for fp in self.valid_fps: # Read. obj1 = _ordination_to_ordination_results(fp) # Write. fh = io.StringIO() _ordination_results_to_ordination(obj1, fh) fh.seek(0) # Read. obj2 = _ordination_to_ordination_results(fh) fh.close() assert_ordination_results_equal(obj1, obj2) class SnifferTests(OrdinationTestData): def setUp(self): super(SnifferTests, self).setUp() def test_matches_and_nonmatches(self): # Sniffer should match all valid files, and will match some invalid # ones too because it doesn't exhaustively check the entire file. for fp in self.valid_fps: self.assertEqual(_ordination_sniffer(fp), (True, {})) for fp, _, expected_sniffer_match in self.invalid_fps: self.assertEqual(_ordination_sniffer(fp), (expected_sniffer_match, {})) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_phylip.py000066400000000000000000000232321464262511300226500ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. 
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import unittest from skbio.io import PhylipFormatError from skbio.io.format.phylip import ( _tabular_msa_to_phylip, _phylip_to_tabular_msa, _phylip_sniffer) from skbio import TabularMSA, DNA, RNA from skbio.util import get_data_path class TestSniffer(unittest.TestCase): def setUp(self): self.positives = [get_data_path(e) for e in [ 'phylip_dna_3_seqs', 'phylip_single_seq_long', 'phylip_single_seq_short', 'phylip_two_chunks', 'phylip_variable_length_ids', 'phylip_varied_whitespace_in_seqs', 'phylip_whitespace_in_header_1', 'phylip_whitespace_in_header_2', 'phylip_whitespace_in_header_3', ]] # negative tests for sniffer don't include # phylip_invalid_empty_line_between_seqs, phylip_invalid_too_few_seqs, # phylip_invalid_too_many_seqs - because sniffer only reads first seq self.negatives = [get_data_path(e) for e in [ 'empty', 'whitespace_only', 'phylip_invalid_empty_line_after_header', 'phylip_invalid_empty_line_before_header', 'phylip_invalid_header_too_long', 'phylip_invalid_header_too_short', 'phylip_invalid_no_header', 'phylip_invalid_seq_too_long', 'phylip_invalid_seq_too_short', 'phylip_invalid_zero_seq_len', 'phylip_invalid_zero_seqs', ]] def test_positives(self): for fp in self.positives: self.assertEqual(_phylip_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negatives: self.assertEqual(_phylip_sniffer(fp), (False, {})) class TestReaders(unittest.TestCase): def setUp(self): self.valid_configurations = [ ([get_data_path('phylip_dna_3_seqs')], [('..ACC-GTTGG..', 'd1'), ('TTACCGGT-GGCC', 'd2'), ('.-ACC-GTTGC--', 'd3')] ), ([get_data_path('phylip_single_seq_long')], [('..ACC-GTTGG..AATGC.C----', 'foo')] ), ([get_data_path('phylip_single_seq_short')], [('-', '')] ), ([get_data_path('phylip_two_chunks'), 
get_data_path('phylip_varied_whitespace_in_seqs'), get_data_path('phylip_whitespace_in_header_1'), get_data_path('phylip_whitespace_in_header_2'), get_data_path('phylip_whitespace_in_header_3'), ], [('..ACC-GTTGG..AATGC.C', 'foo'), ('TTACCGGT-GGCCTA-GCAT', 'bar')] ), ([get_data_path('phylip_variable_length_ids')], [('.-ACGT', ''), ('TGCA-.', 'a'), ('.ACGT-', 'bb'), ('TGCA-.', '1'), ('AAAAAA', 'abcdefghij'), ('GGGGGG', 'ab def42ij')] ), ] self.positive_fps = list(map(get_data_path, [ 'phylip_dna_3_seqs', 'phylip_single_seq_long', 'phylip_single_seq_short', 'phylip_two_chunks', 'phylip_variable_length_ids', 'phylip_varied_whitespace_in_seqs', 'phylip_whitespace_in_header_1', 'phylip_whitespace_in_header_2', 'phylip_whitespace_in_header_3', ])) self.invalid_files = [(get_data_path(e[0]), e[1], e[2]) for e in [ ('empty', PhylipFormatError, r'This file is empty.'), ('whitespace_only', PhylipFormatError, r'Found non-header line .*: ""'), ('phylip_invalid_empty_line_after_header', PhylipFormatError, r'Empty lines are not allowed.'), ('phylip_invalid_empty_line_before_header', PhylipFormatError, r'Found non-header line .*: ""'), ('phylip_invalid_empty_line_between_seqs', PhylipFormatError, r'Empty lines are not allowed.'), ('phylip_invalid_header_too_long', PhylipFormatError, r'Found non-header line .*: "2 20 extra_text"'), ('phylip_invalid_header_too_short', PhylipFormatError, r'Found non-header line .*: " 20"'), ('phylip_invalid_no_header', PhylipFormatError, r'Found non-header line .*: "foo .*"'), ('phylip_invalid_seq_too_long', PhylipFormatError, r'The length of sequence foo is not 20 as specified .*.'), ('phylip_invalid_seq_too_short', PhylipFormatError, r'The length of sequence foo is not 20 as specified .*.'), ('phylip_invalid_too_few_seqs', PhylipFormatError, r'The number of sequences is not .* as specified .*.'), ('phylip_invalid_too_many_seqs', PhylipFormatError, r'The number of sequences is not .* as specified in the header.'), ('phylip_invalid_zero_seq_len', 
PhylipFormatError, r'The number of sequences and the length must be positive.'), ('phylip_invalid_zero_seqs', PhylipFormatError, r'The number of sequences and the length must be positive.'), ]] def test_phylip_to_tabular_msa_invalid_files(self): for fp, error_type, error_msg_regex in self.invalid_files: with self.assertRaisesRegex(error_type, error_msg_regex): _phylip_to_tabular_msa(fp, constructor=DNA) def test_phylip_to_tabular_msa_no_constructor(self): with self.assertRaisesRegex(ValueError, '`constructor`'): _phylip_to_tabular_msa(get_data_path('phylip_dna_3_seqs')) def test_phylip_to_tabular_msa_valid_files(self): for valid_files, components in self.valid_configurations: for valid in valid_files: observed = _phylip_to_tabular_msa(valid, constructor=DNA) expected_seqs = [] expected_index = [] for seq, ID in components: expected_seqs.append(DNA(seq)) expected_index.append(ID) expected = TabularMSA(expected_seqs, index=expected_index) self.assertEqual(observed, expected) class TestWriters(unittest.TestCase): def setUp(self): # ids all same length, seqs longer than 10 chars dna_3_seqs = TabularMSA([ DNA('..ACC-GTTGG..', metadata={'id': "d1"}), DNA('TTACCGGT-GGCC', metadata={'id': "d2"}), DNA('.-ACC-GTTGC--', metadata={'id': "d3"})], minter='id') # id lengths from 0 to 10, with mixes of numbers, characters, and # spaces. 
sequences are shorter than 10 chars variable_length_ids = TabularMSA([ DNA('.-ACGT', metadata={'id': ''}), DNA('TGCA-.', metadata={'id': 'a'}), DNA('.ACGT-', metadata={'id': 'bb'}), DNA('TGCA-.', metadata={'id': '1'}), DNA('AAAAAA', metadata={'id': 'abcdefghij'}), DNA('GGGGGG', metadata={'id': 'ab def42ij'})], minter='id') # sequences with 20 chars = exactly two chunks of size 10 two_chunks = TabularMSA([ DNA('..ACC-GTTGG..AATGC.C', metadata={'id': 'foo'}), DNA('TTACCGGT-GGCCTA-GCAT', metadata={'id': 'bar'})], minter='id') # single sequence with more than two chunks single_seq_long = TabularMSA([ DNA('..ACC-GTTGG..AATGC.C----', metadata={'id': 'foo'})], minter='id') # single sequence with only a single character (minimal writeable # alignment) single_seq_short = TabularMSA([DNA('-', metadata={'id': ''})], minter='id') # alignments that can be written in phylip format self.objs = [dna_3_seqs, variable_length_ids, two_chunks, single_seq_long, single_seq_short] self.fps = map(get_data_path, ['phylip_dna_3_seqs', 'phylip_variable_length_ids', 'phylip_two_chunks', 'phylip_single_seq_long', 'phylip_single_seq_short']) # alignments that cannot be written in phylip format, paired with their # expected error message regexps self.invalid_objs = [ # no seqs (TabularMSA([]), 'one sequence'), # no positions (TabularMSA([DNA('', metadata={'id': "d1"}), DNA('', metadata={'id': "d2"})]), 'one position'), # ids too long (TabularMSA([RNA('ACGU', metadata={'id': "foo"}), RNA('UGCA', metadata={'id': "alongsequenceid"})], minter='id'), '10.*alongsequenceid') ] def test_write(self): for fp, obj in zip(self.fps, self.objs): fh = io.StringIO() _tabular_msa_to_phylip(obj, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_write_invalid_alignment(self): for invalid_obj, error_msg_regexp in self.invalid_objs: fh = io.StringIO() with self.assertRaisesRegex(PhylipFormatError, error_msg_regexp): _tabular_msa_to_phylip(invalid_obj, fh) 
# ensure nothing was written to the file before the error was # thrown obs = fh.getvalue() fh.close() self.assertEqual(obs, '') if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_qseq.py000066400000000000000000000316051464262511300223170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest from skbio import Sequence, DNA, RNA, Protein from skbio import read from skbio.util import get_data_path from skbio.io import QSeqFormatError from skbio.io.format.qseq import _qseq_to_generator, _qseq_sniffer import numpy as np def _drop_kwargs(kwargs, *args): for arg in args: if arg in kwargs: kwargs.pop(arg) class TestQSeqBase(unittest.TestCase): def setUp(self): self.valid_files = [ (get_data_path('qseq_single_seq_sanger'), [ {'variant': 'sanger'}, {'phred_offset': 33}, ], [ {'id': 'sanger_1:3:34:-30:30#0/2', 'sequence': 'ACGTACGTACGTACGTACGTACGTACTTTTTTTTTTACGTACGTACG' 'TACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTAC', 'quality': [26, 26, 29, 31, 33, 34, 36, 37, 38, 39, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93, 93], 'machine_name': 'sanger', 'run_number': 1, 'lane_number': 3, 'tile_number': 34, 'x': -30, 'y': 30, 'index': 0, 'read_number': 2} ]), (get_data_path('qseq_multi_seq_illumina1.3'), [ {'variant': 'illumina1.3'}, {'phred_offset': 64} ], [ {'id': 'illumina_1:3:34:-30:30#0/1', 'sequence': 'ACG....ACGTAC', 
'quality': [50, 53, 2, 2, 2, 2, 50, 2, 3, 5, 6, 7, 8], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 34, 'x': -30, 'y': 30, 'index': 0, 'read_number': 1}, {'id': 'illumina_1:3:35:-30:30#0/2', 'sequence': 'ACGTA.AATAAAC', 'quality': [39, 37, 20, 33, 1, 33, 38, 40, 55, 49, 1, 1, 38], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 35, 'x': -30, 'y': 30, 'index': 0, 'read_number': 2} ]), (get_data_path('qseq_multi_seq_illumina1.3'), [ {'variant': 'illumina1.3', 'filter': False, 'seq_num': 1}, {'phred_offset': 64, 'filter': False, 'seq_num': 2}, {'variant': 'illumina1.3', 'filter': False, 'seq_num': 3, 'constructor': Protein}, {'phred_offset': 64, 'filter': False, 'seq_num': 4, 'constructor': DNA}, ], [ {'id': 'illumina_1:3:34:-30:30#0/1', 'sequence': 'ACG....ACGTAC', 'quality': [50, 53, 2, 2, 2, 2, 50, 2, 3, 5, 6, 7, 8], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 34, 'x': -30, 'y': 30, 'index': 0, 'read_number': 1}, {'id': 'illumina_1:3:34:30:-30#0/1', 'sequence': 'CGGGCATTGCA', 'quality': [3, 7, 7, 7, 3, 33, 51, 36, 7, 3, 1], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 34, 'x': 30, 'y': -30, 'index': 0, 'read_number': 1}, {'id': 'illumina_1:3:35:-30:30#0/2', 'sequence': 'ACGTA.AATAAAC', 'quality': [39, 37, 20, 33, 1, 33, 38, 40, 55, 49, 1, 1, 38], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 35, 'x': -30, 'y': 30, 'index': 0, 'read_number': 2}, {'id': 'illumina_1:3:35:30:-30#0/3', 'sequence': 'CATTTAGGA.TGCA', 'quality': [52, 42, 38, 44, 43, 1, 6, 46, 43, 11, 39, 40, 54, 13], 'machine_name': 'illumina', 'run_number': 1, 'lane_number': 3, 'tile_number': 35, 'x': 30, 'y': -30, 'index': 0, 'read_number': 3} ]) ] self.invalid_files = [ (get_data_path('whitespace_only'), [ {}, {'variant': 'sanger'} ], [ 'blank line', ], QSeqFormatError), (get_data_path('tsv_10_fields'), [ {}, {'variant': 'sanger'}, {'variant': 
'solexa'} ], [ 'read', '[1, 3]' ], QSeqFormatError), (get_data_path('tsv_8_fields'), [ {}, {'variant': 'sanger'}, {'variant': 'solexa'} ], [ '8', '10 or 11' ], QSeqFormatError), (get_data_path('qseq_invalid_filter'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'filter', '0 or 1', ], QSeqFormatError), (get_data_path('qseq_invalid_read'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'read', '[1, 3]', ], QSeqFormatError), (get_data_path('qseq_invalid_x'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'x', 'integer', ], QSeqFormatError), (get_data_path('qseq_invalid_y'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'y', 'integer', ], QSeqFormatError), (get_data_path('qseq_invalid_lane'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'lane', 'positive integer', ], QSeqFormatError), (get_data_path('qseq_invalid_tile'), [ {}, {'phred_offset': 33}, {'variant': 'solexa'}, {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'tile', 'positive integer', ], QSeqFormatError) ] class TestQSeqToGenerator(TestQSeqBase): def setUp(self): super(TestQSeqToGenerator, self).setUp() self.valid_files += [ (get_data_path('empty'), [{}, {'variant': 'sanger'}], []) ] self.invalid_files += [ (get_data_path('qseq_single_seq_sanger'), [ {'variant': 'illumina1.3'}, {'variant': 'illumina1.8'} ], [ 'out of range', '[0, 62]' ], ValueError) ] def test_invalid_files(self): for invalid, kwargs, errors, etype in self.invalid_files: with self.assertRaises(etype) as cm: for kwarg in kwargs: _drop_kwargs(kwarg, 'seq_num') list(_qseq_to_generator(invalid, **kwarg)) for e in errors: self.assertIn(e, str(cm.exception)) def test_valid_files(self): for valid, kwargs, components 
in self.valid_files: for kwarg in kwargs: _drop_kwargs(kwarg, 'seq_num') constructor = kwarg.get('constructor', Sequence) expected = [ constructor( c['sequence'], metadata={'id': c['id'], 'machine_name': c['machine_name'], 'run_number': c['run_number'], 'lane_number': c['lane_number'], 'tile_number': c['tile_number'], 'x': c['x'], 'y': c['y'], 'index': c['index'], 'read_number': c['read_number']}, positional_metadata={ 'quality': np.array(c['quality'], dtype=np.uint8)}) for c in components] observed = list(_qseq_to_generator(valid, **kwarg)) self.assertEqual(len(expected), len(observed)) for o, e in zip(observed, expected): self.assertEqual(o, e) class TestQSeqToSequences(TestQSeqBase): def test_invalid_files(self): for constructor in [Sequence, DNA, RNA, Protein]: for invalid, kwargs, errors, etype in self.invalid_files: with self.assertRaises(etype) as cm: for kwarg in kwargs: _drop_kwargs(kwarg, 'constructor', 'filter') read(invalid, format='qseq', verify=False, into=constructor, **kwarg) for e in errors: self.assertIn(e, str(cm.exception)) def test_valid_files(self): for constructor in [Sequence, DNA, RNA, Protein]: for valid, kwargs, components in self.valid_files: for observed_kwargs in kwargs: expected_kwargs = {} # Currently not validating the alphabet for qseq # files that are read in for this test. 
if hasattr(constructor, 'alphabet'): observed_kwargs['validate'] = False expected_kwargs['validate'] = False _drop_kwargs(observed_kwargs, 'constructor', 'filter') seq_num = observed_kwargs.get('seq_num', 1) c = components[seq_num - 1] expected = constructor( c['sequence'], metadata={'id': c['id'], 'machine_name': c['machine_name'], 'run_number': c['run_number'], 'lane_number': c['lane_number'], 'tile_number': c['tile_number'], 'x': c['x'], 'y': c['y'], 'index': c['index'], 'read_number': c['read_number']}, positional_metadata={ 'quality': np.array(c['quality'], np.uint8)}, **expected_kwargs) observed = read(valid, into=constructor, format='qseq', verify=False, **observed_kwargs) self.assertEqual(observed, expected) class TestQSeqSniffer(TestQSeqBase): def setUp(self): super(TestQSeqSniffer, self).setUp() self.invalid_files += [ (get_data_path('empty'), None, None, None) ] def test_qseq_sniffer_valid_files(self): for valid, _, _ in self.valid_files: self.assertEqual(_qseq_sniffer(valid), (True, {})) def test_qseq_sniffer_invalid_files(self): for invalid, _, _, _ in self.invalid_files: self.assertEqual(_qseq_sniffer(invalid), (False, {})) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_sample_metadata.py000066400000000000000000000053131464262511300244640ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import pandas as pd import tempfile import os from skbio.util import get_data_path from skbio.util._decorator import overrides from skbio.metadata._metadata import SampleMetadata from skbio.io.format.sample_metadata import ( _sample_metadata_sniffer, _sample_metadata_read) class SnifferTests(TestCase): def setUp(self): self.positive_fps = list(map(get_data_path, [ 'sample-metadata-comments-comment-char-id.tsv', 'sample-metadata-comments.tsv', 'sample-metadata-comments-mixed-case.tsv', 'sample-metadata-complete-types-directive.tsv', 'sample-metadata-empty-rows.tsv', 'sample-metadata-leading-trailing-whitespace.tsv', 'sample-metadata-leading-trailing-whitespace-split-id.tsv', ])) self.negative_fps = list(map(get_data_path, [ 'empty', 'whitespace_only', ])) def test_positives(self): for fp in self.positive_fps: self.assertEqual(_sample_metadata_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negative_fps: self.assertEqual(_sample_metadata_sniffer(fp), (False, {})) class TestSampleMetadataReader(TestCase): def test_reader(self): fp = get_data_path('sample-metadata-complete-types-directive.tsv') obs_md = SampleMetadata.read(fp) exp_index = pd.Index(['id1', 'id2', 'id3'], name='id') exp_df = pd.DataFrame({'col1': ['1', '2', '3'], 'col2': ['a', 'b', 'c'], 'col3': ['foo', 'bar', '42']}, index=exp_index) exp_md = SampleMetadata(exp_df) self.assertEqual(obs_md, exp_md) class TestRoundtrip(TestCase): def setUp(self): self.temp_dir_obj = tempfile.TemporaryDirectory( prefix='sample-metadata-temp') self.temp_dir = self.temp_dir_obj.name self.filepath = os.path.join(self.temp_dir, 'metadata.tsv') def tearDown(self): self.temp_dir_obj.cleanup() def test_simple(self): fp = get_data_path('sample-metadata-comments-mixed-case.tsv') md1 = SampleMetadata.read(fp) md1.write(self.filepath) md2 = SampleMetadata.read(self.filepath) self.assertEqual(md1, md2) 
if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_sequence_feature_vocabulary.py000066400000000000000000000073231464262511300271200ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main from skbio.io.format._sequence_feature_vocabulary import ( _parse_loc_str, _parse_section_default, _serialize_location) from skbio.io import FileFormatError from skbio.metadata import IntervalMetadata class Tests(TestCase): def test_parse_section_default(self): lines = [ ['FOO blah blah', ' blah'], ['FOO=blah', ' blah'], ['FOO']] kwargs = [{'join_delimiter': '=', 'return_label': False}, {'label_delimiter': '=', 'join_delimiter': '', 'return_label': True}, {'label_delimiter': '=', 'join_delimiter': '=', 'return_label': True}] expects = ['blah blah=blah', ('FOO', 'blahblah'), ('FOO', '')] for i, j, k in zip(lines, kwargs, expects): self.assertEqual(k, _parse_section_default(i, **j)) def test_parse_loc_str(self): examples = [ '9', # a single base in the presented sequence '3..8', '<3..8', '3..>8', 'complement(3..>8)', 'complement(join(3..>5,<7..9))', 'join(J00194.1:1..9,3..8)', 'join(3..8,J00194.1:1..9)', '1.9', '1^2'] expects = [ ([(8, 9)], [(False, False)], {'strand': '+'}), ([(2, 8)], [(False, False)], {'strand': '+'}), ([(2, 8)], [(True, False)], {'strand': '+'}), ([(2, 8)], [(False, True)], {'strand': '+'}), ([(2, 8)], [(False, True)], {'strand': '-'}), ([(2, 5), (6, 9)], [(False, True), (True, False)], {'strand': '-'}), ([(2, 8)], [(False, False)], {'strand': '+'}), ([(2, 8)], [(False, False)], {'strand': '+'}), ([(0, 9)], [(False, False)], {'strand': '+'}), ([(0, 1)], [(False, False)], 
{'strand': '+'})] for example, expect in zip(examples, expects): parsed = _parse_loc_str(example) self.assertEqual(parsed, expect) def test_parse_loc_str_invalid(self): examples = [ 'abc', '3-8'] for example in examples: with self.assertRaisesRegex(FileFormatError, r'Could not parse location string: ' '"%s"' % example): _parse_loc_str(example) def test_serialize_location(self): imd = IntervalMetadata(9) i1 = imd.add([(0, 1)]) self.assertEqual(_serialize_location(i1), '1') i2 = imd.add([(0, 2)], [(True, True)]) self.assertEqual(_serialize_location(i2), '<1..>2') i3 = imd.add([(0, 2)], [(False, True)]) self.assertEqual(_serialize_location(i3), '1..>2') i4 = imd.add([(0, 2)], [(True, False)]) self.assertEqual(_serialize_location(i4), '<1..2') i5 = imd.add([(0, 2), (3, 9)], metadata={'strand': '-'}) self.assertEqual(_serialize_location(i5), 'complement(join(1..2,4..9))') i6 = imd.add([(0, 2), (3, 9)], [(True, False), (False, True)], metadata={'strand': '-'}) self.assertEqual(_serialize_location(i6), 'complement(join(<1..2,4..>9))') if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/io/format/tests/test_stockholm.py000066400000000000000000001217011464262511300233460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import pandas as pd import io import unittest from collections import OrderedDict from skbio import TabularMSA, Protein, DNA, RNA from skbio.io import StockholmFormatError from skbio.io.format.stockholm import (_stockholm_to_tabular_msa, _tabular_msa_to_stockholm, _stockholm_sniffer) from skbio.util import get_data_path class TestStockholmSniffer(unittest.TestCase): def setUp(self): self.positives = [get_data_path(e) for e in [ 'stockholm_extensive', 'stockholm_minimal', 'stockholm_rna', 'stockholm_runon_gf_with_whitespace', 'stockholm_runon_gf_no_whitespace', 'stockholm_duplicate_sequence_names', 'stockholm_duplicate_gr', 'stockholm_duplicate_gc', 'stockholm_invalid_nonexistent_gr', 'stockholm_invalid_nonexistent_gs', 'stockholm_no_data', 'stockholm_blank_lines', 'stockholm_differing_gc_data_length', 'stockholm_differing_gr_data_length', 'stockholm_differing_seq_lengths', 'stockholm_duplicate_sequence_names', 'stockholm_duplicate_tree_ids', 'stockholm_extensive_mixed', 'stockholm_invalid_data_type', 'stockholm_malformed_gf_line', 'stockholm_malformed_gs_line', 'stockholm_malformed_gr_line', 'stockholm_malformed_gc_line', 'stockholm_malformed_data_line', 'stockholm_metadata_only', 'stockholm_multiple_msa', 'stockholm_multiple_trees', 'stockholm_runon_gs_with_whitespace', 'stockholm_runon_gs_no_whitespace', 'stockholm_single_tree_with_id', 'stockholm_single_tree_without_id', 'stockholm_whitespace_only_lines', 'stockholm_all_data_types', 'stockholm_two_of_each_metadata', 'stockholm_data_only', 'stockholm_nonstring_labels', 'stockholm_missing_reference_items', 'stockholm_multiple_references', 'stockholm_runon_references', 'stockholm_runon_references_mixed', 'stockholm_single_reference', 'stockholm_missing_reference_items', 'stockholm_missing_rn_tag', 'stockholm_different_padding', 'stockholm_multi_line_tree_no_id', 'stockholm_multi_line_tree_with_id', 'stockholm_multiple_multi_line_trees' 
]] self.negatives = [get_data_path(e) for e in [ 'stockholm_missing_header', 'empty', 'whitespace_only' ]] def test_positives(self): for fp in self.positives: self.assertEqual(_stockholm_sniffer(fp), (True, {})) def test_negatives(self): for fp in self.negatives: self.assertEqual(_stockholm_sniffer(fp), (False, {})) class TestStockholmReader(unittest.TestCase): def test_stockholm_extensive(self): fp = get_data_path('stockholm_extensive') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) exp = TabularMSA([Protein('MTCRAQLIAVPRASSLAE..AIACAQKM....' 'RVSRVPVYERS', positional_metadata={'SA': list('9998877564' '53524252..' '55152525..' '..36463774' '777')}), Protein('EVMLTDIPRLHINDPIMK..GFGMVINN....' '..GFVCVENDE', metadata={'OS': 'Bacillus subtilis'}, positional_metadata={'SS': list('CCCCCCCHHHH' 'HHHHHHH..HE' 'EEEEEE....E' 'EEEEEE' 'EEEH')}), Protein('EVMLTDIPRLHINDPIMK..GFGMVINN...' '...GFVCVENDE', positional_metadata={'AS': list('___________' '_____*_____' '___________' '________' '__'), 'IN': list('___________' '_1_________' '_____2_____' '_____0_' '___')})], metadata={'ID': 'CBS', 'AC': 'PF00571', 'AU': 'Bateman A', 'SQ': '67'}, positional_metadata={'SS_cons': list('CCCCCHHHHHHHH' 'HHHHH..EEEEEE' 'EE....EEEEEEE' 'EEEH')}, index=['O83071/192-246', 'O31698/88-139', 'O31699/88-139']) self.assertEqual(msa, exp) def test_stockholm_extensive_mixed(self): fp = get_data_path('stockholm_extensive_mixed') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) exp = TabularMSA([Protein('MTCRAQLIAVPRASSLAE..AIACAQKM....' 'RVSRVPVYERS', positional_metadata={'SA': list('9998877564' '53524252..' '55152525..' '..36463774' '777')}), Protein('EVMLTDIPRLHINDPIMK..GFGMVINN....' '..GFVCVENDE', metadata={'OS': 'Bacillus subtilis'}, positional_metadata={'SS': list('CCCCCCCHHHH' 'HHHHHHH..HE' 'EEEEEE....E' 'EEEEEE' 'EEEH')}), Protein('EVMLTDIPRLHINDPIMK..GFGMVINN...' 
'...GFVCVENDE', positional_metadata={'AS': list('___________' '_____*_____' '___________' '________' '__'), 'IN': list('___________' '_1_________' '_____2_____' '_____0_' '___')})], metadata={'ID': 'CBS', 'AC': 'PF00571', 'AU': 'Bateman A', 'SQ': '67'}, positional_metadata={'SS_cons': list('CCCCCHHHHHHHH' 'HHHHH..EEEEEE' 'EE....EEEEEEE' 'EEEH')}, index=['O83071/192-246', 'O31698/88-139', 'O31699/88-139']) self.assertEqual(msa, exp) def test_stockholm_minimal(self): fp = get_data_path('stockholm_minimal') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([DNA('TGTGTCGCAGTTGTCGTTTG')], index=['0235244']) self.assertEqual(msa, exp) def test_stockholm_rna(self): fp = get_data_path('stockholm_rna') msa = _stockholm_to_tabular_msa(fp, constructor=RNA) exp = TabularMSA([RNA('AAGGGUUAUUUAUAUACUUU'), RNA('UGCUAAGAGUGGGGAUGAUU'), RNA('GCCACAACCGAUUAGAUAGA'), RNA('UUAGAAACCGAUGGACCGAA')], metadata={'AC': 'G2134T23', 'ID': 'ARD'}, positional_metadata=( {'AC_cons': list('GGGACUGGACAUCUAUUCAG')}), index=['RTC2231', 'RTF2124', 'RTH3322', 'RTB1512']) self.assertEqual(msa, exp) def test_stockholm_runon_gf(self): fp = get_data_path('stockholm_runon_gf_no_whitespace') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([DNA('ACTGGTTCAATG')], metadata={'CC': 'CBS domains are small intracellular' ' modules mostly found in 2 or four ' 'copies within a protein.'}, index=['GG1344']) self.assertEqual(msa, exp) fp = get_data_path('stockholm_runon_gf_with_whitespace') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) self.assertEqual(msa, exp) def test_stockholm_runon_gs(self): fp = get_data_path('stockholm_runon_gs_no_whitespace') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([DNA('ATCGTTCAGTG', metadata={'LN': 'This is a runon GS line.'})], index=['seq1']) self.assertEqual(msa, exp) fp = get_data_path('stockholm_runon_gs_with_whitespace') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) self.assertEqual(msa, exp) def 
test_stockholm_metadata_only(self): fp = get_data_path('stockholm_metadata_only') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NM': 'Kestrel Gorlick', 'DT': 'February 5th, 2016'}) self.assertEqual(msa, exp) def test_stockholm_no_data(self): fp = get_data_path('stockholm_no_data') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([]) self.assertEqual(msa, exp) def test_stockholm_with_blank_lines(self): fp = get_data_path('stockholm_blank_lines') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'AL': 'ABCD', 'NM': '1234'}) self.assertEqual(msa, exp) def test_stockholm_with_whitespace_only_lines(self): fp = get_data_path('stockholm_whitespace_only_lines') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'AL': 'ABCD', 'NM': '1234'}) self.assertEqual(msa, exp) def test_stockholm_single_tree_without_id(self): fp = get_data_path('stockholm_single_tree_without_id') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': 'ABCD'}) self.assertEqual(msa, exp) def test_stockholm_single_tree_with_id(self): fp = get_data_path('stockholm_single_tree_with_id') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': {'tree1': 'ABCD'}}) self.assertEqual(msa, exp) def test_stockholm_multiple_trees(self): fp = get_data_path('stockholm_multiple_trees') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': {'tree1': 'ABCD', 'tree2': 'EFGH', 'tree3': 'IJKL'}}) self.assertEqual(msa, exp) def test_stockhom_single_reference(self): fp = get_data_path('stockholm_single_reference') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RM', '123456789'), ('RT', 'A Title'), ('RA', 'The Author'), ('RL', 'A Location'), ('RC', 'Comment')])]}) self.assertEqual(msa, exp) def test_stockholm_multiple_references(self): fp 
= get_data_path('stockholm_multiple_references') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RM', '123456789'), ('RT', 'Title 1'), ('RA', 'Author 1'), ('RL', 'Location 1'), ('RC', 'Comment 1')]), OrderedDict([('RM', '987654321'), ('RT', 'Title 2'), ('RA', 'Author 2'), ('RL', 'Location 2'), ('RC', 'Comment 2')]), OrderedDict([('RM', '132465879'), ('RT', 'Title 3'), ('RA', 'Author 3'), ('RL', 'Location 3'), ('RC', 'Comment 3')])]}) self.assertEqual(msa, exp) def test_stockholm_runon_references(self): fp = get_data_path('stockholm_runon_references') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RM', '123456789'), ('RT', 'A Runon Title'), ('RA', 'The Author'), ('RL', 'A Location'), ('RC', 'A Runon Comment')])]}) self.assertEqual(msa, exp) def test_stockholm_mixed_runon_references(self): fp = get_data_path('stockholm_runon_references_mixed') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RC', 'A Runon Comment'), ('RM', '123456789'), ('RT', 'A Runon Title'), ('RA', 'The Author'), ('RL', 'A Location')])]}) self.assertEqual(msa, exp) def test_stockholm_to_msa_different_padding(self): fp = get_data_path('stockholm_different_padding') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RC', 'A Runon Comment Without ' 'Whitespace')]), OrderedDict([('RC', 'A Runon Comment With ' 'Whitespace')])]}) self.assertEqual(msa, exp) def test_stockholm_handles_missing_reference_items(self): fp = get_data_path('stockholm_missing_reference_items') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA( [], metadata={'RN': [OrderedDict([('RT', 'A Title'), ('RA', 'The Author')])]}) self.assertEqual(msa, exp) def test_stockholm_multi_line_tree_no_id(self): fp = get_data_path('stockholm_multi_line_tree_no_id') msa = 
_stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': 'ABCDEFGH'}) self.assertEqual(msa, exp) def test_stockholm_multiple_multi_line_trees(self): fp = get_data_path('stockholm_multiple_multi_line_trees') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': {'tree1': 'ABCDEFGH', 'tree2': 'IJKLMNOP'}}) self.assertEqual(msa, exp) def test_stockholm_multi_line_tree_with_id(self): fp = get_data_path('stockholm_multi_line_tree_with_id') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) exp = TabularMSA([], metadata={'NH': {'tree1': 'ABCDEFGH'}}) self.assertEqual(msa, exp) def test_multiple_msa_file(self): fp = get_data_path('stockholm_multiple_msa') msa = _stockholm_to_tabular_msa(fp, constructor=RNA) exp = TabularMSA([RNA('AAGGGUUAUUUAUAUACUUU'), RNA('UGCUAAGAGUGGGGAUGAUU'), RNA('GCCACAACCGAUUAGAUAGA'), RNA('UUAGAAACCGAUGGACCGAA')], metadata={'AC': 'G2134T23', 'ID': 'ARD'}, positional_metadata=( {'AC_cons': list('GGGACUGGACAUCUAUUCAG')}), index=['RTC2231', 'RTF2124', 'RTH3322', 'RTB1512']) self.assertEqual(msa, exp) def test_stockholm_maintains_order(self): fp = get_data_path('stockholm_two_of_each_metadata') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) msa_order = list(msa.metadata.items()) exp_order = [('NM', 'Kestrel Gorlick'), ('DT', 'February 5th, 2016')] self.assertEqual(msa_order, exp_order) msa_order = list(msa[0].metadata.items()) exp_order = [('AL', 'ABCD'), ('NS', '1234')] self.assertEqual(msa_order, exp_order) msa_order = list(msa.positional_metadata.columns) exp_order = ['SS_cons', 'AS_cons'] self.assertEqual(msa_order, exp_order) msa_order = list(msa[0].positional_metadata.columns) exp_order = ['SS', 'AS'] self.assertEqual(msa_order, exp_order) def test_stockholm_duplicate_tree_id_error(self): fp = get_data_path('stockholm_duplicate_tree_ids') with self.assertRaisesRegex(StockholmFormatError, r'Tree.*tree1.*in file.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def 
test_stockholm_missing_reference_number_error(self): fp = get_data_path('stockholm_missing_rn_tag') with self.assertRaisesRegex(StockholmFormatError, r"Expected 'RN'.*'RL' tag."): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_nonexistent_gr_error(self): fp = get_data_path('stockholm_invalid_nonexistent_gr') with self.assertRaisesRegex(StockholmFormatError, r'GS or GR.*nonexistent ' 'sequence.*RL1355.'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_nonexistent_gs_error(self): fp = get_data_path('stockholm_invalid_nonexistent_gs') with self.assertRaisesRegex(StockholmFormatError, r'GS or GR.*nonexistent sequence.*AC14.'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_duplicate_sequence_names_error(self): fp = get_data_path('stockholm_duplicate_sequence_names') with self.assertRaisesRegex( StockholmFormatError, r'duplicate sequence name.*ASR132.*supported by the reader.'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_duplicate_gr_error(self): fp = get_data_path('stockholm_duplicate_gr') with self.assertRaisesRegex(StockholmFormatError, r'Found duplicate GR.*OS.*LFDR3.*supported' ' by the reader.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_duplicate_gc_error(self): fp = get_data_path('stockholm_duplicate_gc') with self.assertRaisesRegex(StockholmFormatError, r'Found duplicate GC.*SS_cons.*supported ' 'by the reader.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_empty_file_error(self): fp = get_data_path('empty') with self.assertRaisesRegex(StockholmFormatError, r'File is empty.'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_missing_header_error(self): fp = get_data_path('stockholm_missing_header') with self.assertRaisesRegex(StockholmFormatError, r'File missing.*header'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_missing_footer_error(self): fp = get_data_path('stockholm_missing_footer') with self.assertRaisesRegex(StockholmFormatError, r'Final line.*only "//".'): 
_stockholm_to_tabular_msa(fp, constructor=DNA) def test_data_type_error(self): fp = get_data_path('stockholm_invalid_data_type') with self.assertRaisesRegex(StockholmFormatError, r"Unrecognized.*'#=GZ"): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_malformed_gf_line_error(self): fp = get_data_path('stockholm_malformed_gf_line') with self.assertRaisesRegex(StockholmFormatError, r'Line contains 2.*must contain.*3.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_malformed_gs_line_error(self): fp = get_data_path('stockholm_malformed_gs_line') with self.assertRaisesRegex(StockholmFormatError, r'Line contains 3.*must contain.*4.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_malformed_gr_line_error(self): fp = get_data_path('stockholm_malformed_gr_line') with self.assertRaisesRegex(StockholmFormatError, r'Line contains 2.*must contain.*4.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_malformed_gc_line_error(self): fp = get_data_path('stockholm_malformed_gc_line') with self.assertRaisesRegex(StockholmFormatError, r'Line contains 2.*must contain.*3.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_malformed_data_line_error(self): fp = get_data_path('stockholm_malformed_data_line') with self.assertRaisesRegex(StockholmFormatError, r'Line contains 1.*must contain.*2.'): _stockholm_to_tabular_msa(fp, constructor=DNA) def test_differing_sequence_lengths_error(self): fp = get_data_path('stockholm_differing_seq_lengths') with self.assertRaisesRegex(ValueError, r'Each sequence.*11 != 10'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_differing_data_lengths_gr_error(self): fp = get_data_path('stockholm_differing_gr_data_length') with self.assertRaisesRegex(ValueError, r'Number.*7.*(8).'): _stockholm_to_tabular_msa(fp, constructor=RNA) def test_differing_data_lengths_gc_error(self): fp = get_data_path('stockholm_differing_gc_data_length') with self.assertRaisesRegex(ValueError, r'Number.*12.*(10).'): 
_stockholm_to_tabular_msa(fp, constructor=RNA) def test_no_constructor_error(self): fp = get_data_path('empty') with self.assertRaisesRegex(ValueError, r'Must provide.*parameter.'): _stockholm_to_tabular_msa(fp) def test_unsupported_constructor_error(self): fp = get_data_path('empty') with self.assertRaisesRegex(TypeError, r'`constructor`.*`GrammaredSequence`.'): _stockholm_to_tabular_msa(fp, constructor=TabularMSA) class TestStockholmWriter(unittest.TestCase): def test_msa_to_stockholm_extensive(self): fp = get_data_path('stockholm_all_data_types') msa = TabularMSA([DNA('GAGGCCATGCCCAGGTGAAG', metadata=OrderedDict([('DT', 'February 1, 2016'), ('NM', 'Unknown')])), DNA('ACCTGAGCCACAGTAGAAGT'), DNA('CCCTTCGCTGGAAATGTATG', metadata={'DT': 'Unknown'}, positional_metadata=OrderedDict([('AS', list('CCGAAAGT' 'CGTTCGA' 'AAATG')), ('SS', list('GGCGAGTC' 'GTTCGAGC' 'TGG' 'C'))]))], metadata=OrderedDict([('NM', 'Kestrel Gorlick'), ('DT', 'February 11, 2016'), ('FN', 'Writer test file')]), positional_metadata=OrderedDict([('AS_cons', list('CGTTCGTTCTAAC' 'AATTCCA')), ('SS_cons', list('GGCGCTACGACCT' 'ACGACCG'))]), index=['seq1', 'seq2', 'seq3']) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_minimal(self): fp = get_data_path('stockholm_minimal') msa = TabularMSA([DNA('TGTGTCGCAGTTGTCGTTTG')], index=['0235244']) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_single_tree(self): fp = get_data_path('stockholm_single_tree_without_id') msa = TabularMSA([], metadata=OrderedDict([('NH', 'ABCD')])) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_single_tree_as_dict(self): fp = 
get_data_path('stockholm_single_tree_with_id') msa = TabularMSA([], metadata={'NH': {'tree1': 'ABCD'}}) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_multiple_trees(self): fp = get_data_path('stockholm_multiple_trees') msa = TabularMSA([], metadata=OrderedDict([('NH', OrderedDict([('tree1', 'ABCD'), ('tree2', 'EFGH'), ('tree3', 'IJKL')]))])) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_single_reference(self): fp = get_data_path('stockholm_single_reference') msa = TabularMSA( [], metadata={'RN': [OrderedDict([('RM', '123456789'), ('RT', 'A Title'), ('RA', 'The Author'), ('RL', 'A Location'), ('RC', 'Comment')])]}) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_multiple_references(self): fp = get_data_path('stockholm_multiple_references') msa = TabularMSA( [], metadata={'RN': [OrderedDict([('RM', '123456789'), ('RT', 'Title 1'), ('RA', 'Author 1'), ('RL', 'Location 1'), ('RC', 'Comment 1')]), OrderedDict([('RM', '987654321'), ('RT', 'Title 2'), ('RA', 'Author 2'), ('RL', 'Location 2'), ('RC', 'Comment 2')]), OrderedDict([('RM', '132465879'), ('RT', 'Title 3'), ('RA', 'Author 3'), ('RL', 'Location 3'), ('RC', 'Comment 3')])]}) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_data_only(self): fp = get_data_path('stockholm_data_only') msa = TabularMSA([RNA('ACUCCGACAUGCUCC'), RNA('UAGUGCCGAACGCUG'), RNA('GUGUGGGCGUGAUUC')], index=['seq1', 'seq2', 'seq3']) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) 
as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_nonstring_values(self): fp = get_data_path('stockholm_nonstring_labels') msa = TabularMSA([DNA('ACTG', metadata=OrderedDict([(8, 123)]), positional_metadata=OrderedDict([(1.0, [1, 2, 3, 4])]) )], metadata=OrderedDict([(1.3, 2857)]), positional_metadata=OrderedDict([(25, [4, 3, 2, 1])]), index=[11214]) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_msa_to_stockholm_empty(self): fp = get_data_path('stockholm_no_data') msa = TabularMSA([]) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_extensive(self): fp = get_data_path('stockholm_extensive') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_minimal(self): fp = get_data_path('stockholm_minimal') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_single_tree(self): fp = get_data_path('stockholm_single_tree_without_id') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_multiple_trees(self): fp = get_data_path('stockholm_multiple_trees') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_single_reference(self): fp 
= get_data_path('stockholm_single_reference') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_multiple_references(self): fp = get_data_path('stockholm_multiple_references') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_missing_references(self): fp = get_data_path('stockholm_missing_reference_items') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_data_only(self): fp = get_data_path('stockholm_data_only') msa = _stockholm_to_tabular_msa(fp, constructor=RNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_nonstring_index_values(self): fp = get_data_path('stockholm_nonstring_labels') msa = _stockholm_to_tabular_msa(fp, constructor=DNA) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_round_trip_empty(self): fp = get_data_path('stockholm_no_data') msa = _stockholm_to_tabular_msa(fp, constructor=Protein) fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) obs = fh.getvalue() fh.close() with io.open(fp) as fh: exp = fh.read() self.assertEqual(obs, exp) def test_unoriginal_index_error(self): msa = TabularMSA([DNA('ATCGCCAGCT'), DNA('TTGTGCTGGC')], index=['seq1', 'seq1']) with self.assertRaisesRegex(StockholmFormatError, r'index labels must be unique.'): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def 
test_unoriginal_gr_feature_names_error(self): pos_metadata_dataframe = pd.DataFrame( [ list('GAGCAAGCCACTAGA'), list('TCCTTGAACTACCCG'), list('TCAGCTCTGCAGCGT'), list('GTCAGGCGCTCGGTG') ], index=['AC', 'SS', 'AS', 'AC'] ).T msa = TabularMSA([DNA('CGTCAATCTCGAACT', positional_metadata=pos_metadata_dataframe)], index=['seq1']) with self.assertRaisesRegex(StockholmFormatError, r'Sequence-specific positional metadata.*' 'must be unique. Found 1 duplicate'): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_unoriginal_gc_feature_names_error(self): pos_metadata_dataframe = pd.DataFrame( [ list('GAGCAAGCCACTAGA'), list('TCCTTGAACTACCCG'), list('TCAGCTCTGCAGCGT'), list('GTCAGGCGCTCGGTG') ], index=['AC', 'SS', 'SS', 'AC'] ).T msa = TabularMSA([DNA('CCCCTGCTTTCGTAG')], positional_metadata=pos_metadata_dataframe) with self.assertRaisesRegex(StockholmFormatError, r'Multiple sequence alignment positional ' 'metadata.*must be unique. Found 2 ' 'duplicate'): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_gr_wrong_dataframe_item_length_error(self): seq1 = list('GAGCAAGCCACTAGA') seq1.append('GG') pos_metadata_dataframe = pd.DataFrame({'AC': seq1, 'SS': list('TCCTTGAACTACCCGA'), 'AS': list('TCAGCTCTGCAGCGTT')}) msa = TabularMSA([DNA('TCCTTGAACTACCCGA', positional_metadata=pos_metadata_dataframe)]) with self.assertRaisesRegex(StockholmFormatError, r'Sequence-specific positional metadata.*' r'must contain a single character.*Found ' r'value\(s\) in column AC'): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_gc_wrong_dataframe_item_length_error(self): seq1 = list('GAGCAAGCCACTAGA') seq1.append('GG') pos_metadata_dataframe = pd.DataFrame({'AC': seq1, 'SS': list('TCCTTGAACTACCCGA'), 'AS': list('TCAGCTCTGCAGCGTT')}) msa = TabularMSA([DNA('TCCTTGAACTACCCGA')], positional_metadata=pos_metadata_dataframe) message = (r'Multiple sequence alignment positional metadata.*must ' r'contain a single character.*Found value\(s\) in column ' 'AC') with 
self.assertRaisesRegex(StockholmFormatError, message): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_rn_not_list_of_refs_error(self): msa = TabularMSA([], metadata={'RN': '1'}) with self.assertRaisesRegex(StockholmFormatError, r"Expected 'RN'.*list of reference" ".*got '1'"): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_rn_data_not_in_dict_error(self): msa = TabularMSA([], metadata={'RN': [OrderedDict([('RL', 'Flagstaff')]), 'Incorrect Item']}) with self.assertRaisesRegex(StockholmFormatError, r"Expected reference information.*stored" " as a dictionary, found.*2 stored as " "'str'"): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) def test_invalid_reference_tag_error(self): msa = TabularMSA([], metadata={'RN': [OrderedDict([('RL', 'Flagstaff'), ('foo', 'bar')])]}) with self.assertRaisesRegex(StockholmFormatError, r"Invalid reference.*foo' found " "in.*1.*Valid reference tags are:"): fh = io.StringIO() _tabular_msa_to_stockholm(msa, fh) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/format/tests/test_taxdump.py000066400000000000000000000175011464262511300230270ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest from io import StringIO import pandas as pd import numpy as np from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.util._testing import _data_frame_to_default_int_type from skbio.io.format.taxdump import _taxdump_to_data_frame class TestTaxdumpReader(unittest.TestCase): def test_nodes_default(self): # subset of a real NCBI taxonomy nodes.dmp file fp = get_data_path('taxdump_nodes.dmp') obs = _taxdump_to_data_frame(fp, scheme='nodes') exp = pd.DataFrame([ [1, 1, 'no rank', np.nan, 8, False, 1, False, 0, False, False, False, np.nan], [2, 131567, 'superkingdom', np.nan, 0, False, 11, False, 0, False, False, False, np.nan], [543, 91347, 'family', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [548, 570, 'species', 'KA', 0, True, 11, True, 0, True, True, False, np.nan], [561, 543, 'genus', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [562, 561, 'species', 'EC', 0, True, 11, True, 0, True, True, False, np.nan], [570, 543, 'genus', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [620, 543, 'genus', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [622, 620, 'species', 'SD', 0, True, 11, True, 0, True, True, False, np.nan], [766, 28211, 'order', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [1224, 2, 'phylum', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [1236, 1224, 'class', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [28211, 1224, 'class', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [91347, 1236, 'order', np.nan, 0, True, 11, True, 0, True, False, False, np.nan], [118884, 1236, 'no rank', np.nan, 0, True, 11, True, 0, True, True, False, np.nan], [126792, 36549, 'species', 'PP', 0, True, 11, True, 0, True, True, False, np.nan], [131567, 1, 'no rank', np.nan, 8, True, 1, True, 0, True, True, False, np.nan], [585056, 562, 'no rank', np.nan, 0, True, 
11, True, 0, True, True, False, np.nan], [1038927, 562, 'no rank', np.nan, 0, True, 11, True, 0, True, True, False, np.nan], [2580236, 488338, 'species', 'SE', 7, True, 11, True, 0, True, False, False, np.nan]], columns=[ 'tax_id', 'parent_tax_id', 'rank', 'embl_code', 'division_id', 'inherited_div_flag', 'genetic_code_id', 'inherited_GC_flag', 'mitochondrial_genetic_code_id', 'inherited_MGC_flag', 'GenBank_hidden_flag', 'hidden_subtree_root_flag', 'comments']).set_index('tax_id') exp['comments'] = exp['comments'].astype('O') _data_frame_to_default_int_type(exp) assert_data_frame_almost_equal(obs, exp) def test_names_default(self): # subset of a real NCBI taxonomy names.dmp file fp = get_data_path('taxdump_names.dmp') obs = _taxdump_to_data_frame(fp, scheme='names') exp = pd.DataFrame([ [1, 'root', np.nan, 'scientific name'], [2, 'Bacteria', 'Bacteria ', 'scientific name'], [2, 'eubacteria', np.nan, 'genbank common name'], [543, 'Enterobacteriaceae', np.nan, 'scientific name'], [548, 'Klebsiella aerogenes', np.nan, 'scientific name'], [561, 'Escherichia', np.nan, 'scientific name'], [562, '"Bacillus coli" Migula 1895', np.nan, 'authority'], [562, 'Escherichia coli', np.nan, 'scientific name'], [562, 'Escherichia/Shigella coli', np.nan, 'equivalent name'], [570, 'Donovania', np.nan, 'synonym'], [570, 'Klebsiella', np.nan, 'scientific name'], [620, 'Shigella', np.nan, 'scientific name'], [622, 'Shigella dysenteriae', np.nan, 'scientific name'], [766, 'Rickettsiales', np.nan, 'scientific name'], [1224, 'Proteobacteria', np.nan, 'scientific name'], [1236, 'Gammaproteobacteria', np.nan, 'scientific name'], [28211, 'Alphaproteobacteria', np.nan, 'scientific name'], [91347, 'Enterobacterales', np.nan, 'scientific name'], [118884, 'unclassified Gammaproteobacteria', np.nan, 'scientific name'], [126792, 'Plasmid pPY113', np.nan, 'scientific name'], [131567, 'cellular organisms', np.nan, 'scientific name'], [585056, 'Escherichia coli UMN026', np.nan, 'scientific name'], 
[1038927, 'Escherichia coli O104:H4', np.nan, 'scientific name'], [2580236, 'synthetic Escherichia coli Syn61', np.nan, 'scientific name']], columns=['tax_id', 'name_txt', 'unique_name', 'name_class']).set_index('tax_id') assert_data_frame_almost_equal(obs, exp) def test_nodes_slim(self): fp = get_data_path('taxdump_nodes.dmp') obs = _taxdump_to_data_frame(fp, scheme='nodes_slim') exp = pd.DataFrame([ [1, 1, 'no rank'], [2, 131567, 'superkingdom'], [543, 91347, 'family'], [548, 570, 'species'], [561, 543, 'genus'], [562, 561, 'species'], [570, 543, 'genus'], [620, 543, 'genus'], [622, 620, 'species'], [766, 28211, 'order'], [1224, 2, 'phylum'], [1236, 1224, 'class'], [28211, 1224, 'class'], [91347, 1236, 'order'], [118884, 1236, 'no rank'], [126792, 36549, 'species'], [131567, 1, 'no rank'], [585056, 562, 'no rank'], [1038927, 562, 'no rank'], [2580236, 488338, 'species']], columns=['tax_id', 'parent_tax_id', 'rank']).set_index('tax_id') _data_frame_to_default_int_type(exp) assert_data_frame_almost_equal(obs, exp) def test_custom_scheme(self): fs = StringIO('\n'.join(map('\t|\t'.join, [ ('a', 'a'), ('b', 'a'), ('c', 'a') ]))) obs = _taxdump_to_data_frame(fs, scheme={'self': str, 'parent': str}) exp = pd.DataFrame([ ['a', 'a'], ['b', 'a'], ['c', 'a']], columns=['self', 'parent']).set_index('self') assert_data_frame_almost_equal(obs, exp) def test_invalid_scheme(self): fp = get_data_path('taxdump_names.dmp') with self.assertRaises(ValueError) as ctx: _taxdump_to_data_frame(fp, scheme='hello') self.assertEqual(str(ctx.exception), 'Invalid taxdump column scheme: "hello".') def test_invalid_id(self): fs = StringIO('\n'.join(map('\t|\t'.join, [ ('1', '2', 'family'), ('3', '4', 'genus'), ('x', '6', 'species'), # 'x' is not a number ]))) with self.assertRaises(ValueError) as ctx: _taxdump_to_data_frame(fs, scheme='nodes_slim') self.assertEqual(str(ctx.exception), 'Invalid taxdump file format.') if __name__ == '__main__': unittest.main() 
scikit-bio-0.6.2/skbio/io/registry.py000066400000000000000000001240271464262511300175260ustar00rootroot00000000000000r"""I/O Registry (:mod:`skbio.io.registry`) ======================================= .. currentmodule:: skbio.io.registry Classes ------- .. autosummary:: :toctree: IORegistry Format Functions --------- .. autosummary:: :toctree: create_format Exceptions ---------- .. autosummary:: DuplicateRegistrationError InvalidRegistrationError Creating a new format for scikit-bio ------------------------------------ scikit-bio makes it simple to add new file formats to its I/O registry. scikit-bio maintains a singleton of the :class:`IORegistry` class called `io_registry`. This is where all scikit-bio file formats are registered. One could also instantiate their own :class:`IORegistry`, but that is not the focus of this tutorial. The first step to creating a new format is to add a submodule in `skbio/io/format/` named after the file format you are implementing. For example, if the format you are implementing is called `myformat` then you would create a file called `skbio/io/format/myformat.py`. The next step is to import the :func:`create_format` factory from :mod:`skbio.io`. This will allow you to create a new :class:`Format` object that `io_registry` will know about. Ideally you should name the result of :func:`create_format` as your file name. For example: .. code-block:: python from skbio.io import create_format myformat = create_format('myformat') The `myformat` object is what we will use to register our new functionality. At this point you should evaulate whether your format is binary or text. If your format is binary, your :func:`create_format` call should look like this: .. code-block:: python myformat = create_format('myformat', encoding='binary') Alternatively if your format is text and has a specific encoding or newline handling you can also specify that: .. 
code-block:: python myformat = create_format('myformat', encoding='ascii', newline='\n') This will ensure that our registry will open files with a default encoding of `'ascii'` for `'myformat'` and expect all newlines to be `'\n'` characters. Having worked out these details, we are ready to register the actual functionality of our format (e.g., sniffer, readers, and writers). To create a sniffer simply decorate the following onto your sniffer function: .. code-block:: python @myformat.sniffer() def _myformat_sniffer(fh): # do something with `fh` to determine the membership of the file For futher details on sniffer functions see :func:`Format.sniffer`. Creating a reader is very similar, but has one difference: .. code-block:: python @myformat.reader(SomeSkbioClass) def _myformat_to_some_skbio_class(fh, kwarg1='default', extra=FileSentinel): # parse `fh` and return a SomeSkbioClass instance here # `extra` will also be an open filehandle if provided else None Here we bound a function to a specific class. We also demonstrated using our FileSentinel object to indicate to the registry that this reader can take auxilary files that should be handled in the same way as the primary file. For futher details on reader functions see :func:`Format.reader`. Creating a writer is about the same: .. code-block:: python @myformat.writer(SomeSkbioClass) def _some_skbio_class_to_myformat(obj, fh, kwarg1='whatever', extra=FileSentinel): # write the contents of `obj` into `fh` and whatever else into `extra` # do not return anything, it will be ignored This is exactly the same as the `reader` above just in reverse, we also receive the object we are writing as the first parameter instead of the file (which is the second one). For further details on writer functions see :func:`Format.writer`. .. note:: When raising errors in readers and writers, the error should be a subclass of ``FileFormatError`` specific to your new format. 
Once you are satisfied with the functionality, you will need to ensure that `skbio/io/__init__.py` contains an import of your new submodule so the decorators are executed. Add the function ``import_module('skbio.io.format.myformat')`` with your module name to the existing list. .. note:: Because scikit-bio handles all of the I/O boilerplate, you only need to unit-test the actual business logic of your `readers`, `writers`, and `sniffers`. Reserved Keyword Arguments -------------------------- The following keyword args may not be used when defining new `readers` or `writers` as they already have special meaning to the registry system: - `format` - `into` - `verify` - `mode` - `encoding` - `errors` - `newline` - `compression` - `compresslevel` The following are not yet used but should be avoided as well: - `auth` - `user` - `password` - `buffering` - `buffer_size` - `closefd` - `exclusive` - `append` """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from warnings import warn import types import traceback import itertools import inspect from functools import wraps from ._exception import DuplicateRegistrationError, InvalidRegistrationError from . import ( UnrecognizedFormatError, ArgumentOverrideWarning, FormatIdentificationWarning, ) from .util import _resolve_file, open_file, open_files, _d as _open_kwargs from skbio.util._misc import make_sentinel, find_sentinels from skbio.util._decorator import classonlymethod FileSentinel = make_sentinel("FileSentinel") class IORegistry: """Create a registry of formats and implementations which map to classes.""" def __init__(self): """Initialize registry mapping formats and implementations to classes. 
This seperation of binary and text formats is useful because there are many situations where we may have recieved a text-file. When this happens, the binary data fundamentally does not exist. We could assume encoding should be interpreted in reverse, however this misses the bigger point: why would the user ever want text to be treated as binary? They already went through the effort to hand us text. Therefore, during format resolution, we should skip the binary formats if they are irrelevant. (They are incompatible with such a filehandle anyways.) """ self._binary_formats = {} self._text_formats = {} self._lookups = (self._binary_formats, self._text_formats) def create_format(self, *args, **kwargs): """Create new file formats. A simple factory for creating new file formats. This will automatically register the format with this regsistry. All arguments are passed through to the Format constructor. Returns ------- Format A new format that is registered with the registry. """ format = Format(*args, **kwargs) self.add_format(format) return format def add_format(self, format_object): """Add a format to the registry. Parameters ---------- format_object : Format The format to add to the registry. """ # See comment in the constructor for an explanation for why this split # occurs. name = format_object.name if name in self._binary_formats or name in self._text_formats: raise DuplicateRegistrationError( "A format already exists with" " that name: %s" % name ) if format_object.is_binary_format: self._binary_formats[name] = format_object else: self._text_formats[name] = format_object def get_sniffer(self, format_name): """Locate the sniffer for a format. Parameters ---------- format_name : str The name of the format to lookup. 
Returns ------- function or None The sniffer associated with `format_name` """ for lookup in self._lookups: if format_name in lookup: return lookup[format_name].sniffer_function return None def get_reader(self, format_name, cls): """Locate the reader for a format and class. Parameters ---------- format_name : str The name of the format to lookup. cls : type or None The class which the reader will return an instance of. If `cls` is None, the reader will return a generator. Default is None. Returns ------- function or None The reader associated with `format_name` and `cls` """ return self._get_rw(format_name, cls, "readers") def get_writer(self, format_name, cls): """Locate the writer for a format and class. Parameters ---------- format_name : str The name of the format to lookup. cls : type or None The class which the writer will expect an instance of. If `cls` is None, the writer will expect a generator. Default is None. Returns ------- function or None The writer associated with `format_name` and `cls` """ return self._get_rw(format_name, cls, "writers") def _get_rw(self, format_name, cls, lookup_name): for lookup in self._lookups: if format_name in lookup: format_lookup = getattr(lookup[format_name], lookup_name) if cls in format_lookup: return format_lookup[cls] return None def list_read_formats(self, cls): """Return a list of available read formats for a given `cls` type. Parameters ---------- cls : type The class which will be used to determine what read formats exist for an instance of `cls`. Returns ------- list A list of available read formats for an instance of `cls`. List may be empty. """ return list(self._iter_rw_formats(cls, "readers")) def list_write_formats(self, cls): """Return a list of available write formats for a given `cls` type. Parameters ---------- cls : type The class which will be used to determine what write formats exist for an instance of `cls`. Returns ------- list A list of available write formats for an instance of `cls`. 
List may be empty. """ return list(self._iter_rw_formats(cls, "writers")) def _iter_rw_formats(self, cls, lookup_name): for lookup in self._lookups: for format in lookup.values(): if cls in getattr(format, lookup_name): yield format.name def sniff(self, file, **kwargs): r"""Detect the format of a given file and suggest kwargs for reading. Parameters ---------- file : openable (filepath, URL, filehandle, etc.) The file to sniff. Something that is understood by :func:`skbio.io.open`. kwargs : dict, optional Keyword arguments will be passed to :func:`skbio.io.open`. ``newline`` cannot be provided. Returns ------- (str, dict) The name of the format of the file and any suggested kwargs for use with the corresponding reader. Raises ------ UnrecognizedFormatError This occurs when the format is not 'claimed' by any registered sniffer or when the format is ambiguous and has been 'claimed' by more than one sniffer. TypeError If ``newline`` is provided in ``kwargs``. """ if "newline" in kwargs: raise TypeError("Cannot provide `newline` keyword argument when sniffing.") # By resolving the input here, we have the oppurtunity to reuse the # file (which is potentially ephemeral). Each sniffer will also resolve # the file, but that call will short-circuit and won't claim # responsibility for closing the file. This means that the file # should only close after leaving this context. This is also the reason # that we have to use SaneTextIOWrapper because each sniffer will # wrap the file to produce an appropriate default encoding for their # format (if unspecified). This results in the SaneTextIOWrapper being # garbage collected (using io.TextIOBase results in close being called # on our buffer by the deconstructor which we wanted to share with the # next sniffer) with _resolve_file(file, mode="r", **kwargs) as (fh, _, is_binary_file): # tell may fail noisily if the user provided a TextIOBase or # BufferedReader which has already been iterated over (via next()). 
matches = [] backup = fh.tell() if is_binary_file and kwargs.get("encoding", "binary") == "binary": matches = self._find_matches(fh, self._binary_formats, **kwargs) if kwargs.get("encoding", None) != "binary": # We can always turn a binary file into a text file, but the # reverse doesn't make sense. matches += self._find_matches(fh, self._text_formats, **kwargs) fh.seek(backup) elif not is_binary_file: raise ValueError("Cannot decode text source (%r) as binary." % file) # else we are a binary_file and our encoding did not exclude binary # so we have already handled that condition if len(matches) > 1: raise UnrecognizedFormatError( "File format for %r is ambiguous," " may be one of: %r" % (file, [m for m, s in matches]) ) elif len(matches) == 0: raise UnrecognizedFormatError("Could not detect the format of %r" % file) return matches[0] def _find_matches(self, file, lookup, **kwargs): matches = [] for format in lookup.values(): if format.sniffer_function is not None: is_format, skwargs = format.sniffer_function(file, **kwargs) file.seek(0) if is_format: matches.append((format.name, skwargs)) return matches def read(self, file, format=None, into=None, verify=True, **kwargs): r"""Read a file as certain format into an object. Parameters ---------- file : openable (filepath, URL, filehandle, etc.) The file to read. Something that is understood by :func:`skbio.io.open`. format : str, optional The format of the file if known. If None, the format will be inferred from the file. into : type or None, optional The object which will be returned. If None, a generator will be returned. verify : bool, optional When True, will double check the ``format`` if provided. kwargs : dict, optional Keyword arguments will be passed to their respective handlers (:func:`skbio.io.open` and the reader for ``format``). ``newline`` cannot be provided. 
Returns ------- object or generator An instance of ``into`` if ``into`` is not None else generator Raises ------ ValueError Raised when ``format`` and ``into`` are both None. TypeError If ``newline`` is provided in ``kwargs``. UnrecognizedFormatError Raised when a reader could not be found for a given ``format`` or the format could not be guessed. FormatIdentificationWarning Raised when ``verify`` is True and the sniffer of a ``format`` did not agree that ``file`` is a member of ``format`` ArgumentOverrideWarning Raised when ``verify`` is True and a user-supplied argument is overriding the suggestion provided by the sniffer of ``format``. """ if "newline" in kwargs: raise TypeError("Cannot provide `newline` keyword argument when reading.") # Context managers do not compose well with generators. We have to # duplicate the logic so that the file will stay open while yielding. # Otherwise the context exits as soon as the generator is returned # (making any iteration fail as the file is closed from its # perspective). if into is None: if format is None: raise ValueError("`into` and `format` cannot both be None") gen = self._read_gen(file, format, into, verify, kwargs) # This is done so that any errors occur immediately instead of # on the first call from __iter__ # eta-reduction is possible, but we want to the type to be # GeneratorType try: return (x for x in itertools.chain([next(gen)], gen)) except StopIteration: # If the error was a StopIteration, then we want to return an # empty generator as `next(gen)` failed. # See #1313 for more info. 
return (x for x in []) else: return self._read_ret(file, format, into, verify, kwargs) def _read_ret(self, file, fmt, into, verify, kwargs): io_kwargs = self._find_io_kwargs(kwargs) with _resolve_file(file, **io_kwargs) as (file, _, _): reader, kwargs = self._init_reader( file, fmt, into, verify, kwargs, io_kwargs ) return reader(file, **kwargs) def _read_gen(self, file, fmt, into, verify, kwargs): io_kwargs = self._find_io_kwargs(kwargs) # We needed to get the io_kwargs from kwargs for things like # _resolve_file and for verifying a format. # kwargs should still retain the contents of io_kwargs because the # actual reader will also need them. with _resolve_file(file, **io_kwargs) as (file, _, _): reader, kwargs = self._init_reader( file, fmt, into, verify, kwargs, io_kwargs ) yield from reader(file, **kwargs) def _find_io_kwargs(self, kwargs): return {k: kwargs[k] for k in _open_kwargs if k in kwargs} def _init_reader(self, file, fmt, into, verify, kwargs, io_kwargs): skwargs = {} if fmt is None: fmt, skwargs = self.sniff(file, **io_kwargs) elif verify: sniffer = self.get_sniffer(fmt) if sniffer is not None: backup = file.tell() is_format, skwargs = sniffer(file, **io_kwargs) file.seek(backup) if not is_format: warn( "%r does not look like a %s file" % (file, fmt), FormatIdentificationWarning, ) for key in skwargs: if key not in kwargs: kwargs[key] = skwargs[key] elif kwargs[key] != skwargs[key]: warn( "Best guess was: %s=%r, continuing with user" " supplied: %r" % (key, skwargs[key], kwargs[key]), ArgumentOverrideWarning, ) reader = self.get_reader(fmt, into) if reader is None: possible_intos = [r.__name__ for r in self._get_possible_readers(fmt)] message = "" if possible_intos: message = "Possible values for `into` include: %s" % ", ".join( possible_intos ) into_message = "`into` also not provided." if not into else "" raise UnrecognizedFormatError( "Cannot read %r from %r, no %s reader found. 
%s %s" % ( fmt, file, into.__name__ if into else "generator", into_message, message, ) ) return reader, kwargs def _get_possible_readers(self, fmt): for lookup in self._lookups: if fmt in lookup: return list(lookup[fmt].readers) return [] def write(self, obj, format, into, **kwargs): r"""Write an object as certain format into a file. Parameters ---------- obj : object The object to write as ``format``. format : str The format to write ``obj`` as. into : openable (filepath, URL, filehandle, etc.) What to write ``obj`` to. Something that is understood by :func:`skbio.io.open`. kwargs : dict, optional Keyword arguments will be passed to their respective handlers (:func:`skbio.io.open` and the writer for ``format``). Returns ------- openable (filepath, URL, filehandle, etc.) Will pass back the user argument for ``into`` as a convenience. Raises ------ UnrecognizedFormatError Raised when a writer for writing ``obj`` as ``format`` could not be found. """ # The simplest functionality here. cls = None if not isinstance(obj, types.GeneratorType): cls = obj.__class__ writer = self.get_writer(format, cls) if writer is None: raise UnrecognizedFormatError( "Cannot write %r into %r, no %s writer found." % (format, into, obj.__class__.__name__) ) writer(obj, into, **kwargs) return into def monkey_patch(self): r"""Monkey-patch ``read`` and ``write`` methods onto registered classes. Will modify classes which have been registered to a reader or writer to have ``read`` and ``write`` methods which will contain documentation specifying useable formats for that class. The actual functionality will be a pass-through to :func:`skbio.io.read` and :func:`skbio.io.write` respectively. 
""" reads = set() writes = set() for lookup in self._lookups: for format in lookup.values(): reads |= format.monkey_patched_readers writes |= format.monkey_patched_writers for cls in reads: self._apply_read(cls) for cls in writes: self._apply_write(cls) def _apply_read(registry, cls): """Add read method if any formats have a reader for `cls`.""" read_formats = registry.list_read_formats(cls) @classonlymethod def read(cls, file, format=None, **kwargs): return registry.read(file, into=cls, format=format, **kwargs) imports = registry._import_paths(read_formats) doc_list = registry._formats_for_docs(read_formats, imports) read.__func__.__doc__ = _read_docstring % { "name": cls.__name__, "list": doc_list, "see": "\n".join(imports), } cls.read = read def _apply_write(registry, cls): """Add write method if any formats have a writer for `cls`.""" write_formats = registry.list_write_formats(cls) if not hasattr(cls, "default_write_format"): raise NotImplementedError( "Classes with registered writers must provide a " "`default_write_format`. Please add `default_write_format`" " to '%s'." % cls.__name__ ) def write(self, file, format=cls.default_write_format, **kwargs): return registry.write(self, into=file, format=format, **kwargs) imports = registry._import_paths(write_formats) doc_list = registry._formats_for_docs(write_formats, imports) write.__doc__ = _write_docstring % { "name": cls.__name__, "list": doc_list, "see": "\n".join(imports), "default": cls.default_write_format, } cls.write = write def _import_paths(self, formats): lines = [] for fmt in formats: lines.append("skbio.io.format." + fmt) return lines def _formats_for_docs(self, formats, imports): lines = [] for fmt, imp in zip(formats, imports): lines.append("- ``'%s'`` (:mod:`%s`)" % (fmt, imp)) return "\n".join(lines) _read_docstring = """Create a new ``%(name)s`` instance from a file. This is a convenience method for :func:`skbio.io.registry.read`. 
For more information about the I/O system in scikit-bio, please see :mod:`skbio.io`. Supported file formats include: %(list)s Parameters ---------- file : openable (filepath, URL, filehandle, etc.) The location to read the given `format`. Something that is understood by :func:`skbio.io.util.open`. Filehandles are not automatically closed, it is the responsibility of the caller. format : str, optional The format must be a format name with a reader for ``%(name)s``. If a `format` is not provided or is None, it will attempt to guess the format. kwargs : dict, optional Keyword arguments passed to :func:`skbio.io.registry.read` and the file format reader for ``%(name)s``. Returns ------- %(name)s A new instance. See Also -------- write skbio.io.registry.read skbio.io.util.open %(see)s """ _write_docstring = """Write an instance of ``%(name)s`` to a file. This is a convenience method for :func:`skbio.io.registry.write`. For more information about the I/O system in scikit-bio, please see :mod:`skbio.io`. Supported file formats include: %(list)s Parameters ---------- file : openable (filepath, URL, filehandle, etc.) The location to write the given `format` into. Something that is understood by :func:`skbio.io.util.open`. Filehandles are not automatically closed, it is the responsibility of the caller. format : str The format must be a registered format name with a writer for ``%(name)s``. Default is `'%(default)s'`. kwargs : dict, optional Keyword arguments passed to :func:`skbio.io.registry.write` and the file format writer. See Also -------- read skbio.io.registry.write skbio.io.util.open %(see)s """ class Format: """Defines a format on which readers/writers/sniffer can be registered. Parameters ---------- name : str The name of this format. encoding : str, optional What the default encoding of this format is. If set to 'binary' then all registered handlers will receive an :class:`io.BufferedReader` or :class:`io.BufferedWriter` instead of an :class:`io.TextIOBase`. 
The user will also be unable to override the encoding in that case. newline : str, optional What the default newline handling of this format is. Default is to use universal newline handling. """ @property def name(self): """The name of this format.""" return self._name @property def is_binary_format(self): """Return True if this is a binary format.""" return self._encoding == "binary" @property def sniffer_function(self): """The sniffer function associated with this format.""" return self._sniffer_function @property def readers(self): """Dictionary that maps classes to their readers for this format.""" return self._readers @property def writers(self): """Dictionary that maps classes to their writers for this format.""" return self._writers @property def monkey_patched_readers(self): """Set of classes bound to readers to monkey patch.""" return self._monkey_patch["read"] @property def monkey_patched_writers(self): """Set of classes bound to writers to monkey patch.""" return self._monkey_patch["write"] def __init__(self, name, encoding=None, newline=None): """Initialize format for registering sniffers, readers, and writers.""" self._encoding = encoding self._newline = newline self._name = name self._sniffer_function = None self._readers = {} self._writers = {} self._monkey_patch = {"read": set(), "write": set()} def sniffer(self, override=False): r"""Decorate a function to act as the sniffer for this format. The function should take one argument which will be an implementation of either :class:`io.TextIOBase` or :class:`io.BufferedReader` depending on if the format is text or binary, respectively. The sniffer will always receive a filehandle which is pointing to the beginning of the file. It must return a tuple of bool and a dict of suggested keyword arguments (if any) to pass to the reader. .. note:: Keyword arguments are not permitted in `sniffers`. 
`Sniffers` may not raise exceptions; if an exception is thrown by a `sniffer`, the user will be asked to report it on our `issue tracker `_. Parameters ---------- override : bool, optional If True, the existing sniffer will be overriden. Raises ------ DuplicateRegistrationError When `override` is False and a sniffer is already registered for this format. Examples -------- >>> from skbio.io.registry import Format >>> # If developing a new format for skbio, use the create_format() >>> # factory instead of this constructor. >>> myformat = Format('myformat') >>> @myformat.sniffer() ... def myformat_sniffer(fh): ... check = fh.read(8) == "myformat" ... if check: ... version = int(fh.read(1)) ... return True, {'version': version} ... return False, {} ... >>> myformat_sniffer(["myformat2\n", "some content\n"]) (True, {'version': 2}) >>> myformat_sniffer(["something else\n"]) (False, {}) """ if isinstance(override, bool) is not True: raise InvalidRegistrationError( "`override` must be a bool not %r" % override ) if not override and self._sniffer_function is not None: raise DuplicateRegistrationError( "A sniffer is already registered" " to format: %s" % self._name ) def decorator(sniffer): @wraps(sniffer) def wrapped_sniffer( file, encoding=self._encoding, errors="ignore", newline=self._newline, **kwargs, ): self._validate_encoding(encoding) if encoding == "binary": # Errors is irrelevant so set to default to prevent raising # a usage exception in open. errors = _open_kwargs["errors"] with open_file( file, mode="r", encoding=encoding, newline=newline, errors=errors, **kwargs, ) as fh: try: # Some formats may have headers which indicate their # format sniffers should be able to rely on the # filehandle to point at the beginning of the file. 
fh.seek(0) return sniffer(fh) except UnicodeDecodeError: pass except Exception: warn( f"'{sniffer.__name__}' has encountered a " "problem.\nPlease send the following to our " "issue tracker at\nhttps://github.com/scikit-" "bio/scikit-bio/issues\n\n" f"{traceback.format_exc()}", FormatIdentificationWarning, ) return False, {} self._sniffer_function = wrapped_sniffer return wrapped_sniffer return decorator def reader(self, cls, monkey_patch=True, override=False): r"""Decorate a function to act as the reader for a class in this format. The function should take an argument which will be an implementation of either :class:`io.TextIOBase` or :class:`io.BufferedReader` depending on if the format is text or binary, respectively. Any kwargs given by the user which are not handled by :func:`skbio.io.util.open` will be passed into the function. Any kwarg with a default of `FileSentinel` will transform user input for that parameter into a filehandle or `None` if not provided. Parameters ---------- cls : type or None The class which the function will be registered to handle. If None, it is assumed that the function will produce a generator. monkey_patch : bool, optional Whether to allow an IORegistry to attach a `read` method to `cls` with this format listed as an option. override : bool, optional If True, any existing readers for `cls` in this format will be overriden. Raises ------ DuplicateRegistrationError When `override` is False and a reader is already registered to `cls` for this format. Examples -------- >>> from skbio.io.registry import Format, IORegistry >>> registry = IORegistry() >>> myformat = Format('myformat') >>> registry.add_format(myformat) >>> # If developing a new format for skbio, use the create_format() >>> # factory instead of the above. >>> class MyObject: ... def __init__(self, content): ... self.content = content ... >>> @myformat.reader(MyObject) ... def myformat_reader(fh): ... return MyObject(fh.readlines()[1:]) ... 
>>> registry.monkey_patch() # If developing skbio, this isn't needed >>> MyObject.read(["myformat2\n", "some content here!\n"], ... format='myformat').content ['some content here!\n'] """ self._check_registration(cls) def decorator(reader_function): file_params = find_sentinels(reader_function, FileSentinel) # This split has to occur for the same reason as in IORegistry.read if cls is not None: @wraps(reader_function) def wrapped_reader( file, encoding=self._encoding, newline=self._newline, **kwargs ): file_keys, files, io_kwargs = self._setup_locals( file_params, file, encoding, newline, kwargs ) with open_files(files, mode="r", **io_kwargs) as fhs: # The primary file is at the end of fh because append # is cheaper than insert kwargs.update(zip(file_keys, fhs[:-1])) return reader_function(fhs[-1], **kwargs) else: @wraps(reader_function) def wrapped_reader( file, encoding=self._encoding, newline=self._newline, **kwargs ): file_keys, files, io_kwargs = self._setup_locals( file_params, file, encoding, newline, kwargs ) with open_files(files, mode="r", **io_kwargs) as fhs: kwargs.update(zip(file_keys, fhs[:-1])) yield from reader_function(fhs[-1], **kwargs) self._add_reader(cls, wrapped_reader, monkey_patch, override) return wrapped_reader return decorator def writer(self, cls, monkey_patch=True, override=False): r"""Decorate a function to act as the writer for a class in this format. The function should take an instance of `cls` as its first argument and the second argument is a filehandle which will be an implementation of either :class:`io.TextIOBase` or :class:`io.BufferedWriter` depending on if the format is text or binary, respectively. Any kwargs given by the user which are not handled by :func:`skbio.io.util.open` will be passed into the function. Any kwarg with a default of `FileSentinel` will transform user input for that parameter into a filehandle or `None` if not provided. 
Parameters ---------- cls : type or None The class which the function will be registered to handle. If None, it is assumed that the function will consume a generator. monkey_patch : bool, optional Whether to allow an IORegistry to attach a `write` method to `cls` with this format listed as an option. override : bool, optional If True, any existing writers for `cls` in this format will be overriden. Raises ------ DuplicateRegistrationError When `override` is False and a writer is already registered to `cls` for this format. Examples -------- >>> from skbio.io.registry import Format, IORegistry >>> registry = IORegistry() >>> myformat = Format('myformat') >>> registry.add_format(myformat) >>> # If developing a new format for skbio, use the create_format() >>> # factory instead of the above. >>> class MyObject: ... default_write_format = 'myformat' ... def __init__(self, content): ... self.content = content ... >>> @myformat.writer(MyObject) ... def myformat_reader(obj, fh): ... fh.write("myformat2\n") ... for c in obj.content: ... fh.write(c) ... 
>>> registry.monkey_patch() # If developing skbio, this isn't needed >>> obj = MyObject(["some content here!\n"]) >>> obj.write([], format='myformat') ['myformat2\n', 'some content here!\n'] """ self._check_registration(cls) def decorator(writer_function): file_params = find_sentinels(writer_function, FileSentinel) @wraps(writer_function) def wrapped_writer( obj, file, encoding=self._encoding, newline=self._newline, **kwargs ): file_keys, files, io_kwargs = self._setup_locals( file_params, file, encoding, newline, kwargs ) with open_files(files, mode="w", **io_kwargs) as fhs: kwargs.update(zip(file_keys, fhs[:-1])) writer_function(obj, fhs[-1], **kwargs) self._add_writer(cls, wrapped_writer, monkey_patch, override) return wrapped_writer return decorator def _check_registration(self, cls): if cls is not None and not inspect.isclass(cls): raise InvalidRegistrationError( "`cls` must be a class or None, not" " %r" % cls ) def _setup_locals(self, file_params, file, encoding, newline, kwargs): self._validate_encoding(encoding) io_kwargs = self._pop_io_kwargs(kwargs, encoding, newline) file_keys, files = self._setup_file_args(kwargs, file_params) files.append(file) return file_keys, files, io_kwargs def _validate_encoding(self, encoding): if encoding != self._encoding: if self._encoding == "binary": raise ValueError("Encoding must be 'binary' for %r" % self.name) if encoding == "binary": raise ValueError("Encoding must not be 'binary' for %r" % self.name) def _pop_io_kwargs(self, kwargs, encoding, newline): io_kwargs = dict(encoding=encoding, newline=newline) for key in _open_kwargs: if key in kwargs: io_kwargs[key] = kwargs.pop(key) return io_kwargs def _setup_file_args(self, kwargs, file_params): file_keys = [] files = [] for param in file_params: arg = kwargs.get(param, None) if arg is not None: file_keys.append(param) files.append(arg) else: # set to None to mask FileSentinel when user neglected argument kwargs[param] = None return file_keys, files def 
_add_writer(self, cls, writer, monkey_patch, override): if cls in self._writers and not override: raise DuplicateRegistrationError( "There is already a writer" " registered to %s in format: %s" % (cls, self._name) ) self._writers[cls] = writer if monkey_patch and cls is not None: self._monkey_patch["write"].add(cls) def _add_reader(self, cls, reader, monkey_patch, override): if cls in self._readers and not override: raise DuplicateRegistrationError( "There is already a reader" " registered to %s in format: %s" % (cls, self._name) ) self._readers[cls] = reader if monkey_patch and cls is not None: self._monkey_patch["read"].add(cls) io_registry = IORegistry() @wraps(IORegistry.sniff) def sniff(file, **kwargs): """Detect the format of a given `file` and suggest kwargs.""" return io_registry.sniff(file, **kwargs) @wraps(IORegistry.read) def read(file, format=None, into=None, verify=True, **kwargs): """Read data from file using specified format.""" return io_registry.read(file, format=format, into=into, verify=verify, **kwargs) @wraps(IORegistry.write) def write(obj, format, into, **kwargs): """Write data to file using specified format.""" return io_registry.write(obj, format, into, **kwargs) @wraps(IORegistry.create_format) def create_format(*args, **kwargs): """Make a new format.""" return io_registry.create_format(*args, **kwargs) scikit-bio-0.6.2/skbio/io/tests/000077500000000000000000000000001464262511300164405ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/tests/__init__.py000066400000000000000000000005411464262511300205510ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/io/tests/data/000077500000000000000000000000001464262511300173515ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/io/tests/data/big5_file000066400000000000000000000000051464262511300211140ustar00rootroot00000000000000An scikit-bio-0.6.2/skbio/io/tests/data/big5_file.bz2000066400000000000000000000000561464262511300216160ustar00rootroot00000000000000BZh91AY&SY*E  0H \EWscikit-bio-0.6.2/skbio/io/tests/data/big5_file.gz000066400000000000000000000000431464262511300215350ustar00rootroot00000000000000[Ubig5_file[, &scikit-bio-0.6.2/skbio/io/tests/data/example_file000066400000000000000000000000651464262511300217270ustar00rootroot00000000000000This is some content It occurs on more than one line scikit-bio-0.6.2/skbio/io/tests/data/example_file.bz2000066400000000000000000000001241464262511300224170ustar00rootroot00000000000000BZh91AY&SYlW@ *g TPL S2S&Ԏ*#$j66 O)/໒)„~`scikit-bio-0.6.2/skbio/io/tests/data/example_file.gz000066400000000000000000000001231464262511300223410ustar00rootroot00000000000000wTUexample_file ,VTԼ.Ңb\n', 'ACGT\n'] exp = io_registry.read(input, into=DNA) result = read(input, into=DNA) self.assertEqual(exp, result) self.assertEqual(exp, DNA('ACGT', metadata={'id': '', 'description': ''})) def test_write_matches(self): input = DNA('ACGT') exp = io_registry.write(input, format='fasta', into=[]) result = write(input, format='fasta', into=[]) self.assertEqual(exp, result) self.assertEqual(exp, ['>\n', 'ACGT\n']) def test_create_format_matches(self): with self.assertRaises(DuplicateRegistrationError): io_registry.create_format('fasta') with self.assertRaises(DuplicateRegistrationError): create_format('fasta') if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/tests/test_util.py000066400000000000000000000564701464262511300210420ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # 
Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import tempfile import shutil import io import os.path import gc try: import responses except ImportError: has_responses = False else: has_responses = True import skbio.io from skbio.io.registry import open_file from skbio.util import get_data_path class TestOpen(unittest.TestCase): def test_open_invalid_mode(self): with self.assertRaises(ValueError): skbio.io.open([], mode='a') def test_open_invalid_source(self): with self.assertRaises(skbio.io.IOSourceError): skbio.io.open(42) def test_open_invalid_source_compression(self): with self.assertRaises(ValueError): skbio.io.open(['foo'], compression='gzip') def test_open_invalid_source_encoding(self): with self.assertRaises(ValueError): skbio.io.open(['foo'], encoding='binary') with self.assertRaises(ValueError): skbio.io.open(['foo'], encoding='binary', newline='\r') def test_open_invalid_compression(self): with self.assertRaises(ValueError): skbio.io.open(io.BytesIO(), compression='foo') class ReadableBinarySourceTests: def check_closed(self, file, expected): if hasattr(file, 'closed'): self.assertEqual(file.closed, expected) def check_open_state_contents(self, file, contents, is_binary, **kwargs): result = skbio.io.open(file, **kwargs) self.assertTrue(result.readable()) if is_binary: self.assertIsInstance(result, (io.BufferedReader, io.BufferedRandom)) actual_contents = result.read().decode().replace("\r\n", "\n").encode() else: self.assertIsInstance(result, io.TextIOBase) actual_contents = result.read().replace("\r\n", "\n") self.assertEqual(actual_contents, contents) self.assertFalse(result.closed) result.close() self.assertTrue(result.closed) self.check_closed(file, True) def check_open_file_state_contents(self, file, contents, 
is_binary, **kwargs): with open_file(file, **kwargs) as f: self.assertTrue(f.readable()) if is_binary: self.assertIsInstance(f, (io.BufferedReader, io.BufferedRandom)) actual_contents = f.read().decode().replace("\r\n", "\n").encode() else: self.assertIsInstance(f, io.TextIOBase) actual_contents = f.read().replace("\r\n", "\n") self.assertEqual(actual_contents, contents) self.assertEqual(f.closed, self.expected_close) f.close() self.assertTrue(f.closed) self.check_closed(file, True) def check_open_buffer_close_behaviour(self, file, **kwargs): if hasattr(file, 'close'): wrapped = skbio.io.open(file, **kwargs) file.close() self.assertTrue(wrapped.closed) def check_open_file_buffer_close_behaviour(self, file, **kwargs): if hasattr(file, 'close'): with open_file(file, **kwargs) as wrapped: file.close() self.assertTrue(wrapped.closed) def check_open_gc_behaviour(self, file, **kwargs): def mangle(file): result = skbio.io.open(file, **kwargs) self.assertIsInstance(result, io.TextIOBase) f = skbio.io.open(file, encoding='binary') mangle(f) self.assertFalse(f.closed) f.close() def check_open_file_gc_behaviour(self, file, **kwargs): def mangle(file): with open_file(file, **kwargs) as result: self.assertIsInstance(result, io.TextIOBase) with open_file(file, encoding='binary') as f: mangle(f) self.assertFalse(f.closed) def test_open_gc_binary(self): self.check_open_gc_behaviour(self.read_file) def test_open_gc_encoding(self): self.check_open_gc_behaviour(self.encoded_file) def test_open_gc_compression(self): self.check_open_gc_behaviour(self.gzip_file) self.check_open_gc_behaviour(self.bz2_file) def test_open_gc_compression_encoding(self): self.check_open_gc_behaviour(self.gzip_encoded_file) self.check_open_gc_behaviour(self.bz2_encoded_file) def test_open_file_gc_binary(self): self.check_open_file_gc_behaviour(self.read_file) def test_open_file_gc_encoding(self): self.check_open_file_gc_behaviour(self.encoded_file) def test_open_file_gc_compression(self): 
self.check_open_file_gc_behaviour(self.gzip_file) self.check_open_file_gc_behaviour(self.bz2_file) def test_open_file_gc_compression_encoding(self): self.check_open_file_gc_behaviour(self.gzip_encoded_file) self.check_open_file_gc_behaviour(self.bz2_encoded_file) def test_open_underclose_binary(self): self.check_open_buffer_close_behaviour(self.read_file) def test_open_underclose_encoding(self): self.check_open_buffer_close_behaviour(self.encoded_file) def test_open_underclose_compression(self): self.check_open_buffer_close_behaviour(self.gzip_file) self.check_open_buffer_close_behaviour(self.bz2_file) def test_open_underclose_compression_encoding(self): self.check_open_buffer_close_behaviour(self.gzip_encoded_file) self.check_open_buffer_close_behaviour(self.bz2_encoded_file) def test_open_file_underclose_binary(self): self.check_open_file_buffer_close_behaviour(self.read_file) def test_open_file_underclose_encoding(self): self.check_open_file_buffer_close_behaviour(self.encoded_file) def test_open_file_underclose_compression(self): self.check_open_file_buffer_close_behaviour(self.gzip_file) self.check_open_file_buffer_close_behaviour(self.bz2_file) def test_open_file_underclose_compression_encoding(self): self.check_open_file_buffer_close_behaviour(self.gzip_encoded_file) self.check_open_file_buffer_close_behaviour(self.bz2_encoded_file) def test_open_binary(self): self.check_open_state_contents(self.read_file, self.binary_contents, True, mode='r', encoding='binary') def test_open_binary_compression_none(self): self.check_open_state_contents(self.read_file, self.binary_contents, True, mode='r', encoding='binary', compression=None) def test_open_encoding(self): self.check_open_state_contents(self.encoded_file, self.decoded_contents, False, mode='r', encoding=self.encoding) def test_open_auto_compression_binary(self): self.check_open_state_contents(self.gzip_file, self.binary_contents, True, mode='r', encoding='binary', compression='auto') 
self.check_open_state_contents(self.bz2_file, self.binary_contents, True, mode='r', encoding='binary', compression='auto') def test_open_gzip_compression_binary(self): self.check_open_state_contents(self.gzip_file, self.binary_contents, True, mode='r', encoding='binary', compression='gzip') def test_open_bz2_compression_binary(self): self.check_open_state_contents(self.bz2_file, self.binary_contents, True, mode='r', encoding='binary', compression='bz2') def test_open_default_compression_encoding(self): self.check_open_state_contents(self.gzip_encoded_file, self.decoded_contents, False, mode='r', encoding=self.encoding) self.check_open_state_contents(self.bz2_encoded_file, self.decoded_contents, False, mode='r', encoding=self.encoding) def test_open_file_binary(self): self.check_open_file_state_contents(self.read_file, self.binary_contents, True, mode='r', encoding='binary') def test_open_file_binary_compression_none(self): self.check_open_file_state_contents(self.read_file, self.binary_contents, True, mode='r', encoding='binary', compression=None) def test_open_file_encoding(self): self.check_open_file_state_contents(self.encoded_file, self.decoded_contents, False, mode='r', encoding=self.encoding) def test_open_file_auto_compression_binary(self): self.check_open_file_state_contents(self.gzip_file, self.binary_contents, True, mode='r', encoding='binary', compression='auto') self.check_open_file_state_contents(self.bz2_file, self.binary_contents, True, mode='r', encoding='binary', compression='auto') def test_open_file_gzip_compression_binary(self): self.check_open_file_state_contents(self.gzip_file, self.binary_contents, True, mode='r', encoding='binary', compression='gzip') def test_open_file_bz2_compression_binary(self): self.check_open_file_state_contents(self.bz2_file, self.binary_contents, True, mode='r', encoding='binary', compression='bz2') def test_open_file_default_compression_encoding(self): self.check_open_file_state_contents(self.gzip_encoded_file, 
self.decoded_contents, False, mode='r', encoding=self.encoding) self.check_open_file_state_contents(self.bz2_encoded_file, self.decoded_contents, False, mode='r', encoding=self.encoding) class ReadableSourceTest(unittest.TestCase): def setUp(self): self.read_file = self.get_fileobj(get_data_path("example_file")) self.gzip_file = \ self.get_fileobj(get_data_path("example_file.gz")) self.bz2_file = \ self.get_fileobj(get_data_path("example_file.bz2")) self.encoded_file = self.get_fileobj(get_data_path("big5_file")) self.gzip_encoded_file = \ self.get_fileobj(get_data_path("big5_file.gz")) self.bz2_encoded_file = \ self.get_fileobj(get_data_path("big5_file.bz2")) self.binary_contents = ( f"This is some content{os.linesep}" f"It occurs on more than one line{os.linesep}" ).replace("\r\n", "\n").encode() self.decoded_contents = '\u4f60\u597d\n' # Ni Hau self.compression = 'gzip' self.encoding = "big5" def tearDown(self): self.safe_close(self.read_file) self.safe_close(self.gzip_file) self.safe_close(self.bz2_file) self.safe_close(self.encoded_file) self.safe_close(self.gzip_encoded_file) self.safe_close(self.bz2_encoded_file) def safe_close(self, f): if hasattr(f, 'close'): f.close() class WritableBinarySourceTests: def check_closed(self, file, expected): if hasattr(file, 'closed'): self.assertEqual(file.closed, expected) def check_open_state_contents(self, file, contents, is_binary, **kwargs): if is_binary: result = skbio.io.open(file, mode='w', **kwargs) self.assertIsInstance(result, (io.BufferedWriter, io.BufferedRandom)) else: result = skbio.io.open(file, mode='w', **kwargs, newline='') self.assertIsInstance(result, io.TextIOBase) self.assertTrue(result.writable()) result.write(contents) self.assertFalse(result.closed) if self.expected_close: result.close() self.assertTrue(result.closed) self.check_closed(file, True) gc.collect() del result gc.collect() def compare_gzip_file_contents(self, a, b): # The first 10 bytes of a gzip header include a timestamp. 
The header # can be followed by other "volatile" metadata, so only compare gzip # footers (last 8 bytes) which contain a CRC-32 checksum and the length # of the uncompressed data. self.assertEqual(a[-8:], b[-8:]) def test_open_binary(self): self.check_open_state_contents(self.binary_file, self.binary_contents, True, encoding='binary', compression=None) self.assertEqual(self.get_contents(self.binary_file), self.binary_contents) def test_open_gzip(self): self.check_open_state_contents(self.gzip_file, self.text_contents, False, compression='gzip') self.compare_gzip_file_contents(self.get_contents(self.gzip_file), self.gzip_contents) def test_open_bz2(self): self.check_open_state_contents(self.bz2_file, self.text_contents, False, compression='bz2') self.assertEqual(self.get_contents(self.bz2_file), self.bz2_contents) def test_open_encoding(self): self.check_open_state_contents(self.big5_file, self.decoded_contents, False, encoding='big5') self.assertEqual( self.get_contents(self.big5_file) .decode(encoding="big5") .replace("\r\n", "\n") .encode(encoding="big5"), self.encoded_contents) def test_open_gzip_encoding(self): self.check_open_state_contents(self.gzip_encoded_file, self.decoded_contents, False, compression='gzip', encoding='big5') self.compare_gzip_file_contents( self.get_contents(self.gzip_encoded_file), self.gzip_encoded_contents) def test_open_bz2_encoding(self): self.check_open_state_contents(self.bz2_encoded_file, self.decoded_contents, False, compression='bz2', encoding='big5') self.assertEqual(self.get_contents(self.bz2_encoded_file), self.bz2_encoded_contents) class WritableSourceTest(unittest.TestCase): def setUp(self): self._dir = tempfile.mkdtemp() with io.open(get_data_path('example_file'), mode='rb') as f: self.binary_contents = f.read().decode('utf8').replace("\r\n", "\n", -1).encode('utf8') self.binary_file = self._make_file('example_file') with io.open(get_data_path('big5_file'), mode='rb') as f: self.encoded_contents = 
f.read().decode('big5').replace("\r\n", "\n", -1).encode('big5') self.big5_file = self._make_file('big5_file') with io.open(get_data_path('example_file.gz'), mode='rb') as f: self.gzip_contents = f.read() self.gzip_file = self._make_file('example_file.gz') with io.open(get_data_path('example_file.bz2'), mode='rb') as f: self.bz2_contents = f.read() self.bz2_file = self._make_file('example_file.bz2') with io.open(get_data_path('big5_file.gz'), mode='rb') as f: self.gzip_encoded_contents = f.read() self.gzip_encoded_file = self._make_file('big5_file.gz') with io.open(get_data_path('big5_file.bz2'), mode='rb') as f: self.bz2_encoded_contents = f.read() self.bz2_encoded_file = self._make_file('big5_file.bz2') self.decoded_contents = self.encoded_contents.decode('big5') self.text_contents = self.binary_contents.decode('utf8') def tearDown(self): self.safe_close(self.binary_file) self.safe_close(self.gzip_file) self.safe_close(self.bz2_file) self.safe_close(self.big5_file) self.safe_close(self.gzip_encoded_file) self.safe_close(self.bz2_encoded_file) shutil.rmtree(self._dir) def safe_close(self, f): if hasattr(f, 'close'): f.close() def _make_file(self, name): return self.get_fileobj(os.path.join(self._dir, name)) class TestReadFilepath(ReadableBinarySourceTests, ReadableSourceTest): expected_close = True def get_fileobj(self, path): return path class TestWriteFilepath(WritableBinarySourceTests, WritableSourceTest): expected_close = True def get_fileobj(self, path): return path def get_contents(self, file): with io.open(file, mode='rb') as f: return f.read() @unittest.skipIf(not has_responses, "Responses not available to mock tests.") class TestReadURL(ReadableBinarySourceTests, ReadableSourceTest): expected_close = True def setUp(self): super(TestReadURL, self).setUp() responses.start() for file in (get_data_path('example_file'), get_data_path('big5_file'), get_data_path('example_file.gz'), get_data_path('example_file.bz2'), get_data_path('big5_file.gz'), 
get_data_path('big5_file.bz2')): with io.open(file, mode='rb') as f: responses.add(responses.GET, self.get_fileobj(file), body=f.read(), content_type="application/octet-stream") def tearDown(self): super(TestReadURL, self).setUp() responses.stop() responses.reset() def get_fileobj(self, path): return "http://example.com/" + os.path.split(path)[1] class TestReadBytesIO(ReadableBinarySourceTests, ReadableSourceTest): expected_close = False def get_fileobj(self, path): with io.open(path, mode='rb') as f: return io.BytesIO(f.read()) class TestWriteBytesIO(WritableBinarySourceTests, WritableSourceTest): expected_close = False def get_fileobj(self, path): return io.BytesIO() def get_contents(self, file): return file.getvalue() def test_open_gzip(self): self.check_open_state_contents(self.gzip_file, self.text_contents, False, compression='gzip') self.compare_gzip_file_contents(self.get_contents(self.gzip_file), self.gzip_contents) def test_open_gzip_encoding(self): self.check_open_state_contents(self.gzip_encoded_file, self.decoded_contents, False, compression='gzip', encoding='big5') self.compare_gzip_file_contents( self.get_contents(self.gzip_encoded_file), self.gzip_encoded_contents) class TestReadBufferedReader(ReadableBinarySourceTests, ReadableSourceTest): expected_close = False def get_fileobj(self, path): return io.open(path, mode='rb') class TestWriteBufferedReader(WritableBinarySourceTests, WritableSourceTest): expected_close = False def get_fileobj(self, path): return io.open(path, mode='w+b') def get_contents(self, file): file.close() with io.open(file.name, mode='rb') as f: return f.read() class TestReadNamedTemporaryFile(ReadableBinarySourceTests, ReadableSourceTest): expected_close = False def get_fileobj(self, path): fileobj = tempfile.NamedTemporaryFile() with io.open(path, mode='rb') as fh: fileobj.write(fh.read()) fileobj.flush() fileobj.seek(0) return fileobj class TestWriteNamedTemporaryFile(WritableBinarySourceTests, WritableSourceTest): 
expected_close = False def get_fileobj(self, path): return tempfile.NamedTemporaryFile() def get_contents(self, file): file.flush() file.seek(0) contents = file.read() file.close() return contents class TestReadTemporaryFile(ReadableBinarySourceTests, ReadableSourceTest): expected_close = False def get_fileobj(self, path): fileobj = tempfile.TemporaryFile() with io.open(path, mode='rb') as fh: fileobj.write(fh.read()) fileobj.flush() fileobj.seek(0) return fileobj class TestWriteTemporaryFile(WritableBinarySourceTests, WritableSourceTest): expected_close = False def get_fileobj(self, path): return tempfile.TemporaryFile() def get_contents(self, file): file.flush() file.seek(0) contents = file.read() file.close() return contents class TestIterableReaderWriter(unittest.TestCase): def test_open(self): def gen(): yield from ('a', 'b', 'c') list_ = list(gen()) for input_ in gen(), list_: with skbio.io.open(input_) as result: self.assertIsInstance(result, io.TextIOBase) self.assertEqual(result.read(), 'abc') def test_open_with_newline(self): lines = ['a\r', 'b\r', 'c\r'] with skbio.io.open(lines, newline='\r') as result: self.assertIsInstance(result, io.TextIOBase) self.assertEqual(result.readlines(), lines) def test_open_invalid_iterable(self): with self.assertRaises(skbio.io.IOSourceError): skbio.io.open([1, 2, 3]) def test_open_empty_iterable(self): with skbio.io.open([]) as result: self.assertIsInstance(result, io.TextIOBase) self.assertEqual(result.read(), '') def test_open_write_mode(self): lines = [] with skbio.io.open(lines, mode='w') as fh: fh.write('abc') self.assertEqual(lines, ['abc']) lines = [] with skbio.io.open(lines, mode='w', newline='\r') as fh: fh.write('ab\nc\n') self.assertEqual(lines, ['ab\r', 'c\r']) self.assertTrue(fh.closed) fh.close() self.assertTrue(fh.closed) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/io/util.py000066400000000000000000000255031464262511300166320ustar00rootroot00000000000000r"""I/O utilities 
(:mod:`skbio.io.util`) ==================================== .. currentmodule:: skbio.io.util This module provides utility functions to deal with files and I/O in general. Functions --------- .. autosummary:: :toctree: open open_file open_files """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from contextlib import contextmanager, ExitStack from skbio.io import IOSourceError from skbio.io._iosources import get_io_sources, get_compression_handler from skbio.io._fileobject import ( is_binary_file, SaneTextIOWrapper, CompressedBufferedReader, CompressedBufferedWriter, ) _d = dict( mode="r", encoding=None, errors=None, newline=None, compression="auto", compresslevel=9, ) def _resolve( file, mode=_d["mode"], encoding=_d["encoding"], errors=_d["errors"], newline=_d["newline"], compression=_d["compression"], compresslevel=_d["compresslevel"], ): arguments = locals().copy() if mode not in {"r", "w"}: raise ValueError("Unsupported mode: %r, use 'r' or 'w'" % mode) newfile = None source = None for source_handler in get_io_sources(): source = source_handler(file, arguments) if mode == "r" and source.can_read(): newfile = source.get_reader() break elif mode == "w" and source.can_write(): newfile = source.get_writer() break if newfile is None: raise IOSourceError("Could not open source: %r (mode: %r)" % (file, mode)) return newfile, source, is_binary_file(newfile) def open( file, mode=_d["mode"], encoding=_d["encoding"], errors=_d["errors"], newline=_d["newline"], compression=_d["compression"], compresslevel=_d["compresslevel"], ): r"""Convert input into a filehandle. 
Supported inputs: +--------------------------------------+--------+---------+-----------+ | type | can \ | can \ | source \ | | | read | write | type | +======================================+========+=========+===========+ | file path | True | True | Binary | +--------------------------------------+--------+---------+-----------+ | URL | True | False | Binary | +--------------------------------------+--------+---------+-----------+ | ``["lines list\n"]`` | True | True | Text | +--------------------------------------+--------+---------+-----------+ | :class:`io.StringIO` | True | True | Text | +--------------------------------------+--------+---------+-----------+ | :class:`io.BytesIO` | True | True | Binary | +--------------------------------------+--------+---------+-----------+ | :class:`io.TextIOWrapper` | True | True | Text | +--------------------------------------+--------+---------+-----------+ | :class:`io.BufferedReader` | True | False | Binary | +--------------------------------------+--------+---------+-----------+ | :class:`io.BufferedWriter` | False | True | Binary | +--------------------------------------+--------+---------+-----------+ | :class:`io.BufferedRandom` | True | True | Binary | +--------------------------------------+--------+---------+-----------+ | :func:`tempfile.TemporaryFile` | True | True | Binary | +--------------------------------------+--------+---------+-----------+ | :func:`tempfile.NamedTemporaryFile` | True | True | Binary | +--------------------------------------+--------+---------+-----------+ .. note:: When reading a list of unicode (str) lines, the input for `newline` is used to determine the number of lines in the resulting file handle, not the number of elements in the list. This is to allow composition with ``file.readlines()``. Parameters ---------- file : filepath, url, filehandle, list The input to convert to a filehandle. mode : {'r', 'w'}, optional Whether to return a readable or writable file. 
Conversely, this does not imply that the returned file will be unwritable or unreadable. To get a binary filehandle set `encoding` to binary. encoding : str, optional The encoding scheme to use for the file. If set to 'binary', no bytes will be translated. Otherwise this matches the behavior of :func:`io.open`. errors : str, optional Specifies how encoding and decoding errors are to be handled. This has no effect when `encoding` is binary (as there can be no errors). Otherwise this matches the behavior of :func:`io.open`. newline : {None, "", '\\n', '\\r\\n', '\\r'}, optional Matches the behavior of :func:`io.open`. compression : {'auto', 'gzip', 'bz2', None}, optional Will compress or decompress `file` depending on `mode`. If 'auto' then determining the compression of the file will be attempted and the result will be transparently decompressed. 'auto' will do nothing when writing. Other legal values will use their respective compression schemes. `compression` cannot be used with a text source. compresslevel : int (0-9 inclusive), optional The level of compression to use, will be passed to the appropriate compression handler. This is only used when writing. Returns ------- filehandle : io.TextIOBase or io.BufferedReader/Writer When `encoding='binary'` an :class:`io.BufferedReader` or :class:`io.BufferedWriter` will be returned depending on `mode`. Otherwise an implementation of :class:`io.TextIOBase` will be returned. .. note:: Any underlying resources needed to create `filehandle` are managed transparently. If `file` was closeable, garbage collection of `filehandle` will not close `file`. Calling `close` on `filehandle` will close `file`. Conversely calling `close` on `file` will cause `filehandle` to reflect a closed state. **This does not mean that a `flush` has occured for `filehandle`, there may still have been data in its buffer! 
Additionally, resources may not have been cleaned up properly, so ALWAYS call `close` on `filehandle` and NOT on `file`.** """ arguments = locals().copy() del arguments["file"] file, _, is_binary_file = _resolve(file, **arguments) return _munge_file(file, is_binary_file, arguments) def _munge_file(file, is_binary_file, arguments): mode = arguments.get("mode", _d["mode"]) encoding = arguments.get("encoding", _d["encoding"]) errors = arguments.get("errors", _d["errors"]) newline = arguments.get("newline", _d["newline"]) compression = arguments.get("compression", _d["compression"]) is_output_binary = encoding == "binary" newfile = file compression_handler = get_compression_handler(compression) if is_output_binary and newline is not _d["newline"]: raise ValueError("Cannot use `newline` with binary encoding.") if compression is not None and not compression_handler: raise ValueError("Unsupported compression: %r" % compression) if is_binary_file: if compression: c = compression_handler(newfile, arguments) if mode == "w": newfile = CompressedBufferedWriter( file, c.get_writer(), streamable=c.streamable ) else: newfile = CompressedBufferedReader(file, c.get_reader()) if not is_output_binary: newfile = SaneTextIOWrapper( newfile, encoding=encoding, errors=errors, newline=newline ) else: if compression is not None and compression != "auto": raise ValueError("Cannot use compression with that source.") if is_output_binary: raise ValueError("Source is not a binary source") return newfile @contextmanager def _resolve_file(file, **kwargs): file, source, is_binary_file = _resolve(file, **kwargs) try: yield file, source, is_binary_file finally: if source.closeable: file.close() @contextmanager def open_file(file, **kwargs): r"""Context manager for :func:`skbio.io.util.open`. The signature matches :func:`open`. This context manager will not close filehandles that it did not create itself. Examples -------- Here our input isn't a filehandle and so `f` will get closed. 
>>> with open_file(['a\n']) as f: ... f.read() ... 'a\n' >>> f.closed True Here we provide an open file and so `f` will not get closed and neither will `file`. >>> file = io.BytesIO(b'BZh91AY&SY\x03\x89\x0c\xa6\x00\x00\x01\xc1\x00\x00' ... b'\x108\x00 \x00!\x9ah3M\x1c\xb7\x8b\xb9"\x9c(H\x01' ... b'\xc4\x86S\x00') >>> with open_file(file) as f: ... f.read() ... 'a\nb\nc\n' >>> f.closed False >>> file.closed False """ with _resolve_file(file, **kwargs) as (file, source, is_binary_file): newfile = _munge_file(file, is_binary_file, source.options) try: yield newfile finally: # As soon as we leave the above context manager file will be closed # It is important to realize that because we are closing an inner # buffer, the outer buffer will reflect that state, but it won't # get flushed as the inner buffer is oblivious to the outer # buffer's existence. if not newfile.closed: newfile.flush() _flush_compressor(newfile) def _flush_compressor(file): if isinstance(file, io.TextIOBase) and hasattr(file, "buffer"): file = file.buffer if isinstance(file, CompressedBufferedWriter) and not file.streamable: # Some formats like BZ2 compress the entire file, and so they will # only flush once they have been closed. These kinds of files do not # close their underlying buffer, but only testing can prove that... file.raw.close() @contextmanager def open_files(files, **kwargs): """Context manager for :func:`skbio.io.util.open`. A plural form of :func:`open_file`. """ with ExitStack() as stack: yield [stack.enter_context(open_file(f, **kwargs)) for f in files] scikit-bio-0.6.2/skbio/metadata/000077500000000000000000000000001464262511300164475ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/__init__.py000066400000000000000000000031231464262511300205570ustar00rootroot00000000000000r"""Metadata (:mod:`skbio.metadata`) ================================ .. 
currentmodule:: skbio.metadata This module provides functionality for storing and working with metadata -- the data that describes other data. While a typical data table (see :mod:`skbio.table`) stores the measurements of features in samples, metadata provides information about the samples or features themselves. Examples of metadata include experimental grouping, demographic and clinical properties of subjects, functional categories of genes and metabolites, etc. Sample metadata --------------- .. autosummary:: :toctree: generated/ SampleMetadata Metadata columns ---------------- .. autosummary:: :toctree: generated/ MetadataColumn NumericMetadataColumn CategoricalMetadataColumn Interval metadata ----------------- .. autosummary:: :toctree: generated/ Interval IntervalMetadata """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from ._metadata import ( SampleMetadata, MetadataColumn, NumericMetadataColumn, CategoricalMetadataColumn, ) from ._interval import Interval, IntervalMetadata __all__ = [ "SampleMetadata", "MetadataColumn", "NumericMetadataColumn", "CategoricalMetadataColumn", "Interval", "IntervalMetadata", ] scikit-bio-0.6.2/skbio/metadata/_enan.py000066400000000000000000000071231464262511300201040ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- import struct def _float_to_int(number: float) -> int: # convert float to a native-endian 8-byte sequence (a double): # (alignment doesn't matter because this isn't a struct, and since we are # on the same hardware when we go to int, the endian-ness doesn't matter # either) bytes_ = struct.pack("=d", number) (integer,) = struct.unpack("=Q", bytes_) return integer def _int_to_float(number: int) -> float: bytes_ = struct.pack("=Q", number) (float_,) = struct.unpack("=d", bytes_) return float_ # 1954 is an homage to R's NA value which is a quiet NaN with a mantissa which # appears to represent 1954, birth-year of Ross Ihaka (source: Hadley Wickham) # http://www.markvanderloo.eu # /yaRb/2012/07/08/representation-of-numerical-nas-in-r-and-the-1954-enigma/ # It also happens to let us tolerate small negative values (such as -1 used # in pd.Categorical.codes) without trashing the entire NaN. # ('Q' from struct does fortunately catch this issue before it becomes a # larger problem) _R_OFFSET = 1954 _DEFAULT_NAN_INT = _float_to_int(float("nan")) # at this point, calling `bin(_DEFAULT_NAN_INT)` should produce a # 64-bit positive quiet nan: # 0 11111111111 1000000000000000000000000000000000000000000000000000 # https://www.csee.umbc.edu/courses/undergraduate/CMSC211/spring03 # /burt/tech_help/IEEE-754references.html # unless Python changes some implementation detail, which isn't a problem so # long as XOR is used instead of AND def make_nan_with_payload(payload: int, namespace: int = 255): """Construct a NaN with a namespace and payload. 
The payload must be in the range [-1953, 2141] The namespace must be in the range [0, 255] sign exp mantissa v v---------v v----------------------------------------------------------v +qNaN "header" (includes 1 bit of the mantissa) namespace payload v-------------v v-------v v------------v 0 11111111111 10000000 00000000 00000000 00000000 0000 0000 0000 0000 0000 The namespace + payload requires 20 bits of the mantissa, which will support both 32-bit floats and 64-bit doubles. The purpose is to allow enumerations to be identified and values preserved. Custom enumerations will have a namespace of 255 and will require user guidance. Other built-in schemes should organize themselves within an unsigned byte. The enumeration values are then stored in the payload which uses an offset of 1954 to distinguish between default +qNaN and an enumeration scheme. This also permits small negative values in the payload. """ # To be safe, we will XOR our payload (instead of AND) so that we can take # the difference later, even if the default NaN changes to include a # mantissa payload for some reason nan_int = _DEFAULT_NAN_INT ^ (namespace << 12) nan_int = nan_int ^ (_R_OFFSET + payload) return _int_to_float(nan_int) def get_payload_from_nan(nan: float): nan_int = _float_to_int(nan) namespaced_payload = nan_int ^ _DEFAULT_NAN_INT if namespaced_payload == 0: return (None, None) namespace = namespaced_payload >> 12 payload = namespaced_payload - (namespace << 12) return (payload - _R_OFFSET, namespace) scikit-bio-0.6.2/skbio/metadata/_intersection.pyx000066400000000000000000000443601464262511300220650ustar00rootroot00000000000000# ----------------------------------------------------------------------------- # This code is taken from bx-python project and added with a new function # `update` from line 195 to 211. The license for this code is included in # licenses/bx_python.txt. 
# ----------------------------------------------------------------------------- """ Data structure for performing intersect queries on a set of intervals which preserves all information about the intervals (unlike bitset projection methods). :Authors: James Taylor (james@jamestaylor.org), Ian Schenk (ian.schenck@gmail.com), Brent Pedersen (bpederse@gmail.com) """ # Historical note: # This module original contained an implementation based on sorted endpoints # and a binary search, using an idea from Scott Schwartz and Piotr Berman. # Later an interval tree implementation was implemented by Ian for Galaxy's # join tool (see `bx.intervals.operations.quicksect.py`). This was then # converted to Cython by Brent, who also added support for # upstream/downstream/neighbor queries. This was modified by James to # handle half-open intervals strictly, to maintain sort order, and to # implement the same interface as the original Intersecter. #cython: cdivision=True import operator cdef extern from "stdlib.h": int ceil(float f) float log(float f) int RAND_MAX int rand() int strlen(char *) int iabs(int) cdef inline int imax2(int a, int b): if b > a: return b return a cdef inline int imax3(int a, int b, int c): if b > a: if c > b: return c return b if a > c: return a return c cdef inline int imin3(int a, int b, int c): if b < a: if c < b: return c return b if a < c: return a return c cdef inline int imin2(int a, int b): if b < a: return b return a cdef float nlog = -1.0 / log(0.5) cdef class IntervalNode: """ A single node of an `IntervalTree`. NOTE: Unless you really know what you are doing, you probably should us `IntervalTree` rather than using this directly. 
""" cdef float priority cdef public object interval cdef public int start, end cdef int minend, maxend, minstart cdef IntervalNode cleft, cright, croot property left_node: def __get__(self): return self.cleft if self.cleft is not EmptyNode else None property right_node: def __get__(self): return self.cright if self.cright is not EmptyNode else None property root_node: def __get__(self): return self.croot if self.croot is not EmptyNode else None def __repr__(self): return "IntervalNode(%i, %i)" % (self.start, self.end) def __cinit__(IntervalNode self, int start, int end, object interval): # Python lacks the binomial distribution, so we convert a # uniform into a binomial because it naturally scales with # tree size. Also, python's uniform is perfect since the # upper limit is not inclusive, which gives us undefined here. self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/(RAND_MAX + 1) - 1))) self.start = start self.end = end self.interval = interval self.maxend = end self.minstart = start self.minend = end self.cleft = EmptyNode self.cright = EmptyNode self.croot = EmptyNode cpdef IntervalNode insert(IntervalNode self, int start, int end, object interval): """ Insert a new IntervalNode into the tree of which this node is currently the root. The return value is the new root of the tree (which may or may not be this node!) 
""" cdef IntervalNode croot = self # If starts are the same, decide which to add interval to based on # end, thus maintaining sortedness relative to start/end cdef int decision_endpoint = start if start == self.start: decision_endpoint = end if decision_endpoint > self.start: # insert to cright tree if self.cright is not EmptyNode: self.cright = self.cright.insert( start, end, interval ) else: self.cright = IntervalNode( start, end, interval ) # rebalance tree if self.priority < self.cright.priority: croot = self.rotate_left() else: # insert to cleft tree if self.cleft is not EmptyNode: self.cleft = self.cleft.insert( start, end, interval) else: self.cleft = IntervalNode( start, end, interval) # rebalance tree if self.priority < self.cleft.priority: croot = self.rotate_right() croot.set_ends() self.cleft.croot = croot self.cright.croot = croot return croot cdef IntervalNode rotate_right(IntervalNode self): cdef IntervalNode croot = self.cleft self.cleft = self.cleft.cright croot.cright = self self.set_ends() return croot cdef IntervalNode rotate_left(IntervalNode self): cdef IntervalNode croot = self.cright self.cright = self.cright.cleft croot.cleft = self self.set_ends() return croot cdef inline void set_ends(IntervalNode self): if self.cright is not EmptyNode and self.cleft is not EmptyNode: self.maxend = imax3(self.end, self.cright.maxend, self.cleft.maxend) self.minend = imin3(self.end, self.cright.minend, self.cleft.minend) self.minstart = imin3(self.start, self.cright.minstart, self.cleft.minstart) elif self.cright is not EmptyNode: self.maxend = imax2(self.end, self.cright.maxend) self.minend = imin2(self.end, self.cright.minend) self.minstart = imin2(self.start, self.cright.minstart) elif self.cleft is not EmptyNode: self.maxend = imax2(self.end, self.cleft.maxend) self.minend = imin2(self.end, self.cleft.minend) self.minstart = imin2(self.start, self.cleft.minstart) def intersect( self, int start, int end, sort=True ): """ given a start and a end, return 
a list of features falling within that range """ cdef list results = [] self._intersect( start, end, results ) return results find = intersect cdef void _intersect( IntervalNode self, int start, int end, list results): cdef int send, qend # Left subtree if self.cleft is not EmptyNode and self.cleft.maxend >= start: self.cleft._intersect( start, end, results ) # This interval if start == end: qend = end else: qend = end - 1 if self.end == self.start: send = self.end else: send = self.end - 1 if ( send >= start ) and ( self.start <= qend ): results.append( self.interval ) # Right subtree if self.cright is not EmptyNode and self.start <= qend: self.cright._intersect( start, end, results ) cpdef void update(IntervalNode self, int start, int end, object old_feature, object new_feature): """ given a start and end, replace all objects that match the old_feature with new_feature. """ # Left subtree if self.cleft is not EmptyNode and self.cleft.maxend > start: self.cleft.update( start, end, old_feature, new_feature ) # This interval if ( self.end > start ) and ( self.start < end ): if self.interval == old_feature: self.interval = new_feature # Right subtree if self.cright is not EmptyNode and self.start < end: self.cright.update( start, end, old_feature, new_feature ) cdef void _seek_left(IntervalNode self, int position, list results, int n, int max_dist): # we know we can bail in these 2 cases. if self.maxend + max_dist < position: return if self.minstart > position: return # the ordering of these 3 blocks makes it so the results are # ordered nearest to farest from the query position if self.cright is not EmptyNode: self.cright._seek_left(position, results, n, max_dist) if -1 < position - self.end < max_dist: results.append(self.interval) # TODO: can these conditionals be more stringent? 
if self.cleft is not EmptyNode: self.cleft._seek_left(position, results, n, max_dist) cdef void _seek_right(IntervalNode self, int position, list results, int n, int max_dist): # we know we can bail in these 2 cases. if self.maxend < position: return if self.minstart - max_dist > position: return #print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position # the ordering of these 3 blocks makes it so the results are # ordered nearest to farest from the query position if self.cleft is not EmptyNode: self.cleft._seek_right(position, results, n, max_dist) if -1 < self.start - position < max_dist: results.append(self.interval) if self.cright is not EmptyNode: self.cright._seek_right(position, results, n, max_dist) cpdef left(self, position, int n=1, int max_dist=2500): """ find n features with a start > than `position` f: a Interval object (or anything with an `end` attribute) n: the number of features to return max_dist: the maximum distance to look before giving up. """ cdef list results = [] # use start - 1 becuase .left() assumes strictly left-of self._seek_left( position - 1, results, n, max_dist ) if len(results) == n: return results r = results r.sort(key=operator.attrgetter('end'), reverse=True) return r[:n] cpdef right(self, position, int n=1, int max_dist=2500): """ find n features with a end < than position f: a Interval object (or anything with a `start` attribute) n: the number of features to return max_dist: the maximum distance to look before giving up. 
""" cdef list results = [] # use end + 1 becuase .right() assumes strictly right-of self._seek_right(position + 1, results, n, max_dist) if len(results) == n: return results r = results r.sort(key=operator.attrgetter('start')) return r[:n] def traverse(self, func): self._traverse(func) cdef void _traverse(IntervalNode self, object func): if self.cleft is not EmptyNode: self.cleft._traverse(func) func(self) if self.cright is not EmptyNode: self.cright._traverse(func) cdef IntervalNode EmptyNode = IntervalNode( 0, 0, IntervalObj(0, 0)) ## ---- Wrappers that retain the old interface ------------------------------- cdef class IntervalObj: """ Basic feature, with required integer start and end properties. Also accepts optional strand as +1 or -1 (used for up/downstream queries), a name, and any arbitrary data is sent in on the info keyword argument >>> from skbio.metadata._intersection import IntervalObj >>> f1 = IntervalObj(23, 36) >>> f2 = IntervalObj(34, 48, value={'chr':12, 'anno':'transposon'}) >>> f2 # doctest: +SKIP IntervalObj(34, 48, value={'anno': 'transposon', 'chr': 12}) """ cdef public int start, end cdef public object value, chrom, strand def __init__(self, int start, int end, object value=None, object chrom=None, object strand=None ): assert start <= end, "start must be less than end" self.start = start self.end = end self.value = value self.chrom = chrom self.strand = strand def __repr__(self): fstr = "IntervalObj(%d, %d" % (self.start, self.end) if not self.value is None: fstr += ", value=" + str(self.value) fstr += ")" return fstr def __richcmp__(self, other, op): if op == 0: # < return self.start < other.start or self.end < other.end elif op == 1: # <= return self == other or self < other elif op == 2: # == return self.start == other.start and self.end == other.end elif op == 3: # != return self.start != other.start or self.end != other.end elif op == 4: # > return self.start > other.start or self.end > other.end elif op == 5: # >= return self == 
other or self > other cdef class IntervalTree: """ Data structure for performing window intersect queries on a set of of possibly overlapping 1d intervals. Usage ===== Create an empty IntervalTree >>> from skbio.metadata._intersection import IntervalObj, IntervalTree >>> intersecter = IntervalTree() An interval is a start and end position and a value (possibly None). You can add any object as an interval: >>> intersecter.insert( 0, 10, "food" ) >>> intersecter.insert( 3, 7, dict(foo='bar') ) >>> intersecter.find( 2, 5 ) ['food', {'foo': 'bar'}] If the object has start and end attributes (like the Interval class) there is are some shortcuts: >>> intersecter = IntervalTree() >>> intersecter.insert_interval( IntervalObj( 0, 10 ) ) >>> intersecter.insert_interval( IntervalObj( 3, 7 ) ) >>> intersecter.insert_interval( IntervalObj( 3, 40 ) ) >>> intersecter.insert_interval( IntervalObj( 13, 50 ) ) >>> intersecter.find( 30, 50 ) [IntervalObj(3, 40), IntervalObj(13, 50)] >>> intersecter.find( 100, 200 ) [] Before/after for intervals >>> intersecter.before_interval( IntervalObj( 10, 20 ) ) [IntervalObj(3, 7)] >>> intersecter.before_interval( IntervalObj( 5, 20 ) ) [] Upstream/downstream >>> intersecter.upstream_of_interval(IntervalObj(11, 12)) [IntervalObj(0, 10)] >>> intersecter.upstream_of_interval(IntervalObj(11, 12, strand="-")) [IntervalObj(13, 50)] >>> intersecter.upstream_of_interval(IntervalObj(1, 2, strand="-"), num_intervals=3) [IntervalObj(3, 7), IntervalObj(3, 40), IntervalObj(13, 50)] """ cdef IntervalNode root def __cinit__( self ): root = None # ---- Position based interfaces ----------------------------------------- def insert( self, int start, int end, object value=None ): """ Insert the interval [start,end) associated with value `value`. 
""" if self.root is None: self.root = IntervalNode( start, end, value ) else: self.root = self.root.insert( start, end, value ) add = insert def update( self, start, end, old_feature, new_feature): """ Given an interval [start, end), replace all objects that match the `old_feature` with `new_feature`. """ if self.root is not None: self.root.update(start, end, old_feature, new_feature) def find( self, start, end ): """ Return a sorted list of all intervals overlapping [start,end). """ if self.root is None: return [] return self.root.find( start, end ) def before( self, position, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie before `position` and are no further than `max_dist` positions away """ if self.root is None: return [] return self.root.left( position, num_intervals, max_dist ) def after( self, position, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie after `position` and are no further than `max_dist` positions away """ if self.root is None: return [] return self.root.right( position, num_intervals, max_dist ) # ---- Interval-like object based interfaces ----------------------------- def insert_interval( self, interval ): """ Insert an "interval" like object (one with at least start and end attributes) """ self.insert( interval.start, interval.end, interval ) add_interval = insert_interval def before_interval( self, interval, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie completely before `interval` and are no further than `max_dist` positions away """ if self.root is None: return [] return self.root.left( interval.start, num_intervals, max_dist ) def after_interval( self, interval, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie completely after `interval` and are no further than `max_dist` positions away """ if self.root is None: return [] return self.root.right( interval.end, num_intervals, max_dist ) def upstream_of_interval( self, 
interval, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie completely upstream of `interval` and are no further than `max_dist` positions away """ if self.root is None: return [] if interval.strand == -1 or interval.strand == "-": return self.root.right( interval.end, num_intervals, max_dist ) else: return self.root.left( interval.start, num_intervals, max_dist ) def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ): """ Find `num_intervals` intervals that lie completely downstream of `interval` and are no further than `max_dist` positions away """ if self.root is None: return [] if interval.strand == -1 or interval.strand == "-": return self.root.left( interval.start, num_intervals, max_dist ) else: return self.root.right( interval.end, num_intervals, max_dist ) def traverse(self, fn): """ call fn for each element in the tree """ if self.root is None: return None return self.root.traverse(fn) # For backward compatibility Intersecter = IntervalTree scikit-bio-0.6.2/skbio/metadata/_interval.py000066400000000000000000001045611464262511300210130ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import operator import copy import functools from ._intersection import IntervalTree from skbio.util._decorator import classonlymethod class Interval: """Stores the bounds and metadata of an interval feature. This class stores an interval feature. An interval feature is defined as a sub-region of a biological sequence or sequence alignment that is a functional entity, e.g., a gene, a riboswitch, an exon, etc. It can span a single contiguous region or multiple non-contiguous regions (e.g. 
multiple exons in a transcript, or multiple genes in an operon). Parameters ---------- interval_metadata : object A reference to the ``IntervalMetadata`` object that this ``Interval`` object is associated to. bounds : iterable of tuple of int Tuples representing start and end coordinates. It is *zero-based* numbering. It is always inclusive on start bound and exclusive on end bound. fuzzy : iterable of tuple of bool, optional Tuples representing the fuzziness of each bound coordinates. If this isn't specified, then the fuzziness of all bound coordinates are ``False``. If any of the coordinate fuzziness is ``True``, it indicates that the exact bound point of a interval feature is unknown. The bound may begin or end at some points outside the specified coordinates. This accommodates the location format [1]_ of INSDC. metadata : dict, optional Dictionary of attributes storing information of the feature such as "strand", "gene_name", or "product". See Also -------- skbio.metadata.IntervalMetadata Notes ----- While the construction of an ``Interval`` object automatically add itself to its associated ``IntervalMetadata`` object, ``IntervalMetadata.add`` is the typical/easier way to create and add it to ``IntervalMetadata``. References ---------- .. [1] ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html#3.4.3 Examples -------- Hypothetically, let's say we have a gene called "genA" with 10 nt as shown in the following diagram. The second row represents the two exons (indicated by "=") on this gene: :: TGGATTCTGC -====--==- 0123456789 We can create an ``Interval`` object to represent the exons of the gene: >>> from skbio.metadata import Interval, IntervalMetadata >>> interval_metadata = IntervalMetadata(10) Remember the coordinates are inclusive in lower bound and exclusive on upper bound: >>> gene = Interval(interval_metadata, ... bounds=[(1, 5), (7, 9)], ... 
metadata={'name': 'genA'}) >>> gene # doctest: +ELLIPSIS Interval(interval_metadata=..., bounds=[(1, 5), (7, 9)], \ fuzzy=[(False, False), (False, False)], metadata={'name': 'genA'}) """ def __init__(self, interval_metadata, bounds, fuzzy=None, metadata=None): if not isinstance(interval_metadata, IntervalMetadata): raise TypeError( "You need to provide an IntervalMetadata" "object, not %r" % interval_metadata ) # Intervals self._interval_metadata = interval_metadata self._bounds_fuzzy_setter(bounds, fuzzy) # Metadata if metadata is None: metadata = {} self.metadata = metadata # add this interval feature to the associated IntervalMetadata self._add() def _add(self): """Add the current ``Interval`` to the IntervalMetadata object.""" for bound in self.bounds: start, end = bound self._interval_metadata._interval_tree.add(start, end, self) self._interval_metadata._intervals.append(self) def __eq__(self, other): """Test if this ``Interval`` object is equal to another. The equality is performed by checking if the ``metadata``, ``bounds`` and ``fuzzy`` are equal. Since the ``bounds`` and the ``fuzzy`` are sorted, the permutations of them during the ``Interval`` construction or assignment won't matter. Parameters ---------- other : Interval Interval to test for equality against. Returns ------- bool Indicates if the two objects are equal. """ return ( (self.metadata == other.metadata) and (self.bounds == other.bounds) and (self.fuzzy == other.fuzzy) ) def __ne__(self, other): """Test if this ``Interval`` object is not equal to another. Parameters ---------- other : Interval Interval to test for inequality against. Returns ------- bool Indicates if the two objects are not equal. """ return not (self == other) def __repr__(self): """Return a string representation of this ``Interval`` object. Returns ------- str String representation of this ``Interval`` object. 
""" if self.dropped: s = "{}(dropped=True, bounds={!r}, " "fuzzy={!r}, metadata={!r})" return s.format( self.__class__.__name__, self.bounds, self.fuzzy, self.metadata ) else: s = ( "{}(interval_metadata=<{!r}>, bounds={!r}, " "fuzzy={!r}, metadata={!r})" ) return s.format( self.__class__.__name__, id(self._interval_metadata), self.bounds, self.fuzzy, self.metadata, ) def drop(self): """Drop this ``Interval`` object from interval metadata it links to. If the ``Interval`` object is dropped, you can still get values of ``bounds``, ``fuzzy``, and ``metadata`` attributes, but you can not change their values with the setters. See Also -------- skbio.metadata.IntervalMetadata.drop """ if not self.dropped: self._interval_metadata.drop([self]) def _bounds_fuzzy_setter(self, bounds=None, fuzzy=None): if self.dropped: raise RuntimeError( "Cannot change `bounds` or `fuzzy` " "on a dropped Interval object." ) # Casts to `list`, validation, sorting, and setting of `bounds` # and `fuzzy` happen here. if bounds is not None: # check iterability try: # check iterability bounds = list(bounds) except TypeError: raise TypeError( "Cannot give an non-iterable (%r) " "to `bounds`." % bounds ) # check it is not empty if not bounds: raise ValueError("Cannot give empty `bounds`.") # check each contiguous span is in right format for bound in bounds: _assert_valid_bound(bound) spans = len(bounds) else: spans = len(self.bounds) if fuzzy is not None: try: fuzzy = list(fuzzy) except TypeError: raise TypeError( "Cannot give a non-iterable (%r) " "to `fuzzy`." % fuzzy ) if len(fuzzy) != spans: raise ValueError( "The length of fuzzy must " "be equal to the length of bounds." ) for fuzzy_i in fuzzy: _assert_valid_fuzzy(fuzzy_i) if bounds is None: # `bounds` and `fuzzy` cannot both be omitted. if fuzzy is None: raise ValueError("Cannot give `None` to both `bounds` " "and `fuzzy`.") # If only `fuzzy` is provided, set `self.fuzzy` and don't # change `self.bounds`. 
else: self._fuzzy = fuzzy else: # If only `bounds` is provided, reset `self.fuzzy` to # all `False`. if fuzzy is None: bounds.sort() self._check_bounds(bounds) self._bounds = bounds # reset all the fuzzy to False!! del self.fuzzy # If both `bounds` and `fuzzy` are provided, set # `self.bounds` and `self.fuzzy`. else: bounds, fuzzy = [list(e) for e in zip(*sorted(zip(bounds, fuzzy)))] self._check_bounds(bounds) self._bounds = bounds self._fuzzy = fuzzy self._interval_metadata._is_stale_tree = True def _check_bounds(self, bounds): """Input `bounds` must be sorted.""" upper_bound = self._interval_metadata.upper_bound lower_bound = self._interval_metadata.lower_bound if upper_bound is not None and bounds[-1][-1] > upper_bound: raise ValueError( "Cannot set `bounds` (%r) with coordinate " "larger than upper bound (%r)" % (bounds, upper_bound) ) if bounds[0][0] < lower_bound: raise ValueError( "Cannot set `bounds` (%r) with coordinate " "smaller than lower bound (%r)." % (bounds, lower_bound) ) @property def fuzzy(self): """The openness of each coordinate. This indicates that the exact bound of a interval feature is unknown. The bound may begin or end at some points outside the specified coordinates. This accommodates the bound format [1]_ of INSDC. References ---------- .. [1] ftp://ftp.ebi.ac.uk/pub/databases/embl/doc/FT_current.html#3.4.3 """ return self._fuzzy @fuzzy.setter def fuzzy(self, value): """Set ``fuzzy``. The ``value`` should an iterable matching ``self.bounds``. """ self._bounds_fuzzy_setter(fuzzy=value) @fuzzy.deleter def fuzzy(self): """Delete ``fuzzy``. This set all fuzzy to be ``False``. """ if self.dropped: raise RuntimeError("Cannot change fuzzy on dropped " "Interval object.") self._fuzzy = [(False, False)] * len(self.bounds) @property def bounds(self): """The coordinates of the interval feature. It should be a list of tuples of int pair. Each tuple stores the start and end coordinates of a span of the interval feature. 
The coordinates are *zero-based*. They are inclusive on the start and exclusive on the end. """ return self._bounds @bounds.setter def bounds(self, value): """Set ``bounds``. WARNING: setting ``bounds`` will reset ``fuzzy`` value to ``False``. This is not totally surprising because it is justifiable your old ``fuzzy`` don't fit the new bounds. """ self._bounds_fuzzy_setter(bounds=value) @property def metadata(self): """The metadata of the interval feature. It stores the metadata (eg. gene name, function, ID, etc.) of the interval feature as a ``dict``. """ return self._metadata @metadata.setter def metadata(self, value): if self.dropped: raise RuntimeError("Cannot change metadata on dropped " "Interval object.") if not isinstance(value, dict): raise TypeError("metadata must be a dict, not %r" % value) self._metadata = value @metadata.deleter def metadata(self): """Delete metadata. This sets metadata to be empty dict. """ if self.dropped: raise RuntimeError("Cannot change metadata to dropped " "Interval object.") self._metadata = {} @property def dropped(self): """Boolean value indicating if the ``Interval`` object is dropped. If it is dropped, it means it is not associated with IntervalMetadata object any more. Notes ----- This property is not writable. See Also -------- skbio.metadata.Interval.drop skbio.metadata.IntervalMetadata.drop """ return self._interval_metadata is None class IntervalMetadata: """Stores the interval features. ``IntervalMetadata`` object allows storage, modification, and querying of interval features covering a region of a single coordinate system. For instance, this can be used to store functional annotations about genes across a genome. This object is also applied to the sequence alignment. This object is typically coupled with another object, such as a ``Sequence`` object (or its child class), or a ``TabularMSA`` object. Parameters ---------- upper_bound : int or None Defines the exclusive upper bound of the interval features. 
No coordinate can be greater than it. ``None`` means that the coordinate space is unbounded. copy_from : IntervalMetadata or None, optional Create a new object from the input ``IntervalMetadata`` object by shallow copying if it is not ``None``. The upper bound of the new object will be updated with the ``upper_bound`` parameter specified. Notes ----- This class stores coordinates of all feature bounds into a interval tree. It allows the speed up of query-by-bound. The building of interval tree is deferred until necessary to save computation. It is updated from all coordinates only when you need to fetch info from the interval tree. When you add a method into this class and if you method need to fetch info from ``IntervalMetadata._interval_tree``, you should decorate it with ``_rebuild_tree``. This decorator will check if the current interval tree is stale and will update it if so. Additionally, if your method add, delete, or changes the coordinates of any interval features, you should set ``self._is_stale_tree`` to ``True`` at the end of your method to indicate the interval tree becomes stale. See Also -------- skbio.metadata.Interval Examples -------- Let's say we have a sequence of length 10 and want to add annotation to it. Create an ``IntervalMetadata`` object: >>> from skbio.metadata import Interval, IntervalMetadata >>> im = IntervalMetadata(10) Let's add annotations of 3 genes: >>> im.add(bounds=[(3, 9)], ... metadata={'gene': 'sagB'}) # doctest: +ELLIPSIS Interval(interval_metadata=..., bounds=[(3, 9)], \ fuzzy=[(False, False)], metadata={'gene': 'sagB'}) >>> im.add(bounds=[(3, 7)], ... metadata={'gene': 'sagC'}) # doctest: +ELLIPSIS Interval(interval_metadata=..., bounds=[(3, 7)], \ fuzzy=[(False, False)], metadata={'gene': 'sagC'}) >>> im.add(bounds=[(1, 2), (4, 7)], ... 
metadata={'gene': 'sagA'}) # doctest: +ELLIPSIS Interval(interval_metadata=..., bounds=[(1, 2), (4, 7)], \ fuzzy=[(False, False), (False, False)], metadata={'gene': 'sagA'}) Show the object representation: >>> im # doctest: +ELLIPSIS 3 interval features ------------------- Interval(interval_metadata=..., bounds=[(3, 9)], \ fuzzy=[(False, False)], metadata={'gene': 'sagB'}) Interval(interval_metadata=..., bounds=[(3, 7)], \ fuzzy=[(False, False)], metadata={'gene': 'sagC'}) Interval(interval_metadata=..., bounds=[(1, 2), (4, 7)], \ fuzzy=[(False, False), (False, False)], metadata={'gene': 'sagA'}) We can sort the genes by their bounds: >>> im.sort() >>> im # doctest: +ELLIPSIS 3 interval features ------------------- Interval(interval_metadata=..., bounds=[(1, 2), (4, 7)], \ fuzzy=[(False, False), (False, False)], metadata={'gene': 'sagA'}) Interval(interval_metadata=..., bounds=[(3, 7)], \ fuzzy=[(False, False)], metadata={'gene': 'sagC'}) Interval(interval_metadata=..., bounds=[(3, 9)], \ fuzzy=[(False, False)], metadata={'gene': 'sagB'}) Query the genes by bound and/or metadata: >>> intvls = im.query([(1, 2)], metadata={'gene': 'foo'}) >>> list(intvls) [] >>> intvls = im.query([(7, 9)]) >>> list(intvls) # doctest: +ELLIPSIS [Interval(interval_metadata=..., bounds=[(3, 9)], \ fuzzy=[(False, False)], metadata={'gene': 'sagB'})] >>> intvls = im.query(metadata={'gene': 'sagA'}) >>> intvls = list(intvls) >>> intvls # doctest: +ELLIPSIS [Interval(interval_metadata=..., bounds=[(1, 2), (4, 7)], \ fuzzy=[(False, False), (False, False)], metadata={'gene': 'sagA'})] Drop the gene(s) we get from query: >>> im.drop(intvls) >>> im.sort() >>> im # doctest: +ELLIPSIS 2 interval features ------------------- Interval(interval_metadata=..., bounds=[(3, 7)], \ fuzzy=[(False, False)], metadata={'gene': 'sagC'}) Interval(interval_metadata=..., bounds=[(3, 9)], \ fuzzy=[(False, False)], metadata={'gene': 'sagB'}) """ default_write_format = "gff3" def __init__(self, upper_bound, 
copy_from=None): self._upper_bound = upper_bound if self.upper_bound is not None: if self.upper_bound < self.lower_bound: raise ValueError( "Cannot set `upper_bound` (%r) " "smaller than `lower_bound` (%r)" % (self.upper_bound, self.lower_bound) ) # List of Interval objects. self._intervals = [] # IntervalTree object to allow faster querying of interval objects. self._interval_tree = IntervalTree() # Indicates if the IntervalTree needs to be rebuilt. self._is_stale_tree = False if copy_from is not None: for interval in copy_from._intervals: bounds_cp = interval.bounds[:] fuzzy_cp = interval.fuzzy[:] metadata_cp = copy.copy(interval.metadata) self.add(bounds_cp, fuzzy=fuzzy_cp, metadata=metadata_cp) @property def upper_bound(self): """The exclusive upper bound of interval features.""" return self._upper_bound @property def lower_bound(self): """The inclusive lower bound of interval features.""" return 0 @property def num_interval_features(self): """The total number of interval features.""" return len(self._intervals) def _rebuild_tree(method): """Rebuild the IntervalTree.""" @functools.wraps(method) def inner(self, *args, **kwargs): if self._is_stale_tree is False: return method(self, *args, **kwargs) self._interval_tree = IntervalTree() for f in self._intervals: for start, end in f.bounds: self._interval_tree.add(start, end, f) self._is_stale_tree = False return method(self, *args, **kwargs) return inner def _reverse(self): """Reverse ``IntervalMetadata`` object. This operation reverses all of the interval coordinates. For instance, this can be used to compare coordinates in the forward strand to coordinates in the reversal strand. """ for f in self._intervals: try: intvls = [ (self.upper_bound - x[1], self.upper_bound - x[0]) for x in reversed(f.bounds) ] except TypeError: raise TypeError( "You cannot reverse the coordinates " "when the upper bound is `None`" ) f.bounds = intvls # DON'T forget this!!! 
self._is_stale_tree = True @classonlymethod def concat(cls, interval_metadata): """Concatenate an iterable of ``IntervalMetadata`` objects. It concatenates the multiple ``IntervalMetadata`` objects into one coordinate space. The order of the objects in the input iterable matters. The coordinate of the second ``InterableMetadata`` will be shifted up with the length of the first ``IntervalMetadata`` object. This function is useful when you concatenate multiple sequences. Parameters ---------- interval_metadata : Iterable (IntervalMetadata) The interval metadata to concatenate. Returns ------- IntervalMetadata Concatenated interval metadata. Examples -------- >>> from skbio.metadata import IntervalMetadata Create two ``IntervalMetadata`` objects: >>> im1 = IntervalMetadata(3) >>> _ = im1.add([(0, 2)], [(True, False)], {'gene': 'sagA'}) >>> im2 = IntervalMetadata(4) >>> _ = im2.add([(1, 4)], [(True, True)], {'gene': 'sagB'}) Concatenate them into a single coordinate space. The second ``IntervalMetadata``'s interval features are all shifted up. 
The resulting ``IntervalMetadata``'s upper bound is the sum of upper bounds of concatenated objects: >>> im = IntervalMetadata.concat([im1, im2]) >>> im # doctest: +ELLIPSIS 2 interval features ------------------- Interval(interval_metadata=<...>, bounds=[(0, 2)], \ fuzzy=[(True, False)], metadata={'gene': 'sagA'}) Interval(interval_metadata=<...>, bounds=[(4, 7)], \ fuzzy=[(True, True)], metadata={'gene': 'sagB'}) >>> im.upper_bound 7 """ interval_metadata = list(interval_metadata) if len(interval_metadata) == 0: return cls(0) upper_bound = 0 for im in interval_metadata: try: upper_bound += im.upper_bound except TypeError: raise TypeError( "You cannot concat the interval metadata " "because its upper bound is `None`:\n%r" % im ) new = cls(upper_bound) length = 0 for i, im in enumerate(interval_metadata): for intvl in im._intervals: bounds = intvl.bounds fuzzy = intvl.fuzzy if i != 0: bounds = [(start + length, end + length) for start, end in bounds] new.add(bounds, fuzzy, intvl.metadata) length += im.upper_bound return new def merge(self, other): """Merge the interval features of another ``IntervalMetadata`` object. It adds all the interval features of the other object into ``self``. Note this will not check if there are any duplicates of interval features after merge. Notes ----- It will raise error if you merge an unbounded ``IntervalMetadata`` object to the current object if it is bounded. This avoids partially updating the current object if the merge fails in the middle of the process due to the possibility that some interval features to be merged may live outside the current defined upper bound. Parameters ---------- other : ``IntervalMetadata`` The other ``IntervalMetadata`` to be merged. 
""" if self.upper_bound is not None: if other.upper_bound is None: raise ValueError( "Cannot merge an unbound IntervalMetadata object " "to a bounded one" ) elif self.upper_bound != other.upper_bound: raise ValueError( "The upper bounds of the two IntervalMetadata objects " "are not equal (%r != %r)" % (self.upper_bound, other.upper_bound) ) if self.lower_bound != other.lower_bound: raise ValueError( "The lower bounds of the two IntervalMetadata objects " "are not equal (%d != %d)" % (self.lower_bound, other.lower_bound) ) for intvl in other._intervals: self.add(intvl.bounds, intvl.fuzzy, intvl.metadata) def sort(self, ascending=True): """Sort interval features by their coordinates. It sorts by the start coordinate first. If they are the same between two interval features, they will be sorted by comparing their end coordinates. For example, an interval feature with [(1, 2), (4, 7)] will be sorted in front of another one with [(1, 2), (3, 8)]. Parameters ---------- ascending : bool, optional sort in ascending or descending coordinates. """ self._intervals.sort( key=lambda i: [i.bounds[0][0], i.bounds[-1][1]], reverse=not ascending ) def add(self, bounds, fuzzy=None, metadata=None): """Create and add an ``Interval`` to this ``IntervalMetadata``. This method creates an ``Interval`` object and inserts it into the ``IntervalMetadata`` object. Parameters ---------- bounds : iterable of tuple of ints Tuples representing start and end coordinates. It is *zero-based* numbering. It is always inclusive on start bound and exclusive on end bound. fuzzy : iterable of tuple of bool, optional Tuples representing the fuzziness of each bound coordinates. metadata : dict, optional A dictionary of key-value pairs associated with the ``Interval`` object. Returns ------- Interval The ``Interval`` object added. See Also -------- skbio.metadata.Interval """ # Add an interval to the tree. Note that the add functionality is # built within the Interval constructor. 
return Interval( interval_metadata=self, bounds=bounds, fuzzy=fuzzy, metadata=metadata ) @_rebuild_tree def _query_interval(self, bound): """Yield ``Interval`` objects that overlap with the bound.""" _assert_valid_bound(bound) start, end = bound intvls = self._interval_tree.find(start, end) # if a ``Interval`` has many non-contiguous spans and # multiple of them overlap with the bound, then # this ``Interval`` object will be returned # multiple times. So we need to remove duplicates. seen = set() for intvl in intvls: if id(intvl) not in seen: seen.add(id(intvl)) yield intvl def _query_attribute(self, metadata, intervals=None): """Yield ``Interval`` objects based on query attributes. Parameters ---------- metadata : dict or ``None`` If it is ``None``, return empty iterator; if it is ``{}``, return an interator of all the ``Interval`` objects. intervals : iterable An iterable of ``Interval`` objects. """ if metadata is None: return if intervals is None: intervals = self._intervals for intvl in intervals: for key, value in metadata.items(): if key not in intvl.metadata or intvl.metadata[key] != value: break else: yield intvl @_rebuild_tree def query(self, bounds=None, metadata=None): """Yield ``Interval`` object with the bounds and attributes. The ``Interval`` objects must meet both requirements: 1) overlap with any of the spans specified by ``bounds``; 2) satisfy ``metadata`` specification. For instance, you can identify all the recA genes that overlap with (10, 100) or (900, 1000) with this code ``interval_metadata.query([(10, 100), (900, 1000)], {'gene': 'recA'})``. Parameters ---------- bounds : iterable of tuples of int pair, optional Specifies bounds to look for the ``Interval`` objects. An satisfying interval feature only need to overlap with one bound. Default (``None``) means all ``Intervals`` meet this requirement. metadata : dict, optional A dictionary of key word attributes associated with the ``Interval`` object. 
It specifies what metadata keywords and values to look for. Default (``None``) means all ``Intervals`` meet this requirement. Yields ------ Interval ``Interval`` object satisfying the search criteria. """ if bounds is None and metadata is None: metadata = {} if bounds is None: for intvl in self._query_attribute(metadata): yield intvl else: for loc in bounds: intvls = self._query_interval(loc) if metadata is None: metadata = {} for intvl in self._query_attribute(metadata, intvls): yield intvl def drop(self, intervals, negate=False): """Drop Interval objects. The given ``Interval`` objects will be removed and their associated ``IntervalMetadata`` will be set to ``None``. Parameters ---------- intervals : iterable of ``Interval`` ``Interval`` objects to drop from this object. negate : bool Negate the drop operation, i.e. keeping the specified intervals instead of dropping them. """ to_delete = {id(f) for f in intervals} new_intvls = [] # iterate through queries and drop them for intvl in self._intervals: drop = id(intvl) in to_delete if negate is True: drop = not drop if drop: intvl._interval_metadata = None else: new_intvls.append(intvl) self._intervals = new_intvls self._is_stale_tree = True def __eq__(self, other): """Test if this object is equal to another. It checks if the coordinate spaces are the same between the two objects. If so, then check if all the interval features are equal between the two objects after sorting them by bounds. Parameters ---------- other : IntervalMetadata Interval metadata to test for equality against. Returns ------- bool Indicates if the two objects are equal. """ if ( self.upper_bound != other.upper_bound or self.lower_bound != other.lower_bound ): return False else: self_intervals = sorted(self._intervals, key=operator.attrgetter("bounds")) other_intervals = sorted( other._intervals, key=operator.attrgetter("bounds") ) return self_intervals == other_intervals def __ne__(self, other): """Test if this object is not equal to another. 
Parameters ---------- other : IntervalMetadata Interval metadata to test for inequality against. Returns ------- bool Indicates if the two objects are not equal. See Also -------- skbio.metadata.IntervalMetadata.__eq__ """ return not (self == other) def __repr__(self): """Return a string representation of this object. Returns ------- str String representation of this ``IntervalMetadata`` object. """ n = self.num_interval_features l1 = "{} interval feature".format(n) if n != 1: l1 += "s" l2 = "-" * len(l1) if n <= 5: items = [repr(i) for i in self._intervals] else: # intentionally overwrite items[2] to make code cleaner items = [repr(self._intervals[i]) for i in [0, 1, 2, n - 2, n - 1]] items[2] = "..." return "\n".join([l1, l2] + items) def __copy__(self): """Return a shallow copy. Notes ----- The ``IntervalMetadata`` copy will have copies of the ``Interval`` objects present in this object. The ``metadata`` dictionary of each ``Interval`` object will be a shallow copy. See Also -------- __deepcopy__ """ return self._copy(False, {}) def __deepcopy__(self, memo): """Return a deep copy. Notes ----- The ``IntervalMetadata`` copy will have copies of the ``Interval`` objects present in this object. The ``metadata`` dictionary of each ``Interval`` object will be a deep copy. See Also -------- __copy__ """ return self._copy(True, memo) def _copy(self, deep, memo): cp = IntervalMetadata(self.upper_bound) for interval in self._intervals: # Only need to shallow-copy `bounds` and `fuzzy` # because their elements are immutable. 
bounds_cp = interval.bounds[:] fuzzy_cp = interval.fuzzy[:] if deep: metadata_cp = copy.deepcopy(interval.metadata, memo) else: metadata_cp = copy.copy(interval.metadata) cp.add(bounds_cp, fuzzy=fuzzy_cp, metadata=metadata_cp) return cp def _assert_valid_bound(bound): if isinstance(bound, tuple): try: start, end = bound except ValueError: raise ValueError( "A `bound` must be a tuple of exactly " "two coordinates, not {!r}".format(bound) ) if not (isinstance(start, int) and isinstance(end, int)) or start > end: raise ValueError( "`start` (%r) cannot be a larger int " "than `end` (%r)." % (start, end) ) else: raise TypeError("Each `bound` must be a tuple, not {!r}".format(bound)) def _assert_valid_fuzzy(fuzzy): if isinstance(fuzzy, tuple): try: start, end = fuzzy except ValueError: raise ValueError( "A `fuzzy` must be a tuple of exactly " "two, not {!r}".format(fuzzy) ) if not (isinstance(start, bool) and isinstance(end, bool)): raise TypeError("A `fuzzy` must be a tuple of two booleans") else: raise TypeError("Each `fuzzy` must be a tuple, not {!r}".format(fuzzy)) scikit-bio-0.6.2/skbio/metadata/_metadata.py000066400000000000000000001264561464262511300207560ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- import abc import collections import itertools import sqlite3 import types import warnings import pandas as pd import numpy as np import skbio.metadata.missing as _missing from skbio.util import find_duplicates from .base import SUPPORTED_COLUMN_TYPES, FORMATTED_ID_HEADERS, is_id_header DEFAULT_MISSING = _missing.DEFAULT_MISSING class _MetadataBase: """Base class for functionality shared between Metadata and MetadataColumn. 
Parameters ---------- index : pandas.Index IDs associated with the metadata. """ @property def id_header(self): """Name identifying the IDs associated with the metadata. This property is read-only. Returns ------- str Name of IDs associated with the metadata. """ return self._id_header @property def ids(self): """IDs associated with the metadata. This property is read-only. Returns ------- tuple of str Metadata IDs. """ return self._ids @property def id_count(self): """Number of metadata IDs. This property is read-only. Returns ------- int Number of metadata IDs. """ return len(self._ids) def __init__(self, index): if index.empty: raise ValueError( "%s must contain at least one ID." % self.__class__.__name__ ) id_header = index.name self._assert_valid_id_header(id_header) self._id_header = id_header self._validate_index(index, axis="id") self._ids = tuple(index) def __eq__(self, other): return isinstance(other, self.__class__) and self._id_header == other._id_header def __ne__(self, other): return not (self == other) # Static helpers below for code reuse in Metadata and MetadataColumn @classmethod def _assert_valid_id_header(cls, name): if not is_id_header(name): raise ValueError( "pandas index name (`Index.name`) must be one of the " "following values, not %r:\n\n%s" % (name, FORMATTED_ID_HEADERS) ) @classmethod def _validate_index(cls, index, *, axis): if axis == "id": label = "ID" elif axis == "column": label = "column name" else: raise NotImplementedError for value in index: if not isinstance(value, str): raise TypeError( "Detected non-string metadata %s of type %r: %r" % (label, type(value), value) ) if not value: raise ValueError( "Detected empty metadata %s. %ss must consist of at least " "one character." 
% (label, label) ) if axis == "id" and value.startswith("#"): raise ValueError( "Detected metadata %s that begins with a pound sign " "(#): %r" % (label, value) ) if is_id_header(value): raise ValueError( "Detected metadata %s %r that conflicts with a name " "reserved for the ID header. Reserved ID headers:\n\n%s" % (label, value, FORMATTED_ID_HEADERS) ) if len(index) != len(set(index)): duplicates = find_duplicates(index) raise ValueError( "Metadata %ss must be unique. The following %ss are " "duplicated: %s" % (label, label, ", ".join(repr(e) for e in sorted(duplicates))) ) @classmethod def _filter_ids_helper(cls, df_or_series, ids, ids_to_keep): # `ids_to_keep` can be any iterable, so turn it into a list so that it # can be iterated over multiple times below (and length-checked). ids_to_keep = list(ids_to_keep) if len(ids_to_keep) == 0: raise ValueError("`ids_to_keep` must contain at least one ID.") duplicates = find_duplicates(ids_to_keep) if duplicates: raise ValueError( "`ids_to_keep` must contain unique IDs. The following IDs are " "duplicated: %s" % (", ".join(repr(e) for e in sorted(duplicates))) ) ids_to_keep = set(ids_to_keep) missing_ids = ids_to_keep - ids if missing_ids: raise ValueError( "The following IDs are not present in the metadata: %s" % (", ".join(repr(e) for e in sorted(missing_ids))) ) # While preserving order, get rid of any IDs not contained in # `ids_to_keep`. ids_to_discard = ids - ids_to_keep return df_or_series.drop( labels=ids_to_discard, axis="index", inplace=False, errors="raise" ) def save(self, filepath, ext=None): """Save a TSV metadata file. The TSV metadata file format is described at https://docs.qiime2.org in the Metadata Tutorial. The file will always include the ``#sk:types`` directive in order to make the file roundtrippable without relying on column type inference. Parameters ---------- filepath : str Path to save TSV metadata file at. ext : str Preferred file extension (.tsv, .txt, etc). 
Will be left blank if no extension is included. Including a period in the extension is optional, and any additional periods delimiting the filepath and the extension will be reduced to a single period. Returns ------- str Filepath and extension (if provided) that the file was saved to. See Also -------- Metadata.load """ from .io import MetadataWriter if ext is None: ext = "" else: ext = "." + ext.lstrip(".") filepath = filepath.rstrip(".") if not filepath.endswith(ext): filepath += ext MetadataWriter(self).write(filepath) return filepath # Other properties such as units can be included here in the future! ColumnProperties = collections.namedtuple( "ColumnProperties", ["type", "missing_scheme"] ) class SampleMetadata(_MetadataBase): """Store metadata associated with identifiers in a study. Metadata is tabular in nature, mapping study identifiers (e.g. sample or feature IDs) to columns of metadata associated with each ID. For more details about metadata in QIIME 2, including the TSV metadata file format, see the Metadata Tutorial at https://docs.qiime2.org. The following text focuses on design and considerations when working with ``Metadata`` objects at the API level. A ``Metadata`` object is composed of zero or more ``MetadataColumn`` objects. A ``Metadata`` object always contains at least one ID, regardless of the number of columns. Each column in the ``Metadata`` object has an associated column type representing either *categorical* or *numeric* data. Each metadata column is represented by an object corresponding to the column's type: ``CategoricalMetadataColumn`` or ``NumericMetadataColumn``, respectively. A ``Metadata`` object is closely linked to its corresponding TSV metadata file format described at https://docs.qiime2.org. Therefore, certain requirements present in the file format are also enforced on the in-memory object in order to make serialized ``Metadata`` objects roundtrippable when loaded from disk again. 
For example, IDs cannot begin with a pound character (``#``) because those IDs would be interpreted as comment rows when written to disk as TSV. See the metadata file format spec for more details about data formatting requirements. In addition to being loaded from or saved to disk, a ``Metadata`` object can be constructed from a ``pandas.DataFrame`` object. See the *Parameters* section below for details on how to construct ``Metadata`` objects from dataframes. ``Metadata`` objects have various methods to access, filter, and merge data. A dataframe can be retrieved from the ``Metadata`` object for further data manipulation using the pandas API. Individual ``MetadataColumn`` objects can be retrieved to gain access to APIs applicable to a single metadata column. Missing values may be encoded in one of the following schemes: 'blank' The default, which treats `None`/`NaN` as the only valid missing values. 'no-missing' Indicates there are no missing values in a column, any `None`/`NaN` values should be considered an error. If a scheme other than 'blank' is used by default, this scheme can be provided to preserve strings as categorical terms. 'INSDC:missing' The INSDC vocabulary for missing values. The current implementation supports only lower-case terms which match exactly: 'not applicable', 'missing', 'not provided', 'not collected', and 'restricted access'. Parameters ---------- dataframe : pandas.DataFrame Dataframe containing metadata. The dataframe's index defines the IDs, and the index name (``Index.name``) must match one of the required ID headers described in the metadata file format spec. Each column in the dataframe defines a metadata column, and the metadata column's type (i.e. *categorical* or *numeric*) is determined based on the column's dtype. If a column has ``dtype=object``, it may contain strings or pandas missing values (e.g. ``np.nan``, ``None``). Columns matching this requirement are assumed to be *categorical*. 
If a column in the dataframe has ``dtype=float`` or ``dtype=int``, it may contain floating point numbers or integers, as well as pandas missing values (e.g. ``np.nan``). Columns matching this requirement are assumed to be *numeric*. Regardless of column type (categorical vs numeric), the dataframe stored within the ``Metadata`` object will have any missing values normalized to ``np.nan``. Columns with ``dtype=int`` will be cast to ``dtype=float``. To obtain a dataframe from the ``Metadata`` object containing these normalized data types and values, use ``Metadata.to_dataframe()``. column_missing_schemes : dict, optional Describe the metadata column handling for missing values described in the dataframe. This is a dict mapping column names (str) to missing-value schemes (str). Valid values are 'blank', 'no-missing', and 'INSDC:missing'. Column names may be omitted. default_missing_scheme : str, optional The missing scheme to use when none has been provided in the file or in `column_missing_schemes`. """ default_write_format = "sample_metadata" @classmethod def load( cls, filepath, column_types=None, column_missing_schemes=None, default_missing_scheme=DEFAULT_MISSING, ): """Load a TSV metadata file. The TSV metadata file format is described at https://docs.qiime2.org in the Metadata Tutorial. Parameters ---------- filepath : str Path to TSV metadata file to be loaded. column_types : dict, optional Override metadata column types specified or inferred in the file. This is a dict mapping column names (str) to column types (str). Valid column types are 'categorical' and 'numeric'. Column names may be omitted from this dict to use the column types read from the file. column_missing_schemes : dict, optional Override the metadata column handling for missing values described in the file. This is a dict mapping column names (str) to missing-value schemes (str). Valid values are 'blank', 'no-missing', and 'INSDC:missing'. Column names may be omitted. 
default_missing_scheme : str, optional The missing scheme to use when none has been provided in the file or in `column_missing_schemes`. Returns ------- Metadata Metadata object loaded from `filepath`. Raises ------ MetadataFileError If the metadata file is invalid in any way (e.g. doesn't meet the file format's requirements). See Also -------- save """ from .io import MetadataReader return MetadataReader(filepath).read( into=cls, column_types=column_types, column_missing_schemes=column_missing_schemes, default_missing_scheme=default_missing_scheme, ) @property def columns(self): """Ordered mapping of column names to ColumnProperties. The mapping that is returned is read-only. This property is also read-only. Returns ------- types.MappingProxyType Ordered mapping of column names to ColumnProperties. """ # Read-only proxy to the OrderedDict mapping column names to # ColumnProperties. return types.MappingProxyType(self._columns) @property def column_count(self): """Number of metadata columns. This property is read-only. Returns ------- int Number of metadata columns. Notes ----- Zero metadata columns are allowed. 
See Also -------- id_count """ return len(self._columns) def __init__( self, dataframe, column_missing_schemes=None, default_missing_scheme=DEFAULT_MISSING, ): if not isinstance(dataframe, pd.DataFrame): raise TypeError( "%s constructor requires a pandas.DataFrame object, not " "%r" % (self.__class__.__name__, type(dataframe)) ) super().__init__(dataframe.index) if column_missing_schemes is None: column_missing_schemes = {} self._dataframe, self._columns = self._normalize_dataframe( dataframe, column_missing_schemes, default_missing_scheme ) self._validate_index(self._dataframe.columns, axis="column") def _normalize_dataframe( self, dataframe, column_missing_schemes, default_missing_scheme ): norm_df = dataframe.copy() # Do not attempt to strip empty metadata if not norm_df.columns.empty: norm_df.columns = norm_df.columns.str.strip() norm_df.index = norm_df.index.str.strip() columns = collections.OrderedDict() for column_name, series in norm_df.items(): missing_scheme = column_missing_schemes.get( column_name, default_missing_scheme ) metadata_column = self._metadata_column_factory(series, missing_scheme) norm_df[column_name] = metadata_column.to_series() properties = ColumnProperties( type=metadata_column.type, missing_scheme=missing_scheme ) columns[column_name] = properties return norm_df, columns def _metadata_column_factory(self, series, missing_scheme): series = _missing.series_encode_missing(series, missing_scheme) # Collapse dtypes except for all NaN columns so that we can preserve # empty categorical columns. 
Empty numeric columns will already have # the expected dtype and values if not series.isna().all(): series = series.infer_objects() dtype = series.dtype if NumericMetadataColumn._is_supported_dtype(dtype): column = NumericMetadataColumn(series, missing_scheme) elif CategoricalMetadataColumn._is_supported_dtype(dtype): column = CategoricalMetadataColumn(series, missing_scheme) else: raise TypeError( "Metadata column %r has an unsupported pandas dtype of %s. " "Supported dtypes: float, int, object" % (series.name, dtype) ) return column def __repr__(self): """Return the string summary of the metadata and its columns.""" lines = [] # Header lines.append(self.__class__.__name__) lines.append("-" * len(self.__class__.__name__)) # Dimensions lines.append( "%d ID%s x %d column%s" % ( self.id_count, "" if self.id_count == 1 else "s", self.column_count, "" if self.column_count == 1 else "s", ) ) # Column properties if self.column_count != 0: max_name_len = max((len(name) for name in self.columns)) for name, props in self.columns.items(): padding = " " * ((max_name_len - len(name)) + 1) lines.append("%s:%s%r" % (name, padding, props)) # Epilogue lines.append("") lines.append("Call to_dataframe() for a tabular representation.") return "\n".join(lines) def __eq__(self, other): """Determine if this metadata is equal to another. ``Metadata`` objects are equal if their IDs, columns (including column names, types, and ordering), ID headers, and metadata values are equal. Parameters ---------- other : Metadata Metadata to test for equality. Returns ------- bool Indicates whether this ``Metadata`` object is equal to `other`. See Also -------- __ne__ """ return ( super().__eq__(other) and self._columns == other._columns and self._dataframe.equals(other._dataframe) ) def __ne__(self, other): """Determine if this metadata is not equal to another. 
``Metadata`` objects are not equal if their IDs, columns (including column names, types, or ordering), ID headers, or metadata values are not equal. Parameters ---------- other : Metadata Metadata to test for inequality. Returns ------- bool Indicates whether this ``Metadata`` object is not equal to `other`. See Also -------- __eq__ """ return not (self == other) def to_dataframe(self, encode_missing=False): """Create a pandas dataframe from the metadata. The dataframe's index name (``Index.name``) will match this metadata object's ``id_header``, and the index will contain this metadata object's IDs. The dataframe's column names will match the column names in this metadata. Categorical columns will be stored as ``dtype=object`` (containing strings), and numeric columns will be stored as ``dtype=float``. Parameters ---------- encode_missing : bool, optional Whether to convert missing values (NaNs) back into their original vocabulary (strings) if a missing scheme was used. Returns ------- pandas.DataFrame Dataframe constructed from the metadata. """ df = self._dataframe.copy() if encode_missing: def replace_nan(series): missing = _missing.series_extract_missing(series) # avoid dtype changing if there's no missing values if not missing.empty: series = series.astype(object) series[missing.index] = missing return series df = df.apply(replace_nan) return df def get_column(self, name): """Retrieve metadata column based on column name. Parameters ---------- name : str Name of the metadata column to retrieve. Returns ------- MetadataColumn Requested metadata column (``CategoricalMetadataColumn`` or ``NumericMetadataColumn``). See Also -------- get_ids """ try: series = self._dataframe[name] missing_scheme = self._columns[name].missing_scheme except KeyError: raise ValueError( "%r is not a column in the metadata. 
Available columns: " "%s" % (name, ", ".join(repr(c) for c in self.columns)) ) return self._metadata_column_factory(series, missing_scheme) def get_ids(self, where=None): """Retrieve IDs matching search criteria. Parameters ---------- where : str, optional SQLite WHERE clause specifying criteria IDs must meet to be included in the results. All IDs are included by default. Returns ------- set IDs matching search criteria specified in `where`. See Also -------- ids filter_ids get_column Notes ----- The ID header (``Metadata.id_header``) may be used in the `where` clause to query the table's ID column. """ if where is None: return set(self._ids) conn = sqlite3.connect(":memory:") conn.row_factory = lambda cursor, row: row[0] # https://github.com/pandas-dev/pandas/blob/ # 7c7bd569ce8e0f117c618d068e3d2798134dbc73/pandas/io/sql.py#L1306 with warnings.catch_warnings(): warnings.filterwarnings( "ignore", "The spaces in these column names will not.*" ) self._dataframe.to_sql( "metadata", conn, index=True, index_label=self.id_header ) c = conn.cursor() # In general we wouldn't want to format our query in this way because # it leaves us open to sql injection, but it seems acceptable here for # a few reasons: # 1) This is a throw-away database which we're just creating to have # access to the query language, so any malicious behavior wouldn't # impact any data that isn't temporary # 2) The substitution syntax recommended in the docs doesn't allow # us to specify complex `where` statements, which is what we need to # do here. For example, we need to specify things like: # WHERE Subject='subject-1' AND SampleType='gut' # but their qmark/named-style syntaxes only supports substition of # variables, such as: # WHERE Subject=? # 3) sqlite3.Cursor.execute will only execute a single statement so # inserting multiple statements # (e.g., "Subject='subject-1'; DROP...") will result in an # OperationalError being raised. 
query = ( 'SELECT "{0}" FROM metadata WHERE {1} GROUP BY "{0}" ' 'ORDER BY "{0}";'.format(self.id_header, where) ) try: c.execute(query) except sqlite3.OperationalError as e: conn.close() raise ValueError( "Selection of IDs failed with query:\n %s\n\n" "If one of the metadata column names specified " "in the `where` statement is on this list " "of reserved keywords " "(http://www.sqlite.org/lang_keywords.html), " "please ensure it is quoted appropriately in the " "`where` statement." % query ) from e ids = set(c.fetchall()) conn.close() return ids def merge(self, *others): """Merge this ``Metadata`` object with other ``Metadata`` objects. Returns a new ``Metadata`` object containing the merged contents of this ``Metadata`` object and `others`. The merge is not in-place and will always return a **new** merged ``Metadata`` object. The merge will include only those IDs that are shared across **all** ``Metadata`` objects being merged (i.e. the merge is an *inner join*). Each metadata column being merged must have a unique name; merging metadata with overlapping column names will result in an error. Parameters ---------- others : tuple One or more ``Metadata`` objects to merge with this ``Metadata`` object. Returns ------- Metadata New object containing merged metadata. The merged IDs will be in the same relative order as the IDs in this ``Metadata`` object after performing the inner join. The merged column order will match the column order of ``Metadata`` objects being merged from left to right. Raises ------ ValueError If zero ``Metadata`` objects are provided in `others` (there is nothing to merge in this case). Notes ----- The merged ``Metadata`` object will always have its ``id_header`` property set to ``'id'``, regardless of the ``id_header`` values on the ``Metadata`` objects being merged. """ if len(others) < 1: raise ValueError( "At least one Metadata object must be provided to merge into " "this Metadata object (otherwise there is nothing to merge)." 
) dfs = [] columns = [] for md in itertools.chain([self], others): df = md._dataframe dfs.append(df) columns.extend(df.columns.tolist()) columns = pd.Index(columns) if columns.has_duplicates: raise ValueError( "Cannot merge metadata with overlapping columns. The " "following columns overlap: %s" % ", ".join([repr(e) for e in columns[columns.duplicated()].unique()]) ) merged_df = dfs[0].join(dfs[1:], how="inner") # Not using DataFrame.empty because empty columns are allowed in # Metadata. if merged_df.index.empty: raise ValueError( "Cannot merge because there are no IDs shared across metadata " "objects." ) merged_df.index.name = "id" merged_md = self.__class__(merged_df) return merged_md def filter_ids(self, ids_to_keep): """Filter metadata by IDs. Parameters ---------- ids_to_keep : iterable of str IDs that should be retained in the filtered ``Metadata`` object. If any IDs in `ids_to_keep` are not contained in this ``Metadata`` object, a ``ValueError`` will be raised. The filtered ``Metadata`` object will retain the same relative ordering of IDs in this ``Metadata`` object. Thus, the ordering of IDs in `ids_to_keep` does not determine the ordering of IDs in the filtered ``Metadata`` object. Returns ------- Metadata The metadata filtered by IDs. See Also -------- get_ids filter_columns """ filtered_df = self._filter_ids_helper( self._dataframe, self.get_ids(), ids_to_keep ) filtered_md = self.__class__(filtered_df) return filtered_md def filter_columns( self, *, column_type=None, drop_all_unique=False, drop_zero_variance=False, drop_all_missing=False, ): """Filter metadata by columns. Parameters ---------- column_type : str, optional If supplied, will retain only columns of this type. The currently supported column types are 'numeric' and 'categorical'. drop_all_unique : bool, optional If ``True``, columns that contain a unique value for every ID will be dropped. Missing data (``np.nan``) are ignored when determining unique values. 
If a column consists solely of missing data, it will be dropped. drop_zero_variance : bool, optional If ``True``, columns that contain the same value for every ID will be dropped. Missing data (``np.nan``) are ignored when determining variance. If a column consists solely of missing data, it will be dropped. drop_all_missing : bool, optional If ``True``, columns that have a missing value (``np.nan``) for every ID will be dropped. Returns ------- Metadata The metadata filtered by columns. See Also -------- filter_ids """ if column_type is not None and column_type not in SUPPORTED_COLUMN_TYPES: raise ValueError( "Unknown column type %r. Supported column types: %s" % (column_type, ", ".join(sorted(SUPPORTED_COLUMN_TYPES))) ) # Build up a set of columns to drop. Short-circuit as soon as we know a # given column can be dropped (no need to apply further filters to it). columns_to_drop = set() for column, props in self.columns.items(): if column_type is not None and props.type != column_type: columns_to_drop.add(column) continue series = self._dataframe[column] if drop_all_unique or drop_zero_variance: # Ignore nans in the unique count, and compare to the number of # non-nan values in the series. num_unique = series.nunique(dropna=True) if drop_all_unique and num_unique == series.count(): columns_to_drop.add(column) continue # If num_unique == 0, the series was empty (all nans). If # num_unique == 1, the series contained only a single unique # value (ignoring nans). if drop_zero_variance and num_unique < 2: columns_to_drop.add(column) continue if drop_all_missing and series.isna().all(): columns_to_drop.add(column) continue filtered_df = self._dataframe.drop(columns_to_drop, axis=1, inplace=False) filtered_md = self.__class__(filtered_df) return filtered_md class MetadataColumn(_MetadataBase, metaclass=abc.ABCMeta): """Abstract base class representing a single metadata column. Concrete subclasses represent specific metadata column types, e.g. 
``CategoricalMetadataColumn`` and ``NumericMetadataColumn``. See the ``Metadata`` class docstring for details about ``Metadata`` and ``MetadataColumn`` objects, including a description of column types. The main difference in constructing ``MetadataColumn`` vs ``Metadata`` objects is that ``MetadataColumn`` objects are constructed from a ``pandas.Series`` object instead of a ``pandas.DataFrame``. Otherwise, the same restrictions, considerations, and data normalization are applied as with ``Metadata`` objects. Parameters ---------- series : pd.Series The series to construct a column from. missing_scheme : "blank", "no-missing", "INSDC:missing" How to interpret terms for missing values. These will be converted to NaN. The default treatment is to take no action. """ # Abstract, must be defined by subclasses. type = None @classmethod @abc.abstractmethod def _is_supported_dtype(cls, dtype): """Return True if dtype is supported False otherwise. Contract: Return ``True`` if the series `dtype` is supported by this object and can be handled appropriately by ``_normalize_``. Return ``False`` otherwise. """ raise NotImplementedError @classmethod @abc.abstractmethod def _normalize_(cls, series): """Return a normalized copy of series. Contract: Return a copy of `series` that has been converted to the appropriate internal dtype and has any other necessary normalization or validation applied (e.g. missing value representations, disallowing certain values, etc). Raise an error with a detailed error message if the operation cannot be completed. """ raise NotImplementedError @property def name(self): """Metadata column name. This property is read-only. Returns ------- str Metadata column name. """ return self._series.name @property def missing_scheme(self): """Return the vocabulary used to encode missing values. This property is read-only. 
Returns ------- str "blank", "no-missing", or "INSDC:missing" """ return self._missing_scheme def __init__(self, series, missing_scheme=DEFAULT_MISSING): if not isinstance(series, pd.Series): raise TypeError( "%s constructor requires a pandas.Series object, not %r" % (self.__class__.__name__, type(series)) ) super().__init__(series.index) series = _missing.series_encode_missing(series, missing_scheme) # if the series has values with a consistent dtype, make the series # that dtype. Don't change the dtype if there is a column of all NaN if not series.isna().all(): series = series.infer_objects() if not self._is_supported_dtype(series.dtype): raise TypeError( "%s %r does not support a pandas.Series object with dtype %s" % (self.__class__.__name__, series.name, series.dtype) ) self._missing_scheme = missing_scheme self._series = self._normalize_(series) self._validate_index([self._series.name], axis="column") def __repr__(self): """Return String summary of the metadata column.""" return "<%s name=%r id_count=%d>" % ( self.__class__.__name__, self.name, self.id_count, ) def __eq__(self, other): """Determine if this metadata column is equal to another. ``MetadataColumn`` objects are equal if their IDs, column names, column types, ID headers and metadata values are equal. Parameters ---------- other : MetadataColumn Metadata column to test for equality. Returns ------- bool Indicates whether this ``MetadataColumn`` object is equal to `other`. See Also -------- __ne__ """ return ( super().__eq__(other) and self.name == other.name and self._series.equals(other._series) ) def __ne__(self, other): """Determine if this metadata column is not equal to another. ``MetadataColumn`` objects are not equal if their IDs, column names, column types, ID headers, or metadata values are not equal. Parameters ---------- other : MetadataColumn Metadata column to test for inequality. Returns ------- bool Indicates whether this ``MetadataColumn`` object is not equal to `other`. 
See Also -------- __eq__ """ return not (self == other) def to_series(self, encode_missing=False): """Create a pandas series from the metadata column. The series index name (``Index.name``) will match this metadata column's ``id_header``, and the index will contain this metadata column's IDs. The series name will match this metadata column's name. Parameters ---------- encode_missing : bool, optional Whether to convert missing values (NaNs) back into their original vocabulary (strings) if a missing scheme was used. Returns ------- pandas.Series Series constructed from the metadata column. See Also -------- to_dataframe """ series = self._series.copy() if encode_missing: missing = self.get_missing() if not missing.empty: series[missing.index] = missing return series def to_dataframe(self, encode_missing=False): """Create a pandas dataframe from the metadata column. The dataframe will contain exactly one column. The dataframe's index name (``Index.name``) will match this metadata column's ``id_header``, and the index will contain this metadata column's IDs. The dataframe's column name will match this metadata column's name. Parameters ---------- encode_missing : bool, optional Whether to convert missing values (NaNs) back into their original vocabulary (strings) if a missing scheme was used. Returns ------- pandas.DataFrame Dataframe constructed from the metadata column. See Also -------- to_series """ return self.to_series(encode_missing=encode_missing).to_frame() def get_missing(self): """Return a series containing only missing values (with an index). If the column was constructed with a missing scheme, then the values of the series will be the original terms instead of NaN. """ return _missing.series_extract_missing(self._series) def get_value(self, id): """Retrieve metadata column value associated with an ID. Parameters ---------- id : str ID corresponding to the metadata column value to retrieve. Returns ------- object Value associated with the provided `id`. 
""" if id not in self._series.index: raise ValueError("ID %r is not present in %r" % (id, self)) return self._series.loc[id] def has_missing_values(self): """Determine if the metadata column has one or more missing values. Returns ------- bool ``True`` if the metadata column has one or more missing values (``np.nan``), ``False`` otherwise. See Also -------- drop_missing_values get_ids """ return len(self.get_ids(where_values_missing=True)) > 0 def drop_missing_values(self): """Filter out missing values from the metadata column. Returns ------- MetadataColumn Metadata column with missing values removed. See Also -------- has_missing_values get_ids """ missing = self.get_ids(where_values_missing=True) present = self.get_ids() - missing return self.filter_ids(present) def get_ids(self, where_values_missing=False): """Retrieve IDs matching search criteria. Parameters ---------- where_values_missing : bool, optional If ``True``, only return IDs that are associated with missing values (``np.nan``). If ``False`` (the default), return all IDs in the metadata column. Returns ------- set IDs matching search criteria. See Also -------- ids filter_ids has_missing_values drop_missing_values """ if where_values_missing: ids = self._series.index[self._series.isna()] else: ids = self._ids return set(ids) def filter_ids(self, ids_to_keep): """Filter metadata column by IDs. Parameters ---------- ids_to_keep : iterable of str IDs that should be retained in the filtered ``MetadataColumn`` object. If any IDs in `ids_to_keep` are not contained in this ``MetadataColumn`` object, a ``ValueError`` will be raised. The filtered ``MetadataColumn`` object will retain the same relative ordering of IDs in this ``MetadataColumn`` object. Thus, the ordering of IDs in `ids_to_keep` does not determine the ordering of IDs in the filtered ``MetadataColumn`` object. Returns ------- MetadataColumn The metadata column filtered by IDs. 
See Also -------- get_ids """ filtered_series = self._filter_ids_helper( self._series, self.get_ids(), ids_to_keep ) filtered_mdc = self.__class__(filtered_series) return filtered_mdc class CategoricalMetadataColumn(MetadataColumn): """A single metadata column containing categorical data. See the ``Metadata`` class docstring for details about ``Metadata`` and ``MetadataColumn`` objects, including a description of column types and supported data formats. """ type = "categorical" @classmethod def _is_supported_dtype(cls, dtype): return dtype == "object" @classmethod def _normalize_(cls, series): def normalize(value): if isinstance(value, str): value = value.strip() if value == "": raise ValueError( "%s does not support empty strings as values. Use an " "appropriate pandas missing value type " "(e.g. `numpy.nan`) or supply a non-empty string as " "the value in column %r." % (cls.__name__, series.name) ) else: return value elif pd.isna(value): # permits np.nan, Python float nan, None if isinstance(value, float) and np.isnan(value): # if type(value) is float and np.isnan(value): return value return np.nan else: raise TypeError( "%s only supports strings or missing values. Found value " "%r of type %r in column %r." % (cls.__name__, value, type(value), series.name) ) norm_series = series.apply(normalize) norm_series = norm_series.astype(object) norm_series.index = norm_series.index.str.strip() norm_series.name = norm_series.name.strip() return norm_series class NumericMetadataColumn(MetadataColumn): """A single metadata column containing numeric data. See the ``Metadata`` class docstring for details about ``Metadata`` and ``MetadataColumn`` objects, including a description of column types and supported data formats. 
""" type = "numeric" @classmethod def _is_supported_dtype(cls, dtype): return dtype == "float" or dtype == "int" or dtype == "int64" @classmethod def _normalize_(cls, series): series = series.astype(float, copy=True, errors="raise") if np.isinf(series).any(): raise ValueError( "%s does not support positive or negative infinity as a " "floating point value in column %r." % (cls.__name__, series.name) ) return series scikit-bio-0.6.2/skbio/metadata/_mixin.py000066400000000000000000000436231464262511300203140ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import abc import copy import pandas as pd from skbio.metadata import IntervalMetadata class MetadataMixin(metaclass=abc.ABCMeta): @property def metadata(self): """``dict`` containing metadata which applies to the entire object. Notes ----- This property can be set and deleted. When setting new metadata a shallow copy of the dictionary is made. Examples -------- .. note:: scikit-bio objects with metadata share a common interface for accessing and manipulating their metadata. The following examples use scikit-bio's ``Sequence`` class to demonstrate metadata behavior. These examples apply to all other scikit-bio objects storing metadata. Create a sequence with metadata: >>> from skbio import Sequence >>> seq = Sequence('ACGT', metadata={'description': 'seq description', ... 
'id': 'seq-id'}) Retrieve metadata: >>> print(seq.metadata) {'description': 'seq description', 'id': 'seq-id'} Update metadata: >>> seq.metadata['id'] = 'new-id' >>> seq.metadata['pubmed'] = 12345 >>> print(seq.metadata) {'description': 'seq description', 'id': 'new-id', 'pubmed': 12345} Set metadata: >>> seq.metadata = {'abc': 123} >>> seq.metadata {'abc': 123} Delete metadata: >>> seq.has_metadata() True >>> del seq.metadata >>> seq.metadata {} >>> seq.has_metadata() False """ if self._metadata is None: # Not using setter to avoid copy. self._metadata = {} return self._metadata @metadata.setter def metadata(self, metadata): if not isinstance(metadata, dict): raise TypeError( "metadata must be a dict, not type %r" % type(metadata).__name__ ) # Shallow copy. self._metadata = metadata.copy() @metadata.deleter def metadata(self): self._metadata = None @abc.abstractmethod def __init__(self, metadata=None): raise NotImplementedError def _init_(self, metadata=None): if metadata is None: # Could use deleter but this is less overhead and needs to be fast. self._metadata = None else: # Use setter for validation and copy. self.metadata = metadata @abc.abstractmethod def __eq__(self, other): raise NotImplementedError def _eq_(self, other): # We're not simply comparing self.metadata to other.metadata in order # to avoid creating "empty" metadata representations on the objects if # they don't have metadata. if self.has_metadata() and other.has_metadata(): return self.metadata == other.metadata elif not (self.has_metadata() or other.has_metadata()): # Both don't have metadata. return True else: # One has metadata while the other does not. 
return False @abc.abstractmethod def __ne__(self, other): raise NotImplementedError def _ne_(self, other): return not (self == other) @abc.abstractmethod def __copy__(self): raise NotImplementedError def _copy_(self): if self.has_metadata(): return self.metadata.copy() else: return None @abc.abstractmethod def __deepcopy__(self, memo): raise NotImplementedError def _deepcopy_(self, memo): if self.has_metadata(): return copy.deepcopy(self.metadata, memo) else: return None def has_metadata(self): """Determine if the object has metadata. An object has metadata if its ``metadata`` dictionary is not empty (i.e., has at least one key-value pair). Returns ------- bool Indicates whether the object has metadata. Examples -------- .. note:: scikit-bio objects with metadata share a common interface for accessing and manipulating their metadata. The following examples use scikit-bio's ``Sequence`` class to demonstrate metadata behavior. These examples apply to all other scikit-bio objects storing metadata. >>> from skbio import Sequence >>> seq = Sequence('ACGT') >>> seq.has_metadata() False >>> seq = Sequence('ACGT', metadata={}) >>> seq.has_metadata() False >>> seq = Sequence('ACGT', metadata={'id': 'seq-id'}) >>> seq.has_metadata() True """ return self._metadata is not None and bool(self.metadata) class PositionalMetadataMixin(metaclass=abc.ABCMeta): @abc.abstractmethod def _positional_metadata_axis_len_(self): """Return length of axis that positional metadata applies to. Returns ------- int Positional metadata axis length. """ raise NotImplementedError @property def positional_metadata(self): """``pd.DataFrame`` containing metadata along an axis. Notes ----- This property can be set and deleted. When setting new positional metadata, a shallow copy is made and the ``pd.DataFrame`` index is set to ``pd.RangeIndex(start=0, stop=axis_len, step=1)``. Examples -------- .. 
note:: scikit-bio objects with positional metadata share a common interface for accessing and manipulating their positional metadata. The following examples use scikit-bio's ``DNA`` class to demonstrate positional metadata behavior. These examples apply to all other scikit-bio objects storing positional metadata. Create a DNA sequence with positional metadata: >>> from skbio import DNA >>> seq = DNA( ... 'ACGT', ... positional_metadata={'exons': [True, True, False, True], ... 'quality': [3, 3, 20, 11]}) >>> seq DNA ----------------------------- Positional metadata: 'exons': 'quality': Stats: length: 4 has gaps: False has degenerates: False has definites: True GC-content: 50.00% ----------------------------- 0 ACGT Retrieve positional metadata: >>> seq.positional_metadata exons quality 0 True 3 1 True 3 2 False 20 3 True 11 Update positional metadata: >>> seq.positional_metadata['gaps'] = seq.gaps() >>> seq.positional_metadata exons quality gaps 0 True 3 False 1 True 3 False 2 False 20 False 3 True 11 False Set positional metadata: >>> seq.positional_metadata = {'degenerates': seq.degenerates()} >>> seq.positional_metadata # doctest: +NORMALIZE_WHITESPACE degenerates 0 False 1 False 2 False 3 False Delete positional metadata: >>> seq.has_positional_metadata() True >>> del seq.positional_metadata >>> seq.positional_metadata Empty DataFrame Columns: [] Index: [0, 1, 2, 3] >>> seq.has_positional_metadata() False """ if self._positional_metadata is None: # Not using setter to avoid copy. self._positional_metadata = pd.DataFrame( index=self._get_positional_metadata_index() ) return self._positional_metadata @positional_metadata.setter def positional_metadata(self, positional_metadata): try: # Pass copy=True to copy underlying data buffer. positional_metadata = pd.DataFrame(positional_metadata, copy=True) # Different versions of pandas will raise different error types. 
We # don't really care what the type of the error is, just its message, so # a blanket Exception will do. except Exception as e: raise TypeError( "Invalid positional metadata. Must be consumable by " "`pd.DataFrame` constructor. Original pandas error message: " '"%s"' % e ) num_rows = len(positional_metadata.index) axis_len = self._positional_metadata_axis_len_() if num_rows != axis_len: raise ValueError( "Number of positional metadata values (%d) must match the " "positional metadata axis length (%d)." % (num_rows, axis_len) ) positional_metadata.index = self._get_positional_metadata_index() self._positional_metadata = positional_metadata @positional_metadata.deleter def positional_metadata(self): self._positional_metadata = None def _get_positional_metadata_index(self): """Create a memory-efficient integer index for positional metadata.""" return pd.RangeIndex( start=0, stop=self._positional_metadata_axis_len_(), step=1 ) @abc.abstractmethod def __init__(self, positional_metadata=None): raise NotImplementedError def _init_(self, positional_metadata=None): if positional_metadata is None: # Could use deleter but this is less overhead and needs to be fast. self._positional_metadata = None else: # Use setter for validation and copy. self.positional_metadata = positional_metadata @abc.abstractmethod def __eq__(self, other): raise NotImplementedError def _eq_(self, other): # We're not simply comparing self.positional_metadata to # other.positional_metadata in order to avoid creating "empty" # positional metadata representations on the objects if they don't have # positional metadata. if self.has_positional_metadata() and other.has_positional_metadata(): return self.positional_metadata.equals(other.positional_metadata) elif not (self.has_positional_metadata() or other.has_positional_metadata()): # Both don't have positional metadata. 
return ( self._positional_metadata_axis_len_() == other._positional_metadata_axis_len_() ) else: # One has positional metadata while the other does not. return False @abc.abstractmethod def __ne__(self, other): raise NotImplementedError def _ne_(self, other): return not (self == other) @abc.abstractmethod def __copy__(self): raise NotImplementedError def _copy_(self): if self.has_positional_metadata(): # deep=True makes a shallow copy of the underlying data buffer. return self.positional_metadata.copy(deep=True) else: return None @abc.abstractmethod def __deepcopy__(self, memo): raise NotImplementedError def _deepcopy_(self, memo): if self.has_positional_metadata(): # `copy.deepcopy` no longer recursively copies contents of the # DataFrame, so we must handle the deep copy ourselves. # Reference: https://github.com/pandas-dev/pandas/issues/17406 df = self.positional_metadata data_cp = copy.deepcopy(df.values.tolist(), memo) return pd.DataFrame( data_cp, index=df.index.copy(deep=True), columns=df.columns.copy(deep=True), copy=False, ) else: return None def has_positional_metadata(self): """Determine if the object has positional metadata. An object has positional metadata if its ``positional_metadata`` ``pd.DataFrame`` has at least one column. Returns ------- bool Indicates whether the object has positional metadata. Examples -------- .. note:: scikit-bio objects with positional metadata share a common interface for accessing and manipulating their positional metadata. The following examples use scikit-bio's ``DNA`` class to demonstrate positional metadata behavior. These examples apply to all other scikit-bio objects storing positional metadata. 
>>> import pandas as pd >>> from skbio import DNA >>> seq = DNA('ACGT') >>> seq.has_positional_metadata() False >>> seq = DNA('ACGT', positional_metadata=pd.DataFrame(index=range(4))) >>> seq.has_positional_metadata() False >>> seq = DNA('ACGT', positional_metadata={'quality': range(4)}) >>> seq.has_positional_metadata() True """ return ( self._positional_metadata is not None and len(self.positional_metadata.columns) > 0 ) class IntervalMetadataMixin(metaclass=abc.ABCMeta): @abc.abstractmethod def _interval_metadata_axis_len_(self): """Return length of axis that interval metadata applies to. Returns ------- int Interval metadata axis length. """ raise NotImplementedError @abc.abstractmethod def __init__(self, interval_metadata=None): raise NotImplementedError def _init_(self, interval_metadata=None): if interval_metadata is None: # Could use deleter but this is less overhead and needs to be fast. self._interval_metadata = None else: # Use setter for validation and copy. self.interval_metadata = interval_metadata @property def interval_metadata(self): """``IntervalMetadata`` object containing info about interval features. Notes ----- This property can be set and deleted. When setting new interval metadata, a shallow copy of the ``IntervalMetadata`` object is made. """ if self._interval_metadata is None: # Not using setter to avoid copy. self._interval_metadata = IntervalMetadata( self._interval_metadata_axis_len_() ) return self._interval_metadata @interval_metadata.setter def interval_metadata(self, interval_metadata): if isinstance(interval_metadata, IntervalMetadata): upper_bound = interval_metadata.upper_bound lower_bound = interval_metadata.lower_bound axis_len = self._interval_metadata_axis_len_() if lower_bound != 0: raise ValueError( "The lower bound for the interval features (%d) " "must be zero." 
% lower_bound ) if upper_bound is not None and upper_bound != axis_len: raise ValueError( "The upper bound for the interval features (%d) " "must match the interval metadata axis length (%d)" % (upper_bound, axis_len) ) # copy all the data to the mixin self._interval_metadata = IntervalMetadata( axis_len, copy_from=interval_metadata ) else: raise TypeError( "You must provide `IntervalMetadata` object, " "not type %s." % type(interval_metadata).__name__ ) @interval_metadata.deleter def interval_metadata(self): self._interval_metadata = None def has_interval_metadata(self): """Determine if the object has interval metadata. An object has interval metadata if its ``interval_metadata`` has at least one ```Interval`` objects. Returns ------- bool Indicates whether the object has interval metadata. """ return ( self._interval_metadata is not None and self.interval_metadata.num_interval_features > 0 ) @abc.abstractmethod def __eq__(self, other): raise NotImplementedError def _eq_(self, other): # We're not simply comparing self.interval_metadata to # other.interval_metadata in order to avoid creating "empty" # interval metadata representations on the objects if they don't have # interval metadata. if self.has_interval_metadata() and other.has_interval_metadata(): return self.interval_metadata == other.interval_metadata elif not (self.has_interval_metadata() or other.has_interval_metadata()): # Both don't have interval metadata. return ( self._interval_metadata_axis_len_() == other._interval_metadata_axis_len_() ) else: # One has interval metadata while the other does not. 
return False @abc.abstractmethod def __ne__(self, other): raise NotImplementedError def _ne_(self, other): return not (self == other) @abc.abstractmethod def __copy__(self): raise NotImplementedError def _copy_(self): if self.has_interval_metadata(): return copy.copy(self.interval_metadata) else: return None @abc.abstractmethod def __deepcopy__(self, memo): raise NotImplementedError def _deepcopy_(self, memo): if self.has_interval_metadata(): return copy.deepcopy(self.interval_metadata, memo) else: return None scikit-bio-0.6.2/skbio/metadata/_repr.py000066400000000000000000000150101464262511300201250ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import itertools import numbers import textwrap from abc import ABCMeta, abstractmethod from skbio._base import ElasticLines class _MetadataReprBuilder(metaclass=ABCMeta): """An ABC for building a repr from an object containing metadata. This abstract base class constructs a repr string for an object which contains metadata, positional metadata and/or interval metadata. Parameters ---------- obj : Type varies depending on subclass Object to build repr for. width : int Maximum width of the repr. indent : int Number of spaces to use for indented lines. """ def __init__(self, obj, width, indent): self._obj = obj self._width = width self._indent = " " * indent @abstractmethod def _process_header(self): """Build header for the repr. Used by `build` Template Method.""" raise NotImplementedError @abstractmethod def _process_data(self): """Build data lines for the repr. 
Used by `build` Template Method.""" raise NotImplementedError def build(self): """Template method for building the repr.""" self._lines = ElasticLines() self._process_header() self._process_metadata() self._process_positional_metadata() self._process_interval_metadata() self._process_stats() self._process_data() return self._lines.to_str() def _process_metadata(self): if self._obj.has_metadata(): self._lines.add_line("Metadata:") # Python 3 doesn't allow sorting of mixed types so we can't just # use sorted() on the metadata keys. Sort first by type then sort # by value within each type. for key in self._sorted_keys_grouped_by_type(self._obj.metadata): value = self._obj.metadata[key] self._lines.add_lines(self._format_metadata_key_value(key, value)) def _sorted_keys_grouped_by_type(self, dict_): """Group keys within a dict by their type and sort within type.""" type_sorted = sorted(dict_, key=self._type_sort_key) type_and_value_sorted = [] for _, group in itertools.groupby(type_sorted, self._type_sort_key): type_and_value_sorted.extend(sorted(group)) return type_and_value_sorted def _type_sort_key(self, key): return repr(type(key)) def _format_metadata_key_value(self, key, value): """Format metadata key:value, wrapping across lines if necessary.""" key_fmt = self._format_key(key) supported_type = True if isinstance(value, str): # extra indent of 1 so that wrapped text lines up: # # 'foo': 'abc def ghi # jkl mno' value_repr = repr(value) extra_indent = 1 elif isinstance(value, bytes): # extra indent of 2 so that wrapped text lines up: # # 'foo': b'abc def ghi # jkl mno' value_repr = repr(value) extra_indent = 2 # handles any number, this includes bool elif value is None or isinstance(value, numbers.Number): value_repr = repr(value) extra_indent = 0 else: supported_type = False if not supported_type or len(value_repr) > 140: value_repr = str(type(value)) # extra indent of 1 so that wrapped text lines up past the bracket: # # 'foo': extra_indent = 1 return 
self._wrap_text_with_indent(value_repr, key_fmt, extra_indent) def _process_positional_metadata(self): if self._obj.has_positional_metadata(): self._lines.add_line("Positional metadata:") for key in self._obj.positional_metadata.columns.values.tolist(): dtype = self._obj.positional_metadata[key].dtype self._lines.add_lines( self._format_positional_metadata_column(key, dtype) ) def _format_positional_metadata_column(self, key, dtype): key_fmt = self._format_key(key) dtype_fmt = "" % str(dtype) return self._wrap_text_with_indent(dtype_fmt, key_fmt, 1) def _process_interval_metadata(self): # TODO: this hasattr check can be removed once all the relevant # classes have interval_metadata added to it. if ( hasattr(self._obj, "has_interval_metadata") and self._obj.has_interval_metadata() ): self._lines.add_line("Interval metadata:") n = self._obj.interval_metadata.num_interval_features line = self._indent + "%d interval feature" % n if n > 1: line += "s" self._lines.add_line(line) def _format_key(self, key): """Format metadata key. Includes initial indent and trailing colon and space: 'foo': """ key_fmt = self._indent + repr(key) supported_types = (str, bytes, numbers.Number, type(None)) if len(key_fmt) > (self._width / 2) or not isinstance(key, supported_types): key_fmt = self._indent + str(type(key)) return "%s: " % key_fmt def _wrap_text_with_indent(self, text, initial_text, extra_indent): """Wrap text across lines with an initial indentation. For example: 'foo': 'abc def ghi jkl mno pqr' 'foo': is `initial_text`. `extra_indent` is 1. Wrapped lines are indented such that they line up with the start of the previous line of wrapped text. 
""" return textwrap.wrap( text, width=self._width, expand_tabs=False, initial_indent=initial_text, subsequent_indent=" " * (len(initial_text) + extra_indent), ) def _process_stats(self): self._lines.add_line("Stats:") for label, value in self._obj._repr_stats(): self._lines.add_line("%s%s: %s" % (self._indent, label, value)) self._lines.add_separator() scikit-bio-0.6.2/skbio/metadata/_testing.py000066400000000000000000001461741464262511300206520ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import copy import pandas as pd import numpy as np import numpy.testing as npt from skbio.util._testing import assert_data_frame_almost_equal from skbio.metadata import IntervalMetadata class MetadataMixinTests: def test_constructor_invalid_type(self): for md in (0, "a", ("f", "o", "o"), np.array([]), pd.DataFrame()): with self.assertRaisesRegex(TypeError, "metadata must be a dict"): self._metadata_constructor_(metadata=md) def test_constructor_no_metadata(self): for md in None, {}: obj = self._metadata_constructor_(metadata=md) self.assertFalse(obj.has_metadata()) self.assertEqual(obj.metadata, {}) def test_constructor_with_metadata(self): obj = self._metadata_constructor_(metadata={"foo": "bar"}) self.assertEqual(obj.metadata, {"foo": "bar"}) obj = self._metadata_constructor_(metadata={"": "", 123: {"a": "b", "c": "d"}}) self.assertEqual(obj.metadata, {"": "", 123: {"a": "b", "c": "d"}}) def test_constructor_handles_missing_metadata_efficiently(self): self.assertIsNone(self._metadata_constructor_()._metadata) self.assertIsNone(self._metadata_constructor_(metadata=None)._metadata) def test_constructor_makes_shallow_copy_of_metadata(self): md = 
{"foo": "bar", 42: []} obj = self._metadata_constructor_(metadata=md) self.assertEqual(obj.metadata, md) self.assertIsNot(obj.metadata, md) md["foo"] = "baz" self.assertEqual(obj.metadata, {"foo": "bar", 42: []}) md[42].append(True) self.assertEqual(obj.metadata, {"foo": "bar", 42: [True]}) def test_eq(self): self.assertReallyEqual( self._metadata_constructor_(metadata={"foo": 42}), self._metadata_constructor_(metadata={"foo": 42}), ) self.assertReallyEqual( self._metadata_constructor_(metadata={"foo": 42, 123: {}}), self._metadata_constructor_(metadata={"foo": 42, 123: {}}), ) def test_eq_missing_metadata(self): self.assertReallyEqual( self._metadata_constructor_(), self._metadata_constructor_() ) self.assertReallyEqual( self._metadata_constructor_(), self._metadata_constructor_(metadata={}) ) self.assertReallyEqual( self._metadata_constructor_(metadata={}), self._metadata_constructor_(metadata={}), ) def test_eq_handles_missing_metadata_efficiently(self): obj1 = self._metadata_constructor_() obj2 = self._metadata_constructor_() self.assertReallyEqual(obj1, obj2) self.assertIsNone(obj1._metadata) self.assertIsNone(obj2._metadata) def test_ne(self): # Both have metadata. obj1 = self._metadata_constructor_(metadata={"id": "foo"}) obj2 = self._metadata_constructor_(metadata={"id": "bar"}) self.assertReallyNotEqual(obj1, obj2) # One has metadata. 
obj1 = self._metadata_constructor_(metadata={"id": "foo"}) obj2 = self._metadata_constructor_() self.assertReallyNotEqual(obj1, obj2) def test_copy_metadata_none(self): obj = self._metadata_constructor_() obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._metadata) self.assertIsNone(obj_copy._metadata) def test_copy_metadata_empty(self): obj = self._metadata_constructor_(metadata={}) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertEqual(obj._metadata, {}) self.assertIsNone(obj_copy._metadata) def test_copy_with_metadata(self): obj = self._metadata_constructor_(metadata={"foo": [1]}) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNot(obj._metadata, obj_copy._metadata) self.assertIs(obj._metadata["foo"], obj_copy._metadata["foo"]) obj_copy.metadata["foo"].append(2) obj_copy.metadata["foo2"] = 42 self.assertEqual(obj_copy.metadata, {"foo": [1, 2], "foo2": 42}) self.assertEqual(obj.metadata, {"foo": [1, 2]}) def test_deepcopy_metadata_none(self): obj = self._metadata_constructor_() obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._metadata) self.assertIsNone(obj_copy._metadata) def test_deepcopy_metadata_empty(self): obj = self._metadata_constructor_(metadata={}) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertEqual(obj._metadata, {}) self.assertIsNone(obj_copy._metadata) def test_deepcopy_with_metadata(self): obj = self._metadata_constructor_(metadata={"foo": [1]}) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNot(obj._metadata, obj_copy._metadata) self.assertIsNot(obj._metadata["foo"], obj_copy._metadata["foo"]) obj_copy.metadata["foo"].append(2) obj_copy.metadata["foo2"] = 42 self.assertEqual(obj_copy.metadata, 
{"foo": [1, 2], "foo2": 42}) self.assertEqual(obj.metadata, {"foo": [1]}) def test_deepcopy_memo_is_respected(self): # Basic test to ensure deepcopy's memo is passed through to recursive # deepcopy calls. obj = self._metadata_constructor_(metadata={"foo": "bar"}) memo = {} copy.deepcopy(obj, memo) self.assertGreater(len(memo), 2) def test_metadata_getter(self): obj = self._metadata_constructor_(metadata={42: "foo", ("hello", "world"): 43}) self.assertIsInstance(obj.metadata, dict) self.assertEqual(obj.metadata, {42: "foo", ("hello", "world"): 43}) obj.metadata[42] = "bar" self.assertEqual(obj.metadata, {42: "bar", ("hello", "world"): 43}) def test_metadata_getter_no_metadata(self): obj = self._metadata_constructor_() self.assertIsNone(obj._metadata) self.assertIsInstance(obj.metadata, dict) self.assertEqual(obj.metadata, {}) self.assertIsNotNone(obj._metadata) def test_metadata_setter(self): obj = self._metadata_constructor_() self.assertFalse(obj.has_metadata()) obj.metadata = {"hello": "world"} self.assertTrue(obj.has_metadata()) self.assertEqual(obj.metadata, {"hello": "world"}) obj.metadata = {} self.assertFalse(obj.has_metadata()) self.assertEqual(obj.metadata, {}) def test_metadata_setter_makes_shallow_copy(self): obj = self._metadata_constructor_() md = {"foo": "bar", 42: []} obj.metadata = md self.assertEqual(obj.metadata, md) self.assertIsNot(obj.metadata, md) md["foo"] = "baz" self.assertEqual(obj.metadata, {"foo": "bar", 42: []}) md[42].append(True) self.assertEqual(obj.metadata, {"foo": "bar", 42: [True]}) def test_metadata_setter_invalid_type(self): obj = self._metadata_constructor_(metadata={123: 456}) for md in (None, 0, "a", ("f", "o", "o"), np.array([]), pd.DataFrame()): with self.assertRaisesRegex(TypeError, "metadata must be a dict"): obj.metadata = md self.assertEqual(obj.metadata, {123: 456}) def test_metadata_deleter(self): obj = self._metadata_constructor_(metadata={"foo": "bar"}) self.assertEqual(obj.metadata, {"foo": "bar"}) del 
obj.metadata self.assertIsNone(obj._metadata) self.assertFalse(obj.has_metadata()) # Delete again. del obj.metadata self.assertIsNone(obj._metadata) self.assertFalse(obj.has_metadata()) obj = self._metadata_constructor_() self.assertIsNone(obj._metadata) self.assertFalse(obj.has_metadata()) del obj.metadata self.assertIsNone(obj._metadata) self.assertFalse(obj.has_metadata()) def test_has_metadata(self): obj = self._metadata_constructor_() self.assertFalse(obj.has_metadata()) # Handles metadata efficiently. self.assertIsNone(obj._metadata) self.assertFalse(self._metadata_constructor_(metadata={}).has_metadata()) self.assertTrue(self._metadata_constructor_(metadata={"": ""}).has_metadata()) self.assertTrue( self._metadata_constructor_(metadata={"foo": 42}).has_metadata() ) class PositionalMetadataMixinTests: def test_constructor_invalid_positional_metadata_type(self): with self.assertRaisesRegex( TypeError, "Invalid positional metadata. Must be " "consumable by `pd.DataFrame` constructor." " Original pandas error message: ", ): self._positional_metadata_constructor_(0, positional_metadata=2) def test_constructor_positional_metadata_len_mismatch(self): # Zero elements. with self.assertRaisesRegex(ValueError, r"\(0\).*\(4\)"): self._positional_metadata_constructor_(4, positional_metadata=[]) # Not enough elements. with self.assertRaisesRegex(ValueError, r"\(3\).*\(4\)"): self._positional_metadata_constructor_(4, positional_metadata=[2, 3, 4]) # Too many elements. with self.assertRaisesRegex(ValueError, r"\(5\).*\(4\)"): self._positional_metadata_constructor_( 4, positional_metadata=[2, 3, 4, 5, 6] ) # Series not enough rows. with self.assertRaisesRegex(ValueError, r"\(3\).*\(4\)"): self._positional_metadata_constructor_( 4, positional_metadata=pd.Series(range(3)) ) # Series too many rows. with self.assertRaisesRegex(ValueError, r"\(5\).*\(4\)"): self._positional_metadata_constructor_( 4, positional_metadata=pd.Series(range(5)) ) # DataFrame not enough rows. 
with self.assertRaisesRegex(ValueError, r"\(3\).*\(4\)"): self._positional_metadata_constructor_( 4, positional_metadata=pd.DataFrame({"quality": range(3)}) ) # DataFrame too many rows. with self.assertRaisesRegex(ValueError, r"\(5\).*\(4\)"): self._positional_metadata_constructor_( 4, positional_metadata=pd.DataFrame({"quality": range(5)}) ) # Empty DataFrame wrong size. with self.assertRaisesRegex(ValueError, r"\(2\).*\(3\)"): self._positional_metadata_constructor_( 3, positional_metadata=pd.DataFrame(index=range(2)) ) def test_constructor_no_positional_metadata(self): # Length zero with missing/empty positional metadata. for empty in None, {}, pd.DataFrame(): obj = self._positional_metadata_constructor_(0, positional_metadata=empty) self.assertFalse(obj.has_positional_metadata()) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(0)) ) # Nonzero length with missing positional metadata. obj = self._positional_metadata_constructor_(3, positional_metadata=None) self.assertFalse(obj.has_positional_metadata()) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(3)) ) def test_constructor_with_positional_metadata_len_zero(self): for data in [], (), np.array([]): obj = self._positional_metadata_constructor_( 0, positional_metadata={"foo": data} ) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": data}, index=range(0)) ) def test_constructor_with_positional_metadata_len_one(self): for data in [2], (2,), np.array([2]): obj = self._positional_metadata_constructor_( 1, positional_metadata={"foo": data} ) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": data}, index=range(1)) ) def 
test_constructor_with_positional_metadata_len_greater_than_one(self): for data in ( [0, 42, 42, 1, 0, 8, 100, 0, 0], (0, 42, 42, 1, 0, 8, 100, 0, 0), np.array([0, 42, 42, 1, 0, 8, 100, 0, 0]), ): obj = self._positional_metadata_constructor_( 9, positional_metadata={"foo": data} ) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": data}, index=range(9)) ) def test_constructor_with_positional_metadata_multiple_columns(self): obj = self._positional_metadata_constructor_( 5, positional_metadata={"foo": np.arange(5), "bar": np.arange(5)[::-1]} ) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=range(5) ), ) def test_constructor_with_positional_metadata_custom_index(self): df = pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=["a", "b", "c", "d", "e"], ) obj = self._positional_metadata_constructor_(5, positional_metadata=df) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=range(5) ), ) def test_constructor_with_positional_metadata_int64_index(self): # Test that memory-inefficient index is converted to memory-efficient # index. 
df = pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=np.arange(5) ) self.assertEqual(df.index.dtype, np.int_) obj = self._positional_metadata_constructor_(5, positional_metadata=df) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=range(5) ), ) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) def test_constructor_handles_missing_positional_metadata_efficiently(self): obj = self._positional_metadata_constructor_(4) self.assertIsNone(obj._positional_metadata) obj = self._positional_metadata_constructor_(4, positional_metadata=None) self.assertIsNone(obj._positional_metadata) def test_constructor_makes_shallow_copy_of_positional_metadata(self): df = pd.DataFrame( {"foo": [22, 22, 0], "bar": [[], [], []]}, index=["a", "b", "c"] ) obj = self._positional_metadata_constructor_(3, positional_metadata=df) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) self.assertIsNot(obj.positional_metadata, df) # Original df is not mutated. orig_df = pd.DataFrame( {"foo": [22, 22, 0], "bar": [[], [], []]}, index=["a", "b", "c"] ) assert_data_frame_almost_equal(df, orig_df) # Change values of column (using same dtype). df["foo"] = [42, 42, 42] assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) # Change single value of underlying data. df.values[0][0] = 10 assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) # Mutate list (not a deep copy). 
df["bar"].iloc[0].append(42) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[42], [], []]}, index=range(3)), ) def test_eq_basic(self): obj1 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) obj2 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) self.assertReallyEqual(obj1, obj2) def test_eq_from_different_source(self): obj1 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": np.array([1, 2, 3], dtype=np.int_)} ) obj2 = self._positional_metadata_constructor_( 3, positional_metadata=pd.DataFrame( {"foo": [1, 2, 3]}, index=["foo", "bar", "baz"], dtype=np.int_ ), ) self.assertReallyEqual(obj1, obj2) def test_eq_missing_positional_metadata(self): for empty in None, {}, pd.DataFrame(), pd.DataFrame(index=[]): obj = self._positional_metadata_constructor_(0, positional_metadata=empty) self.assertReallyEqual(obj, self._positional_metadata_constructor_(0)) self.assertReallyEqual( obj, self._positional_metadata_constructor_(0, positional_metadata=empty), ) for empty in None, pd.DataFrame(index=["a", "b"]): obj = self._positional_metadata_constructor_(2, positional_metadata=empty) self.assertReallyEqual(obj, self._positional_metadata_constructor_(2)) self.assertReallyEqual( obj, self._positional_metadata_constructor_(2, positional_metadata=empty), ) def test_eq_handles_missing_positional_metadata_efficiently(self): obj1 = self._positional_metadata_constructor_(1) obj2 = self._positional_metadata_constructor_(1) self.assertReallyEqual(obj1, obj2) self.assertIsNone(obj1._positional_metadata) self.assertIsNone(obj2._positional_metadata) def test_ne_len_zero(self): # Both have positional metadata. 
obj1 = self._positional_metadata_constructor_( 0, positional_metadata={"foo": []} ) obj2 = self._positional_metadata_constructor_( 0, positional_metadata={"foo": [], "bar": []} ) self.assertReallyNotEqual(obj1, obj2) # One has positional metadata. obj1 = self._positional_metadata_constructor_( 0, positional_metadata={"foo": []} ) obj2 = self._positional_metadata_constructor_(0) self.assertReallyNotEqual(obj1, obj2) def test_ne_len_greater_than_zero(self): # Both have positional metadata. obj1 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) obj2 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 2]} ) self.assertReallyNotEqual(obj1, obj2) # One has positional metadata. obj1 = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) obj2 = self._positional_metadata_constructor_(3) self.assertReallyNotEqual(obj1, obj2) def test_ne_len_mismatch(self): obj1 = self._positional_metadata_constructor_( 3, positional_metadata=pd.DataFrame(index=range(3)) ) obj2 = self._positional_metadata_constructor_( 2, positional_metadata=pd.DataFrame(index=range(2)) ) self.assertReallyNotEqual(obj1, obj2) def test_copy_positional_metadata_none(self): obj = self._positional_metadata_constructor_(3) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._positional_metadata) self.assertIsNone(obj_copy._positional_metadata) def test_copy_positional_metadata_empty(self): obj = self._positional_metadata_constructor_( 3, positional_metadata=pd.DataFrame(index=range(3)) ) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) assert_data_frame_almost_equal( obj._positional_metadata, pd.DataFrame(index=range(3)) ) self.assertIsNone(obj_copy._positional_metadata) def test_copy_with_positional_metadata(self): obj = self._positional_metadata_constructor_( 4, positional_metadata={"bar": [[], [], [], []], "baz": [42, 
42, 42, 42]} ) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNot(obj._positional_metadata, obj_copy._positional_metadata) self.assertIsNot( obj._positional_metadata.values, obj_copy._positional_metadata.values ) self.assertIs( obj._positional_metadata.loc[0, "bar"], obj_copy._positional_metadata.loc[0, "bar"], ) obj_copy.positional_metadata.loc[0, "bar"].append(1) obj_copy.positional_metadata.loc[0, "baz"] = 43 assert_data_frame_almost_equal( obj_copy.positional_metadata, pd.DataFrame({"bar": [[1], [], [], []], "baz": [43, 42, 42, 42]}), ) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"bar": [[1], [], [], []], "baz": [42, 42, 42, 42]}), ) def test_copy_preserves_range_index(self): for pm in None, {"foo": ["a", "b", "c"]}: obj = self._positional_metadata_constructor_(3, positional_metadata=pm) obj_copy = copy.copy(obj) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) self.assertIsInstance(obj_copy.positional_metadata.index, pd.RangeIndex) def test_deepcopy_positional_metadata_none(self): obj = self._positional_metadata_constructor_(3) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._positional_metadata) self.assertIsNone(obj_copy._positional_metadata) def test_deepcopy_positional_metadata_empty(self): obj = self._positional_metadata_constructor_( 3, positional_metadata=pd.DataFrame(index=range(3)) ) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) assert_data_frame_almost_equal( obj._positional_metadata, pd.DataFrame(index=range(3)) ) self.assertIsNone(obj_copy._positional_metadata) def test_deepcopy_with_positional_metadata(self): obj = self._positional_metadata_constructor_( 4, positional_metadata={"bar": [[], [], [], []], "baz": [42, 42, 42, 42]} ) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) 
self.assertIsNot(obj._positional_metadata, obj_copy._positional_metadata) self.assertIsNot( obj._positional_metadata.values, obj_copy._positional_metadata.values ) self.assertIsNot( obj._positional_metadata.loc[0, "bar"], obj_copy._positional_metadata.loc[0, "bar"], ) obj_copy.positional_metadata.loc[0, "bar"].append(1) obj_copy.positional_metadata.loc[0, "baz"] = 43 assert_data_frame_almost_equal( obj_copy.positional_metadata, pd.DataFrame({"bar": [[1], [], [], []], "baz": [43, 42, 42, 42]}), ) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"bar": [[], [], [], []], "baz": [42, 42, 42, 42]}), ) def test_deepcopy_preserves_range_index(self): for pm in None, {"foo": ["a", "b", "c"]}: obj = self._positional_metadata_constructor_(3, positional_metadata=pm) obj_copy = copy.deepcopy(obj) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) self.assertIsInstance(obj_copy.positional_metadata.index, pd.RangeIndex) def test_deepcopy_memo_is_respected(self): # Basic test to ensure deepcopy's memo is passed through to recursive # deepcopy calls. obj = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) memo = {} copy.deepcopy(obj, memo) self.assertGreater(len(memo), 2) def test_positional_metadata_getter(self): obj = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [22, 22, 0]} ) self.assertIsInstance(obj.positional_metadata, pd.DataFrame) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0]}) ) # Update existing column. obj.positional_metadata["foo"] = [42, 42, 43] assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [42, 42, 43]}) ) # Add new column. 
obj.positional_metadata["foo2"] = [True, False, True] assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [42, 42, 43], "foo2": [True, False, True]}), ) def test_positional_metadata_getter_no_positional_metadata(self): obj = self._positional_metadata_constructor_(4) self.assertIsNone(obj._positional_metadata) self.assertIsInstance(obj.positional_metadata, pd.DataFrame) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(4)) ) self.assertIsNotNone(obj._positional_metadata) def test_positional_metadata_getter_set_column_series(self): length = 8 obj = self._positional_metadata_constructor_( length, positional_metadata={"foo": range(length)} ) obj.positional_metadata["bar"] = pd.Series(range(length - 3)) # pandas.Series will be padded with NaN if too short. npt.assert_equal( obj.positional_metadata["bar"], np.array(list(range(length - 3)) + [np.nan] * 3), ) obj.positional_metadata["baz"] = pd.Series(range(length + 3)) # pandas.Series will be truncated if too long. npt.assert_equal(obj.positional_metadata["baz"], np.array(range(length))) def test_positional_metadata_getter_set_column_array(self): length = 8 obj = self._positional_metadata_constructor_( length, positional_metadata={"foo": range(length)} ) # array-like objects will fail if wrong size. 
for array_like in ( np.array(range(length - 1)), range(length - 1), np.array(range(length + 1)), range(length + 1), ): with self.assertRaisesRegex( ValueError, r"Length of values \(" + str(len(array_like)) + r"\) does not match length" r" of index \(8\)", ): obj.positional_metadata["bar"] = array_like def test_positional_metadata_setter_pandas_consumable(self): obj = self._positional_metadata_constructor_(3) self.assertFalse(obj.has_positional_metadata()) obj.positional_metadata = {"foo": [3, 2, 1]} self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [3, 2, 1]}) ) obj.positional_metadata = pd.DataFrame(index=np.arange(3)) self.assertFalse(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(3)) ) def test_positional_metadata_setter_data_frame(self): obj = self._positional_metadata_constructor_(3) self.assertFalse(obj.has_positional_metadata()) obj.positional_metadata = pd.DataFrame( {"foo": [3, 2, 1]}, index=["a", "b", "c"] ) self.assertTrue(obj.has_positional_metadata()) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [3, 2, 1]}) ) obj.positional_metadata = pd.DataFrame(index=np.arange(3)) self.assertFalse(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(3)) ) def test_positional_metadata_setter_none(self): obj = self._positional_metadata_constructor_(0, positional_metadata={"foo": []}) self.assertTrue(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": []}) ) # `None` behavior differs from constructor. 
obj.positional_metadata = None self.assertFalse(obj.has_positional_metadata()) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame(index=range(0)) ) def test_positional_metadata_setter_int64_index(self): # Test that memory-inefficient index is converted to memory-efficient # index. obj = self._positional_metadata_constructor_(5) df = pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=np.arange(5) ) self.assertEqual(df.index.dtype, np.int_) obj.positional_metadata = df assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame( {"foo": np.arange(5), "bar": np.arange(5)[::-1]}, index=range(5) ), ) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) def test_positional_metadata_setter_makes_shallow_copy(self): obj = self._positional_metadata_constructor_(3) df = pd.DataFrame( {"foo": [22, 22, 0], "bar": [[], [], []]}, index=["a", "b", "c"] ) obj.positional_metadata = df assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) self.assertIsNot(obj.positional_metadata, df) # Original df is not mutated. orig_df = pd.DataFrame( {"foo": [22, 22, 0], "bar": [[], [], []]}, index=["a", "b", "c"] ) assert_data_frame_almost_equal(df, orig_df) # Change values of column (using same dtype). df["foo"] = [42, 42, 42] assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) # Change single value of underlying data. df.values[0][0] = 10 assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[], [], []]}, index=range(3)), ) # Mutate list (not a deep copy). 
df["bar"].iloc[0].append(42) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [22, 22, 0], "bar": [[42], [], []]}, index=range(3)), ) def test_positional_metadata_setter_invalid_type(self): obj = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 42]} ) with self.assertRaisesRegex( TypeError, "Invalid positional metadata. Must be " "consumable by `pd.DataFrame` constructor." " Original pandas error message: ", ): obj.positional_metadata = 2 assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [1, 2, 42]}) ) def test_positional_metadata_setter_len_mismatch(self): obj = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 42]} ) # `None` behavior differs from constructor. with self.assertRaisesRegex(ValueError, r"\(0\).*\(3\)"): obj.positional_metadata = None assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [1, 2, 42]}) ) with self.assertRaisesRegex(ValueError, r"\(4\).*\(3\)"): obj.positional_metadata = [1, 2, 3, 4] assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [1, 2, 42]}) ) def test_positional_metadata_deleter(self): obj = self._positional_metadata_constructor_( 3, positional_metadata={"foo": [1, 2, 3]} ) self.assertIsInstance(obj.positional_metadata.index, pd.RangeIndex) assert_data_frame_almost_equal( obj.positional_metadata, pd.DataFrame({"foo": [1, 2, 3]}) ) del obj.positional_metadata self.assertIsNone(obj._positional_metadata) self.assertFalse(obj.has_positional_metadata()) # Delete again. 
del obj.positional_metadata self.assertIsNone(obj._positional_metadata) self.assertFalse(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_(3) self.assertIsNone(obj._positional_metadata) self.assertFalse(obj.has_positional_metadata()) del obj.positional_metadata self.assertIsNone(obj._positional_metadata) self.assertFalse(obj.has_positional_metadata()) def test_has_positional_metadata(self): obj = self._positional_metadata_constructor_(4) self.assertFalse(obj.has_positional_metadata()) self.assertIsNone(obj._positional_metadata) obj = self._positional_metadata_constructor_(0, positional_metadata={}) self.assertFalse(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_( 4, positional_metadata=pd.DataFrame(index=np.arange(4)) ) self.assertFalse(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_( 4, positional_metadata=pd.DataFrame(index=["a", "b", "c", "d"]) ) self.assertFalse(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_(0, positional_metadata={"foo": []}) self.assertTrue(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_( 4, positional_metadata={"foo": [1, 2, 3, 4]} ) self.assertTrue(obj.has_positional_metadata()) obj = self._positional_metadata_constructor_( 2, positional_metadata={"foo": [1, 2], "bar": ["abc", "def"]} ) self.assertTrue(obj.has_positional_metadata()) class IntervalMetadataMixinTests: def _set_up(self): self.upper_bound = 9 self.im = IntervalMetadata(self.upper_bound) self.intvls = [ {"bounds": [(0, 1), (2, 9)], "metadata": {"gene": "sagA"}}, {"bounds": [(0, 1)], "metadata": {"gene": ["a"], "product": "foo"}}, ] def test_constructor_invalid(self): with self.assertRaisesRegex( TypeError, "You must provide `IntervalMetadata` " "object." 
): self._interval_metadata_constructor_(0, "") def test_constructor_empty_interval_metadata_upper_bound_is_none(self): im = IntervalMetadata(None) for i in [0, 1, 3, 100]: x = self._interval_metadata_constructor_(i, im) # the upper bound is reset to seq/axis length self.assertEqual(x.interval_metadata.upper_bound, i) self.assertEqual(x.interval_metadata._intervals, im._intervals) # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_constructor_interval_metadata_upper_bound_is_none(self): im = IntervalMetadata(None) # populate im im.add(**self.intvls[0]) im.add(**self.intvls[1]) for i in [1000, 100]: x = self._interval_metadata_constructor_(i, im) # the upper bound is reset to seq/axis length self.assertEqual(x.interval_metadata.upper_bound, i) self.assertEqual(x.interval_metadata._intervals, im._intervals) # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_constructor_interval_bounds_larger_than_len(self): im = IntervalMetadata(None) # populate im im.add(**self.intvls[0]) im.add(**self.intvls[1]) for i in [0, 1, 3]: # error to reset upper bound to a smaller value than seq/axis len with self.assertRaisesRegex( ValueError, r"larger than upper bound \(%r\)" % i ): self._interval_metadata_constructor_(i, im) # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_constructor_interval_metadata_len_mismatch(self): for i in [0, 1, 3, 100]: with self.assertRaisesRegex( ValueError, r"\(%d\).*\(%d\)" % (self.upper_bound, i) ): self._interval_metadata_constructor_(i, self.im) def test_constructor_interval_metadata_len(self): for n in 1, 2, 3: im = IntervalMetadata(n) im.add([(0, 1)], metadata={"a": "b"}) obj = self._interval_metadata_constructor_(n, im) self.assertTrue(obj.has_interval_metadata()) self.assertIsInstance(obj.interval_metadata, IntervalMetadata) def test_constructor_interval_metadata_len_0(self): im = IntervalMetadata(0) obj = 
self._interval_metadata_constructor_(0, im) self.assertFalse(obj.has_interval_metadata()) def test_constructor_no_interval_metadata(self): for i, im in [(0, None), (self.upper_bound, self.im)]: obj = self._interval_metadata_constructor_(i, im) self.assertFalse(obj.has_interval_metadata()) self.assertIsInstance(obj.interval_metadata, IntervalMetadata) def test_constructor_handles_missing_interval_metadata_efficiently(self): obj = self._interval_metadata_constructor_(self.upper_bound) self.assertIsNone(obj._interval_metadata) obj = self._interval_metadata_constructor_( self.upper_bound, interval_metadata=None ) self.assertIsNone(obj._interval_metadata) def test_constructor_makes_shallow_copy_of_interval_metadata(self): intvl = self.im.add(**self.intvls[1]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) self.assertEqual(obj.interval_metadata, self.im) self.assertIsNot(obj.interval_metadata, self.im) # Changing mutable value of metadata of the old interval # also changes obj. 
intvl.metadata["gene"].append("b") self.assertEqual(obj.interval_metadata, self.im) # Changing old interval doesn't change obj intvl.bounds = [(3, 6)] self.assertNotEqual(obj.interval_metadata, self.im) def test_eq_basic(self): im1 = IntervalMetadata(self.upper_bound) im1.add(**self.intvls[0]) obj1 = self._interval_metadata_constructor_(self.upper_bound, im1) im2 = IntervalMetadata(self.upper_bound) im2.add(**self.intvls[0]) obj2 = self._interval_metadata_constructor_(self.upper_bound, im2) self.assertReallyEqual(obj1, obj2) def test_eq_populated_differently(self): im1 = IntervalMetadata(self.upper_bound) im1.add(**self.intvls[0]) obj1 = self._interval_metadata_constructor_(self.upper_bound, im1) obj2 = self._interval_metadata_constructor_(self.upper_bound) obj2.interval_metadata.add(**self.intvls[0]) self.assertReallyEqual(obj1, obj2) def test_eq_handles_missing_positional_metadata_efficiently(self): obj1 = self._interval_metadata_constructor_(self.upper_bound) obj2 = self._interval_metadata_constructor_(self.upper_bound) self.assertReallyEqual(obj1, obj2) self.assertIsNone(obj1._interval_metadata) self.assertIsNone(obj2._interval_metadata) def test_ne_diff_len(self): obj1 = self._interval_metadata_constructor_(0) obj2 = self._interval_metadata_constructor_(self.upper_bound) self.assertReallyNotEqual(obj1, obj2) def test_ne_only_one_is_empty(self): im1 = IntervalMetadata(self.upper_bound) im1.add(**self.intvls[0]) obj1 = self._interval_metadata_constructor_(self.upper_bound, im1) obj2 = self._interval_metadata_constructor_(self.upper_bound) self.assertReallyNotEqual(obj1, obj2) def test_ne(self): im1 = IntervalMetadata(self.upper_bound) im1.add(**self.intvls[0]) obj1 = self._interval_metadata_constructor_(self.upper_bound, im1) im2 = IntervalMetadata(self.upper_bound) im2.add(**self.intvls[1]) obj2 = self._interval_metadata_constructor_(self.upper_bound, im2) self.assertReallyNotEqual(obj1, obj2) def test_copy_interval_metadata_empty(self): obj = 
self._interval_metadata_constructor_(self.upper_bound, self.im) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj_copy._interval_metadata) self.assertEqual(obj._interval_metadata, self.im) def test_copy_interval_metadata_none(self): obj = self._interval_metadata_constructor_(self.upper_bound) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._interval_metadata) self.assertIsNone(obj_copy._interval_metadata) def test_copy_interval_metadata(self): self.im.add(**self.intvls[1]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) obj_copy = copy.copy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNot(obj.interval_metadata, obj_copy.interval_metadata) self.assertIsNot( obj.interval_metadata._intervals, obj_copy.interval_metadata._intervals ) for i, j in zip( obj.interval_metadata._intervals, obj_copy.interval_metadata._intervals ): self.assertIsNot(i, j) self.assertIsNot(i.metadata, j.metadata) for k in i.metadata: self.assertIs(i.metadata[k], j.metadata[k]) def test_deepcopy_interval_metadata(self): self.im.add(**self.intvls[1]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNot(obj.interval_metadata, obj_copy.interval_metadata) self.assertIsNot( obj.interval_metadata._intervals, obj_copy.interval_metadata._intervals ) for i, j in zip( obj.interval_metadata._intervals, obj_copy.interval_metadata._intervals ): self.assertIsNot(i, j) self.assertIsNot(i.metadata, j.metadata) self.assertIsNot(i.metadata["gene"], j.metadata["gene"]) self.assertIs(i.metadata["product"], j.metadata["product"]) def test_deepcopy_interval_metadata_empty(self): obj = self._interval_metadata_constructor_(self.upper_bound, self.im) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) 
self.assertIsNot(obj, obj_copy) self.assertIsNone(obj_copy._interval_metadata) self.assertEqual(obj._interval_metadata, self.im) def test_deepcopy_interval_metadata_none(self): obj = self._interval_metadata_constructor_(self.upper_bound, None) obj_copy = copy.deepcopy(obj) self.assertEqual(obj, obj_copy) self.assertIsNot(obj, obj_copy) self.assertIsNone(obj._interval_metadata) self.assertIsNone(obj_copy._interval_metadata) def test_deepcopy_memo_is_respected(self): # Basic test to ensure deepcopy's memo is passed through to recursive # deepcopy calls. obj = self._interval_metadata_constructor_(self.upper_bound, self.im) memo = {} copy.deepcopy(obj, memo) self.assertGreater(len(memo), 1) def test_interval_metadata_getter(self): self.im.add(**self.intvls[0]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) self.assertIsInstance(obj.interval_metadata, IntervalMetadata) self.assertEqual(self.im, obj.interval_metadata) # Update existing metadata. obj.interval_metadata._intervals[0].metadata["gene"] = "sagB" self.assertNotEqual(obj.interval_metadata, self.im) self.im._intervals[0].metadata["gene"] = "sagB" self.assertEqual(obj.interval_metadata, self.im) # Add new interval feature. 
obj.interval_metadata.add(**self.intvls[1]) self.im.add(**self.intvls[1]) self.assertEqual(obj.interval_metadata, self.im) def test_interval_metadata_getter_no_interval_metadata(self): obj = self._interval_metadata_constructor_(self.upper_bound) self.assertIsNone(obj._interval_metadata) self.assertIsInstance(obj.interval_metadata, IntervalMetadata) self.assertEqual(obj.interval_metadata, self.im) self.assertIsNotNone(obj._interval_metadata) def test_interval_metadata_setter(self): obj = self._interval_metadata_constructor_(self.upper_bound) self.assertFalse(obj.has_interval_metadata()) obj.interval_metadata = self.im self.assertFalse(obj.has_interval_metadata()) self.assertEqual(obj.interval_metadata, self.im) self.im.add(**self.intvls[1]) obj.interval_metadata = self.im self.assertTrue(obj.has_interval_metadata()) self.assertEqual(obj.interval_metadata, self.im) def test_interval_metadata_setter_makes_copy(self): intvl = self.im.add(**self.intvls[1]) obj = self._interval_metadata_constructor_(self.upper_bound) obj.interval_metadata = self.im self.assertEqual(obj.interval_metadata, self.im) self.assertIsNot(obj.interval_metadata, self.im) # Changing mutable value of metadata of the old interval # also changes obj. 
intvl.metadata["gene"].append("b") self.assertEqual(obj.interval_metadata, self.im) # Changing old interval doesn't change obj intvl.bounds = [(3, 6)] self.assertNotEqual(obj.interval_metadata, self.im) def test_interval_metadata_setter_len_mismatch(self): self.im.add(**self.intvls[1]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) for i in 0, 1, 3, 100: with self.assertRaisesRegex( ValueError, r"\(%d\).*\(%d\)" % (i, self.upper_bound) ): obj.interval_metadata = IntervalMetadata(i) self.assertEqual(obj.interval_metadata, self.im) def test_interval_metadata_setter_invalid_type(self): self.im.add(**self.intvls[0]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) for i in [2, None, "", {}, []]: with self.assertRaisesRegex( TypeError, "You must provide `IntervalMetadata` object" ): obj.interval_metadata = i self.assertEqual(self.im, obj.interval_metadata) def test_interval_metadata_setter_empty_upper_bound_is_none(self): im = IntervalMetadata(None) for i in [0, 1, 3, 100]: x = self._interval_metadata_constructor_(i) x.interval_metadata = im self.assertFalse(x.has_interval_metadata()) # the upper bound is reset to seq/axis length self.assertEqual(x.interval_metadata.upper_bound, i) # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_interval_metadata_setter_upper_bound_is_none(self): im = IntervalMetadata(None) # populate im im.add(**self.intvls[0]) im.add(**self.intvls[1]) for i in [1000, 100]: x = self._interval_metadata_constructor_(i) x.interval_metadata = im # the upper bound is reset to seq/axis length self.assertEqual(x.interval_metadata.upper_bound, i) self.assertEqual(x.interval_metadata._intervals, im._intervals) # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_interval_metadata_setter_interval_bounds_larger_than_len(self): im = IntervalMetadata(None) # populate im im.add(**self.intvls[0]) im.add(**self.intvls[1]) for i 
in [0, 1, 3]: # error to reset upper bound to a smaller value than seq/axis len with self.assertRaisesRegex( ValueError, r"larger than upper bound \(%r\)" % i ): x = self._interval_metadata_constructor_(i) x.interval_metadata = im # original interval metadata upper bound is not changed self.assertIsNone(im.upper_bound) def test_interval_metadata_deleter_empty(self): obj = self._interval_metadata_constructor_(self.upper_bound, self.im) del obj.interval_metadata self.assertIsNone(obj._interval_metadata) self.assertFalse(obj.has_interval_metadata()) # Delete again. test idempotent del obj.interval_metadata self.assertIsNone(obj._interval_metadata) self.assertFalse(obj.has_interval_metadata()) def test_interval_metadata_deleter(self): self.im.add(**self.intvls[0]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) del obj.interval_metadata self.assertIsNone(obj._interval_metadata) self.assertFalse(obj.has_interval_metadata()) def test_has_interval_metadata(self): obj = self._interval_metadata_constructor_(self.upper_bound) self.assertFalse(obj.has_interval_metadata()) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) self.assertFalse(obj.has_interval_metadata()) self.im.add([(0, 1)]) obj = self._interval_metadata_constructor_(self.upper_bound, self.im) self.assertTrue(obj.has_interval_metadata()) scikit-bio-0.6.2/skbio/metadata/base.py000066400000000000000000000041151464262511300177340ustar00rootroot00000000000000"""Base for the metadata module.""" # ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- SUPPORTED_COLUMN_TYPES = {"categorical", "numeric"} SUPPORTED_ID_HEADERS = { "case_insensitive": { "id", "sampleid", "sample id", "sample-id", "featureid", "feature id", "feature-id", }, # For backwards-compatibility with existing formats. "exact_match": { # QIIME 1 mapping files. "#Sample ID" was never supported, but # we're including it here for symmetry with the other supported # headers that allow a space between words. "#SampleID", "#Sample ID", # biom-format: observation metadata and "classic" (TSV) OTU tables. "#OTUID", "#OTU ID", # Qiita sample/prep information files. "sample_name", }, } FORMATTED_ID_HEADERS = "Case-insensitive: %s\n\nCase-sensitive: %s" % ( ", ".join(repr(e) for e in sorted(SUPPORTED_ID_HEADERS["case_insensitive"])), ", ".join(repr(e) for e in sorted(SUPPORTED_ID_HEADERS["exact_match"])), ) def is_id_header(name): """Determine if a name is a valid ID column header. This function may be used to determine if a value in a metadata file is a valid ID column header, or if a pandas ``Index.name`` matches the ID header requirements. The "ID header" corresponds to the ``Metadata.id_header`` and ``MetadataColumn.id_header`` properties. Parameters ---------- name : string or None Name to check against ID header requirements. Returns ------- bool ``True`` if `name` is a valid ID column header, ``False`` otherwise. """ return name and ( name in SUPPORTED_ID_HEADERS["exact_match"] or name.lower() in SUPPORTED_ID_HEADERS["case_insensitive"] ) scikit-bio-0.6.2/skbio/metadata/io.py000066400000000000000000000533521464262511300174400ustar00rootroot00000000000000"""Contains io functionality for the Metadata module.""" # ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- import csv import itertools import os.path import re import numpy as np import pandas as pd from skbio.io._fileobject import SaneTextIOWrapper from skbio.util import find_duplicates from .missing import DEFAULT_MISSING, BUILTIN_MISSING, series_encode_missing from .base import SUPPORTED_COLUMN_TYPES, FORMATTED_ID_HEADERS, is_id_header from ..metadata._metadata import SampleMetadata, MetadataColumn class MetadataFileError(Exception): """Exception for errors with Metadata files.""" _suffix = ( "There may be more errors present in the metadata file. To get a full " "report, sample/feature metadata files can be validated with Keemei: " "https://keemei.qiime2.org\n\nFind details on QIIME 2 metadata " "requirements here: https://docs.qiime2.org/" ) def __init__(self, message, include_suffix=True): """Initialize the MetadataFileError.""" # LH NOTE/TODO: in Qiime2 this linked to the specific Qiime2 release. # However since this is not Qiime2 It did break and I removed this if include_suffix: message = message + "\n\n" + self._suffix super().__init__(message) class MetadataReader: """Reader for Metadata files.""" def __init__(self, filepath_or_filehandle): """Initialize the Reader for Metadata files.""" # check if the filepath_filehandle is a path... if it is check if it # points to a file # TODO: Refine this check to be more specific if isinstance(filepath_or_filehandle, str): self._file_is_filehandle = False if not os.path.isfile(filepath_or_filehandle): raise MetadataFileError( "Metadata file path doesn't exist, or the path points to " "something other than a file. 
Please check that the path " "exists, has read permissions, and points to a regular file " "(not a directory): %s" % filepath_or_filehandle ) else: self._file_is_filehandle = True self._filepath = filepath_or_filehandle # Used by `read()` to store an iterator yielding rows with # leading/trailing whitespace stripped from their cells (this is a # preprocessing step that should happen with *every* row). The iterator # protocol is the only guaranteed API on this object. self._reader = None def read( self, into, column_types=None, column_missing_schemes=None, default_missing_scheme=DEFAULT_MISSING, ): """Return a Metadata object read from the given file.""" if column_types is None: column_types = {} try: # choose the appropriate context manager depending # on if a filehandle has been passed. if self._file_is_filehandle: cm = self._filepath else: # Newline settings based on recommendation from csv docs: # https://docs.python.org/3/library/csv.html#id3 # Ignore BOM on read (but do not write BOM) cm = open(self._filepath, "r", newline="", encoding="utf-8-sig") with cm as fh: tsv_reader = csv.reader(fh, dialect="excel-tab", strict=True) self._reader = (self._strip_cell_whitespace(row) for row in tsv_reader) header = self._read_header() directives = self._read_directives(header) ids, data = self._read_data(header) except UnicodeDecodeError as e: if "0xff in position 0" in str(e) or "0xfe in position 0" in str(e): raise MetadataFileError( "Metadata file must be encoded as UTF-8 or ASCII, found " "UTF-16. If this file is from Microsoft Excel, save " "as a plain text file, not 'UTF-16 Unicode'" ) raise MetadataFileError( "Metadata file must be encoded as UTF-8 or ASCII. 
The " "following error occurred when decoding the file:\n\n%s" % e ) finally: self._reader = None index = pd.Index(ids, name=header[0], dtype=object) df = pd.DataFrame(data, columns=header[1:], index=index, dtype=object) # TODO: move these checks over to Metadata.__init__() so that you can # pass column_types with an untyped dataframe. This would require a bit # of a refactor and doesn't buy a whole lot at the moment, hence the # TODO. for name, type in column_types.items(): if name not in df.columns: raise MetadataFileError( "Column name %r specified in `column_types` is not a " "column in the metadata file." % name ) if type not in SUPPORTED_COLUMN_TYPES: fmt_column_types = ", ".join( repr(e) for e in sorted(SUPPORTED_COLUMN_TYPES) ) raise MetadataFileError( "Column name %r specified in `column_types` has an " "unrecognized column type %r. Supported column types: %s" % (name, type, fmt_column_types) ) resolved_column_types = directives.get("types", {}) resolved_column_types.update(column_types) if column_missing_schemes is None: column_missing_schemes = {} resolved_missing = {c: default_missing_scheme for c in df.columns} resolved_missing.update(directives.get("missing", {})) resolved_missing.update(column_missing_schemes) try: # Cast each column to the appropriate dtype based on column type. df = df.apply( self._cast_column, axis="index", column_types=resolved_column_types, missing_schemes=resolved_missing, ) except MetadataFileError as e: # HACK: If an exception is raised within `DataFrame.apply`, pandas # adds an extra tuple element to `e.args`, making the original # error message difficult to read because a tuple is repr'd instead # of a string. To work around this, we catch and reraise a # MetadataFileError with the original error message. We use # `include_suffix=False` to avoid adding another suffix to the # error message we're reraising. 
msg = e.args[0] raise MetadataFileError(msg, include_suffix=False) try: return into( df, column_missing_schemes=resolved_missing, default_missing_scheme=default_missing_scheme, ) except Exception as e: raise MetadataFileError( "There was an issue with loading the metadata file:\n\n%s" % e ) def _read_header(self): header = None for row in self._reader: if self._is_header(row): header = row break elif self._is_comment(row): continue elif self._is_empty(row): continue elif self._is_directive(row): raise MetadataFileError( "Found directive %r while searching for header. " "Directives may only appear immediately after the header." % row[0] ) else: raise MetadataFileError( "Found unrecognized ID column name %r while searching for " "header. The first column name in the header defines the " "ID column, and must be one of these values:\n\n%s\n\n" "NOTE: Metadata files must contain tab-separated values." % (row[0], FORMATTED_ID_HEADERS) ) if header is None: raise MetadataFileError( "Failed to locate header. The metadata file may be empty, or " "consists only of comments or empty rows." ) # Trim trailing empty cells from header. data_extent = None for idx, cell in enumerate(header): if cell != "": data_extent = idx header = header[: data_extent + 1] # Basic validation to 1) fail early before processing entire file; and # 2) make some basic guarantees about the header for things in this # class that use the header as part of reading the file. column_names = set(header) if "" in column_names: raise MetadataFileError( "Found at least one column without a name in the header. Each " "column must be named." ) elif len(header) != len(column_names): duplicates = find_duplicates(header) raise MetadataFileError( "Column names must be unique. The following column names are " "duplicated: %s" % (", ".join(repr(e) for e in sorted(duplicates))) ) # Skip the first element of the header because we know it is a valid ID # header. 
The other column names are validated to ensure they *aren't* # valid ID headers. for column_name in header[1:]: if is_id_header(column_name): raise MetadataFileError( "Metadata column name %r conflicts with a name reserved " "for the ID column header. Reserved ID column headers:" "\n\n%s" % (column_name, FORMATTED_ID_HEADERS) ) return header def _read_directives(self, header): directives = {} for row in self._reader: directive_kind = None if not self._is_directive(row): self._reader = itertools.chain([row], self._reader) break if self._is_column_types_directive(row): directive_kind = "types" elif self._is_missing_directive(row): directive_kind = "missing" else: raise MetadataFileError( "Unrecognized directive %r. Only the #sk:types, #q2:types" " and #sk:missing, #q2:missing directives are supported at this" " time." % row[0] ) if directive_kind in directives: raise MetadataFileError( "Found duplicate directive %r. Each directive may " "only be specified a single time." % row[0] ) row = self._match_header_len(row, header) collected = {name: arg for name, arg in zip(header[1:], row[1:]) if arg} directives[directive_kind] = collected if "types" in directives: column_types = directives["types"] for column_name, column_type in column_types.items(): type_nocase = column_type.lower() if type_nocase in SUPPORTED_COLUMN_TYPES: column_types[column_name] = type_nocase else: fmt_column_types = ", ".join( repr(e) for e in sorted(SUPPORTED_COLUMN_TYPES) ) raise MetadataFileError( "Column %r has an unrecognized column type %r " "specified in its #sk:types or #q2:types directive. " "Supported column types (case-insensitive): %s" % (column_name, column_type, fmt_column_types) ) if "missing" in directives: for column_name, column_missing in directives["missing"].items(): if column_missing not in BUILTIN_MISSING: raise MetadataFileError( "Column %r has an unrecognized missing value scheme %r" " specified in its #sk:missing or #q2:missing directive." 
" Supported missing value schemes (case-sensitive): %s" % (column_name, column_missing, list(BUILTIN_MISSING)) ) return directives def _read_data(self, header): ids = [] data = [] for row in self._reader: if self._is_comment(row): continue elif self._is_empty(row): continue elif self._is_directive(row): raise MetadataFileError( "Found directive %r outside of the directives section of " "the file. Directives may only appear immediately after " "the header." % row[0] ) elif self._is_header(row): raise MetadataFileError( "Metadata ID %r conflicts with a name reserved for the ID " "column header. Reserved ID column headers:\n\n%s" % (row[0], FORMATTED_ID_HEADERS) ) row = self._match_header_len(row, header) ids.append(row[0]) data.append(row[1:]) return ids, data def _strip_cell_whitespace(self, row): return [cell.strip() for cell in row] def _match_header_len(self, row, header): row_len = len(row) header_len = len(header) if row_len < header_len: # Pad row with empty cells to match header length. row = row + [""] * (header_len - row_len) elif row_len > header_len: trailing_row = row[header_len:] if not self._is_empty(trailing_row): raise MetadataFileError( "Metadata row contains more cells than are declared by " "the header. The row has %d cells, while the header " "declares %d cells." % (row_len, header_len) ) row = row[:header_len] return row def _is_empty(self, row): # `all` returns True for an empty iterable, so this check works for a # row of zero elements (corresponds to a blank line in the file). 
return all((cell == "" for cell in row)) def _is_comment(self, row): return ( len(row) > 0 and row[0].startswith("#") and not self._is_directive(row) and not self._is_header(row) ) def _is_header(self, row): if len(row) == 0: return False return is_id_header(row[0]) def _is_directive(self, row): return len(row) > 0 and row[0].startswith(("#sk:", "#q2:")) def _is_column_types_directive(self, row): return len(row) > 0 and (row[0].split(" ")[0] in ["#sk:types", "#q2:types"]) def _is_missing_directive(self, row): return len(row) > 0 and (row[0].split(" ")[0] in ["#sk:missing", "#q2:missing"]) def _cast_column(self, series, column_types, missing_schemes): if series.name in missing_schemes: scheme = missing_schemes[series.name] series = series_encode_missing(series, scheme) if series.name in column_types: if column_types[series.name] == "numeric": return self._to_numeric(series) else: # 'categorical' return self._to_categorical(series) else: # Infer type try: return self._to_numeric(series) except MetadataFileError: return self._to_categorical(series) def _to_categorical(self, series): # Replace empty strings with `None` to force the series to remain # dtype=object (this only matters if the series consists solely of # missing data). Replacing with np.nan and casting to dtype=object # won't retain the correct dtype in the resulting dataframe # (`DataFrame.apply` seems to force series consisting solely of np.nan # to dtype=float64, even if dtype=object is specified. # # To replace a value with `None`, the following invocation of # `Series.replace` must be used because `None` is a sentinel: # https://stackoverflow.com/a/17097397/3776794 return series.replace([""], [None]) def _to_numeric(self, series): series = series.replace("", np.nan) is_numeric = series.apply(self._is_numeric) if is_numeric.all(): return pd.to_numeric(series, errors="raise") else: non_numerics = series[~is_numeric].unique() raise MetadataFileError( "Cannot convert metadata column %r to numeric. 
The following " "values could not be interpreted as numeric: %s" % (series.name, ", ".join(repr(e) for e in sorted(non_numerics))) ) def _is_numeric(self, value): return isinstance(value, float) or len(_numeric_regex.findall(value)) == 1 class MetadataWriter: """Writer for Metadata.""" def __init__(self, metadata): """Initialize Writer for Metadata.""" self._metadata = metadata def write(self, filepath_or_filehandle): """Write metadata object to passed file or filehandle.""" if isinstance(filepath_or_filehandle, str): # Newline settings based on recommendation from csv docs: # https://docs.python.org/3/library/csv.html#id3 # Do NOT write a BOM, hence utf-8 not utf-8-sig cm = open(filepath_or_filehandle, "w", newline="", encoding="utf-8") else: cm = filepath_or_filehandle with cm as fh: tsv_writer = csv.writer(fh, dialect="excel-tab", strict=True) md = self._metadata header = [md.id_header] # NOTE/TODO: The Metadata files written with this method # will always have the directives of type #sk: # even if a metadata file with directives of type #q2: # has been read. This can be changed in the future # however we could also decide to just stick with the sk: types. 
types_directive = ["#sk:types"] missing_directive = ["#sk:missing"] if isinstance(md, SampleMetadata): for name, props in md.columns.items(): header.append(name) types_directive.append(props.type) missing_directive.append(props.missing_scheme) elif isinstance(md, MetadataColumn): header.append(md.name) types_directive.append(md.type) missing_directive.append(md.missing_scheme) else: raise NotImplementedError tsv_writer.writerow(header) tsv_writer.writerow(types_directive) if self._non_default_missing(missing_directive): tsv_writer.writerow(missing_directive) df = md.to_dataframe(encode_missing=True) df.fillna("", inplace=True) # since `applymap` is going to be deprecated soon # and `map` may not work on older versions of pandas try: mapper_ = df.map except AttributeError: mapper_ = df.applymap df = mapper_(self._format) tsv_writer.writerows(df.itertuples(index=True)) def _non_default_missing(self, missing_directive): missing = missing_directive[1:] result = False for m in missing: if m != DEFAULT_MISSING: result = True break return result def _format(self, value): if isinstance(value, str): return value elif isinstance(value, float): # Use fixed precision or scientific notation as necessary (both are # roundtrippable in the metadata file format), with up to 15 digits # *total* precision (i.e. before and after the decimal point), # rounding if necessary. Trailing zeros or decimal points will not # be included in the formatted string (e.g. 42.0 will be formatted # as "42"). A precision of 15 digits is used because that is within # the 64-bit floating point spec (things get weird after that). # # Using repr() and str() each have their own predefined precision # which varies across Python versions. Using the string formatting # presentation types (e.g. %g, %f) without specifying a precision # will usually default to 6 digits past the decimal point, which # seems a little low. 
# # References: # # - https://stackoverflow.com/a/2440786/3776794 # - https://stackoverflow.com/a/2440708/3776794 # - https://docs.python.org/3/library/string.html# # format-specification-mini-language # - https://stackoverflow.com/a/20586479/3776794 # - https://drj11.wordpress.com/2007/07/03/python-poor-printing- # of-floating-point/ return "{0:.15g}".format(value) else: raise NotImplementedError # Credit: https://stackoverflow.com/a/4703508/3776794 _numeric_pattern = r""" ^[-+]? # optional sign (?: (?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc | (?: \d+ \.? ) # 1. 12. 123. etc 1 12 123 etc ) # followed by optional exponent part if desired (?: [Ee] [+-]? \d+ ) ?$ """ _numeric_regex = re.compile(_numeric_pattern, re.VERBOSE) scikit-bio-0.6.2/skbio/metadata/missing.py000066400000000000000000000064411464262511300204770ustar00rootroot00000000000000"""Defines Handling of different Missing classes for Metadata module.""" # ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- import pandas as pd import numpy as np from ._enan import make_nan_with_payload as _make_nan_with_payload from ._enan import get_payload_from_nan as _get_payload_from_nan def _encode_terms(namespace): enum = _MISSING_ENUMS[namespace] namespace = _NAMESPACE_LOOKUP.index(namespace) def encode(x): if not isinstance(x, str): return x try: code = enum.index(x) except ValueError: return x return _make_nan_with_payload(code, namespace=namespace) return encode def _handle_insdc_missing(series): return series.apply(_encode_terms("INSDC:missing")) def _handle_blank(series): return series def _handle_no_missing(series): if series.isna().any(): raise ValueError( "Missing values are not allowed in series/column" " (name=%r) when using scheme 'no-missing'." % series.name ) return series BUILTIN_MISSING = { "INSDC:missing": _handle_insdc_missing, "blank": _handle_blank, "no-missing": _handle_no_missing, } _MISSING_ENUMS = { "INSDC:missing": ( "not applicable", "missing", "not collected", "not provided", "restricted access", ) } # list index reflects the nan namespace, the "blank"/"no-missing" enums don't # apply here, since they aren't actually encoded in the NaNs _NAMESPACE_LOOKUP = ["INSDC:missing"] DEFAULT_MISSING = "blank" def series_encode_missing(series: pd.Series, enumeration: str) -> pd.Series: """Return encoded Missing values.""" if not isinstance(enumeration, str): TypeError("Wrong type for `enumeration`, expected string") try: encoder = BUILTIN_MISSING[enumeration] except KeyError: raise ValueError( "Unknown enumeration: %r, (available: %r)" % (enumeration, list(BUILTIN_MISSING.keys())) ) new = encoder(series) if series.dtype == object and new.isna().all(): # return to categorical of all missing values return new.astype(object) return new def series_extract_missing(series: pd.Series) -> pd.Series: """Return extracted Missing types from passed Series.""" def _decode(x): if 
np.issubdtype(type(x), np.floating) and np.isnan(x): code, namespace = _get_payload_from_nan(x) if namespace is None: return x elif namespace == 255: raise ValueError("Custom enumerations are not yet supported") else: try: enum = _MISSING_ENUMS[_NAMESPACE_LOOKUP[namespace]] except (IndexError, KeyError): return x try: return enum[code] except IndexError: return x return x missing = series[series.isna()] missing = missing.apply(_decode) return missing.astype(object) scikit-bio-0.6.2/skbio/metadata/tests/000077500000000000000000000000001464262511300176115ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/tests/__init__.py000066400000000000000000000005411464262511300217220ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/metadata/tests/data/000077500000000000000000000000001464262511300205225ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/000077500000000000000000000000001464262511300221505ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/column-name-conflicts-with-id-header.tsv000066400000000000000000000001001464262511300316630ustar00rootroot00000000000000sampleid col1 featureid col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/comments-and-empty-rows-only.tsv000066400000000000000000000000561464262511300303770ustar00rootroot00000000000000# # Hello, World! 
# Foo, # Bar, # Baz scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/data-longer-than-header.tsv000066400000000000000000000001041464262511300272540ustar00rootroot00000000000000sampleid col1 col2 col3 id1 1 a foo id2 2 b bar overflow id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/directive-after-directives-section.tsv000066400000000000000000000002231464262511300315610ustar00rootroot00000000000000id col1 col2 col3 # directives must appear *immediately* below header #q2:types numeric categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/directive-before-header.tsv000066400000000000000000000001371464262511300273530ustar00rootroot00000000000000#q2:types numeric categorical categorical id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/directive-longer-than-header.tsv000066400000000000000000000001551464262511300303270ustar00rootroot00000000000000sampleid col1 col2 col3 #q2:types numeric categorical categorical numeric id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/duplicate-column-names-with-whitespace.tsv000066400000000000000000000000771464262511300323630ustar00rootroot00000000000000id " col1 " col2 col1 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/duplicate-column-names.tsv000066400000000000000000000000651464262511300272550ustar00rootroot00000000000000id col1 col2 col1 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/duplicate-directives.tsv000066400000000000000000000002151464262511300270150ustar00rootroot00000000000000id col1 col2 col3 #q2:types numeric categorical categorical #q2:types categorical categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/duplicate-ids-with-whitespace.tsv000066400000000000000000000000741464262511300305410ustar00rootroot00000000000000id col1 col2 
col3 id1 1 a foo id2 2 b bar "id1 " 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/duplicate-ids.tsv000066400000000000000000000000651464262511300254360ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id1 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/empty-column-name.tsv000066400000000000000000000000611464262511300262520ustar00rootroot00000000000000id col1 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/empty-file000066400000000000000000000000001464262511300241340ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/empty-id.tsv000066400000000000000000000000621464262511300244340ustar00rootroot00000000000000ID col1 col2 col3 id1 1 a foo 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/header-only-with-comments-and-empty-rows.tsv000066400000000000000000000000761464262511300326000ustar00rootroot00000000000000# Hello, World! id col1 col2 col3 # Foo, # Bar, # Baz scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/header-only.tsv000066400000000000000000000000221464262511300251070ustar00rootroot00000000000000id col1 col2 col3 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/id-conflicts-with-id-header.tsv000066400000000000000000000000721464262511300300540ustar00rootroot00000000000000sampleid col1 col2 col3 id1 1 a foo id 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/invalid-header.tsv000066400000000000000000000001041464262511300255550ustar00rootroot00000000000000invalid_id_header col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/missing-unknown-scheme.tsv000066400000000000000000000004061464262511300273160ustar00rootroot00000000000000id col1 col2 col3 #q2:types numeric categorical categorical #q2:missing BAD:SCHEME INSDC:missing no-missing id1 1 a foo id2 2 b bar id3 3 c 42 id4 not applicable missing anything id5 not collected not provided whatever id6 restricted 
access restricted access 10 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/non-utf-8.tsv000066400000000000000000000001541464262511300244410ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/qiime1-empty.tsv000066400000000000000000000001301464262511300252210ustar00rootroot00000000000000#SampleID col1 col2 col3 # A QIIME 1 mapping file can have comments # below the header. scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/simple-utf-16be.txt000066400000000000000000000001541464262511300255310ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/simple-utf-16le.txt000066400000000000000000000001541464262511300255430ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/types-directive-non-numeric.tsv000066400000000000000000000007451464262511300302640ustar00rootroot00000000000000# All sorts of strings that shouldn't be interpreted as numbers in `col2`! # Note that the first few values in `col2` *can* be interpreted as numbers, # just to have a mixed set of numeric and non-numeric values. 
id col1 col2 #q2:types numeric numeric id1 1 42 id2 1 -42.50 id3 1 id4 1 a id5 1 foo id6 1 1,000 id7 1 1.000.0 id8 1 $42 id9 1 nan id10 1 NaN id11 1 NA id12 1 inf id13 1 +inf id14 1 -inf id15 1 Infinity id16 1 1_000_000 id17 1 0xAF id18 1 1e3e4 id19 1 e3 id20 1 sample-1 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/unrecognized-column-type.tsv000066400000000000000000000001271464262511300276540ustar00rootroot00000000000000id col1 col2 col3 #q2:types numeric foo categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/unrecognized-directive.tsv000066400000000000000000000001111464262511300273470ustar00rootroot00000000000000id col1 col2 col3 #q2:foo bar baz bar id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/whitespace-only-column-name.tsv000066400000000000000000000000701464262511300302270ustar00rootroot00000000000000id col1 " " col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/invalid/whitespace-only-id.tsv000066400000000000000000000000701464262511300264100ustar00rootroot00000000000000ID col1 col2 col3 id1 1 a foo " " 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/000077500000000000000000000000001464262511300216215ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/metadata/tests/data/valid/BOM-simple.txt000066400000000000000000000000701464262511300242630ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/all-cells-padded.tsv000066400000000000000000000000361464262511300254450ustar00rootroot00000000000000id col1 col2 col3 id1 id2 id3 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/biom-observation-metadata.tsv000066400000000000000000000001561464262511300274160ustar00rootroot00000000000000#OTUID taxonomy confidence # optional comments OTU_1 k__Bacteria;p__Firmicutes 0.890 OTU_2 k__Bacteria 0.9999 
scikit-bio-0.6.2/skbio/metadata/tests/data/valid/case-insensitive-types-directive.tsv000066400000000000000000000001361464262511300307460ustar00rootroot00000000000000id col1 col2 col3 #q2:types CATEGORICAL CategoricaL NuMeRiC id1 1 a -5 id2 2 b 0.0 id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/column-order.tsv000066400000000000000000000000541464262511300247640ustar00rootroot00000000000000id z y x id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/comments.tsv000066400000000000000000000010461464262511300242050ustar00rootroot00000000000000# pre-header # comment id col1 col2 col3 # post-header # comment id1 1 a foo id2 2 b bar # intra-data comment with another # sign # ## # comment with leading whitespace is still a comment. # comment with tab characters is also a comment! "# if the first cell is quoted, the parsing rules first process and strip double quotes, then check if the first cell begins with a pound sign" " # same rule applies if the de-quoted cell has leading whitespace (leading/trailing whitespace is *always* ignored)" id3 3 c 42 # trailing # comment scikit-bio-0.6.2/skbio/metadata/tests/data/valid/complete-types-directive.tsv000066400000000000000000000001431464262511300273030ustar00rootroot00000000000000id col1 col2 col3 #q2:types categorical categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/complete-types-directive_w_sk.tsv000066400000000000000000000001431464262511300303260ustar00rootroot00000000000000id col1 col2 col3 #sk:types categorical categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/empty-rows.tsv000066400000000000000000000002561464262511300245100ustar00rootroot00000000000000 id col1 col2 col3 id1 1 a foo id2 2 b bar " " "" " " " " " " id3 3 c 42 
scikit-bio-0.6.2/skbio/metadata/tests/data/valid/empty-types-directive.tsv000066400000000000000000000000771464262511300266370ustar00rootroot00000000000000id col1 col2 col3 #q2:types id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/jagged-trailing-columns.tsv000066400000000000000000000000701464262511300270620ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/jagged.tsv000066400000000000000000000000551464262511300236000ustar00rootroot00000000000000id col1 col2 col3 id1 1 a id2 2 b bar id3 3 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/leading-trailing-whitespace.tsv000066400000000000000000000006271464262511300277300ustar00rootroot00000000000000 # Leading/trailing whitespace is ignored in *any* type of cell, including # comments, empty rows, headers, directives, and data cells. # Double-quotes are always processed prior to stripping leading/trailing # whitespace within the cell. id "col1 " " col2" col3 #q2:types " numeric " categorical " categorical " id1 " 1 " a foo " " " id2 " 2 b "bar " id3 3 "c " 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/mac-line-endings.tsv000066400000000000000000000000651464262511300254720ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/minimal.tsv000066400000000000000000000000051464262511300240000ustar00rootroot00000000000000id a scikit-bio-0.6.2/skbio/metadata/tests/data/valid/missing-data.tsv000066400000000000000000000010041464262511300247320ustar00rootroot00000000000000# Missing data can be represented with empty cells or whitespace-only cells. # Test that values used to represent missing data in other programs # (e.g. pandas) are not treated as missing (e.g. "NA", "N/A"). Also test # columns that consist solely of missing data. By default, an empty column will # be treated as numeric data (column "NA" in this example). 
"col4" overrides # this behavior to make its empty column categorical. id col1 NA col3 col4 #q2:types categorical None 1 null nan N/A NA " " NA scikit-bio-0.6.2/skbio/metadata/tests/data/valid/missing-insdc-no-directive.tsv000066400000000000000000000002541464262511300275150ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 id4 not applicable missing anything id5 not collected not provided whatever id6 restricted access restricted access 10 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/missing-insdc.tsv000066400000000000000000000004111464262511300251220ustar00rootroot00000000000000id col1 col2 col3 #q2:types numeric categorical categorical #q2:missing INSDC:missing INSDC:missing no-missing id1 1 a foo id2 2 b bar id3 3 c 42 id4 not applicable missing anything id5 not collected not provided whatever id6 restricted access restricted access 10 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/no-columns.tsv000066400000000000000000000000151464262511300244450ustar00rootroot00000000000000id a b my-id scikit-bio-0.6.2/skbio/metadata/tests/data/valid/no-id-or-column-name-type-cast.tsv000066400000000000000000000001101464262511300301130ustar00rootroot00000000000000id 42.0 1000 -4.2 0.000001 2 b 2.5 0.004000 1 b 4.2 0.000000 3 c -9.999 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/no-newline-at-eof.tsv000066400000000000000000000000641464262511300256030ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42scikit-bio-0.6.2/skbio/metadata/tests/data/valid/non-standard-characters.tsv000066400000000000000000000013731464262511300270700ustar00rootroot00000000000000# See the corresponding unit test case for the goals of this file. The file # tests the following cases for IDs, column names, and cells. Many of the # choices are based on use-cases/bugs reported on the forum, Slack, etc. # # - Unicode characters # - Parentheses, underscores, less than (<), and greater than (>) # - Single and double quotes. 
Double quotes must be escaped according to the # Excel TSV dialect's double quote escaping rules. # - Escaped newlines (\n), carriage returns (\r), tabs (\t), and spaces # - Inline comment characters aren't treated as comments id ↩c@l1™ col(#2) #col'3 """""" "col 5" ©id##1 ƒoo (foo) #f o #o "fo o" ((id))2 ''2'' b#r "ba r" 'id_3<>' "b""ar" "c d" "4 2" """id#4""" b__a_z <42> >42 "i d 5" baz 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/numeric-column.tsv000066400000000000000000000001571464262511300253170ustar00rootroot00000000000000id col1 id1 0 id2 2.0 id3 0.00030 id4 -4.2 id5 1e-4 id6 1e4 id7 +1.5E+2 id8 id9 1. id10 .5 id11 1e-08 id12 -0 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/override-insdc.tsv000066400000000000000000000001371464262511300252750ustar00rootroot00000000000000id col1 #q2:missing no-missing id1 collected id2 not collected id3 not collected id4 collected scikit-bio-0.6.2/skbio/metadata/tests/data/valid/partial-types-directive.tsv000066400000000000000000000001131464262511300271240ustar00rootroot00000000000000id col1 col2 col3 #q2:types categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/qiime1.tsv000066400000000000000000000001731464262511300235450ustar00rootroot00000000000000#SampleID col1 col2 col3 # A QIIME 1 mapping file can have comments # below the header. 
id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/qiita-preparation-information.tsv000066400000000000000000000001541464262511300303330ustar00rootroot00000000000000sample_name BARCODE EXPERIMENT_DESIGN_DESCRIPTION id.1 ACGT longitudinal study id.2 TGCA longitudinal study scikit-bio-0.6.2/skbio/metadata/tests/data/valid/qiita-sample-information.tsv000066400000000000000000000001321464262511300272640ustar00rootroot00000000000000sample_name DESCRIPTION TITLE id.1 description 1 A Title id.2 description 2 Another Title scikit-bio-0.6.2/skbio/metadata/tests/data/valid/recommended-ids.tsv000066400000000000000000000000731464262511300254160ustar00rootroot00000000000000id col1 c6ca034a-223f-40b4-a0e0-45942912a5ea foo My.ID bar scikit-bio-0.6.2/skbio/metadata/tests/data/valid/rows-shorter-than-header.tsv000066400000000000000000000000441464262511300272110ustar00rootroot00000000000000id col1 col2 col3 id1 1 a id2 2 id3 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/simple-with-directive.tsv000066400000000000000000000001371464262511300265760ustar00rootroot00000000000000id col1 col2 col3 #q2:types numeric categorical categorical id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/simple.tsv000066400000000000000000000000651464262511300236510ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/simple.txt000066400000000000000000000000651464262511300236540ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/single-column.tsv000066400000000000000000000000321464262511300251260ustar00rootroot00000000000000id col1 id1 1 id2 2 id3 3 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/single-id.tsv000066400000000000000000000000361464262511300242310ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo 
scikit-bio-0.6.2/skbio/metadata/tests/data/valid/trailing-columns.tsv000066400000000000000000000000751464262511300256500ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/unix-line-endings.tsv000066400000000000000000000000651464262511300257150ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/data/valid/windows-line-endings.tsv000066400000000000000000000000711464262511300264210ustar00rootroot00000000000000id col1 col2 col3 id1 1 a foo id2 2 b bar id3 3 c 42 scikit-bio-0.6.2/skbio/metadata/tests/test_enan.py000066400000000000000000000025341464262511300221470ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest from skbio.metadata._enan import get_payload_from_nan, make_nan_with_payload class TestNanPayloads(unittest.TestCase): def test_normal_nan(self): normal_nan = float('nan') payload, namespace = get_payload_from_nan(normal_nan) self.assertIs(payload, None) self.assertIs(namespace, None) def test_roundtrip_payload(self): for namespace in range(0, 256): for payload in range(-50, 500): nan = make_nan_with_payload(payload, namespace) new_payload, new_namespace = get_payload_from_nan(nan) self.assertEqual(namespace, new_namespace) self.assertEqual(payload, new_payload) self.assertNotEqual(nan, nan) def test_user_namespace_default(self): nan = make_nan_with_payload(42) payload, namespace = get_payload_from_nan(nan) self.assertEqual(42, payload) self.assertEqual(255, namespace) self.assertNotEqual(nan, nan) scikit-bio-0.6.2/skbio/metadata/tests/test_intersection.py000066400000000000000000000177741464262511300237500ustar00rootroot00000000000000# checklist.py:CopyrightHeadersValidator IGNORE # ---------------------------------------------------------------------------- # This code is taken from bx-python project and added with a test for a new # function `update` from line 195 to 211. The license for this code is # included in licenses/bx_python.txt. 
# ---------------------------------------------------------------------------- import sys import unittest import random from skbio.metadata._intersection import IntervalObj from skbio.metadata._intersection import IntervalNode from skbio.metadata._intersection import IntervalTree # Note: More bounds checking on input parameters are performed # within test_interval.py class NeighborTestCase(unittest.TestCase): def setUp(self): iv = IntervalNode(50, 59, IntervalObj(50, 59)) for i in range(0, 110, 10): if i == 50: continue f = IntervalObj(i, i + 9) iv = iv.insert(f.start, f.end, f) self.intervals = iv def test_left(self): iv = self.intervals self.assertEqual(str(iv.left(60, n=2)), str([IntervalObj(50, 59), IntervalObj(40, 49)])) for i in range(10, 100, 10): r = iv.left(i, max_dist=10, n=1) self.assertEqual(r[0].end, i - 1) def test_toomany(self): iv = self.intervals self.assertEqual(len(iv.left(60, n=200)), 6) def test_right(self): iv = self.intervals self.assertEqual(str(iv.left(60, n=2)), str([IntervalObj(50, 59), IntervalObj(40, 49)])) def get_right_start(b10): r = iv.right(b10+1, n=1) assert len(r) == 1 return r[0].start for i in range(10, 100, 10): self.assertEqual(get_right_start(i), i + 10) for i in range(0, 100, 10): r = iv.right(i-1, max_dist=10, n=1) self.assertEqual(r[0].start, i) class UpDownStreamTestCase(unittest.TestCase): def setUp(self): iv = IntervalTree() iv.add_interval(IntervalObj(50, 59)) for i in range(0, 110, 10): if i == 50: continue f = IntervalObj(i, i + 9) iv.add_interval(f) self.intervals = iv def test_upstream(self): iv = self.intervals upstreams = iv.upstream_of_interval(IntervalObj(59, 60), num_intervals=200) for u in upstreams: self.assertTrue(u.end < 59) upstreams = iv.upstream_of_interval(IntervalObj(60, 70, strand=-1), num_intervals=200) for u in upstreams: self.assertTrue(u.start > 70) upstreams = iv.upstream_of_interval(IntervalObj(58, 58, strand=-1), num_intervals=200) for u in upstreams: self.assertTrue(u.start > 59) def 
test_downstream(self): iv = self.intervals downstreams = iv.downstream_of_interval(IntervalObj(59, 60), num_intervals=200) for d in downstreams: self.assertTrue(d.start > 60) downstreams = iv.downstream_of_interval(IntervalObj(59, 60, strand=-1), num_intervals=200) for d in downstreams: self.assertTrue(d.start < 59) def test_n(self): iv = self.intervals for i in range(0, 90, 10): r = iv.after(i, max_dist=20, num_intervals=2) self.assertEqual(r[0].start, i + 10) self.assertEqual(r[1].start, i + 20) r = iv.after_interval(IntervalObj(i, i), max_dist=20, num_intervals=2) self.assertEqual(r[0].start, i + 10) self.assertEqual(r[1].start, i + 20) class LotsaTestCase(unittest.TestCase): """ put lotsa data in the tree and make sure it works""" def setUp(self): iv = IntervalNode(1, 2, IntervalObj(1, 2)) self.max = 1000000 for i in range(0, self.max, 10): f = IntervalObj(i, i) iv = iv.insert(f.start, f.end, f) for i in range(600): iv = iv.insert(0, 1, IntervalObj(0, 1)) self.intervals = iv def test_count(self): iv = self.intervals r = iv.right(1, n=33) self.assertEqual(len(r), 33) left = iv.left(1, n=33) self.assertEqual(len(left), 1) u = iv.right(1, n=9999) self.assertEqual(len(u), 250) # now increase max_dist u = iv.right(1, n=9999, max_dist=99999) self.assertEqual(len(u), 9999) def test_max_dist(self): iv = self.intervals r = iv.right(1, max_dist=0, n=10) self.assertEqual(len(r), 0) for n, d in enumerate(range(10, 1000, 10)): r = iv.right(1, max_dist=d, n=10000) self.assertEqual(len(r), n + 1) def test_find(self): iv = self.intervals path = sys.path[:] sys.path = sys.path[2:] sys.path = path for t in range(25): start = random.randint(0, self.max - 10000) end = start + random.randint(100, 10000) results = iv.find(start, end) for feat in results: self.assertTrue( (feat.end >= start and feat.end <= end) or (feat.start <= end and feat.start >= start)) class IntervalTreeTest(unittest.TestCase): def setUp(self): iv = IntervalTree() n = 0 for i in range(1, 1000, 80): iv.insert(i, 
i + 10, dict(value=i*i)) # add is synonym for insert. iv.add(i + 20, i + 30, dict(astr=str(i*i))) # or insert/add an interval object with start, end attrs. iv.insert_interval(IntervalObj(i + 40, i + 50, value=dict(astr=str(i*i)))) iv.add_interval(IntervalObj(i + 60, i + 70, value=dict(astr=str(i*i)))) n += 4 self.intervals = self.iv = iv self.nintervals = n def test_find(self): r = self.iv.find(100, 200) self.assertEqual(len(r), 5) def test_edge_cases(self): iv = IntervalTree() iv.insert(1, 1, 'foo') iv.insert(3, 7, 'spam') iv.insert(8, 8, 'abc') self.assertEqual(iv.find(0, 1), []) self.assertEqual(iv.find(1, 1), ['foo']) self.assertEqual(iv.find(1, 2), ['foo']) self.assertEqual(iv.find(2, 3), []) self.assertEqual(iv.find(3, 3), ['spam']) self.assertEqual(iv.find(3, 4), ['spam']) self.assertEqual(iv.find(6, 7), ['spam']) self.assertEqual(iv.find(7, 7), []) self.assertEqual(iv.find(0, 8), ['foo', 'spam']) self.assertEqual(iv.find(8, 8), ['abc']) self.assertEqual(iv.find(8, 9), ['abc']) self.assertEqual(iv.find(9, 9), []) self.assertEqual(iv.find(0, 10), ['foo', 'spam', 'abc']) self.assertEqual(iv.find(6, 10), ['spam', 'abc']) self.assertEqual(iv.find(7, 9), ['abc']) def test_traverse(self): a = [] fn = a.append self.iv.traverse(fn) self.assertEqual(len(a), self.nintervals) def test_empty(self): iv = IntervalTree() self.assertEqual([], iv.find(100, 300)) self.assertEqual([], iv.after(100)) self.assertEqual([], iv.before(100)) self.assertEqual([], iv.after_interval(100)) self.assertEqual([], iv.before_interval(100)) self.assertEqual([], iv.upstream_of_interval(100)) self.assertEqual([], iv.downstream_of_interval(100)) self.assertEqual(None, iv.traverse(lambda x: x.append(1))) def test_public_interval(self): self.iv.traverse(lambda ival: self.assertTrue(ival.interval)) def test_update(self): i = 1 self.iv.update(i, i + 10, dict(value=i*i), dict(value=-1)) self.assertEqual([dict(value=-1)], self.iv.find(i, i+10)) if __name__ == "__main__": unittest.main() 
scikit-bio-0.6.2/skbio/metadata/tests/test_interval.py000066400000000000000000000772251464262511300230630ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest from copy import deepcopy, copy from skbio.metadata._interval import (_assert_valid_bound, _assert_valid_fuzzy) from skbio.metadata import Interval from skbio.metadata import IntervalMetadata from skbio.metadata._intersection import IntervalTree from skbio.util._testing import ReallyEqualMixin class TestInterval(unittest.TestCase, ReallyEqualMixin): def setUp(self): self.upper_bound = 100 self.im = IntervalMetadata(self.upper_bound) def test_init_default(self): f = Interval(self.im, bounds=[(0, 2), (4, self.upper_bound)]) self.assertTrue(f._interval_metadata is not None) self.assertListEqual(f.bounds, [(0, 2), (4, self.upper_bound)]) self.assertListEqual(f.fuzzy, [(False, False), (False, False)]) self.assertDictEqual(f.metadata, {}) def test_init(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) self.assertTrue(f._interval_metadata is not None) self.assertListEqual(f.bounds, [(1, 2), (4, 7)]) self.assertListEqual(f.fuzzy, [(True, False), (False, False)]) self.assertDictEqual(f.metadata, {'name': 'sagA', 'function': 'transport'}) def test_init_iterables(self): f = Interval(interval_metadata=self.im, bounds=((1, 2), (4, 7)), fuzzy=((True, False), (False, False)), metadata={'name': 'sagA', 'function': 'transport'}) self.assertTrue(f._interval_metadata is not None) self.assertListEqual(f.bounds, [(1, 2), (4, 7)]) self.assertListEqual(f.fuzzy, [(True, False), 
(False, False)]) self.assertDictEqual(f.metadata, {'name': 'sagA', 'function': 'transport'}) def test_init_generator(self): def gen(): for x in [(1, 2), (4, 7)]: yield x f = Interval(interval_metadata=self.im, bounds=gen(), fuzzy=((True, False), (False, False)), metadata={'name': 'sagA', 'function': 'transport'}) self.assertTrue(f._interval_metadata is not None) self.assertListEqual(f.bounds, [(1, 2), (4, 7)]) self.assertListEqual(f.fuzzy, [(True, False), (False, False)]) self.assertDictEqual(f.metadata, {'name': 'sagA', 'function': 'transport'}) def test_init_bounds_scrambled(self): f = Interval(interval_metadata=self.im, bounds=[(4, 7), (1, 2)], fuzzy=[(True, False), (False, True)], metadata={'name': 'sagA', 'function': 'transport'}) self.assertTrue(f._interval_metadata is not None) self.assertListEqual(f.bounds, [(1, 2), (4, 7)]) self.assertListEqual(f.fuzzy, [(False, True), (True, False)]) self.assertDictEqual(f.metadata, {'name': 'sagA', 'function': 'transport'}) def test_init_no_interval_metadata(self): with self.assertRaises(TypeError): Interval(interval_metadata=None, bounds=[(4, 7)], metadata={'name': 'sagA', 'function': 'transport'}) def test_init_empty_metadata(self): for i in 0, 1: # test that no exception is raised Interval(interval_metadata=self.im, bounds=[(i, i)]) def test_init_out_of_bounds(self): with self.assertRaises(ValueError): Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 101)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) with self.assertRaises(ValueError): Interval(interval_metadata=self.im, bounds=[(-1, 2), (4, 6)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) def test_init_bad_bounds(self): with self.assertRaises(TypeError): Interval(interval_metadata=self.im, bounds=[1, (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) def test_init_bad_fuzzy(self): with self.assertRaises(ValueError): 
Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(False, False)], metadata={'name': 'sagA', 'function': 'transport'}) def test_repr(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2)], metadata={'name': 'sagA'}) exp = (r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\]," r" fuzzy=\[\(False, False\)\], metadata={'name': 'sagA'}\)") obs = repr(f) self.assertRegex(obs, exp) # test for dropped f.drop() exp = (r"Interval\(dropped=True, bounds=\[\(1, 2\)\]," r" fuzzy=\[\(False, False\)\], metadata={'name': 'sagA'}\)") obs = repr(f) self.assertRegex(obs, exp) def test_drop(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2)], metadata={'name': 'sagA'}) f.drop() self.assertTrue(f._interval_metadata is None) self.assertTrue(f.dropped) self.assertTrue(f.bounds, [(1, 2)]) self.assertTrue(f.metadata, {'name': 'sagA'}) # test the idempotence f.drop() self.assertTrue(f._interval_metadata is None) self.assertTrue(f.dropped) self.assertTrue(f.bounds, [(1, 2)]) self.assertTrue(f.metadata, {'name': 'sagA'}) def test_eq(self): f0 = Interval(interval_metadata=self.im, bounds=[(4, 7), (1, 2)], fuzzy=[(False, False), (True, False)], metadata={'name': 'sagA', 'function': 'transport'}) f1 = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f2 = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f3 = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, True), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f4 = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 8)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f5 = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagB', 'function': 
'transport'}) # scramble bounds/fuzzy self.assertReallyEqual(f0, f1) self.assertReallyEqual(f2, f1) # diff fuzzy self.assertReallyNotEqual(f1, f3) # diff bounds self.assertReallyNotEqual(f1, f4) # diff metadata self.assertReallyNotEqual(f1, f5) def test_get_bounds(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) self.assertEqual(f.bounds, [(1, 2), (4, 7)]) self.assertEqual(self.im._is_stale_tree, True) def test_set_bounds(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f.bounds = [(4, 7), (1, 3)] self.assertEqual(f.bounds, [(1, 3), (4, 7)]) self.assertEqual(f.fuzzy, [(False, False), (False, False)]) self.assertEqual(self.im._is_stale_tree, True) def test_set_bounds_bad(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) for value in [1, 's']: with self.assertRaises(TypeError): f.bounds = value for value in [[(-1, 2)], # start < lower_bound [(1, 101)], # end > upper_bound [(3, 1)], # start < end [('s', 1)], (), None]: # invalid values with self.assertRaises(ValueError): f.bounds = value def test_get_fuzzy(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) self.assertEqual(f.fuzzy, [(True, False), (False, False)]) def test_set_fuzzy(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f.fuzzy = [(False, False), (False, False)] self.assertEqual(f.fuzzy, [(False, False), (False, False)]) def test_set_fuzzy_bad(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, 
False)], metadata={'name': 'sagA', 'function': 'transport'}) for value in [[(False, False)], (), None]: with self.assertRaises(ValueError): f.fuzzy = value for value in [1, True]: with self.assertRaises(TypeError): f.fuzzy = value def test_delete_fuzzy(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) del f.fuzzy self.assertEqual(f.fuzzy, [(False, False), (False, False)]) # delete again del f.fuzzy self.assertEqual(f.fuzzy, [(False, False), (False, False)]) def test_get_metadata(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f.metadata['name'] = 'sagB' self.assertEqual(f.metadata, {'name': 'sagB', 'function': 'transport'}) def test_set_metadata(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) f.metadata = {'name': 'sagB', 'function': 'transport'} self.assertDictEqual(f.metadata, {'name': 'sagB', 'function': 'transport'}) f.metadata = {} self.assertDictEqual(f.metadata, {}) def test_set_metadata_bad(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) for value in [1, '', None]: with self.assertRaises(TypeError): f.metadata = value def test_delete_metadata(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2), (4, 7)], fuzzy=[(True, False), (False, False)], metadata={'name': 'sagA', 'function': 'transport'}) del f.metadata self.assertEqual(f.metadata, {}) def test_set_delete_on_dropped(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2)], fuzzy=[(True, False)], metadata={'name': 'sagA'}) f.drop() with self.assertRaises(RuntimeError): f.fuzzy = None with self.assertRaises(RuntimeError): f.bounds = [(1, 
2)] with self.assertRaises(RuntimeError): f.metadata = {} with self.assertRaises(RuntimeError): del f.fuzzy with self.assertRaises(RuntimeError): del f.metadata def test_get_on_dropped(self): f = Interval(interval_metadata=self.im, bounds=[(1, 2)], fuzzy=[(True, False)], metadata={'name': 'sagA'}) f.drop() self.assertEqual(f.fuzzy, [(True, False)]) self.assertEqual(f.bounds, [(1, 2)]) self.assertEqual(f.metadata, {'name': 'sagA'}) class TestIntervalUtil(unittest.TestCase): def test_assert_valid_bound(self): intvls = [(1, 2), (-1, 2)] for intvl in intvls: try: _assert_valid_bound(intvl) except TypeError: self.assertTrue(False) def test_assert_valid_bound_wrong_type(self): intvls = [[1, 2], 1, [1, 2, 3]] for intvl in intvls: with self.assertRaises(TypeError): _assert_valid_bound(intvl) def test_assert_valid_bound_wrong_value(self): intvls = [(1, 2, 3), (2, 1), (True, 0), ('s', 'r')] for intvl in intvls: with self.assertRaises(ValueError): _assert_valid_bound(intvl) def test_assert_valid_fuzzy(self): fuzzy = [(True, False), (True, True)] for fuzzy in fuzzy: try: _assert_valid_fuzzy(fuzzy) except Exception: self.assertTrue(False) def test_assert_valid_fuzzy_wrong_value(self): fuzzy = [(True, False, True), ()] for fuzzy in fuzzy: with self.assertRaises(ValueError): _assert_valid_fuzzy(fuzzy) def test_assert_valid_fuzzy_wrong_type(self): fuzzy = [[True, False], 's', 1, (0, 1), ('s', '')] for fuzzy in fuzzy: with self.assertRaises(TypeError): _assert_valid_fuzzy(fuzzy) class TestIntervalMetadata(unittest.TestCase, ReallyEqualMixin): def setUp(self): self.upper_bound = 10 self.im_empty = IntervalMetadata(self.upper_bound) self.im_1 = IntervalMetadata(self.upper_bound) self.im_1_1 = Interval( interval_metadata=self.im_1, bounds=[(1, 2), (4, self.upper_bound)], metadata={'gene': 'sagA', 'bound': 0}) self.im_2 = IntervalMetadata(self.upper_bound) self.im_2_1 = Interval( interval_metadata=self.im_2, bounds=[(1, 2), (4, self.upper_bound)], metadata={'gene': 'sagA', 'bound': 0}) 
self.im_2_2 = Interval( interval_metadata=self.im_2, bounds=[(3, 5)], metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]}) def test_copy_empty(self): obs = copy(self.im_empty) self.assertEqual(obs, self.im_empty) self.assertIsNot(obs._intervals, self.im_empty._intervals) self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree) def test_copy(self): obs = copy(self.im_2) self.assertEqual(obs, self.im_2) self.assertIsNot(obs._intervals, self.im_2._intervals) self.assertIsNot(obs._interval_tree, self.im_2._interval_tree) for i in range(self.im_2.num_interval_features): i1, i2 = obs._intervals[i], self.im_2._intervals[i] self.assertIsNot(i1, i2) self.assertIsNot(i1.bounds, i2.bounds) self.assertIsNot(i1.fuzzy, i2.fuzzy) self.assertIsNot(i1._interval_metadata, i2._interval_metadata) self.assertIsNot(i1.metadata, i2.metadata) for k in i1.metadata: self.assertIs(i1.metadata[k], i2.metadata[k]) def test_deepcopy(self): obs = deepcopy(self.im_2) self.assertEqual(obs, self.im_2) self.assertIsNot(obs._intervals, self.im_2._intervals) self.assertIsNot(obs._interval_tree, self.im_2._interval_tree) for i in range(self.im_2.num_interval_features): i1, i2 = obs._intervals[i], self.im_2._intervals[i] self.assertIsNot(i1, i2) self.assertIsNot(i1.bounds, i2.bounds) self.assertIsNot(i1.fuzzy, i2.fuzzy) self.assertIsNot(i1.metadata, i2.metadata) i2.metadata['spam'].append(1) self.assertEqual(i2.metadata, {'gene': 'sagB', 'bound': 0, 'spam': [0, 1]}) self.assertEqual(i1.metadata, {'gene': 'sagB', 'bound': 0, 'spam': [0]}) def test_deepcopy_memo_is_respected(self): memo = {} deepcopy(self.im_1, memo) self.assertGreater(len(memo), 2) def test_init(self): self.assertFalse(self.im_empty._is_stale_tree) self.assertEqual(self.im_empty._intervals, []) def test_init_upper_bound_lt_lower_bound(self): # test that no exception is raised IntervalMetadata(0) with self.assertRaises(ValueError): IntervalMetadata(-1) def test_upper_bound_is_none(self): im = IntervalMetadata(None) # should not 
raise error im.add([(0, 1000000000)]) self.assertIsNone(im.upper_bound) with self.assertRaisesRegex( TypeError, r'upper bound is `None`'): im._reverse() with self.assertRaisesRegex( TypeError, r'upper bound is `None`'): IntervalMetadata.concat([self.im_1, im]) def test_init_copy_from(self): for i in [None, 99, 999]: obs = IntervalMetadata(i, self.im_1) exp = IntervalMetadata(i) exp.add(bounds=[(1, 2), (4, self.upper_bound)], metadata={'gene': 'sagA', 'bound': 0}) self.assertEqual(obs, exp) def test_init_copy_from_empty(self): for i in [None, 0, 9, 99, 999]: obs = IntervalMetadata(i, self.im_empty) exp = IntervalMetadata(i) self.assertEqual(obs, exp) # test it is shallow copy self.assertIsNot(obs._intervals, self.im_empty._intervals) self.assertIsNot(obs._interval_tree, self.im_empty._interval_tree) def test_init_copy_from_shallow_copy(self): obs = IntervalMetadata(self.upper_bound, self.im_2) self.assertEqual(self.im_2, obs) # test it is shallow copy self.assertIsNot(obs._intervals, self.im_2._intervals) self.assertIsNot(obs._interval_tree, self.im_2._interval_tree) for i in range(self.im_2.num_interval_features): i1, i2 = obs._intervals[i], self.im_2._intervals[i] self.assertIsNot(i1, i2) self.assertIsNot(i1.bounds, i2.bounds) self.assertIsNot(i1.fuzzy, i2.fuzzy) self.assertIsNot(i1._interval_metadata, i2._interval_metadata) self.assertIsNot(i1.metadata, i2.metadata) for k in i1.metadata: self.assertIs(i1.metadata[k], i2.metadata[k]) def test_init_copy_from_error(self): i = self.upper_bound - 1 with self.assertRaisesRegex( ValueError, r'larger than upper bound \(%r\)' % i): IntervalMetadata(i, self.im_2) def test_num_interval_features(self): self.assertEqual(self.im_empty.num_interval_features, 0) self.assertEqual(self.im_1.num_interval_features, 1) self.assertEqual(self.im_2.num_interval_features, 2) def test_duplicate(self): '''Test query and drop methods on duplicate Intervals.''' intvl_1 = self.im_empty.add([(1, 2)]) intvl_2 = self.im_empty.add([(1, 2)]) 
self.assertEqual(len(list(self.im_empty.query([(1, 2)]))), 2) self.im_empty.drop([intvl_1]) self.assertEqual(len(self.im_empty._intervals), 1) self.assertTrue(self.im_empty._intervals[0] is intvl_2) def test_duplicate_bounds(self): intvl = self.im_empty.add([(1, 2), (1, 2)]) intvls = list(self.im_empty.query([(1, 2)])) self.assertEqual(len(intvls), 1) self.assertTrue(intvl is intvls[0]) def test_concat_empty(self): for i in 0, 1, 2: obs = IntervalMetadata.concat([self.im_empty] * i) exp = IntervalMetadata(self.upper_bound * i) self.assertEqual(obs, exp) obs = IntervalMetadata.concat([]) self.assertEqual(obs, IntervalMetadata(0)) def test_concat(self): im1 = IntervalMetadata(3) im2 = IntervalMetadata(4) im3 = IntervalMetadata(5) im1.add([(0, 2)], [(True, True)]) im2.add([(0, 3)], [(True, False)], {'gene': 'sagA'}) im2.add([(2, 4)], metadata={'gene': 'sagB'}) im3.add([(1, 5)], [(False, True)], {'gene': 'sagC'}) obs = IntervalMetadata.concat([im1, im2, im3]) exp = IntervalMetadata(12) exp.add(bounds=[(0, 2)], fuzzy=[(True, True)]) exp.add(bounds=[(3, 6)], fuzzy=[(True, False)], metadata={'gene': 'sagA'}) exp.add(bounds=[(5, 7)], metadata={'gene': 'sagB'}) exp.add(bounds=[(8, 12)], fuzzy=[(False, True)], metadata={'gene': 'sagC'}) self.assertEqual(obs, exp) def test_merge(self): # empty + empty im = IntervalMetadata(self.upper_bound) self.im_empty.merge(im) self.assertEqual(self.im_empty, im) # empty + non-empty self.im_empty.merge(self.im_1) self.assertEqual(self.im_empty, self.im_1) # non-empty + non-empty self.im_empty.merge(self.im_2) self.im_2.merge(self.im_1) self.assertEqual(self.im_empty, self.im_2) def test_merge_unequal_upper_bounds(self): n = 3 im1 = IntervalMetadata(n) for im in [self.im_empty, self.im_1]: with self.assertRaisesRegex( ValueError, r'not equal \(%d != %d\)' % (self.upper_bound, n)): im.merge(im1) def test_merge_to_unbounded(self): for im in [self.im_empty, self.im_1, IntervalMetadata(None)]: obs = IntervalMetadata(None) obs.merge(im) 
self.assertIsNone(obs.upper_bound) self.assertEqual(obs._intervals, im._intervals) def test_merge_unbounded_to_bounded(self): im = IntervalMetadata(None) with self.assertRaisesRegex( ValueError, r'Cannot merge an unbound IntervalMetadata object ' 'to a bounded one'): self.im_1.merge(im) # original im is not changed self.assertIsNone(im.upper_bound) self.assertEqual(im._intervals, []) def test_sort(self): interval = Interval( self.im_2, [(1, 2), (3, 8)], metadata={'gene': 'sagA', 'bound': 0}) im = deepcopy(self.im_2) self.im_2.sort(False) # check sorting does not have other side effects self.assertEqual(im, self.im_2) self.assertEqual(self.im_2._intervals, [self.im_2_2, self.im_2_1, interval]) self.im_2.sort() self.assertEqual(im, self.im_2) self.assertEqual(self.im_2._intervals, [interval, self.im_2_1, self.im_2_2]) self.im_empty.sort() self.assertEqual(self.im_empty, IntervalMetadata(self.upper_bound)) def test_add_eq_upper_bound(self): self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound)], metadata={'gene': 'sagA', 'bound': 0}) self.assertTrue(self.im_empty._is_stale_tree) interval = self.im_empty._intervals[0] self.assertEqual(interval.bounds, [(1, 2), (4, self.upper_bound)]) self.assertEqual(interval.metadata, {'gene': 'sagA', 'bound': 0}) self.assertTrue(isinstance(self.im_empty._interval_tree, IntervalTree)) def test_add_gt_upper_bound(self): with self.assertRaises(ValueError): self.im_empty.add(bounds=[(1, 2), (4, self.upper_bound+1)], metadata={'gene': 'sagA', 'bound': 0}) def test_add_eq_start_end_bound(self): for i in 0, 1, self.upper_bound: # test that no exception is raised self.im_empty.add(bounds=[(i, i)], metadata={'gene': 'sagA', 'bound': 0}) def test_query_attribute(self): intervals = self.im_2._query_attribute({}) for i, j in zip(intervals, self.im_2._intervals): self.assertEqual(i, j) intervals = list(self.im_2._query_attribute(None)) self.assertEqual(len(intervals), 0) for i in self.im_2._intervals: intervals = 
list(self.im_2._query_attribute(i.metadata)) self.assertEqual(len(intervals), 1) self.assertEqual(intervals[0], i) def test_query_interval(self): intervals = list(self.im_2._query_interval((1, 2))) self.assertEqual(len(intervals), 1) self.assertEqual(intervals[0], self.im_2_1) intervals = list(self.im_2._query_interval((3, 4))) self.assertEqual(len(intervals), 1) self.assertEqual(intervals[0], self.im_2_2) intervals = {repr(i) for i in self.im_2._query_interval((1, 7))} self.assertEqual(len(intervals), 2) self.assertSetEqual(intervals, {repr(i) for i in self.im_2._intervals}) def test_query_interval_upper_bound(self): intervals = list(self.im_2._query_interval((self.upper_bound-1, self.upper_bound))) self.assertEqual(intervals, [self.im_2_1]) def test_query(self): intervals = list(self.im_2.query(bounds=[(1, 5)], metadata={'gene': 'sagA'})) self.assertEqual(len(intervals), 1) self.assertEqual(intervals[0], self.im_2_1) def test_query_empty(self): intervals = list(self.im_1.query()) self.assertEqual(len(intervals), 1) def test_query_no_hits(self): intervals = list(self.im_2.query(bounds=[(self.upper_bound, 200)])) self.assertEqual(len(intervals), 0) intervals = list(self.im_2.query(metadata={'gene': 'sagC'})) self.assertEqual(len(intervals), 0) intervals = list(self.im_2.query(bounds=[(1, 2)], metadata={'gene': 'sagC'})) self.assertEqual(len(intervals), 0) def test_query_interval_only(self): for loc in [[(1, 7)], [(1, 2), (3, 4)]]: intervals = list(self.im_2.query(bounds=loc)) self.assertEqual(len(intervals), 2) self.assertEqual(intervals[0], self.im_2_1) self.assertEqual(intervals[1], self.im_2_2) def test_query_metadata_only(self): intervals = list(self.im_2.query(metadata={'gene': 'sagB'})) self.assertEqual(len(intervals), 1) self.assertEqual(intervals[0], self.im_2_2) intervals = list(self.im_2.query(metadata={'bound': 0})) self.assertEqual(len(intervals), 2) self.assertEqual(intervals[0], self.im_2_1) self.assertEqual(intervals[1], self.im_2_2) def 
test_drop(self): intvl = self.im_2._intervals[0] self.im_2.drop([intvl]) self.assertEqual(len(self.im_2._intervals), 1) self.assertEqual(self.im_2._intervals[0], self.im_2_2) # test the intvl was set to dropped self.assertTrue(intvl.dropped) def test_drop_all(self): self.im_2.drop(self.im_2._intervals) self.assertEqual(self.im_2, self.im_empty) def test_drop_negate(self): intvl = self.im_2._intervals[0] self.im_2.drop([intvl], negate=True) self.assertEqual(len(self.im_2._intervals), 1) self.assertEqual(self.im_2._intervals[0], intvl) # test the dropped intvl was set to dropped self.assertTrue(self.im_2_2.dropped) def test_reverse(self): self.im_2._reverse() Interval( interval_metadata=self.im_empty, bounds=[(0, 6), (8, 9)], metadata={'gene': 'sagA', 'bound': 0}) Interval( interval_metadata=self.im_empty, bounds=[(5, 7)], metadata={'gene': 'sagB', 'bound': 0, 'spam': [0]}) self.assertEqual(self.im_2, self.im_empty) def test_eq_ne(self): im1 = IntervalMetadata(10) im1.add(metadata={'gene': 'sagA', 'bound': '0'}, bounds=[(0, 2), (4, 7)]) im1.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)]) # The ordering shouldn't matter im2 = IntervalMetadata(10) im2.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)]) im2.add(metadata={'gene': 'sagA', 'bound': '0'}, bounds=[(0, 2), (4, 7)]) im3 = IntervalMetadata(10) im3.add(metadata={'gene': 'sagA', 'bound': '3'}, bounds=[(0, 2), (4, 7)]) im3.add(metadata={'gene': 'sagB', 'bound': '3'}, bounds=[(3, 5)]) self.assertReallyEqual(im1, im2) self.assertReallyNotEqual(im1, im3) def test_ne_diff_bounds(self): im1 = IntervalMetadata(10) im2 = IntervalMetadata(9) intvl = {'bounds': [(0, 1)], 'metadata': {'spam': 'foo'}} im1.add(**intvl) im2.add(**intvl) self.assertReallyNotEqual(im1, im2) def test_repr(self): exp = '''0 interval features -------------------''' self.assertEqual(repr(self.im_empty), exp) self.im_empty.add([(1, 2)], metadata={'gene': 'sagA'}) exp = ("1 interval feature\n" "------------------\n" 
r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], " r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)") self.assertRegex(repr(self.im_empty), exp) self.im_empty.add([(3, 4)], metadata={'gene': 'sagB'}) self.im_empty.add([(3, 4)], metadata={'gene': 'sagC'}) self.im_empty.add([(3, 4)], metadata={'gene': 'sagD'}) self.im_empty.add([(3, 4)], metadata={'gene': 'sagE'}) self.im_empty.add([(3, 4)], metadata={'gene': 'sagF'}) exp = ("6 interval features\n" "-------------------\n" r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(1, 2\)\], " r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagA'}\)\n" r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], " r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagB'}\)\n" r"...\n" r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], " r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagE'}\)\n" r"Interval\(interval_metadata=<[0-9]+>, bounds=\[\(3, 4\)\], " r"fuzzy=\[\(False, False\)\], metadata={'gene': 'sagF'}\)") self.assertRegex(repr(self.im_empty), exp) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/metadata/tests/test_io.py000066400000000000000000001560751464262511300216470ustar00rootroot00000000000000import collections import os.path import tempfile import unittest import numpy as np import pandas as pd from skbio.util import get_data_path from skbio.metadata._metadata import (SampleMetadata, CategoricalMetadataColumn, NumericMetadataColumn) from skbio.metadata.io import MetadataFileError # NOTE: many of the test files in the `data` directory intentionally have # leading/trailing whitespace characters on some lines, as well as mixed usage # of spaces, tabs, carriage returns, and newlines. When editing these files, # please make sure your code editor doesn't strip these leading/trailing # whitespace characters (e.g. 
Atom does this by default), nor automatically # modify the files in some other way such as converting Windows-style CRLF # line terminators to Unix-style newlines. # # When committing changes to the files, carefully review the diff to make sure # unintended changes weren't introduced. class TestLoadErrors(unittest.TestCase): def test_path_does_not_exist(self): with self.assertRaisesRegex(MetadataFileError, "Metadata file path doesn't exist"): SampleMetadata.load( '/qiime2/unit/tests/hopefully/this/path/does/not/exist') def test_path_is_directory(self): fp = get_data_path('valid') with self.assertRaisesRegex(MetadataFileError, "path points to something other than a " "file"): SampleMetadata.load(fp) def test_non_utf_8_file(self): fp = get_data_path('invalid/non-utf-8.tsv') with self.assertRaisesRegex(MetadataFileError, 'encoded as UTF-8 or ASCII'): SampleMetadata.load(fp) def test_utf_16_le_file(self): fp = get_data_path('invalid/simple-utf-16le.txt') with self.assertRaisesRegex(MetadataFileError, 'UTF-16 Unicode'): SampleMetadata.load(fp) def test_utf_16_be_file(self): fp = get_data_path('invalid/simple-utf-16be.txt') with self.assertRaisesRegex(MetadataFileError, 'UTF-16 Unicode'): SampleMetadata.load(fp) def test_empty_file(self): fp = get_data_path('invalid/empty-file') with self.assertRaisesRegex(MetadataFileError, 'locate header.*file may be empty'): SampleMetadata.load(fp) def test_comments_and_empty_rows_only(self): fp = get_data_path('invalid/comments-and-empty-rows-only.tsv') with self.assertRaisesRegex(MetadataFileError, 'locate header.*only of comments or empty ' 'rows'): SampleMetadata.load(fp) def test_header_only(self): fp = get_data_path('invalid/header-only.tsv') with self.assertRaisesRegex(MetadataFileError, 'at least one ID'): SampleMetadata.load(fp) def test_header_only_with_comments_and_empty_rows(self): fp = get_data_path( 'invalid/header-only-with-comments-and-empty-rows.tsv') with self.assertRaisesRegex(MetadataFileError, 'at least one ID'): 
SampleMetadata.load(fp) def test_qiime1_empty_mapping_file(self): fp = get_data_path('invalid/qiime1-empty.tsv') with self.assertRaisesRegex(MetadataFileError, 'at least one ID'): SampleMetadata.load(fp) def test_invalid_header(self): fp = get_data_path('invalid/invalid-header.tsv') with self.assertRaisesRegex(MetadataFileError, 'unrecognized ID column name.*' 'invalid_id_header'): SampleMetadata.load(fp) def test_empty_id(self): fp = get_data_path('invalid/empty-id.tsv') with self.assertRaisesRegex(MetadataFileError, 'empty metadata ID'): SampleMetadata.load(fp) def test_whitespace_only_id(self): fp = get_data_path('invalid/whitespace-only-id.tsv') with self.assertRaisesRegex(MetadataFileError, 'empty metadata ID'): SampleMetadata.load(fp) def test_empty_column_name(self): fp = get_data_path('invalid/empty-column-name.tsv') with self.assertRaisesRegex(MetadataFileError, 'column without a name'): SampleMetadata.load(fp) def test_whitespace_only_column_name(self): fp = get_data_path('invalid/whitespace-only-column-name.tsv') with self.assertRaisesRegex(MetadataFileError, 'column without a name'): SampleMetadata.load(fp) def test_duplicate_ids(self): fp = get_data_path('invalid/duplicate-ids.tsv') with self.assertRaisesRegex(MetadataFileError, 'IDs must be unique.*id1'): SampleMetadata.load(fp) def test_duplicate_ids_with_whitespace(self): fp = get_data_path('invalid/duplicate-ids-with-whitespace.tsv') with self.assertRaisesRegex(MetadataFileError, 'IDs must be unique.*id1'): SampleMetadata.load(fp) def test_duplicate_column_names(self): fp = get_data_path('invalid/duplicate-column-names.tsv') with self.assertRaisesRegex(MetadataFileError, 'Column names must be unique.*col1'): SampleMetadata.load(fp) def test_duplicate_column_names_with_whitespace(self): fp = get_data_path( 'invalid/duplicate-column-names-with-whitespace.tsv') with self.assertRaisesRegex(MetadataFileError, 'Column names must be unique.*col1'): SampleMetadata.load(fp) def 
test_id_conflicts_with_id_header(self): fp = get_data_path('invalid/id-conflicts-with-id-header.tsv') with self.assertRaisesRegex(MetadataFileError, "ID 'id' conflicts.*ID column header"): SampleMetadata.load(fp) def test_column_name_conflicts_with_id_header(self): fp = get_data_path( 'invalid/column-name-conflicts-with-id-header.tsv') with self.assertRaisesRegex(MetadataFileError, "column name 'featureid' conflicts.*ID " "column header"): SampleMetadata.load(fp) def test_column_types_unrecognized_column_name(self): fp = get_data_path('valid/simple.tsv') with self.assertRaisesRegex(MetadataFileError, 'not_a_column.*column_types.*not a column ' 'in the metadata file'): SampleMetadata.load(fp, column_types={'not_a_column': 'numeric'}) def test_column_types_unrecognized_column_type(self): fp = get_data_path('valid/simple.tsv') with self.assertRaisesRegex(MetadataFileError, 'col2.*column_types.*unrecognized column ' 'type.*CATEGORICAL'): SampleMetadata.load(fp, column_types={'col1': 'numeric', 'col2': 'CATEGORICAL'}) def test_column_types_not_convertible_to_numeric(self): fp = get_data_path('valid/simple.tsv') with self.assertRaisesRegex(MetadataFileError, "column 'col3' to numeric.*could not be " "interpreted as numeric: 'bar', 'foo'"): SampleMetadata.load(fp, column_types={'col1': 'numeric', 'col2': 'categorical', 'col3': 'numeric'}) def test_column_types_override_directive_not_convertible_to_numeric(self): fp = get_data_path('valid/simple-with-directive.tsv') with self.assertRaisesRegex(MetadataFileError, "column 'col3' to numeric.*could not be " "interpreted as numeric: 'bar', 'foo'"): SampleMetadata.load(fp, column_types={'col3': 'numeric'}) def test_directive_before_header(self): fp = get_data_path('invalid/directive-before-header.tsv') with self.assertRaisesRegex(MetadataFileError, 'directive.*#q2:types.*searching for ' 'header'): SampleMetadata.load(fp) def test_unrecognized_directive(self): fp = get_data_path('invalid/unrecognized-directive.tsv') with 
self.assertRaisesRegex(MetadataFileError, 'Unrecognized directive.*#q2:foo.*' '#q2:types.*#q2:missing.*directive'): SampleMetadata.load(fp) def test_duplicate_directives(self): fp = get_data_path('invalid/duplicate-directives.tsv') with self.assertRaisesRegex(MetadataFileError, 'duplicate directive.*#q2:types'): SampleMetadata.load(fp) def test_unrecognized_column_type_in_directive(self): fp = get_data_path('invalid/unrecognized-column-type.tsv') with self.assertRaisesRegex(MetadataFileError, 'col2.*unrecognized column type.*foo.*' '#q2:types directive'): SampleMetadata.load(fp) def test_column_types_directive_not_convertible_to_numeric(self): fp = get_data_path('invalid/types-directive-non-numeric.tsv') # This error message regex is intentionally verbose because we want to # assert that many different types of non-numeric strings aren't # interpreted as numbers. The error message displays a sorted list of # all values that couldn't be converted to numbers, making it possible # to test a variety of non-numeric strings in a single test case. 
msg = (r"column 'col2' to numeric.*could not be interpreted as " r"numeric: '\$42', '\+inf', '-inf', '0xAF', '1,000', " r"'1\.000\.0', '1_000_000', '1e3e4', 'Infinity', 'NA', 'NaN', " "'a', 'e3', 'foo', 'inf', 'nan', 'sample-1'") with self.assertRaisesRegex(MetadataFileError, msg): SampleMetadata.load(fp) def test_directive_after_directives_section(self): fp = get_data_path( 'invalid/directive-after-directives-section.tsv') with self.assertRaisesRegex(MetadataFileError, '#q2:types.*outside of the directives ' 'section'): SampleMetadata.load(fp) def test_directive_longer_than_header(self): fp = get_data_path('invalid/directive-longer-than-header.tsv') with self.assertRaisesRegex(MetadataFileError, 'row has 5 cells.*header declares 4 ' 'cells'): SampleMetadata.load(fp) def test_data_longer_than_header(self): fp = get_data_path('invalid/data-longer-than-header.tsv') with self.assertRaisesRegex(MetadataFileError, 'row has 5 cells.*header declares 4 ' 'cells'): SampleMetadata.load(fp) def test_unknown_missing_scheme(self): fp = get_data_path('invalid/missing-unknown-scheme.tsv') with self.assertRaisesRegex(MetadataFileError, 'col1.*BAD:SCHEME.*#sk:missing'): SampleMetadata.load(fp) class TestLoadSuccess(unittest.TestCase): def setUp(self): self.temp_dir_obj = tempfile.TemporaryDirectory( prefix='qiime2-metadata-tests-temp-') self.temp_dir = self.temp_dir_obj.name # This Metadata object is compared against observed Metadata objects in # many of the tests, so just define it once here. self.simple_md = SampleMetadata( pd.DataFrame({'col1': [1.0, 2.0, 3.0], 'col2': ['a', 'b', 'c'], 'col3': ['foo', 'bar', '42']}, index=pd.Index(['id1', 'id2', 'id3'], name='id'))) # Basic sanity check to make sure the columns are ordered and typed as # expected. It'd be unfortunate to compare observed results to expected # results that aren't representing what we think they are! 
        # (Continuation of a test defined before this chunk.) Verify the
        # inferred column types on the simple metadata fixture.
        obs_columns = [(name, props.type)
                       for name, props in self.simple_md.columns.items()]
        exp_columns = [('col1', 'numeric'), ('col2', 'categorical'),
                       ('col3', 'categorical')]
        self.assertEqual(obs_columns, exp_columns)

    def tearDown(self):
        # Remove the temporary directory created in setUp.
        self.temp_dir_obj.cleanup()

    def test_simple(self):
        # Simple metadata file without comments, empty rows, jaggedness,
        # missing data, odd IDs or column names, directives, etc. The file has
        # multiple column types (numeric, categorical, and something that has
        # mixed numbers and strings, which must be interpreted as categorical).
        fp = get_data_path('valid/simple.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_bom_simple_txt(self):
        # This is the encoding that notepad.exe will use most commonly
        fp = get_data_path('valid/BOM-simple.txt')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_different_file_extension(self):
        # A .txt extension must load the same as .tsv.
        fp = get_data_path('valid/simple.txt')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_no_newline_at_eof(self):
        fp = get_data_path('valid/no-newline-at-eof.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_unix_line_endings(self):
        fp = get_data_path('valid/unix-line-endings.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_windows_line_endings(self):
        fp = get_data_path('valid/windows-line-endings.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_mac_line_endings(self):
        fp = get_data_path('valid/mac-line-endings.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_retains_column_order(self):
        # Explicitly test that the file's column order is retained in the
        # Metadata object. Many of the test cases use files with column names
        # in alphabetical order (e.g. "col1", "col2", "col3"), which matches
        # how pandas orders columns in a DataFrame when supplied with a dict
        # (many of the test cases use this feature of the DataFrame
        # constructor when constructing the expected DataFrame).
        fp = get_data_path('valid/column-order.tsv')
        obs_md = SampleMetadata.load(fp)

        # Supply DataFrame constructor with explicit column ordering instead of
        # a dict.
        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_columns = ['z', 'y', 'x']
        exp_data = [
            [1.0, 'a', 'foo'],
            [2.0, 'b', 'bar'],
            [3.0, 'c', '42']
        ]
        exp_df = pd.DataFrame(exp_data, index=exp_index, columns=exp_columns)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_leading_trailing_whitespace(self):
        fp = get_data_path('valid/leading-trailing-whitespace.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_comments(self):
        fp = get_data_path('valid/comments.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_empty_rows(self):
        fp = get_data_path('valid/empty-rows.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_qiime1_mapping_file(self):
        # QIIME 1 mapping files use the '#SampleID' ID header.
        fp = get_data_path('valid/qiime1.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='#SampleID')
        exp_df = pd.DataFrame({'col1': [1.0, 2.0, 3.0],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_qiita_sample_information_file(self):
        # Qiita sample-information files use the 'sample_name' ID header.
        fp = get_data_path('valid/qiita-sample-information.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id.1', 'id.2'], name='sample_name')
        exp_df = pd.DataFrame({
            'DESCRIPTION': ['description 1', 'description 2'],
            'TITLE': ['A Title', 'Another Title']},
            index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_qiita_preparation_information_file(self):
        fp = get_data_path('valid/qiita-preparation-information.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id.1', 'id.2'], name='sample_name')
        exp_df = pd.DataFrame({
            'BARCODE': ['ACGT', 'TGCA'],
            'EXPERIMENT_DESIGN_DESCRIPTION': ['longitudinal study',
                                              'longitudinal study']},
            index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_biom_observation_metadata_file(self):
        # BIOM observation metadata uses the '#OTUID' ID header.
        fp = get_data_path('valid/biom-observation-metadata.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['OTU_1', 'OTU_2'], name='#OTUID')
        exp_df = pd.DataFrame([['k__Bacteria;p__Firmicutes', 0.890],
                               ['k__Bacteria', 0.9999]],
                              columns=['taxonomy', 'confidence'],
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_supported_id_headers(self):
        case_insensitive = {
            'id', 'sampleid', 'sample id', 'sample-id',
            'featureid', 'feature id', 'feature-id'
        }

        exact_match = {
            '#SampleID', '#Sample ID', '#OTUID', '#OTU ID', 'sample_name'
        }

        # Build a set of supported headers, including exact matches and headers
        # with different casing.
        headers = set()
        for header in case_insensitive:
            headers.add(header)
            headers.add(header.upper())
            headers.add(header.title())
        for header in exact_match:
            headers.add(header)

        fp = os.path.join(self.temp_dir, 'metadata.tsv')
        count = 0
        for header in headers:
            with open(fp, 'w') as fh:
                fh.write('%s\tcolumn\nid1\tfoo\nid2\tbar\n' % header)
            obs_md = SampleMetadata.load(fp)

            exp_index = pd.Index(['id1', 'id2'], name=header)
            exp_df = pd.DataFrame({'column': ['foo', 'bar']},
                                  index=exp_index)
            exp_md = SampleMetadata(exp_df)

            self.assertEqual(obs_md, exp_md)
            count += 1

        # Since this test case is a little complicated, make sure that the
        # expected number of comparisons are happening.
self.assertEqual(count, 26) def test_recommended_ids(self): fp = get_data_path('valid/recommended-ids.tsv') obs_md = SampleMetadata.load(fp) exp_index = pd.Index(['c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID'], name='id') exp_df = pd.DataFrame({'col1': ['foo', 'bar']}, index=exp_index) exp_md = SampleMetadata(exp_df) self.assertEqual(obs_md, exp_md) def test_non_standard_characters(self): # Test that non-standard characters in IDs, column names, and cells are # handled correctly. The test case isn't exhaustive (e.g. it doesn't # test every Unicode character; that would be a nice additional test # case to have in the future). Instead, this test aims to be more of an # integration test for the robustness of the reader to non-standard # data. Many of the characters and their placement within the data file # are based on use-cases/bugs reported on the forum, Slack, etc. The # data file has comments explaining these test case choices in more # detail. fp = get_data_path('valid/non-standard-characters.tsv') obs_md = SampleMetadata.load(fp) exp_index = pd.Index(['©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5'], name='id') exp_columns = ['↩c@l1™', 'col(#2)', "#col'3", '""', 'col\t \r\n5'] exp_data = [ ['ƒoo', '(foo)', '#f o #o', 'fo\ro', np.nan], ["''2''", 'b#r', 'ba\nr', np.nan, np.nan], ['b"ar', 'c\td', '4\r\n2', np.nan, np.nan], ['b__a_z', '<42>', '>42', np.nan, np.nan], ['baz', np.nan, '42'] ] exp_df = pd.DataFrame(exp_data, index=exp_index, columns=exp_columns) exp_md = SampleMetadata(exp_df) self.assertEqual(obs_md, exp_md) def test_missing_data(self): print(f"Running things...!") fp = get_data_path('valid/missing-data.tsv') obs_md = SampleMetadata.load(fp) exp_index = pd.Index(['None', 'nan', 'NA'], name='id') exp_df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, np.nan]), ('NA', [np.nan, np.nan, np.nan]), ('col3', ['null', 'N/A', 'NA']), ('col4', np.array([np.nan, np.nan, np.nan], dtype=object))]), index=exp_index) exp_md = 
SampleMetadata(exp_df) print(f"obs_md: {obs_md}, exp_md: {exp_md}") self.assertEqual(obs_md, exp_md) # Test that column types are correct (mainly for the two empty columns; # one should be numeric, the other categorical). obs_columns = [(name, props.type) for name, props in obs_md.columns.items()] exp_columns = [('col1', 'numeric'), ('NA', 'numeric'), ('col3', 'categorical'), ('col4', 'categorical')] self.assertEqual(obs_columns, exp_columns) def test_missing_insdc(self): fp = get_data_path('valid/missing-insdc.tsv') obs_md = SampleMetadata.load(fp) exp_index = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6'], name='id') exp_df = pd.DataFrame({'col1': [1, 2, 3] + ([float('nan')] * 3), 'col2': ['a', 'b', 'c'] + ([float('nan')] * 3), 'col3': ['foo', 'bar', '42', 'anything', 'whatever', '10']}, index=exp_index) # not testing column_missing_schemes here on purpose, externally the # nan's shouldn't be meaningfully different exp_md = SampleMetadata(exp_df) pd.testing.assert_frame_equal(obs_md.to_dataframe(), exp_md.to_dataframe()) obs_columns = [(name, props.type, props.missing_scheme) for name, props in obs_md.columns.items()] exp_columns = [ ('col1', 'numeric', 'INSDC:missing'), ('col2', 'categorical', 'INSDC:missing'), ('col3', 'categorical', 'no-missing') ] self.assertEqual(obs_columns, exp_columns) def test_insdc_no_directives(self): fp = get_data_path('valid/missing-insdc-no-directive.tsv') obs_md = SampleMetadata.load(fp, default_missing_scheme='INSDC:missing') exp_index = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6'], name='id') exp_df = pd.DataFrame({'col1': [1, 2, 3] + ([float('nan')] * 3), 'col2': ['a', 'b', 'c'] + ([float('nan')] * 3), 'col3': ['foo', 'bar', '42', 'anything', 'whatever', '10']}, index=exp_index) # not testing column_missing_schemes here on purpose, externally the # nan's shouldn't be meaningfully different exp_md = SampleMetadata(exp_df) pd.testing.assert_frame_equal(obs_md.to_dataframe(), exp_md.to_dataframe()) obs_columns = [(name, 
props.type, props.missing_scheme) for name, props in obs_md.columns.items()] exp_columns = [ ('col1', 'numeric', 'INSDC:missing'), ('col2', 'categorical', 'INSDC:missing'), ('col3', 'categorical', 'INSDC:missing') ] self.assertEqual(obs_columns, exp_columns) def test_insdc_override(self): fp = get_data_path('valid/override-insdc.tsv') # This file has INSDC terms, but they aren't missing values. obs_md = SampleMetadata.load(fp, default_missing_scheme='INSDC:missing') exp_index = pd.Index(['id1', 'id2', 'id3', 'id4'], name='id') exp_df = pd.DataFrame({'col1': ['collected', 'not collected', 'not collected', 'collected']}, index=exp_index) pd.testing.assert_frame_equal(obs_md.to_dataframe(), exp_df) obs_columns = [(name, props.type, props.missing_scheme) for name, props in obs_md.columns.items()] exp_columns = [ ('col1', 'categorical', 'no-missing'), ] self.assertEqual(obs_columns, exp_columns) def test_minimal_file(self): # Simplest possible metadata file consists of one ID and zero columns. 
        # (Body of test_minimal_file, whose def precedes this chunk boundary.)
        fp = get_data_path('valid/minimal.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['a'], name='id')
        exp_df = pd.DataFrame({}, index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_single_id(self):
        fp = get_data_path('valid/single-id.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1'], name='id')
        exp_df = pd.DataFrame({'col1': [1.0], 'col2': ['a'],
                               'col3': ['foo']}, index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_no_columns(self):
        fp = get_data_path('valid/no-columns.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['a', 'b', 'my-id'], name='id')
        exp_df = pd.DataFrame({}, index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_single_column(self):
        fp = get_data_path('valid/single-column.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': [1.0, 2.0, 3.0]}, index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_trailing_columns(self):
        fp = get_data_path('valid/trailing-columns.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_jagged_trailing_columns(self):
        # Test case based on https://github.com/qiime2/qiime2/issues/335
        fp = get_data_path('valid/jagged-trailing-columns.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_padding_rows_shorter_than_header(self):
        # Rows shorter than the header are padded with missing values.
        fp = get_data_path('valid/rows-shorter-than-header.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': [1.0, 2.0, np.nan],
                               'col2': ['a', np.nan, np.nan],
                               'col3': [np.nan, np.nan, np.nan]},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_all_cells_padded(self):
        fp = get_data_path('valid/all-cells-padded.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': [np.nan, np.nan, np.nan],
                               'col2': [np.nan, np.nan, np.nan],
                               'col3': [np.nan, np.nan, np.nan]},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_does_not_cast_ids_or_column_names(self):
        # IDs and column names that look numeric stay as strings.
        fp = get_data_path('valid/no-id-or-column-name-type-cast.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['0.000001', '0.004000', '0.000000'],
                             dtype=object, name='id')
        exp_columns = ['42.0', '1000', '-4.2']
        exp_data = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999]
        ]
        exp_df = pd.DataFrame(exp_data, index=exp_index, columns=exp_columns)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_numeric_column(self):
        # Many numeric formats (scientific notation, signed zero, etc.)
        # load as floats.
        fp = get_data_path('valid/numeric-column.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6',
                              'id7', 'id8', 'id9', 'id10', 'id11', 'id12'],
                             name='id')
        exp_df = pd.DataFrame({'col1': [0.0, 2.0, 0.0003, -4.2, 1e-4, 1e4,
                                        1.5e2, np.nan, 1.0, 0.5, 1e-8,
                                        -0.0]},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_numeric_column_as_categorical(self):
        # Same file, but column_types overrides inference: values stay as
        # the literal strings from the file.
        fp = get_data_path('valid/numeric-column.tsv')
        obs_md = SampleMetadata.load(fp,
                                     column_types={'col1': 'categorical'})

        exp_index = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6',
                              'id7', 'id8', 'id9', 'id10', 'id11', 'id12'],
                             name='id')
        exp_df = pd.DataFrame({'col1': ['0', '2.0', '0.00030', '-4.2',
                                        '1e-4', '1e4', '+1.5E+2', np.nan,
                                        '1.', '.5', '1e-08', '-0']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_with_complete_types_sk_directive(self):
        fp = get_data_path('valid/complete-types-directive_w_sk.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_with_complete_types_directive(self):
        fp = get_data_path('valid/complete-types-directive.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_with_partial_types_directive(self):
        fp = get_data_path('valid/partial-types-directive.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_with_empty_types_directive(self):
        fp = get_data_path('valid/empty-types-directive.tsv')
        obs_md = SampleMetadata.load(fp)
        self.assertEqual(obs_md, self.simple_md)

    def test_with_case_insensitive_types_directive(self):
        fp = get_data_path('valid/case-insensitive-types-directive.tsv')
        obs_md = SampleMetadata.load(fp)

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': [-5.0, 0.0, 42.0]},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_column_types_without_directive(self):
        fp = get_data_path('valid/simple.tsv')
        obs_md = SampleMetadata.load(fp,
                                     column_types={'col1': 'categorical'})

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)

    def test_column_types_override_directive(self):
        # column_types passed to load() wins over the file's directive.
        fp = get_data_path('valid/simple-with-directive.tsv')
        obs_md = SampleMetadata.load(fp,
                                     column_types={'col1': 'categorical',
                                                   'col2': 'categorical'})

        exp_index = pd.Index(['id1', 'id2', 'id3'], name='id')
        # (Continuation of test_column_types_override_directive.)
        exp_df = pd.DataFrame({'col1': ['1', '2', '3'],
                               'col2': ['a', 'b', 'c'],
                               'col3': ['foo', 'bar', '42']},
                              index=exp_index)
        exp_md = SampleMetadata(exp_df)

        self.assertEqual(obs_md, exp_md)


class TestSave(unittest.TestCase):
    # Tests for SampleMetadata.save(): TSV output format, extension
    # handling, missing-value schemes, and number formatting.

    def setUp(self):
        self.temp_dir_obj = tempfile.TemporaryDirectory(
            prefix='qiime2-metadata-tests-temp-')
        self.temp_dir = self.temp_dir_obj.name

        self.filepath = os.path.join(self.temp_dir, 'metadata.tsv')

    def tearDown(self):
        self.temp_dir_obj.cleanup()

    def test_simple(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\tcol3\n"
            "#sk:types\tnumeric\tcategorical\tcategorical\n"
            "id1\t1\ta\tfoo\n"
            "id2\t2\tb\tbar\n"
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(obs, exp)

    def test_save_metadata_auto_extension(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        # Filename & extension endswith is matching (non-default).
        fp = os.path.join(self.temp_dir, 'metadatatsv')
        obs_md = md.save(fp, '.tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadatatsv.tsv')

        # No period in filename; no extension included.
        fp = os.path.join(self.temp_dir, 'metadata')
        obs_md = md.save(fp)
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata')

        # No period in filename; no period in extension.
        fp = os.path.join(self.temp_dir, 'metadata')
        obs_md = md.save(fp, 'tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # No period in filename; multiple periods in extension.
        fp = os.path.join(self.temp_dir, 'metadata')
        obs_md = md.save(fp, '..tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Single period in filename; no period in extension.
        fp = os.path.join(self.temp_dir, 'metadata.')
        obs_md = md.save(fp, 'tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Single period in filename; single period in extension.
        fp = os.path.join(self.temp_dir, 'metadata.')
        obs_md = md.save(fp, '.tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Single period in filename; multiple periods in extension.
        fp = os.path.join(self.temp_dir, 'metadata.')
        obs_md = md.save(fp, '..tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Multiple periods in filename; single period in extension.
        fp = os.path.join(self.temp_dir, 'metadata..')
        obs_md = md.save(fp, '.tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Multiple periods in filename; multiple periods in extension.
        fp = os.path.join(self.temp_dir, 'metadata..')
        obs_md = md.save(fp, '..tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # No extension in filename; no extension input.
        fp = os.path.join(self.temp_dir, 'metadata')
        obs_md = md.save(fp)
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata')

        # No extension in filename; extension input.
        fp = os.path.join(self.temp_dir, 'metadata')
        obs_md = md.save(fp, '.tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Extension in filename; no extension input.
        fp = os.path.join(self.temp_dir, 'metadata.tsv')
        obs_md = md.save(fp)
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

        # Extension in filename; extension input (non-matching).
        fp = os.path.join(self.temp_dir, 'metadata.tsv')
        obs_md = md.save(fp, '.txt')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv.txt')

        # Extension in filename; extension input (matching).
        fp = os.path.join(self.temp_dir, 'metadata.tsv')
        obs_md = md.save(fp, '.tsv')
        obs_filename = os.path.basename(obs_md)

        self.assertEqual(obs_filename, 'metadata.tsv')

    def test_no_bom(self):
        # Output must not start with a UTF-8 byte-order mark.
        md = SampleMetadata(pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'rb') as fh:
            obs = fh.read(2)

        self.assertEqual(obs, b'id')

    def test_different_file_extension(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        filepath = os.path.join(self.temp_dir, 'metadata.txt')
        md.save(filepath)

        with open(filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\tcol3\n"
            "#sk:types\tnumeric\tcategorical\tcategorical\n"
            "id1\t1\ta\tfoo\n"
            "id2\t2\tb\tbar\n"
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(obs, exp)

    def test_some_missing_data(self):
        md = SampleMetadata(
            pd.DataFrame({'col1': [42.0, np.nan, -3.5],
                          'col2': ['a', np.nan, np.nan]},
                         index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\n"
            "#sk:types\tnumeric\tcategorical\n"
            "id1\t42\ta\n"
            "id2\t\t\n"
            "id3\t-3.5\t\n"
        )

        self.assertEqual(obs, exp)

    def test_all_missing_data(self):
        # nan-only columns that are numeric or categorical.
        md = SampleMetadata(
            pd.DataFrame({'col1': [np.nan, np.nan, np.nan],
                          'col2': np.array([np.nan, np.nan, np.nan],
                                           dtype=object)},
                         index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\n"
            "#sk:types\tnumeric\tcategorical\n"
            "id1\t\t\n"
            "id2\t\t\n"
            "id3\t\t\n"
        )

        self.assertEqual(obs, exp)

    def test_missing_schemes(self):
        # Per-column missing schemes are written as a '#sk:missing' row.
        md = SampleMetadata(
            pd.DataFrame({'col1': [42.0, np.nan, -3.5],
                          'col2': ['a', 'not applicable',
                                   'restricted access']},
                         index=pd.Index(['id1', 'id2', 'id3'], name='id')),
            column_missing_schemes={
                'col1': 'blank', 'col2': 'INSDC:missing'}
        )

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\n"
            "#sk:types\tnumeric\tcategorical\n"
            "#sk:missing\tblank\tINSDC:missing\n"
            "id1\t42\ta\n"
            "id2\t\tnot applicable\n"
            "id3\t-3.5\trestricted access\n"
        )

        self.assertEqual(obs, exp)

    def test_default_missing_scheme(self):
        md = SampleMetadata(
            pd.DataFrame({'col1': [42.0, np.nan, -3.5],
                          'col2': ['a', 'not applicable',
                                   'restricted access']},
                         index=pd.Index(['id1', 'id2', 'id3'], name='id')),
            default_missing_scheme='INSDC:missing')

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\n"
            "#sk:types\tnumeric\tcategorical\n"
            "#sk:missing\tINSDC:missing\tINSDC:missing\n"
            "id1\t42\ta\n"
            "id2\t\tnot applicable\n"
            "id3\t-3.5\trestricted access\n"
        )

        self.assertEqual(obs, exp)

    def test_default_missing_scheme_override(self):
        # column_missing_schemes overrides default_missing_scheme.
        md = SampleMetadata(
            pd.DataFrame({'col1': [42.0, np.nan, -3.5],
                          'col2': ['a', 'not applicable',
                                   'restricted access']},
                         index=pd.Index(['id1', 'id2', 'id3'], name='id')),
            default_missing_scheme='sk:error',
            column_missing_schemes=dict(col1='INSDC:missing',
                                        col2='INSDC:missing'))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\tcol2\n"
            "#sk:types\tnumeric\tcategorical\n"
            "#sk:missing\tINSDC:missing\tINSDC:missing\n"
            "id1\t42\ta\n"
            "id2\t\tnot applicable\n"
            "id3\t-3.5\trestricted access\n"
        )

        self.assertEqual(obs, exp)

    def test_unsorted_column_order(self):
        index = pd.Index(['id1', 'id2', 'id3'], name='id')
        columns = ['z', 'b', 'y']
        data = [
            [1.0, 'a', 'foo'],
            [2.0, 'b', 'bar'],
            [3.0, 'c', '42']
        ]
        md = SampleMetadata(pd.DataFrame(data, index=index, columns=columns))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tz\tb\ty\n"
            "#sk:types\tnumeric\tcategorical\tcategorical\n"
            "id1\t1\ta\tfoo\n"
            "id2\t2\tb\tbar\n"
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(obs, exp)

    def test_alternate_id_header(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': [1.0, 2.0, 3.0],
             'col2': ['a', 'b', 'c'],
             'col3': ['foo', 'bar', '42']},
            index=pd.Index(['id1', 'id2', 'id3'], name='#SampleID')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "#SampleID\tcol1\tcol2\tcol3\n"
            "#sk:types\tnumeric\tcategorical\tcategorical\n"
            "id1\t1\ta\tfoo\n"
            "id2\t2\tb\tbar\n"
            "id3\t3\tc\t42\n"
        )

        self.assertEqual(obs, exp)

    def test_various_numbers(self):
        numbers = [
            0.0, -0.0, np.nan, 1.0, 42.0, -33.0, 1e-10, 1.5e15, 0.0003,
            -4.234,
            # This last number should be rounded because it exceeds 15 digits
            # of precision.
            12.34567891234567
        ]
        index = pd.Index(['id1', 'id2', 'id3', 'id4', 'id5', 'id6', 'id7',
                          'id8', 'id9', 'id10', 'id11'], name='ID')
        md = SampleMetadata(pd.DataFrame({'numbers': numbers}, index=index))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "ID\tnumbers\n"
            "#sk:types\tnumeric\n"
            "id1\t0\n"
            "id2\t-0\n"
            "id3\t\n"
            "id4\t1\n"
            "id5\t42\n"
            "id6\t-33\n"
            "id7\t1e-10\n"
            "id8\t1.5e+15\n"
            "id9\t0.0003\n"
            "id10\t-4.234\n"
            "id11\t12.3456789123457\n"
        )

        self.assertEqual(obs, exp)

    def test_minimal(self):
        md = SampleMetadata(pd.DataFrame({},
                                         index=pd.Index(['my-id'],
                                                        name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\n"
            "#sk:types\n"
            "my-id\n"
        )

        self.assertEqual(obs, exp)

    def test_single_id(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': ['foo'], 'col2': [4.002]},
            index=pd.Index(['my-id'], name='featureid')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "featureid\tcol1\tcol2\n"
            "#sk:types\tcategorical\tnumeric\n"
            "my-id\tfoo\t4.002\n"
        )

        self.assertEqual(obs, exp)

    def test_no_columns(self):
        md = SampleMetadata(pd.DataFrame(
            {}, index=pd.Index(['foo', 'bar', 'baz'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\n"
            "#sk:types\n"
            "foo\n"
            "bar\n"
            "baz\n"
        )

        self.assertEqual(obs, exp)

    def test_single_column(self):
        md = SampleMetadata(pd.DataFrame(
            {'col1': ['42', '4.3', '4.4000']},
            index=pd.Index(['foo', 'bar', 'baz'], name='id')))

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcol1\n"
            "#sk:types\tcategorical\n"
            "foo\t42\n"
            "bar\t4.3\n"
            "baz\t4.4000\n"
        )

        self.assertEqual(obs, exp)

    def test_ids_and_column_names_as_numeric_strings(self):
        index = pd.Index(['0.000001', '0.004000', '0.000000'],
                         dtype=object, name='id')
        columns = ['42.0', '1000', '-4.2']
        data = [
            [2.0, 'b', 2.5],
            [1.0, 'b', 4.2],
            [3.0, 'c', -9.999]
        ]
        df = pd.DataFrame(data, index=index, columns=columns)
        md = SampleMetadata(df)

        md.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\t42.0\t1000\t-4.2\n"
            "#sk:types\tnumeric\tcategorical\tnumeric\n"
            "0.000001\t2\tb\t2.5\n"
            "0.004000\t1\tb\t4.2\n"
            "0.000000\t3\tc\t-9.999\n"
        )

        self.assertEqual(obs, exp)

    # A couple of basic tests for CategoricalMetadataColumn and
    # NumericMetadataColumn below. Those classes simply transform themselves
    # into single-column Metadata objects within `MetadataColumn.save()` and
    # use the same writer code from there on.

    def test_categorical_metadata_column(self):
        mdc = CategoricalMetadataColumn(pd.Series(
            ['foo', 'bar', '42.50'], name='categorical-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='id')))

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcategorical-column\n"
            "#sk:types\tcategorical\n"
            "id1\tfoo\n"
            "id2\tbar\n"
            "id3\t42.50\n"
        )

        self.assertEqual(obs, exp)

    def test_categorical_metadata_column_insdc_no_missing(self):
        mdc = CategoricalMetadataColumn(pd.Series(
            ['foo', 'bar', '42.50'], name='categorical-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='id')),
            missing_scheme='INSDC:missing')

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcategorical-column\n"
            "#sk:types\tcategorical\n"
            "#sk:missing\tINSDC:missing\n"
            "id1\tfoo\n"
            "id2\tbar\n"
            "id3\t42.50\n"
        )

        self.assertEqual(obs, exp)

    def test_categorical_metadata_column_insdc_missing(self):
        mdc = CategoricalMetadataColumn(pd.Series(
            ['foo', 'missing', '42.50'], name='categorical-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='id')),
            missing_scheme='INSDC:missing')

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "id\tcategorical-column\n"
            "#sk:types\tcategorical\n"
            "#sk:missing\tINSDC:missing\n"
            "id1\tfoo\n"
            "id2\tmissing\n"
            "id3\t42.50\n"
        )

        self.assertEqual(obs, exp)

    def test_numeric_metadata_column(self):
        mdc = NumericMetadataColumn(pd.Series(
            [1e-15, 42.50, -999.0], name='numeric-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID')))

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "#OTU ID\tnumeric-column\n"
            "#sk:types\tnumeric\n"
            "id1\t1e-15\n"
            "id2\t42.5\n"
            "id3\t-999\n"
        )

        self.assertEqual(obs, exp)

    def test_numeric_metadata_column_insdc_no_missing(self):
        mdc = NumericMetadataColumn(pd.Series(
            [1e-15, 42.50, -999.0], name='numeric-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID')),
            missing_scheme='INSDC:missing')

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "#OTU ID\tnumeric-column\n"
            "#sk:types\tnumeric\n"
            "#sk:missing\tINSDC:missing\n"
            "id1\t1e-15\n"
            "id2\t42.5\n"
            "id3\t-999\n"
        )

        self.assertEqual(obs, exp)

    def test_numeric_metadata_column_insdc_missing(self):
        mdc = NumericMetadataColumn(pd.Series(
            [1e-15, 'missing', -999.0], name='numeric-column',
            index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID')),
            missing_scheme='INSDC:missing')

        mdc.save(self.filepath)

        with open(self.filepath, 'r') as fh:
            obs = fh.read()

        exp = (
            "#OTU ID\tnumeric-column\n"
            "#sk:types\tnumeric\n"
            "#sk:missing\tINSDC:missing\n"
            "id1\t1e-15\n"
            "id2\tmissing\n"
            "id3\t-999\n"
        )

        self.assertEqual(obs, exp)


# TODO this class spot-checks some of the more "difficult" valid files to make
# sure they can be read, written to disk, and read again in a lossless way.
# A more complete strategy (with fewer test cases) would be performing a
# roundtrip test on every valid file under the `data` directory (e.g. with a
# `glob` and for loop).
class TestRoundtrip(unittest.TestCase): def setUp(self): self.temp_dir_obj = tempfile.TemporaryDirectory( prefix='qiime2-metadata-tests-temp-') self.temp_dir = self.temp_dir_obj.name self.filepath = os.path.join(self.temp_dir, 'metadata.tsv') def tearDown(self): self.temp_dir_obj.cleanup() def test_simple(self): fp = get_data_path('valid/simple.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_non_standard_characters(self): fp = get_data_path('valid/non-standard-characters.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_missing_data(self): fp = get_data_path('valid/missing-data.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_missing_insdc(self): fp = get_data_path('valid/missing-insdc.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_minimal_file(self): fp = get_data_path('valid/minimal.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_numeric_column(self): fp = get_data_path('valid/numeric-column.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_all_cells_padded(self): fp = get_data_path('valid/all-cells-padded.tsv') md1 = SampleMetadata.load(fp) md1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) self.assertEqual(md1, md2) def test_categorical_metadata_column(self): fp = get_data_path('valid/simple.tsv') md1 = SampleMetadata.load(fp) mdc1 = md1.get_column('col2') self.assertIsInstance(mdc1, CategoricalMetadataColumn) mdc1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) mdc2 = md2.get_column('col2') 
self.assertIsInstance(mdc1, CategoricalMetadataColumn) self.assertEqual(mdc1, mdc2) def test_numeric_metadata_column(self): fp = get_data_path('valid/simple.tsv') md1 = SampleMetadata.load(fp) mdc1 = md1.get_column('col1') self.assertIsInstance(mdc1, NumericMetadataColumn) mdc1.save(self.filepath) md2 = SampleMetadata.load(self.filepath) mdc2 = md2.get_column('col1') self.assertIsInstance(mdc1, NumericMetadataColumn) self.assertEqual(mdc1, mdc2) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/metadata/tests/test_metadata.py000066400000000000000000001060671464262511300230140ustar00rootroot00000000000000import collections import unittest import warnings import pandas as pd import numpy as np from skbio.metadata._metadata import (SampleMetadata, CategoricalMetadataColumn, NumericMetadataColumn) class TestInvalidMetadataConstruction(unittest.TestCase): def test_non_dataframe(self): with self.assertRaisesRegex( TypeError, 'Metadata constructor.*DataFrame.*not.*Series'): SampleMetadata(pd.Series([1, 2, 3], name='col', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_no_ids(self): with self.assertRaisesRegex(ValueError, 'Metadata.*at least one ID'): SampleMetadata(pd.DataFrame({}, index=pd.Index([], name='id'))) with self.assertRaisesRegex(ValueError, 'Metadata.*at least one ID'): SampleMetadata(pd.DataFrame({'column': []}, index=pd.Index([], name='id'))) def test_invalid_id_header(self): # default index name with self.assertRaisesRegex(ValueError, r'Index\.name.*None'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', 'b', 'c']))) with self.assertRaisesRegex(ValueError, r'Index\.name.*my-id-header'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', 'b', 'c'], name='my-id-header'))) def test_non_str_id(self): with self.assertRaisesRegex( TypeError, 'non-string metadata ID.*type.*float.*nan'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', np.nan, 'c'], name='id'))) def 
test_non_str_column_name(self): with self.assertRaisesRegex( TypeError, 'non-string metadata column name.*type.*' 'float.*nan'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3], np.nan: [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_empty_id(self): with self.assertRaisesRegex( ValueError, 'empty metadata ID.*at least one character'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', '', 'c'], name='id'))) def test_empty_column_name(self): with self.assertRaisesRegex( ValueError, 'empty metadata column name.*' 'at least one character'): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3], '': [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_pound_sign_id(self): with self.assertRaisesRegex( ValueError, "metadata ID.*begins with a pound sign.*'#b'"): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', '#b', 'c'], name='id'))) def test_id_conflicts_with_id_header(self): with self.assertRaisesRegex( ValueError, "metadata ID 'sample-id'.*conflicts.*reserved.*" "ID header"): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', 'sample-id', 'c'], name='id'))) def test_column_name_conflicts_with_id_header(self): with self.assertRaisesRegex( ValueError, "metadata column name 'featureid'.*conflicts.*" "reserved.*ID header"): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3], 'featureid': [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_duplicate_ids(self): with self.assertRaisesRegex(ValueError, "Metadata IDs.*unique.*'a'"): SampleMetadata(pd.DataFrame( {'col': [1, 2, 3]}, index=pd.Index(['a', 'b', 'a'], name='id'))) def test_duplicate_column_names(self): data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] with self.assertRaisesRegex(ValueError, "Metadata column names.*unique.*'col1'"): SampleMetadata(pd.DataFrame(data, columns=['col1', 'col2', 'col1'], index=pd.Index(['a', 'b', 'c'], name='id'))) def test_unsupported_column_dtype(self): with self.assertRaisesRegex( TypeError, "Metadata 
column 'col2'.*unsupported.*dtype.*bool"): SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': [True, False, True]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_categorical_column_unsupported_type(self): with self.assertRaisesRegex( TypeError, "CategoricalMetadataColumn.*strings or missing " r"values.*42\.5.*float.*'col2'"): SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': ['foo', 'bar', 42.5]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_categorical_column_empty_str(self): with self.assertRaisesRegex( ValueError, "CategoricalMetadataColumn.*empty strings.*" "column 'col2'"): SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': ['foo', '', 'bar']}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_numeric_column_infinity(self): with self.assertRaisesRegex( ValueError, "NumericMetadataColumn.*positive or negative " "infinity.*column 'col2'"): SampleMetadata(pd.DataFrame( {'col1': ['foo', 'bar', 'baz'], 'col2': [42, float('+inf'), 4.3]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_unknown_missing_scheme(self): with self.assertRaisesRegex(ValueError, "BAD:SCHEME"): SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': ['foo', 'bar', 'bar']}, index=pd.Index(['a', 'b', 'c'], name='id')), default_missing_scheme='BAD:SCHEME') def test_missing_q2_error(self): index = pd.Index(['None', 'nan', 'NA', 'foo'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, np.nan, np.nan]), ('NA', [np.nan, np.nan, np.nan, np.nan]), ('col3', ['null', 'N/A', np.nan, 'NA']), ('col4', np.array([np.nan, np.nan, np.nan, np.nan], dtype=object))]), index=index) with self.assertRaisesRegex(ValueError, 'col1.*no-missing'): SampleMetadata(df, default_missing_scheme='no-missing') class TestMetadataConstructionAndProperties(unittest.TestCase): def assertEqualColumns(self, obs_columns, exp): obs = [(name, props.type) for name, props in obs_columns.items()] self.assertEqual(obs, exp) def test_minimal(self): md = 
SampleMetadata(pd.DataFrame({}, index=pd.Index(['a'], name='id'))) self.assertEqual(md.id_count, 1) self.assertEqual(md.column_count, 0) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('a',)) self.assertEqualColumns(md.columns, []) def test_single_id(self): index = pd.Index(['id1'], name='id') df = pd.DataFrame({'col1': [1.0], 'col2': ['a'], 'col3': ['foo']}, index=index) md = SampleMetadata(df) self.assertEqual(md.id_count, 1) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('id1',)) self.assertEqualColumns(md.columns, [('col1', 'numeric'), ('col2', 'categorical'), ('col3', 'categorical')]) def test_no_columns(self): index = pd.Index(['id1', 'id2', 'foo'], name='id') df = pd.DataFrame({}, index=index) md = SampleMetadata(df) self.assertEqual(md.id_count, 3) self.assertEqual(md.column_count, 0) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('id1', 'id2', 'foo')) self.assertEqualColumns(md.columns, []) def test_single_column(self): index = pd.Index(['id1', 'a', 'my-id'], name='id') df = pd.DataFrame({'column': ['foo', 'bar', 'baz']}, index=index) md = SampleMetadata(df) self.assertEqual(md.id_count, 3) self.assertEqual(md.column_count, 1) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('id1', 'a', 'my-id')) self.assertEqualColumns(md.columns, [('column', 'categorical')]) def test_retains_column_order(self): # Supply DataFrame constructor with explicit column ordering instead of # a dict. 
index = pd.Index(['id1', 'id2', 'id3'], name='id') columns = ['z', 'a', 'ch'] data = [ [1.0, 'a', 'foo'], [2.0, 'b', 'bar'], [3.0, 'c', '42'] ] df = pd.DataFrame(data, index=index, columns=columns) md = SampleMetadata(df) self.assertEqual(md.id_count, 3) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('id1', 'id2', 'id3')) self.assertEqualColumns(md.columns, [('z', 'numeric'), ('a', 'categorical'), ('ch', 'categorical')]) def test_supported_id_headers(self): case_insensitive = { 'id', 'sampleid', 'sample id', 'sample-id', 'featureid', 'feature id', 'feature-id' } exact_match = { '#SampleID', '#Sample ID', '#OTUID', '#OTU ID', 'sample_name' } # Build a set of supported headers, including exact matches and headers # with different casing. headers = set() for header in case_insensitive: headers.add(header) headers.add(header.upper()) headers.add(header.title()) for header in exact_match: headers.add(header) count = 0 for header in headers: index = pd.Index(['id1', 'id2'], name=header) df = pd.DataFrame({'column': ['foo', 'bar']}, index=index) md = SampleMetadata(df) self.assertEqual(md.id_header, header) count += 1 # Since this test case is a little complicated, make sure that the # expected number of comparisons are happening. 
self.assertEqual(count, 26) def test_recommended_ids(self): index = pd.Index(['c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID'], name='id') df = pd.DataFrame({'col1': ['foo', 'bar']}, index=index) md = SampleMetadata(df) self.assertEqual(md.id_count, 2) self.assertEqual(md.column_count, 1) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID')) self.assertEqualColumns(md.columns, [('col1', 'categorical')]) def test_non_standard_characters(self): index = pd.Index(['©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5'], name='id') columns = ['↩c@l1™', 'col(#2)', "#col'3", '""', 'col\t \r\n5'] data = [ ['ƒoo', '(foo)', '#f o #o', 'fo\ro', np.nan], ["''2''", 'b#r', 'ba\nr', np.nan, np.nan], ['b"ar', 'c\td', '4\r\n2', np.nan, np.nan], ['b__a_z', '<42>', '>42', np.nan, np.nan], ['baz', np.nan, '42'] ] df = pd.DataFrame(data, index=index, columns=columns) md = SampleMetadata(df) self.assertEqual(md.id_count, 5) self.assertEqual(md.column_count, 5) self.assertEqual(md.id_header, 'id') self.assertEqual( md.ids, ('©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5')) self.assertEqualColumns(md.columns, [('↩c@l1™', 'categorical'), ('col(#2)', 'categorical'), ("#col'3", 'categorical'), ('""', 'categorical'), ('col\t \r\n5', 'numeric')]) def test_missing_data(self): index = pd.Index(['None', 'nan', 'NA', 'foo'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, np.nan, np.nan]), ('NA', [np.nan, np.nan, np.nan, np.nan]), ('col3', ['null', 'N/A', np.nan, 'NA']), ('col4', np.array([np.nan, np.nan, np.nan, np.nan], dtype=object))]), index=index) md = SampleMetadata(df) self.assertEqual(md.id_count, 4) self.assertEqual(md.column_count, 4) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('None', 'nan', 'NA', 'foo')) self.assertEqualColumns(md.columns, [('col1', 'numeric'), ('NA', 'numeric'), ('col3', 'categorical'), ('col4', 'categorical')]) def test_missing_data_insdc(self): index 
= pd.Index(['None', 'nan', 'NA', 'foo'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, 'missing', np.nan]), # TODO: it is not currently possible to have an ENTIRELY numeric # column from missing terms, as the dtype of the series is object # and there is not way to indicate the dtype beyond that. # ('NA', [np.nan, np.nan, 'not applicable', np.nan]), ('col3', ['null', 'N/A', 'not collected', 'NA']), ('col4', np.array([np.nan, np.nan, 'restricted access', np.nan], dtype=object))]), index=index) md = SampleMetadata(df, default_missing_scheme='INSDC:missing') self.assertEqual(md.id_count, 4) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('None', 'nan', 'NA', 'foo')) self.assertEqualColumns(md.columns, [('col1', 'numeric'), ('col3', 'categorical'), ('col4', 'categorical')]) pd.testing.assert_frame_equal(md.to_dataframe(), pd.DataFrame( {'col1': [1.0, np.nan, np.nan, np.nan], 'col3': ['null', 'N/A', np.nan, 'NA'], 'col4': np.array([np.nan, np.nan, np.nan, np.nan], dtype=object)}, index=index)) def test_missing_data_insdc_column_missing(self): index = pd.Index(['None', 'nan', 'NA', 'foo'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, 'missing', np.nan]), # TODO: it is not currently possible to have an ENTIRELY numeric # column from missing terms, as the dtype of the series is object # and there is not way to indicate the dtype beyond that. 
# ('NA', [np.nan, np.nan, 'not applicable', np.nan]), ('col3', ['null', 'N/A', 'not collected', 'NA']), ('col4', np.array([np.nan, np.nan, 'restricted access', np.nan], dtype=object))]), index=index) md = SampleMetadata(df, column_missing_schemes={ 'col1': 'INSDC:missing', 'col3': 'INSDC:missing', 'col4': 'INSDC:missing' }) self.assertEqual(md.id_count, 4) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('None', 'nan', 'NA', 'foo')) self.assertEqualColumns(md.columns, [('col1', 'numeric'), ('col3', 'categorical'), ('col4', 'categorical')]) pd.testing.assert_frame_equal(md.to_dataframe(), pd.DataFrame( {'col1': [1.0, np.nan, np.nan, np.nan], 'col3': ['null', 'N/A', np.nan, 'NA'], 'col4': np.array([np.nan, np.nan, np.nan, np.nan], dtype=object)}, index=index)) def test_missing_data_default_override(self): index = pd.Index(['None', 'nan', 'NA', 'foo'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [1.0, np.nan, 'missing', np.nan]), # TODO: it is not currently possible to have an ENTIRELY numeric # column from missing terms, as the dtype of the series is object # and there is not way to indicate the dtype beyond that. 
# ('NA', [np.nan, np.nan, 'not applicable', np.nan]), ('col3', ['null', 'N/A', 'not collected', 'NA']), ('col4', np.array([np.nan, np.nan, 'restricted access', np.nan], dtype=object))]), index=index) md = SampleMetadata(df, column_missing_schemes={ 'col1': 'INSDC:missing', 'col3': 'INSDC:missing', 'col4': 'INSDC:missing' }, default_missing_scheme='no-missing') self.assertEqual(md.id_count, 4) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('None', 'nan', 'NA', 'foo')) self.assertEqualColumns(md.columns, [('col1', 'numeric'), ('col3', 'categorical'), ('col4', 'categorical')]) pd.testing.assert_frame_equal(md.to_dataframe(), pd.DataFrame( {'col1': [1.0, np.nan, np.nan, np.nan], 'col3': ['null', 'N/A', np.nan, 'NA'], 'col4': np.array([np.nan, np.nan, np.nan, np.nan], dtype=object)}, index=index)) def test_does_not_cast_ids_or_column_names(self): index = pd.Index(['0.000001', '0.004000', '0.000000'], dtype=object, name='id') columns = ['42.0', '1000', '-4.2'] data = [ [2.0, 'b', 2.5], [1.0, 'b', 4.2], [3.0, 'c', -9.999] ] df = pd.DataFrame(data, index=index, columns=columns) md = SampleMetadata(df) self.assertEqual(md.id_count, 3) self.assertEqual(md.column_count, 3) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('0.000001', '0.004000', '0.000000')) self.assertEqualColumns(md.columns, [('42.0', 'numeric'), ('1000', 'categorical'), ('-4.2', 'numeric')]) def test_mixed_column_types(self): md = SampleMetadata( pd.DataFrame({'col0': [1.0, 2.0, 3.0], 'col1': ['a', 'b', 'c'], 'col2': ['foo', 'bar', '42'], 'col3': ['1.0', '2.5', '-4.002'], 'col4': [1, 2, 3], 'col5': [1, 2, 3.5], 'col6': [1e-4, -0.0002, np.nan], 'col7': ['cat', np.nan, 'dog'], 'col8': ['a', 'a', 'a'], 'col9': [0, 0, 0]}, index=pd.Index(['id1', 'id2', 'id3'], name='id'))) self.assertEqual(md.id_count, 3) self.assertEqual(md.column_count, 10) self.assertEqual(md.id_header, 'id') self.assertEqual(md.ids, ('id1', 'id2', 'id3')) 
self.assertEqualColumns(md.columns, [('col0', 'numeric'), ('col1', 'categorical'), ('col2', 'categorical'), ('col3', 'categorical'), ('col4', 'numeric'), ('col5', 'numeric'), ('col6', 'numeric'), ('col7', 'categorical'), ('col8', 'categorical'), ('col9', 'numeric')]) def test_case_insensitive_duplicate_ids(self): index = pd.Index(['a', 'b', 'A'], name='id') df = pd.DataFrame({'column': ['1', '2', '3']}, index=index) metadata = SampleMetadata(df) self.assertEqual(metadata.ids, ('a', 'b', 'A')) def test_case_insensitive_duplicate_column_names(self): index = pd.Index(['a', 'b', 'c'], name='id') df = pd.DataFrame({'column': ['1', '2', '3'], 'Column': ['4', '5', '6']}, index=index) metadata = SampleMetadata(df) self.assertEqual(set(metadata.columns), {'column', 'Column'}) def test_categorical_column_leading_trailing_whitespace_value(self): md1 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': ['foo', ' bar ', 'baz']}, index=pd.Index(['a', 'b', 'c'], name='id'))) md2 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': ['foo', 'bar', 'baz']}, index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(md1, md2) def test_leading_trailing_whitespace_id(self): md1 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': [4, 5, 6]}, index=pd.Index(['a', ' b ', 'c'], name='id'))) md2 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(md1, md2) def test_leading_trailing_whitespace_column_name(self): md1 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], ' col2 ': [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) md2 = SampleMetadata(pd.DataFrame( {'col1': [1, 2, 3], 'col2': [4, 5, 6]}, index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(md1, md2) class TestRepr(unittest.TestCase): def test_singular(self): md = SampleMetadata(pd.DataFrame({'col1': [42]}, index=pd.Index(['a'], name='id'))) obs = repr(md) self.assertIn('Metadata', obs) self.assertIn('1 ID x 1 
column', obs) self.assertIn("col1: ColumnProperties(type='numeric'," " missing_scheme='blank')", obs) def test_plural(self): md = SampleMetadata(pd.DataFrame({'col1': [42, 42], 'col2': ['foo', 'bar']}, index=pd.Index(['a', 'b'], name='id'))) obs = repr(md) self.assertIn('Metadata', obs) self.assertIn('2 IDs x 2 columns', obs) self.assertIn("col1: ColumnProperties(type='numeric'," " missing_scheme='blank')", obs) self.assertIn("col2: ColumnProperties(type='categorical'," " missing_scheme='blank')", obs) def test_column_name_padding(self): data = [[0, 42, 'foo']] index = pd.Index(['my-id'], name='id') columns = ['col1', 'longer-column-name', 'c'] md = SampleMetadata(pd.DataFrame(data, index=index, columns=columns)) obs = repr(md) self.assertIn('Metadata', obs) self.assertIn('1 ID x 3 columns', obs) self.assertIn( "col1: ColumnProperties(type='numeric'," " missing_scheme='blank')", obs) self.assertIn( "longer-column-name: ColumnProperties(type='numeric'," " missing_scheme='blank')", obs) self.assertIn( "c: ColumnProperties(type='categorical'," " missing_scheme='blank')", obs) class TestToDataframe(unittest.TestCase): def test_minimal(self): df = pd.DataFrame({}, index=pd.Index(['id1'], name='id')) md = SampleMetadata(df) obs = md.to_dataframe() pd.testing.assert_frame_equal(obs, df) def test_id_header_preserved(self): df = pd.DataFrame({'col1': [42, 2.5], 'col2': ['foo', 'bar']}, index=pd.Index(['id1', 'id2'], name='#SampleID')) md = SampleMetadata(df) obs = md.to_dataframe() pd.testing.assert_frame_equal(obs, df) self.assertEqual(obs.index.name, '#SampleID') def test_dataframe_copy(self): df = pd.DataFrame({'col1': [42, 2.5], 'col2': ['foo', 'bar']}, index=pd.Index(['id1', 'id2'], name='id')) md = SampleMetadata(df) obs = md.to_dataframe() pd.testing.assert_frame_equal(obs, df) self.assertIsNot(obs, df) def test_retains_column_order(self): index = pd.Index(['id1', 'id2'], name='id') columns = ['z', 'a', 'ch'] data = [ [1.0, 'a', 'foo'], [2.0, 'b', 'bar'] ] df = 
pd.DataFrame(data, index=index, columns=columns) md = SampleMetadata(df) obs = md.to_dataframe() pd.testing.assert_frame_equal(obs, df) self.assertEqual(obs.columns.tolist(), ['z', 'a', 'ch']) def test_missing_data(self): # Different missing data representations should be normalized to np.nan index = pd.Index(['None', 'nan', 'NA', 'id1'], name='id') df = pd.DataFrame(collections.OrderedDict([ ('col1', [42.5, np.nan, float('nan'), 3]), ('NA', [np.nan, 'foo', float('nan'), None]), ('col3', ['null', 'N/A', np.nan, 'NA']), ('col4', np.array([np.nan, np.nan, np.nan, np.nan], dtype=object))]), index=index) md = SampleMetadata(df) obs = md.to_dataframe() exp = pd.DataFrame(collections.OrderedDict([ ('col1', [42.5, np.nan, np.nan, 3.0]), ('NA', [np.nan, 'foo', np.nan, np.nan]), ('col3', ['null', 'N/A', np.nan, 'NA']), ('col4', np.array([np.nan, np.nan, np.nan, np.nan], dtype=object))]), index=index) pd.testing.assert_frame_equal(obs, exp) self.assertEqual(obs.dtypes.to_dict(), {'col1': np.float64, 'NA': object, 'col3': object, 'col4': object}) self.assertTrue(np.isnan(obs['col1']['NA'])) self.assertTrue(np.isnan(obs['NA']['NA'])) self.assertTrue(np.isnan(obs['NA']['id1'])) def test_dtype_int_normalized_to_dtype_float(self): index = pd.Index(['id1', 'id2', 'id3'], name='id') df = pd.DataFrame({'col1': [42, -43, 0], 'col2': [42.0, -43.0, 0.0], 'col3': [42, np.nan, 0]}, index=index) self.assertEqual(df.dtypes.to_dict(), {'col1': np.int64, 'col2': np.float64, 'col3': np.float64}) md = SampleMetadata(df) obs = md.to_dataframe() exp = pd.DataFrame({'col1': [42.0, -43.0, 0.0], 'col2': [42.0, -43.0, 0.0], 'col3': [42.0, np.nan, 0.0]}, index=index) pd.testing.assert_frame_equal(obs, exp) self.assertEqual(obs.dtypes.to_dict(), {'col1': np.float64, 'col2': np.float64, 'col3': np.float64}) def test_encode_missing_no_missing(self): df = pd.DataFrame({'col1': [42.0, 50.0], 'col2': ['foo', 'bar']}, index=pd.Index(['id1', 'id2'], name='id')) md = SampleMetadata(df, 
default_missing_scheme='INSDC:missing') obs = md.to_dataframe(encode_missing=True) pd.testing.assert_frame_equal(obs, df) self.assertIsNot(obs, df) def test_insdc_missing_encode_missing_true(self): df = pd.DataFrame({'col1': [42, 'missing'], 'col2': ['foo', 'not applicable']}, index=pd.Index(['id1', 'id2'], name='id')) md = SampleMetadata(df, default_missing_scheme='INSDC:missing') obs = md.to_dataframe(encode_missing=True) pd.testing.assert_frame_equal(obs, df) self.assertIsNot(obs, df) def test_insdc_missing_encode_missing_false(self): df = pd.DataFrame({'col1': [42, 'missing'], 'col2': ['foo', 'not applicable']}, index=pd.Index(['id1', 'id2'], name='id')) md = SampleMetadata(df, default_missing_scheme='INSDC:missing') obs = md.to_dataframe() exp = pd.DataFrame({'col1': [42, np.nan], 'col2': ['foo', np.nan]}, index=pd.Index(['id1', 'id2'], name='id')) pd.testing.assert_frame_equal(obs, exp) self.assertIsNot(obs, df) class TestGetIDs(unittest.TestCase): def test_default(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='id')) metadata = SampleMetadata(df) actual = metadata.get_ids() expected = {'S1', 'S2', 'S3'} self.assertEqual(actual, expected) def test_incomplete_where(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='sampleid')) metadata = SampleMetadata(df) where = "Subject='subject-1' AND SampleType=" with self.assertRaises(ValueError): metadata.get_ids(where) where = "Subject=" with self.assertRaises(ValueError): metadata.get_ids(where) def test_invalid_where(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='sampleid')) metadata = SampleMetadata(df) where = "not-a-column-name='subject-1'" with self.assertRaises(ValueError): 
metadata.get_ids(where) def test_empty_result(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='id')) metadata = SampleMetadata(df) where = "Subject='subject-3'" actual = metadata.get_ids(where) expected = set() self.assertEqual(actual, expected) def test_simple_expression(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='id')) metadata = SampleMetadata(df) where = "Subject='subject-1'" actual = metadata.get_ids(where) expected = {'S1', 'S2'} self.assertEqual(actual, expected) where = "Subject='subject-2'" actual = metadata.get_ids(where) expected = {'S3'} self.assertEqual(actual, expected) where = "Subject='subject-3'" actual = metadata.get_ids(where) expected = set() self.assertEqual(actual, expected) where = "SampleType='gut'" actual = metadata.get_ids(where) expected = {'S1', 'S3'} self.assertEqual(actual, expected) where = "SampleType='tongue'" actual = metadata.get_ids(where) expected = {'S2'} self.assertEqual(actual, expected) def test_more_complex_expressions(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='id')) metadata = SampleMetadata(df) where = "Subject='subject-1' OR Subject='subject-2'" actual = metadata.get_ids(where) expected = {'S1', 'S2', 'S3'} self.assertEqual(actual, expected) where = "Subject='subject-1' AND Subject='subject-2'" actual = metadata.get_ids(where) expected = set() self.assertEqual(actual, expected) where = "Subject='subject-1' AND SampleType='gut'" actual = metadata.get_ids(where) expected = {'S1'} self.assertEqual(actual, expected) def test_query_by_id(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'SampleType': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], 
name='id')) metadata = SampleMetadata(df) actual = metadata.get_ids(where="id='S2' OR id='S1'") expected = {'S1', 'S2'} self.assertEqual(actual, expected) def test_query_by_alternate_id_header(self): metadata = SampleMetadata(pd.DataFrame( {}, index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID'))) obs = metadata.get_ids(where="\"#OTU ID\" IN ('id2', 'id3')") exp = {'id2', 'id3'} self.assertEqual(obs, exp) def test_no_columns(self): metadata = SampleMetadata( pd.DataFrame({}, index=pd.Index(['a', 'b', 'my-id'], name='id'))) obs = metadata.get_ids() exp = {'a', 'b', 'my-id'} self.assertEqual(obs, exp) def test_query_mixed_column_types(self): df = pd.DataFrame({'Name': ['Foo', 'Bar', 'Baz', 'Baaz'], # numbers that would sort incorrectly as strings 'Age': [9, 10, 11, 101], 'Age_Str': ['9', '10', '11', '101'], 'Weight': [80.5, 85.3, np.nan, 120.0]}, index=pd.Index(['S1', 'S2', 'S3', 'S4'], name='id')) metadata = SampleMetadata(df) # string pattern matching obs = metadata.get_ids(where="Name LIKE 'Ba_'") exp = {'S2', 'S3'} self.assertEqual(obs, exp) # string comparison obs = metadata.get_ids(where="Age_Str >= 11") exp = {'S1', 'S3'} self.assertEqual(obs, exp) # numeric comparison obs = metadata.get_ids(where="Age >= 11") exp = {'S3', 'S4'} self.assertEqual(obs, exp) # numeric comparison with missing data obs = metadata.get_ids(where="Weight < 100") exp = {'S1', 'S2'} self.assertEqual(obs, exp) def test_column_with_space_in_name(self): df = pd.DataFrame({'Subject': ['subject-1', 'subject-1', 'subject-2'], 'Sample Type': ['gut', 'tongue', 'gut']}, index=pd.Index(['S1', 'S2', 'S3'], name='id')) metadata = SampleMetadata(df) with warnings.catch_warnings(record=True) as w: warnings.simplefilter('always') metadata.get_ids() # The list of captured warnings should be empty self.assertFalse(w) if __name__ == '__main__': unittest.main() 
scikit-bio-0.6.2/skbio/metadata/tests/test_metadata_column.py000066400000000000000000001006611464262511300243630ustar00rootroot00000000000000import os.path import tempfile import unittest import pandas as pd import numpy as np from skbio.metadata._metadata import (MetadataColumn, CategoricalMetadataColumn, NumericMetadataColumn) # Dummy class for testing MetadataColumn ABC class DummyMetadataColumn(MetadataColumn): type = 'dummy' @classmethod def _is_supported_dtype(cls, dtype): return dtype == 'float' or dtype == 'int' or dtype == 'int64' @classmethod def _normalize_(cls, series): return series.astype(float, copy=True, errors='raise') class TestInvalidMetadataColumnConstruction(unittest.TestCase): def test_non_series(self): with self.assertRaisesRegex( TypeError, 'DummyMetadataColumn constructor.*Series.*not.*' 'DataFrame'): DummyMetadataColumn(pd.DataFrame( {'col1': [1, 2, 3]}, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_no_ids(self): with self.assertRaisesRegex(ValueError, 'DummyMetadataColumn.*at least one ID'): DummyMetadataColumn(pd.Series([], name='col', index=pd.Index([], name='id'), dtype=object)) def test_invalid_id_header(self): # default index name with self.assertRaisesRegex(ValueError, r'Index\.name.*None'): DummyMetadataColumn(pd.Series([1, 2, 3], name='col', index=pd.Index(['a', 'b', 'c'], dtype=object))) with self.assertRaisesRegex(ValueError, r'Index\.name.*my-id-header'): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', 'b', 'c'], name='my-id-header'))) def test_non_str_id(self): with self.assertRaisesRegex( TypeError, 'non-string metadata ID.*type.*float.*nan'): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', np.nan, 'c'], name='id'))) def test_non_str_column_name(self): # default series name with self.assertRaisesRegex( TypeError, 'non-string metadata column name.*type.*' 'NoneType.*None'): DummyMetadataColumn(pd.Series( [1, 2, 3], index=pd.Index(['a', 'b', 'c'], name='id'))) with 
self.assertRaisesRegex( TypeError, 'non-string metadata column name.*type.*' 'float.*nan'): DummyMetadataColumn(pd.Series( [1, 2, 3], name=np.nan, index=pd.Index(['a', 'b', 'c'], name='id'))) def test_empty_id(self): with self.assertRaisesRegex( ValueError, 'empty metadata ID.*at least one character'): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', '', 'c'], name='id'))) def test_empty_column_name(self): with self.assertRaisesRegex( ValueError, 'empty metadata column name.*' 'at least one character'): DummyMetadataColumn(pd.Series( [1, 2, 3], name='', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_pound_sign_id(self): with self.assertRaisesRegex( ValueError, "metadata ID.*begins with a pound sign.*'#b'"): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', '#b', 'c'], name='id'))) def test_id_conflicts_with_id_header(self): with self.assertRaisesRegex( ValueError, "metadata ID 'sample-id'.*conflicts.*reserved.*" "ID header"): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', 'sample-id', 'c'], name='id'))) def test_column_name_conflicts_with_id_header(self): with self.assertRaisesRegex( ValueError, "metadata column name 'featureid'.*conflicts.*" "reserved.*ID header"): DummyMetadataColumn(pd.Series( [1, 2, 3], name='featureid', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_duplicate_ids(self): with self.assertRaisesRegex(ValueError, "Metadata IDs.*unique.*'a'"): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col', index=pd.Index(['a', 'b', 'a'], name='id'))) def test_unsupported_column_dtype(self): with self.assertRaisesRegex( TypeError, "DummyMetadataColumn 'col1' does not support.*" "Series.*dtype.*bool"): DummyMetadataColumn(pd.Series( [True, False, True], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_unknown_missing_scheme(self): with self.assertRaisesRegex(ValueError, "BAD:SCHEME"): DummyMetadataColumn(pd.Series( [1, 2, 3], name='col1', 
index=pd.Index(['a', 'b', 'c'], name='id')), missing_scheme='BAD:SCHEME') def test_missing_q2_error(self): with self.assertRaisesRegex(ValueError, "col1.*no-missing"): DummyMetadataColumn(pd.Series( [1, np.nan, 3], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')), missing_scheme='no-missing') class TestMetadataColumnConstructionAndProperties(unittest.TestCase): def test_single_id(self): index = pd.Index(['id1'], name='id') series = pd.Series([42], name='col1', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 1) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('id1',)) self.assertEqual(mdc.name, 'col1') def test_multiple_ids(self): index = pd.Index(['id1', 'a', 'my-id'], name='id') series = pd.Series([42, 4.2, -4.2], name='column', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 3) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('id1', 'a', 'my-id')) self.assertEqual(mdc.name, 'column') def test_supported_id_headers(self): case_insensitive = { 'id', 'sampleid', 'sample id', 'sample-id', 'featureid', 'feature id', 'feature-id' } exact_match = { '#SampleID', '#Sample ID', '#OTUID', '#OTU ID', 'sample_name' } # Build a set of supported headers, including exact matches and headers # with different casing. headers = set() for header in case_insensitive: headers.add(header) headers.add(header.upper()) headers.add(header.title()) for header in exact_match: headers.add(header) count = 0 for header in headers: index = pd.Index(['id1', 'id2'], name=header) series = pd.Series([0, 123], name='column', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_header, header) count += 1 # Since this test case is a little complicated, make sure that the # expected number of comparisons are happening. 
self.assertEqual(count, 26) def test_recommended_ids(self): index = pd.Index(['c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID'], name='id') series = pd.Series([-1, -2], name='col1', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 2) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('c6ca034a-223f-40b4-a0e0-45942912a5ea', 'My.ID')) self.assertEqual(mdc.name, 'col1') def test_non_standard_characters(self): index = pd.Index(['©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5'], name='id') series = pd.Series([0, 1, 2, 3, 4], name='↩c@l1™', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 5) self.assertEqual(mdc.id_header, 'id') self.assertEqual( mdc.ids, ('©id##1', '((id))2', "'id_3<>'", '"id#4"', 'i d\r\t\n5')) self.assertEqual(mdc.name, '↩c@l1™') def test_missing_data(self): index = pd.Index(['None', 'nan', 'NA'], name='id') series = pd.Series([np.nan, np.nan, np.nan], name='NA', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 3) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('None', 'nan', 'NA')) self.assertEqual(mdc.name, 'NA') def test_missing_insdc(self): index = pd.Index(['None', 'nan', 'NA'], name='id') # TODO: note we cannot make a numeric style column of entirely encoded # nans, as there's no way to indicate the true type of the column series = pd.Series(['missing', 'not applicable', 5.0], name='NA', index=index) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') self.assertEqual(mdc.id_count, 3) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('None', 'nan', 'NA')) self.assertEqual(mdc.name, 'NA') pd.testing.assert_series_equal( mdc.to_series(), pd.Series( [np.nan, np.nan, 5.0], name='NA', index=index)) def test_does_not_cast_ids_or_column_name(self): index = pd.Index(['0.000001', '0.004000', '0.000000'], dtype=object, name='id') series = pd.Series([2.0, 1.0, 3.0], name='42.0', index=index) mdc = 
DummyMetadataColumn(series) self.assertEqual(mdc.id_count, 3) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('0.000001', '0.004000', '0.000000')) self.assertEqual(mdc.name, '42.0') def test_case_insensitive_duplicate_ids(self): index = pd.Index(['a', 'b', 'A'], name='id') series = pd.Series([1, 2, 3], name='column', index=index) mdc = DummyMetadataColumn(series) self.assertEqual(mdc.ids, ('a', 'b', 'A')) class TestRepr(unittest.TestCase): def test_single_id(self): mdc = DummyMetadataColumn(pd.Series( [42], name='foo', index=pd.Index(['id1'], name='id'))) obs = repr(mdc) self.assertEqual(obs, "") def test_multiple_ids(self): mdc = DummyMetadataColumn(pd.Series( [42, 43, 44], name='my column', index=pd.Index(['id1', 'id2', 'id3'], name='id'))) obs = repr(mdc) self.assertEqual( obs, "") # Extensive tests of the MetadataWriter are performed in test_io.py. This test # is a sanity check that a new MetadataColumn subclass (DummyMetadataColumn) # can be written to disk with its column type preserved. This test would have # caught a bug in the original implementation of MetadataColumn.save(), which # converted itself into a Metadata object, losing the "dummy" column type and # replacing it with "numeric". In order for a MetadataColumn to turn itself # into a Metadata object in a lossless/safe way, the Metadata constructor needs # a `column_types` parameter to preserve column types. 
class TestSave(unittest.TestCase): def setUp(self): self.temp_dir_obj = tempfile.TemporaryDirectory( prefix='qiime2-metadata-tests-temp-') self.temp_dir = self.temp_dir_obj.name self.filepath = os.path.join(self.temp_dir, 'metadata.tsv') def tearDown(self): self.temp_dir_obj.cleanup() def test_basic(self): mdc = DummyMetadataColumn(pd.Series( [42, 42.5, -999.123], name='dummy-column', index=pd.Index(['id1', 'id2', 'id3'], name='id'))) mdc.save(self.filepath) with open(self.filepath, 'r') as fh: obs = fh.read() exp = ( "id\tdummy-column\n" "#sk:types\tdummy\n" "id1\t42\n" "id2\t42.5\n" "id3\t-999.123\n" ) self.assertEqual(obs, exp) class TestToSeries(unittest.TestCase): def test_single_id(self): series = pd.Series([0.0], name='col', index=pd.Index(['id1'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.to_series() pd.testing.assert_series_equal(obs, series) def test_multiple_ids(self): series = pd.Series([-1.5, np.nan, 42], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.to_series() pd.testing.assert_series_equal(obs, series) def test_id_header_preserved(self): series = pd.Series( [-1.5, 0.0, 42], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='#OTU ID')) mdc = DummyMetadataColumn(series) obs = mdc.to_series() pd.testing.assert_series_equal(obs, series) self.assertEqual(obs.index.name, '#OTU ID') def test_series_copy(self): series = pd.Series([1, 2.5, 3], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.to_series() pd.testing.assert_series_equal(obs, series) self.assertIsNot(obs, series) def test_encode_missing_no_missing(self): series = pd.Series([1, 2.5, 3], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_series(encode_missing=True) pd.testing.assert_series_equal(obs, series) self.assertIsNot(obs, series) def test_encode_missing_true(self): 
series = pd.Series([1, 2.5, 'missing'], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_series(encode_missing=True) pd.testing.assert_series_equal(obs, series) self.assertIsNot(obs, series) def test_encode_missing_false(self): series = pd.Series([1, 2.5, 'missing'], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_series() exp = pd.Series([1, 2.5, np.nan], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertIsNot(obs, series) class TestToDataframe(unittest.TestCase): def test_single_id(self): series = pd.Series([0.0], name='col', index=pd.Index(['id1'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.to_dataframe() exp = pd.DataFrame({'col': [0.0]}, index=pd.Index(['id1'], name='id')) pd.testing.assert_frame_equal(obs, exp) def test_multiple_ids(self): series = pd.Series([0.0, 4.2, np.nan], name='my column', index=pd.Index(['a', 'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.to_dataframe() exp = pd.DataFrame({'my column': [0.0, 4.2, np.nan]}, index=pd.Index(['a', 'b', 'c'], name='id')) pd.testing.assert_frame_equal(obs, exp) def test_id_header_preserved(self): series = pd.Series([0.0, 4.2, 123], name='my column', index=pd.Index(['a', 'b', 'c'], name='#Sample ID')) mdc = DummyMetadataColumn(series) obs = mdc.to_dataframe() exp = pd.DataFrame({'my column': [0.0, 4.2, 123]}, index=pd.Index(['a', 'b', 'c'], name='#Sample ID')) pd.testing.assert_frame_equal(obs, exp) self.assertEqual(obs.index.name, '#Sample ID') def test_encode_missing_no_missing(self): series = pd.Series([1, 2.5, 3], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_dataframe(encode_missing=True) exp = pd.DataFrame({'col': series}, 
index=series.index) pd.testing.assert_frame_equal(obs, exp) def test_encode_missing_true(self): series = pd.Series([1, 2.5, 'missing'], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_dataframe(encode_missing=True) exp = pd.DataFrame({'col': series}, index=series.index) pd.testing.assert_frame_equal(obs, exp) def test_encode_missing_false(self): series = pd.Series([1, 2.5, 'missing'], name='col', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.to_dataframe() exp = pd.DataFrame({'col': [1, 2.5, np.nan]}, index=series.index) pd.testing.assert_frame_equal(obs, exp) class TestGetValue(unittest.TestCase): def test_id_not_found(self): series = pd.Series([1, 2, 3], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) with self.assertRaisesRegex( ValueError, "'d' is not present.*DummyMetadataColumn.*'col1'"): mdc.get_value('d') def test_get_value(self): series = pd.Series([1, 2, np.nan], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.get_value('a') self.assertEqual(obs, 1.0) obs = mdc.get_value('b') self.assertEqual(obs, 2.0) obs = mdc.get_value('c') self.assertTrue(np.isnan(obs)) class TestHasMissingValues(unittest.TestCase): def test_no_missing_values(self): series = pd.Series([0.0, 2.2, 3.3], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.has_missing_values() self.assertEqual(obs, False) def test_with_missing_values(self): series = pd.Series([0.0, np.nan, 3.3], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.has_missing_values() self.assertEqual(obs, True) class TestDropMissingValues(unittest.TestCase): def test_no_missing_values(self): series = pd.Series([0.0, 2.2, 3.3], name='col1', index=pd.Index(['a', 
'b', 'c'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.drop_missing_values() self.assertEqual(obs, mdc) self.assertIsNot(obs, mdc) def test_with_missing_values(self): series = pd.Series( [0.0, np.nan, 3.3, np.nan, np.nan, 4.4], name='col1', index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sampleid')) mdc = DummyMetadataColumn(series) obs = mdc.drop_missing_values() exp = DummyMetadataColumn(pd.Series( [0.0, 3.3, 4.4], name='col1', index=pd.Index(['a', 'c', 'f'], name='sampleid'))) self.assertEqual(obs, exp) def test_with_missing_scheme(self): series = pd.Series( [0.0, np.nan, 3.3, 'missing', 'not applicable', 4.4], name='col1', index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sampleid')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') obs = mdc.drop_missing_values() exp = DummyMetadataColumn(pd.Series( [0.0, 3.3, 4.4], name='col1', index=pd.Index(['a', 'c', 'f'], name='sampleid'))) self.assertEqual(obs, exp) class TestGetIDs(unittest.TestCase): def test_single_id(self): series = pd.Series([1.234], name='col1', index=pd.Index(['my id'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.get_ids() self.assertEqual(obs, {'my id'}) def test_multiple_ids(self): series = pd.Series( [1.234, np.nan, 5.67, np.nan, 8.9], name='col1', index=pd.Index(['id1', 'id2', 'id3', 'id4', 'id5'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.get_ids() self.assertEqual(obs, {'id1', 'id2', 'id3', 'id4', 'id5'}) def test_where_values_missing(self): series = pd.Series( [1.234, np.nan, 5.67, np.nan, 8.9], name='col1', index=pd.Index(['id1', 'id2', 'id3', 'id4', 'id5'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.get_ids(where_values_missing=True) self.assertEqual(obs, {'id2', 'id4'}) def test_where_values_missing_all_missing(self): series = pd.Series( [np.nan, np.nan, np.nan], name='col1', index=pd.Index(['id1', 'id2', 'id3'], name='id')) mdc = DummyMetadataColumn(series) obs = mdc.get_ids(where_values_missing=True) 
self.assertEqual(obs, {'id1', 'id2', 'id3'}) class TestGetMissing(unittest.TestCase): def test_missing_mixed(self): series = pd.Series( [0.0, np.nan, 3.3, 'missing', 'not applicable', 4.4], name='col1', index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sampleid')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') missing = mdc.get_missing() exp = pd.Series([np.nan, 'missing', 'not applicable'], name='col1', index=pd.Index(['b', 'd', 'e'], name='sampleid')) pd.testing.assert_series_equal(missing, exp) def test_missing_blanks(self): series = pd.Series( [0.0, np.nan, 3.3, np.nan, np.nan, 4.4], name='col1', index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sampleid')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') missing = mdc.get_missing() exp = pd.Series([np.nan, np.nan, np.nan], name='col1', dtype=object, index=pd.Index(['b', 'd', 'e'], name='sampleid')) pd.testing.assert_series_equal(missing, exp) def test_no_missing(self): series = pd.Series( [0.0, 1.1, 3.3, 3.5, 4.0, 4.4], name='col1', index=pd.Index(['a', 'b', 'c', 'd', 'e', 'f'], name='sampleid')) mdc = DummyMetadataColumn(series, missing_scheme='INSDC:missing') missing = mdc.get_missing() exp = pd.Series([], name='col1', dtype=object, index=pd.Index([], name='sampleid')) pd.testing.assert_series_equal(missing, exp) # The tests for CategoricalMetadataColumn and NumericMetadataColumn only test # behavior specific to these subclasses. More extensive tests of these objects # are performed above by testing the MetadataColumn ABC in a generic way. 
class TestCategoricalMetadataColumn(unittest.TestCase): def test_unsupported_dtype(self): with self.assertRaisesRegex( TypeError, "CategoricalMetadataColumn 'col1' does not support" ".*Series.*dtype.*float64"): CategoricalMetadataColumn(pd.Series( [42.5, 42.6, 42.7], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_unsupported_type_value(self): with self.assertRaisesRegex( TypeError, "CategoricalMetadataColumn.*strings or missing " r"values.*42\.5.*float.*'col1'"): CategoricalMetadataColumn(pd.Series( ['foo', 'bar', 42.5], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_empty_str_value(self): with self.assertRaisesRegex( ValueError, "CategoricalMetadataColumn.*empty strings.*" "column 'col1'"): CategoricalMetadataColumn(pd.Series( ['foo', '', 'bar'], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_type_property(self): self.assertEqual(CategoricalMetadataColumn.type, 'categorical') def test_supported_dtype(self): series = pd.Series( ['foo', np.nan, 'bar', 'foo'], name='my column', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) mdc = CategoricalMetadataColumn(series) self.assertEqual(mdc.id_count, 4) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd')) self.assertEqual(mdc.name, 'my column') obs_series = mdc.to_series() pd.testing.assert_series_equal(obs_series, series) self.assertEqual(obs_series.dtype, object) def test_numeric_strings_preserved_as_strings(self): series = pd.Series( ['1', np.nan, '2.5', '3.0'], name='my column', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) mdc = CategoricalMetadataColumn(series) self.assertEqual(mdc.id_count, 4) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd')) self.assertEqual(mdc.name, 'my column') obs_series = mdc.to_series() pd.testing.assert_series_equal(obs_series, series) self.assertEqual(obs_series.dtype, object) def test_missing_data_normalized(self): # Different missing data representations 
should be normalized to np.nan mdc = CategoricalMetadataColumn(pd.Series( [np.nan, 'foo', float('nan'), None], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id'))) obs = mdc.to_series() exp = pd.Series( [np.nan, 'foo', np.nan, np.nan], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, object) self.assertTrue(np.isnan(obs['a'])) self.assertTrue(np.isnan(obs['c'])) self.assertTrue(np.isnan(obs['d'])) def test_all_missing_data(self): mdc = CategoricalMetadataColumn(pd.Series( np.array([np.nan, np.nan, np.nan], dtype=object), name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) obs = mdc.to_series() exp = pd.Series( np.array([np.nan, np.nan, np.nan], dtype=object), name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, object) def test_leading_trailing_whitespace_value(self): col1 = CategoricalMetadataColumn(pd.Series( ['foo', ' bar ', 'baz'], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) col2 = CategoricalMetadataColumn(pd.Series( ['foo', 'bar', 'baz'], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(col1, col2) def test_leading_trailing_whitespace_id(self): col1 = CategoricalMetadataColumn(pd.Series( ['foo', ' bar ', 'baz'], name='col', index=pd.Index(['a', ' b ', 'c'], name='id'))) col2 = CategoricalMetadataColumn(pd.Series( ['foo', ' bar ', 'baz'], name='col', index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(col1, col2) def test_leading_trailing_whitespace_column_name(self): col1 = CategoricalMetadataColumn(pd.Series( ['foo', ' bar ', 'baz'], name=' col ', index=pd.Index(['a', 'b', 'c'], name='id'))) col2 = CategoricalMetadataColumn(pd.Series( ['foo', ' bar ', 'baz'], name='col', index=pd.Index(['a', 'b', 'c'], name='id'))) self.assertEqual(col1, col2) def test_missing_insdc(self): mdc = CategoricalMetadataColumn(pd.Series( ['missing', 'foo', 
float('nan'), None], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')), missing_scheme='INSDC:missing') obs = mdc.to_series() exp = pd.Series( [np.nan, 'foo', np.nan, np.nan], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, object) self.assertTrue(np.isnan(obs['a'])) self.assertTrue(np.isnan(obs['c'])) self.assertTrue(np.isnan(obs['d'])) class TestNumericMetadataColumn(unittest.TestCase): def test_unsupported_dtype(self): with self.assertRaisesRegex( TypeError, "NumericMetadataColumn 'col1' does not support" ".*Series.*dtype.*bool"): NumericMetadataColumn(pd.Series( [True, False, True], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_infinity_value(self): with self.assertRaisesRegex( ValueError, "NumericMetadataColumn.*positive or negative " "infinity.*column 'col1'"): NumericMetadataColumn(pd.Series( [42, float('+inf'), 4.3], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) def test_type_property(self): self.assertEqual(NumericMetadataColumn.type, 'numeric') def test_supported_dtype_float(self): series = pd.Series( [1.23, np.nan, 4.56, -7.891], name='my column', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) mdc = NumericMetadataColumn(series) self.assertEqual(mdc.id_count, 4) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd')) self.assertEqual(mdc.name, 'my column') obs_series = mdc.to_series() pd.testing.assert_series_equal(obs_series, series) self.assertEqual(obs_series.dtype, np.float64) def test_supported_dtype_int(self): series = pd.Series( [0, 1, 42, -2], name='my column', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) mdc = NumericMetadataColumn(series) self.assertEqual(mdc.id_count, 4) self.assertEqual(mdc.id_header, 'id') self.assertEqual(mdc.ids, ('a', 'b', 'c', 'd')) self.assertEqual(mdc.name, 'my column') obs_series = mdc.to_series() exp_series = pd.Series( [0.0, 1.0, 42.0, -2.0], name='my 
column', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) pd.testing.assert_series_equal(obs_series, exp_series) self.assertEqual(obs_series.dtype, np.float64) def test_missing_data_normalized(self): # Different missing data representations should be normalized to np.nan mdc = NumericMetadataColumn(pd.Series( [np.nan, 4.2, float('nan'), -5.678], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id'))) obs = mdc.to_series() exp = pd.Series( [np.nan, 4.2, np.nan, -5.678], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, np.float64) self.assertTrue(np.isnan(obs['a'])) self.assertTrue(np.isnan(obs['c'])) def test_all_missing_data(self): mdc = NumericMetadataColumn(pd.Series( [np.nan, np.nan, np.nan], name='col1', index=pd.Index(['a', 'b', 'c'], name='id'))) obs = mdc.to_series() exp = pd.Series( [np.nan, np.nan, np.nan], name='col1', index=pd.Index(['a', 'b', 'c'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, np.float64) def test_missing_insdc(self): mdc = NumericMetadataColumn(pd.Series( ['missing', 4.2, float('nan'), -5.678], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')), missing_scheme='INSDC:missing') obs = mdc.to_series() exp = pd.Series( [np.nan, 4.2, np.nan, -5.678], name='col1', index=pd.Index(['a', 'b', 'c', 'd'], name='id')) pd.testing.assert_series_equal(obs, exp) self.assertEqual(obs.dtype, np.float64) self.assertTrue(np.isnan(obs['a'])) self.assertTrue(np.isnan(obs['c'])) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/metadata/tests/test_missing.py000066400000000000000000000062441464262511300227010ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2016-2023, QIIME 2 development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import pandas as pd import pandas.testing as pdt import numpy as np from skbio.metadata.missing import series_encode_missing, series_extract_missing class RoundTripMixin: def check_roundtrip(self, real_value, dtype): notna_exp = [real_value] series = pd.Series(notna_exp + self.missing_terms) encoded = series_encode_missing(series, self.enum) missing = series_extract_missing(encoded) self.assertEqual(encoded.dtype, dtype) # the non-null side of the series self.assertEqual(list(encoded[encoded.notna()]), notna_exp) # the null end (but in the original vocabulary) pdt.assert_series_equal(missing, series[1:].astype(object)) def test_roundtrip_float(self): self.check_roundtrip(0.05, float) def test_roundtrip_string(self): self.check_roundtrip('hello', object) def test_roundtrip_int(self): self.check_roundtrip(42, float) def test_roundtrip_bool(self): self.check_roundtrip(True, object) def test_roundtrip_all_missing_object(self): expected = [None, float('nan')] + self.missing_terms series = pd.Series(expected, dtype=object) encoded = series_encode_missing(series, self.enum) missing = series_extract_missing(encoded) self.assertEqual(encoded.dtype, object) pdt.assert_series_equal(missing, series.astype(object)) class TestISNDC(RoundTripMixin, unittest.TestCase): def setUp(self): self.enum = 'INSDC:missing' self.missing_terms = ['not applicable', 'missing', 'not collected', 'not provided', 'restricted access'] class TestOmitted(RoundTripMixin, unittest.TestCase): def setUp(self): self.enum = 'blank' self.missing_terms = [None, float('nan')] # test_roundtrip_all_missing_float is not possible with other schemes def test_roundtrip_all_missing_float(self): expected = [None, float('nan')] + self.missing_terms series = pd.Series(expected, dtype=float) encoded = series_encode_missing(series, self.enum) missing = series_extract_missing(encoded) self.assertEqual(encoded.dtype, float) 
pdt.assert_series_equal(missing, series.astype(object)) class TestError(RoundTripMixin, unittest.TestCase): def setUp(self): self.enum = 'no-missing' self.missing_terms = [] # no missing values, so bool and int are not object and float def test_roundtrip_bool(self): self.check_roundtrip(True, bool) def test_roundtrip_int(self): self.check_roundtrip(42, np.int64) def test_roundtrip_all_missing_object(self): with self.assertRaisesRegex(ValueError, 'Missing values.*name=None'): super().test_roundtrip_all_missing_object() if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/metadata/tests/test_mixin.py000066400000000000000000000106461464262511300223550ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest from skbio.metadata._mixin import (MetadataMixin, PositionalMetadataMixin, IntervalMetadataMixin) from skbio.util._decorator import overrides from skbio.util._testing import ReallyEqualMixin from skbio.metadata._testing import (MetadataMixinTests, PositionalMetadataMixinTests, IntervalMetadataMixinTests) class TestMetadataMixin(unittest.TestCase, ReallyEqualMixin, MetadataMixinTests): def setUp(self): class ExampleMetadataMixin(MetadataMixin): def __init__(self, metadata=None): MetadataMixin._init_(self, metadata=metadata) def __eq__(self, other): return MetadataMixin._eq_(self, other) def __ne__(self, other): return MetadataMixin._ne_(self, other) def __copy__(self): copy = self.__class__(metadata=None) copy._metadata = MetadataMixin._copy_(self) return copy def __deepcopy__(self, memo): copy = self.__class__(metadata=None) copy._metadata = MetadataMixin._deepcopy_(self, memo) return copy self._metadata_constructor_ = 
ExampleMetadataMixin class TestPositionalMetadataMixin(unittest.TestCase, ReallyEqualMixin, PositionalMetadataMixinTests): def setUp(self): class ExamplePositionalMetadataMixin(PositionalMetadataMixin): @overrides(PositionalMetadataMixin) def _positional_metadata_axis_len_(self): return self._axis_len def __init__(self, axis_len, positional_metadata=None): self._axis_len = axis_len PositionalMetadataMixin._init_( self, positional_metadata=positional_metadata) def __eq__(self, other): return PositionalMetadataMixin._eq_(self, other) def __ne__(self, other): return PositionalMetadataMixin._ne_(self, other) def __copy__(self): copy = self.__class__(self._axis_len, positional_metadata=None) copy._positional_metadata = \ PositionalMetadataMixin._copy_(self) return copy def __deepcopy__(self, memo): copy = self.__class__(self._axis_len, positional_metadata=None) copy._positional_metadata = \ PositionalMetadataMixin._deepcopy_(self, memo) return copy self._positional_metadata_constructor_ = ExamplePositionalMetadataMixin class TestIntervalMetadataMixin(unittest.TestCase, ReallyEqualMixin, IntervalMetadataMixinTests): def setUp(self): super()._set_up() class ExampleIntervalMetadataMixin(IntervalMetadataMixin): @overrides(IntervalMetadataMixin) def _interval_metadata_axis_len_(self): return self._axis_len def __init__(self, axis_len, interval_metadata=None): self._axis_len = axis_len IntervalMetadataMixin._init_( self, interval_metadata=interval_metadata) def __eq__(self, other): return IntervalMetadataMixin._eq_(self, other) def __ne__(self, other): return IntervalMetadataMixin._ne_(self, other) def __copy__(self): copy = self.__class__(self._axis_len, interval_metadata=None) copy._interval_metadata = IntervalMetadataMixin._copy_(self) return copy def __deepcopy__(self, memo): copy = self.__class__(self._axis_len, interval_metadata=None) copy._interval_metadata = IntervalMetadataMixin._deepcopy_( self, memo) return copy self._interval_metadata_constructor_ = 
ExampleIntervalMetadataMixin if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/sequence/000077500000000000000000000000001464262511300164775ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/sequence/__init__.py000066400000000000000000000177351464262511300206250ustar00rootroot00000000000000r"""Biological Sequences (:mod:`skbio.sequence`) ============================================ .. currentmodule:: skbio.sequence This module provides functionality for storing and working with sequences, including molecular sequences based on IUPAC-defined alphabets (:class:`DNA`, :class:`RNA`, :class:`Protein`), sequences based on custom alphabets (:class:`GrammaredSequence`), and generic/non-biological sequences with no alphabet restrictions (:class:`Sequence`). Additionally, this module defines the :class:`GeneticCode` class, which represents an immutable object that translates DNA or RNA sequences into protein sequences, and the :class:`SubstitutionMatrix` class, which stores scores of substitutions between sequence characters. Sequence types -------------- .. autosummary:: :toctree: generated/ Sequence GrammaredSequence DNA RNA Protein Sequence utilities ------------------ .. autosummary:: :toctree: generated/ GeneticCode SubstitutionMatrix Distance calculation -------------------- .. autosummary:: :toctree: generated/ distance Tutorial -------- The primary information stored for each different type of sequence object is the underlying sequence data itself. This is stored as an immutable NumPy array. Additionally, each type of sequence may include optional metadata and positional metadata. Note that metadata and positional metadata are mutable. Common operations are defined as methods, for example computing the reverse complement of a DNA sequence, or searching for N-glycosylation motifs in protein sequences. Class attributes provide valid character sets, complement maps for different sequence types, and degenerate character definitions. 
New sequences are created with optional metadata and positional metadata. Metadata is stored as a Python ``dict``, while positional metadata is stored as a pandas ``DataFrame``. >>> from skbio import DNA, RNA >>> d = DNA('ACCGGGTA', metadata={'id':"my-sequence", 'description':"GFP"}, ... positional_metadata={'quality':[22, 25, 22, 18, 23, 25, 25, 25]}) >>> d DNA ----------------------------- Metadata: 'description': 'GFP' 'id': 'my-sequence' Positional metadata: 'quality': Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 62.50% ----------------------------- 0 ACCGGGTA New sequences can also be created from existing sequences, for example as their reverse complement or degapped (i.e., unaligned) version. >>> d1 = DNA('.ACC--GGG-TA...', metadata={'id':'my-sequence'}) >>> d2 = d1.degap() >>> d2 DNA -------------------------- Metadata: 'id': 'my-sequence' Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 62.50% -------------------------- 0 ACCGGGTA >>> d3 = d2.reverse_complement() >>> d3 DNA -------------------------- Metadata: 'id': 'my-sequence' Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 62.50% -------------------------- 0 TACCCGGT It's also straightforward to compute distances between sequences (optionally using user-defined distance metrics, the default is Hamming distance which requires that the sequences being compared are the same length) for use in sequence clustering, phylogenetic reconstruction, etc. >>> r1 = RNA('GACCCGCUUU') >>> r2 = RNA('GCCCCCCUUU') >>> r1.distance(r2) 0.2 Similarly, you can calculate the percent (dis)similarity between a pair of aligned sequences. >>> r3 = RNA('ACCGUUAGUC') >>> r4 = RNA('ACGGGU--UC') >>> r3.match_frequency(r4, relative=True) 0.6 >>> r3.mismatch_frequency(r4, relative=True) 0.4 Sequences can be searched for known motif types. This returns the slices that describe the matches. 
>>> r5 = RNA('AGG-GGACUGAA') >>> for motif in r5.find_motifs('purine-run', min_length=2): ... motif slice(0, 3, None) slice(4, 7, None) slice(9, 12, None) Those slices can be used to extract the relevant subsequences. >>> for motif in r5.find_motifs('purine-run', min_length=2): ... r5[motif] ... print('') RNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 66.67% -------------------------- 0 AGG RNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 66.67% -------------------------- 0 GGA RNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 GAA And gaps or other features can be ignored while searching, as these may disrupt otherwise meaningful motifs. >>> for motif in r5.find_motifs('purine-run', min_length=2, ignore=r5.gaps()): ... r5[motif] ... print('') RNA -------------------------- Stats: length: 7 has gaps: True has degenerates: False has definites: True GC-content: 66.67% -------------------------- 0 AGG-GGA RNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 GAA In the above example, removing gaps from the resulting motif matches is easily achieved, as the sliced matches themselves are sequences of the same type as the input. >>> for motif in r5.find_motifs('purine-run', min_length=2, ignore=r5.gaps()): ... r5[motif].degap() ... 
print('') RNA -------------------------- Stats: length: 6 has gaps: False has degenerates: False has definites: True GC-content: 66.67% -------------------------- 0 AGGGGA RNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 GAA Sequences can similarly be searched for arbitrary patterns using regular expressions. >>> for match in r5.find_with_regex('(G+AC[UT])'): ... match slice(4, 9, None) DNA can be transcribed to RNA: >>> dna = DNA('ATGTGTATTTGA') >>> rna = dna.transcribe() >>> rna RNA -------------------------- Stats: length: 12 has gaps: False has degenerates: False has definites: True GC-content: 25.00% -------------------------- 0 AUGUGUAUUU GA Both DNA and RNA can be translated into a protein sequence. For example, let's translate our DNA and RNA sequences using NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> protein_from_dna = dna.translate() >>> protein_from_dna Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MCI* >>> protein_from_rna = rna.translate() >>> protein_from_rna Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MCI* The two translations are equivalent: >>> protein_from_dna == protein_from_rna True Class-level methods contain information about the molecule types. >>> sorted(DNA.degenerate_map['B']) ['C', 'G', 'T'] >>> sorted(RNA.degenerate_map['B']) ['C', 'G', 'U'] """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._sequence import Sequence from ._protein import Protein from ._dna import DNA from ._rna import RNA from ._genetic_code import GeneticCode from ._grammared_sequence import GrammaredSequence from ._substitution import SubstitutionMatrix __all__ = [ "Sequence", "Protein", "DNA", "RNA", "GeneticCode", "GrammaredSequence", "SubstitutionMatrix", ] scikit-bio-0.6.2/skbio/sequence/_alphabet.py000066400000000000000000000203111464262511300207650ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np def _encode_alphabet(alphabet): """Encode an alphabet as a vector of ASCII code points. Parameters ---------- alphabet : str, list, tuple or 1D np.ndarray Input alphabet. Must consist of single ASCII characters. Elements may be string or byte characters, or integers representing code points. Returns ------- 1D np.ndarray of np.uint8 Vector of ASCII code points representing the alphabet. Raises ------ TypeError If alphabet or its components are of a wrong data type. ValueError If some elements are not single characters. ValueError If some code points are beyond the ASCII range. UnicodeEncodeError If some characters are beyond the ASCII range. Notes ----- ASCII has 128 code points (0 to 127) [1]_ (not to be confused with extended ASCII). Therefore, output values are within the range of [0, 127]. References ---------- .. [1] https://en.wikipedia.org/wiki/ASCII """ errmsg = "Alphabet is of an invalid data type." 
# string if isinstance(alphabet, str): alphabet = alphabet.encode("ascii") return np.frombuffer(alphabet, dtype=np.uint8) # list or tuple elif isinstance(alphabet, (list, tuple)): alphabet = np.array(alphabet) # 1d numpy array elif not isinstance(alphabet, np.ndarray): raise TypeError(errmsg) if alphabet.ndim != 1: raise TypeError(errmsg) dtype = alphabet.dtype # integers represent ascii code points if np.issubdtype(dtype, np.integer): # ascii code points are within [0, 127] if np.all((alphabet >= 0) & (alphabet <= 127)): if dtype is np.uint8: return alphabet # cast data type to uint8 else: return alphabet.astype(np.uint8) else: raise ValueError("Not all code points are within the ASCII range.") # encode strings as ascii characters elif np.issubdtype(dtype, np.str_): alphabet = np.char.encode(alphabet, encoding="ascii") # bytes are already encoded elif not np.issubdtype(dtype, np.bytes_): raise TypeError(errmsg) # must be single characters if not (np.char.str_len(alphabet) == 1).all(): raise ValueError("Not all elements are single characters.") return alphabet.view(np.uint8) def _alphabet_to_hashes(alphabet): """Convert an alphabet into a hash table of ASCII code points to indices. Parameters ---------- alphabet : iterable Input alphabet. Must consist of single ASCII characters. Returns ------- np.ndarray of np.uint8 of shape (128,) Hash table of ASCII code points to indices. Raises ------ ValueError If the absence character is not in the alphabet. ValueError If one or multiple characters in the sequence are absent from the alphabet, whereas `absence` is not set. See Also -------- _indices_in_alphabet_ascii Notes ----- The resulting data structure enables efficient conversion of a sequence into indices of characters in an alphabet. The hash table has a constant size of 128, which is the total number of ASCII characters. Code points absent from the alphabet are filled with 255, which is beyond the range of ASCII characters, hence the maximum index in the alphabet. 
""" idx = _encode_alphabet(alphabet) res = np.full(128, 255, dtype=np.uint8) res[idx] = np.arange(idx.size) return res def _indices_in_alphabet(seq, alphabet, wildcard=None): """Convert a sequence into indices of characters in an alphabet. Parameters ---------- seq : iterable Input sequence. alphabet : dict or iterable Input alphabet. Can be a dictionary of characters to indices, or an iterable of other types from which the dictionary will be constructed. wildcard : hashable, optional Character to replace any characters that are absent from the alphabet. If omitted, will raise an error if the latter characters exist. Returns ------- 1D np.ndarray of int Vector of indices of characters in an alphabet. Raises ------ ValueError If the wildcard character is not in the alphabet. ValueError If one or multiple characters in the sequence are absent from the alphabet, whereas `wildcard` is not set. See Also -------- _indices_in_alphabet_ascii Notes ----- This function is versatile to the type of characters. """ if not isinstance(alphabet, dict): alphabet = {x: i for i, x in enumerate(alphabet)} pos = list(map(alphabet.get, seq)) if wildcard is not None: try: wildcard = alphabet[wildcard] except KeyError: raise ValueError( f'Wildcard character "{wildcard}" is not in the ' "alphabet." ) pos = [wildcard if x is None else x for x in pos] elif None in pos: raise ValueError( "One or multiple characters in the sequence are " "absent from the alphabet." ) return np.array(pos) def _indices_in_alphabet_ascii(seq, alphabet, wildcard=None): """Convert a sequence into indices of characters in an ASCII alphabet. Parameters ---------- seq : 1D np.ndarray of int Input sequence as ASCII code points. alphabet : np.ndarray of shape (128,) of int Input alphabet as a hash table of all ASCII code points to character indices, or 255 if absent from the alphabet. wildcard : int, optional Code point of character to replace any characters that are absent from the alphabet. 
If omitted, will raise an error if such characters exist. Returns ------- 1D np.ndarray of uint8 Vector of indices of characters in an alphabet. Raises ------ ValueError If the wildcard character is not in the alphabet. ValueError If one or multiple characters in the sequence are absent from the alphabet, whereas `wildcard` is not set. See Also -------- _indices_in_alphabet _alphabet_to_hashes Notes ----- This function is optimized for single ASCII characters. """ pos = alphabet[seq] absent = pos == 255 if absent.any(): if wildcard is None: raise ValueError( "One or multiple characters in the sequence are " "absent from the alphabet." ) try: assert (wild := alphabet[wildcard]) != 255 except AssertionError: raise ValueError( f'Wildcard character "{chr(wildcard)}" is not in ' "the alphabet." ) pos = np.where(absent, wild, pos) return pos def _indices_in_observed(seqs): """Convert sequences into vectors of indices in observed characters. Parameters ---------- seqs : iterable of iterable Input sequences. Returns ------- list of 1D np.ndarray Vectors of indices representing the sequences. 1D np.ndarray Sorted vector of unique characters observed in the sequences. """ # This function uses np.unique to extract unique characters and their # indices. It applies np.unique on individual sequences, then merges # results. This design is to avoid concatenating too many sequences. 
alpha_lst, index_lst = zip( *[ np.unique(tuple(x) if isinstance(x, str) else x, return_inverse=True) for x in seqs ] ) alpha_union, index_union = np.unique(np.concatenate(alpha_lst), return_inverse=True) index_bounds = np.cumsum([x.size for x in alpha_lst])[:-1] index_chunks = np.split(index_union, index_bounds) index_lst_trans = [x[y] for x, y in zip(index_chunks, index_lst)] return index_lst_trans, alpha_union scikit-bio-0.6.2/skbio/sequence/_dna.py000066400000000000000000000344161464262511300177620ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import skbio from skbio.util._decorator import classproperty, overrides from ._nucleotide_mixin import NucleotideMixin, _motifs as _parent_motifs from ._grammared_sequence import GrammaredSequence class DNA(GrammaredSequence, NucleotideMixin): r"""Store DNA sequence data and optional associated metadata. Parameters ---------- sequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Characters representing the DNA sequence itself. metadata : dict, optional Arbitrary metadata which applies to the entire sequence. positional_metadata : Pandas DataFrame consumable, optional Arbitrary per-character metadata. For example, quality data from sequencing reads. Must be able to be passed directly to the Pandas DataFrame constructor. interval_metadata : IntervalMetadata Arbitrary interval metadata which applies to intervals within a sequence to store interval features (such as genes on the DNA sequence). lowercase : bool or str, optional If ``True``, lowercase sequence characters will be converted to uppercase characters in order to be valid IUPAC DNA characters. 
If ``False``, no characters will be converted. If a str, it will be treated as a key into the positional metadata of the object. All lowercase characters will be converted to uppercase, and a ``True`` value will be stored in a boolean array in the positional metadata under the key. validate : bool, optional If ``True``, validation will be performed to ensure that all sequence characters are in the IUPAC DNA character set. If ``False``, validation will not be performed. Turning off validation will improve runtime performance. If invalid characters are present, however, there is **no guarantee that operations performed on the resulting object will work or behave as expected.** Only turn off validation if you are certain that the sequence characters are valid. To store sequence data that is not IUPAC-compliant, use ``Sequence``. See Also -------- RNA GrammaredSequence Notes ----- According to the IUPAC DNA character set [1]_ , a DNA sequence may contain the following four definite characters (canonical nucleotides): +-----+-----------+ |Code |Nucleobase | +=====+===========+ |``A``|Adenine | +-----+-----------+ |``C``|Cytosine | +-----+-----------+ |``G``|Guanine | +-----+-----------+ |``T``|Thymine | +-----+-----------+ And the following 11 degenerate characters, each of which representing 2-4 nucleotides: +-----+-------------+-----------+ |Code |Nucleobases |Meaning | +=====+=============+===========+ |``R``|A or G |Purine | +-----+-------------+-----------+ |``Y``|C or T |Pyrimidine | +-----+-------------+-----------+ |``S``|G or C |Strong | +-----+-------------+-----------+ |``W``|A or T |Weak | +-----+-------------+-----------+ |``K``|G or T |Keto | +-----+-------------+-----------+ |``M``|A or C |Amino | +-----+-------------+-----------+ |``B``|C, G or T |Not A | +-----+-------------+-----------+ |``D``|A, G or T |Not C | +-----+-------------+-----------+ |``H``|A, C or T |Not G | +-----+-------------+-----------+ |``V``|A, C or G |Not T | 
+-----+-------------+-----------+ |``N``|A, C, G or T |Any | +-----+-------------+-----------+ Plus two gap characters: ``-`` and ``.``. Characters other than the above 17 are not allowed. If you intend to use additional characters to represent non-canonical nucleobases, such as ``I`` (Inosine), you may create a custom alphabet using ``GrammaredSequence``. Directly modifying the alphabet of ``DNA`` may break methods that rely on the IUPAC alphabet. It should be noted that some functions do not support degenerate characters characters. In such cases, they will be replaced with `N` to represent any of the canonical nucleotides. References ---------- .. [1] Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984. Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030. A Cornish-Bowden Examples -------- >>> from skbio import DNA >>> DNA('ACCGAAT') DNA -------------------------- Stats: length: 7 has gaps: False has degenerates: False has definites: True GC-content: 42.86% -------------------------- 0 ACCGAAT Convert lowercase characters to uppercase: >>> DNA('AcCGaaT', lowercase=True) DNA -------------------------- Stats: length: 7 has gaps: False has degenerates: False has definites: True GC-content: 42.86% -------------------------- 0 ACCGAAT """ @classproperty @overrides(NucleotideMixin) def complement_map(cls): comp_map = { "A": "T", "T": "A", "G": "C", "C": "G", "Y": "R", "R": "Y", "S": "S", "W": "W", "K": "M", "M": "K", "B": "V", "D": "H", "H": "D", "V": "B", "N": "N", } comp_map.update({c: c for c in cls.gap_chars}) return comp_map @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set("ACGT") @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return { "R": set("AG"), "Y": set("CT"), "M": set("AC"), "K": set("TG"), "W": set("AT"), "S": set("GC"), "B": set("CGT"), "D": set("AGT"), "H": set("ACT"), "V": set("ACG"), "N": set("ACGT"), } @classproperty @overrides(GrammaredSequence) def 
default_gap_char(cls): return "-" @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set("-.") @classproperty @overrides(GrammaredSequence) def wildcard_char(cls): return "N" @property def _motifs(self): return _motifs def transcribe(self): """Transcribe DNA into RNA. DNA sequence is assumed to be the coding strand. Thymine (T) is replaced with uracil (U) in the transcribed sequence. Returns ------- RNA Transcribed sequence. See Also -------- translate translate_six_frames Notes ----- DNA sequence's metadata, positional, and interval metadata are included in the transcribed RNA sequence. Examples -------- Transcribe DNA into RNA: >>> from skbio import DNA >>> dna = DNA('TAACGTTA') >>> dna DNA -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 25.00% -------------------------- 0 TAACGTTA >>> dna.transcribe() RNA -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 25.00% -------------------------- 0 UAACGUUA """ seq = self._string.replace(b"T", b"U") metadata = None if self.has_metadata(): metadata = self.metadata positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata interval_metadata = None if self.has_interval_metadata(): interval_metadata = self.interval_metadata # turn off validation because `seq` is guaranteed to be valid return skbio.RNA( seq, metadata=metadata, positional_metadata=positional_metadata, interval_metadata=interval_metadata, validate=False, ) def translate(self, *args, **kwargs): """Translate DNA sequence into protein sequence. DNA sequence is assumed to be the coding strand. DNA sequence is first transcribed into RNA and then translated into protein. Parameters ---------- args : tuple Positional arguments accepted by ``RNA.translate``. kwargs : dict Keyword arguments accepted by ``RNA.translate``. Returns ------- Protein Translated sequence. 
See Also -------- RNA.reverse_transcribe RNA.translate translate_six_frames transcribe Notes ----- DNA sequence's metadata are included in the translated protein sequence. Positional metadata are not included. Examples -------- Translate DNA into protein using NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import DNA >>> dna = DNA('ATGCCACTTTAA') >>> dna.translate() Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* Translate the same DNA sequence using a different NCBI genetic code (table ID 3, the yeast mitochondrial code) and specify that translation must terminate at the first stop codon: >>> dna.translate(3, stop='require') Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 MPT """ return self.transcribe().translate(*args, **kwargs) def translate_six_frames(self, *args, **kwargs): """Translate DNA into protein using six possible reading frames. DNA sequence is assumed to be the coding strand. DNA sequence is first transcribed into RNA and then translated into protein. The six possible reading frames are: * 1 (forward) * 2 (forward) * 3 (forward) * -1 (reverse) * -2 (reverse) * -3 (reverse) Translated sequences are yielded in this order. Parameters ---------- args : tuple Positional arguments accepted by ``RNA.translate_six_frames``. kwargs : dict Keyword arguments accepted by ``RNA.translate_six_frames``. Yields ------ Protein Translated sequence in the current reading frame. 
See Also -------- RNA.translate_six_frames translate transcribe Notes ----- This method is faster than (and equivalent to) performing six independent translations using, for example: ``(seq.translate(reading_frame=rf) for rf in GeneticCode.reading_frames)`` DNA sequence's metadata are included in each translated protein sequence. Positional metadata are not included. Examples -------- Translate DNA into protein using the six possible reading frames and NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import DNA >>> dna = DNA('ATGCCACTTTAA') >>> for protein in dna.translate_six_frames(): ... protein ... print('') Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 CHF Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 ATL Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 LKWH Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 *SG Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 KVA """ return self.transcribe().translate_six_frames(*args, **kwargs) @overrides(GrammaredSequence) def _repr_stats(self): """Define custom statistics to display in the sequence's repr.""" stats = super(DNA, self)._repr_stats() stats.append(("GC-content", "{:.2%}".format(self.gc_content()))) return stats _motifs = _parent_motifs.copy() # Leave this at the bottom 
_motifs.interpolate(DNA, "find_motifs") scikit-bio-0.6.2/skbio/sequence/_genetic_code.py000066400000000000000000000742441464262511300216330ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.util._decorator import classproperty, classonlymethod from skbio._base import SkbioObject from skbio.sequence import Protein, RNA from skbio._base import ElasticLines class GeneticCode(SkbioObject): """Genetic code for translating codons to amino acids. Parameters ---------- amino_acids : consumable by ``skbio.Protein`` constructor 64-character vector containing IUPAC amino acid characters. The order of the amino acids should correspond to NCBI's codon order (see *Notes* section below). `amino_acids` is the "AAs" field in NCBI's genetic code format [1]_. starts : consumable by ``skbio.Protein`` constructor 64-character vector containing only M and - characters, with start codons indicated by M. The order of the amino acids should correspond to NCBI's codon order (see *Notes* section below). `starts` is the "Starts" field in NCBI's genetic code format [1]_. name : str, optional Genetic code name. This is simply metadata and does not affect the functionality of the genetic code itself. See Also -------- RNA.translate DNA.translate GeneticCode.from_ncbi Notes ----- The genetic codes available via ``GeneticCode.from_ncbi`` and used throughout the examples are defined in [1]_. The genetic code strings defined there are directly compatible with the ``GeneticCode`` constructor. 
The order of `amino_acids` and `starts` should correspond to NCBI's codon order, defined in [1]_:: UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGG UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG Note that scikit-bio displays this ordering using the IUPAC RNA alphabet, while NCBI displays this same ordering using the IUPAC DNA alphabet (for historical purposes). References ---------- .. [1] http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi Examples -------- Get NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import GeneticCode >>> GeneticCode.from_ncbi() GeneticCode (Standard) ------------------------------------------------------------------------- AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG Starts = ---M---------------M---------------M---------------------------- Base1 = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG Base2 = UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGG Base3 = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG Get a different NCBI genetic code (25): >>> GeneticCode.from_ncbi(25) GeneticCode (Candidate Division SR1 and Gracilibacteria) ------------------------------------------------------------------------- AAs = FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG Starts = ---M-------------------------------M---------------M------------ Base1 = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG Base2 = UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGG Base3 = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG Define a custom genetic code: >>> GeneticCode('M' * 64, '-' * 64) GeneticCode ------------------------------------------------------------------------- AAs = MMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMMM Starts = 
---------------------------------------------------------------- Base1 = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG Base2 = UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGG Base3 = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG Translate an RNA sequence to protein using NCBI's standard genetic code: >>> from skbio import RNA >>> rna = RNA('AUGCCACUUUAA') >>> GeneticCode.from_ncbi().translate(rna) Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* """ _num_codons = 64 _radix_multiplier = np.asarray([16, 4, 1], dtype=np.uint8) _start_stop_options = ["ignore", "optional", "require"] __offset_table = None @classproperty def _offset_table(cls): if cls.__offset_table is None: # create lookup table that is filled with 255 everywhere except for # indices corresponding to U, C, A, and G. 255 was chosen to # represent invalid character offsets because it will create an # invalid (out of bounds) index into `amino_acids` which should # error noisily. this is important in case the valid definite # IUPAC RNA characters change in the future and the assumptions # currently made by the code become invalid table = np.empty(ord(b"U") + 1, dtype=np.uint8) table.fill(255) table[ord(b"U")] = 0 table[ord(b"C")] = 1 table[ord(b"A")] = 2 table[ord(b"G")] = 3 cls.__offset_table = table return cls.__offset_table @classonlymethod def from_ncbi(cls, table_id=1): """Return NCBI genetic code specified by table ID. Parameters ---------- table_id : int, optional Table ID of the NCBI genetic code to return. Returns ------- GeneticCode NCBI genetic code specified by `table_id`. Notes ----- The table IDs and genetic codes available in this method and used throughout the examples are defined in [1]_. References ---------- .. 
[1] http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi Examples -------- Get the NCBI thraustochytrium mitochondrial genetic code (23): >>> tmgc = GeneticCode.from_ncbi(23) >>> tmgc.name 'Thraustochytrium Mitochondrial' """ if table_id not in _ncbi_genetic_codes: raise ValueError( "`table_id` must be one of %r, not %r" % (sorted(_ncbi_genetic_codes), table_id) ) return _ncbi_genetic_codes[table_id] @classproperty def reading_frames(cls): """Six possible reading frames. Reading frames are ordered: * 1 (forward) * 2 (forward) * 3 (forward) * -1 (reverse) * -2 (reverse) * -3 (reverse) This property can be passed into ``GeneticCode.translate(reading_frame)``. Returns ------- list (int) Six possible reading frames. """ return [1, 2, 3, -1, -2, -3] @property def name(self): """Genetic code name. This is simply metadata and does not affect the functionality of the genetic code itself. Returns ------- str Genetic code name. """ return self._name def __init__(self, amino_acids, starts, name=""): self._set_amino_acids(amino_acids) self._set_starts(starts) self._name = name def _set_amino_acids(self, amino_acids): amino_acids = Protein(amino_acids) if len(amino_acids) != self._num_codons: raise ValueError( "`amino_acids` must be length %d, not %d" % (self._num_codons, len(amino_acids)) ) indices = (amino_acids.values == b"M").nonzero()[0] if indices.size < 1: raise ValueError( "`amino_acids` must contain at least one M " "(methionine) character" ) self._amino_acids = amino_acids self._m_character_codon = self._index_to_codon(indices[0]) def _set_starts(self, starts): starts = Protein(starts) if len(starts) != self._num_codons: raise ValueError( "`starts` must be length %d, not %d" % (self._num_codons, len(starts)) ) if (starts.values == b"M").sum() + (starts.values == b"-").sum() != len(starts): # to prevent the user from accidentally swapping `starts` and # `amino_acids` and getting a translation back raise ValueError("`starts` may only contain M and - characters") 
self._starts = starts indices = (self._starts.values == b"M").nonzero()[0] codons = np.empty((indices.size, 3), dtype=np.uint8) for i, index in enumerate(indices): codons[i] = self._index_to_codon(index) self._start_codons = codons def _index_to_codon(self, index): """Convert AA index (0-63) to codon encoded in offsets (0-3).""" codon = np.empty(3, dtype=np.uint8) for i, multiplier in enumerate(self._radix_multiplier): offset, index = divmod(index, multiplier) codon[i] = offset return codon def __str__(self): """Return string representation of the genetic code. Returns ------- str Genetic code in NCBI genetic code format. Notes ----- Representation uses NCBI genetic code format defined in [1]_. References ---------- .. [1] http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi """ return self._build_repr(include_name=False) def __repr__(self): """Return string representation of the genetic code. Returns ------- str Genetic code in NCBI genetic code format. Notes ----- Representation uses NCBI genetic code format defined in [1]_ preceded by a header. If the genetic code has a name, it will be included in the header. References ---------- .. [1] http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi """ return self._build_repr(include_name=True) def _build_repr(self, include_name): lines = ElasticLines() if include_name: name_line = self.__class__.__name__ if len(self.name) > 0: name_line += " (%s)" % self.name lines.add_line(name_line) lines.add_separator() lines.add_line(" AAs = %s" % str(self._amino_acids)) lines.add_line("Starts = %s" % str(self._starts)) base1 = "U" * 16 + "C" * 16 + "A" * 16 + "G" * 16 lines.add_line("Base1 = %s" % base1) base2 = ("U" * 4 + "C" * 4 + "A" * 4 + "G" * 4) * 4 lines.add_line("Base2 = %s" % base2) base3 = "UCAG" * 16 lines.add_line("Base3 = %s" % base3) return lines.to_str() def __eq__(self, other): """Determine if the genetic code is equal to another. 
Genetic codes are equal if they are *exactly* the same type and defined by the same `amino_acids` and `starts`. A genetic code's name (accessed via ``name`` property) does not affect equality. Parameters ---------- other : GeneticCode Genetic code to test for equality against. Returns ------- bool Indicates whether the genetic code is equal to `other`. Examples -------- NCBI genetic codes 1 and 2 are not equal: >>> GeneticCode.from_ncbi(1) == GeneticCode.from_ncbi(2) False Define a custom genetic code: >>> gc = GeneticCode('M' * 64, '-' * 64) Define a second genetic code with the same `amino_acids` and `starts`. Note that the presence of a name does not make the genetic codes unequal: >>> named_gc = GeneticCode('M' * 64, '-' * 64, name='example name') >>> gc == named_gc True """ if self.__class__ != other.__class__: return False # convert Protein to str so that metadata is ignored in comparison. we # only care about the sequence data defining the genetic code if str(self._amino_acids) != str(other._amino_acids): return False if str(self._starts) != str(other._starts): return False return True def __ne__(self, other): """Determine if the genetic code is not equal to another. Genetic codes are not equal if their type, `amino_acids`, or `starts` differ. A genetic code's name (accessed via ``name`` property) does not affect equality. Parameters ---------- other : GeneticCode Genetic code to test for inequality against. Returns ------- bool Indicates whether the genetic code is not equal to `other`. """ return not (self == other) def translate(self, sequence, reading_frame=1, start="ignore", stop="ignore"): """Translate RNA sequence into protein sequence. Parameters ---------- sequence : RNA RNA sequence to translate. reading_frame : {1, 2, 3, -1, -2, -3} Reading frame to use in translation. 1, 2, and 3 are forward frames and -1, -2, and -3 are reverse frames. If reverse (negative), will reverse complement the sequence before translation. 
start : {'ignore', 'require', 'optional'} How to handle start codons: * "ignore": translation will start from the beginning of the reading frame, regardless of the presence of a start codon. * "require": translation will start at the first start codon in the reading frame, ignoring all prior positions. The first amino acid in the translated sequence will *always* be methionine (M character), even if an alternative start codon was used in translation. This behavior most closely matches the underlying biology since fMet doesn't have a corresponding IUPAC character. If a start codon does not exist, a ``ValueError`` is raised. * "optional": if a start codon exists in the reading frame, matches the behavior of "require". If a start codon does not exist, matches the behavior of "ignore". stop : {'ignore', 'require', 'optional'} How to handle stop codons: * "ignore": translation will ignore the presence of stop codons and translate to the end of the reading frame. * "require": translation will terminate at the first stop codon. The stop codon will not be included in the translated sequence. If a stop codon does not exist, a ``ValueError`` is raised. * "optional": if a stop codon exists in the reading frame, matches the behavior of "require". If a stop codon does not exist, matches the behavior of "ignore". Returns ------- Protein Translated sequence. See Also -------- translate_six_frames Notes ----- Input RNA sequence metadata are included in the translated protein sequence. Positional metadata are not included. 
Examples -------- Translate RNA into protein using NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import RNA, GeneticCode >>> rna = RNA('AGUAUUCUGCCACUGUAAGAA') >>> sgc = GeneticCode.from_ncbi() >>> sgc.translate(rna) Protein -------------------------- Stats: length: 7 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 SILPL*E In this command, we used the default ``start`` behavior, which starts translation at the beginning of the reading frame, regardless of the presence of a start codon. If we specify "require", translation will start at the first start codon in the reading frame (in this example, CUG), ignoring all prior positions: >>> sgc.translate(rna, start='require') Protein -------------------------- Stats: length: 5 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL*E Note that the codon coding for L (CUG) is an alternative start codon in this genetic code. Since we specified "require" mode, methionine (M) was used in place of the alternative start codon (L). This behavior most closely matches the underlying biology since fMet doesn't have a corresponding IUPAC character. Translate the same RNA sequence, also specifying that translation terminate at the first stop codon in the reading frame: >>> sgc.translate(rna, start='require', stop='require') Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 MPL Passing "require" to both ``start`` and ``stop`` trims the translation to the CDS (and in fact requires that one is present in the reading frame). Changing the reading frame to 2 causes an exception to be raised because a start codon doesn't exist in the reading frame: >>> sgc.translate(rna, start='require', stop='require', ... 
reading_frame=2) # doctest: +IGNORE_EXCEPTION_DETAIL Traceback (most recent call last): ... ValueError: ... """ self._validate_translate_inputs(sequence, reading_frame, start, stop) offset = abs(reading_frame) - 1 if reading_frame < 0: sequence = sequence.reverse_complement() # Translation strategy: # # 1. Obtain view of underlying sequence bytes from the beginning of # the reading frame. # 2. Convert bytes to offsets (0-3, base 4 since there are only 4 # characters allowed: UCAG). # 3. Reshape byte vector into (N, 3), where N is the number of codons # in the reading frame. Each row represents a codon in the # sequence. # 4. (Optional) Find start codon in the reading frame and trim to # this position. Replace start codon with M codon. # 5. Convert each codon (encoded as offsets) into an index # corresponding to an amino acid (0-63). # 6. Obtain translated sequence by indexing into the amino acids # vector (`amino_acids`) using the indices defined in step 5. # 7. (Optional) Find first stop codon and trim to this position. data = sequence.values[offset:].view(np.uint8) # since advanced indexing is used with an integer ndarray, a copy is # always returned. thus, the in-place modification made below # (replacing the start codon) is safe. 
data = self._offset_table[data] data = data[: data.size // 3 * 3].reshape((-1, 3)) if start in {"require", "optional"}: start_codon_index = data.shape[0] for start_codon in self._start_codons: indices = np.all(data == start_codon, axis=1).nonzero()[0] if indices.size > 0: first_index = indices[0] if first_index < start_codon_index: start_codon_index = first_index if start_codon_index != data.shape[0]: data = data[start_codon_index:] data[0] = self._m_character_codon elif start == "require": self._raise_require_error("start", reading_frame) indices = (data * self._radix_multiplier).sum(axis=1) translated = self._amino_acids.values[indices] if stop in {"require", "optional"}: stop_codon_indices = (translated == b"*").nonzero()[0] if stop_codon_indices.size > 0: translated = translated[: stop_codon_indices[0]] elif stop == "require": self._raise_require_error("stop", reading_frame) metadata = None if sequence.has_metadata(): metadata = sequence.metadata # turn off validation because `translated` is guaranteed to be valid return Protein(translated, metadata=metadata, validate=False) def _validate_translate_inputs(self, sequence, reading_frame, start, stop): if not isinstance(sequence, RNA): raise TypeError( "Sequence to translate must be RNA, not %s" % type(sequence).__name__ ) if reading_frame not in self.reading_frames: raise ValueError( "`reading_frame` must be one of %r, not %r" % (self.reading_frames, reading_frame) ) for name, value in ("start", start), ("stop", stop): if value not in self._start_stop_options: raise ValueError( "`%s` must be one of %r, not %r" % (name, self._start_stop_options, value) ) if sequence.has_gaps(): raise ValueError( "scikit-bio does not support translation of " "gapped sequences." ) if sequence.has_degenerates(): raise NotImplementedError( "scikit-bio does not currently support " "translation of degenerate sequences." "`RNA.expand_degenerates` can be used " "to obtain all definite versions " "of a degenerate sequence." 
) def _raise_require_error(self, name, reading_frame): raise ValueError( "Sequence does not contain a %s codon in the " "current reading frame (`reading_frame=%d`). Presence " "of a %s codon is required with `%s='require'`" % (name, reading_frame, name, name) ) def translate_six_frames(self, sequence, start="ignore", stop="ignore"): """Translate RNA into protein using six possible reading frames. The six possible reading frames are: * 1 (forward) * 2 (forward) * 3 (forward) * -1 (reverse) * -2 (reverse) * -3 (reverse) Translated sequences are yielded in this order. Parameters ---------- sequence : RNA RNA sequence to translate. start : {'ignore', 'require', 'optional'} How to handle start codons. See ``GeneticCode.translate`` for details. stop : {'ignore', 'require', 'optional'} How to handle stop codons. See ``GeneticCode.translate`` for details. Yields ------ Protein Translated sequence in the current reading frame. See Also -------- translate Notes ----- This method is faster than (and equivalent to) performing six independent translations using, for example: ``(gc.translate(seq, reading_frame=rf) for rf in GeneticCode.reading_frames)`` Input RNA sequence metadata are included in each translated protein sequence. Positional metadata are not included. Examples -------- Translate RNA into protein using the six possible reading frames and NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import RNA, GeneticCode >>> rna = RNA('AUGCCACUUUAA') >>> sgc = GeneticCode.from_ncbi() >>> for protein in sgc.translate_six_frames(rna): ... protein ... 
print('') Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 CHF Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 ATL Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 LKWH Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 *SG Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 KVA """ rc = sequence.reverse_complement() for reading_frame in range(1, 4): yield self.translate( sequence, reading_frame=reading_frame, start=start, stop=stop ) for reading_frame in range(1, 4): yield self.translate( rc, reading_frame=reading_frame, start=start, stop=stop ) # defined at http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi _ncbi_genetic_codes = { 1: GeneticCode( "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M---------------M---------------M----------------------------", "Standard", ), 2: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG", "--------------------------------MMMM---------------M------------", "Vertebrate Mitochondrial", ), 3: GeneticCode( "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "----------------------------------MM----------------------------", "Yeast Mitochondrial", ), 4: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", 
"--MM---------------M------------MMMM---------------M------------", "Mold, Protozoan, and Coelenterate Mitochondrial, and " "Mycoplasma/Spiroplasma", ), 5: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG", "---M----------------------------MMMM---------------M------------", "Invertebrate Mitochondrial", ), 6: GeneticCode( "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "-----------------------------------M----------------------------", "Ciliate, Dasycladacean and Hexamita Nuclear", ), 9: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------------------------------M---------------M------------", "Echinoderm and Flatworm Mitochondrial", ), 10: GeneticCode( "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "-----------------------------------M----------------------------", "Euplotid Nuclear", ), 11: GeneticCode( "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M---------------M------------MMMM---------------M------------", "Bacterial, Archaeal and Plant Plastid", ), 12: GeneticCode( "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "-------------------M---------------M----------------------------", "Alternative Yeast Nuclear", ), 13: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG", "---M------------------------------MM---------------M------------", "Ascidian Mitochondrial", ), 14: GeneticCode( "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG", "-----------------------------------M----------------------------", "Alternative Flatworm Mitochondrial", ), 16: GeneticCode( "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "-----------------------------------M----------------------------", "Chlorophycean Mitochondrial", ), 21: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG", 
"-----------------------------------M---------------M------------", "Trematode Mitochondrial", ), 22: GeneticCode( "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "-----------------------------------M----------------------------", "Scenedesmus obliquus Mitochondrial", ), 23: GeneticCode( "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "--------------------------------M--M---------------M------------", "Thraustochytrium Mitochondrial", ), 24: GeneticCode( "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG", "---M---------------M---------------M---------------M------------", "Pterobranchia Mitochondrial", ), 25: GeneticCode( "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", "---M-------------------------------M---------------M------------", "Candidate Division SR1 and Gracilibacteria", ), } scikit-bio-0.6.2/skbio/sequence/_grammared_sequence.py000066400000000000000000000660461464262511300230530ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from warnings import warn from abc import ABCMeta, abstractproperty from itertools import product import re import numpy as np from skbio.util._decorator import classproperty, overrides from skbio.util._misc import MiniRegistry from ._sequence import Sequence class GrammaredSequenceMeta(ABCMeta, type): def __new__(mcs, name, bases, dct): cls = super(GrammaredSequenceMeta, mcs).__new__(mcs, name, bases, dct) concrete_gap_chars = type(cls.gap_chars) is not abstractproperty concrete_degenerate_map = type(cls.degenerate_map) is not abstractproperty concrete_definite_chars = type(cls.definite_chars) is not abstractproperty concrete_default_gap_char = type(cls.default_gap_char) is not abstractproperty # degenerate_chars is not abstract but it depends on degenerate_map # which is abstract. concrete_degenerate_chars = concrete_degenerate_map # Only perform metaclass checks if none of the attributes on the class # are abstract. # TODO: Rather than hard-coding a list of attributes to check, we can # probably check all the attributes on the class and make sure none of # them are abstract. 
if ( concrete_gap_chars and concrete_degenerate_map and concrete_definite_chars and concrete_default_gap_char and concrete_degenerate_chars ): if cls.default_gap_char not in cls.gap_chars: raise TypeError( "default_gap_char must be in gap_chars for class %s" % name ) if len(cls.gap_chars & cls.degenerate_chars) > 0: raise TypeError( "gap_chars and degenerate_chars must not share any " "characters for class %s" % name ) for key in cls.degenerate_map.keys(): for definite_char in cls.degenerate_map[key]: if definite_char not in cls.definite_chars: raise TypeError( "degenerate_map must expand only to " "characters included in definite_chars " "for class %s" % name ) if len(cls.degenerate_chars & cls.definite_chars) > 0: raise TypeError( "degenerate_chars and definite_chars must not " "share any characters for class %s" % name ) if len(cls.gap_chars & cls.definite_chars) > 0: raise TypeError( "gap_chars and definite_chars must not share any " "characters for class %s" % name ) return cls class GrammaredSequence(Sequence, metaclass=GrammaredSequenceMeta): """Store sequence data conforming to a character set. This is an abstract base class (ABC) that cannot be instantiated. This class is intended to be inherited from to create grammared sequences with custom alphabets. Raises ------ ValueError If sequence characters are not in the character set [1]_. See Also -------- DNA RNA Protein References ---------- .. [1] Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984. Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030. A Cornish-Bowden Examples -------- Note in the example below that properties either need to be static or use skbio's `classproperty` decorator. >>> from skbio.sequence import GrammaredSequence >>> from skbio.util import classproperty >>> class CustomSequence(GrammaredSequence): ... @classproperty ... def degenerate_map(cls): ... return {"X": set("AB")} ... ... @classproperty ... def definite_chars(cls): ... 
return set("ABC") ... ... ... @classproperty ... def default_gap_char(cls): ... return '-' ... ... @classproperty ... def gap_chars(cls): ... return set('-.') >>> seq = CustomSequence('ABABACAC') >>> seq CustomSequence -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True -------------------------- 0 ABABACAC >>> seq = CustomSequence('XXXXXX') >>> seq CustomSequence ------------------------- Stats: length: 6 has gaps: False has degenerates: True has definites: False ------------------------- 0 XXXXXX """ __validation_mask = None __degenerate_codes = None __definite_char_codes = None __gap_codes = None __noncanonical_codes = None __degenerate_hash = None __degen_nonca_hash = None __gap_hash = None __definite_hash = None @classproperty def _validation_mask(cls): # TODO These masks could be defined (as literals) on each concrete # object. For now, memoize! if cls.__validation_mask is None: as_bytes = "".join(cls.alphabet).encode("ascii") cls.__validation_mask = np.invert( np.bincount( np.frombuffer(as_bytes, dtype=np.uint8), minlength=cls._num_extended_ascii_codes, ).astype(bool) ) return cls.__validation_mask @classproperty def _degenerate_codes(cls): if cls.__degenerate_codes is None: degens = cls.degenerate_chars cls.__degenerate_codes = np.asarray([ord(d) for d in degens]) return cls.__degenerate_codes @classproperty def _definite_char_codes(cls): if cls.__definite_char_codes is None: definite_chars = cls.definite_chars cls.__definite_char_codes = np.asarray([ord(d) for d in definite_chars]) return cls.__definite_char_codes @classproperty def _gap_codes(cls): if cls.__gap_codes is None: gaps = cls.gap_chars cls.__gap_codes = np.asarray([ord(g) for g in gaps]) return cls.__gap_codes @classproperty def _noncanonical_codes(cls): if cls.__noncanonical_codes is None: noncanonical_chars = cls.noncanonical_chars cls.__noncanonical_codes = np.asarray([ord(c) for c in noncanonical_chars]) return cls.__noncanonical_codes 
@classproperty def _degenerate_hash(cls): if cls.__degenerate_hash is None: cls.__degenerate_hash = np.zeros((Sequence._num_ascii_codes,), dtype=bool) cls.__degenerate_hash[cls._degenerate_codes] = True return cls.__degenerate_hash @classproperty def _degen_nonca_hash(cls): if cls.__degen_nonca_hash is None: cls.__degen_nonca_hash = cls._degenerate_hash.copy() cls.__degen_nonca_hash[cls._noncanonical_codes] = True return cls.__degen_nonca_hash @classproperty def _gap_hash(cls): if cls.__gap_hash is None: cls.__gap_hash = np.zeros((Sequence._num_ascii_codes,), dtype=bool) cls.__gap_hash[cls._gap_codes] = True return cls.__gap_hash @classproperty def _definite_hash(cls): if cls.__definite_hash is None: cls.__definite_hash = np.zeros((Sequence._num_ascii_codes,), dtype=bool) cls.__definite_hash[cls._definite_char_codes] = True return cls.__definite_hash @classproperty def alphabet(cls): """Return valid characters. This includes gap, definite, and degenerate characters. Returns ------- set Valid characters. """ return cls.degenerate_chars | cls.definite_chars | cls.gap_chars @abstractproperty @classproperty def gap_chars(cls): """Return characters defined as gaps. Returns ------- set Characters defined as gaps. """ raise NotImplementedError @abstractproperty @classproperty def default_gap_char(cls): """Gap character to use when constructing a new gapped sequence. This character is used when it is necessary to represent gap characters in a new sequence. For example, a majority consensus sequence will use this character to represent gaps. Returns ------- str Default gap character. """ raise NotImplementedError @classproperty def degenerate_chars(cls): """Return degenerate characters. Returns ------- set Degenerate characters. """ return set(cls.degenerate_map) @classproperty def nondegenerate_chars(cls): """Return non-degenerate characters. Returns ------- set Non-degenerate characters. Warnings -------- ``nondegenerate_chars`` is deprecated as of ``0.5.0``. 
It has been renamed to ``definite_chars``. See Also -------- definite_chars """ # noqa: D416 # @deprecated warn("nondegenerate_chars is deprecated as of 0.5.0", DeprecationWarning) return cls.definite_chars @abstractproperty @classproperty def definite_chars(cls): """Return definite characters. Returns ------- set Definite characters. """ raise NotImplementedError @classproperty def noncanonical_chars(cls): """Return non-canonical characters. Returns ------- set Non-canonical characters. """ return set() @abstractproperty @classproperty def degenerate_map(cls): """Return mapping of degenerate to definite characters. Returns ------- dict (set) Mapping of each degenerate character to the set of definite characters it represents. """ raise NotImplementedError @classproperty def wildcard_char(cls): """Return wildcard character. Returns ------- str of length 1 Wildcard character. """ return None @property def _motifs(self): return _motifs @overrides(Sequence) def __init__( self, sequence, metadata=None, positional_metadata=None, interval_metadata=None, lowercase=False, validate=True, ): super(GrammaredSequence, self).__init__( sequence, metadata, positional_metadata, interval_metadata, lowercase ) if validate: self._validate() def _validate(self): # This is the fastest way that we have found to identify the # presence or absence of certain characters (numbers). # It works by multiplying a mask where the numbers which are # permitted have a zero at their index, and all others have a one. # The result is a vector which will propogate counts of invalid # numbers and remove counts of valid numbers, so that we need only # see if the array is empty to determine validity. invalid_characters = ( np.bincount(self._bytes, minlength=self._num_extended_ascii_codes) * self._validation_mask ) if np.any(invalid_characters): bad = list(np.where(invalid_characters > 0)[0].astype(np.uint8).view("|S1")) raise ValueError( "Invalid character%s in sequence: %r. 
\n" "Valid characters: %r\n" "Note: Use `lowercase` if your sequence contains lowercase " "characters not in the sequence's alphabet." % ( "s" if len(bad) > 1 else "", [str(b.tobytes().decode("ascii")) for b in bad] if len(bad) > 1 else bad[0], list(self.alphabet), ) ) def gaps(self): """Find positions containing gaps in the biological sequence. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` indicates a gap character is present at that position in the biological sequence. See Also -------- has_gaps Examples -------- >>> from skbio import DNA >>> s = DNA('AC-G-') >>> s.gaps() array([False, False, True, False, True], dtype=bool) """ return self._gap_hash[self._bytes] def has_gaps(self): """Determine if the sequence contains one or more gap characters. Returns ------- bool Indicates whether there are one or more occurrences of gap characters in the biological sequence. Examples -------- >>> from skbio import DNA >>> s = DNA('ACACGACGTT') >>> s.has_gaps() False >>> t = DNA('A.CAC--GACGTT') >>> t.has_gaps() True """ # TODO use count, there aren't that many gap chars # TODO: cache results return bool(self.gaps().any()) def degenerates(self): """Find positions containing degenerate characters in the sequence. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` indicates a degenerate character is present at that position in the biological sequence. See Also -------- has_degenerates definites has_definites Examples -------- >>> from skbio import DNA >>> s = DNA('ACWGN') >>> s.degenerates() array([False, False, True, False, True], dtype=bool) """ return self._degenerate_hash[self._bytes] def has_degenerates(self): """Determine if sequence contains one or more degenerate characters. Returns ------- bool Indicates whether there are one or more occurrences of degenerate characters in the biological sequence. 
See Also -------- degenerates definites has_definites Examples -------- >>> from skbio import DNA >>> s = DNA('ACAC-GACGTT') >>> s.has_degenerates() False >>> t = DNA('ANCACWWGACGTT') >>> t.has_degenerates() True """ # TODO use bincount! # TODO: cache results return bool(self.degenerates().any()) def definites(self): """Find positions containing definite characters in the sequence. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` indicates a definite character is present at that position in the biological sequence. See Also -------- has_definites degenerates Examples -------- >>> from skbio import DNA >>> s = DNA('ACWGN') >>> s.definites() array([ True, True, False, True, False], dtype=bool) """ return self._definite_hash[self._bytes] def nondegenerates(self): """Find positions containing non-degenerate characters in the sequence. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` indicates a non-degenerate character is present at that position in the biological sequence. Warnings -------- ``nondegenerates`` is deprecated as of ``0.5.0``. It has been renamed to ``definites``. See Also -------- definites has_definites degenerates Examples -------- >>> from skbio import DNA >>> s = DNA('ACWGN') >>> s.nondegenerates() array([ True, True, False, True, False], dtype=bool) """ # noqa: D416 # @deprecated warn("nondenengerates is deprecated as of 0.5.0.", DeprecationWarning) return self.definites() def has_definites(self): """Determine if sequence contains one or more definite characters. Returns ------- bool Indicates whether there are one or more occurrences of definite characters in the biological sequence. 
See Also -------- definites degenerates has_degenerates Examples -------- >>> from skbio import DNA >>> s = DNA('NWNNNNNN') >>> s.has_definites() False >>> t = DNA('ANCACWWGACGTT') >>> t.has_definites() True """ # TODO: cache results return bool(self.definites().any()) def has_nondegenerates(self): """Determine if sequence contains one or more non-degenerate characters. Returns ------- bool Indicates whether there are one or more occurrences of non-degenerate characters in the biological sequence. Warnings -------- ``has_nondegenerates`` is deprecated as of ``0.5.0``. It has been renamed to ``has_definites``. See Also -------- definites has_definites degenerates has_degenerates Examples -------- >>> from skbio import DNA >>> s = DNA('NWNNNNNN') >>> s.has_nondegenerates() False >>> t = DNA('ANCACWWGACGTT') >>> t.has_nondegenerates() True """ # noqa: D416 # TODO: cache results # @deprecated warn("has_nondegenerates is deprecated as of 0.5.0", DeprecationWarning) return self.has_definites() def degap(self): """Return a new sequence with gap characters removed. Returns ------- GrammaredSequence A new sequence with all gap characters removed. See Also -------- gap_chars Notes ----- The type and metadata of the result will be the same as the biological sequence. If positional metadata is present, it will be filtered in the same manner as the sequence characters and included in the resulting degapped sequence. Examples -------- >>> from skbio import DNA >>> s = DNA('GGTC-C--ATT-C.', ... positional_metadata={'quality':range(14)}) >>> s.degap() DNA ----------------------------- Positional metadata: 'quality': Stats: length: 9 has gaps: False has degenerates: False has definites: True GC-content: 55.56% ----------------------------- 0 GGTCCATTC """ return self[np.invert(self.gaps())] def expand_degenerates(self): """Yield all possible definite versions of the sequence. Yields ------ GrammaredSequence Definite version of the sequence. 
See Also -------- degenerate_map Notes ----- There is no guaranteed ordering to the definite sequences that are yielded. Each definite sequence will have the same type, metadata, and positional metadata as the biological sequence. Examples -------- >>> from skbio import DNA >>> seq = DNA('TRG') >>> seq_generator = seq.expand_degenerates() >>> for s in sorted(seq_generator, key=str): ... s ... print('') DNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 33.33% -------------------------- 0 TAG DNA -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True GC-content: 66.67% -------------------------- 0 TGG """ degen_chars = self.degenerate_map nonexpansion_chars = self.definite_chars.union(self.gap_chars) expansions = [] for char in self: char = str(char) if char in nonexpansion_chars: expansions.append(char) else: expansions.append(degen_chars[char]) metadata = None if self.has_metadata(): metadata = self.metadata positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata for definite_seq in product(*expansions): yield self._constructor( sequence="".join(definite_seq), metadata=metadata, positional_metadata=positional_metadata, interval_metadata=self.interval_metadata, ) def to_regex(self, within_capture=False): """Return regular expression object that accounts for degenerate chars. Parameters ---------- within_capture : bool If ``True``, format the regex pattern for the sequence into a single capture group. If ``False``, compile the regex pattern as-is with no capture groups. Returns ------- regex Pre-compiled regular expression object (as from ``re.compile``) that matches all definite versions of this sequence, and nothing else. 
Examples -------- >>> from skbio import DNA >>> seq = DNA('TRG') >>> regex = seq.to_regex() >>> regex.match('TAG').string 'TAG' >>> regex.match('TGG').string 'TGG' >>> regex.match('TCG') is None True >>> regex = seq.to_regex(within_capture=True) >>> regex.match('TAG').groups(0) ('TAG',) """ regex_parts = [] for base in str(self): if base in self.degenerate_chars: regex_parts.append("[{0}]".format("".join(self.degenerate_map[base]))) else: regex_parts.append(base) regex_string = "".join(regex_parts) if within_capture: regex_string = "({})".format(regex_string) return re.compile(regex_string) def to_definites(self, degenerate="wild", noncanonical=True): """Convert degenerate and noncanonical characters to alternative characters. Parameters ---------- degenerate : {"wild", "gap", "del", str of length 1}, optional How degenerate/non-canonical characters should be treated: Replace them with the wildcard character ("wild", default), or the default gap character ("gap"), or a user-defined character (str of length 1), or remove them ("del"). noncanonical : bool, optional Treat non-canonical characters in the same way as degenerate characters (``True``, default), or leave them as-is (``False``). Returns ------- GrammaredSequence Converted version of the sequence. """ errmsg = ( f'%s character for sequence type "{self.__class__}" is undefined or ' "invalid." ) if noncanonical: pos = self._degen_nonca_hash[self._bytes] else: pos = self._degenerate_hash[self._bytes] if degenerate == "del": seq = self._bytes[np.where(1 - pos)[0]] else: if degenerate == "wild": sub_char = self.wildcard_char if not isinstance(sub_char, str): raise ValueError(errmsg % "Wildcard") elif degenerate == "gap": sub_char = self.default_gap_char elif isinstance(degenerate, str) and len(degenerate) == 1: if degenerate in self.alphabet: sub_char = degenerate else: raise ValueError( f"Invalid character '{degenerate}' in sequence. 
Character must " f"be within sequence alphabet: {self.alphabet}" ) else: raise ValueError('Invalid value for parameter "degenerate".') seq = np.where(pos, ord(sub_char), self._bytes) return self._constructor(sequence=seq) def find_motifs(self, motif_type, min_length=1, ignore=None): """Search the biological sequence for motifs. Options for `motif_type`: Parameters ---------- motif_type : str Type of motif to find. min_length : int, optional Only motifs at least as long as `min_length` will be returned. ignore : 1D array_like (bool), optional Boolean vector indicating positions to ignore when matching. Yields ------ slice Location of the motif in the biological sequence. Raises ------ ValueError If an unknown `motif_type` is specified. Examples -------- >>> from skbio import DNA >>> s = DNA('ACGGGGAGGCGGAG') >>> for motif_slice in s.find_motifs('purine-run', min_length=2): ... motif_slice ... str(s[motif_slice]) slice(2, 9, None) 'GGGGAGG' slice(10, 14, None) 'GGAG' Gap characters can disrupt motifs: >>> s = DNA('GG-GG') >>> for motif_slice in s.find_motifs('purine-run'): ... motif_slice slice(0, 2, None) slice(3, 5, None) Gaps can be ignored by passing the gap boolean vector to `ignore`: >>> s = DNA('GG-GG') >>> for motif_slice in s.find_motifs('purine-run', ignore=s.gaps()): ... motif_slice slice(0, 5, None) """ if motif_type not in self._motifs: raise ValueError( "Not a known motif (%r) for this sequence (%s)." 
% (motif_type, self.__class__.__name__) ) return self._motifs[motif_type](self, min_length, ignore) @overrides(Sequence) def _constructor(self, **kwargs): return self.__class__(validate=False, lowercase=False, **kwargs) @overrides(Sequence) def _repr_stats(self): """Define custom statistics to display in the sequence's repr.""" stats = super(GrammaredSequence, self)._repr_stats() stats.append(("has gaps", "%r" % self.has_gaps())) stats.append(("has degenerates", "%r" % self.has_degenerates())) stats.append(("has definites", "%r" % self.has_definites())) return stats _motifs = MiniRegistry() # Leave this at the bottom _motifs.interpolate(GrammaredSequence, "find_motifs") scikit-bio-0.6.2/skbio/sequence/_nucleotide_mixin.py000066400000000000000000000253551464262511300225610ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from abc import ABCMeta, abstractproperty import numpy as np from skbio.util._decorator import classproperty from ._grammared_sequence import _motifs as parent_motifs class NucleotideMixin(metaclass=ABCMeta): """Mixin for adding funtionality for working with sequences of nucleotides. This is an abstract base class (ABC) that cannot be instantiated. 
See Also -------- DNA RNA """ __complement_lookup = None __gc_codes = None @classproperty def _complement_lookup(cls): if cls.__complement_lookup is not None: return cls.__complement_lookup lookup = np.zeros(cls._num_extended_ascii_codes, dtype=np.uint8) for key, value in cls.complement_map.items(): lookup[ord(key)] = ord(value) cls.__complement_lookup = lookup return lookup @classproperty def _gc_codes(cls): if cls.__gc_codes is None: gc_iupac_chars = "GCS" cls.__gc_codes = np.asarray([ord(g) for g in gc_iupac_chars]) return cls.__gc_codes @property def _motifs(self): return _motifs @abstractproperty @classproperty def complement_map(cls): """Return mapping of nucleotide characters to their complements. Returns ------- dict Mapping of each character to its complement. Notes ----- Complements cannot be defined for a generic nucleotide sequence because the complement of ``A`` is ambiguous. Thanks, nature... """ raise NotImplementedError def complement(self, reverse=False): """Return the complement of the nucleotide sequence. Parameters ---------- reverse : bool, optional If ``True``, return the reverse complement. If positional and/or interval metadata are present, they will be reversed. Returns ------- NucleotideMixin The (reverse) complement of the nucleotide sequence. The type and metadata of the result will be the same as the nucleotide sequence. If `reverse` is ``True``, positional or interval metadata will be reversed if it is present. 
See Also -------- reverse_complement complement_map Examples -------- >>> from skbio import DNA >>> seq = DNA('TTCATT', positional_metadata={'quality':range(6)}) >>> seq DNA ----------------------------- Positional metadata: 'quality': Stats: length: 6 has gaps: False has degenerates: False has definites: True GC-content: 16.67% ----------------------------- 0 TTCATT >>> seq.complement() DNA ----------------------------- Positional metadata: 'quality': Stats: length: 6 has gaps: False has degenerates: False has definites: True GC-content: 16.67% ----------------------------- 0 AAGTAA >>> rc = seq.complement(reverse=True) >>> rc DNA ----------------------------- Positional metadata: 'quality': Stats: length: 6 has gaps: False has degenerates: False has definites: True GC-content: 16.67% ----------------------------- 0 AATGAA >>> rc.positional_metadata['quality'].values # doctest: +ELLIPSIS array([5, 4, 3, 2, 1, 0]... """ result = self._complement_lookup[self._bytes] metadata = None if self.has_metadata(): metadata = self.metadata positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata complement = self._constructor( sequence=result, metadata=metadata, positional_metadata=positional_metadata ) if reverse: # this has to be before the interval metadata code, # because __gititem__ drops interval_metadata. complement = complement[::-1] if self.has_interval_metadata(): complement.interval_metadata = self.interval_metadata if reverse: # TODO: this can be revised to match # positional_metadata when __getitem__ # supports interval_metadata complement.interval_metadata._reverse() return complement def reverse_complement(self): """Return the reverse complement of the nucleotide sequence. Returns ------- NucleotideMixin The reverse complement of the nucleotide sequence. The type and metadata of the result will be the same as the nucleotide sequence. If positional metadata is present, it will be reversed. 
See Also -------- complement is_reverse_complement Notes ----- This method is equivalent to ``self.complement(reverse=True)``. Examples -------- >>> from skbio import DNA >>> seq = DNA('TTCATT', ... positional_metadata={'quality':range(6)}) >>> seq = seq.reverse_complement() >>> seq DNA ----------------------------- Positional metadata: 'quality': Stats: length: 6 has gaps: False has degenerates: False has definites: True GC-content: 16.67% ----------------------------- 0 AATGAA >>> seq.positional_metadata['quality'].values # doctest: +ELLIPSIS array([5, 4, 3, 2, 1, 0]... """ return self.complement(reverse=True) def is_reverse_complement(self, other): r"""Determine if a sequence is the reverse complement of this sequence. Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compare to. Returns ------- bool ``True`` if `other` is the reverse complement of the nucleotide sequence. Raises ------ TypeError If `other` is a ``Sequence`` object with a different type than the nucleotide sequence. See Also -------- reverse_complement Examples -------- >>> from skbio import DNA >>> DNA('TTCATT').is_reverse_complement('AATGAA') True >>> DNA('TTCATT').is_reverse_complement('AATGTT') False >>> DNA('ACGT').is_reverse_complement('ACGT') True """ other = self._munge_to_sequence(other, "is_reverse_complement") # avoid computing the reverse complement if possible if len(self) != len(other): return False else: # we reverse complement ourselves because `other` is a `Sequence` # object at this point and we only care about comparing the # underlying sequence data return self.reverse_complement()._string == other._string def gc_content(self): """Calculate the relative frequency of G's and C's in the sequence. This includes G, C, and S characters. This is equivalent to calling ``gc_frequency(relative=True)``. 
Note that the sequence will be degapped before the operation, so gap characters will not be included when calculating the length of the sequence. Returns ------- float Relative frequency of G's and C's in the sequence. See Also -------- gc_frequency Examples -------- >>> from skbio import DNA >>> DNA('ACGT').gc_content() 0.5 >>> DNA('ACGTACGT').gc_content() 0.5 >>> DNA('ACTTAGTT').gc_content() 0.25 >>> DNA('ACGT--..').gc_content() 0.5 >>> DNA('--..').gc_content() 0 `S` means `G` or `C`, so it counts: >>> DNA('ASST').gc_content() 0.5 Other degenerates don't count: >>> DNA('RYKMBDHVN').gc_content() 0.0 """ return self.gc_frequency(relative=True) def gc_frequency(self, relative=False): """Calculate frequency of G's and C's in the sequence. This calculates the minimum GC frequency, which corresponds to IUPAC characters G, C, and S (which stands for G or C). Parameters ---------- relative : bool, optional If False return the frequency of G, C, and S characters (ie the count). If True return the relative frequency, ie the proportion of G, C, and S characters in the sequence. In this case the sequence will also be degapped before the operation, so gap characters will not be included when calculating the length of the sequence. Returns ------- int or float Either frequency (count) or relative frequency (proportion), depending on `relative`. 
See Also -------- gc_content Examples -------- >>> from skbio import DNA >>> DNA('ACGT').gc_frequency() 2 >>> DNA('ACGT').gc_frequency(relative=True) 0.5 >>> DNA('ACGT--..').gc_frequency(relative=True) 0.5 >>> DNA('--..').gc_frequency(relative=True) 0 `S` means `G` or `C`, so it counts: >>> DNA('ASST').gc_frequency() 2 Other degenerates don't count: >>> DNA('RYKMBDHVN').gc_frequency() 0 """ counts = np.bincount(self._bytes, minlength=self._num_extended_ascii_codes) gc = counts[self._gc_codes].sum() if relative: seq = self.degap() if len(seq) != 0: gc /= len(seq) return gc _motifs = parent_motifs.copy() @_motifs("purine-run") def _motif_purine_run(sequence, min_length, ignore): """Identify purine runs.""" return sequence.find_with_regex("([AGR]{%d,})" % min_length, ignore=ignore) @_motifs("pyrimidine-run") def _motif_pyrimidine_run(sequence, min_length, ignore): """Identify pyrimidine runs.""" return sequence.find_with_regex("([CTUY]{%d,})" % min_length, ignore=ignore) scikit-bio-0.6.2/skbio/sequence/_protein.py000066400000000000000000000226741464262511300207030ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.util._decorator import classproperty, overrides from ._grammared_sequence import GrammaredSequence, _motifs as parent_motifs class Protein(GrammaredSequence): r"""Store protein sequence data and optional associated metadata. Parameters ---------- sequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Characters representing the protein sequence itself. metadata : dict, optional Arbitrary metadata which applies to the entire sequence. 
positional_metadata : Pandas DataFrame consumable, optional Arbitrary per-character metadata. For example, quality data from sequencing reads. Must be able to be passed directly to the Pandas DataFrame constructor. interval_metadata : IntervalMetadata Arbitrary interval metadata which applies to intervals within a sequence to store interval features (such as protein domains). lowercase : bool or str, optional If ``True``, lowercase sequence characters will be converted to uppercase characters in order to be valid IUPAC Protein characters. If ``False``, no characters will be converted. If a str, it will be treated as a key into the positional metadata of the object. All lowercase characters will be converted to uppercase, and a ``True`` value will be stored in a boolean array in the positional metadata under the key. validate : bool, optional If ``True``, validation will be performed to ensure that all sequence characters are in the IUPAC protein character set. If ``False``, validation will not be performed. Turning off validation will improve runtime performance. If invalid characters are present, however, there is **no guarantee that operations performed on the resulting object will work or behave as expected.** Only turn off validation if you are certain that the sequence characters are valid. To store sequence data that is not IUPAC-compliant, use ``Sequence``. 
See Also -------- GrammaredSequence Notes ----- According to the IUPAC notation [1]_ , a protein sequence may contain the following 20 definite characters (canonical amino acids): +-----+---------+--------------+ |Code |3-letter |Amino acid | +=====+=========+==============+ |``A``|Ala |Alanine | +-----+---------+--------------+ |``C``|Cys |Cysteine | +-----+---------+--------------+ |``D``|Asp |Aspartic acid | +-----+---------+--------------+ |``E``|Glu |Glutamic acid | +-----+---------+--------------+ |``F``|Phe |Phenylalanine | +-----+---------+--------------+ |``G``|Gly |Glycine | +-----+---------+--------------+ |``H``|His |Histidine | +-----+---------+--------------+ |``I``|Ile |Isoleucine | +-----+---------+--------------+ |``K``|Lys |Lysine | +-----+---------+--------------+ |``L``|Leu |Leucine | +-----+---------+--------------+ |``M``|Met |Methionine | +-----+---------+--------------+ |``N``|Asn |Asparagine | +-----+---------+--------------+ |``P``|Pro |Proline | +-----+---------+--------------+ |``Q``|Gln |Glutamine | +-----+---------+--------------+ |``R``|Arg |Arginine | +-----+---------+--------------+ |``S``|Ser |Serine | +-----+---------+--------------+ |``T``|Thr |Threonine | +-----+---------+--------------+ |``V``|Val |Valine | +-----+---------+--------------+ |``W``|Trp |Tryptophan | +-----+---------+--------------+ |``Y``|Tyr |Tyrosine | +-----+---------+--------------+ And the following four degenerate characters, each of which representing two or more amino acids: +-----+---------+------------+ |Code |3-letter |Amino acids | +=====+=========+============+ |``B``|Asx |D or N | +-----+---------+------------+ |``Z``|Glx |E or Q | +-----+---------+------------+ |``J``|Xle |I or L | +-----+---------+------------+ |``X``|Xaa |All 20 | +-----+---------+------------+ Plus one stop character: ``*`` (Ter), and two gap characters: ``-`` and ``.``. Characters other than the above 27 are not allowed. 
If you intend to use additional characters to represent non-canonical amino acids, such as ``U`` (Sec, Selenocysteine) and ``O`` (Pyl, Pyrrolysine), you may create a custom alphabet using ``GrammaredSequence``. Directly modifying the alphabet of ``Protein`` may break functions that rely on the IUPAC alphabet. It should be noted that some functions do not support certain characters. For example, the BLOSUM and PAM substitution matrices do not support ``J`` (Xle). In such circumstances, unsupported characters will be replaced with ``X`` to represent any of the canonical amino acids. References ---------- .. [1] Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984. Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030. A Cornish-Bowden Examples -------- >>> from skbio import Protein >>> Protein('PAW') Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 PAW Convert lowercase characters to uppercase: >>> Protein('paW', lowercase=True) Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 PAW """ __stop_codes = None @classproperty def _stop_codes(cls): if cls.__stop_codes is None: stops = cls.stop_chars cls.__stop_codes = np.asarray([ord(s) for s in stops]) return cls.__stop_codes @classproperty @overrides(GrammaredSequence) def alphabet(cls): return super(Protein, cls).alphabet | cls.stop_chars @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set("ACDEFGHIKLMNOPQRSTUVWY") @classproperty @overrides(GrammaredSequence) def noncanonical_chars(cls): return set("OU") @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return { "B": set("DN"), "Z": set("EQ"), "J": set("IL"), "X": set("ACDEFGHIKLMNOPQRSTUVWY"), } @classproperty def stop_chars(cls): """Return characters representing 
translation stop codons. Returns ------- set Characters representing translation stop codons. """ return set("*") @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set("-.") @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return "-" @classproperty @overrides(GrammaredSequence) def wildcard_char(cls): return "X" @property def _motifs(self): return _motifs def stops(self): """Find positions containing stop characters in the protein sequence. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` indicates a stop character is present at that position in the protein sequence. See Also -------- has_stops Examples -------- >>> from skbio import Protein >>> s = Protein('PAW') >>> s.stops() array([False, False, False], dtype=bool) >>> s = Protein('PAW*E*') >>> s.stops() array([False, False, False, True, False, True], dtype=bool) """ return np.in1d(self._bytes, self._stop_codes) def has_stops(self): """Determine if the sequence contains one or more stop characters. Returns ------- bool Indicates whether there are one or more occurrences of stop characters in the protein sequence. 
Examples -------- >>> from skbio import Protein >>> s = Protein('PAW') >>> s.has_stops() False >>> s = Protein('PAW*E*') >>> s.has_stops() True """ return bool(self.stops().any()) @overrides(GrammaredSequence) def _repr_stats(self): """Define custom statistics to display in the sequence's repr.""" stats = super(Protein, self)._repr_stats() stats.append(("has stops", "%r" % self.has_stops())) return stats _motifs = parent_motifs.copy() @_motifs("N-glycosylation") def _motif_nitro_glycosylation(sequence, min_length, ignore): """Identify N-glycosylation runs.""" return sequence.find_with_regex("(N[^PX][ST][^PX])", ignore=ignore) # Leave this at the bottom _motifs.interpolate(Protein, "find_motifs") scikit-bio-0.6.2/skbio/sequence/_repr.py000066400000000000000000000102421464262511300201570ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import math from skbio.util._misc import chunk_str from skbio.metadata._repr import _MetadataReprBuilder class _SequenceReprBuilder(_MetadataReprBuilder): """Build a ``Sequence`` repr. Parameters ---------- seq : Sequence Sequence to repr. width : int Maximum width of the repr. indent : int Number of spaces to use for indented lines. chunk_size: int Number of characters in each chunk of a sequence. 
""" def __init__(self, seq, width, indent, chunk_size): super(_SequenceReprBuilder, self).__init__(seq, width, indent) self._chunk_size = chunk_size def _process_header(self): cls_name = self._obj.__class__.__name__ self._lines.add_line(cls_name) self._lines.add_separator() def _process_data(self): num_lines, num_chars, column_width = self._find_optimal_seq_chunking() # display entire sequence if we can, else display the first two and # last two lines separated by ellipsis if num_lines <= 5: self._lines.add_lines( self._format_chunked_seq(range(num_lines), num_chars, column_width) ) else: self._lines.add_lines( self._format_chunked_seq(range(2), num_chars, column_width) ) self._lines.add_line("...") self._lines.add_lines( self._format_chunked_seq( range(num_lines - 2, num_lines), num_chars, column_width ) ) def _find_optimal_seq_chunking(self): """Find the optimal number of sequence chunks to fit on a single line. Returns the number of lines the sequence will occupy, the number of sequence characters displayed on each line, and the column width necessary to display position info using the optimal number of sequence chunks. """ # strategy: use an iterative approach to find the optimal number of # sequence chunks per line. start with a single chunk and increase # until the max line width is exceeded. 
when this happens, the previous # number of chunks is optimal num_lines = 0 num_chars = 0 column_width = 0 num_chunks = 1 not_exceeded = True while not_exceeded: line_len, new_chunk_info = self._compute_chunked_seq_line_len(num_chunks) not_exceeded = line_len <= self._width if not_exceeded: num_lines, num_chars, column_width = new_chunk_info num_chunks += 1 return num_lines, num_chars, column_width def _compute_chunked_seq_line_len(self, num_chunks): """Compute line length based on a number of chunks.""" num_chars = num_chunks * self._chunk_size # ceil to account for partial line num_lines = int(math.ceil(len(self._obj) / num_chars)) # position column width is fixed width, based on the number of # characters necessary to display the position of the final line (all # previous positions will be left justified using this width) column_width = len("%d " % ((num_lines - 1) * num_chars)) # column width + number of sequence characters + spaces between chunks line_len = column_width + num_chars + (num_chunks - 1) return line_len, (num_lines, num_chars, column_width) def _format_chunked_seq(self, line_idxs, num_chars, column_width): """Format specified lines of chunked sequence data.""" lines = [] for line_idx in line_idxs: seq_idx = line_idx * num_chars chars = str(self._obj[seq_idx : seq_idx + num_chars]) chunked_chars = chunk_str(chars, self._chunk_size, " ") lines.append(("%d" % seq_idx).ljust(column_width) + chunked_chars) return lines scikit-bio-0.6.2/skbio/sequence/_rna.py000066400000000000000000000336251464262511300200010ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import skbio from skbio.util._decorator import classproperty, overrides from ._nucleotide_mixin import NucleotideMixin, _motifs as _parent_motifs from ._grammared_sequence import GrammaredSequence class RNA(GrammaredSequence, NucleotideMixin): r"""Store RNA sequence data and optional associated metadata. Parameters ---------- sequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Characters representing the RNA sequence itself. metadata : dict, optional Arbitrary metadata which applies to the entire sequence. positional_metadata : Pandas DataFrame consumable, optional Arbitrary per-character metadata. For example, quality data from sequencing reads. Must be able to be passed directly to the Pandas DataFrame constructor. interval_metadata : IntervalMetadata Arbitrary metadata which applies to intervals within a sequence to store interval features (such as exons or introns on the sequence). lowercase : bool or str, optional If ``True``, lowercase sequence characters will be converted to uppercase characters in order to be valid IUPAC RNA characters. If ``False``, no characters will be converted. If a str, it will be treated as a key into the positional metadata of the object. All lowercase characters will be converted to uppercase, and a ``True`` value will be stored in a boolean array in the positional metadata under the key. validate : bool, optional If ``True``, validation will be performed to ensure that all sequence characters are in the IUPAC RNA character set. If ``False``, validation will not be performed. Turning off validation will improve runtime performance. If invalid characters are present, however, there is **no guarantee that operations performed on the resulting object will work or behave as expected.** Only turn off validation if you are certain that the sequence characters are valid. To store sequence data that is not IUPAC-compliant, use ``Sequence``. 
See Also -------- DNA GrammaredSequence Notes ----- According to the IUPAC RNA character set [1]_ , an RNA sequence may contain the following four definite characters (canonical nucleotides): +-----+-----------+ |Code |Nucleobase | +=====+===========+ |``A``|Adenine | +-----+-----------+ |``C``|Cytosine | +-----+-----------+ |``G``|Guanine | +-----+-----------+ |``U``|Uracil | +-----+-----------+ Plus 11 degenerate characters: ``R``, ``Y``, ``S``, ``W``, ``K``, ``M``, ``B``, ``D``, ``H``, ``V`` and ``N``, and two gap characters: ``-`` and ``.``. The definitions of degenerate characters are provided in ``DNA``, in which ``T`` should be replaced with ``U`` for RNA sequences. Characters other than the above 17 are not allowed. To include additional characters, you may create a custom alphabet using ``GrammaredSequence``. Directly modifying the alphabet of ``RNA`` may break methods that rely on the IUPAC alphabet. It should be noted that some functions do not support degenerate characters characters. In such cases, they will be replaced with `N` to represent any of the canonical nucleotides. References ---------- .. [1] Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984. Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030. 
A Cornish-Bowden Examples -------- >>> from skbio import RNA >>> RNA('ACCGAAU') RNA -------------------------- Stats: length: 7 has gaps: False has degenerates: False has definites: True GC-content: 42.86% -------------------------- 0 ACCGAAU Convert lowercase characters to uppercase: >>> RNA('AcCGaaU', lowercase=True) RNA -------------------------- Stats: length: 7 has gaps: False has degenerates: False has definites: True GC-content: 42.86% -------------------------- 0 ACCGAAU """ @classproperty @overrides(NucleotideMixin) def complement_map(cls): comp_map = { "A": "U", "U": "A", "G": "C", "C": "G", "Y": "R", "R": "Y", "S": "S", "W": "W", "K": "M", "M": "K", "B": "V", "D": "H", "H": "D", "V": "B", "N": "N", } comp_map.update({c: c for c in cls.gap_chars}) return comp_map @classproperty @overrides(GrammaredSequence) def definite_chars(cls): return set("ACGU") @classproperty @overrides(GrammaredSequence) def degenerate_map(cls): return { "R": set("AG"), "Y": set("CU"), "M": set("AC"), "K": set("UG"), "W": set("AU"), "S": set("GC"), "B": set("CGU"), "D": set("AGU"), "H": set("ACU"), "V": set("ACG"), "N": set("ACGU"), } @classproperty @overrides(GrammaredSequence) def default_gap_char(cls): return "-" @classproperty @overrides(GrammaredSequence) def gap_chars(cls): return set("-.") @classproperty @overrides(GrammaredSequence) def wildcard_char(cls): return "N" @property def _motifs(self): return _motifs def reverse_transcribe(self): """Reverse transcribe RNA into DNA. It returns the coding DNA strand of the RNA sequence, i.e. uracil (U) is replaced with thymine (T) in the reverse transcribed sequence. Returns ------- DNA Reverse transcribed sequence. See Also -------- DNA.transcribe translate translate_six_frames Notes ----- RNA sequence's metadata and positional metadata are included in the transcribed DNA sequence. 
Examples -------- Reverse transcribe RNA into DNA: >>> from skbio import RNA >>> rna = RNA('UAACGUUA') >>> rna RNA -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 25.00% -------------------------- 0 UAACGUUA >>> rna.reverse_transcribe() DNA -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 25.00% -------------------------- 0 TAACGTTA """ seq = self._string.replace(b"U", b"T") metadata = None if self.has_metadata(): metadata = self.metadata positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata interval_metadata = None if self.has_interval_metadata(): interval_metadata = self.interval_metadata # turn off validation because `seq` is guaranteed to be valid return skbio.DNA( seq, metadata=metadata, positional_metadata=positional_metadata, interval_metadata=interval_metadata, validate=False, ) def translate(self, genetic_code=1, *args, **kwargs): """Translate RNA sequence into protein sequence. Parameters ---------- genetic_code : int, GeneticCode, optional Genetic code to use in translation. If ``int``, used as a table ID to look up the corresponding NCBI genetic code. args : tuple Positional arguments accepted by ``GeneticCode.translate``. kwargs : dict Keyword arguments accepted by ``GeneticCode.translate``. Returns ------- Protein Translated sequence. See Also -------- GeneticCode.translate GeneticCode.from_ncbi translate_six_frames Notes ----- RNA sequence's metadata are included in the translated protein sequence. Positional metadata are not included. 
Examples -------- Translate RNA into protein using NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import RNA >>> rna = RNA('AUGCCACUUUAA') >>> rna.translate() Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* Translate the same RNA sequence using a different NCBI genetic code (table ID 3, the yeast mitochondrial code) and specify that translation must terminate at the first stop codon: >>> rna.translate(3, stop='require') Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 MPT """ if not isinstance(genetic_code, skbio.GeneticCode): genetic_code = skbio.GeneticCode.from_ncbi(genetic_code) return genetic_code.translate(self, *args, **kwargs) def translate_six_frames(self, genetic_code=1, *args, **kwargs): """Translate RNA into protein using six possible reading frames. The six possible reading frames are: * 1 (forward) * 2 (forward) * 3 (forward) * -1 (reverse) * -2 (reverse) * -3 (reverse) Translated sequences are yielded in this order. Parameters ---------- genetic_code : int, GeneticCode, optional Genetic code to use in translation. If ``int``, used as a table ID to look up the corresponding NCBI genetic code. args : tuple Positional arguments accepted by ``GeneticCode.translate_six_frames``. kwargs : dict Keyword arguments accepted by ``GeneticCode.translate_six_frames``. Yields ------ Protein Translated sequence in the current reading frame. See Also -------- GeneticCode.translate_six_frames GeneticCode.from_ncbi translate Notes ----- This method is faster than (and equivalent to) performing six independent translations using, for example: ``(seq.translate(reading_frame=rf) for rf in GeneticCode.reading_frames)`` RNA sequence's metadata are included in each translated protein sequence. 
Positional metadata are not included. Examples -------- Translate RNA into protein using the six possible reading frames and NCBI's standard genetic code (table ID 1, the default genetic code in scikit-bio): >>> from skbio import RNA >>> rna = RNA('AUGCCACUUUAA') >>> for protein in rna.translate_six_frames(): ... protein ... print('') Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 MPL* Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 CHF Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 ATL Protein -------------------------- Stats: length: 4 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 LKWH Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: True -------------------------- 0 *SG Protein -------------------------- Stats: length: 3 has gaps: False has degenerates: False has definites: True has stops: False -------------------------- 0 KVA """ if not isinstance(genetic_code, skbio.GeneticCode): genetic_code = skbio.GeneticCode.from_ncbi(genetic_code) return genetic_code.translate_six_frames(self, *args, **kwargs) @overrides(GrammaredSequence) def _repr_stats(self): """Define custom statistics to display in the sequence's repr.""" stats = super(RNA, self)._repr_stats() stats.append(("GC-content", "{:.2%}".format(self.gc_content()))) return stats _motifs = _parent_motifs.copy() # Leave this at the bottom _motifs.interpolate(RNA, "find_motifs") scikit-bio-0.6.2/skbio/sequence/_sequence.py000066400000000000000000002330331464262511300210240ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import re import collections import numbers from contextlib import contextmanager import numpy as np import pandas as pd import skbio.sequence.distance from skbio._base import SkbioObject from skbio.metadata._mixin import ( MetadataMixin, PositionalMetadataMixin, IntervalMetadataMixin, ) from skbio.metadata import IntervalMetadata from skbio.sequence._repr import _SequenceReprBuilder from skbio.sequence._alphabet import ( _alphabet_to_hashes, _indices_in_alphabet_ascii, _indices_in_observed, ) from skbio.util import find_duplicates from skbio.util._decorator import classonlymethod, overrides class Sequence( MetadataMixin, PositionalMetadataMixin, IntervalMetadataMixin, collections.abc.Sequence, SkbioObject, ): """Store generic sequence data and optional associated metadata. ``Sequence`` objects do not enforce an alphabet or grammar and are thus the most generic objects for storing sequence data. ``Sequence`` objects do not necessarily represent biological sequences. For example, ``Sequence`` can be used to represent a position in a multiple sequence alignment. Subclasses ``DNA``, ``RNA``, and ``Protein`` enforce the IUPAC character set [1]_ for, and provide operations specific to, each respective molecule type. ``Sequence`` objects consist of the underlying sequence data, as well as optional metadata and positional metadata. The underlying sequence is immutable, while the metdata and positional metadata are mutable. Parameters ---------- sequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\\|S1') Characters representing the sequence itself. 
metadata : dict, optional Arbitrary metadata which applies to the entire sequence. A shallow copy of the ``dict`` will be made (see Examples section below for details). positional_metadata : pd.DataFrame consumable, optional Arbitrary per-character metadata (e.g., sequence read quality scores). Must be able to be passed directly to ``pd.DataFrame`` constructor. Each column of metadata must be the same length as `sequence`. A shallow copy of the positional metadata will be made if necessary (see Examples section below for details). interval_metadata : IntervalMetadata Arbitrary metadata which applies to intervals within a sequence to store interval features (such as genes, ncRNA on the sequence). lowercase : bool or str, optional If ``True``, lowercase sequence characters will be converted to uppercase characters. If ``False``, no characters will be converted. If a str, it will be treated as a key into the positional metadata of the object. All lowercase characters will be converted to uppercase, and a ``True`` value will be stored in a boolean array in the positional metadata under the key. See Also -------- DNA RNA Protein References ---------- .. [1] Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984. Nucleic Acids Res. May 10, 1985; 13(9): 3021-3030. A Cornish-Bowden Examples -------- >>> from skbio import Sequence >>> from skbio.metadata import IntervalMetadata **Creating sequences:** Create a sequence without any metadata: >>> seq = Sequence('GGUCGUGAAGGA') >>> seq Sequence --------------- Stats: length: 12 --------------- 0 GGUCGUGAAG GA Create a sequence with metadata and positional metadata: >>> metadata = {'authors': ['Alice'], 'desc':'seq desc', 'id':'seq-id'} >>> positional_metadata = {'exons': [True, True, False, True], ... 
'quality': [3, 3, 4, 10]} >>> interval_metadata = IntervalMetadata(4) >>> interval = interval_metadata.add([(1, 3)], metadata={'gene': 'sagA'}) >>> seq = Sequence('ACGT', metadata=metadata, ... positional_metadata=positional_metadata, ... interval_metadata=interval_metadata) >>> seq Sequence ----------------------------- Metadata: 'authors': 'desc': 'seq desc' 'id': 'seq-id' Positional metadata: 'exons': 'quality': Interval metadata: 1 interval feature Stats: length: 4 ----------------------------- 0 ACGT **Retrieving underlying sequence data:** Retrieve underlying sequence: >>> seq.values # doctest: +NORMALIZE_WHITESPACE array([b'A', b'C', b'G', b'T'], dtype='|S1') Underlying sequence immutable: >>> values = np.array([b'T', b'C', b'G', b'A'], dtype='|S1') >>> seq.values = values # doctest: +SKIP Traceback (most recent call last): ... AttributeError: can't set attribute >>> seq.values[0] = b'T' Traceback (most recent call last): ... ValueError: assignment destination is read-only **Retrieving sequence metadata:** Retrieve metadata: >>> seq.metadata {'authors': ['Alice'], 'desc': 'seq desc', 'id': 'seq-id'} Retrieve positional metadata: >>> seq.positional_metadata exons quality 0 True 3 1 True 3 2 False 4 3 True 10 Retrieve interval metadata: >>> seq.interval_metadata # doctest: +ELLIPSIS 1 interval feature ------------------ Interval(interval_metadata=<...>, bounds=[(1, 3)], \ fuzzy=[(False, False)], metadata={'gene': 'sagA'}) **Updating sequence metadata:** .. warning:: Be aware that a shallow copy of ``metadata`` and ``positional_metadata`` is made for performance. Since a deep copy is not made, changes made to mutable Python objects stored as metadata may affect the metadata of other ``Sequence`` objects or anything else that shares a reference to the object. The following examples illustrate this behavior. 
First, let's create a sequence and update its metadata: >>> metadata = {'id': 'seq-id', 'desc': 'seq desc', 'authors': ['Alice']} >>> seq = Sequence('ACGT', metadata=metadata) >>> seq.metadata['id'] = 'new-id' >>> seq.metadata['pubmed'] = 12345 >>> seq.metadata {'id': 'new-id', 'desc': 'seq desc', 'authors': ['Alice'], 'pubmed': 12345} Note that the original metadata dictionary (stored in variable ``metadata``) hasn't changed because a shallow copy was made: >>> metadata {'id': 'seq-id', 'desc': 'seq desc', 'authors': ['Alice']} >>> seq.metadata == metadata False Note however that since only a *shallow* copy was made, updates to mutable objects will also change the original metadata dictionary: >>> seq.metadata['authors'].append('Bob') >>> seq.metadata['authors'] ['Alice', 'Bob'] >>> metadata['authors'] ['Alice', 'Bob'] This behavior can also occur when manipulating a sequence that has been derived from another sequence: >>> subseq = seq[1:3] >>> subseq Sequence ----------------------------- Metadata: 'authors': 'desc': 'seq desc' 'id': 'new-id' 'pubmed': 12345 Stats: length: 2 ----------------------------- 0 CG >>> subseq.metadata {'id': 'new-id', 'desc': 'seq desc', 'authors': ['Alice', 'Bob'], \ 'pubmed': 12345} The subsequence has inherited the metadata of its parent sequence. If we update the subsequence's author list, we see the changes propagated in the parent sequence and original metadata dictionary: >>> subseq.metadata['authors'].append('Carol') >>> subseq.metadata['authors'] ['Alice', 'Bob', 'Carol'] >>> seq.metadata['authors'] ['Alice', 'Bob', 'Carol'] >>> metadata['authors'] ['Alice', 'Bob', 'Carol'] The behavior for updating positional metadata is similar. Let's create a new sequence with positional metadata that is already stored in a ``pd.DataFrame``: >>> positional_metadata = pd.DataFrame( ... 
{'list': [[], [], [], []], 'quality': [3, 3, 4, 10]}) >>> seq = Sequence('ACGT', positional_metadata=positional_metadata) >>> seq Sequence ----------------------------- Positional metadata: 'list': 'quality': Stats: length: 4 ----------------------------- 0 ACGT >>> seq.positional_metadata list quality 0 [] 3 1 [] 3 2 [] 4 3 [] 10 Now let's update the sequence's positional metadata by adding a new column and changing a value in another column: >>> seq.positional_metadata['gaps'] = [False, False, False, False] >>> seq.positional_metadata.loc[0, 'quality'] = 999 >>> seq.positional_metadata list quality gaps 0 [] 999 False 1 [] 3 False 2 [] 4 False 3 [] 10 False Note that the original positional metadata (stored in variable ``positional_metadata``) hasn't changed because a shallow copy was made: >>> positional_metadata list quality 0 [] 3 1 [] 3 2 [] 4 3 [] 10 >>> seq.positional_metadata.equals(positional_metadata) False Next let's create a sequence that has been derived from another sequence: >>> subseq = seq[1:3] >>> subseq Sequence ----------------------------- Positional metadata: 'list': 'quality': 'gaps': Stats: length: 2 ----------------------------- 0 CG >>> subseq.positional_metadata list quality gaps 0 [] 3 False 1 [] 4 False As described above for metadata, since only a *shallow* copy was made of the positional metadata, updates to mutable objects will also change the parent sequence's positional metadata and the original positional metadata ``pd.DataFrame``: >>> subseq.positional_metadata.loc[0, 'list'].append('item') >>> subseq.positional_metadata list quality gaps 0 [item] 3 False 1 [] 4 False >>> seq.positional_metadata list quality gaps 0 [] 999 False 1 [item] 3 False 2 [] 4 False 3 [] 10 False >>> positional_metadata list quality 0 [] 3 1 [item] 3 2 [] 4 3 [] 10 You can also update the interval metadata. Let's re-create a ``Sequence`` object with interval metadata at first: >>> seq = Sequence('ACGT') >>> interval = seq.interval_metadata.add( ... 
[(1, 3)], metadata={'gene': 'foo'}) You can update directly on the ``Interval`` object: >>> interval # doctest: +ELLIPSIS Interval(interval_metadata=<...>, bounds=[(1, 3)], \ fuzzy=[(False, False)], metadata={'gene': 'foo'}) >>> interval.bounds = [(0, 2)] >>> interval # doctest: +ELLIPSIS Interval(interval_metadata=<...>, bounds=[(0, 2)], \ fuzzy=[(False, False)], metadata={'gene': 'foo'}) You can also query and obtain the interval features you are interested and then modify them: >>> intervals = list(seq.interval_metadata.query(metadata={'gene': 'foo'})) >>> intervals[0].fuzzy = [(True, False)] >>> print(intervals[0]) # doctest: +ELLIPSIS Interval(interval_metadata=<...>, bounds=[(0, 2)], \ fuzzy=[(True, False)], metadata={'gene': 'foo'}) """ _num_ascii_codes = 128 _num_extended_ascii_codes = 256 # ASCII is built such that the difference between uppercase and lowercase # is the 6th bit. _ascii_invert_case_bit_offset = 32 _ascii_lowercase_boundary = 90 default_write_format = "fasta" __hash__ = None @property def values(self): r"""Array containing underlying sequence characters. Notes ----- This property is not writeable. Examples -------- >>> from skbio import Sequence >>> s = Sequence('AACGA') >>> s.values # doctest: +NORMALIZE_WHITESPACE array([b'A', b'A', b'C', b'G', b'A'], dtype='|S1') """ return self._bytes.view("|S1") @property def __array_interface__(self): r"""Array interface for compatibility with numpy. This property allows a ``Sequence`` object to share its underlying data buffer (``Sequence.values``) with numpy. See [1]_ for more details. References ---------- .. 
[1] http://docs.scipy.org/doc/numpy/reference/arrays.interface.html Examples -------- >>> import numpy as np >>> from skbio import Sequence >>> seq = Sequence('ABC123') >>> np.asarray(seq) # doctest: +NORMALIZE_WHITESPACE array([b'A', b'B', b'C', b'1', b'2', b'3'], dtype='|S1') """ return self.values.__array_interface__ @property def observed_chars(self): r"""Set of observed characters in the sequence. Notes ----- This property is not writeable. Examples -------- >>> from skbio import Sequence >>> s = Sequence('AACGAC') >>> s.observed_chars == {'G', 'A', 'C'} True """ return set(str(self)) @property def _string(self): return self._bytes.tobytes() @classonlymethod def concat(cls, sequences, how="strict"): r"""Concatenate an iterable of ``Sequence`` objects. Parameters ---------- sequences : iterable (Sequence) An iterable of ``Sequence`` objects or appropriate subclasses. how : {'strict', 'inner', 'outer'}, optional How to intersect the `positional_metadata` of the sequences. If 'strict': the `positional_metadata` must have the exact same columns; 'inner': an inner-join of the columns (only the shared set of columns are used); 'outer': an outer-join of the columns (all columns are used: missing values will be padded with NaN). Returns ------- Sequence The returned sequence will be an instance of the class which called this class-method. Raises ------ ValueError If `how` is not one of: 'strict', 'inner', or 'outer'. ValueError If `how` is 'strict' and the `positional_metadata` of each sequence does not have the same columns. TypeError If the sequences cannot be cast as the calling class. Notes ----- The sequence-wide metadata (``Sequence.metadata``) is not retained during concatenation. Sequence objects can be cast to a different type only when the new type is an ancestor or child of the original type. Casting between sibling types is not allowed, e.g. ``DNA`` -> ``RNA`` is not allowed, but ``DNA`` -> ``Sequence`` or ``Sequence`` -> ``DNA`` would be. 
Examples -------- Concatenate two DNA sequences into a new DNA object: >>> from skbio import DNA, Sequence >>> s1 = DNA("ACGT") >>> s2 = DNA("GGAA") >>> DNA.concat([s1, s2]) DNA -------------------------- Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 50.00% -------------------------- 0 ACGTGGAA Concatenate DNA sequences into a Sequence object (type coercion): >>> Sequence.concat([s1, s2]) Sequence ------------- Stats: length: 8 ------------- 0 ACGTGGAA Positional metadata is conserved: >>> s1 = DNA('AcgT', lowercase='one') >>> s2 = DNA('GGaA', lowercase='one', ... positional_metadata={'two': [1, 2, 3, 4]}) >>> result = DNA.concat([s1, s2], how='outer') >>> result DNA --------------------------- Positional metadata: 'one': 'two': Stats: length: 8 has gaps: False has degenerates: False has definites: True GC-content: 50.00% --------------------------- 0 ACGTGGAA >>> result.positional_metadata one two 0 False NaN 1 True NaN 2 True NaN 3 False NaN 4 False 1.0 5 False 2.0 6 True 3.0 7 False 4.0 """ if how not in {"strict", "inner", "outer"}: raise ValueError("`how` must be 'strict', 'inner', or 'outer'.") seqs = list(sequences) if len(seqs) == 0: return cls("") for seq in seqs: seq._assert_can_cast_to(cls) if how == "strict": how = "inner" cols = set() for s in seqs: if s.has_positional_metadata(): cols.add(frozenset(s.positional_metadata)) else: cols.add(frozenset()) if len(cols) > 1: raise ValueError( "The positional metadata of the sequences do" " not have matching columns. 
Consider setting" " how='inner' or how='outer'" ) seq_data = [] pm_data = [] for seq in seqs: seq_data.append(seq._bytes) pm_data.append(seq.positional_metadata) if not seq.has_positional_metadata(): del seq.positional_metadata pm = pd.concat(pm_data, join=how, ignore_index=True, sort=True) bytes_ = np.concatenate(seq_data) im = IntervalMetadata.concat(i.interval_metadata for i in seqs) return cls(bytes_, positional_metadata=pm, interval_metadata=im) @classmethod def _assert_can_cast_to(cls, target): if not (issubclass(cls, target) or issubclass(target, cls)): raise TypeError("Cannot cast %r as %r." % (cls.__name__, target.__name__)) @overrides(PositionalMetadataMixin) def _positional_metadata_axis_len_(self): return len(self) @overrides(IntervalMetadataMixin) def _interval_metadata_axis_len_(self): return len(self) def __init__( self, sequence, metadata=None, positional_metadata=None, interval_metadata=None, lowercase=False, ): if isinstance(sequence, np.ndarray): if sequence.dtype == np.uint8: self._set_bytes_contiguous(sequence) elif sequence.dtype == "|S1": sequence = sequence.view(np.uint8) # Guarantee the sequence is an array (might be scalar before # this). if sequence.shape == (): sequence = np.array([sequence], dtype=np.uint8) self._set_bytes_contiguous(sequence) else: raise TypeError( "Can only create sequence from numpy.ndarray of dtype " "np.uint8 or '|S1'. 
Invalid dtype: %s" % sequence.dtype ) elif isinstance(sequence, Sequence): # Sequence casting is acceptable between direct # decendants/ancestors sequence._assert_can_cast_to(type(self)) if metadata is None and sequence.has_metadata(): metadata = sequence.metadata if positional_metadata is None and sequence.has_positional_metadata(): positional_metadata = sequence.positional_metadata if interval_metadata is None and sequence.has_interval_metadata(): interval_metadata = sequence.interval_metadata sequence = sequence._bytes self._owns_bytes = False self._set_bytes(sequence) else: # Encode as ascii to raise UnicodeEncodeError if necessary. if isinstance(sequence, str): sequence = sequence.encode("ascii") s = np.frombuffer(sequence, dtype=np.uint8) # There are two possibilities (to our knowledge) at this point: # Either the sequence we were given was something string-like, # (else it would not have made it past frombuffer), or it was a # numpy scalar, and so our length must be 1. if isinstance(sequence, np.generic) and len(s) != 1: raise TypeError( "Can cannot create a sequence with %r" % type(sequence).__name__ ) sequence = s self._owns_bytes = False self._set_bytes(sequence) MetadataMixin._init_(self, metadata=metadata) PositionalMetadataMixin._init_(self, positional_metadata=positional_metadata) IntervalMetadataMixin._init_(self, interval_metadata=interval_metadata) if lowercase is False: pass elif lowercase is True or isinstance(lowercase, str): lowercase_mask = self._bytes > self._ascii_lowercase_boundary self._convert_to_uppercase(lowercase_mask) # If it isn't True, it must be a string_type if lowercase is not True: self.positional_metadata[lowercase] = lowercase_mask else: raise TypeError( "lowercase keyword argument expected a bool or " "string, but got %s" % type(lowercase) ) def _set_bytes_contiguous(self, sequence): r"""Munge the sequence data into a numpy array of dtype uint8.""" if not sequence.flags["C_CONTIGUOUS"]: # numpy doesn't support views of 
non-contiguous arrays. Since we're # making heavy use of views internally, and users may also supply # us with a view, make sure we *always* store a contiguous array to # avoid hard-to-track bugs. See # https://github.com/numpy/numpy/issues/5716 sequence = np.ascontiguousarray(sequence) self._owns_bytes = True else: self._owns_bytes = False self._set_bytes(sequence) def _set_bytes(self, sequence): sequence.flags.writeable = False self._bytes = sequence def _convert_to_uppercase(self, lowercase): if np.any(lowercase): with self._byte_ownership(): self._bytes[lowercase] ^= self._ascii_invert_case_bit_offset def __contains__(self, subsequence): r"""Determine if a subsequence is contained in this sequence. Parameters ---------- subsequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') The putative subsequence. Returns ------- bool Indicates whether `subsequence` is contained in this sequence. Raises ------ TypeError If `subsequence` is a ``Sequence`` object with a different type than this sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUCGUGAAGGA') >>> 'GGU' in s True >>> 'CCC' in s False """ return self._munge_to_bytestring(subsequence, "in") in self._string def __eq__(self, other): r"""Determine if this sequence is equal to another. Sequences are equal if they are *exactly* the same type and their sequence characters, metadata, and positional metadata are the same. Parameters ---------- other : Sequence Sequence to test for equality against. Returns ------- bool Indicates whether this sequence is equal to `other`. 
Examples -------- Define two ``Sequence`` objects that have the same underlying sequence of characters: >>> from skbio import Sequence >>> s = Sequence('ACGT') >>> t = Sequence('ACGT') The two sequences are considered equal because they are the same type, their underlying sequence of characters are the same, and their optional metadata attributes (``metadata`` and ``positional_metadata``) were not provided: >>> s == t True >>> t == s True Define another sequence object with a different sequence of characters than the previous two sequence objects: >>> u = Sequence('ACGA') >>> u == t False Define a sequence with the same sequence of characters as ``u`` but with different metadata, positional metadata, and interval metadata: >>> v = Sequence('ACGA', metadata={'id': 'abc'}, ... positional_metadata={'quality':[1, 5, 3, 3]}) >>> _ = v.interval_metadata.add([(0, 1)]) The two sequences are not considered equal because their metadata, positional metadata, and interval metadata do not match: >>> u == v False """ # checks ordered from least to most expensive if self.__class__ != other.__class__: return False if not MetadataMixin._eq_(self, other): return False if self._string != other._string: return False if not PositionalMetadataMixin._eq_(self, other): return False if not IntervalMetadataMixin._eq_(self, other): return False return True def __ne__(self, other): r"""Determine if this sequence is not equal to another. Sequences are not equal if they are not *exactly* the same type, or their sequence characters, metadata, or positional metadata differ. Parameters ---------- other : Sequence Sequence to test for inequality against. Returns ------- bool Indicates whether this sequence is not equal to `other`. 
Examples -------- >>> from skbio import Sequence >>> s = Sequence('ACGT') >>> t = Sequence('ACGT') >>> s != t False >>> u = Sequence('ACGA') >>> u != t True >>> v = Sequence('ACGA', metadata={'id': 'v'}) >>> u != v True """ return not (self == other) def __getitem__(self, indexable): r"""Slice this sequence. Notes ----- This drops the ``self.interval_metadata`` from the returned new ``Sequence`` object. Parameters ---------- indexable : int, slice, iterable (int and slice), 1D array_like (bool) The position(s) to return from this sequence. If `indexable` is an iterable of integers, these are assumed to be indices in the sequence to keep. If `indexable` is a 1D ``array_like`` of booleans, these are assumed to be the positions in the sequence to keep. Returns ------- Sequence New sequence containing the position(s) specified by `indexable` in this sequence. Positional metadata will be sliced in the same manner and included in the returned sequence. `metadata` is included in the returned sequence. 
Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUCGUGAAGGA') Obtain a single character from the sequence: >>> s[1] Sequence ------------- Stats: length: 1 ------------- 0 G Obtain a slice: >>> s[7:] Sequence ------------- Stats: length: 5 ------------- 0 AAGGA Obtain characters at the following indices: >>> s[[3, 4, 7, 0, 3]] Sequence ------------- Stats: length: 5 ------------- 0 CGAGC Obtain characters at positions evaluating to `True`: >>> s = Sequence('GGUCG') >>> index = [True, False, True, 'a' == 'a', False] >>> s[index] Sequence ------------- Stats: length: 3 ------------- 0 GUC """ if not isinstance(indexable, np.ndarray) and ( (not isinstance(indexable, str)) and hasattr(indexable, "__iter__") ): indexable_ = indexable indexable = np.asarray(indexable) if indexable.dtype == object: indexable = list(indexable_) # TODO: Don't blow out memory if len(indexable) == 0: # indexing with an empty list, so convert to ndarray and # fall through to ndarray slicing below indexable = np.asarray(indexable) else: seq = np.concatenate( list(_slices_from_iter(self._bytes, indexable)) ) index = _as_slice_if_single_index(indexable) positional_metadata = None if self.has_positional_metadata(): pos_md_slices = list( _slices_from_iter(self.positional_metadata, index) ) positional_metadata = pd.concat(pos_md_slices, sort=True) metadata = None if self.has_metadata(): metadata = self.metadata return self._constructor( sequence=seq, metadata=metadata, positional_metadata=positional_metadata, ) elif isinstance(indexable, str) or isinstance(indexable, bool): raise IndexError( "Cannot index with %s type: %r" % (type(indexable).__name__, indexable) ) if ( isinstance(indexable, np.ndarray) and indexable.dtype == bool and len(indexable) != len(self) ): raise IndexError( "An boolean vector index must be the same length" " as the sequence (%d, not %d)." 
% (len(self), len(indexable)) ) if isinstance(indexable, np.ndarray) and indexable.size == 0: # convert an empty ndarray to a supported dtype for slicing a numpy # array indexable = indexable.astype(int) seq = self._bytes[indexable] positional_metadata = self._slice_positional_metadata(indexable) metadata = None if self.has_metadata(): metadata = self.metadata return self._constructor( sequence=seq, metadata=metadata, positional_metadata=positional_metadata ) def _slice_positional_metadata(self, indexable): if self.has_positional_metadata(): if _is_single_index(indexable): index = _single_index_to_slice(indexable) else: index = indexable return self.positional_metadata.iloc[index] else: return None def __len__(self): r"""Return the number of characters in this sequence. Returns ------- int The length of this sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> len(s) 4 """ return self._bytes.size def __bool__(self): r"""Return truth value (truthiness) of sequence. Returns ------- bool True if length of sequence is greater than 0, else False. Examples -------- >>> from skbio import Sequence >>> bool(Sequence('')) False >>> bool(Sequence('ACGT')) True """ return len(self) > 0 def __iter__(self): r"""Iterate over positions in this sequence. Yields ------ Sequence Single character subsequence, one for each position in the sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> for c in s: ... str(c) 'G' 'G' 'U' 'C' """ for i in range(len(self)): yield self[i] def __reversed__(self): r"""Iterate over positions in this sequence in reverse order. Yields ------ Sequence Single character subsequence, one for each position in the sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> for c in reversed(s): ... str(c) 'C' 'U' 'G' 'G' """ return iter(self[::-1]) def __str__(self): r"""Return sequence characters as a string. Returns ------- str Sequence characters as a string. 
No metadata or positional metadata will be included. See Also -------- sequence Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUCGUAAAGGA', metadata={'id':'hello'}) >>> str(s) 'GGUCGUAAAGGA' """ return str(self._string.decode("ascii")) def __repr__(self): r"""Return a string representation of this sequence object. Representation includes: * sequence type * metadata keys and values: will display key/value if it is an understood type, otherwise just the type will be displayed. If it is an understood type whose representation is too long, just the type will be displayed * positional metadata: column names and column dtypes will be displayed in the order they appear in the positional metadata ``pd.DataFrame``. Column names (i.e., keys) follow the same display rules as metadata keys * interval metadata: the number of interval features will be displayed. * sequence stats (e.g., length) * up to five lines of chunked sequence data. Each line of chunked sequence data displays the current position in the sequence Returns ------- str String representation of this sequence object. Notes ----- Subclasses can override Sequence._repr_stats to provide custom statistics. Examples -------- Short sequence without metadata: >>> from skbio import Sequence >>> from skbio.metadata._interval import IntervalMetadata >>> Sequence('ACGTAATGGATACGTAATGCA') Sequence ------------------------- Stats: length: 21 ------------------------- 0 ACGTAATGGA TACGTAATGC A Longer sequence displays first two lines and last two lines: >>> Sequence('ACGT' * 100) Sequence --------------------------------------------------------------------- Stats: length: 400 --------------------------------------------------------------------- 0 ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT 60 ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT ... 
300 ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT 360 ACGTACGTAC GTACGTACGT ACGTACGTAC GTACGTACGT Sequence with metadata, positional metadata, and interval metadata: >>> metadata = { ... 'id': 'seq-id', ... 'description': 'description of the sequence, wrapping across ' ... 'lines if it\'s too long', ... 'authors': ['Alice', 'Bob', 'Carol'], ... 'year': 2015, ... 'published': True ... } >>> positional_metadata = { ... 'exons': [True, True, False, True], ... 'quality': [3, 10, 11, 10] ... } >>> seq = Sequence('ACGT', metadata=metadata, ... positional_metadata=positional_metadata) >>> _ = seq.interval_metadata.add([(0, 2)], metadata={'gene': 'sagA'}) >>> seq Sequence ---------------------------------------------------------------------- Metadata: 'authors': 'description': "description of the sequence, wrapping across lines if it's too long" 'id': 'seq-id' 'published': True 'year': 2015 Positional metadata: 'exons': 'quality': Interval metadata: 1 interval feature Stats: length: 4 ---------------------------------------------------------------------- 0 ACGT """ return _SequenceReprBuilder( seq=self, width=71, # 79 for pep8, 8 space indent for docstrings indent=4, chunk_size=10, ).build() def _repr_stats(self): r"""Define statistics to display in the sequence's repr. Subclasses can override this method to provide type-specific statistics. This method computes a single statistic: length. Returns ------- list List of tuples where each tuple represents a statistic. Each tuple contains exactly two ``str`` elements: the statistic's name/label, and the str-formatted value of the statistic. Ordering of statistics (i.e., list order) determines display order in the sequence repr. """ return [("length", "%d" % len(self))] def __copy__(self): r"""Return a shallow copy of this sequence. See Also -------- copy Notes ----- This method is equivalent to ``seq.copy(deep=False)``. 
""" return self._copy(False, {}) def __deepcopy__(self, memo): r"""Return a deep copy of this sequence. See Also -------- copy Notes ----- This method is equivalent to ``seq.copy(deep=True)``. """ return self._copy(True, memo) def _copy(self, deep, memo): # strategy: copy the sequence without metadata first, then set metadata # attributes with copies. we take this approach instead of simply # passing the metadata through the Sequence constructor because we # don't want to copy twice (this could happen when deep=True, where we # deep copy here and then shallow copy in the Sequence constructor). we # also directly set the private metadata attributes instead of using # their public setters to avoid an unnecessary copy # we don't make a distinction between deep vs. shallow copy of bytes # because dtype=np.uint8. we only need to make the distinction when # dealing with object dtype bytes_ = np.copy(self._bytes) seq_copy = self._constructor( sequence=bytes_, metadata=None, positional_metadata=None, interval_metadata=None, ) if deep: seq_copy._metadata = MetadataMixin._deepcopy_(self, memo) seq_copy._positional_metadata = PositionalMetadataMixin._deepcopy_( self, memo ) seq_copy._interval_metadata = IntervalMetadataMixin._deepcopy_(self, memo) else: seq_copy._metadata = MetadataMixin._copy_(self) seq_copy._positional_metadata = PositionalMetadataMixin._copy_(self) seq_copy._interval_metadata = IntervalMetadataMixin._copy_(self) return seq_copy def lowercase(self, lowercase): r"""Return a case-sensitive string representation of the sequence. Parameters ---------- lowercase: str or boolean vector If lowercase is a boolean vector, it is used to set sequence characters to lowercase in the output string. True values in the boolean vector correspond to lowercase characters. If lowercase is a str, it is treated like a key into the positional metadata, pointing to a column which must be a boolean vector. That boolean vector is then used as described previously. 
Returns ------- str String representation of sequence with specified characters set to lowercase. Examples -------- >>> from skbio import Sequence >>> s = Sequence('ACGT') >>> s.lowercase([True, True, False, False]) 'acGT' >>> s = Sequence('ACGT', ... positional_metadata={ ... 'exons': [True, False, False, True]}) >>> s.lowercase('exons') 'aCGt' Constructor automatically populates a column in positional metadata when the ``lowercase`` keyword argument is provided with a column name: >>> s = Sequence('ACgt', lowercase='introns') >>> s.lowercase('introns') 'ACgt' >>> s = Sequence('ACGT', lowercase='introns') >>> s.lowercase('introns') 'ACGT' """ index = self._munge_to_index_array(lowercase) outbytes = self._bytes.copy() outbytes[index] ^= self._ascii_invert_case_bit_offset return str(outbytes.tobytes().decode("ascii")) def count(self, subsequence, start=None, end=None): r"""Count occurrences of a subsequence in this sequence. Parameters ---------- subsequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Subsequence to count occurrences of. start : int, optional The position at which to start counting (inclusive). end : int, optional The position at which to stop counting (exclusive). Returns ------- int Number of occurrences of `subsequence` in this sequence. Raises ------ ValueError If `subsequence` is of length 0. TypeError If `subsequence` is a ``Sequence`` object with a different type than this sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUCG') >>> s.count('G') 3 >>> s.count('GG') 1 >>> s.count('T') 0 >>> s.count('G', 2, 5) 1 """ if len(subsequence) == 0: raise ValueError("`count` is not defined for empty subsequences.") return self._string.count( self._munge_to_bytestring(subsequence, "count"), start, end ) def replace(self, where, character): r"""Replace values in this sequence with a different character. 
Parameters ---------- where : 1D array_like (bool) or iterable (slices or ints) or str Indicates positions in the sequence to replace with `character`. Can be a boolean vector, an iterable of indices/slices, or a string that is a key in `positional_metadata` pointing to a boolean vector. character : str or bytes Character that will replace chosen items in this sequence. Returns ------- Sequence Copy of this sequence, with chosen items replaced with chosen character. All metadata is retained. Examples -------- Let's create and display a Sequence: >>> from skbio import Sequence >>> sequence = Sequence('GGTACCAACG') >>> str(sequence) 'GGTACCAACG' Let's call ``replace`` on the Sequence using a boolean vector for ``where`` and assign it to a new variable: >>> seq = sequence.replace([False, False, False, True, False, False, ... True, True, False, False], '-') Let's take a look at the new Sequence: >>> str(seq) 'GGT-CC--CG' Other types of input are accepted by the ``where`` parameter. Let's pass in a list of indices and slices that is equivalent to the boolean vector we used previously: >>> str(seq) == str(sequence.replace([3, slice(6, 8)], '-')) True ``where`` also accepts a boolean vector contained in ``Sequence.positional_metadata``: >>> sequence.positional_metadata = {'where': ... [False, False, False, True, False, ... 
False, True, True, False, False]} Let's pass in the key ``'where'`` and compare to ``seq``: >>> str(seq) == str(sequence.replace('where', '-')) True """ if isinstance(character, bytes) is not True: character = character.encode("ascii") character = ord(character) index = self._munge_to_index_array(where) seq_bytes = self._bytes.copy() seq_bytes[index] = character metadata = None if self.has_metadata(): metadata = self.metadata positional_metadata = None if self.has_positional_metadata(): positional_metadata = self.positional_metadata interval_metadata = None if self.has_interval_metadata(): interval_metadata = self.interval_metadata # Use __class__ instead of _constructor so that validations are # performed for subclasses (the user could have introduced invalid # characters). return self.__class__( seq_bytes, metadata=metadata, positional_metadata=positional_metadata, interval_metadata=interval_metadata, ) def index(self, subsequence, start=None, end=None): r"""Find position where subsequence first occurs in the sequence. Parameters ---------- subsequence : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Subsequence to search for in this sequence. start : int, optional The position at which to start searching (inclusive). end : int, optional The position at which to stop searching (exclusive). Returns ------- int Position where `subsequence` first occurs in this sequence. Raises ------ ValueError If `subsequence` is not present in this sequence. TypeError If `subsequence` is a ``Sequence`` object with a different type than this sequence. Examples -------- >>> from skbio import Sequence >>> s = Sequence('ACACGACGTT-') >>> s.index('ACG') 2 """ try: return self._string.index( self._munge_to_bytestring(subsequence, "index"), start, end ) except ValueError: raise ValueError("%r is not present in %r." % (subsequence, self)) def distance(self, other, metric=None): r"""Compute the distance to another sequence. 
Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compute the distance to. If `other` is a ``Sequence`` object, it must be the same type as this sequence. Other input types will be converted into a ``Sequence`` object of the same type as this sequence. metric : function, optional Function used to compute the distance between this sequence and `other`. If ``None`` (the default), Hamming distance will be used (:func:`skbio.sequence.distance.hamming`). `metric` should take two ``skbio.Sequence`` objects and return a ``float``. The sequence objects passed to `metric` will be the same type as this sequence. See :mod:`skbio.sequence.distance` for other predefined metrics that can be supplied via `metric`. Returns ------- float Distance between this sequence and `other` as defined by `metric`. Raises ------ TypeError If `other` is a ``Sequence`` object with a different type than this sequence. See Also -------- skbio.sequence.distance fraction_diff fraction_same Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> t = Sequence('AGUC') Compute Hamming distance (the default metric): >>> s.distance(t) 0.25 Use a custom metric: >>> def custom_metric(s1, s2): return 0.42 >>> s.distance(t, custom_metric) 0.42 """ # TODO refactor this method to accept a name (string) of the distance # metric to apply and accept **kwargs other = self._munge_to_self_type(other, "distance") if metric is None: metric = skbio.sequence.distance.hamming return float(metric(self, other)) def matches(self, other): r"""Find positions that match with another sequence. Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compare to. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` at position ``i`` indicates a match between the sequences at their positions ``i``. Raises ------ ValueError If the sequences are not the same length. 
TypeError If `other` is a ``Sequence`` object with a different type than this sequence. See Also -------- mismatches Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> t = Sequence('GAUU') >>> s.matches(t) array([ True, False, True, False], dtype=bool) """ other = self._munge_to_sequence(other, "matches/mismatches") if len(self) != len(other): raise ValueError( "Match and mismatch vectors can only be " "generated from equal length sequences." ) return self._bytes == other._bytes def mismatches(self, other): r"""Find positions that do not match with another sequence. Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compare to. Returns ------- 1D np.ndarray (bool) Boolean vector where ``True`` at position ``i`` indicates a mismatch between the sequences at their positions ``i``. Raises ------ ValueError If the sequences are not the same length. TypeError If `other` is a ``Sequence`` object with a different type than this sequence. See Also -------- matches Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> t = Sequence('GAUU') >>> s.mismatches(t) array([False, True, False, True], dtype=bool) """ return np.invert(self.matches(other)) def match_frequency(self, other, relative=False): r"""Return count of positions that are the same between two sequences. Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compare to. relative : bool, optional If ``True``, return the relative frequency of matches instead of the count. Returns ------- int or float Number of positions that are the same between the sequences. This will be an ``int`` if `relative` is ``False`` and a ``float`` if `relative` is ``True``. Raises ------ ValueError If the sequences are not the same length. TypeError If `other` is a ``Sequence`` object with a different type than this sequence. 
See Also -------- mismatch_frequency matches mismatches distance Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> t = Sequence('AGUC') >>> s.match_frequency(t) 3 >>> s.match_frequency(t, relative=True) 0.75 """ if relative: return float(self.matches(other).mean()) else: return int(self.matches(other).sum()) def mismatch_frequency(self, other, relative=False): r"""Return count of positions that differ between two sequences. Parameters ---------- other : str, Sequence, or 1D np.ndarray (np.uint8 or '\|S1') Sequence to compare to. relative : bool, optional If ``True``, return the relative frequency of mismatches instead of the count. Returns ------- int or float Number of positions that differ between the sequences. This will be an ``int`` if `relative` is ``False`` and a ``float`` if `relative` is ``True``. Raises ------ ValueError If the sequences are not the same length. TypeError If `other` is a ``Sequence`` object with a different type than this sequence. See Also -------- match_frequency matches mismatches distance Examples -------- >>> from skbio import Sequence >>> s = Sequence('GGUC') >>> t = Sequence('AGUC') >>> s.mismatch_frequency(t) 1 >>> s.mismatch_frequency(t, relative=True) 0.25 """ if relative: return float(self.mismatches(other).mean()) else: return int(self.mismatches(other).sum()) def frequencies(self, chars=None, relative=False): r"""Compute frequencies of characters in the sequence. Parameters ---------- chars : str or set of str, optional Characters to compute the frequencies of. May be a ``str`` containing a single character or a ``set`` of single-character strings. If ``None``, frequencies will be computed for all characters present in the sequence. relative : bool, optional If ``True``, return the relative frequency of each character instead of its count. 
If `chars` is provided, relative frequencies will be computed with respect to the number of characters in the sequence, **not** the total count of characters observed in `chars`. Thus, the relative frequencies will not necessarily sum to 1.0 if `chars` is provided. Returns ------- dict Frequencies of characters in the sequence. Raises ------ TypeError If `chars` is not a ``str`` or ``set`` of ``str``. ValueError If `chars` is not a single-character ``str`` or a ``set`` of single-character strings. ValueError If `chars` contains characters outside the allowable range of characters in a ``Sequence`` object. See Also -------- kmer_frequencies iter_kmers Notes ----- If the sequence is empty (i.e., length zero), ``relative=True``, **and** `chars` is provided, the relative frequency of each specified character will be ``np.nan``. If `chars` is not provided, this method is equivalent to, but faster than, ``seq.kmer_frequencies(k=1)``. If `chars` is not provided, it is equivalent to, but faster than, passing ``chars=seq.observed_chars``. Examples -------- Compute character frequencies of a sequence: >>> from skbio import Sequence >>> seq = Sequence('AGAAGACC') >>> freqs = seq.frequencies() >>> dict(sorted(freqs.items())) # display dict in sorted order {'A': 4, 'C': 2, 'G': 2} Compute relative character frequencies: >>> freqs = seq.frequencies(relative=True) >>> dict(sorted(freqs.items())) {'A': 0.5, 'C': 0.25, 'G': 0.25} Compute relative frequencies of characters A, C, and T: >>> freqs = seq.frequencies(chars={'A', 'C', 'T'}, relative=True) >>> dict(sorted(freqs.items())) {'A': 0.5, 'C': 0.25, 'T': 0.0} Note that since character T is not in the sequence we receive a relative frequency of 0.0. The relative frequencies of A and C are relative to the number of characters in the sequence (8), **not** the number of A and C characters (4 + 2 = 6). 
""" freqs = np.bincount(self._bytes, minlength=self._num_extended_ascii_codes) if chars is not None: chars, indices = self._chars_to_indices(chars) else: (indices,) = np.nonzero(freqs) # Downcast from int64 to uint8 then convert to str. This is safe # because we are guaranteed to have indices in the range 0 to 255 # inclusive. chars = indices.astype(np.uint8).tobytes().decode("ascii") obs_counts = freqs[indices] if relative: obs_counts = obs_counts / len(self) # Use tolist() for minor performance gain. return dict(zip(chars, obs_counts.tolist())) def _chars_to_indices(self, chars): """Convert characters to indices for Sequence.frequencies.""" if isinstance(chars, (str, bytes)): chars = set([chars]) elif not isinstance(chars, set): raise TypeError( "`chars` must be of type `set`, not %r" % type(chars).__name__ ) # Impose an (arbitrary) ordering to `chars` so that we can return # `indices` in that same order. chars = list(chars) indices = [] for char in chars: if not isinstance(char, (str, bytes)): raise TypeError( "Each element of `chars` must be string-like, not %r" % type(char).__name__ ) if len(char) != 1: raise ValueError( "Each element of `chars` must contain a single " "character (found %d characters)" % len(char) ) index = ord(char) if index >= self._num_extended_ascii_codes: raise ValueError( "Character %r in `chars` is outside the range of " "allowable characters in a `Sequence` object." % char ) indices.append(index) return chars, indices def iter_kmers(self, k, overlap=True): r"""Generate kmers of length `k` from this sequence. Parameters ---------- k : int The kmer length. overlap : bool, optional Defines whether the kmers should be overlapping or not. Yields ------ Sequence kmer of length `k` contained in this sequence. Raises ------ ValueError If `k` is less than 1. Examples -------- >>> from skbio import Sequence >>> s = Sequence('ACACGACGTT') >>> for kmer in s.iter_kmers(4, overlap=False): ... 
str(kmer) 'ACAC' 'GACG' >>> for kmer in s.iter_kmers(3, overlap=True): ... str(kmer) 'ACA' 'CAC' 'ACG' 'CGA' 'GAC' 'ACG' 'CGT' 'GTT' """ if k < 1: raise ValueError("k must be greater than 0.") if k > len(self): return if overlap: step = 1 count = len(self) - k + 1 else: step = k count = len(self) // k if len(self) == 0 or self.has_positional_metadata(): # Slower path when sequence is empty or positional metadata needs # to be sliced. for i in range(0, len(self) - k + 1, step): yield self[i : i + k] else: # Optimized path when positional metadata doesn't need slicing. kmers = np.lib.stride_tricks.as_strided( self._bytes, shape=(k, count), strides=(1, step) ).T metadata = None if self.has_metadata(): metadata = self.metadata for s in kmers: yield self._constructor( sequence=s, metadata=metadata, positional_metadata=None ) def kmer_frequencies(self, k, overlap=True, relative=False): r"""Return counts of words of length `k` from this sequence. Parameters ---------- k : int The word length. overlap : bool, optional Defines whether the kmers should be overlapping or not. relative : bool, optional If ``True``, return the relative frequency of each kmer instead of its count. Returns ------- dict Frequencies of words of length `k` contained in this sequence. Raises ------ ValueError If `k` is less than 1. 
Examples -------- >>> from skbio import Sequence >>> s = Sequence('ACACATTTATTA') >>> freqs = s.kmer_frequencies(3, overlap=False) >>> freqs {'ACA': 1, 'CAT': 1, 'TTA': 2} >>> freqs = s.kmer_frequencies(3, relative=True, overlap=False) >>> freqs {'ACA': 0.25, 'CAT': 0.25, 'TTA': 0.5} """ kmers = self.iter_kmers(k, overlap=overlap) freqs = dict(collections.Counter((str(seq) for seq in kmers))) if relative: if overlap: num_kmers = len(self) - k + 1 else: num_kmers = len(self) // k relative_freqs = {} for kmer, count in freqs.items(): relative_freqs[kmer] = count / num_kmers freqs = relative_freqs return freqs def find_with_regex(self, regex, ignore=None): r"""Generate slices for patterns matched by a regular expression. Parameters ---------- regex : str or regular expression object String to be compiled into a regular expression, or a pre- compiled regular expression object (e.g., from calling ``re.compile``). ignore : 1D array_like (bool) or iterable (slices or ints), optional Indicate the positions to ignore when matching. Yields ------ slice Location where the regular expression matched. Examples -------- >>> from skbio import Sequence >>> s = Sequence('AATATACCGGTTATAA') >>> for match in s.find_with_regex('(TATA+)'): ... match ... str(s[match]) slice(2, 6, None) 'TATA' slice(11, 16, None) 'TATAA' """ if isinstance(regex, str): regex = re.compile(regex) lookup = np.arange(len(self)) if ignore is None: string = str(self) else: ignore = self._munge_to_index_array(ignore) lookup = np.delete(lookup, ignore) string = str(self[lookup]) for match in regex.finditer(string): # We start at 1 because we don't want the group that contains all # other groups. for g in range(1, len(match.groups()) + 1): yield slice(lookup[match.start(g)], lookup[match.end(g) - 1] + 1) def iter_contiguous(self, included, min_length=1, invert=False): r"""Yield contiguous subsequences based on `included`. 
Parameters ---------- included : 1D array_like (bool) or iterable (slices or ints) `included` is transformed into a flat boolean vector where each position will either be included or skipped. All contiguous included positions will be yielded as a single region. min_length : int, optional The minimum length of a subsequence for it to be yielded. Default is 1. invert : bool, optional Whether to invert `included` such that it describes what should be skipped instead of included. Default is False. Yields ------ Sequence Contiguous subsequence as indicated by `included`. Notes ----- If slices provide adjacent ranges, then they will be considered the same contiguous subsequence. Examples -------- Here we use `iter_contiguous` to find all of the contiguous ungapped sequences using a boolean vector derived from our DNA sequence. >>> from skbio import DNA >>> s = DNA('AAA--TT-CCCC-G-') >>> no_gaps = ~s.gaps() >>> for ungapped_subsequence in s.iter_contiguous(no_gaps, ... min_length=2): ... print(ungapped_subsequence) AAA TT CCCC Note how the last potential subsequence was skipped because it would have been smaller than our `min_length` which was set to 2. We can also use `iter_contiguous` on a generator of slices as is produced by `find_motifs` (and `find_with_regex`). >>> from skbio import Protein >>> s = Protein('ACDFNASANFTACGNPNRTESL') >>> for subseq in s.iter_contiguous(s.find_motifs('N-glycosylation')): ... print(subseq) NASANFTA NRTE Note how the first subsequence contains two N-glycosylation sites. This happened because they were contiguous. """ idx = self._munge_to_index_array(included) if invert: idx = np.delete(np.arange(len(self)), idx) # Adapted from http://stackoverflow.com/a/7353335/579416 for contig in np.split(idx, np.where(np.diff(idx) != 1)[0] + 1): r = self[contig] if len(r) >= min_length: yield r def to_indices( self, alphabet=None, mask_gaps="auto", wildcard="auto", return_codes=False ): r"""Convert the sequence into indices of characters. 
The result will be indices of characters in an alphabet, if provided, otherwise indices of unique characters observed in the sequence, in which case the unique characters in sorted order will also be returned. Parameters ---------- alphabet : iterable of scalar or skbio.SubstitutionMatrix, optional Explicitly provided alphabet. The returned indices will be indices of characters in this alphabet. If `None`, will return indices of unique characters observed in the sequence.s mask_gaps : 'auto' or bool, optional Mask gap characters in the sequence, and return a masked array instead of a standard array. The gap characters are defined by the sequence's `gap_characters` attribute. If `'auto'` (default), will return a standard array if no gap character is found, or a masked array if gap character(s) are found. wildcard : 'auto', str of length 1 or None, optional A character to subsitute characters in the sequence that are absent from the alphabet. If `'auto'` (default), will adopt the sequence's `wildcard_char` attribute (if available). If no wildcard is given and there are absent characters, will raise an error. return_codes : bool, optional Return observed characters as an array of ASCII code points instead of a string. Not effective if `alphabet` is set. Returns ------- 1D np.ndarray or np.ma.ndarray of uint8 Vector of character indices representing the sequence str or 1D np.array of uint8, optional Sorted unique characters observed in the sequence. Raises ------ ValueError If alphabet are not valid ASCII characters or contains duplicates. ValueError If gap(s) are to be masked but gap character(s) are not defined. ValueError If wildcard character is not a valid ASCII character. Examples -------- Convert a protein sequence into indices of unique amino acids in it. Note that the unique characters are ordered. 
>>> from skbio import Protein >>> seq = Protein('MEEPQSDPSV') >>> idx, uniq = seq.to_indices() >>> idx array([2, 1, 1, 3, 4, 5, 0, 3, 5, 6], dtype=uint8) >>> uniq 'DEMPQSV' Convert a DNA sequence into indices of nucleotides in an alphabet. Note that the order of characters is consistent with the alphabet. >>> from skbio import DNA >>> seq = DNA('CTCAAAAGTC') >>> idx = seq.to_indices(alphabet='TCGA') >>> idx array([1, 0, 1, 3, 3, 3, 3, 2, 0, 1], dtype=uint8) Use the alphabet included in a substitution matrix. >>> from skbio import SubstitutionMatrix >>> sm = SubstitutionMatrix.by_name('NUC.4.4') >>> idx = seq.to_indices(alphabet=sm) >>> idx array([3, 1, 3, 0, 0, 0, 0, 2, 1, 3], dtype=uint8) Gap characters ("-" and ".") in the sequence will be masked (`mask_gaps='auto'` is the default behavior). >>> seq = DNA('GAG-CTC') >>> idx = seq.to_indices(alphabet='ACGTN', mask_gaps='auto') >>> print(idx) [2 0 2 -- 1 3 1] >>> print(idx.mask) [False False False True False False False] Characters not included in the alphabet will be substituted with a wildcard character, such as "N" for nucleotides and "X" for amino acids (`wildcard='auto'` is the default behavior). >>> seq = DNA('GAGRCTC') >>> idx = seq.to_indices(alphabet='ACGTN', wildcard='auto') >>> idx array([2, 0, 2, 4, 1, 3, 1], dtype=uint8) """ seq = self._bytes # mask gap characters mask = None if mask_gaps in (True, "auto"): gap_chars = getattr(self, "gap_chars", None) if gap_chars: # encode gap characters gap_chars = list(map(ord, gap_chars)) # locate gaps in sequence gaps = np.in1d(seq, gap_chars) if mask_gaps is True or gaps.any(): mask, seq = gaps, seq[~gaps] elif mask_gaps is True: raise ValueError( "Gap character(s) are not defined for the " "sequence." 
) # according to an alphabet if alphabet is not None: # get wildcard character if wildcard == "auto": wildcard = getattr(self, "wildcard_char", None) # encode wildcard character if wildcard is not None: try: assert (wildcard := ord(wildcard)) < 128 except (TypeError, AssertionError): raise ValueError("Wildcard must be a single ASCII " "character.") # extract alphabet from a substitution matrix if hasattr(alphabet, "_is_ascii"): if alphabet._is_ascii is True: indices = _indices_in_alphabet_ascii( seq, alphabet._char_hash, wildcard=wildcard ) else: raise ValueError( "Alphabet in the substitution matrix " "are not single ASCII characters." ) # process alphabet from scratch else: if find_duplicates(alphabet): raise ValueError("Alphabet contains duplicated " "characters.") try: alphabet = _alphabet_to_hashes(alphabet) except (TypeError, ValueError, UnicodeEncodeError): raise ValueError( "Alphabet cannot be encoded as single " "ASCII characters." ) indices = _indices_in_alphabet_ascii(seq, alphabet, wildcard=wildcard) # according to observed characters else: (indices,), observed = _indices_in_observed([seq]) indices = indices.astype(np.uint8) if return_codes is False: observed = observed.tobytes().decode("ascii") # construct masked array if mask is not None: indices_ = np.full(mask.size, 255, dtype=np.uint8) indices_[~mask] = indices indices = np.ma.array(indices_, mask=mask) if alphabet is not None: return indices else: return indices, observed def _constructor(self, **kwargs): return self.__class__(**kwargs) def _munge_to_index_array(self, sliceable): r"""Return index array from something isomorphic to a boolean vector.""" if isinstance(sliceable, str): if sliceable in self.positional_metadata: if self.positional_metadata[sliceable].dtype == bool: sliceable = self.positional_metadata[sliceable] else: raise TypeError( "Column '%s' in positional metadata does " "not correspond to a boolean vector" % sliceable ) else: raise ValueError( "No positional metadata associated 
with key " "'%s'" % sliceable ) if not hasattr(sliceable, "dtype") or ( hasattr(sliceable, "dtype") and sliceable.dtype == "object" ): sliceable = tuple(sliceable) bool_mode = False int_mode = False for s in sliceable: if isinstance(s, (bool, np.bool_)): bool_mode = True elif isinstance(s, (slice, int, np.signedinteger)) or ( hasattr(s, "dtype") and s.dtype != bool ): int_mode = True else: raise TypeError( "Invalid type in iterable: %s, must be one" " of {bool, int, slice, np.signedinteger}" % s.__class__.__name__ ) if bool_mode and int_mode: raise TypeError("Cannot provide iterable of both bool and" " int.") sliceable = np.r_[sliceable] if sliceable.dtype == bool: if sliceable.size != len(self): raise ValueError( "Boolean array (%d) does not match length of" " sequence (%d)." % (sliceable.size, len(self)) ) (normalized,) = np.where(sliceable) else: normalized = np.bincount(sliceable) if np.any(normalized > 1): raise ValueError("Overlapping index regions are not allowed.") (normalized,) = np.where(normalized) if np.any(normalized != sliceable): raise ValueError("Index regions are out of order.") return normalized def _munge_to_self_type(self, other, method): if isinstance(other, Sequence): if type(other) is not type(self): raise TypeError( "Cannot use %s and %s together with `%s`" % (self.__class__.__name__, other.__class__.__name__, method) ) else: return other return self.__class__(other) def _munge_to_sequence(self, other, method): if isinstance(other, Sequence): if type(other) is not type(self): raise TypeError( "Cannot use %s and %s together with `%s`" % (self.__class__.__name__, other.__class__.__name__, method) ) else: return other # We don't use self.__class__ or self._constructor here because we want # to construct the most general type of Sequence object in order to # avoid validation errors. 
return Sequence(other) def _munge_to_bytestring(self, other, method): if isinstance(other, bytes): return other elif isinstance(other, str): return other.encode("ascii") else: return self._munge_to_sequence(other, method)._string @contextmanager def _byte_ownership(self): if not self._owns_bytes: self._bytes = self._bytes.copy() self._owns_bytes = True self._bytes.flags.writeable = True yield self._bytes.flags.writeable = False def _single_index_to_slice(start_index): end_index = None if start_index == -1 else start_index + 1 return slice(start_index, end_index) def _is_single_index(index): return isinstance(index, numbers.Integral) and not isinstance(index, bool) def _as_slice_if_single_index(indexable): if _is_single_index(indexable): return _single_index_to_slice(indexable) else: return indexable def _slices_from_iter(array, indexables): for i in indexables: if isinstance(i, slice): pass elif _is_single_index(i): i = _single_index_to_slice(i) else: raise IndexError( "Cannot slice sequence from iterable " "containing %r." % i ) yield array[i] scikit-bio-0.6.2/skbio/sequence/_substitution.py000066400000000000000000000620471464262511300217750ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from skbio.util._decorator import classonlymethod from skbio.stats.distance import DissimilarityMatrix from skbio.sequence._alphabet import _alphabet_to_hashes class SubstitutionMatrix(DissimilarityMatrix): """Scoring matrix between characters in biological sequences. Parameters ---------- alphabet : iterable Characters that constitute the alphabet. 
scores : 2D array-like Scores of substitutions from one character (row, or axis=0) to another character (column, or axis=1). kwargs : dict Additional arguments for the ``DissimilarityMatrix`` constructor. See Also -------- skbio.stats.distance.DissimilarityMatrix Notes ----- A substitution matrix (a.k.a. replacement matrix) scores the substitution of each character by each other character and itself in an alphabet. The score usually represents the rate of substitution over evolutionary time in a biological sequence. A higher score usually indicates higher similarity in chemical properties or functional roles of two molecules, therefore a mutation from one to the other is easier. In sequence alignment, the score can measure the likelihood that a pair of aligned characters are homologous rather than by chance. This class provides a generalized interface for substitution matrices. The alphabet usually consists of individual characters, such as nucleotides or amino acids, but it can be generalized to any iterable of scalars (numbers, strings, etc.). Therefore, you may use this class to construct substitution matrices of complicated biological units (such as codons or non-canonical amino acids). The score matrix may be symmetric, as many existing matrices are, or asymmetric, where the score of one character substituted by another is unequal to the other way around. Only square matrices (i.e., numbers of rows and columns are equal) are supported. Multiple commonly used nucleotide and amino acid substitution matrices are pre-defined and can be referred to by name. Examples include NUC.4.4 for nucleotides, and variants of BLOSUM and PAM matrices for amino acids. ``SubstitutionMatrix`` is a subclass of ``DissimilarityMatrix``. Therefore, all attributes and methods of the latter also apply to the former. Examples -------- >>> from skbio import SubstitutionMatrix >>> mat = SubstitutionMatrix('ACGT', np.array([ ... [2, -1, -1, -1], ... [-1, 2, -1, -1], ... [-1, -1, 2, -1], ... 
[-1, -1, -1, 2]])) >>> mat.alphabet ('A', 'C', 'G', 'T') >>> mat.scores array([[ 2., -1., -1., -1.], [-1., 2., -1., -1.], [-1., -1., 2., -1.], [-1., -1., -1., 2.]]) >>> mat['A', 'T'] -1.0 >>> mat['G', 'G'] 2.0 >>> blosum62 = SubstitutionMatrix.by_name('BLOSUM62') """ @property def alphabet(self): """Alphabet of the substitution matrix. Each element (character) corresponds to one row/column in the matrix. Returns ------- tuple Alphabet of the substitution matrix. Notes ----- This is an alias of ``ids``. """ return self._ids @property def scores(self): """Matrix of substitution scores. Each value corresponds to the score of substituting the row character with the column character. Returns ------- 2D np.ndarray Matrix of substitution scores. Notes ----- This is an alias of ``data``. """ return self._data @property def is_ascii(self): """Whether alphabet consists of single ASCII characters. `True` if every character in the alphabet can be represented by a single ASCII character within code point range 0 to 255. Returns ------- bool Whether alphabet consists of single ASCII characters. """ return self._is_ascii def __init__(self, alphabet, scores, **kwargs): """Initialize a substitution matrix object.""" super().__init__(scores, alphabet, **kwargs) # `_char_map`: dictionary of characters to indices in the alphabet. # It is to enable efficient conversion of sequences into indices. # It is compatible with generalized sequences. self._char_map = {x: i for i, x in enumerate(alphabet)} # `_is_ascii`: whether alphabet can be encoded as ASCII characters. # `_char_hash`: hash table of ASCII code points to indices in the # alphabet. It is to enable efficient conversion of sequences into # into indices. It is optimized for ASCII sequences. try: hash_ = _alphabet_to_hashes(alphabet) except (TypeError, ValueError, UnicodeEncodeError): self._is_ascii = False else: self._is_ascii = True self._char_hash = hash_ def to_dict(self): """Create a 2D dictionary from the substitution matrix. 
Returns ------- dict of dict 2D dictionary constructed from the substitution matrix. """ return { id_: dict(zip(self._ids, row)) for id_, row in zip(self._ids, self._data) } @classonlymethod def from_dict(cls, dictionary): """Create a substitution matrix from a 2D dictionary. Parameters ---------- dictionary : dict of dict 2D dictionary of substitution scores from outer characters to inner characters. Returns ------- SubstitutionMatrix Substitution matrix constructed from the dictionary. Raises ------ ValueError If outer and inner characters are inconsistent. ValueError If scores are not numbers. Examples -------- >>> from skbio import SubstitutionMatrix >>> d = {'a': {'a': 1, 'b': 0, 'c': 0}, ... 'b': {'a': 0, 'b': 1, 'c': 0}, ... 'c': {'a': 0, 'b': 0, 'c': 1}} >>> mat = SubstitutionMatrix.from_dict(d) >>> mat.alphabet ('a', 'b', 'c') >>> mat.scores array([[ 1., 0., 0.], [ 0., 1., 0.], [ 0., 0., 1.]]) """ alphabet, rows = zip(*dictionary.items()) alphabet_set = set(alphabet) idmap = {x: i for i, x in enumerate(alphabet)} scores = np.zeros((n := len(alphabet), n)) for i, row in enumerate(rows): if set(row) != alphabet_set: raise ValueError( "The outer and inner layers of the dictionary" " must have the same set of keys." ) for key, value in row.items(): scores[i][idmap[key]] = float(value) return cls(alphabet, scores) @classonlymethod def identity(cls, alphabet, match, mismatch): """Create an identity substitution matrix. All matches and mismatches will have the identical scores, respectively, regardless of the character. Parameters ---------- alphabet : iterable Characters that constitute the alphabet. match : int or float Score assigned to all matches. mismatch : int or float Score assigned to all mismatches. Returns ------- SubstitutionMatrix Substitution matrix constructed from the alphabet and scores. 
Examples -------- >>> from skbio import SubstitutionMatrix >>> mat = SubstitutionMatrix.identity('ACGT', 1, -2) >>> mat.alphabet ('A', 'C', 'G', 'T') >>> mat.scores array([[ 1., -2., -2., -2.], [-2., 1., -2., -2.], [-2., -2., 1., -2.], [-2., -2., -2., 1.]]) """ alphabet = tuple(alphabet) scores = np.identity(len(alphabet)) * (match - mismatch) + mismatch return cls(alphabet, scores) @classonlymethod def by_name(cls, name): """Load a pre-defined substitution matrix by its name. Parameters ---------- name : str Name of the substitution matrix. Returns ------- SubstitutionMatrix Named substitution matrix. Raises ------ ValueError If named substitution matrix does not exist. See Also -------- get_names Notes ----- Names are case-insensitive. For instance, `BLOSUM62` and `blosum62` point to the same substitution matrix. Available substitution matrix names can be obtained by ``get_names``. Currently, the following names are supported: - `NUC.4.4` (a.k.a. DNAfull): A nucleotide substitution matrix covering all definite and degenerate nucleotides. - Point Accepted Mutation (PAM) [1]_: A set of amino acid substitution matrices, including `PAM30`, `PAM70` and `PAM250`. - BLOcks SUbstitution Matrix (BLOSUM) [2]_: A set of amino acid substitution matrices, including `BLOSUM45`, `BLOSUM50`, `BLOSUM62`, `BLOSUM80` and `BLOSUM90`. References ---------- .. [1] Dayhoff, M., Schwartz, R., & Orcutt, B. (1978). A model of evolutionary change in proteins. Atlas of protein sequence and structure, 5, 345-352. .. [2] Henikoff, S., & Henikoff, J. G. (1992). Amino acid substitution matrices from protein blocks. Proceedings of the National Academy of Sciences, 89(22), 10915-10919. 
Examples -------- >>> from skbio import SubstitutionMatrix >>> mat = SubstitutionMatrix.by_name('BLOSUM62') >>> len(mat.alphabet) 24 >>> mat['M', 'K'] -1.0 """ try: return _named_substitution_matrices[name] except KeyError: name_lower = name.lower() for key, value in _named_substitution_matrices.items(): if name_lower == key.lower(): return value raise ValueError(f'Substitution matrix "{name}" does not exist.') @classonlymethod def get_names(cls): """List names of pre-defined substitution matrices. Returns ------- list of str Names of pre-defined substitution matrices. See Also -------- by_name """ return list(_named_substitution_matrices.keys()) def _matrix_to_vector(mat): """Flatten a square matrix to a vector of the upper triangle and diagonal.""" assert len(mat.shape) == 2 assert mat.shape[0] == mat.shape[1] return mat[np.triu_indices(len(mat))] def _vector_to_matrix(vec): """Revert a vector representing a flattened matrix to square form.""" assert len(vec.shape) == 1 # a square matrix of shape (n, n) will have n * (n + 1) / 2 elements in the # flattened vector; the following code reverses this equation to obtain the # original shape of the matrix n = (np.sqrt(1 + 8 * len(vec)) - 1) / 2 assert n == (n := int(n)) mat = np.zeros((n, n)) mat[np.triu_indices(n)] = vec return mat + np.triu(mat, k=1).T # Defined according to the matrices hosted at the NCBI FTP server: # https://ftp.ncbi.nlm.nih.gov/blast/matrices/ # fmt: off _named_substitution_matrices = { # NUC.4.4, a.k.a. 
DNAfull "NUC.4.4": SubstitutionMatrix( "ATGCSWRYKMBVHDN", _vector_to_matrix( np.array( [5, -4, -4, -4, -4, 1, 1, -4, -4, 1, -4, -1, -1, -1, -2, 5, -4, -4, -4, 1, -4, 1, 1, -4, -1, -4, -1, -1, -2, 5, -4, 1, -4, 1, -4, 1, -4, -1, -1, -4, -1, -2, 5, 1, -4, -4, 1, -4, 1, -1, -1, -1, -4, -2, -1, -4, -2, -2, -2, -2, -1, -1, -3, -3, -1, -1, -2, -2, -2, -2, -3, -3, -1, -1, -1, -1, -4, -2, -2, -3, -1, -3, -1, -1, -1, -2, -2, -1, -3, -1, -3, -1, -1, -4, -1, -3, -3, -1, -1, -1, -3, -1, -1, -3, -1, -1, -2, -2, -2, -1, -1, -2, -2, -1, -1, -2, -1, -1, -1, -1] ) ), validate=False, ), # Point Accepted Mutation (PAM) "PAM30": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [ 6, -7, -4, -3, -6, -4, -2, -2, -7, -5, -6, -7, -5, -8, -2, 0, -1, -13, -8, -2, -3, -3, -3, -17, 8, -6, -10, -8, -2, -9, -9, -2, -5, -8, 0, -4, -9, -4, -3, -6, -2, -10, -8, -7, -4, -6, -17, 8, 2, -11, -3, -2, -3, 0, -5, -7, -1, -9, -9, -6, 0, -2, -8, -4, -8, 6, -3, -3, -17, 8, -14, -2, 2, -3, -4, -7, -12, -4, -11, -15, -8, -4, -5, -15, -11, -8, 6, 1, -5, -17, 10, -14, -14, -9, -7, -6, -15, -14, -13, -13, -8, -3, -8, -15, -4, -6, -12, -14, -9, -17, 8, 1, -7, 1, -8, -5, -3, -4, -13, -3, -5, -5, -13, -12, -7, -3, 6, -5, -17, 8, -4, -5, -5, -9, -4, -7, -14, -5, -4, -6, -17, -8, -6, 1, 6, -5, -17, 6, -9, -11, -10, -7, -8, -9, -6, -2, -6, -15, -14, -5, -3, -5, -5, -17, 9, -9, -6, -6, -10, -6, -4, -6, -7, -7, -3, -6, -1, -1, -5, -17, 8, -1, -6, -1, -2, -8, -7, -2, -14, -6, 2, -6, -6, -5, -17, 7, -8, 1, -3, -7, -8, -7, -6, -7, -2, -9, -7, -6, -17, 7, -2, -14, -6, -4, -3, -12, -9, -9, -2, -4, -5, -17, 11, -4, -8, -5, -4, -13, -11, -1, -10, -5, -5, -17, 9, -10, -6, -9, -4, 2, -8, -10, -13, -8, -17, 8, -2, -4, -14, -13, -6, -7, -4, -5, -17, 6, 0, -5, -7, -6, -1, -5, -3, -17, 7, -13, -6, -3, -3, -6, -4, -17, 13, -5, -15, -10, -14, -11, -17, 10, -7, -6, -9, -7, -17, 7, -8, -6, -5, -17, 6, 0, -5, -17, 6, -5, -17, -5, -17, 1] ) ), validate=False, ), "PAM70": SubstitutionMatrix( 
"ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [5, -4, -2, -1, -4, -2, -1, 0, -4, -2, -4, -4, -3, -6, 0, 1, 1, -9, -5, -1, -1, -1, -2, -11, 8, -3, -6, -5, 0, -5, -6, 0, -3, -6, 2, -2, -7, -2, -1, -4, 0, -7, -5, -4, -2, -3, -11, 6, 3, -7, -1, 0, -1, 1, -3, -5, 0, -5, -6, -3, 1, 0, -6, -3, -5, 5, -1, -2, -11, 6, -9, 0, 3, -1, -1, -5, -8, -2, -7, -10, -4, -1, -2, -10, -7, -5, 5, 2, -3, -11, 9, -9, -9, -6, -5, -4, -10, -9, -9, -8, -5, -1, -5, -11, -2, -4, -8, -9, -6, -11, 7, 2, -4, 2, -5, -3, -1, -2, -9, -1, -3, -3, -8, -8, -4, -1, 5, -2, -11, 6, -2, -2, -4, -6, -2, -4, -9, -3, -2, -3, -11, -6, -4, 2, 5, -3, -11, 6, -6, -6, -7, -5, -6, -7, -3, 0, -3, -10, -9, -3, -1, -3, -3, -11, 8, -6, -4, -3, -6, -4, -2, -3, -4, -5, -1, -4, 0, 1, -3, -11, 7, 1, -4, 1, 0, -5, -4, -1, -9, -4, 3, -4, -4, -3, -11, 6, -5, 2, -1, -5, -6, -4, -4, -4, 0, -6, -4, -4, -11, 6, 0, -9, -4, -2, -1, -7, -7, -6, -1, -2, -3, -11, 10, -2, -5, -3, -2, -8, -7, 0, -6, -3, -3, -11, 8, -7, -4, -6, -2, 4, -5, -7, -9, -5, -11, 7, 0, -2, -9, -9, -3, -4, -2, -3, -11, 5, 2, -3, -5, -3, 0, -2, -1, -11, 6, -8, -4, -1, -1, -3, -2, -11, 13, -3, -10, -7, -10, -7, -11, 9, -5, -4, -7, -5, -11, 6, -5, -4, -2, -11, 5, 1, -2, -11, 5, -3, -11, -3, -11, 1] ) ), validate=False, ), "PAM250": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [2, -2, 0, 0, -2, 0, 0, 1, -1, -1, -2, -1, -1, -3, 1, 1, 1, -6, -3, 0, 0, 0, 0, -8, 6, 0, -1, -4, 1, -1, -3, 2, -2, -3, 3, 0, -4, 0, 0, -1, 2, -4, -2, -1, 0, -1, -8, 2, 2, -4, 1, 1, 0, 2, -2, -3, 1, -2, -3, 0, 1, 0, -4, -2, -2, 2, 1, 0, -8, 4, -5, 2, 3, 1, 1, -2, -4, 0, -3, -6, -1, 0, 0, -7, -4, -2, 3, 3, -1, -8, 12, -5, -5, -3, -3, -2, -6, -5, -5, -4, -3, 0, -2, -8, 0, -2, -4, -5, -3, -8, 4, 2, -1, 3, -2, -2, 1, -1, -5, 0, -1, -1, -5, -4, -2, 1, 3, -1, -8, 4, 0, 1, -2, -3, 0, -2, -5, -1, 0, 0, -7, -4, -2, 3, 3, -1, -8, 5, -2, -3, -4, -2, -3, -5, 0, 1, 0, -7, -5, -1, 0, 0, -1, -8, 6, -2, -2, 0, -2, -2, 0, -1, -1, -3, 0, -2, 1, 2, -1, -8, 5, 2, -2, 2, 
1, -2, -1, 0, -5, -1, 4, -2, -2, -1, -8, 6, -3, 4, 2, -3, -3, -2, -2, -1, 2, -3, -3, -1, -8, 5, 0, -5, -1, 0, 0, -3, -4, -2, 1, 0, -1, -8, 6, 0, -2, -2, -1, -4, -2, 2, -2, -2, -1, -8, 9, -5, -3, -3, 0, 7, -1, -4, -5, -2, -8, 6, 1, 0, -6, -5, -1, -1, 0, -1, -8, 2, 1, -2, -3, -1, 0, 0, 0, -8, 3, -5, -3, 0, 0, -1, 0, -8, 17, 0, -6, -5, -6, -4, -8, 10, -2, -3, -4, -2, -8, 4, -2, -2, -1, -8, 3, 2, -1, -8, 3, -1, -8, -1, -8, 1] ) ), validate=False, ), # BLOcks SUbstitution Matrix (BLOSUM) "BLOSUM45": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [ 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -2, -2, 0, -1, -1, 0, -5, 7, 0, -1, -3, 1, 0, -2, 0, -3, -2, 3, -1, -2, -2, -1, -1, -2, -1, -2, -1, 0, -1, -5, 6, 2, -2, 0, 0, 0, 1, -2, -3, 0, -2, -2, -2, 1, 0, -4, -2, -3, 4, 0, -1, -5, 7, -3, 0, 2, -1, 0, -4, -3, 0, -3, -4, -1, 0, -1, -4, -2, -3, 5, 1, -1, -5, 12, -3, -3, -3, -3, -3, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -2, -3, -2, -5, 6, 2, -2, 1, -2, -2, 1, 0, -4, -1, 0, -1, -2, -1, -3, 0, 4, -1, -5, 6, -2, 0, -3, -2, 1, -2, -3, 0, 0, -1, -3, -2, -3, 1, 4, -1, -5, 7, -2, -4, -3, -2, -2, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -5, 10, -3, -2, -1, 0, -2, -2, -1, -2, -3, 2, -3, 0, 0, -1, -5, 5, 2, -3, 2, 0, -2, -2, -1, -2, 0, 3, -3, -3, -1, -5, 5, -3, 2, 1, -3, -3, -1, -2, 0, 1, -3, -2, -1, -5, 5, -1, -3, -1, -1, -1, -2, -1, -2, 0, 1, -1, -5, 6, 0, -2, -2, -1, -2, 0, 1, -2, -1, -1, -5, 8, -3, -2, -1, 1, 3, 0, -3, -3, -1, -5, 9, -1, -1, -3, -3, -3, -2, -1, -1, -5, 4, 2, -4, -2, -1, 0, 0, 0, -5, 5, -3, -1, 0, 0, -1, 0, -5, 15, 3, -3, -4, -2, -2, -5, 8, -1, -2, -2, -1, -5, 5, -3, -3, -1, -5, 4, 2, -1, -5, 4, -1, -5, -1, -5, 1] ) ), validate=False, ), "BLOSUM50": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, 7, 2, -2, 0, 0, 0, 1, 
-3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 4, 0, -1, -5, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 5, 1, -1, -5, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -2, -5, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -2, -5, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, 8, -4, -3, -2, 1, 4, -1, -4, -4, -2, -5, 10, -1, -1, -4, -3, -3, -2, -1, -2, -5, 5, 2, -4, -2, -2, 0, 0, -1, -5, 5, -3, -2, 0, 0, -1, 0, -5, 15, 2, -3, -5, -2, -3, -5, 8, -1, -3, -2, -1, -5, 5, -4, -3, -1, -5, 5, 2, -1, -5, 5, -1, -5, -1, -5, 1] ) ), validate=False, ), "BLOSUM62": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4, 4, 1, -3, -2, -2, 
0, 0, 0, -4, 5, -2, -2, 0, -1, -1, 0, -4, 11, 2, -3, -4, -3, -2, -4, 7, -1, -3, -2, -1, -4, 4, -3, -2, -1, -4, 4, 1, -1, -4, 4, -1, -4, -1, -4, 1] ) ), validate=False, ), "BLOSUM80": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [7, -3, -3, -3, -1, -2, -2, 0, -3, -3, -3, -1, -2, -4, -1, 2, 0, -5, -4, -1, -3, -2, -1, -8, 9, -1, -3, -6, 1, -1, -4, 0, -5, -4, 3, -3, -5, -3, -2, -2, -5, -4, -4, -2, 0, -2, -8, 9, 2, -5, 0, -1, -1, 1, -6, -6, 0, -4, -6, -4, 1, 0, -7, -4, -5, 5, -1, -2, -8, 10, -7, -1, 2, -3, -2, -7, -7, -2, -6, -6, -3, -1, -2, -8, -6, -6, 6, 1, -3, -8, 13, -5, -7, -6, -7, -2, -3, -6, -3, -4, -6, -2, -2, -5, -5, -2, -6, -7, -4, -8, 9, 3, -4, 1, -5, -4, 2, -1, -5, -3, -1, -1, -4, -3, -4, -1, 5, -2, -8, 8, -4, 0, -6, -6, 1, -4, -6, -2, -1, -2, -6, -5, -4, 1, 6, -2, -8, 9, -4, -7, -7, -3, -5, -6, -5, -1, -3, -6, -6, -6, -2, -4, -3, -8, 12, -6, -5, -1, -4, -2, -4, -2, -3, -4, 3, -5, -1, 0, -2, -8, 7, 2, -5, 2, -1, -5, -4, -2, -5, -3, 4, -6, -6, -2, -8, 6, -4, 3, 0, -5, -4, -3, -4, -2, 1, -7, -5, -2, -8, 8, -3, -5, -2, -1, -1, -6, -4, -4, -1, 1, -2, -8, 9, 0, -4, -3, -1, -3, -3, 1, -5, -3, -2, -8, 10, -6, -4, -4, 0, 4, -2, -6, -6, -3, -8, 12, -2, -3, -7, -6, -4, -4, -2, -3, -8, 7, 2, -6, -3, -3, 0, -1, -1, -8, 8, -5, -3, 0, -1, -2, -1, -8, 16, 3, -5, -8, -5, -5, -8, 11, -3, -5, -4, -3, -8, 7, -6, -4, -2, -8, 6, 0, -3, -8, 6, -1, -8, -2, -8, 1] ) ), validate=False, ), "BLOSUM90": SubstitutionMatrix( "ARNDCQEGHILKMFPSTWYVBZX*", _vector_to_matrix( np.array( [ 5, -2, -2, -3, -1, -1, -1, 0, -2, -2, -2, -1, -2, -3, -1, 1, 0, -4, -3, -1, -2, -1, -1, -6, 6, -1, -3, -5, 1, -1, -3, 0, -4, -3, 2, -2, -4, -3, -1, -2, -4, -3, -3, -2, 0, -2, -6, 7, 1, -4, 0, -1, -1, 0, -4, -4, 0, -3, -4, -3, 0, 0, -5, -3, -4, 4, -1, -2, -6, 7, -5, -1, 1, -2, -2, -5, -5, -1, -4, -5, -3, -1, -2, -6, -4, -5, 4, 0, -2, -6, 9, -4, -6, -4, -5, -2, -2, -4, -2, -3, -4, -2, -2, -4, -4, -2, -4, -5, -3, -6, 7, 2, -3, 1, -4, -3, 1, 0, -4, -2, -1, -1, -3, -3, -3, -1, 4, 
-1, -6, 6, -3, -1, -4, -4, 0, -3, -5, -2, -1, -1, -5, -4, -3, 0, 4, -2, -6, 6, -3, -5, -5, -2, -4, -5, -3, -1, -3, -4, -5, -5, -2, -3, -2, -6, 8, -4, -4, -1, -3, -2, -3, -2, -2, -3, 1, -4, -1, 0, -2, -6, 5, 1, -4, 1, -1, -4, -3, -1, -4, -2, 3, -5, -4, -2, -6, 5, -3, 2, 0, -4, -3, -2, -3, -2, 0, -5, -4, -2, -6, 6, -2, -4, -2, -1, -1, -5, -3, -3, -1, 1, -1, -6, 7, -1, -3, -2, -1, -2, -2, 0, -4, -2, -1, -6, 7, -4, -3, -3, 0, 3, -2, -4, -4, -2, -6, 8, -2, -2, -5, -4, -3, -3, -2, -2, -6, 5, 1, -4, -3, -2, 0, -1, -1, -6, 6, -4, -2, -1, -1, -1, -1, -6, 11, 2, -3, -6, -4, -3, -6, 8, -3, -4, -3, -2, -6, 5, -4, -3, -2, -6, 4, 0, -2, -6, 4, -1, -6, -2, -6, 1] ) ), validate=False, ), } # fmt: on scikit-bio-0.6.2/skbio/sequence/distance.py000066400000000000000000000120541464262511300206450ustar00rootroot00000000000000"""Sequence distance metrics (:mod:`skbio.sequence.distance`) ========================================================== .. currentmodule:: skbio.sequence.distance This module contains functions for computing distances between scikit-bio ``Sequence`` objects. These functions can be used directly or supplied to other parts of the scikit-bio API that accept a sequence distance metric as input, such as :meth:`skbio.sequence.Sequence.distance` and :meth:`skbio.stats.distance.DistanceMatrix.from_iterable`. Functions --------- .. autosummary:: :toctree: hamming kmer_distance """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np import scipy.spatial.distance import skbio def hamming(seq1, seq2): """Compute Hamming distance between two sequences. 
The Hamming distance between two equal-length sequences is the proportion of differing characters. Parameters ---------- seq1, seq2 : Sequence Sequences to compute Hamming distance between. Returns ------- float Hamming distance between `seq1` and `seq2`. Raises ------ TypeError If `seq1` and `seq2` are not ``Sequence`` instances. TypeError If `seq1` and `seq2` are not the same type. ValueError If `seq1` and `seq2` are not the same length. See Also -------- scipy.spatial.distance.hamming Notes ----- ``np.nan`` will be returned if the sequences do not contain any characters. This function does not make assumptions about the sequence alphabet in use. Each sequence object's underlying sequence of characters are used to compute Hamming distance. Characters that may be considered equivalent in certain contexts (e.g., `-` and `.` as gap characters) are treated as distinct characters when computing Hamming distance. Examples -------- >>> from skbio import Sequence >>> from skbio.sequence.distance import hamming >>> seq1 = Sequence('AGGGTA') >>> seq2 = Sequence('CGTTTA') >>> hamming(seq1, seq2) 0.5 """ _check_seqs(seq1, seq2) # Hamming requires equal length sequences. We are checking this here # because the error you would get otherwise is cryptic. if len(seq1) != len(seq2): raise ValueError( "Hamming distance can only be computed between sequences of equal " "length (%d != %d)" % (len(seq1), len(seq2)) ) # scipy throws a RuntimeWarning when computing Hamming distance on length 0 # input. if not seq1: distance = np.nan else: distance = scipy.spatial.distance.hamming(seq1.values, seq2.values) return float(distance) def kmer_distance(seq1, seq2, k, overlap=True): """Compute the kmer distance between a pair of sequences. The kmer distance between two sequences is the fraction of kmers that are unique to either sequence. Parameters ---------- seq1, seq2 : Sequence Sequences to compute kmer distance between. k : int The kmer length. 
overlap : bool, optional Defines whether the kmers should be overlapping or not. Returns ------- float kmer distance between `seq1` and `seq2`. Raises ------ ValueError If `k` is less than 1. TypeError If `seq1` and `seq2` are not ``Sequence`` instances. TypeError If `seq1` and `seq2` are not the same type. Notes ----- kmer counts are not incorporated in this distance metric. ``np.nan`` will be returned if there are no kmers defined for the sequences. Examples -------- >>> from skbio import Sequence >>> seq1 = Sequence('ATCGGCGAT') >>> seq2 = Sequence('GCAGATGTG') >>> kmer_distance(seq1, seq2, 3) # doctest: +ELLIPSIS 0.9230769230... """ _check_seqs(seq1, seq2) seq1_kmers = set(map(str, seq1.iter_kmers(k, overlap=overlap))) seq2_kmers = set(map(str, seq2.iter_kmers(k, overlap=overlap))) all_kmers = seq1_kmers | seq2_kmers if not all_kmers: return np.nan shared_kmers = seq1_kmers & seq2_kmers number_unique = len(all_kmers) - len(shared_kmers) fraction_unique = number_unique / len(all_kmers) return fraction_unique def _check_seqs(seq1, seq2): # Asserts both sequences are skbio.sequence objects for seq in seq1, seq2: if not isinstance(seq, skbio.Sequence): raise TypeError( "`seq1` and `seq2` must be Sequence instances, not %r" % type(seq).__name__ ) # Asserts sequences have the same type if type(seq1) is not type(seq2): raise TypeError( "Sequences must have matching type. Type %r does not match type %r" % (type(seq1).__name__, type(seq2).__name__) ) scikit-bio-0.6.2/skbio/sequence/tests/000077500000000000000000000000001464262511300176415ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/sequence/tests/__init__.py000066400000000000000000000005411464262511300217520ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/sequence/tests/test_alphabet.py000066400000000000000000000211441464262511300230340ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt from skbio.sequence._alphabet import ( _encode_alphabet, _alphabet_to_hashes, _indices_in_alphabet, _indices_in_alphabet_ascii, _indices_in_observed) class TestAlphabet(TestCase): def test_encode_alphabet(self): # ascii characters alpha = 'ACGT' exp = np.array([65, 67, 71, 84], dtype=np.uint8) npt.assert_equal(_encode_alphabet(alpha), exp) npt.assert_equal(_encode_alphabet(list(alpha)), exp) npt.assert_equal(_encode_alphabet(tuple(alpha)), exp) npt.assert_equal(_encode_alphabet(np.array(list(alpha))), exp) npt.assert_equal(_encode_alphabet(np.char.encode(list(alpha))), exp) # ascii code points codes = list(map(ord, alpha)) npt.assert_equal(_encode_alphabet(codes), exp) npt.assert_equal(_encode_alphabet(np.array(codes)), exp) npt.assert_equal(_encode_alphabet(np.array(codes).astype( np.uint8)), exp) # wrong data types with self.assertRaises(TypeError): _encode_alphabet(123) with self.assertRaises(TypeError): _encode_alphabet(set(alpha)) with self.assertRaises(TypeError): _encode_alphabet([1.0, 1.5, 2.0]) with self.assertRaises(TypeError): _encode_alphabet([['a', 'b'], ['c', 'd']]) # not single characters with self.assertRaises(ValueError): _encode_alphabet(['this', 'is', 'not']) # exceed ascii range with self.assertRaises(ValueError): _encode_alphabet([100, 200, 300]) with self.assertRaises(UnicodeEncodeError): 
_encode_alphabet(chr(1234) + chr(5678)) with self.assertRaises(UnicodeEncodeError): _encode_alphabet([chr(1234), chr(5678)]) def test_alphabet_to_hashes(self): alpha = 'ATGCSWRYKMBVHDN' obs = _alphabet_to_hashes(alpha) exp = np.array([ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 10, 3, 13, 255, 255, 2, 12, 255, 255, 8, 255, 9, 14, 255, 255, 255, 6, 4, 1, 255, 11, 5, 255, 7, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255], dtype=np.uint8) npt.assert_equal(obs, exp) def test_indices_in_alphabet(self): seq = 'GAGCTCA' # DNA sequence without degenerate characters alpha = 'ACGTN' # DNA alphabet exp = np.array([2, 0, 2, 1, 3, 1, 0]) # indices of characters # either alphabet or sequence may be string, list/tuple, or iterator npt.assert_equal(_indices_in_alphabet(seq, alpha), exp) npt.assert_equal(_indices_in_alphabet(list(seq), list(alpha)), exp) npt.assert_equal(_indices_in_alphabet(iter(seq), iter(alpha)), exp) # alphabet is a dictionary of character : index (most performant) npt.assert_equal(_indices_in_alphabet(seq, dict(zip(alpha, range(len( alpha))))), exp) # one character is absent from alphabet seq = 'GAGRCTCA' msg = ('One or multiple characters in the sequence are absent from ' 'the alphabet.') with self.assertRaisesRegex(ValueError, msg): _indices_in_alphabet(seq, alpha) # replace absent character with wildcard obs = _indices_in_alphabet(seq, alpha, wildcard='N') exp = np.array([2, 0, 2, 4, 1, 3, 1, 0]) npt.assert_equal(obs, exp) # wildcard not in alphabet msg = 'Wildcard character "X" is not in the alphabet.' 
with self.assertRaisesRegex(ValueError, msg): _indices_in_alphabet(seq, alpha, wildcard='X') # amino acid seq = 'MEEPQSDPSV' alpha = 'ARNDCQEGHILKMFPSTWYVBZX' exp = np.array([12, 6, 6, 14, 5, 15, 3, 14, 15, 19]) npt.assert_equal(_indices_in_alphabet(seq, alpha), exp) # natural language seq = 'The quick brown fox jumps over the lazy dog'.split() alpha = ['dog', 'fox', 'jumps', 'the'] obs = _indices_in_alphabet(seq, alpha, wildcard='the') exp = np.array([3, 3, 3, 1, 2, 3, 3, 3, 0]) npt.assert_equal(obs, exp) # empty sequence self.assertEqual(_indices_in_alphabet('', alpha).size, 0) def test_indices_in_alphabet_ascii(self): # convert a sequence into a vector of code points seq = 'GAGCTCA' seq = np.frombuffer(seq.encode('ascii'), dtype=np.uint8) # convert an alphabet into a vector of indices alpha = 'ACGTN' idx = np.frombuffer(alpha.encode('ascii'), dtype=np.uint8) alpha = np.full(128, 255, dtype=np.uint8) alpha[idx] = np.arange(idx.size) # a typical case obs = _indices_in_alphabet_ascii(seq, alpha) exp = np.array([2, 0, 2, 1, 3, 1, 0]) npt.assert_equal(obs, exp) self.assertTrue(obs.dtype.type is np.uint8) # one character is absent seq = np.insert(seq, 3, ord('R')) msg = ('One or multiple characters in the sequence are absent from ' 'the alphabet.') with self.assertRaisesRegex(ValueError, msg): _indices_in_alphabet_ascii(seq, alpha) # replace absent character obs = _indices_in_alphabet_ascii(seq, alpha, wildcard=ord('N')) exp = np.array([2, 0, 2, 4, 1, 3, 1, 0]) npt.assert_equal(obs, exp) self.assertTrue(obs.dtype.type is np.uint8) # wildcard not in alphabet msg = 'Wildcard character "&" is not in the alphabet.' 
with self.assertRaisesRegex(ValueError, msg): _indices_in_alphabet_ascii(seq, alpha, wildcard=38) def test_indices_in_observed(self): # data from human TP53 protein (NP_000537.3) seqs = ('MEEPQSDPSVEPPLSQETFSDLWKLLPE', 'NNVLSPLPSQAMDDLMLSP', 'DDIEQWFTEDPGPDEAPRMPEAA') obs_idx, obs_alp = _indices_in_observed(seqs) exp_alp = np.array(tuple('ADEFGIKLMNPQRSTVW')) exp_idx = ( np.array([8, 2, 2, 10, 11, 13, 1, 10, 13, 15, 2, 10, 10, 7, 13, 11, 2, 14, 3, 13, 1, 7, 16, 6, 7, 7, 10, 2]), np.array([9, 9, 15, 7, 13, 10, 7, 10, 13, 11, 0, 8, 1, 1, 7, 8, 7, 13, 10]), np.array([1, 1, 5, 2, 11, 16, 3, 14, 2, 1, 10, 4, 10, 1, 2, 0, 10, 12, 8, 10, 2, 0, 0])) npt.assert_equal(obs_alp, exp_alp) for obs, exp in zip(obs_idx, exp_idx): npt.assert_equal(obs, exp) # reconstruct original sequences for idx, seq in zip(obs_idx, seqs): self.assertEqual(''.join(obs_alp[idx]), seq) # sequences are numbers seqs = ([1, 4, 6, 7, 8], [3, 3, 4, 1, 0], [5, 2, 5, 8, 0]) obs_idx, obs_alp = _indices_in_observed(seqs) npt.assert_equal(obs_alp, np.arange(9)) for idx, seq in zip(obs_idx, seqs): npt.assert_equal(obs_alp[idx], np.array(seq)) # sequences are natural language seqs = (['this', 'is', 'a', 'cat'], ['that', 'is', 'a', 'dog'], ['cat', 'is', 'not', 'dog']) obs_idx, obs_alp = _indices_in_observed(seqs) exp_alp = np.unique(np.concatenate(seqs)) npt.assert_equal(obs_alp, exp_alp) for idx, seq in zip(obs_idx, seqs): npt.assert_equal(obs_alp[idx], np.array(seq)) # sequences are individual characters obs_idx, obs_alp = _indices_in_observed(['hello']) npt.assert_equal(obs_alp, np.array(['e', 'h', 'l', 'o'])) self.assertEqual(''.join(obs_alp[np.concatenate(obs_idx)]), 'hello') # empty sequence obs_idx, obs_alp = _indices_in_observed([[]]) self.assertEqual(obs_alp.size, 0) self.assertEqual(len(obs_idx), 1) self.assertEqual(obs_idx[0].size, 0) if __name__ == "__main__": main() 
scikit-bio-0.6.2/skbio/sequence/tests/test_distance.py000066400000000000000000000160571464262511300230550ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import itertools import unittest import numpy as np import numpy.testing as npt from skbio import Sequence, DNA from skbio.sequence.distance import hamming, kmer_distance class TestHamming(unittest.TestCase): def test_non_sequence(self): seq1 = Sequence('abc') seq2 = 'abc' with self.assertRaisesRegex(TypeError, r'seq1.*seq2.*Sequence.*str'): hamming(seq1, seq2) with self.assertRaisesRegex(TypeError, r'seq1.*seq2.*Sequence.*str'): hamming(seq2, seq1) def test_type_mismatch(self): seq1 = Sequence('ABC') seq2 = DNA('ACG') with self.assertRaisesRegex(TypeError, r'Sequence.*does not match.*DNA'): hamming(seq1, seq2) def test_length_mismatch(self): seq1 = Sequence('ABC') seq2 = Sequence('ABCD') with self.assertRaisesRegex(ValueError, r'equal length.*3 != 4'): hamming(seq1, seq2) def test_return_type(self): seq1 = Sequence('ABC') seq2 = Sequence('ABC') distance = hamming(seq1, seq2) self.assertIsInstance(distance, float) self.assertEqual(distance, 0.0) def test_minimum_distance(self): seq1 = Sequence('ABC') seq2 = Sequence('ABC') distance = hamming(seq1, seq2) self.assertEqual(distance, 0.0) def test_mid_range_distance(self): seq1 = Sequence("abcdefgh") seq2 = Sequence("1b23ef45") distance = hamming(seq1, seq2) self.assertEqual(distance, 5.0/8.0) def test_maximum_distance(self): seq1 = Sequence('ABC') seq2 = Sequence('CAB') distance = hamming(seq1, seq2) self.assertEqual(distance, 1.0) def test_empty_sequences(self): seq1 = Sequence('') seq2 = Sequence('') distance = hamming(seq1, seq2) 
npt.assert_equal(distance, np.nan) def test_single_character_sequences(self): seq1 = Sequence('a') seq2 = Sequence('b') self.assertEqual(hamming(seq1, seq1), 0.0) self.assertEqual(hamming(seq1, seq2), 1.0) def test_sequence_subclass(self): seq1 = DNA('ACG-T') seq2 = DNA('ACCTT') distance = hamming(seq1, seq2) self.assertEqual(distance, 2.0/5.0) def test_sequences_with_metadata(self): # test for #1254 seqs1 = [ Sequence("ACGT"), Sequence("ACGT", metadata={'id': 'abc'}), Sequence("ACGT", positional_metadata={'qual': range(4)}) ] seqs2 = [ Sequence("AAAA"), Sequence("AAAA", metadata={'id': 'def'}), Sequence("AAAA", positional_metadata={'qual': range(4, 8)}) ] for seqs in seqs1, seqs2: for seq1, seq2 in itertools.product(seqs, repeat=2): distance = hamming(seq1, seq2) self.assertEqual(distance, 0.0) for seq1, seq2 in itertools.product(seqs1, seqs2): distance = hamming(seq1, seq2) self.assertEqual(distance, 0.75) class TestKmerDistance(unittest.TestCase): def test_default_kwargs(self): seq1 = Sequence('AACCTAGCAATGGAT') seq2 = Sequence('CAGGCAGTTCTCACC') obs = kmer_distance(seq1, seq2, 3) exp = 0.9130434782608695 self.assertAlmostEqual(obs, exp) def test_nondefault_k(self): seq1 = Sequence('GCTTATGGAGAGAGA') seq2 = Sequence('CTCGAACTCCAGCCA') obs = kmer_distance(seq1, seq2, 2) exp = 0.7333333333333333 self.assertAlmostEqual(obs, exp) seq1 = Sequence('EADDECAEECDEACD') seq2 = Sequence('DCBCBADADABCCDA') obs = kmer_distance(seq1, seq2, 1) exp = 0.4 self.assertAlmostEqual(obs, exp) def test_overlap_false(self): seq1 = Sequence('CGTTATGTCTGTGAT') seq2 = Sequence('CTGAATCGGTAGTGT') obs = kmer_distance(seq1, seq2, 3, overlap=False) exp = 0.8888888888888888 self.assertAlmostEqual(obs, exp) def test_entirely_different_sequences(self): seq1 = Sequence('CCGTGGTCGTATAAG') seq2 = Sequence('CGCCTTCCACATCAG') obs = kmer_distance(seq1, seq2, 3) exp = 1.0 self.assertEqual(obs, exp) def test_same_sequence(self): seq1 = Sequence('CTGCGACAGTTGGTA') seq2 = Sequence('CTGCGACAGTTGGTA') obs = 
kmer_distance(seq1, seq2, 3) exp = 0.0 self.assertEqual(obs, exp) def test_differing_length_seqs(self): seq1 = Sequence('AGAAATCTGAGCAAGGATCA') seq2 = Sequence('TTAGTGCGTAATCCG') obs = kmer_distance(seq1, seq2, 3) exp = 0.9285714285714286 self.assertAlmostEqual(obs, exp) def test_with_sequence_subclass(self): seq1 = DNA('GATGGTACTGTAGGT') seq2 = DNA('AGGGTGAAGGTATCA') obs = kmer_distance(seq1, seq2, 3) exp = 0.8421052631578947 self.assertAlmostEqual(obs, exp) def test_with_metadata_sanity(self): seq1 = Sequence('AACCTAGCAATGGAT', metadata={'Name': 'Kestrel Gorlick'}, positional_metadata={'seq': list('ACTCAAGCTACGAAG')}) seq2 = Sequence('CAGGCAGTTCTCACC') obs = kmer_distance(seq1, seq2, 3) exp = 0.9130434782608695 self.assertAlmostEqual(obs, exp) def test_return_type(self): seq1 = Sequence('ATCG') seq2 = Sequence('ATCG') obs = kmer_distance(seq1, seq2, 3) self.assertIsInstance(obs, float) self.assertEqual(obs, 0.0) def test_empty_sequences(self): seq1 = Sequence('') seq2 = Sequence('') obs = kmer_distance(seq1, seq2, 3) npt.assert_equal(obs, np.nan) def test_one_empty_sequence(self): seq1 = Sequence('') seq2 = Sequence('CGGGCAGCTCCTACCTGCTA') obs = kmer_distance(seq1, seq2, 3) exp = 1.0 self.assertAlmostEqual(obs, exp) def test_no_kmers_found(self): seq1 = Sequence('ATCG') seq2 = Sequence('ACGT') obs = kmer_distance(seq1, seq2, 5) npt.assert_equal(obs, np.nan) def test_k_less_than_one_error(self): seq1 = Sequence('ATCG') seq2 = Sequence('ACTG') with self.assertRaisesRegex(ValueError, r'k must be greater than 0.'): kmer_distance(seq1, seq2, 0) def test_type_mismatch_error(self): seq1 = Sequence('ABC') seq2 = DNA('ATC') with self.assertRaisesRegex(TypeError, r"Type 'Sequence'.*type 'DNA'"): kmer_distance(seq1, seq2, 3) def test_non_sequence_error(self): seq1 = Sequence('ATCG') seq2 = 'ATCG' with self.assertRaisesRegex(TypeError, r"not 'str'"): kmer_distance(seq1, seq2, 3) if __name__ == "__main__": unittest.main() 
scikit-bio-0.6.2/skbio/sequence/tests/test_dna.py000066400000000000000000000034311464262511300220150ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest from skbio import DNA, RNA from skbio.metadata import IntervalMetadata # tests specific to DNA go here. tests for functionality shared by DNA and RNA # go in test_nucleotide_sequences.py class TestDNA(unittest.TestCase): def test_transcribe(self): # without changes self.assertEqual(DNA('').transcribe(), RNA('')) self.assertEqual(DNA('A').transcribe(), RNA('A')) self.assertEqual(DNA('.ACGW-').transcribe(), RNA('.ACGW-')) # with changes self.assertEqual(DNA('T').transcribe(), RNA('U')) self.assertEqual(DNA('TT').transcribe(), RNA('UU')) self.assertEqual(DNA('ATCTG').transcribe(), RNA('AUCUG')) self.assertEqual(DNA('TTTG').transcribe(), RNA('UUUG')) def test_transcribe_preserves_all_metadata(self): im = IntervalMetadata(4) im.add([(0, 2)], metadata={'gene': 'p53'}) exp = RNA('AGUU', metadata={'foo': 'bar'}, positional_metadata={'foo': range(4)}, interval_metadata=im) seq = DNA('AGTT', metadata={'foo': 'bar'}, positional_metadata={'foo': range(4)}, interval_metadata=im) self.assertEqual(seq.transcribe(), exp) def test_transcribe_does_not_modify_input(self): seq = DNA('ATAT') self.assertEqual(seq.transcribe(), RNA('AUAU')) self.assertEqual(seq, DNA('ATAT')) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/sequence/tests/test_genetic_code.py000066400000000000000000000463221464262511300236710ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. 
# # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import itertools import unittest import numpy as np import numpy.testing as npt from skbio import Sequence, DNA, RNA, Protein, GeneticCode from skbio.sequence._genetic_code import _ncbi_genetic_codes class TestGeneticCode(unittest.TestCase): def setUp(self): self.sgc = GeneticCode.from_ncbi(1) def test_from_ncbi_valid_table_ids(self): # spot check a few tables self.assertEqual(GeneticCode.from_ncbi().name, 'Standard') self.assertEqual(GeneticCode.from_ncbi(2).name, 'Vertebrate Mitochondrial') self.assertEqual(GeneticCode.from_ncbi(12).name, 'Alternative Yeast Nuclear') self.assertEqual(GeneticCode.from_ncbi(25).name, 'Candidate Division SR1 and Gracilibacteria') def test_from_ncbi_invalid_input(self): with self.assertRaisesRegex(ValueError, r'table_id.*7'): GeneticCode.from_ncbi(7) with self.assertRaisesRegex(ValueError, r'table_id.*42'): GeneticCode.from_ncbi(42) def test_reading_frames(self): exp = [1, 2, 3, -1, -2, -3] self.assertEqual(GeneticCode.reading_frames, exp) self.assertEqual(self.sgc.reading_frames, exp) GeneticCode.reading_frames.append(42) self.assertEqual(GeneticCode.reading_frames, exp) self.assertEqual(self.sgc.reading_frames, exp) with self.assertRaises(AttributeError): self.sgc.reading_frames = [1, 2, 42] def test_name(self): self.assertEqual(self.sgc.name, 'Standard') self.assertEqual(GeneticCode('M' * 64, '-' * 64).name, '') self.assertEqual(GeneticCode('M' * 64, '-' * 64, 'foo').name, 'foo') with self.assertRaises(AttributeError): self.sgc.name = 'foo' def test_init_varied_equivalent_input(self): for args in (('M' * 64, '-' * 64), (Protein('M' * 64), Protein('-' * 64)), (Sequence('M' * 64), Sequence('-' * 64))): gc = GeneticCode(*args) self.assertEqual(gc.name, '') self.assertEqual(gc._amino_acids, Protein('M' * 64)) 
self.assertEqual(gc._starts, Protein('-' * 64)) npt.assert_array_equal(gc._m_character_codon, np.asarray([0, 0, 0], dtype=np.uint8)) self.assertEqual(len(gc._start_codons), 0) def test_init_invalid_input(self): # `amino_acids` invalid protein with self.assertRaisesRegex(ValueError, r'Invalid character.*&'): GeneticCode('&' * 64, '-' * 64) # wrong number of amino acids with self.assertRaisesRegex(ValueError, r'amino_acids.*64.*42'): GeneticCode('M' * 42, '-' * 64) # `amino_acids` missing M with self.assertRaisesRegex(ValueError, r'amino_acids.*M.*character'): GeneticCode('A' * 64, '-' * 64) # `starts` invalid protein with self.assertRaisesRegex(ValueError, r'Invalid character.*&'): GeneticCode('M' * 64, '&' * 64) # wrong number of starts with self.assertRaisesRegex(ValueError, r'starts.*64.*42'): GeneticCode('M' * 64, '-' * 42) # invalid characters in `starts` with self.assertRaisesRegex(ValueError, r'starts.*M and - characters'): GeneticCode('M' * 64, '-M' * 30 + '*AQR') def test_str(self): # predefined exp = ( ' AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAA' 'DDEEGGGG\n' 'Starts = ---M---------------M---------------M--------------------' '--------\n' 'Base1 = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGG' 'GGGGGGGG\n' 'Base2 = UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCC' 'AAAAGGGG\n' 'Base3 = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG' 'UCAGUCAG' ) self.assertEqual(str(self.sgc), exp) # custom, no name obs = str(GeneticCode('M' * 64, '-' * 64)) self.assertIn('M' * 64, obs) self.assertIn('-' * 64, obs) def test_repr(self): # predefined exp = ( 'GeneticCode (Standard)\n' '-----------------------------------------------------------------' '--------\n' ' AAs = FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAA' 'DDEEGGGG\n' 'Starts = ---M---------------M---------------M--------------------' '--------\n' 'Base1 = UUUUUUUUUUUUUUUUCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGG' 'GGGGGGGG\n' 'Base2 = 
UUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCCAAAAGGGGUUUUCCCC' 'AAAAGGGG\n' 'Base3 = UCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAGUCAG' 'UCAGUCAG' ) self.assertEqual(repr(self.sgc), exp) # custom, no name obs = repr(GeneticCode('M' * 64, '-' * 64)) self.assertTrue(obs.startswith('GeneticCode\n')) self.assertIn('M' * 64, obs) self.assertIn('-' * 64, obs) def test_eq(self): amino_acids = 'AMPM' * 16 starts = '--M-' * 16 equal_gcs = [ GeneticCode(amino_acids, starts), # name should be ignored GeneticCode(amino_acids, starts, 'foo'), # metadata/positional metadata should be ignored if Sequence # subclass is provided GeneticCode( Protein(amino_acids, metadata={'foo': 'bar'}), Protein(starts, positional_metadata={'foo': range(64)})) ] # every gc should be equal to itself for gc in equal_gcs: self.assertTrue(gc == gc) self.assertFalse(gc != gc) # every pair of gcs should be equal. use permutations instead of # combinations to test that comparing gc1 to gc2 and gc2 to gc1 are # both equal for gc1, gc2 in itertools.permutations(equal_gcs, 2): self.assertTrue(gc1 == gc2) self.assertFalse(gc1 != gc2) def test_ne(self): class GeneticCodeSubclass(GeneticCode): pass amino_acids = 'AMPM' * 16 starts = '--M-' * 16 unequal_gcs = [ GeneticCode(amino_acids, starts), # type must match GeneticCodeSubclass(amino_acids, starts), # completely different type 'foo' ] # none of the NCBI genetic codes should be equal to each other unequal_gcs.extend(_ncbi_genetic_codes.values()) for gc in unequal_gcs: self.assertTrue(gc == gc) self.assertFalse(gc != gc) for gc1, gc2 in itertools.permutations(unequal_gcs, 2): self.assertTrue(gc1 != gc2) self.assertFalse(gc1 == gc2) def test_translate_preserves_metadata(self): obs = self.sgc.translate( RNA('AUG', metadata={'foo': 'bar', 'baz': 42}, positional_metadata={'foo': range(3)})) # metadata retained, positional metadata dropped self.assertEqual(obs, Protein('M', metadata={'foo': 'bar', 'baz': 42})) def test_translate_default_behavior(self): # empty 
translation exp = Protein('') for seq in RNA(''), RNA('A'), RNA('AU'): obs = self.sgc.translate(seq) self.assertEqual(obs, exp) # no start or stop codons obs = self.sgc.translate(RNA('CCU')) self.assertEqual(obs, Protein('P')) # multiple alternative start codons, no stop codons, length is multiple # of 3 obs = self.sgc.translate(RNA('CAUUUGCUGAAA')) self.assertEqual(obs, Protein('HLLK')) # multiple stop codons, length isn't multiple of 3 obs = self.sgc.translate(RNA('UUUUUUUAAAGUUAAGGGAU')) self.assertEqual(obs, Protein('FF*S*G')) def test_translate_reading_frame_empty_translation(self): exp = Protein('') for seq in RNA(''), RNA('A'), RNA('AU'): for reading_frame in GeneticCode.reading_frames: obs = self.sgc.translate(seq, reading_frame=reading_frame) self.assertEqual(obs, exp) # reading frames that yield a partial codon for reading_frame in 2, 3, -2, -3: obs = self.sgc.translate(RNA('AUG'), reading_frame=reading_frame) self.assertEqual(obs, exp) def test_translate_reading_frame_non_empty_translation(self): seq = RNA('AUGGUGGAA') # rc = UUCCACCAU for reading_frame, exp_str in ((1, 'MVE'), (2, 'WW'), (3, 'GG'), (-1, 'FHH'), (-2, 'ST'), (-3, 'PP')): exp = Protein(exp_str) obs = self.sgc.translate(seq, reading_frame=reading_frame) self.assertEqual(obs, exp) def test_translate_start_empty_translation(self): exp = Protein('') for seq in RNA(''), RNA('A'), RNA('AU'): for start in {'optional', 'ignore'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*start=\'require\''): self.sgc.translate(seq, start='require') def test_translate_start_with_start_codon(self): # trim before start codon, replace with M. ensure alternative start # codons following the start codon aren't replaced with M. 
ensure # default behavior for handling stop codons is retained seq = RNA('CAUUUGCUGAAAUGA') exp = Protein('MLK*') for start in {'require', 'optional'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) # ignore start codon replacement and trimming; just translate exp = Protein('HLLK*') obs = self.sgc.translate(seq, start='ignore') self.assertEqual(obs, exp) # just a start codon, no replacement necessary seq = RNA('AUG') exp = Protein('M') for start in {'require', 'optional', 'ignore'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) # single alternative start codon seq = RNA('CUG') exp = Protein('M') for start in {'require', 'optional'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) exp = Protein('L') obs = self.sgc.translate(seq, start='ignore') self.assertEqual(obs, exp) def test_translate_start_no_start_codon(self): seq = RNA('CAACAACAGCAA') exp = Protein('QQQQ') for start in {'ignore', 'optional'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*start=\'require\''): self.sgc.translate(seq, start='require') # non-start codon that translates to an AA that start codons also map # to. should catch bug if code attempts to search and trim *after* # translation -- this must happen *before* translation seq = RNA('UUACAA') exp = Protein('LQ') for start in {'ignore', 'optional'}: obs = self.sgc.translate(seq, start=start) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*start=\'require\''): self.sgc.translate(seq, start='require') def test_translate_start_no_accidental_mutation(self): # `start` mutates a vector in-place that is derived from # GeneticCode._offset_table. the current code doesn't perform an # explicit copy because numpy's advanced indexing is used, which always # returns a copy. 
test this assumption here in case that behavior # changes in the future offset_table = self.sgc._offset_table.copy() seq = RNA('CAUUUGCUGAAAUGA') obs = self.sgc.translate(seq, start='require') self.assertEqual(obs, Protein('MLK*')) npt.assert_array_equal(self.sgc._offset_table, offset_table) def test_translate_stop_empty_translation(self): exp = Protein('') for seq in RNA(''), RNA('A'), RNA('AU'): for stop in {'optional', 'ignore'}: obs = self.sgc.translate(seq, stop=stop) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*stop=\'require\''): self.sgc.translate(seq, stop='require') def test_translate_stop_with_stop_codon(self): # multiple stop codons with trailing codons seq = RNA('UGGACUUGAUAUCGUUAGGAU') exp = Protein('WT') for stop in {'require', 'optional'}: obs = self.sgc.translate(seq, stop=stop) self.assertEqual(obs, exp) # ignore stop codon trimming; just translate exp = Protein('WT*YR*D') obs = self.sgc.translate(seq, stop='ignore') self.assertEqual(obs, exp) # ends with single stop codon seq = RNA('UGUCUGUAA') exp = Protein('CL') for stop in {'require', 'optional'}: obs = self.sgc.translate(seq, stop=stop) self.assertEqual(obs, exp) exp = Protein('CL*') obs = self.sgc.translate(seq, stop='ignore') self.assertEqual(obs, exp) # just a stop codon seq = RNA('UAG') exp = Protein('') for stop in {'require', 'optional'}: obs = self.sgc.translate(seq, stop=stop) self.assertEqual(obs, exp) exp = Protein('*') obs = self.sgc.translate(seq, stop='ignore') self.assertEqual(obs, exp) def test_translate_stop_no_stop_codon(self): seq = RNA('GAAUCU') exp = Protein('ES') for stop in {'ignore', 'optional'}: obs = self.sgc.translate(seq, stop=stop) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*stop=\'require\''): self.sgc.translate(seq, stop='require') def test_translate_trim_to_cds(self): seq = RNA('UAAUUGCCUCAUUAAUAACAAUGA') # find first start codon, trim all before it, convert alternative start # 
codon to M, finally trim to first stop codon following the start # codon exp = Protein('MPH') for param in {'require', 'optional'}: obs = self.sgc.translate(seq, start=param, stop=param) self.assertEqual(obs, exp) exp = Protein('*LPH**Q*') obs = self.sgc.translate(seq, start='ignore', stop='ignore') self.assertEqual(obs, exp) # alternative reading frame disrupts cds: # AAUUGCCUCAUUAAUAACAAUGA # NCLINNN with self.assertRaisesRegex(ValueError, r'reading_frame=2.*start=\'require\''): self.sgc.translate(seq, reading_frame=2, start='require') with self.assertRaisesRegex(ValueError, r'reading_frame=2.*stop=\'require\''): self.sgc.translate(seq, reading_frame=2, stop='require') exp = Protein('NCLINNN') for param in {'ignore', 'optional'}: obs = self.sgc.translate(seq, reading_frame=2, start=param, stop=param) self.assertEqual(obs, exp) def test_translate_invalid_input(self): # invalid sequence type with self.assertRaisesRegex(TypeError, r'RNA.*DNA'): self.sgc.translate(DNA('ACG')) with self.assertRaisesRegex(TypeError, r'RNA.*str'): self.sgc.translate('ACG') # invalid reading frame with self.assertRaisesRegex(ValueError, r'\[1, 2, 3, -1, -2, -3\].*0'): self.sgc.translate(RNA('AUG'), reading_frame=0) # invalid start with self.assertRaisesRegex(ValueError, r'start.*foo'): self.sgc.translate(RNA('AUG'), start='foo') # invalid stop with self.assertRaisesRegex(ValueError, r'stop.*foo'): self.sgc.translate(RNA('AUG'), stop='foo') # gapped sequence with self.assertRaisesRegex(ValueError, r'gapped'): self.sgc.translate(RNA('UU-G')) # degenerate sequence with self.assertRaisesRegex(NotImplementedError, r'degenerate'): self.sgc.translate(RNA('RUG')) def test_translate_varied_genetic_codes(self): # spot check using a few NCBI and custom genetic codes to translate seq = RNA('AAUGAUGUGACUAUCAGAAGG') # table_id=2 exp = Protein('NDVTI**') obs = GeneticCode.from_ncbi(2).translate(seq) self.assertEqual(obs, exp) exp = Protein('MTI') obs = GeneticCode.from_ncbi(2).translate(seq, 
start='require', stop='require') self.assertEqual(obs, exp) # table_id=22 exp = Protein('NDVTIRR') obs = GeneticCode.from_ncbi(22).translate(seq) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*start=\'require\''): GeneticCode.from_ncbi(22).translate(seq, start='require', stop='require') # custom, no start codons gc = GeneticCode('MWN*' * 16, '-' * 64) exp = Protein('MM*MWN*') obs = gc.translate(seq) self.assertEqual(obs, exp) with self.assertRaisesRegex(ValueError, r'reading_frame=1.*start=\'require\''): gc.translate(seq, start='require', stop='require') def test_translate_six_frames(self): seq = RNA('AUGCUAACAUAAA') # rc = UUUAUGUUAGCAU # test default behavior exp = [Protein('MLT*'), Protein('C*HK'), Protein('ANI'), Protein('FMLA'), Protein('LC*H'), Protein('YVS')] obs = list(self.sgc.translate_six_frames(seq)) self.assertEqual(obs, exp) # test that start/stop are respected exp = [Protein('MLT'), Protein('C'), Protein('ANI'), Protein('MLA'), Protein('LC'), Protein('YVS')] obs = list(self.sgc.translate_six_frames(seq, start='optional', stop='optional')) self.assertEqual(obs, exp) def test_translate_six_frames_preserves_metadata(self): seq = RNA('AUG', metadata={'foo': 'bar', 'baz': 42}, positional_metadata={'foo': range(3)}) obs = list(self.sgc.translate_six_frames(seq))[:2] # metadata retained, positional metadata dropped self.assertEqual( obs, [Protein('M', metadata={'foo': 'bar', 'baz': 42}), Protein('', metadata={'foo': 'bar', 'baz': 42})]) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/sequence/tests/test_grammared_sequence.py000066400000000000000000000733101464262511300251050ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd from skbio.sequence import GrammaredSequence from skbio.util import classproperty from skbio.util import assert_data_frame_almost_equal from skbio.metadata import IntervalMetadata class ExampleGrammaredSequence(GrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("AB"), "Y": set("BC"), "Z": set("AC"), "W": set("ABCQ")} @classproperty def definite_chars(cls): return set("ABCQ") @classproperty def default_gap_char(cls): return '-' @classproperty def gap_chars(cls): return set('-.') @classproperty def noncanonical_chars(cls): return "Q" @classproperty def wildcard_char(cls): return "W" class ExampleMotifsTester(ExampleGrammaredSequence): @property def _motifs(self): # These aren't really motifs, just a way to excercise the code paths return { "name1": lambda x, _, __: str(x), "name2": lambda x, _, __: len(x) } class TestGrammaredSequence(TestCase): def test_default_gap_must_be_in_gap_chars(self): with self.assertRaisesRegex( TypeError, r"default_gap_char must be in gap_chars for class " "GrammaredSequenceInvalidDefaultGap"): class GrammaredSequenceInvalidDefaultGap(ExampleGrammaredSequence): @classproperty def default_gap_char(cls): return '*' def test_degenerates_must_expand_to_valid_definites(self): with self.assertRaisesRegex( TypeError, r"degenerate_map must expand only to characters included in " "definite_chars for class " "GrammaredSequenceInvalidDefaultGap"): class GrammaredSequenceInvalidDefaultGap(ExampleGrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("B")} @classproperty def definite_chars(cls): return set("A") def test_gap_chars_and_degenerates_share(self): with self.assertRaisesRegex( TypeError, r"gap_chars and degenerate_chars must not share any characters" " for class GrammaredSequenceGapInDegenerateMap"): class 
GrammaredSequenceGapInDegenerateMap( ExampleGrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("AB")} @classproperty def definite_chars(cls): return set("ABC") @classproperty def gap_chars(cls): return set(".-X") def test_gap_chars_and_definites_share(self): with self.assertRaisesRegex( TypeError, (r"gap_chars and definite_chars must not share any characters " "for class GrammaredSequenceGapInDefiniteMap")): class GrammaredSequenceGapInDefiniteMap( ExampleGrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("AB")} @classproperty def definite_chars(cls): return set("ABC") @classproperty def gap_chars(cls): return set(".-A") def test_degenerates_and_definites_share(self): with self.assertRaisesRegex( TypeError, (r"degenerate_chars and definite_chars must not share any " "characters for class GrammaredSequenceInvalid")): class GrammaredSequenceInvalid(ExampleGrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("AB")} @classproperty def definite_chars(cls): return set("ABCX") def test_instantiation_with_no_implementation(self): class GrammaredSequenceSubclassNoImplementation(GrammaredSequence): pass with self.assertRaises(TypeError) as cm: GrammaredSequenceSubclassNoImplementation() self.assertIn("abstract class", str(cm.exception)) self.assertIn("definite_chars", str(cm.exception)) self.assertIn("degenerate_map", str(cm.exception)) def test_init_default_parameters(self): seq = ExampleGrammaredSequence('.-ABCXYZ') npt.assert_equal(seq.values, np.array('.-ABCXYZ', dtype='c')) self.assertEqual(seq.metadata, {}) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame(index=range(8))) self.assertEqual(seq.interval_metadata, IntervalMetadata(8)) def test_init_nondefault_parameters(self): im = IntervalMetadata(8) im.add([(1, 8)], metadata={'gene': 'p53'}) seq = ExampleGrammaredSequence( '.-ABCXYZ', metadata={'id': 'foo'}, positional_metadata={'quality': range(8)}, interval_metadata=im) 
npt.assert_equal(seq.values, np.array('.-ABCXYZ', dtype='c')) self.assertEqual(seq.metadata, {'id': 'foo'}) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame({'quality': range(8)})) self.assertEqual(seq.interval_metadata, im) def test_init_valid_empty_sequence(self): # just make sure we can instantiate an empty sequence regardless of # `validate` and `lowercase` parameters. more extensive tests # are performed in Sequence base class unit tests for validate in (True, False): for lowercase in (True, False): seq = ExampleGrammaredSequence( '', validate=validate, lowercase=lowercase) self.assertEqual(seq, ExampleGrammaredSequence('')) def test_init_valid_single_character_sequence(self): for validate in (True, False): for lowercase in (True, False): seq = ExampleGrammaredSequence( 'C', validate=validate, lowercase=lowercase) self.assertEqual(seq, ExampleGrammaredSequence('C')) def test_init_valid_multiple_character_sequence(self): for validate in (True, False): for lowercase in (True, False): seq = ExampleGrammaredSequence( 'BAACB.XYY-AZ', validate=validate, lowercase=lowercase) self.assertEqual(seq, ExampleGrammaredSequence('BAACB.XYY-AZ')) def test_init_validate_parameter_single_character(self): seq = 'w' with self.assertRaisesRegex(ValueError, r"character.*'w'"): ExampleGrammaredSequence(seq) # test that we can instantiate an invalid sequence. 
we don't guarantee # anything working beyond instantiation ExampleGrammaredSequence(seq, validate=False) def test_init_validate_parameter_multiple_characters(self): # mix of valid and invalid characters with repeats and lowercased # alphabet characters seq = 'CBCBBbawCbbwBXYZ-.x' with self.assertRaisesRegex(ValueError, r"\['a', 'b', 'w', 'x'\]"): ExampleGrammaredSequence(seq) ExampleGrammaredSequence(seq, validate=False) def test_init_lowercase_all_lowercase(self): s = 'cbcbbbazcbbzbxyz-.x' with self.assertRaisesRegex(ValueError, r"\['a', 'b', 'c', 'x', 'y', 'z'\]"): ExampleGrammaredSequence(s) seq = ExampleGrammaredSequence(s, lowercase=True) self.assertEqual(seq, ExampleGrammaredSequence('CBCBBBAZCBBZBXYZ-.X')) def test_init_lowercase_mixed_case(self): s = 'CBCBBbazCbbzBXYZ-.x' with self.assertRaisesRegex(ValueError, r"\['a', 'b', 'x', 'z'\]"): ExampleGrammaredSequence(s) seq = ExampleGrammaredSequence(s, lowercase=True) self.assertEqual(seq, ExampleGrammaredSequence('CBCBBBAZCBBZBXYZ-.X')) def test_init_lowercase_no_validation(self): s = 'car' with self.assertRaisesRegex(ValueError, r"\['a', 'c', 'r'\]"): ExampleGrammaredSequence(s) with self.assertRaisesRegex(ValueError, r"character.*'R'"): ExampleGrammaredSequence(s, lowercase=True) ExampleGrammaredSequence(s, lowercase=True, validate=False) def test_init_lowercase_byte_ownership(self): bytes = np.array([97, 98, 97], dtype=np.uint8) with self.assertRaisesRegex(ValueError, r"\['a', 'b'\]"): ExampleGrammaredSequence(bytes) seq = ExampleGrammaredSequence(bytes, lowercase=True) self.assertEqual(seq, ExampleGrammaredSequence('ABA')) # should not share the same memory self.assertIsNot(seq._bytes, bytes) # we should have copied `bytes` before modifying in place to convert to # upper. 
make sure `bytes` hasn't been mutated npt.assert_equal(bytes, np.array([97, 98, 97], dtype=np.uint8)) def test_init_lowercase_invalid_keys(self): for invalid_key in ((), [], 2): invalid_type = type(invalid_key) with self.assertRaisesRegex(TypeError, r"lowercase keyword argument expected " "a bool or string, but got %s" % invalid_type): ExampleGrammaredSequence('ACGTacgt', lowercase=invalid_key) def test_definite_char_codes(self): definite_char_codes = set(ExampleGrammaredSequence._definite_char_codes) self.assertEqual(definite_char_codes, set([65, 66, 67, 81])) def test_gap_codes(self): gap_codes = set(ExampleGrammaredSequence._gap_codes) self.assertEqual(gap_codes, set([45, 46])) def test_noncanonical_codes(self): noncanonical_codes = set(ExampleGrammaredSequence._noncanonical_codes) self.assertEqual(noncanonical_codes, set([81])) def test_degenerate_chars(self): expected = set("WXYZ") self.assertIs(type(ExampleGrammaredSequence.degenerate_chars), set) self.assertEqual(ExampleGrammaredSequence.degenerate_chars, expected) ExampleGrammaredSequence.degenerate_chars.add("W") self.assertEqual(ExampleGrammaredSequence.degenerate_chars, expected) self.assertEqual(ExampleGrammaredSequence('').degenerate_chars, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').degenerate_chars = set("BAR") # TODO: duplicate of test_definite_chars, remove when nondegenerate_chars, # is removed def test_nondegenerate_chars(self): expected = set("ABCQ") self.assertEqual(ExampleGrammaredSequence.nondegenerate_chars, expected) ExampleGrammaredSequence.degenerate_chars.add("D") self.assertEqual(ExampleGrammaredSequence.nondegenerate_chars, expected) self.assertEqual(ExampleGrammaredSequence('').nondegenerate_chars, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').nondegenerate_chars = set("BAR") def test_definite_chars(self): expected = set("ABCQ") self.assertEqual(ExampleGrammaredSequence.definite_chars, expected) 
ExampleGrammaredSequence.degenerate_chars.add("D") self.assertEqual(ExampleGrammaredSequence.definite_chars, expected) self.assertEqual(ExampleGrammaredSequence('').definite_chars, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').definite_chars = set("BAR") def test_gap_chars(self): expected = set(".-") self.assertIs(type(ExampleGrammaredSequence.gap_chars), set) self.assertEqual(ExampleGrammaredSequence.gap_chars, expected) ExampleGrammaredSequence.gap_chars.add("_") self.assertEqual(ExampleGrammaredSequence.gap_chars, expected) self.assertEqual(ExampleGrammaredSequence('').gap_chars, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').gap_chars = set("_ =") def test_default_gap_char(self): self.assertIs(type(ExampleGrammaredSequence.default_gap_char), str) self.assertEqual(ExampleGrammaredSequence.default_gap_char, '-') self.assertEqual(ExampleGrammaredSequence('').default_gap_char, '-') with self.assertRaises(AttributeError): ExampleGrammaredSequence('').default_gap_char = '.' 
def test_alphabet(self): expected = set("ABC.-XYZQW") self.assertIs(type(ExampleGrammaredSequence.alphabet), set) self.assertEqual(ExampleGrammaredSequence.alphabet, expected) ExampleGrammaredSequence.alphabet.add("DEF") self.assertEqual(ExampleGrammaredSequence.alphabet, expected) self.assertEqual(ExampleGrammaredSequence('').alphabet, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').alphabet = set("ABCDEFG.-WXYZ") def test_degenerate_map(self): expected = {"X": set("AB"), "Y": set("BC"), "Z": set("AC"), "W": set("ABCQ")} self.assertEqual(ExampleGrammaredSequence.degenerate_map, expected) ExampleGrammaredSequence.degenerate_map['W'] = set("ABC") ExampleGrammaredSequence.degenerate_map['X'] = set("CA") self.assertEqual(ExampleGrammaredSequence.degenerate_map, expected) self.assertEqual(ExampleGrammaredSequence('').degenerate_map, expected) with self.assertRaises(AttributeError): ExampleGrammaredSequence('').degenerate_map = {'W': "ABC"} def test_gaps(self): self.assertIs(type(ExampleGrammaredSequence("").gaps()), np.ndarray) self.assertIs(ExampleGrammaredSequence("").gaps().dtype, np.dtype('bool')) npt.assert_equal(ExampleGrammaredSequence("ABCXBZYABC").gaps(), np.zeros(10).astype(bool)) npt.assert_equal(ExampleGrammaredSequence(".-.-.").gaps(), np.ones(5).astype(bool)) npt.assert_equal(ExampleGrammaredSequence("A.B-C.X-Y.").gaps(), np.array([0, 1] * 5, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("AB.AC.XY-").gaps(), np.array([0, 0, 1] * 3, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("A.BC.-").gaps(), np.array([0, 1, 0, 0, 1, 1], dtype=bool)) def test_has_gaps(self): self.assertIs(type(ExampleGrammaredSequence("").has_gaps()), bool) self.assertIs(type(ExampleGrammaredSequence("-").has_gaps()), bool) self.assertFalse(ExampleGrammaredSequence("").has_gaps()) self.assertFalse(ExampleGrammaredSequence("ABCXYZ").has_gaps()) self.assertTrue(ExampleGrammaredSequence("-").has_gaps()) 
self.assertTrue(ExampleGrammaredSequence("ABCXYZ-").has_gaps()) def test_degenerates(self): self.assertIs(type(ExampleGrammaredSequence("").degenerates()), np.ndarray) self.assertIs(ExampleGrammaredSequence("").degenerates().dtype, np.dtype('bool')) npt.assert_equal(ExampleGrammaredSequence("ABCBC-.AB.").degenerates(), np.zeros(10).astype(bool)) npt.assert_equal(ExampleGrammaredSequence("ZYZYZ").degenerates(), np.ones(5).astype(bool)) npt.assert_equal(ExampleGrammaredSequence("AX.Y-ZBXCZ").degenerates(), np.array([0, 1] * 5, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("ABXACY.-Z").degenerates(), np.array([0, 0, 1] * 3, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("AZBCXY").degenerates(), np.array([0, 1, 0, 0, 1, 1], dtype=bool)) def test_has_degenerates(self): self.assertIs(type(ExampleGrammaredSequence("").has_degenerates()), bool) self.assertIs(type(ExampleGrammaredSequence("X").has_degenerates()), bool) self.assertFalse(ExampleGrammaredSequence("").has_degenerates()) self.assertFalse(ExampleGrammaredSequence("A-.BC").has_degenerates()) self.assertTrue(ExampleGrammaredSequence("Z").has_degenerates()) self.assertTrue(ExampleGrammaredSequence("ABC.XYZ-").has_degenerates()) # TODO: duplicate of test_definites; remove when nondegenerates is removed def test_nondegenerates(self): self.assertIs(type(ExampleGrammaredSequence("").nondegenerates()), np.ndarray) self.assertIs(ExampleGrammaredSequence("").nondegenerates().dtype, np.dtype('bool')) npt.assert_equal( ExampleGrammaredSequence("XYZYZ-.XY.").nondegenerates(), np.zeros(10).astype(bool)) npt.assert_equal(ExampleGrammaredSequence("ABABA").nondegenerates(), np.ones(5).astype(bool)) npt.assert_equal( ExampleGrammaredSequence("XA.B-AZCXA").nondegenerates(), np.array([0, 1] * 5, dtype=bool)) npt.assert_equal( ExampleGrammaredSequence("XXAZZB.-C").nondegenerates(), np.array([0, 0, 1] * 3, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("YB.-AC").nondegenerates(), np.array([0, 1, 0, 0, 1, 1], 
dtype=bool)) def test_definites(self): self.assertIs(type(ExampleGrammaredSequence("").definites()), np.ndarray) self.assertIs(ExampleGrammaredSequence("").definites().dtype, np.dtype('bool')) npt.assert_equal( ExampleGrammaredSequence("XYZYZ-.XY.").definites(), np.zeros(10).astype(bool)) npt.assert_equal(ExampleGrammaredSequence("ABABA").definites(), np.ones(5).astype(bool)) npt.assert_equal( ExampleGrammaredSequence("XA.B-AZCXA").definites(), np.array([0, 1] * 5, dtype=bool)) npt.assert_equal( ExampleGrammaredSequence("XXAZZB.-C").definites(), np.array([0, 0, 1] * 3, dtype=bool)) npt.assert_equal(ExampleGrammaredSequence("YB.-AC").definites(), np.array([0, 1, 0, 0, 1, 1], dtype=bool)) # TODO: duplicate of test_has_definites; remove when has_nondegenerates is # removed. def test_has_nondegenerates(self): self.assertIs(type(ExampleGrammaredSequence("").has_nondegenerates()), bool) self.assertIs(type(ExampleGrammaredSequence("A").has_nondegenerates()), bool) self.assertFalse(ExampleGrammaredSequence("").has_nondegenerates()) self.assertFalse( ExampleGrammaredSequence("X-.YZ").has_nondegenerates()) self.assertTrue(ExampleGrammaredSequence("C").has_nondegenerates()) self.assertTrue( ExampleGrammaredSequence(".XYZ-ABC").has_nondegenerates()) def test_has_definites(self): self.assertIs(type(ExampleGrammaredSequence("").has_definites()), bool) self.assertIs(type(ExampleGrammaredSequence("A").has_definites()), bool) self.assertFalse(ExampleGrammaredSequence("").has_definites()) self.assertFalse( ExampleGrammaredSequence("X-.YZ").has_definites()) self.assertTrue(ExampleGrammaredSequence("C").has_definites()) self.assertTrue( ExampleGrammaredSequence(".XYZ-ABC").has_definites()) def test_degap(self): kw = { 'metadata': { 'id': 'some_id', 'description': 'some description', }, } self.assertEqual( ExampleGrammaredSequence( "", positional_metadata={'qual': []}, **kw).degap(), ExampleGrammaredSequence( "", positional_metadata={'qual': []}, **kw)) self.assertEqual( 
ExampleGrammaredSequence( "ABCXYZ", positional_metadata={'qual': np.arange(6)}, **kw).degap(), ExampleGrammaredSequence( "ABCXYZ", positional_metadata={'qual': np.arange(6)}, **kw)) self.assertEqual( ExampleGrammaredSequence( "ABC-XYZ", positional_metadata={'qual': np.arange(7, dtype=np.int64)}, **kw).degap(), ExampleGrammaredSequence( "ABCXYZ", positional_metadata={'qual': np.asarray([0, 1, 2, 4, 5, 6], dtype=np.int64)}, **kw)) self.assertEqual( ExampleGrammaredSequence( ".-ABC-XYZ.", positional_metadata={'qual': np.arange(10, dtype=np.int64)}, **kw).degap(), ExampleGrammaredSequence( "ABCXYZ", positional_metadata={'qual': np.asarray([2, 3, 4, 6, 7, 8], dtype=np.int64)}, **kw)) self.assertEqual( ExampleGrammaredSequence( "---.-.-.-.-.", positional_metadata={'quality': np.arange(12, dtype=np.int64)}, **kw).degap(), ExampleGrammaredSequence( "", positional_metadata={'quality': np.array([], dtype=np.int64)}, **kw)) def test_expand_degenerates_no_degens(self): seq = ExampleGrammaredSequence("ABCABCABC") self.assertEqual(list(seq.expand_degenerates()), [seq]) def test_expand_degenerates_all_degens(self): exp = [ ExampleGrammaredSequence('ABA'), ExampleGrammaredSequence('ABC'), ExampleGrammaredSequence('ACA'), ExampleGrammaredSequence('ACC'), ExampleGrammaredSequence('BBA'), ExampleGrammaredSequence('BBC'), ExampleGrammaredSequence('BCA'), ExampleGrammaredSequence('BCC') ] # Sort based on sequence string, as order is not guaranteed. 
obs = sorted(ExampleGrammaredSequence('XYZ').expand_degenerates(), key=str) self.assertEqual(obs, exp) def test_expand_degenerates_with_metadata(self): kw = { "metadata": { "id": "some_id", "description": "some description" }, "positional_metadata": { "quality": np.arange(3), }, } exp = [ExampleGrammaredSequence('ABA', **kw), ExampleGrammaredSequence('ABC', **kw), ExampleGrammaredSequence('BBA', **kw), ExampleGrammaredSequence('BBC', **kw)] obs = sorted( ExampleGrammaredSequence('XBZ', **kw).expand_degenerates(), key=str) self.assertEqual(obs, exp) def test_to_regex_no_degens(self): seq = ExampleGrammaredSequence('ABC') regex = seq.to_regex() self.assertEqual(regex.pattern, str(seq)) def test_to_regex_with_degens(self): seq = ExampleGrammaredSequence('AYZ') regex = seq.to_regex() self.assertFalse(any(regex.match(s) is None for s in 'ABA ABC ACA ACC'.split())) self.assertTrue(all(regex.match(s) is None for s in 'CBA BBA ABB AAA'.split())) def test_to_regex_within_capture(self): seq = ExampleGrammaredSequence('XYC') regex = seq.to_regex(within_capture=True) for ref in 'ABA BBB CCA'.split(): self.assertFalse(any(len(match.groups()) == 1 for match in regex.finditer(ref))) for ref in 'ABC BBC ACC'.split(): self.assertTrue(all(len(match.groups()) == 1 for match in regex.finditer(ref))) def test_find_motifs_no_motif(self): seq = ExampleMotifsTester("ABCABCABC") with self.assertRaises(ValueError) as cm: seq.find_motifs("doesn't-exist") self.assertIn("doesn't-exist", str(cm.exception)) seq = ExampleGrammaredSequence("ABCABCABC") with self.assertRaises(ValueError) as cm: seq.find_motifs("doesn't-exist") self.assertIn("doesn't-exist", str(cm.exception)) def test_find_motifs(self): seq = ExampleMotifsTester("ABC") self.assertEqual(seq.find_motifs("name1"), "ABC") self.assertEqual(seq.find_motifs("name2"), 3) def test_repr(self): # basic sanity checks for custom repr stats. 
more extensive testing is # performed on Sequence.__repr__ # minimal obs = repr(ExampleGrammaredSequence('')) self.assertEqual(obs.count('\n'), 7) self.assertTrue(obs.startswith('ExampleGrammaredSequence')) self.assertIn('length: 0', obs) self.assertIn('has gaps: False', obs) self.assertIn('has degenerates: False', obs) self.assertIn('has definites: False', obs) self.assertTrue(obs.endswith('-')) # no metadata, mix of gaps, degenerates, and definites obs = repr(ExampleGrammaredSequence('AY-B')) self.assertEqual(obs.count('\n'), 8) self.assertTrue(obs.startswith('ExampleGrammaredSequence')) self.assertIn('length: 4', obs) self.assertIn('has gaps: True', obs) self.assertIn('has degenerates: True', obs) self.assertIn('has definites: True', obs) self.assertTrue(obs.endswith('0 AY-B')) # metadata and positional metadata of mixed types obs = repr( ExampleGrammaredSequence( 'ABCA', metadata={'foo': 42, b'bar': 33.33, None: True, False: {}, (1, 2): 3, 'acb' * 100: "'"}, positional_metadata={'foo': range(4), 42: ['a', 'b', [], 'c']})) self.assertEqual(obs.count('\n'), 18) self.assertTrue(obs.startswith('ExampleGrammaredSequence')) self.assertIn('None: True', obs) self.assertIn('\'foo\': 42', obs) self.assertIn('42: ', obs) self.assertIn('\'foo\': ', obs) self.assertIn('length: 4', obs) self.assertIn('has gaps: False', obs) self.assertIn('has degenerates: False', obs) self.assertIn('has definites: True', obs) self.assertTrue(obs.endswith('0 ABCA')) # sequence spanning > 5 lines obs = repr(ExampleGrammaredSequence('A' * 301)) self.assertEqual(obs.count('\n'), 12) self.assertTrue(obs.startswith('ExampleGrammaredSequence')) self.assertIn('length: 301', obs) self.assertIn('has gaps: False', obs) self.assertIn('has degenerates: False', obs) self.assertIn('has definites: True', obs) self.assertIn('...', obs) self.assertTrue(obs.endswith('300 A')) def test_to_definites(self): seq = ExampleGrammaredSequence("ABCQXYZ") # default behavior, here I expect to see the sequence "ABCWWWW" 
returned obs = seq.to_definites() exp = ExampleGrammaredSequence("ABCWWWW") self.assertEqual(obs, exp) # noncanonical wildcard, expect to see "ABCQWWW" returned obs = seq.to_definites(noncanonical=False) exp = ExampleGrammaredSequence("ABCQWWW") # gap behavior, I expect to see the sequence "ABC----" returned obs = seq.to_definites(degenerate="gap") exp = ExampleGrammaredSequence("ABC----") self.assertEqual(obs, exp) # noncanonical gap obs = seq.to_definites(degenerate="gap", noncanonical=False) exp = ExampleGrammaredSequence("ABCQ---") self.assertEqual(obs, exp) # canonical trim obs = seq.to_definites(degenerate="del") exp = ExampleGrammaredSequence("ABC") self.assertEqual(obs, exp) # noncanonical trim obs = seq.to_definites(degenerate="del", noncanonical=False) exp = ExampleGrammaredSequence("ABCQ") self.assertEqual(obs, exp) # single char, acceptable input obs = seq.to_definites(degenerate="A") exp = ExampleGrammaredSequence("ABCAAAA") self.assertEqual(obs, exp) # noncanonical single char, acceptable input obs = seq.to_definites(degenerate="A", noncanonical=False) exp = ExampleGrammaredSequence("ABCQAAA") self.assertEqual(obs, exp) # test that single char outside of alphabet will throw error with self.assertRaises(ValueError): seq.to_definites("P") # test that an invalid wildcard (not a string) will throw an error ExampleGrammaredSequence.wildcard_char = 1 with self.assertRaises(ValueError): seq.to_definites() ExampleGrammaredSequence.wildcard_char = 'W' # test that nonsense input for 'to' will throw error with self.assertRaises(ValueError): seq.to_definites(degenerate='nonsense') def test_noncanonical_chars(self): self.assertTrue(isinstance(GrammaredSequence.noncanonical_chars, set)) self.assertEqual(len(GrammaredSequence.noncanonical_chars), 0) def test_wildcard_char(self): exp = None self.assertEqual(GrammaredSequence.wildcard_char, exp) if __name__ == "__main__": main() 
scikit-bio-0.6.2/skbio/sequence/tests/test_nucleotide_sequences.py000066400000000000000000000513771464262511300254750ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest import numpy as np from skbio import DNA, RNA, Protein, GeneticCode from skbio.sequence._nucleotide_mixin import NucleotideMixin from skbio.sequence import GrammaredSequence from skbio.util import classproperty from skbio.metadata import IntervalMetadata # This file contains tests for functionality of sequence types which implement # NucleotideMixin. Currently this means DNA and RNA. These types are so # similar that the testing logic can be shared and parameterized across # different test data. 
class TestNucleotideSequence(unittest.TestCase): def setUp(self): self.sequence_kinds = frozenset([ str, lambda s: np.frombuffer(s.encode('ascii'), dtype='|S1'), lambda s: np.frombuffer(s.encode('ascii'), dtype=np.uint8)]) dna_str = 'ACGTMRWSYKVHDBN.-' dna_comp_str = 'TGCAKYWSRMBDHVN.-' dna_rev_comp_str = '-.NVHDBMRSWYKACGT' rna_str = 'ACGUMRWSYKVHDBN.-' rna_comp_str = 'UGCAKYWSRMBDHVN.-' rna_rev_comp_str = '-.NVHDBMRSWYKACGU' qual = tuple(range(len(dna_str))) self.dna = (DNA, dna_str) self.rna = (RNA, rna_str) dna_comp = self.dna + (dna_comp_str,) rna_comp = self.rna + (rna_comp_str,) dna_comp_qual = dna_comp + (qual,) rna_comp_qual = rna_comp + (qual,) self.all_combos_comp_qual = (dna_comp_qual, rna_comp_qual) dna_rev_comp = self.dna + (dna_rev_comp_str,) rna_rev_comp = self.rna + (rna_rev_comp_str,) self.all_combos_rev_comp = (dna_rev_comp, rna_rev_comp) dna_rev_comp_qual = dna_rev_comp + (qual,) rna_rev_comp_qual = rna_rev_comp + (qual,) self.all_combos_rev_comp_qual = \ (dna_rev_comp_qual, rna_rev_comp_qual) def test_instantiation_with_no_implementation(self): class NucleotideSequenceSubclassNoImplementation(NucleotideMixin): pass with self.assertRaises(TypeError) as cm: NucleotideSequenceSubclassNoImplementation() self.assertIn("abstract class", str(cm.exception)) self.assertIn("complement_map", str(cm.exception)) # TODO: remove when nondegenerate_chars is removed def test_nondegenerate_chars(self): dna = (DNA, "ACGT") rna = (RNA, "ACGU") for constructor, nondegenerate in (dna, rna): exp = set(nondegenerate) self.assertEqual(constructor('').nondegenerate_chars, exp) self.assertEqual(constructor.nondegenerate_chars, exp) def test_definite_chars(self): dna = (DNA, "ACGT") rna = (RNA, "ACGU") for constructor, definite_char in (dna, rna): exp = set(definite_char) self.assertEqual(constructor('').definite_chars, exp) self.assertEqual(constructor.definite_chars, exp) def test_degenerate_map(self): dna_exp = (DNA, { 'B': set(['C', 'T', 'G']), 'D': set(['A', 'T', 
'G']), 'H': set(['A', 'C', 'T']), 'K': set(['T', 'G']), 'M': set(['A', 'C']), 'N': set(['A', 'C', 'T', 'G']), 'S': set(['C', 'G']), 'R': set(['A', 'G']), 'W': set(['A', 'T']), 'V': set(['A', 'C', 'G']), 'Y': set(['C', 'T']) }) rna_exp = (RNA, { 'B': set(['C', 'U', 'G']), 'D': set(['A', 'U', 'G']), 'H': set(['A', 'C', 'U']), 'K': set(['U', 'G']), 'M': set(['A', 'C']), 'N': set(['A', 'C', 'U', 'G']), 'S': set(['C', 'G']), 'R': set(['A', 'G']), 'W': set(['A', 'U']), 'V': set(['A', 'C', 'G']), 'Y': set(['C', 'U']) }) for constructor, degenerate in (dna_exp, rna_exp): self.assertEqual(constructor('').degenerate_map, degenerate) self.assertEqual(constructor.degenerate_map, degenerate) def test_complement_map(self): dna_exp = (DNA, { '-': '-', '.': '.', 'A': 'T', 'C': 'G', 'B': 'V', 'D': 'H', 'G': 'C', 'H': 'D', 'K': 'M', 'M': 'K', 'N': 'N', 'S': 'S', 'R': 'Y', 'T': 'A', 'W': 'W', 'V': 'B', 'Y': 'R' }) rna_exp = (RNA, { '-': '-', '.': '.', 'A': 'U', 'C': 'G', 'B': 'V', 'D': 'H', 'G': 'C', 'H': 'D', 'K': 'M', 'M': 'K', 'N': 'N', 'S': 'S', 'R': 'Y', 'U': 'A', 'W': 'W', 'V': 'B', 'Y': 'R' }) for constructor, comp_map in (dna_exp, rna_exp): self.assertEqual(constructor('').complement_map, comp_map) self.assertEqual(constructor.complement_map, comp_map) # immutable constructor.complement_map['A'] = 'X' constructor.complement_map['C'] = 'W' self.assertEqual(constructor.complement_map, comp_map) with self.assertRaises(AttributeError): constructor('').complement_map = {'W': 'X'} def test_translate_ncbi_table_id(self): for seq in RNA('AAAUUUAUGCAU'), DNA('AAATTTATGCAT'): # default obs = seq.translate() self.assertEqual(obs, Protein('KFMH')) obs = seq.translate(9) self.assertEqual(obs, Protein('NFMH')) def test_translate_genetic_code_object(self): gc = GeneticCode('M' * 64, '-' * 64) for seq in RNA('AAAUUUAUGCAU'), DNA('AAATTTATGCAT'): obs = seq.translate(gc) self.assertEqual(obs, Protein('MMMM')) def test_translate_passes_parameters_through(self): exp = Protein('MW') for seq in 
RNA('UAAAUUGUGGUAA'), DNA('TAAATTGTGGTAA'): # mix of args and kwargs obs = seq.translate(13, reading_frame=2, start='require', stop='require') self.assertEqual(obs, exp) # kwargs only obs = seq.translate(genetic_code=13, reading_frame=2, start='require', stop='require') self.assertEqual(obs, exp) # args only obs = seq.translate(13, 2, 'require', 'require') self.assertEqual(obs, exp) def test_translate_preserves_metadata(self): metadata = {'foo': 'bar', 'baz': 42} positional_metadata = {'foo': range(3)} for seq in (RNA('AUG', metadata=metadata, positional_metadata=positional_metadata), DNA('ATG', metadata=metadata, positional_metadata=positional_metadata)): obs = seq.translate() # metadata retained, positional metadata dropped self.assertEqual(obs, Protein('M', metadata={'foo': 'bar', 'baz': 42})) def test_translate_invalid_id(self): for seq in RNA('AUG'), DNA('ATG'): with self.assertRaisesRegex(ValueError, r'table_id.*42'): seq.translate(42) def test_translate_six_frames_ncbi_table_id(self): # rc = CAAUUU for seq in RNA('AAAUUG'), DNA('AAATTG'): # default obs = list(seq.translate_six_frames()) self.assertEqual(obs, [Protein('KL'), Protein('N'), Protein('I'), Protein('QF'), Protein('N'), Protein('I')]) obs = list(seq.translate_six_frames(9)) self.assertEqual(obs, [Protein('NL'), Protein('N'), Protein('I'), Protein('QF'), Protein('N'), Protein('I')]) def test_translate_six_frames_genetic_code_object(self): gc = GeneticCode('M' * 64, '-' * 64) for seq in RNA('AAAUUG'), DNA('AAATTG'): obs = list(seq.translate_six_frames(gc)) self.assertEqual(obs, [Protein('MM'), Protein('M'), Protein('M'), Protein('MM'), Protein('M'), Protein('M')]) def test_translate_six_frames_passes_parameters_through(self): for seq in RNA('UUUAUGUGGUGA'), DNA('TTTATGTGGTGA'): # mix of args and kwargs obs = next(seq.translate_six_frames(11, start='require', stop='require')) self.assertEqual(obs, Protein('MW')) # kwargs only obs = next(seq.translate_six_frames(genetic_code=11, start='require', 
stop='require')) self.assertEqual(obs, Protein('MW')) # args only obs = next(seq.translate_six_frames(11, 'require', 'require')) self.assertEqual(obs, Protein('MW')) def test_translate_six_frames_preserves_metadata(self): metadata = {'foo': 'bar', 'baz': 42} positional_metadata = {'foo': range(3)} for seq in (RNA('AUG', metadata=metadata, positional_metadata=positional_metadata), DNA('ATG', metadata=metadata, positional_metadata=positional_metadata)): obs = list(seq.translate_six_frames())[:2] # metadata retained, positional metadata dropped self.assertEqual( obs, [Protein('M', metadata={'foo': 'bar', 'baz': 42}), Protein('', metadata={'foo': 'bar', 'baz': 42})]) def test_translate_six_frames_invalid_id(self): for seq in RNA('AUG'), DNA('ATG'): with self.assertRaisesRegex(ValueError, r'table_id.*42'): seq.translate_six_frames(42) def test_repr(self): # basic sanity checks for custom repr stats. more extensive testing is # performed on Sequence.__repr__ for seq in DNA(''), RNA(''): obs = repr(seq) # obtained from super() self.assertIn('has gaps: False', obs) # custom to Protein self.assertIn('GC-content: 0.00%', obs) for seq in DNA('ACGT'), RNA('ACGU'): obs = repr(seq) self.assertIn('has gaps: False', obs) self.assertIn('GC-content: 50.00%', obs) for seq in DNA('CST'), RNA('CSU'): obs = repr(seq) self.assertIn('has gaps: False', obs) self.assertIn('GC-content: 66.67%', obs) for seq in DNA('GCSSCG'), RNA('GCSSCG'): obs = repr(seq) self.assertIn('has gaps: False', obs) self.assertIn('GC-content: 100.00%', obs) for seq in DNA('-GCSSCG.'), RNA('-GCSSCG.'): obs = repr(seq) self.assertIn('has gaps: True', obs) self.assertIn('GC-content: 100.00%', obs) def test_complement_without_reverse_empty(self): for constructor in (DNA, RNA): # without optional attributes comp = constructor('').complement() self.assertEqual(comp, constructor('')) # with optional attributes comp = constructor( '', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': []}, 
interval_metadata=IntervalMetadata(0)).complement() self.assertEqual( comp, constructor( '', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': []})) def test_complement_without_reverse_non_empty(self): for (constructor, seq_str, comp_str, qual) in self.all_combos_comp_qual: comp = constructor(seq_str).complement() self.assertEqual(comp, constructor(comp_str)) im = IntervalMetadata(len(seq_str)) im.add([(0, 1)], metadata={'gene': 'p53'}) comp = constructor( seq_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': qual}, interval_metadata=im).complement() self.assertEqual( comp, constructor( comp_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': qual}, interval_metadata=im)) def test_complement_with_reverse_empty(self): for constructor in (DNA, RNA): rc = constructor('').complement(reverse=True) self.assertEqual(rc, constructor('')) rc = constructor( '', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': []}, interval_metadata=IntervalMetadata(0)).complement(reverse=True) self.assertEqual( rc, constructor( '', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': []})) def test_complement_with_reverse_non_empty(self): for (constructor, seq_str, rev_comp_str, qual) in self.all_combos_rev_comp_qual: rc = constructor(seq_str).complement(reverse=True) self.assertEqual(rc, constructor(rev_comp_str)) length = len(seq_str) im = IntervalMetadata(length) im.add([(0, 1)], metadata={'gene': 'p53'}) im_rc = IntervalMetadata(length) im_rc.add([(length-1, length)], metadata={'gene': 'p53'}) original = constructor( seq_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={ 'quality': qual}, interval_metadata=im) rc = original.complement(reverse=True) self.assertEqual( rc, constructor( rev_comp_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': list(qual)[::-1]}, interval_metadata=im_rc)) # 
assert the original object is not changed self.assertIsNot(original.interval_metadata, im) self.assertEqual(original.interval_metadata, im) def test_reverse_complement(self): # light tests because this just calls # NucleotideSequence.complement(reverse=True), which is tested more # extensively for (constructor, seq_str, rev_comp_str, qual) in self.all_combos_rev_comp_qual: rc = constructor( seq_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': qual}).reverse_complement() self.assertEqual( rc, constructor( rev_comp_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': list(qual)[::-1]})) def test_is_reverse_complement_varied_types(self): tested = 0 for constructor, seq_str, rev_comp_str in self.all_combos_rev_comp: seq_kinds = self.sequence_kinds.union(frozenset([constructor])) for sequence in seq_kinds: tested += 1 seq1 = constructor(seq_str) seq2 = sequence(rev_comp_str) self.assertTrue(seq1.is_reverse_complement(seq2)) self.assertEqual(tested, 8) def test_is_reverse_complement_empty(self): for constructor in (DNA, RNA): seq1 = constructor('') self.assertTrue(seq1.is_reverse_complement(seq1)) # optional attributes are ignored, only the sequence is compared seq2 = constructor( '', metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': np.array([], dtype=np.int64)}) self.assertTrue(seq2.is_reverse_complement(seq2)) self.assertTrue(seq1.is_reverse_complement(seq2)) self.assertTrue(seq2.is_reverse_complement(seq1)) def test_is_reverse_complement_metadata_ignored(self): for (constructor, seq_str, rev_comp_str, qual) in self.all_combos_rev_comp_qual: seq1 = constructor(seq_str) seq2 = constructor( rev_comp_str, metadata={'id': 'foo', 'description': 'bar'}, positional_metadata={'quality': qual}) self.assertFalse(seq1.is_reverse_complement(seq1)) self.assertFalse(seq2.is_reverse_complement(seq2)) self.assertTrue(seq1.is_reverse_complement(seq2)) 
self.assertTrue(seq2.is_reverse_complement(seq1)) def test_is_reverse_complement_non_reverse_complements(self): for constructor in (DNA, RNA): # same length seq1 = constructor('ACAG') seq2 = constructor('AAAA') self.assertFalse(seq1.is_reverse_complement(seq1)) self.assertFalse(seq2.is_reverse_complement(seq2)) self.assertFalse(seq1.is_reverse_complement(seq2)) self.assertFalse(seq2.is_reverse_complement(seq1)) # different length seq1 = constructor('ACAG') seq2 = constructor('AAAAA') self.assertFalse(seq1.is_reverse_complement(seq1)) self.assertFalse(seq2.is_reverse_complement(seq2)) self.assertFalse(seq1.is_reverse_complement(seq2)) self.assertFalse(seq2.is_reverse_complement(seq1)) def test_is_reverse_complement_type_mismatch(self): for Class in (DNA, RNA): class DifferentSequenceClass(GrammaredSequence): @classproperty def degenerate_map(cls): return {"X": set("AB")} @classproperty def definite_chars(cls): return set("ABC") @classproperty def default_gap_char(cls): return '-' @classproperty def gap_chars(cls): return set('-.') seq1 = Class('ABC') seq2 = DifferentSequenceClass('ABC') with self.assertRaisesRegex(TypeError, r"Cannot use.*and " "DifferentSequenceClass together"): seq1.is_reverse_complement(seq2) def test_motif_purine_run(self): dna = (DNA, "AARC--TCRG", "AA-RC--TCR-G") rna = (RNA, "AARC--UCRG", "AA-RC--UCR-G") all_sets = (dna, rna) for constructor, run1, run2 in all_sets: seq = constructor("") self.assertEqual(list(seq.find_motifs("purine-run")), []) seq = constructor(run1) self.assertEqual(list(seq.find_motifs("purine-run")), [slice(0, 3), slice(8, 10)]) seq = constructor(run2) self.assertEqual(list(seq.find_motifs("purine-run", min_length=3, ignore=seq.gaps())), [slice(0, 4)]) def test_motif_pyrimidine_run(self): dna = (DNA, "AARC--TCRA", "AA-RC--TCR-A") rna = (RNA, "AARC--UCRG", "AA-RC--UCR-G") all_sets = (dna, rna) for constructor, run1, run2 in all_sets: seq = constructor("") self.assertEqual(list(seq.find_motifs("pyrimidine-run")), []) seq = 
constructor(run1) self.assertEqual(list(seq.find_motifs("pyrimidine-run")), [slice(3, 4), slice(6, 8)]) seq = constructor(run2) self.assertEqual(list(seq.find_motifs("pyrimidine-run", min_length=3, ignore=seq.gaps())), [slice(4, 9)]) def test_gc_frequency_and_gc_content(self): universal_sets = (('', 0, 0.0), ('ADDDH', 0, 0.0), ('ACGA', 2, 0.5), ('ACGS', 3, 0.75), ('AAAAAAAG', 1, 0.125), ('CCC', 3, 1.0), ('GGG', 3, 1.0), ('SSS', 3, 1.0), ('CGS', 3, 1.0), ('----....', 0, 0.0), ('G--..', 1, 1.0), ('ACGA', 2, 0.5)) dna = (DNA, universal_sets + (('ATMRWYKVHDBN.-', 0, 0.0),)) rna = (RNA, universal_sets + (('AUMRWYKVHDBN.-', 0, 0.0),)) for constructor, current_set in (dna, rna): for seq_str, count, ratio in current_set: seq = constructor(seq_str) self.assertEqual(count, seq.gc_frequency()) self.assertEqual(count, seq.gc_frequency(relative=False)) self.assertEqual(ratio, seq.gc_frequency(relative=True)) self.assertEqual(ratio, seq.gc_content()) if __name__ == "__main__": unittest.main() scikit-bio-0.6.2/skbio/sequence/tests/test_protein.py000066400000000000000000000117131464262511300227350ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import numpy as np import numpy.testing as npt from skbio import Protein class TestProtein(unittest.TestCase): def test_alphabet(self): expected = set("ABCDEFGHIJKLMNOPQRSTUVWXYZ-.*") self.assertIs(type(Protein.alphabet), set) self.assertEqual(Protein.alphabet, expected) Protein.alphabet.add("&") self.assertEqual(Protein.alphabet, expected) self.assertEqual(Protein('').alphabet, expected) with self.assertRaises(AttributeError): Protein('').alphabet = set("ABCD") # TODO: duplicate of test_definite_chars, remove when nondegenerate_chars, # is removed def test_nondegenerate_chars(self): exp = set("ACDEFGHIKLMNOPQRSTUVWY") self.assertEqual(Protein("").nondegenerate_chars, exp) self.assertEqual(Protein.nondegenerate_chars, exp) def test_definite_chars(self): exp = set("ACDEFGHIKLMNOPQRSTUVWY") self.assertEqual(Protein("").definite_chars, exp) self.assertEqual(Protein.definite_chars, exp) def test_noncanonical_chars(self): exp = set("OU") self.assertEqual(Protein("").noncanonical_chars, exp) self.assertEqual(Protein.noncanonical_chars, exp) def test_wildcard_char(self): exp = "X" self.assertEqual(Protein("").wildcard_char, exp) self.assertEqual(Protein.wildcard_char, exp) def test_degenerate_map(self): exp = { 'B': set(['D', 'N']), 'Z': set(['E', 'Q']), 'J': set(['I', 'L']), 'X': set(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'Y']) } self.assertEqual(Protein("").degenerate_map, exp) self.assertEqual(Protein.degenerate_map, exp) def test_stop_chars(self): expected = set('*') self.assertIs(type(Protein.stop_chars), set) self.assertEqual(Protein.stop_chars, expected) Protein.stop_chars.add("JO") self.assertEqual(Protein.stop_chars, expected) self.assertEqual(Protein('').stop_chars, expected) with self.assertRaises(AttributeError): Protein('').stop_chars = set("^&") def test_stops(self): 
npt.assert_array_equal(Protein('').stops(), np.array([])) npt.assert_array_equal(Protein('P').stops(), np.array([False])) npt.assert_array_equal(Protein('PAW').stops(), np.array([False, False, False])) npt.assert_array_equal(Protein('PAW*').stops(), np.array([False, False, False, True])) npt.assert_array_equal(Protein('P*W*').stops(), np.array([False, True, False, True])) npt.assert_array_equal(Protein('****').stops(), np.array([True, True, True, True])) npt.assert_array_equal(Protein('XZB-.').stops(), np.array([False, False, False, False, False])) def test_has_stops(self): self.assertFalse(Protein('').has_stops()) self.assertFalse(Protein('P').has_stops()) self.assertFalse(Protein('PAW').has_stops()) self.assertTrue(Protein('PAW*').has_stops()) self.assertTrue(Protein('P*W*').has_stops()) self.assertTrue(Protein('****').has_stops()) self.assertFalse(Protein('XZB-.').has_stops()) def test_motif_n_glycosylation(self): seq = Protein("ACDFFACGNPSL") self.assertEqual(list(seq.find_motifs("N-glycosylation")), []) seq = Protein("ACDFNFTACGNPSL") self.assertEqual(list(seq.find_motifs("N-glycosylation")), [slice(4, 8)]) seq = Protein("AC-DFN-FTACGNPSL") self.assertEqual(list(seq.find_motifs("N-glycosylation", ignore=seq.gaps())), [slice(5, 10)]) def test_repr(self): # basic sanity checks for custom repr stats. 
more extensive testing is # performed on Sequence.__repr__ obs = repr(Protein('')) # obtained from super() self.assertIn('has gaps: False', obs) # custom to Protein self.assertIn('has stops: False', obs) obs = repr(Protein('PAW')) self.assertIn('has gaps: False', obs) self.assertIn('has stops: False', obs) obs = repr(Protein('PA*W-')) self.assertIn('has gaps: True', obs) self.assertIn('has stops: True', obs) obs = repr(Protein('*****')) self.assertIn('has gaps: False', obs) self.assertIn('has stops: True', obs) if __name__ == "__main__": unittest.main() scikit-bio-0.6.2/skbio/sequence/tests/test_rna.py000066400000000000000000000035711464262511300220400ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import unittest from skbio import DNA, RNA from skbio.metadata import IntervalMetadata # tests specific to RNA go here. 
tests for functionality shared by DNA and RNA # go in test_nucleotide_sequences.py class TestRNA(unittest.TestCase): def test_reverse_transcribe(self): # without changes self.assertEqual(RNA('').reverse_transcribe(), DNA('')) self.assertEqual(RNA('A').reverse_transcribe(), DNA('A')) self.assertEqual(RNA('.ACGW-').reverse_transcribe(), DNA('.ACGW-')) # with changes self.assertEqual(DNA('T'), RNA('U').reverse_transcribe()) self.assertEqual(DNA('TT'), RNA('UU').reverse_transcribe()) self.assertEqual(DNA('ATCTG'), RNA('AUCUG').reverse_transcribe()) self.assertEqual(DNA('TTTG'), RNA('UUUG').reverse_transcribe()) def test_reverse_transcribe_preserves_all_metadata(self): im = IntervalMetadata(4) im.add([(0, 2)], metadata={'gene': 'p53'}) seq = RNA('AGUU', metadata={'foo': 'bar'}, positional_metadata={'foo': range(4)}, interval_metadata=im) exp = DNA('AGTT', metadata={'foo': 'bar'}, positional_metadata={'foo': range(4)}, interval_metadata=im) self.assertEqual(seq.reverse_transcribe(), exp) def test_reverse_transcribe_does_not_modify_input(self): seq = RNA('AUAU') self.assertEqual(seq.reverse_transcribe(), DNA('ATAT')) self.assertEqual(seq, RNA('AUAU')) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/sequence/tests/test_sequence.py000066400000000000000000003404231464262511300230700ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import copy import functools import itertools import re from types import GeneratorType from collections.abc import Hashable from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd import scipy.spatial.distance import skbio.sequence.distance from skbio import Sequence, DNA, SubstitutionMatrix from skbio.util import assert_data_frame_almost_equal from skbio.sequence._sequence import (_single_index_to_slice, _is_single_index, _as_slice_if_single_index) from skbio.util._testing import ReallyEqualMixin from skbio.metadata._testing import (MetadataMixinTests, IntervalMetadataMixinTests, PositionalMetadataMixinTests) from skbio.metadata import IntervalMetadata class SequenceSubclass(Sequence): """Used for testing purposes.""" pass class SequenceSubclassTwo(Sequence): """Used for testing purposes.""" pass class TestSequenceMetadata(TestCase, ReallyEqualMixin, MetadataMixinTests): def setUp(self): self._metadata_constructor_ = functools.partial(Sequence, '') class TestSequencePositionalMetadata(TestCase, ReallyEqualMixin, PositionalMetadataMixinTests): def setUp(self): def factory(axis_len, positional_metadata=None): return Sequence('Z' * axis_len, positional_metadata=positional_metadata) self._positional_metadata_constructor_ = factory class TestSequenceIntervalMetadata(TestCase, ReallyEqualMixin, IntervalMetadataMixinTests): def setUp(self): super()._set_up() def factory(axis_len, interval_metadata=None): return Sequence('Z' * axis_len, interval_metadata=interval_metadata) self._interval_metadata_constructor_ = factory class TestSequenceBase(TestCase): def setUp(self): self.sequence_kinds = frozenset([ str, Sequence, lambda s: np.frombuffer(s.encode('ascii'), dtype='|S1'), lambda s: np.frombuffer(s.encode('ascii'), dtype=np.uint8)]) class TestSequence(TestSequenceBase, ReallyEqualMixin): def setUp(self): super(TestSequence, self).setUp() 
self.lowercase_seq = Sequence('AAAAaaaa', lowercase='key') def empty_generator(): yield from () self.getitem_empty_indices = [ [], (), {}, empty_generator(), # ndarray of implicit float dtype np.array([]), np.array([], dtype=int)] def test_concat_bad_how(self): seq1 = seq2 = Sequence("123") with self.assertRaises(ValueError): Sequence.concat([seq1, seq2], how='foo') def test_concat_on_subclass(self): seq1 = SequenceSubclass("123") seq2 = Sequence("123") result = SequenceSubclass.concat([seq1, seq2]) self.assertIs(type(result), SequenceSubclass) self.assertEqual(result, SequenceSubclass("123123")) def test_concat_on_empty_iterator(self): result = SequenceSubclass.concat((_ for _ in [])) self.assertIs(type(result), SequenceSubclass) self.assertEqual(result, SequenceSubclass("")) def test_concat_on_bad_subclass(self): seq1 = Sequence("123") seq2 = SequenceSubclassTwo("123") with self.assertRaises(TypeError): SequenceSubclass.concat([seq1, seq2]) def test_concat_interval_metadata(self): seq1 = Sequence("1234") seq1.interval_metadata.add( [(0, 2)], [(True, False)], {'gene': 'sagA'}) seq2 = Sequence("5678") seq2.interval_metadata.add( [(1, 3)], [(False, True)], {'gene': 'sagB'}) obs = Sequence.concat([seq1, seq2]) exp = Sequence('12345678') exp.interval_metadata.add( [(0, 2)], [(True, False)], {'gene': 'sagA'}) exp.interval_metadata.add( [(5, 7)], [(False, True)], {'gene': 'sagB'}) self.assertEqual(exp, obs) def test_concat_one_seq_has_none_interval_metadata(self): seq1 = Sequence("1234") seq1.interval_metadata.add( [(0, 2)], [(True, False)], {'gene': 'sagA'}) seq2 = Sequence("5678") seq3 = Sequence("910") seq3.interval_metadata.add( [(1, 3)], [(False, True)], {'gene': 'sagB'}) obs = Sequence.concat([seq1, seq2, seq3]) exp = Sequence('12345678910') exp.interval_metadata.add( [(0, 2)], [(True, False)], {'gene': 'sagA'}) exp.interval_metadata.add( [(9, 11)], [(False, True)], {'gene': 'sagB'}) self.assertEqual(exp, obs) def test_concat_default_how(self): seq1 = 
Sequence("1234", positional_metadata={'a': [1]*4}) seq2 = Sequence("5678", positional_metadata={'a': [2]*4}) seqbad = Sequence("9", positional_metadata={'b': [9]}) result1 = Sequence.concat([seq1, seq2]) result2 = Sequence.concat([seq1, seq2], how='strict') self.assertEqual(result1, result2) with self.assertRaisesRegex(ValueError, r'.*positional.*metadata.*inner.*outer.*'): Sequence.concat([seq1, seq2, seqbad]) def test_concat_strict_simple(self): expected = Sequence( "12345678", positional_metadata={'a': [1, 1, 1, 1, 2, 2, 2, 2]}) seq1 = Sequence("1234", positional_metadata={'a': [1]*4}) seq2 = Sequence("5678", positional_metadata={'a': [2]*4}) result = Sequence.concat([seq1, seq2], how='strict') self.assertEqual(result, expected) self.assertFalse(result.metadata) def test_concat_strict_many(self): odd_key = frozenset() expected = Sequence("13579", positional_metadata={odd_key: [1, 2, 3, 4, 5], 'a': list('skbio')}) result = Sequence.concat([ Sequence("1", positional_metadata={'a': ['s'], odd_key: [1]}), Sequence("3", positional_metadata={'a': ['k'], odd_key: [2]}), Sequence("5", positional_metadata={'a': ['b'], odd_key: [3]}), Sequence("7", positional_metadata={'a': ['i'], odd_key: [4]}), Sequence("9", positional_metadata={'a': ['o'], odd_key: [5]}) ], how='strict') self.assertEqual(result, expected) self.assertFalse(result.metadata) def test_concat_strict_fail(self): seq1 = Sequence("1", positional_metadata={'a': [1]}) seq2 = Sequence("2", positional_metadata={'b': [2]}) with self.assertRaisesRegex(ValueError, r'.*positional.*metadata.*inner.*outer.*'): Sequence.concat([seq1, seq2], how='strict') def test_concat_outer_simple(self): seq1 = Sequence("1234") seq2 = Sequence("5678") result = Sequence.concat([seq1, seq2], how='outer') self.assertEqual(result, Sequence("12345678")) self.assertFalse(result.metadata) def test_concat_outer_missing(self): a = {} b = {} seq1 = Sequence("12", positional_metadata={'a': ['1', '2']}) seq2 = Sequence("34", 
positional_metadata={'b': [3, 4], 'c': [a, b]}) seq3 = Sequence("56") seq4 = Sequence("78", positional_metadata={'a': [7, 8]}) seq5 = Sequence("90", positional_metadata={'b': [9, 0]}) result = Sequence.concat([seq1, seq2, seq3, seq4, seq5], how='outer') expected = Sequence("1234567890", positional_metadata={ 'a': ['1', '2', np.nan, np.nan, np.nan, np.nan, 7, 8, np.nan, np.nan], 'b': [np.nan, np.nan, 3, 4, np.nan, np.nan, np.nan, np.nan, 9, 0], 'c': [np.nan, np.nan, a, b, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan] }) self.assertEqual(result, expected) self.assertFalse(result.metadata) def test_concat_inner_simple(self): seq1 = Sequence("1234") seq2 = Sequence("5678", positional_metadata={'discarded': [1] * 4}) result = Sequence.concat([seq1, seq2], how='inner') self.assertEqual(result, Sequence("12345678")) self.assertFalse(result.metadata) def test_concat_inner_missing(self): seq1 = Sequence("12", positional_metadata={'a': ['1', '2'], 'c': [{}, {}]}) seq2 = Sequence("34", positional_metadata={'a': [3, 4], 'b': [3, 4]}) seq3 = Sequence("56", positional_metadata={'a': [5, 6], 'b': [5, 6]}) result = Sequence.concat([seq1, seq2, seq3], how='inner') expected = Sequence("123456", positional_metadata={'a': ['1', '2', 3, 4, 5, 6]}) self.assertEqual(result, expected) self.assertFalse(result.metadata) def test_init_default_parameters(self): seq = Sequence('.ABC123xyz-') npt.assert_equal(seq.values, np.array('.ABC123xyz-', dtype='c')) self.assertEqual('.ABC123xyz-', str(seq)) self.assertFalse(seq.metadata) self.assertEqual(seq.metadata, {}) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame(index=range(11))) self.assertEqual(seq.interval_metadata, IntervalMetadata(len(seq))) def test_init_nondefault_parameters(self): s = '.ABC123xyz-' im = IntervalMetadata(len(s)) im.add([(0, 1)], metadata={'gene': 'sagA'}) seq = Sequence(s, metadata={'id': 'foo', 'description': 'bar baz'}, positional_metadata={'quality': range(11)}, interval_metadata=im) 
self.assertEqual(seq.interval_metadata, im) npt.assert_equal(seq.values, np.array('.ABC123xyz-', dtype='c')) self.assertEqual(s, str(seq)) self.assertTrue(seq.metadata) self.assertEqual(seq.metadata, {'id': 'foo', 'description': 'bar baz'}) assert_data_frame_almost_equal( seq.positional_metadata, pd.DataFrame({'quality': range(11)}, index=range(11))) def test_init_empty_sequence(self): # Test constructing an empty sequence using each supported input type. for s in (b'', # bytes '', # unicode np.array('', dtype='c'), # char vector np.frombuffer(b'', dtype=np.uint8), # byte vec Sequence('')): # another Sequence object seq = Sequence(s) self.assertIsInstance(seq.values, np.ndarray) self.assertEqual(seq.values.dtype, '|S1') self.assertEqual(seq.values.shape, (0, )) npt.assert_equal(seq.values, np.array('', dtype='c')) self.assertEqual(str(seq), '') self.assertEqual(len(seq), 0) self.assertFalse(seq.metadata) self.assertEqual(seq.metadata, {}) self.assertEqual(seq.interval_metadata, IntervalMetadata(0)) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame(index=range(0))) def test_init_single_character_sequence(self): for s in (b'A', 'A', np.array('A', dtype='c'), np.frombuffer(b'A', dtype=np.uint8), Sequence('A')): seq = Sequence(s) self.assertIsInstance(seq.values, np.ndarray) self.assertEqual(seq.values.dtype, '|S1') self.assertEqual(seq.values.shape, (1,)) npt.assert_equal(seq.values, np.array('A', dtype='c')) self.assertEqual(str(seq), 'A') self.assertEqual(len(seq), 1) self.assertFalse(seq.metadata) self.assertEqual(seq.metadata, {}) self.assertEqual(seq.interval_metadata, IntervalMetadata(1)) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame(index=range(1))) def test_init_multiple_character_sequence(self): for s in (b'.ABC\t123 xyz-', '.ABC\t123 xyz-', np.array('.ABC\t123 xyz-', dtype='c'), np.frombuffer(b'.ABC\t123 xyz-', dtype=np.uint8), Sequence('.ABC\t123 xyz-')): seq = Sequence(s) self.assertIsInstance(seq.values, 
np.ndarray) self.assertEqual(seq.values.dtype, '|S1') self.assertEqual(seq.values.shape, (14,)) npt.assert_equal(seq.values, np.array('.ABC\t123 xyz-', dtype='c')) self.assertEqual(str(seq), '.ABC\t123 xyz-') self.assertEqual(len(seq), 14) self.assertFalse(seq.metadata) self.assertEqual(seq.metadata, {}) self.assertEqual(seq.interval_metadata, IntervalMetadata(14)) assert_data_frame_almost_equal(seq.positional_metadata, pd.DataFrame(index=range(14))) def test_init_from_sequence_object(self): # We're testing this in its simplest form in other tests. This test # exercises more complicated cases of building a sequence from another # sequence. # just the sequence, no other metadata seq = Sequence('ACGT') self.assertEqual(Sequence(seq), seq) # sequence with metadata should have everything propagated seq = Sequence('ACGT', metadata={'id': 'foo', 'description': 'bar baz'}, positional_metadata={'quality': range(4)}) seq.interval_metadata.add([(0, 1)], metadata={'gene': 'sagA'}) self.assertEqual(Sequence(seq), seq) # should be able to override metadata im = IntervalMetadata(4) im.add([(0, 2)], metadata={'gene': 'sagB'}) self.assertEqual( Sequence(seq, metadata={'id': 'abc', 'description': '123'}, positional_metadata={'quality': [42] * 4}, interval_metadata=im), Sequence('ACGT', metadata={'id': 'abc', 'description': '123'}, positional_metadata={'quality': [42] * 4}, interval_metadata=im)) # subclasses work too im = IntervalMetadata(4) im.add([(0, 2)], metadata={'gene': 'sagB'}) seq = SequenceSubclass('ACGT', metadata={'id': 'foo', 'description': 'bar baz'}, positional_metadata={'quality': range(4)}, interval_metadata=im) self.assertEqual( Sequence(seq), Sequence('ACGT', metadata={'id': 'foo', 'description': 'bar baz'}, positional_metadata={'quality': range(4)}, interval_metadata=im)) def test_init_from_non_descendant_sequence_object(self): seq = SequenceSubclass('ACGT') with self.assertRaises(TypeError) as cm: SequenceSubclassTwo(seq) error = str(cm.exception) 
self.assertIn("SequenceSubclass", error) self.assertIn("SequenceSubclassTwo", error) self.assertIn("cast", error) def test_init_from_contiguous_sequence_bytes_view(self): bytes = np.array([65, 42, 66, 42, 65], dtype=np.uint8) view = bytes[:3] seq = Sequence(view) # sequence should be what we'd expect self.assertEqual(seq, Sequence('A*B')) # we shouldn't own the memory because no copy should have been made self.assertFalse(seq._owns_bytes) # can't mutate view because it isn't writeable anymore with self.assertRaises(ValueError): view[1] = 100 # sequence shouldn't have changed self.assertEqual(seq, Sequence('A*B')) # mutate bytes (*not* the view) bytes[0] = 99 # Sequence changed because we are only able to make the view read-only, # not its source (bytes). This is somewhat inconsistent behavior that # is (to the best of our knowledge) outside our control. self.assertEqual(seq, Sequence('c*B')) def test_init_from_noncontiguous_sequence_bytes_view(self): bytes = np.array([65, 42, 66, 42, 65], dtype=np.uint8) view = bytes[::2] seq = Sequence(view) # sequence should be what we'd expect self.assertEqual(seq, Sequence('ABA')) # we should own the memory because a copy should have been made self.assertTrue(seq._owns_bytes) # mutate bytes and its view bytes[0] = 99 view[1] = 100 # sequence shouldn't have changed self.assertEqual(seq, Sequence('ABA')) def test_init_no_copy_of_sequence(self): bytes = np.array([65, 66, 65], dtype=np.uint8) seq = Sequence(bytes) # should share the same memory self.assertIs(seq._bytes, bytes) # shouldn't be able to mutate the Sequence object's internals by # mutating the shared memory with self.assertRaises(ValueError): bytes[1] = 42 def test_init_invalid_sequence(self): # invalid dtype (numpy.ndarray input) with self.assertRaises(TypeError): # int64 Sequence(np.array([1, 2, 3])) with self.assertRaises(TypeError): # |S21 Sequence(np.array([1, "23", 3])) with self.assertRaises(TypeError): # object Sequence(np.array([1, {}, ()], dtype=object)) # 
invalid input type (non-numpy.ndarray input) with self.assertRaisesRegex(TypeError, r'tuple'): Sequence(('a', 'b', 'c')) with self.assertRaisesRegex(TypeError, r'list'): Sequence(['a', 'b', 'c']) with self.assertRaisesRegex(TypeError, r'set'): Sequence({'a', 'b', 'c'}) with self.assertRaisesRegex(TypeError, r'dict'): Sequence({'a': 42, 'b': 43, 'c': 44}) with self.assertRaisesRegex(TypeError, r'int'): Sequence(42) with self.assertRaisesRegex(TypeError, r'float'): Sequence(4.2) with self.assertRaisesRegex(TypeError, r'int64'): Sequence(np.int64(50)) with self.assertRaisesRegex(TypeError, r'float64'): Sequence(np.float64(50)) with self.assertRaisesRegex(TypeError, r'Foo'): class Foo: pass Sequence(Foo()) # out of ASCII range with self.assertRaises(UnicodeEncodeError): Sequence('abc\u1F30') def test_values_property(self): # Property tests are only concerned with testing the interface # provided by the property: that it can be accessed, can't be # reassigned or mutated in place, and that the correct type is # returned. More extensive testing of border cases (e.g., different # sequence lengths or input types, odd characters, etc.) are performed # in Sequence.__init__ tests. 
seq = Sequence('ACGT') # should get back a numpy.ndarray of '|S1' dtype self.assertIsInstance(seq.values, np.ndarray) self.assertEqual(seq.values.dtype, '|S1') npt.assert_equal(seq.values, np.array('ACGT', dtype='c')) # test that we can't mutate the property with self.assertRaises(ValueError): seq.values[1] = 'A' # test that we can't set the property with self.assertRaises(AttributeError): seq.values = np.array("GGGG", dtype='c') def test_sequence_numpy_compatibility(self): seq = Sequence('abc123') array = np.asarray(seq) self.assertIsInstance(array, np.ndarray) self.assertEqual(array.dtype, '|S1') npt.assert_equal(array, np.array('abc123', dtype='c')) npt.assert_equal(array, seq.values) with self.assertRaises(ValueError): array[1] = 'B' def test_observed_chars_property(self): self.assertEqual(Sequence('').observed_chars, set()) self.assertEqual(Sequence('x').observed_chars, {'x'}) self.assertEqual(Sequence('xYz').observed_chars, {'x', 'Y', 'z'}) self.assertEqual(Sequence('zzz').observed_chars, {'z'}) self.assertEqual(Sequence('xYzxxZz').observed_chars, {'x', 'Y', 'z', 'Z'}) self.assertEqual(Sequence('\t ').observed_chars, {' ', '\t'}) im = IntervalMetadata(6) im.add([(0, 2)], metadata={'gene': 'sagB'}) self.assertEqual( Sequence('aabbcc', metadata={'foo': 'bar'}, positional_metadata={'foo': range(6)}, interval_metadata=im).observed_chars, {'a', 'b', 'c'}) with self.assertRaises(AttributeError): Sequence('ACGT').observed_chars = {'a', 'b', 'c'} def test_eq_and_ne(self): seq_a = Sequence("A") seq_b = Sequence("B") im = IntervalMetadata(1) im.add([(0, 1)], metadata={'gene': 'sagA'}) im2 = IntervalMetadata(1) im.add([(0, 1)], metadata={'gene': 'sagB'}) self.assertTrue(seq_a == seq_a) self.assertTrue(Sequence("a") == Sequence("a")) self.assertTrue(Sequence("a", metadata={'id': 'b'}) == Sequence("a", metadata={'id': 'b'})) self.assertTrue(Sequence("a", metadata={'id': 'b', 'description': 'c'}) == Sequence("a", metadata={'id': 'b', 'description': 'c'})) 
self.assertTrue(Sequence("a", metadata={'id': 'b', 'description': 'c'}, positional_metadata={'quality': [1]}, interval_metadata=im) == Sequence("a", metadata={'id': 'b', 'description': 'c'}, positional_metadata={'quality': [1]}, interval_metadata=im)) self.assertTrue(seq_a != seq_b) self.assertTrue(SequenceSubclass("a") != Sequence("a")) self.assertTrue(Sequence("a") != Sequence("b")) self.assertTrue(Sequence("a") != Sequence("a", metadata={'id': 'b'})) self.assertTrue(Sequence("a", metadata={'id': 'c'}) != Sequence("a", metadata={'id': 'c', 'description': 't'})) self.assertTrue(Sequence("a", positional_metadata={'quality': [1]}) != Sequence("a")) self.assertTrue(Sequence("a", interval_metadata=im) != Sequence("a")) self.assertTrue(Sequence("a", positional_metadata={'quality': [1]}) != Sequence("a", positional_metadata={'quality': [2]})) self.assertTrue(Sequence("a", interval_metadata=im) != Sequence("a", interval_metadata=im2)) self.assertTrue(Sequence("c", positional_metadata={'quality': [3]}) != Sequence("b", positional_metadata={'quality': [3]})) self.assertTrue(Sequence("c", interval_metadata=im) != Sequence("b", interval_metadata=im)) self.assertTrue(Sequence("a", metadata={'id': 'b'}) != Sequence("c", metadata={'id': 'b'})) def test_eq_sequences_without_metadata_compare_equal(self): self.assertTrue(Sequence('') == Sequence('')) self.assertTrue(Sequence('z') == Sequence('z')) self.assertTrue( Sequence('ACGT') == Sequence('ACGT')) def test_eq_sequences_with_metadata_compare_equal(self): seq1 = Sequence('ACGT', metadata={'id': 'foo', 'desc': 'abc'}, positional_metadata={'qual': [1, 2, 3, 4]}) seq2 = Sequence('ACGT', metadata={'id': 'foo', 'desc': 'abc'}, positional_metadata={'qual': [1, 2, 3, 4]}) self.assertTrue(seq1 == seq2) # order shouldn't matter self.assertTrue(seq2 == seq1) def test_eq_sequences_from_different_sources_compare_equal(self): # sequences that have the same data but are constructed from different # types of data should compare equal im = 
IntervalMetadata(4) im.add([(0, 2)], metadata={'gene': 'sagB'}) seq1 = Sequence('ACGT', metadata={'id': 'foo', 'desc': 'abc'}, positional_metadata={'quality': np.array((1, 2, 3, 4), dtype=np.int64)}, interval_metadata=im) seq2 = Sequence(np.array([65, 67, 71, 84], dtype=np.uint8), metadata={'id': 'foo', 'desc': 'abc'}, positional_metadata={'quality': np.array([1, 2, 3, 4], dtype=np.int64)}, interval_metadata=im) self.assertTrue(seq1 == seq2) def test_eq_type_mismatch(self): seq1 = Sequence('ACGT') seq2 = SequenceSubclass('ACGT') self.assertFalse(seq1 == seq2) def test_eq_positional_metadata_mismatch(self): # both provided seq1 = Sequence('ACGT', positional_metadata={'quality': [1, 2, 3, 4]}) seq2 = Sequence('ACGT', positional_metadata={'quality': [1, 2, 3, 5]}) self.assertFalse(seq1 == seq2) # one provided seq1 = Sequence('ACGT', positional_metadata={'quality': [1, 2, 3, 4]}) seq2 = Sequence('ACGT') self.assertFalse(seq1 == seq2) def test_eq_interval_metadata_mismatch(self): im1 = IntervalMetadata(4) im1.add([(0, 3)], metadata={'gene': 'sagA'}) im2 = IntervalMetadata(4) im2.add([(0, 2)], metadata={'gene': 'sagA'}) # both provided seq1 = Sequence('ACGT', interval_metadata=im1) seq2 = Sequence('ACGT', interval_metadata=im2) self.assertFalse(seq1 == seq2) # one provided seq1 = Sequence('ACGT', interval_metadata=im1) seq2 = Sequence('ACGT') self.assertFalse(seq1 == seq2) def test_eq_sequence_mismatch(self): seq1 = Sequence('ACGT') seq2 = Sequence('TGCA') self.assertFalse(seq1 == seq2) def test_getitem_gives_new_sequence(self): seq = Sequence("Sequence string !1@2#3?.,") self.assertFalse(seq is seq[:]) def test_getitem_drops_interval_metadata(self): s = "Sequence string !1@2#3?.," seq = Sequence(s, metadata={'id': 'id', 'description': 'dsc'}) seq.interval_metadata.add([(0, 3)], metadata={'gene': 'sagA'}) eseq = Sequence('Se', metadata={'id': 'id', 'description': 'dsc'}) self.assertEqual(seq[:2], eseq) def test_getitem_with_int_has_positional_metadata(self): s = 
"Sequence string !1@2#3?.," length = len(s) seq = Sequence(s, metadata={'id': 'id', 'description': 'dsc'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) eseq = Sequence("S", {'id': 'id', 'description': 'dsc'}, positional_metadata={'quality': np.array([0], dtype=np.int64)}) self.assertEqual(seq[0], eseq) eseq = Sequence(",", metadata={'id': 'id', 'description': 'dsc'}, positional_metadata={'quality': np.array([len(seq) - 1], dtype=np.int64)}) self.assertEqual(seq[len(seq) - 1], eseq) eseq = Sequence("t", metadata={'id': 'id', 'description': 'dsc'}, positional_metadata={'quality': np.asarray([10], dtype=np.int64)}) self.assertEqual(seq[10], eseq) def test_single_index_to_slice(self): a = [1, 2, 3, 4] self.assertEqual(slice(0, 1), _single_index_to_slice(0)) self.assertEqual([1], a[_single_index_to_slice(0)]) self.assertEqual(slice(-1, None), _single_index_to_slice(-1)) self.assertEqual([4], a[_single_index_to_slice(-1)]) def test_is_single_index(self): self.assertTrue(_is_single_index(0)) self.assertFalse(_is_single_index(True)) self.assertFalse(_is_single_index(bool())) self.assertFalse(_is_single_index('a')) def test_as_slice_if_single_index(self): self.assertEqual(slice(0, 1), _as_slice_if_single_index(0)) slice_obj = slice(2, 3) self.assertIs(slice_obj, _as_slice_if_single_index(slice_obj)) def test_slice_positional_metadata(self): seq = Sequence('ABCDEFGHIJ', positional_metadata={'foo': np.arange(10, dtype=np.int64), 'bar': np.arange(100, 110, dtype=np.int64)}) self.assertTrue(pd.DataFrame({'foo': [0], 'bar': [100]}, dtype=np.int64).equals( seq._slice_positional_metadata(0))) self.assertTrue(pd.DataFrame({'foo': [0], 'bar': [100]}).equals( seq._slice_positional_metadata(slice(0, 1)))) self.assertTrue(pd.DataFrame({'foo': [0, 1], 'bar': [100, 101]}).equals( seq._slice_positional_metadata(slice(0, 2)))) self.assertTrue(pd.DataFrame( {'foo': [9], 'bar': [109]}, index=[9]).equals( seq._slice_positional_metadata(9))) def 
test_getitem_with_int_no_positional_metadata(self): seq = Sequence("Sequence string !1@2#3?.,", metadata={'id': 'id2', 'description': 'no_qual'}) eseq = Sequence("t", metadata={'id': 'id2', 'description': 'no_qual'}) self.assertEqual(seq[10], eseq) def test_getitem_with_slice_has_positional_metadata(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) eseq = Sequence("012", metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.arange(3, dtype=np.int64)}) self.assertEqual(seq[0:3], eseq) self.assertEqual(seq[:3], eseq) self.assertEqual(seq[:3:1], eseq) eseq = Sequence("def", metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.asarray([13, 14, 15], dtype=np.int64)}) self.assertEqual(seq[-3:], eseq) self.assertEqual(seq[-3::1], eseq) eseq = Sequence("02468ace", metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.asarray([0, 2, 4, 6, 8, 10, 12, 14], dtype=np.int64)}) self.assertEqual(seq[0:length:2], eseq) self.assertEqual(seq[::2], eseq) eseq = Sequence(s[::-1], metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)[::-1]}) self.assertEqual(seq[length::-1], eseq) self.assertEqual(seq[::-1], eseq) eseq = Sequence('fdb97531', metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.asarray([15, 13, 11, 9, 7, 5, 3, 1], dtype=np.int64)}) self.assertEqual(seq[length::-2], eseq) self.assertEqual(seq[::-2], eseq) self.assertEqual(seq[0:500:], seq) eseq = Sequence('', metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': np.array([], dtype=np.int64)}) self.assertEqual(seq[length:0], eseq) self.assertEqual(seq[-length:0], eseq) self.assertEqual(seq[1:0], eseq) eseq = Sequence("0", metadata={'id': 'id3', 'description': 'dsc3'}, positional_metadata={'quality': 
np.asarray([0], dtype=np.int64)}) self.assertEqual(seq[0:1], eseq) self.assertEqual(seq[0:1:1], eseq) self.assertEqual(seq[-length::-1], eseq) def test_getitem_with_slice_no_positional_metadata(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id4', 'description': 'no_qual4'}) eseq = Sequence("02468ace", metadata={'id': 'id4', 'description': 'no_qual4'}) self.assertEqual(seq[0:length:2], eseq) self.assertEqual(seq[::2], eseq) def test_getitem_with_tuple_of_mixed_with_positional_metadata(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id5', 'description': 'dsc5'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) eseq = Sequence("00000", metadata={'id': 'id5', 'description': 'dsc5'}, positional_metadata={'quality': np.asarray([0, 0, 0, 0, 0], dtype=np.int64)}) self.assertEqual(seq[0, 0, 0, 0, 0], eseq) self.assertEqual(seq[0, 0:1, 0, 0, 0], eseq) self.assertEqual(seq[0, 0:1, 0, -length::-1, 0, 1:0], eseq) self.assertEqual(seq[0:1, 0:1, 0:1, 0:1, 0:1], eseq) self.assertEqual(seq[0:1, 0, 0, 0, 0], eseq) eseq = Sequence("0123fed9", metadata={'id': 'id5', 'description': 'dsc5'}, positional_metadata={'quality': [0, 1, 2, 3, 15, 14, 13, 9]}) self.assertEqual(seq[0, 1, 2, 3, 15, 14, 13, 9], eseq) self.assertEqual(seq[0, 1, 2, 3, :-4:-1, 9], eseq) self.assertEqual(seq[0:4, :-4:-1, 9, 1:0], eseq) self.assertEqual(seq[0:4, :-4:-1, 9:10], eseq) def test_getitem_with_tuple_of_mixed_no_positional_metadata(self): seq = Sequence("0123456789abcdef", metadata={'id': 'id6', 'description': 'no_qual6'}) eseq = Sequence("0123fed9", metadata={'id': 'id6', 'description': 'no_qual6'}) self.assertEqual(seq[0, 1, 2, 3, 15, 14, 13, 9], eseq) self.assertEqual(seq[0, 1, 2, 3, :-4:-1, 9], eseq) self.assertEqual(seq[0:4, :-4:-1, 9], eseq) self.assertEqual(seq[0:4, :-4:-1, 9:10], eseq) def test_getitem_with_tuple_of_mixed_no_metadata(self): seq = Sequence("0123456789abcdef") eseq = Sequence("0123fed9") 
self.assertEqual(seq[0, 1, 2, 3, 15, 14, 13, 9], eseq) self.assertEqual(seq[0, 1, 2, 3, :-4:-1, 9], eseq) self.assertEqual(seq[0:4, :-4:-1, 9], eseq) self.assertEqual(seq[0:4, :-4:-1, 9:10], eseq) def test_getitem_with_iterable_of_mixed_has_positional_metadata(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id7', 'description': 'dsc7'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) def generator(): yield slice(0, 4) yield slice(200, 400) yield -1 yield slice(-2, -4, -1) yield 9 eseq = Sequence("0123fed9", metadata={'id': 'id7', 'description': 'dsc7'}, positional_metadata={'quality': np.asarray([0, 1, 2, 3, 15, 14, 13, 9], dtype=np.int64)}) self.assertEqual(seq[[0, 1, 2, 3, 15, 14, 13, 9]], eseq) self.assertEqual(seq[generator()], eseq) self.assertEqual(seq[[slice(0, 4), slice(None, -4, -1), 9]], eseq) self.assertEqual(seq[ [slice(0, 4), slice(None, -4, -1), slice(9, 10)]], eseq) def test_getitem_with_iterable_of_mixed_no_positional_metadata(self): s = "0123456789abcdef" seq = Sequence(s, metadata={'id': 'id7', 'description': 'dsc7'}) def generator(): yield slice(0, 4) yield slice(200, 400) yield slice(None, -4, -1) yield 9 eseq = Sequence("0123fed9", metadata={'id': 'id7', 'description': 'dsc7'}) self.assertEqual(seq[[0, 1, 2, 3, 15, 14, 13, 9]], eseq) self.assertEqual(seq[generator()], eseq) self.assertEqual(seq[[slice(0, 4), slice(None, -4, -1), 9]], eseq) self.assertEqual(seq[ [slice(0, 4), slice(None, -4, -1), slice(9, 10)]], eseq) def test_getitem_with_numpy_index_has_positional_metadata(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id9', 'description': 'dsc9'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) eseq = Sequence("0123fed9", metadata={'id': 'id9', 'description': 'dsc9'}, positional_metadata={'quality': np.asarray([0, 1, 2, 3, 15, 14, 13, 9], dtype=np.int64)}) self.assertEqual(seq[np.array([0, 1, 2, 3, 15, 14, 13, 9])], eseq) def 
test_getitem_with_numpy_index_no_positional_metadata(self): s = "0123456789abcdef" seq = Sequence(s, metadata={'id': 'id10', 'description': 'dsc10'}) eseq = Sequence("0123fed9", metadata={'id': 'id10', 'description': 'dsc10'}) self.assertEqual(seq[np.array([0, 1, 2, 3, 15, 14, 13, 9])], eseq) def test_getitem_with_empty_indices_empty_seq_no_pos_metadata(self): s = "" seq = Sequence(s, metadata={'id': 'id10', 'description': 'dsc10'}) eseq = Sequence('', metadata={'id': 'id10', 'description': 'dsc10'}) tested = 0 for index in self.getitem_empty_indices: tested += 1 self.assertEqual(seq[index], eseq) self.assertEqual(tested, 6) def test_getitem_with_empty_indices_non_empty_seq_no_pos_metadata(self): s = "0123456789abcdef" seq = Sequence(s, metadata={'id': 'id10', 'description': 'dsc10'}) eseq = Sequence('', metadata={'id': 'id10', 'description': 'dsc10'}) tested = 0 for index in self.getitem_empty_indices: tested += 1 self.assertEqual(seq[index], eseq) self.assertEqual(tested, 6) def test_getitem_with_boolean_vector_has_qual(self): s = "0123456789abcdef" length = len(s) seq = Sequence(s, metadata={'id': 'id11', 'description': 'dsc11'}, positional_metadata={'quality': np.arange(length, dtype=np.int64)}) eseq = Sequence("13579bdf", metadata={'id': 'id11', 'description': 'dsc11'}, positional_metadata={'quality': np.asarray([1, 3, 5, 7, 9, 11, 13, 15], dtype=np.int64)}) self.assertEqual(seq[np.array([False, True] * 8)], eseq) self.assertEqual(seq[[False, True] * 8], eseq) def test_getitem_with_boolean_vector_no_positional_metadata(self): s = "0123456789abcdef" seq = Sequence(s, metadata={'id': 'id11', 'description': 'dsc11'}) eseq = Sequence("13579bdf", metadata={'id': 'id11', 'description': 'dsc11'}) self.assertEqual(seq[np.array([False, True] * 8)], eseq) def test_getitem_with_invalid(self): seq = Sequence("123456", metadata={'id': 'idm', 'description': 'description'}, positional_metadata={'quality': [1, 2, 3, 4, 5, 6]}) with self.assertRaises(IndexError): seq['not an 
index'] with self.assertRaises(IndexError): seq[['1', '2']] with self.assertRaises(IndexError): seq[[1, slice(1, 2), 'a']] with self.assertRaises(IndexError): seq[[1, slice(1, 2), True]] with self.assertRaises(IndexError): seq[True] with self.assertRaises(IndexError): seq[np.array([True, False])] with self.assertRaises(IndexError): seq[999] with self.assertRaises(IndexError): seq[0, 0, 999] # numpy 1.8.1 and 1.9.2 raise different error types # (ValueError, IndexError). with self.assertRaises(Exception): seq[100 * [True, False, True]] def test_getitem_empty_positional_metadata(self): seq = Sequence('ACGT') seq.positional_metadata # This will create empty positional_metadata self.assertEqual(Sequence('A'), seq[0]) def test_len(self): self.assertEqual(len(Sequence("")), 0) self.assertEqual(len(Sequence("a")), 1) self.assertEqual(len(Sequence("abcdef")), 6) def test_nonzero(self): # blank self.assertFalse(Sequence("")) self.assertFalse(Sequence("", metadata={'id': 'foo'}, positional_metadata={'quality': range(0)})) # single self.assertTrue(Sequence("A")) self.assertTrue(Sequence("A", metadata={'id': 'foo'}, positional_metadata={'quality': range(1)})) # multi self.assertTrue(Sequence("ACGT")) self.assertTrue(Sequence("ACGT", metadata={'id': 'foo'}, positional_metadata={'quality': range(4)})) def test_contains(self): seq = Sequence("#@ACGT,24.13**02") tested = 0 for c in self.sequence_kinds: tested += 1 self.assertTrue(c(',24') in seq) self.assertTrue(c('*') in seq) self.assertTrue(c('') in seq) self.assertFalse(c("$") in seq) self.assertFalse(c("AGT") in seq) self.assertEqual(tested, 4) def test_contains_sequence_subclass(self): with self.assertRaises(TypeError): SequenceSubclass("A") in Sequence("AAA") self.assertTrue(SequenceSubclass("A").values in Sequence("AAA")) def test_hash(self): with self.assertRaises(TypeError): hash(Sequence("ABCDEFG")) self.assertNotIsInstance(Sequence("ABCDEFG"), Hashable) def test_iter_has_positional_metadata(self): tested = False seq = 
Sequence("0123456789", metadata={'id': 'a', 'desc': 'b'}, positional_metadata={'qual': np.arange(10, dtype=np.int64)}) for i, s in enumerate(seq): tested = True self.assertEqual(s, Sequence(str(i), metadata={'id': 'a', 'desc': 'b'}, positional_metadata={'qual': np.asarray([i], dtype=np.int64)})) self.assertTrue(tested) def test_iter_no_positional_metadata(self): tested = False seq = Sequence("0123456789", metadata={'id': 'a', 'desc': 'b'}) for i, s in enumerate(seq): tested = True self.assertEqual(s, Sequence(str(i), metadata={'id': 'a', 'desc': 'b'})) self.assertTrue(tested) def test_reversed_has_positional_metadata(self): tested = False seq = Sequence("0123456789", metadata={'id': 'a', 'desc': 'b'}, positional_metadata={'qual': np.arange(10, dtype=np.int64)}) for i, s in enumerate(reversed(seq)): tested = True self.assertEqual(s, Sequence(str(9 - i), metadata={'id': 'a', 'desc': 'b'}, positional_metadata={'qual': np.asarray([9 - i], dtype=np.int64)})) self.assertTrue(tested) def test_reversed_no_positional_metadata(self): tested = False seq = Sequence("0123456789", metadata={'id': 'a', 'desc': 'b'}) for i, s in enumerate(reversed(seq)): tested = True self.assertEqual(s, Sequence(str(9 - i), metadata={'id': 'a', 'desc': 'b'})) self.assertTrue(tested) def test_repr(self): # basic sanity checks -- more extensive testing of formatting and # special cases is performed in SequenceReprDoctests below. here we # only test that pieces of the repr are present. 
these tests also # exercise coverage in case doctests stop counting towards coverage in # the future # minimal obs = repr(Sequence('')) self.assertEqual(obs.count('\n'), 4) self.assertTrue(obs.startswith('Sequence')) self.assertIn('length: 0', obs) self.assertTrue(obs.endswith('-')) # no metadata obs = repr(Sequence('ACGT')) self.assertEqual(obs.count('\n'), 5) self.assertTrue(obs.startswith('Sequence')) self.assertIn('length: 4', obs) self.assertTrue(obs.endswith('0 ACGT')) # metadata and positional metadata of mixed types obs = repr( Sequence( 'ACGT', metadata={'foo': 'bar', b'bar': 33.33, None: True, False: {}, (1, 2): 3, 'acb' * 100: "'", 10: 11}, positional_metadata={'foo': range(4), 42: ['a', 'b', [], 'c']})) self.assertEqual(obs.count('\n'), 16) self.assertTrue(obs.startswith('Sequence')) self.assertIn('None: True', obs) self.assertIn('\'foo\': \'bar\'', obs) self.assertIn('42: ', obs) self.assertIn('\'foo\': ', obs) self.assertIn('length: 4', obs) self.assertTrue(obs.endswith('0 ACGT')) # sequence spanning > 5 lines obs = repr(Sequence('A' * 301)) self.assertEqual(obs.count('\n'), 9) self.assertTrue(obs.startswith('Sequence')) self.assertIn('length: 301', obs) self.assertIn('...', obs) self.assertTrue(obs.endswith('300 A')) def test_str(self): self.assertEqual(str(Sequence("GATTACA")), "GATTACA") self.assertEqual(str(Sequence("ACCGGTACC")), "ACCGGTACC") self.assertEqual(str(Sequence("GREG")), "GREG") self.assertEqual( str(Sequence("ABC", positional_metadata={'quality': [1, 2, 3]})), "ABC") self.assertIs(type(str(Sequence("A"))), str) def test_count(self): def construct_char_array(s): return np.frombuffer(s.encode('ascii'), dtype='|S1') def construct_uint8_array(s): return np.frombuffer(s.encode('ascii'), dtype=np.uint8) seq = Sequence("1234567899876555") tested = 0 for c in self.sequence_kinds: tested += 1 self.assertEqual(seq.count(c('4')), 1) self.assertEqual(seq.count(c('8')), 2) self.assertEqual(seq.count(c('5')), 4) 
self.assertEqual(seq.count(c('555')), 1) self.assertEqual(seq.count(c('555'), 0, 4), 0) self.assertEqual(seq.count(c('555'), start=0, end=4), 0) self.assertEqual(seq.count(c('5'), start=10), 3) self.assertEqual(seq.count(c('5'), end=10), 1) with self.assertRaises(ValueError): seq.count(c('')) self.assertEqual(tested, 4) def test_count_on_subclass(self): with self.assertRaises(TypeError) as cm: Sequence("abcd").count(SequenceSubclass("a")) self.assertIn("Sequence", str(cm.exception)) self.assertIn("SequenceSubclass", str(cm.exception)) def test_replace_sanity(self): seq = Sequence('AAGCATGCCCTTTACATTTG') index = self._make_index('10011011001111110111') obs = seq.replace(index, '_') exp = Sequence('_AG__T__CC______T___') self.assertEqual(obs, exp) def test_replace_index_array(self): seq = Sequence('TCGGGTGTTGTGCAACCACC') for _type in list, tuple, np.array, pd.Series: index = _type([0, 2, 5, 8, 9]) obs = seq.replace(index, '-') exp = Sequence('-C-GG-GT--TGCAACCACC') self.assertEqual(obs, exp) def test_replace_iterable_slices(self): seq = Sequence('CATTATGGACCCAGCGTGCC') slices = (slice(0, 5), slice(8, 12), slice(15, 17)) mixed_slices = (0, 1, 2, 3, 4, slice(8, 12), 15, 16) for _type in (lambda x: x, list, tuple, lambda x: np.array(tuple(x)), lambda x: pd.Series(tuple(x))): index = (_type(slices), _type(mixed_slices)) obs_slices = seq.replace(index[0], '-') obs_mixed = seq.replace(index[1], '-') exp = Sequence('-----TGG----AGC--GCC') self.assertEqual(obs_slices, exp) self.assertEqual(obs_mixed, exp) def test_replace_index_in_positional_metadata(self): positional_metadata = {'where': self._make_index('001110110' '10001110000')} seq = Sequence('AAGATTGATACCACAGTTGT', positional_metadata=positional_metadata) obs = seq.replace('where', '-') exp = Sequence('AA---T--T-CCA---TTGT', positional_metadata=positional_metadata) self.assertEqual(obs, exp) def test_replace_does_not_mutate_original(self): seq = Sequence('ATCG') index = self._make_index('0011') seq.replace(index, '-') 
obs = seq exp = Sequence('ATCG') self.assertEqual(obs, exp) def test_replace_with_metadata(self): seq = Sequence('GCACGGCAAGAAGCGCCCCA', metadata={'NM': 'Kestrel Gorlick'}, positional_metadata={'diff': list('01100001110010001100')}) seq.interval_metadata.add([(0, 1)], metadata={'gene': 'sagA'}) index = self._make_index('01100001110010001100') obs = seq.replace(index, '-') exp = Sequence('G--CGGC---AA-CGC--CA', metadata={'NM': 'Kestrel Gorlick'}, positional_metadata={'diff': list('01100001110010001100')}) exp.interval_metadata.add([(0, 1)], metadata={'gene': 'sagA'}) self.assertEqual(obs, exp) def test_replace_with_subclass(self): seq = DNA('CGACAACCGATGTGCTGTAA') index = self._make_index('10101000111111110011') obs = seq.replace(index, '-') exp = DNA('-G-C-ACC--------GT--') self.assertEqual(obs, exp) def test_replace_with_bytes(self): seq = Sequence('ABC123') obs = seq.replace([1, 3, 5], b'*') self.assertEqual(obs, Sequence('A*C*2*')) def test_replace_invalid_char_for_type_error(self): seq = DNA('TAAACGGAACGCTACGTCTG') index = self._make_index('01000001101011001001') with self.assertRaisesRegex(ValueError, r"Invalid character.*'F'"): seq.replace(index, 'F') def test_replace_invalid_char_error(self): seq = Sequence('GGGAGCTAGA') index = self._make_index('1000101110') with self.assertRaisesRegex(UnicodeEncodeError, r"can't encode character.*not in " r"range\(128\)"): seq.replace(index, '\uFFFF') def test_replace_non_single_character_error(self): seq = Sequence('CCGAACTGTC') index = self._make_index('1100110011') with self.assertRaisesRegex(TypeError, r'string of length 2 found'): seq.replace(index, 'AB') def _make_index(self, bools): return [bool(int(char)) for char in bools] def test_lowercase_mungeable_key(self): # NOTE: This test relies on Sequence._munge_to_index_array working # properly. 
If the internal implementation of the lowercase method # changes to no longer use _munge_to_index_array, this test may need # to be updated to cover cases currently covered by # _munge_to_index_array self.assertEqual('AAAAaaaa', self.lowercase_seq.lowercase('key')) def test_lowercase_array_key(self): # NOTE: This test relies on Sequence._munge_to_index_array working # properly. If the internal implementation of the lowercase method # changes to no longer use _munge_to_index_array, this test may need # to be updated to cover cases currently covered by # _munge_to_index_array self.assertEqual('aaAAaaaa', self.lowercase_seq.lowercase( np.array([True, True, False, False, True, True, True, True]))) self.assertEqual('AaAAaAAA', self.lowercase_seq.lowercase([1, 4])) def test_matches(self): tested = 0 for constructor in self.sequence_kinds: tested += 1 seq1 = Sequence("AACCEEGG") seq2 = constructor("ABCDEFGH") expected = np.array([True, False] * 4) npt.assert_equal(seq1.matches(seq2), expected) self.assertEqual(tested, 4) def test_matches_on_subclass(self): seq1 = Sequence("AACCEEGG") seq2 = SequenceSubclass("ABCDEFGH") with self.assertRaises(TypeError): seq1.matches(seq2) def test_matches_unequal_length(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("TOOLONGTOCOMPARE") with self.assertRaises(ValueError): seq1.matches(seq2) def test_mismatches(self): tested = 0 for constructor in self.sequence_kinds: tested += 1 seq1 = Sequence("AACCEEGG") seq2 = constructor("ABCDEFGH") expected = np.array([False, True] * 4) npt.assert_equal(seq1.mismatches(seq2), expected) self.assertEqual(tested, 4) def test_mismatches_on_subclass(self): seq1 = Sequence("AACCEEGG") seq2 = SequenceSubclass("ABCDEFGH") with self.assertRaises(TypeError): seq1.mismatches(seq2) def test_mismatches_unequal_length(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("TOOLONGTOCOMPARE") with self.assertRaises(ValueError): seq1.mismatches(seq2) def test_mismatch_frequency(self): seq1 = Sequence("AACCEEGG") seq2 
= Sequence("ABCDEFGH") seq3 = Sequence("TTTTTTTT") self.assertIs(type(seq1.mismatch_frequency(seq1)), int) self.assertEqual(seq1.mismatch_frequency(seq1), 0) self.assertEqual(seq1.mismatch_frequency(seq2), 4) self.assertEqual(seq1.mismatch_frequency(seq3), 8) def test_mismatch_frequency_relative(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("ABCDEFGH") seq3 = Sequence("TTTTTTTT") self.assertIs(type(seq1.mismatch_frequency(seq1, relative=True)), float) self.assertEqual(seq1.mismatch_frequency(seq1, relative=True), 0.0) self.assertEqual(seq1.mismatch_frequency(seq2, relative=True), 0.5) self.assertEqual(seq1.mismatch_frequency(seq3, relative=True), 1.0) def test_mismatch_frequency_unequal_length(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("TOOLONGTOCOMPARE") with self.assertRaises(ValueError): seq1.mismatch_frequency(seq2) def test_mismatch_frequence_on_subclass(self): seq1 = Sequence("AACCEEGG") seq2 = SequenceSubclass("ABCDEFGH") with self.assertRaises(TypeError): seq1.mismatch_frequency(seq2) def test_match_frequency(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("ABCDEFGH") seq3 = Sequence("TTTTTTTT") self.assertIs(type(seq1.match_frequency(seq1)), int) self.assertEqual(seq1.match_frequency(seq1), 8) self.assertEqual(seq1.match_frequency(seq2), 4) self.assertEqual(seq1.match_frequency(seq3), 0) def test_match_frequency_relative(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("ABCDEFGH") seq3 = Sequence("TTTTTTTT") self.assertIs(type(seq1.match_frequency(seq1, relative=True)), float) self.assertEqual(seq1.match_frequency(seq1, relative=True), 1.0) self.assertEqual(seq1.match_frequency(seq2, relative=True), 0.5) self.assertEqual(seq1.match_frequency(seq3, relative=True), 0.0) def test_match_frequency_unequal_length(self): seq1 = Sequence("AACCEEGG") seq2 = Sequence("TOOLONGTOCOMPARE") with self.assertRaises(ValueError): seq1.match_frequency(seq2) def test_match_frequency_on_subclass(self): seq1 = Sequence("AACCEEGG") seq2 = 
SequenceSubclass("ABCDEFGH") with self.assertRaises(TypeError): seq1.match_frequency(seq2) def test_index(self): tested = 0 for c in self.sequence_kinds: tested += 1 seq = Sequence("ABCDEFG@@ABCDFOO") self.assertEqual(seq.index(c("A")), 0) self.assertEqual(seq.index(c("@")), 7) self.assertEqual(seq.index(c("@@")), 7) with self.assertRaises(ValueError): seq.index("A", start=1, end=5) self.assertEqual(tested, 4) def test_index_on_subclass(self): with self.assertRaises(TypeError): Sequence("ABCDEFG").index(SequenceSubclass("A")) self.assertEqual( SequenceSubclass("ABCDEFG").index(SequenceSubclass("A")), 0) def test_frequencies_empty_sequence(self): seq = Sequence('') self.assertEqual(seq.frequencies(), {}) self.assertEqual(seq.frequencies(relative=True), {}) self.assertEqual(seq.frequencies(chars=set()), {}) self.assertEqual(seq.frequencies(chars=set(), relative=True), {}) self.assertEqual(seq.frequencies(chars={'a', 'b'}), {'a': 0, 'b': 0}) # use npt.assert_equal to explicitly handle nan comparisons npt.assert_equal(seq.frequencies(chars={'a', 'b'}, relative=True), {'a': np.nan, 'b': np.nan}) def test_frequencies_observed_chars(self): seq = Sequence('x') self.assertEqual(seq.frequencies(), {'x': 1}) self.assertEqual(seq.frequencies(relative=True), {'x': 1.0}) seq = Sequence('xYz') self.assertEqual(seq.frequencies(), {'x': 1, 'Y': 1, 'z': 1}) self.assertEqual(seq.frequencies(relative=True), {'x': 1/3, 'Y': 1/3, 'z': 1/3}) seq = Sequence('zzz') self.assertEqual(seq.frequencies(), {'z': 3}) self.assertEqual(seq.frequencies(relative=True), {'z': 1.0}) seq = Sequence('xYzxxZz') self.assertEqual(seq.frequencies(), {'x': 3, 'Y': 1, 'Z': 1, 'z': 2}) self.assertEqual(seq.frequencies(relative=True), {'x': 3/7, 'Y': 1/7, 'Z': 1/7, 'z': 2/7}) seq = Sequence('\t ') self.assertEqual(seq.frequencies(), {'\t': 1, ' ': 3}) self.assertEqual(seq.frequencies(relative=True), {'\t': 1/4, ' ': 3/4}) seq = Sequence('aabbcc', metadata={'foo': 'bar'}, positional_metadata={'foo': range(6)}) 
self.assertEqual(seq.frequencies(), {'a': 2, 'b': 2, 'c': 2}) self.assertEqual(seq.frequencies(relative=True), {'a': 2/6, 'b': 2/6, 'c': 2/6}) def test_frequencies_specified_chars(self): seq = Sequence('abcbca') self.assertEqual(seq.frequencies(chars=set()), {}) self.assertEqual(seq.frequencies(chars=set(), relative=True), {}) self.assertEqual(seq.frequencies(chars='a'), {'a': 2}) self.assertEqual(seq.frequencies(chars='a', relative=True), {'a': 2/6}) self.assertEqual(seq.frequencies(chars={'a'}), {'a': 2}) self.assertEqual(seq.frequencies(chars={'a'}, relative=True), {'a': 2/6}) self.assertEqual(seq.frequencies(chars={'a', 'b'}), {'a': 2, 'b': 2}) self.assertEqual(seq.frequencies(chars={'a', 'b'}, relative=True), {'a': 2/6, 'b': 2/6}) self.assertEqual(seq.frequencies(chars={'a', 'b', 'd'}), {'a': 2, 'b': 2, 'd': 0}) self.assertEqual(seq.frequencies(chars={'a', 'b', 'd'}, relative=True), {'a': 2/6, 'b': 2/6, 'd': 0.0}) self.assertEqual(seq.frequencies(chars={'x', 'y', 'z'}), {'x': 0, 'y': 0, 'z': 0}) self.assertEqual(seq.frequencies(chars={'x', 'y', 'z'}, relative=True), {'x': 0.0, 'y': 0.0, 'z': 0.0}) def test_frequencies_chars_varied_type(self): seq = Sequence('zabczzzabcz') # single character case (shortcut) chars = b'z' self.assertEqual(seq.frequencies(chars=chars), {b'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {b'z': 5/11}) chars = 'z' self.assertEqual(seq.frequencies(chars=chars), {'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {'z': 5/11}) chars = np.frombuffer('z'.encode('ascii'), dtype='|S1')[0] self.assertEqual(seq.frequencies(chars=chars), {b'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {b'z': 5/11}) # set of characters, some present, some not chars = {b'x', b'z'} self.assertEqual(seq.frequencies(chars=chars), {b'x': 0, b'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {b'x': 0.0, b'z': 5/11}) chars = {'x', 'z'} self.assertEqual(seq.frequencies(chars=chars), 
{'x': 0, 'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {'x': 0.0, 'z': 5/11}) chars = { np.frombuffer('x'.encode('ascii'), dtype='|S1')[0], np.frombuffer('z'.encode('ascii'), dtype='|S1')[0] } self.assertEqual(seq.frequencies(chars=chars), {b'x': 0, b'z': 5}) self.assertEqual(seq.frequencies(chars=chars, relative=True), {b'x': 0.0, b'z': 5/11}) def test_frequencies_equivalent_to_kmer_frequencies_k_of_1(self): seq = Sequence('abcabc') exp = {'a': 2, 'b': 2, 'c': 2} self.assertEqual(seq.frequencies(chars=None), exp) self.assertEqual(seq.kmer_frequencies(k=1), exp) exp = {'a': 2/6, 'b': 2/6, 'c': 2/6} self.assertEqual(seq.frequencies(chars=None, relative=True), exp) self.assertEqual(seq.kmer_frequencies(k=1, relative=True), exp) def test_frequencies_passing_observed_chars_equivalent_to_default(self): seq = Sequence('abcabc') exp = {'a': 2, 'b': 2, 'c': 2} self.assertEqual(seq.frequencies(chars=None), exp) self.assertEqual(seq.frequencies(chars=seq.observed_chars), exp) exp = {'a': 2/6, 'b': 2/6, 'c': 2/6} self.assertEqual(seq.frequencies(chars=None, relative=True), exp) self.assertEqual( seq.frequencies(chars=seq.observed_chars, relative=True), exp) def test_frequencies_invalid_chars(self): seq = Sequence('abcabc') with self.assertRaisesRegex(ValueError, r'0 characters'): seq.frequencies(chars='') with self.assertRaisesRegex(ValueError, r'0 characters'): seq.frequencies(chars={''}) with self.assertRaisesRegex(ValueError, r'2 characters'): seq.frequencies(chars='ab') with self.assertRaisesRegex(ValueError, r'2 characters'): seq.frequencies(chars={'b', 'ab'}) with self.assertRaisesRegex(TypeError, r'string.*NoneType'): seq.frequencies(chars={'a', None}) with self.assertRaisesRegex(ValueError, r'outside the range'): seq.frequencies(chars='\u1F30') with self.assertRaisesRegex(ValueError, r'outside the range'): seq.frequencies(chars={'c', '\u1F30'}) with self.assertRaisesRegex(TypeError, r'set.*int'): seq.frequencies(chars=42) def 
_compare_kmers_results(self, observed, expected): for obs, exp in itertools.zip_longest(observed, expected, fillvalue=None): self.assertEqual(obs, exp) def test_iter_kmers(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) expected = [ Sequence('G', positional_metadata={'quality': [0]}), Sequence('A', positional_metadata={'quality': [1]}), Sequence('T', positional_metadata={'quality': [2]}), Sequence('T', positional_metadata={'quality': [3]}), Sequence('A', positional_metadata={'quality': [4]}), Sequence('C', positional_metadata={'quality': [5]}), Sequence('A', positional_metadata={'quality': [6]}) ] self._compare_kmers_results( seq.iter_kmers(1, overlap=False), expected) expected = [ Sequence('GA', positional_metadata={'quality': [0, 1]}), Sequence('TT', positional_metadata={'quality': [2, 3]}), Sequence('AC', positional_metadata={'quality': [4, 5]}) ] self._compare_kmers_results( seq.iter_kmers(2, overlap=False), expected) expected = [ Sequence('GAT', positional_metadata={'quality': [0, 1, 2]}), Sequence('TAC', positional_metadata={'quality': [3, 4, 5]}) ] self._compare_kmers_results( seq.iter_kmers(3, overlap=False), expected) expected = [ Sequence('GATTACA', positional_metadata={'quality': [0, 1, 2, 3, 4, 5, 6]}) ] self._compare_kmers_results( seq.iter_kmers(7, overlap=False), expected) expected = [] self._compare_kmers_results( seq.iter_kmers(8, overlap=False), expected) self.assertIs(type(seq.iter_kmers(1)), GeneratorType) def test_iter_kmers_no_positional_metadata(self): seq = Sequence('GATTACA') expected = [ Sequence('G'), Sequence('A'), Sequence('T'), Sequence('T'), Sequence('A'), Sequence('C'), Sequence('A') ] self._compare_kmers_results( seq.iter_kmers(1, overlap=False), expected) expected = [ Sequence('GA'), Sequence('TT'), Sequence('AC') ] self._compare_kmers_results( seq.iter_kmers(2, overlap=False), expected) expected = [ Sequence('GAT'), Sequence('TAC') ] self._compare_kmers_results( seq.iter_kmers(3, overlap=False), 
expected) expected = [ Sequence('GATTACA') ] self._compare_kmers_results( seq.iter_kmers(7, overlap=False), expected) expected = [] self._compare_kmers_results( seq.iter_kmers(8, overlap=False), expected) self.assertIs(type(seq.iter_kmers(1)), GeneratorType) def test_iter_kmers_with_overlap(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) expected = [ Sequence('G', positional_metadata={'quality': [0]}), Sequence('A', positional_metadata={'quality': [1]}), Sequence('T', positional_metadata={'quality': [2]}), Sequence('T', positional_metadata={'quality': [3]}), Sequence('A', positional_metadata={'quality': [4]}), Sequence('C', positional_metadata={'quality': [5]}), Sequence('A', positional_metadata={'quality': [6]}) ] self._compare_kmers_results( seq.iter_kmers(1, overlap=True), expected) expected = [ Sequence('GA', positional_metadata={'quality': [0, 1]}), Sequence('AT', positional_metadata={'quality': [1, 2]}), Sequence('TT', positional_metadata={'quality': [2, 3]}), Sequence('TA', positional_metadata={'quality': [3, 4]}), Sequence('AC', positional_metadata={'quality': [4, 5]}), Sequence('CA', positional_metadata={'quality': [5, 6]}) ] self._compare_kmers_results( seq.iter_kmers(2, overlap=True), expected) expected = [ Sequence('GAT', positional_metadata={'quality': [0, 1, 2]}), Sequence('ATT', positional_metadata={'quality': [1, 2, 3]}), Sequence('TTA', positional_metadata={'quality': [2, 3, 4]}), Sequence('TAC', positional_metadata={'quality': [3, 4, 5]}), Sequence('ACA', positional_metadata={'quality': [4, 5, 6]}) ] self._compare_kmers_results( seq.iter_kmers(3, overlap=True), expected) expected = [ Sequence('GATTACA', positional_metadata={'quality': [0, 1, 2, 3, 4, 5, 6]}) ] self._compare_kmers_results( seq.iter_kmers(7, overlap=True), expected) expected = [] self._compare_kmers_results( seq.iter_kmers(8, overlap=True), expected) self.assertIs(type(seq.iter_kmers(1)), GeneratorType) def 
test_iter_kmers_with_overlap_no_positional_metadata(self): seq = Sequence('GATTACA') expected = [ Sequence('G'), Sequence('A'), Sequence('T'), Sequence('T'), Sequence('A'), Sequence('C'), Sequence('A') ] self._compare_kmers_results( seq.iter_kmers(1, overlap=True), expected) expected = [ Sequence('GA'), Sequence('AT'), Sequence('TT'), Sequence('TA'), Sequence('AC'), Sequence('CA') ] self._compare_kmers_results( seq.iter_kmers(2, overlap=True), expected) expected = [ Sequence('GAT'), Sequence('ATT'), Sequence('TTA'), Sequence('TAC'), Sequence('ACA') ] self._compare_kmers_results( seq.iter_kmers(3, overlap=True), expected) expected = [ Sequence('GATTACA') ] self._compare_kmers_results( seq.iter_kmers(7, overlap=True), expected) expected = [] self._compare_kmers_results( seq.iter_kmers(8, overlap=True), expected) self.assertIs(type(seq.iter_kmers(1)), GeneratorType) def test_iter_kmers_large_k(self): """Addressing issue 1723.""" # k larger than sequence length seq = Sequence('TATATA') expected = [] self._compare_kmers_results(seq.iter_kmers(10), expected) # k equal to sequence length expected = [Sequence('TATATA'), ] self._compare_kmers_results(seq.iter_kmers(6), expected) # with positional metadata seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) expected = [] self._compare_kmers_results(seq.iter_kmers(10), expected) def test_iter_kmers_invalid_k(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) with self.assertRaises(ValueError): list(seq.iter_kmers(0)) with self.assertRaises(ValueError): list(seq.iter_kmers(-42)) def test_iter_kmers_invalid_k_no_positional_metadata(self): seq = Sequence('GATTACA') with self.assertRaises(ValueError): list(seq.iter_kmers(0)) with self.assertRaises(ValueError): list(seq.iter_kmers(-42)) def test_iter_kmers_different_sequences(self): seq = Sequence('HE..--..LLO', metadata={'id': 'hello', 'desc': 'gapped hello'}, positional_metadata={'quality': range(11)}) expected = [ Sequence('HE.', 
positional_metadata={'quality': [0, 1, 2]}, metadata={'id': 'hello', 'desc': 'gapped hello'}), Sequence('.--', positional_metadata={'quality': [3, 4, 5]}, metadata={'id': 'hello', 'desc': 'gapped hello'}), Sequence('..L', positional_metadata={'quality': [6, 7, 8]}, metadata={'id': 'hello', 'desc': 'gapped hello'}) ] self._compare_kmers_results(seq.iter_kmers(3, overlap=False), expected) def test_iter_kmers_different_sequences_no_positional_metadata(self): seq = Sequence('HE..--..LLO', metadata={'id': 'hello', 'desc': 'gapped hello'}) expected = [ Sequence('HE.', metadata={'id': 'hello', 'desc': 'gapped hello'}), Sequence('.--', metadata={'id': 'hello', 'desc': 'gapped hello'}), Sequence('..L', metadata={'id': 'hello', 'desc': 'gapped hello'}) ] self._compare_kmers_results(seq.iter_kmers(3, overlap=False), expected) def test_iter_kmers_empty_sequence(self): seq = Sequence('') expected = [] self._compare_kmers_results(seq.iter_kmers(3, overlap=False), expected) def test_iter_kmers_empty_sequence_with_positional_metadata(self): seq = Sequence('', positional_metadata={'quality': []}) expected = [] self._compare_kmers_results(seq.iter_kmers(3, overlap=False), expected) def test_kmer_frequencies_empty_sequence(self): seq = Sequence('') self.assertEqual(seq.kmer_frequencies(1), {}) self.assertEqual(seq.kmer_frequencies(1, overlap=False), {}) self.assertEqual(seq.kmer_frequencies(1, relative=True), {}) self.assertEqual(seq.kmer_frequencies(1, relative=True, overlap=False), {}) def test_kmer_frequencies(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) # overlap = True expected = {'G': 1, 'A': 3, 'T': 2, 'C': 1} self.assertEqual(seq.kmer_frequencies(1, overlap=True), expected) expected = {'GAT': 1, 'ATT': 1, 'TTA': 1, 'TAC': 1, 'ACA': 1} self.assertEqual(seq.kmer_frequencies(3, overlap=True), expected) expected = {} self.assertEqual(seq.kmer_frequencies(8, overlap=True), expected) # overlap = False expected = {'GAT': 1, 'TAC': 1} 
self.assertEqual(seq.kmer_frequencies(3, overlap=False), expected) expected = {'GATTACA': 1} self.assertEqual(seq.kmer_frequencies(7, overlap=False), expected) expected = {} self.assertEqual(seq.kmer_frequencies(8, overlap=False), expected) def test_kmer_frequencies_relative(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) # overlap = True expected = {'A': 3/7, 'C': 1/7, 'G': 1/7, 'T': 2/7} self.assertEqual(seq.kmer_frequencies(1, overlap=True, relative=True), expected) expected = {'GAT': 1/5, 'ATT': 1/5, 'TTA': 1/5, 'TAC': 1/5, 'ACA': 1/5} self.assertEqual(seq.kmer_frequencies(3, overlap=True, relative=True), expected) expected = {} self.assertEqual(seq.kmer_frequencies(8, overlap=True, relative=True), expected) # overlap = False expected = {'GAT': 1/2, 'TAC': 1/2} self.assertEqual(seq.kmer_frequencies(3, overlap=False, relative=True), expected) expected = {'GATTACA': 1.0} self.assertEqual(seq.kmer_frequencies(7, overlap=False, relative=True), expected) expected = {} self.assertEqual(seq.kmer_frequencies(8, overlap=False, relative=True), expected) def test_kmer_frequencies_floating_point_precision(self): # Test that a sequence having no variation in k-words yields a # frequency of exactly 1.0. Note that it is important to use # self.assertEqual here instead of self.assertAlmostEqual because we # want to test for exactly 1.0. A previous implementation of # Sequence.kmer_frequencies(relative=True) added (1 / num_words) for # each occurrence of a k-word to compute the frequencies (see # https://github.com/scikit-bio/scikit-bio/issues/801). In certain # cases, this yielded a frequency slightly less than 1.0 due to # roundoff error. The test case here uses a sequence with 10 characters # that are all identical and computes k-word frequencies with k=1. 
This # test case exposes the roundoff error present in the previous # implementation because there are 10 k-words (which are all # identical), so 1/10 added 10 times yields a number slightly less than # 1.0. This occurs because 1/10 cannot be represented exactly as a # floating point number. seq = Sequence('AAAAAAAAAA') self.assertEqual(seq.kmer_frequencies(1, relative=True), {'A': 1.0}) def test_find_with_regex(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) pat = re.compile('(T+A)(CA)') obs = list(seq.find_with_regex(pat)) exp = [slice(2, 5), slice(5, 7)] self.assertEqual(obs, exp) self.assertIs(type(seq.find_with_regex(pat)), GeneratorType) def test_find_with_regex_string_as_input(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) pat = '(T+A)(CA)' obs = list(seq.find_with_regex(pat)) exp = [slice(2, 5), slice(5, 7)] self.assertEqual(obs, exp) self.assertIs(type(seq.find_with_regex(pat)), GeneratorType) def test_find_with_regex_no_groups(self): seq = Sequence('GATTACA', positional_metadata={'quality': range(7)}) pat = re.compile('(FOO)') self.assertEqual(list(seq.find_with_regex(pat)), []) def test_find_with_regex_ignore_no_difference(self): seq = Sequence('..ABCDEFG..') pat = "([A-Z]+)" exp = [slice(2, 9)] self.assertEqual(list(seq.find_with_regex(pat)), exp) obs = seq.find_with_regex( pat, ignore=np.array([1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1], dtype=bool)) self.assertEqual(list(obs), exp) def test_find_with_regex_ignore(self): obs = Sequence('A..A..BBAAB.A..AB..A.').find_with_regex( "(A+)", ignore=np.array([0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1], dtype=bool)) self.assertEqual(list(obs), [slice(0, 4), slice(8, 10), slice(12, 16), slice(19, 20)]) def test_find_with_regex_ignore_index_array(self): obs = Sequence('A..A..BBAAB.A..AB..A.').find_with_regex( "(A+)", ignore=np.array([1, 2, 4, 5, 11, 13, 14, 17, 18, 20])) self.assertEqual(list(obs), [slice(0, 4), slice(8, 10), slice(12, 16), 
slice(19, 20)]) def test_iter_contiguous_index_array(self): s = Sequence("0123456789abcdef") for c in list, tuple, np.array, pd.Series: exp = [Sequence("0123"), Sequence("89ab")] obs = s.iter_contiguous(c([0, 1, 2, 3, 8, 9, 10, 11])) self.assertEqual(list(obs), exp) def test_iter_contiguous_boolean_vector(self): s = Sequence("0123456789abcdef") for c in list, tuple, np.array, pd.Series: exp = [Sequence("0123"), Sequence("89ab")] obs = s.iter_contiguous(c(([True] * 4 + [False] * 4) * 2)) self.assertEqual(list(obs), exp) def test_iter_contiguous_iterable_slices(self): def spaced_out(): yield slice(0, 4) yield slice(8, 12) def contiguous(): yield slice(0, 4) yield slice(4, 8) yield slice(12, 16) s = Sequence("0123456789abcdef") for c in (lambda x: x, list, tuple, lambda x: np.array(tuple(x)), lambda x: pd.Series(tuple(x))): exp = [Sequence("0123"), Sequence("89ab")] obs = s.iter_contiguous(c(spaced_out())) self.assertEqual(list(obs), exp) exp = [Sequence("01234567"), Sequence("cdef")] obs = s.iter_contiguous(c(contiguous())) self.assertEqual(list(obs), exp) def test_iter_contiguous_with_max_length(self): s = Sequence("0123456789abcdef") for c in list, tuple, np.array, pd.Series: exp = [Sequence("234"), Sequence("678"), Sequence("abc")] obs = s.iter_contiguous(c([True, False, True, True] * 4), min_length=3) self.assertEqual(list(obs), exp) exp = [Sequence("0"), Sequence("234"), Sequence("678"), Sequence("abc"), Sequence("ef")] obs1 = list(s.iter_contiguous(c([True, False, True, True] * 4), min_length=1)) obs2 = list(s.iter_contiguous(c([True, False, True, True] * 4))) self.assertEqual(obs1, obs2) self.assertEqual(obs1, exp) def test_iter_contiguous_with_invert(self): def spaced_out(): yield slice(0, 4) yield slice(8, 12) def contiguous(): yield slice(0, 4) yield slice(4, 8) yield slice(12, 16) s = Sequence("0123456789abcdef") for c in (lambda x: x, list, tuple, lambda x: np.array(tuple(x)), lambda x: pd.Series(tuple(x))): exp = [Sequence("4567"), Sequence("cdef")] obs 
= s.iter_contiguous(c(spaced_out()), invert=True) self.assertEqual(list(obs), exp) exp = [Sequence("89ab")] obs = s.iter_contiguous(c(contiguous()), invert=True) self.assertEqual(list(obs), exp) def test_to_indices_observed(self): # arbitrary sequence seq = Sequence('hello') obs_idx, obs_alp = seq.to_indices() exp_idx, exp_alp = np.array([1, 0, 2, 2, 3]), 'ehlo' npt.assert_equal(obs_idx, exp_idx) self.assertEqual(obs_alp, exp_alp) # return ASCII code points obs_idx, obs_alp = seq.to_indices(return_codes=True) exp_alp = np.frombuffer('ehlo'.encode('ascii'), dtype=np.uint8) npt.assert_equal(obs_idx, exp_idx) npt.assert_equal(obs_alp, exp_alp) # grammared sequence obs_idx, obs_alp = DNA('GAGCTC').to_indices() npt.assert_equal(obs_idx, np.array([2, 0, 2, 1, 3, 1])) self.assertEqual(obs_alp, 'ACGT') def test_to_indices_alphabet(self): # arbitrary sequence seq = Sequence('hello') obs = seq.to_indices('oleh') exp = np.array([3, 2, 1, 1, 0]) npt.assert_equal(obs, exp) # grammared sequence seq = DNA('GAGCTC') obs = seq.to_indices('ACGT') exp = np.array([2, 0, 2, 1, 3, 1]) npt.assert_equal(obs, exp) # alphabet has duplicates msg = 'Alphabet contains duplicated characters.' with self.assertRaisesRegex(ValueError, msg): Sequence('hello').to_indices('there') # non-ASCII alphabet msg = 'Alphabet cannot be encoded as single ASCII characters.' 
with self.assertRaisesRegex(ValueError, msg): Sequence('hello').to_indices('how are you'.split()) def test_to_indices_submat(self): # basic nucleotides sm = SubstitutionMatrix.identity('ACGT', 1, -2) obs = DNA('GAGCTC').to_indices(sm) exp = np.array([2, 0, 2, 1, 3, 1]) npt.assert_equal(obs, exp) # extended nucleotides (and not sorted) # ATGCSWRYKMBVHDN sm = SubstitutionMatrix.by_name('NUC.4.4') obs = DNA('GAGRCTC').to_indices(sm) exp = np.array([2, 0, 2, 6, 3, 1, 3]) npt.assert_equal(obs, exp) # non-ASCII alphabet sm = SubstitutionMatrix.identity('how are you'.split(), 1, -2) msg = ('Alphabet in the substitution matrix are not single ASCII ' 'characters.') with self.assertRaisesRegex(ValueError, msg): Sequence('hello').to_indices(sm) def test_to_indices_wildcard(self): # default wildcard seq = DNA('GAGRCTC') obs = seq.to_indices('ACGTN') exp = np.array([2, 0, 2, 4, 1, 3, 1]) npt.assert_equal(obs, exp) # non-default wildcard obs = seq.to_indices('ACGTN', wildcard='A') exp = np.array([2, 0, 2, 0, 1, 3, 1]) npt.assert_equal(obs, exp) # invalid wildcard msg = 'Wildcard character "X" is not in the alphabet.' with self.assertRaisesRegex(ValueError, msg): seq.to_indices('ACGTN', wildcard='X') msg = 'Wildcard must be a single ASCII character.' 
with self.assertRaisesRegex(ValueError, msg): seq.to_indices('ACGTN', wildcard='hi') with self.assertRaisesRegex(ValueError, msg): seq.to_indices('ACGTN', wildcard=chr(200)) def test_to_indices_masked(self): # gaps are automatically identified and masked obs_idx, obs_alp = DNA('GAG-CTC').to_indices() self.assertTrue(isinstance(obs_idx, np.ma.MaskedArray)) npt.assert_equal(obs_idx.data, [2, 0, 2, 255, 1, 3, 1]) npt.assert_equal(obs_idx.mask, [0, 0, 0, 1, 0, 0, 0]) self.assertEqual(obs_alp, 'ACGT') # force masking regardless of gap presence obs_idx, obs_alp = DNA('GAGCTC').to_indices(mask_gaps=True) self.assertTrue(isinstance(obs_idx, np.ma.MaskedArray)) npt.assert_equal(obs_idx.data, [2, 0, 2, 1, 3, 1]) npt.assert_equal(obs_idx.mask, [0, 0, 0, 0, 0, 0]) self.assertEqual(obs_alp, 'ACGT') # force not masking regardless of gap presence obs_idx, obs_alp = DNA('GAG-CTC').to_indices(mask_gaps=False) self.assertFalse(isinstance(obs_idx, np.ma.MaskedArray)) npt.assert_equal(obs_idx, [3, 1, 3, 0, 2, 4, 2]) self.assertEqual(obs_alp, '-ACGT') # gap character(s) are not defined msg = r'Gap character\(s\) are not defined for the sequence.' 
with self.assertRaisesRegex(ValueError, msg): Sequence('hello').to_indices(mask_gaps=True) # with alphabet obs = DNA('GAG-CTC').to_indices('ACGT') self.assertTrue(isinstance(obs, np.ma.MaskedArray)) npt.assert_equal(obs.data, [2, 0, 2, 255, 1, 3, 1]) npt.assert_equal(obs.mask, [0, 0, 0, 1, 0, 0, 0]) def test_copy_without_metadata(self): # shallow vs deep copy with sequence only should be equivalent for copy_method in copy.copy, copy.deepcopy: seq = Sequence('ACGT') seq_copy = copy_method(seq) self.assertEqual(seq_copy, seq) self.assertIsNot(seq_copy, seq) self.assertIsNot(seq_copy._bytes, seq._bytes) def test_copy_with_metadata_shallow(self): seq = Sequence('ACGT', metadata={'foo': [1]}, positional_metadata={'bar': [[], [], [], []], 'baz': [42, 42, 42, 42]}) seq.interval_metadata.add([(0, 3)], metadata={'gene': ['sagA']}) seq_copy = copy.copy(seq) self.assertEqual(seq_copy, seq) self.assertIsNot(seq_copy, seq) self.assertIsNot(seq_copy._bytes, seq._bytes) self.assertIsNot(seq_copy._metadata, seq._metadata) self.assertIsNot(seq_copy._positional_metadata, seq._positional_metadata) self.assertIsNot(seq_copy._positional_metadata.values, seq._positional_metadata.values) self.assertIs(seq_copy._metadata['foo'], seq._metadata['foo']) self.assertIs(seq_copy._positional_metadata.loc[0, 'bar'], seq._positional_metadata.loc[0, 'bar']) self.assertIsNot(seq_copy.interval_metadata, seq.interval_metadata) self.assertIsNot(seq_copy.interval_metadata._intervals[0], seq.interval_metadata._intervals[0]) self.assertIsNot(seq_copy.interval_metadata._intervals[0].metadata, seq.interval_metadata._intervals[0].metadata) self.assertIs( seq_copy.interval_metadata._intervals[0].metadata['gene'], seq.interval_metadata._intervals[0].metadata['gene']) seq_copy.metadata['foo'].append(2) seq_copy.metadata['foo2'] = 42 self.assertEqual(seq_copy.metadata, {'foo': [1, 2], 'foo2': 42}) self.assertEqual(seq.metadata, {'foo': [1, 2]}) seq_copy.positional_metadata.loc[0, 'bar'].append(1) 
seq_copy.positional_metadata.loc[0, 'baz'] = 43 assert_data_frame_almost_equal( seq_copy.positional_metadata, pd.DataFrame({'bar': [[1], [], [], []], 'baz': [43, 42, 42, 42]})) assert_data_frame_almost_equal( seq.positional_metadata, pd.DataFrame({'bar': [[1], [], [], []], 'baz': [42, 42, 42, 42]})) def test_copy_with_metadata_deep(self): seq = Sequence('ACGT', metadata={'foo': [1]}, positional_metadata={'bar': [[], [], [], []], 'baz': [42, 42, 42, 42]}) seq.interval_metadata.add([(0, 3)], metadata={'gene': ['sagA']}) seq_copy = copy.deepcopy(seq) self.assertEqual(seq_copy, seq) self.assertIsNot(seq_copy, seq) self.assertIsNot(seq_copy._bytes, seq._bytes) self.assertIsNot(seq_copy._metadata, seq._metadata) self.assertIsNot(seq_copy._positional_metadata, seq._positional_metadata) self.assertIsNot(seq_copy._positional_metadata.values, seq._positional_metadata.values) self.assertIsNot(seq_copy._metadata['foo'], seq._metadata['foo']) self.assertIsNot(seq_copy._positional_metadata.loc[0, 'bar'], seq._positional_metadata.loc[0, 'bar']) self.assertIsNot(seq_copy.interval_metadata, seq.interval_metadata) self.assertIsNot(seq_copy.interval_metadata._intervals[0], seq.interval_metadata._intervals[0]) self.assertIsNot(seq_copy.interval_metadata._intervals[0].metadata, seq.interval_metadata._intervals[0].metadata) self.assertIsNot( seq_copy.interval_metadata._intervals[0].metadata['gene'], seq.interval_metadata._intervals[0].metadata['gene']) seq_copy.metadata['foo'].append(2) seq_copy.metadata['foo2'] = 42 self.assertEqual(seq_copy.metadata, {'foo': [1, 2], 'foo2': 42}) self.assertEqual(seq.metadata, {'foo': [1]}) seq_copy.positional_metadata.loc[0, 'bar'].append(1) seq_copy.positional_metadata.loc[0, 'baz'] = 43 assert_data_frame_almost_equal( seq_copy.positional_metadata, pd.DataFrame({'bar': [[1], [], [], []], 'baz': [43, 42, 42, 42]})) assert_data_frame_almost_equal( seq.positional_metadata, pd.DataFrame({'bar': [[], [], [], []], 'baz': [42, 42, 42, 42]})) def 
test_copy_preserves_read_only_flag_on_bytes(self): seq = Sequence('ACGT') seq_copy = copy.copy(seq) with self.assertRaises(ValueError): seq_copy._bytes[0] = 'B' def test_deepcopy_memo_is_respected(self): # basic test to ensure deepcopy's memo is passed through to recursive # deepcopy calls seq = Sequence('ACGT', metadata={'foo': 'bar'}) memo = {} copy.deepcopy(seq, memo) self.assertGreater(len(memo), 2) def test_munge_to_index_array_valid_index_array(self): s = Sequence('123456') for c in list, tuple, np.array, pd.Series: exp = np.array([1, 2, 3], dtype=int) obs = s._munge_to_index_array(c([1, 2, 3])) npt.assert_equal(obs, exp) exp = np.array([1, 3, 5], dtype=int) obs = s._munge_to_index_array(c([1, 3, 5])) npt.assert_equal(obs, exp) def test_munge_to_index_array_invalid_index_array(self): s = Sequence("12345678") for c in list, tuple, np.array, pd.Series: with self.assertRaises(ValueError): s._munge_to_index_array(c([3, 2, 1])) with self.assertRaises(ValueError): s._munge_to_index_array(c([5, 6, 7, 2])) with self.assertRaises(ValueError): s._munge_to_index_array(c([0, 1, 2, 1])) def test_munge_to_index_array_valid_bool_array(self): s = Sequence('123456') for c in list, tuple, np.array, pd.Series: exp = np.array([2, 3, 5], dtype=int) obs = s._munge_to_index_array( c([False, False, True, True, False, True])) npt.assert_equal(obs, exp) exp = np.array([], dtype=int) obs = s._munge_to_index_array( c([False] * 6)) npt.assert_equal(obs, exp) exp = np.arange(6) obs = s._munge_to_index_array( c([True] * 6)) npt.assert_equal(obs, exp) def test_munge_to_index_array_invalid_bool_array(self): s = Sequence('123456') for c in (list, tuple, lambda x: np.array(x, dtype=bool), lambda x: pd.Series(x, dtype=bool)): with self.assertRaises(ValueError): s._munge_to_index_array(c([])) with self.assertRaises(ValueError): s._munge_to_index_array(c([True])) with self.assertRaises(ValueError): s._munge_to_index_array(c([True] * 10)) def test_munge_to_index_array_valid_iterable(self): s = 
Sequence('') def slices_only(): return (slice(i, i+1) for i in range(0, 10, 2)) def mixed(): return (slice(i, i+1) if i % 2 == 0 else i for i in range(10)) for c in (lambda x: x, list, tuple, lambda x: np.array(tuple(x)), lambda x: pd.Series(tuple(x))): exp = np.arange(10, dtype=int) obs = s._munge_to_index_array(c(mixed())) npt.assert_equal(obs, exp) exp = np.arange(10, step=2, dtype=int) obs = s._munge_to_index_array(c(slices_only())) npt.assert_equal(obs, exp) def test_munge_to_index_array_invalid_iterable(self): s = Sequence('') def bad1(): yield "r" yield [1, 2, 3] def bad2(): yield 1 yield 'str' def bad3(): yield False yield True yield 2 def bad4(): yield np.array([False, True]) yield slice(2, 5) for c in (lambda x: x, list, tuple, lambda x: np.array(tuple(x)), lambda x: pd.Series(tuple(x))): with self.assertRaises(TypeError): s._munge_to_index_array(bad1()) with self.assertRaises(TypeError): s._munge_to_index_array(bad2()) with self.assertRaises(TypeError): s._munge_to_index_array(bad3()) with self.assertRaises(TypeError): s._munge_to_index_array(bad4()) def test_munge_to_index_array_valid_string(self): seq = Sequence('ACGTACGT', positional_metadata={'introns': [False, True, True, False, False, True, False, False]}) npt.assert_equal(np.array([1, 2, 5]), seq._munge_to_index_array('introns')) seq.positional_metadata['exons'] = ~seq.positional_metadata['introns'] npt.assert_equal(np.array([0, 3, 4, 6, 7]), seq._munge_to_index_array('exons')) def test_munge_to_index_array_invalid_string(self): seq_str = 'ACGT' seq = Sequence(seq_str, positional_metadata={'quality': range(len(seq_str))}) with self.assertRaisesRegex(ValueError, r"No positional metadata associated with " "key 'introns'"): seq._munge_to_index_array('introns') with self.assertRaisesRegex(TypeError, r"Column 'quality' in positional metadata " "does not correspond to a boolean " "vector"): seq._munge_to_index_array('quality') def test_munge_to_bytestring_return_bytes(self): seq = Sequence('') m = 
'dummy_method' str_inputs = ('', 'a', 'acgt') unicode_inputs = ('', 'a', 'acgt') byte_inputs = (b'', b'a', b'acgt') seq_inputs = (Sequence(''), Sequence('a'), Sequence('acgt')) all_inputs = str_inputs + unicode_inputs + byte_inputs + seq_inputs all_expected = [b'', b'a', b'acgt'] * 4 for input_, expected in zip(all_inputs, all_expected): observed = seq._munge_to_bytestring(input_, m) self.assertEqual(observed, expected) self.assertIs(type(observed), bytes) def test_munge_to_bytestring_unicode_out_of_ascii_range(self): seq = Sequence('') all_inputs = ('\x80', 'abc\x80', '\x80abc') for input_ in all_inputs: with self.assertRaisesRegex(UnicodeEncodeError, r"'ascii' codec can't encode character" r".*in position.*: ordinal not in" r" range\(128\)"): seq._munge_to_bytestring(input_, 'dummy_method') class TestDistance(TestSequenceBase): def test_mungeable_inputs_to_sequence(self): def metric(a, b): self.assertEqual(a, Sequence("abcdef")) self.assertEqual(b, Sequence("12bcef")) return 42.0 for constructor in self.sequence_kinds: seq1 = Sequence("abcdef") seq2 = constructor("12bcef") distance = seq1.distance(seq2, metric=metric) self.assertEqual(distance, 42.0) def test_mungeable_inputs_to_sequence_subclass(self): def metric(a, b): self.assertEqual(a, SequenceSubclass("abcdef")) self.assertEqual(b, SequenceSubclass("12bcef")) return -42.0 sequence_kinds = frozenset([ str, SequenceSubclass, lambda s: np.frombuffer(s.encode('ascii'), dtype='|S1'), lambda s: np.frombuffer(s.encode('ascii'), dtype=np.uint8)]) for constructor in sequence_kinds: seq1 = SequenceSubclass("abcdef") seq2 = constructor("12bcef") distance = seq1.distance(seq2, metric=metric) self.assertEqual(distance, -42.0) def test_sequence_type_mismatch(self): seq1 = SequenceSubclass("abcdef") seq2 = Sequence("12bcef") with self.assertRaisesRegex(TypeError, r'SequenceSubclass.*Sequence.*`distance`'): seq1.distance(seq2) with self.assertRaisesRegex(TypeError, r'Sequence.*SequenceSubclass.*`distance`'): 
seq2.distance(seq1) def test_munging_invalid_characters_to_self_type(self): with self.assertRaisesRegex(ValueError, r'Invalid characters.*X'): DNA("ACGT").distance("WXYZ") def test_munging_invalid_type_to_self_type(self): with self.assertRaises(TypeError): Sequence("ACGT").distance(42) def test_return_type_coercion(self): def metric(a, b): return 42 distance = Sequence('abc').distance('cba', metric=metric) self.assertIsInstance(distance, float) def test_invalid_return_type(self): def metric(a, b): return 'too far' with self.assertRaisesRegex(ValueError, r'string.*float'): Sequence('abc').distance('cba', metric=metric) def test_arbitrary_metric(self): def metric(x, y): return len(x) ** 2 + len(y) ** 2 seq1 = Sequence("12345678") seq2 = Sequence("1234") distance = seq1.distance(seq2, metric=metric) self.assertEqual(distance, 80.0) def test_scipy_hamming_metric_with_metadata(self): # test for #1254 seqs1 = [ Sequence("ACGT"), Sequence("ACGT", metadata={'id': 'abc'}), Sequence("ACGT", positional_metadata={'qual': range(4)}) ] seqs2 = [ Sequence("AAAA"), Sequence("AAAA", metadata={'id': 'def'}), Sequence("AAAA", positional_metadata={'qual': range(4, 8)}) ] for seqs in seqs1, seqs2: for seq1, seq2 in itertools.product(seqs, repeat=2): distance = seq1.distance(seq2, metric=scipy.spatial.distance.hamming) self.assertEqual(distance, 0.0) for seq1, seq2 in itertools.product(seqs1, seqs2): distance = seq1.distance(seq2, metric=scipy.spatial.distance.hamming) self.assertEqual(distance, 0.75) def test_default_metric_with_metadata(self): # test for #1254 seqs1 = [ Sequence("ACGT"), Sequence("ACGT", metadata={'id': 'abc'}), Sequence("ACGT", positional_metadata={'qual': range(4)}) ] seqs2 = [ Sequence("AAAA"), Sequence("AAAA", metadata={'id': 'def'}), Sequence("AAAA", positional_metadata={'qual': range(4, 8)}) ] for seqs in seqs1, seqs2: for seq1, seq2 in itertools.product(seqs, repeat=2): distance = seq1.distance(seq2) self.assertEqual(distance, 0.0) for seq1, seq2 in 
itertools.product(seqs1, seqs2): distance = seq1.distance(seq2) self.assertEqual(distance, 0.75) def test_default_metric_matches_hamming(self): seq1 = Sequence("abcdef") seq2 = Sequence("12bcef") seq_wrong = Sequence("abcdefghijklmnop") distance1 = seq1.distance(seq2) distance2 = skbio.sequence.distance.hamming(seq1, seq2) self.assertEqual(distance1, distance2) with self.assertRaises(ValueError): seq1.distance(seq_wrong) with self.assertRaises(ValueError): seq_wrong.distance(seq1) # NOTE: this must be a *separate* class for doctests only (no unit tests). nose # will not run the unit tests otherwise # TODO: check if this is still the case since nose is no longer used # # these doctests exercise the correct formatting of Sequence's repr in a # variety of situations. they are more extensive than the unit tests above # (TestSequence.test_repr) but cannot be relied upon for coverage (the unit # tests take care of this) class SequenceReprDoctests: r""">>> import pandas as pd >>> from skbio import Sequence Empty (minimal) sequence: >>> Sequence('') Sequence ------------- Stats: length: 0 ------------- Single character sequence: >>> Sequence('G') Sequence ------------- Stats: length: 1 ------------- 0 G Multicharacter sequence: >>> Sequence('ACGT') Sequence ------------- Stats: length: 4 ------------- 0 ACGT Full single line: >>> Sequence('A' * 60) Sequence ------------------------------------------------------------------- Stats: length: 60 ------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA Full single line with 1 character overflow: >>> Sequence('A' * 61) Sequence -------------------------------------------------------------------- Stats: length: 61 -------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 60 A Two full lines: >>> Sequence('T' * 120) Sequence 
-------------------------------------------------------------------- Stats: length: 120 -------------------------------------------------------------------- 0 TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT 60 TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT Two full lines with 1 character overflow: >>> Sequence('T' * 121) Sequence --------------------------------------------------------------------- Stats: length: 121 --------------------------------------------------------------------- 0 TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT 60 TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT TTTTTTTTTT 120 T Five full lines (maximum amount of information): >>> Sequence('A' * 300) Sequence --------------------------------------------------------------------- Stats: length: 300 --------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 60 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 120 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 180 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 240 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA Six lines starts "summarized" output: >>> Sequence('A' * 301) Sequence --------------------------------------------------------------------- Stats: length: 301 --------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 60 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA ... 240 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 300 A A naive algorithm would assume the width of the first column (noting position) based on the sequence's length alone. This can be off by one if the last position (in the last line) has a shorter width than the width calculated from the sequence's length. 
This test case ensures that only a single space is inserted between position 99960 and the first sequence chunk: >>> Sequence('A' * 100000) Sequence ----------------------------------------------------------------------- Stats: length: 100000 ----------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 60 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA ... 99900 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 99960 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA The largest sequence that can be displayed using six chunks per line: >>> Sequence('A' * 100020) Sequence ----------------------------------------------------------------------- Stats: length: 100020 ----------------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 60 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA ... 99900 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 99960 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA A single character longer than the previous sequence causes the optimal number of chunks per line to be 5: >>> Sequence('A' * 100021) Sequence ------------------------------------------------------------- Stats: length: 100021 ------------------------------------------------------------- 0 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 50 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA ... 99950 AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA 100000 AAAAAAAAAA AAAAAAAAAA A Wide range of characters (locale-independent): >>> import string >>> Sequence((string.ascii_letters + string.punctuation + string.digits + ... 
'a space') * 567) Sequence ----------------------------------------------------------------------- Stats: length: 57267 ----------------------------------------------------------------------- 0 abcdefghij klmnopqrst uvwxyzABCD EFGHIJKLMN OPQRSTUVWX YZ!"#$%&'( 60 )*+,-./:;< =>?@[\]^_` {|}~012345 6789a spac eabcdefghi jklmnopqrs ... 57180 opqrstuvwx yzABCDEFGH IJKLMNOPQR STUVWXYZ!" #$%&'()*+, -./:;<=>?@ 57240 [\]^_`{|}~ 0123456789 a space Supply horrendous metadata, positional, and interval metadata to exercise a variety of metadata formatting cases and rules. Sorting should be by type, then by value within each type (Python 3 doesn't allow sorting of mixed types): >>> metadata = { ... # str key, str value ... 'abc': 'some description', ... # int value ... 'foo': 42, ... # unsupported type (dict) value ... 'bar': {}, ... # int key, wrapped str (single line) ... 42: 'some words to test text wrapping and such... yada yada yada ' ... 'yada yada yada yada yada.', ... # bool key, wrapped str (multi-line) ... True: 'abc ' * 34, ... # float key, truncated str (too long) ... 42.5: 'abc ' * 200, ... # unsupported type (tuple) key, unsupported type (list) value ... ('foo', 'bar'): [1, 2, 3], ... # bytes key, single long word that wraps ... b'long word': 'abc' * 30, ... # truncated key (too long), None value ... 'too long of a key name to display in repr': None, ... # wrapped bytes value (has b'' prefix) ... 'bytes wrapped value': b'abcd' * 25, ... # float value ... 0.1: 99.9999, ... # bool value ... 43: False, ... # None key, complex value ... None: complex(-1.0, 0.0), ... # nested quotes ... 10: '"\'' ... } >>> positional_metadata = pd.DataFrame({ ... # str key, int list value ... 'foo': [1, 2, 3, 4], ... # float key, float list value ... 42.5: [2.5, 3.0, 4.2, -0.00001], ... # int key, object list value ... 42: [[], 4, 5, {}], ... # truncated key (too long), bool list value ... 'abc' * 90: [True, False, False, True], ... # None key ... 
None: range(4)}) >>> positional_metadata = positional_metadata.reindex( ... columns=['foo', 42.5, 42, 'abc' * 90, None]) >>> interval_metadata = IntervalMetadata(4) >>> _ = interval_metadata.add([(0, 2), (1, 3)], ... [(False, True), (False, False)], ... {'gene': 'p53'}) >>> _ = interval_metadata.add([(1, 4)]) >>> Sequence('ACGT', metadata=metadata, ... positional_metadata=positional_metadata, ... interval_metadata=interval_metadata) Sequence ----------------------------------------------------------------------- Metadata: None: (-1+0j) True: 'abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc abc ' b'long word': 'abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabca bcabcabcabcabcabcabcabcabcabcabcabcabc' 0.1: 99.9999 42.5: 10: '"\'' 42: 'some words to test text wrapping and such... yada yada yada yada yada yada yada yada.' 43: False 'abc': 'some description' 'bar': 'bytes wrapped value': b'abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdab cdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd abcdabcdabcdabcd' 'foo': 42 : None : Positional metadata: 'foo': 42.5: 42: : None: Interval metadata: 2 interval features Stats: length: 4 ----------------------------------------------------------------------- 0 ACGT """ pass if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/sequence/tests/test_substitution.py000066400000000000000000000133211464262511300240260ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ----------------------------------------------------------------------------

from unittest import TestCase, main

import numpy as np
from numpy.testing import assert_array_equal

from skbio import SubstitutionMatrix


class TestSubstitutionMatrix(TestCase):
    """Unit tests for skbio.SubstitutionMatrix construction and lookups."""

    def setUp(self):
        # 5x5 match/mismatch matrix over nucleotides plus the wildcard 'N'
        # (N scores 0 against everything, including itself).
        self.alphabet = 'ACGTN'
        self.scores = np.array([
            [1, -2, -2, -2, 0],
            [-2, 1, -2, -2, 0],
            [-2, -2, 1, -2, 0],
            [-2, -2, -2, 1, 0],
            [0, 0, 0, 0, 0]])

    def test_init(self):
        # typical usage
        # alphabet becomes tuple of characters
        alphabet = tuple(self.alphabet)
        obs = SubstitutionMatrix(self.alphabet, self.scores)
        self.assertTupleEqual(obs.alphabet, alphabet)

        # alphabet is an alias of ids
        self.assertTupleEqual(obs.alphabet, obs.ids)

        # matrix is ndarray (this is important for alignment efficiency)
        self.assertTrue(isinstance(obs.scores, np.ndarray))
        self.assertTupleEqual(obs.shape, (5, 5))
        assert_array_equal(obs.scores, self.scores)

        # data type becomes float
        self.assertEqual(obs.dtype, np.float64)

        # scores is an alias of data
        assert_array_equal(obs.scores, obs.data)

        # character to index mapping
        self.assertDictEqual(obs._char_map, dict(zip(
            alphabet, range(len(alphabet)))))

        # alphabet can be encoded as ASCII characters
        self.assertTrue(obs._is_ascii)

        # hash table of ASCII characters
        self.assertTrue(isinstance(obs._char_hash, np.ndarray))
        self.assertTrue(obs._char_hash.dtype.type is np.uint8)
        for i, char in enumerate(alphabet):
            self.assertEqual(i, obs._char_hash[ord(char)])

    def test_init_alt_alphabet(self):
        # alternative formats of alphabet: list, dictionary (only keys matter),
        # and iterator
        alphabet = tuple(self.alphabet)
        for alp in (list(alphabet), dict.fromkeys(alphabet), iter(alphabet)):
            obs = SubstitutionMatrix(alp, self.scores)
            self.assertTupleEqual(obs.alphabet, alphabet)

    def test_init_alt_scores(self):
        # alternative format of scores: nested list
        obs = SubstitutionMatrix(self.alphabet, self.scores.tolist())
        assert_array_equal(obs.scores, self.scores)

        # condensed matrix (less likely because diagonal is zero)
        obs = SubstitutionMatrix('ACGT', [-1] * 6)
        assert_array_equal(obs.scores, np.identity(4) - 1)

    def test_to_dict(self):
        # round-trip the matrix into a nested {row: {col: score}} dictionary
        mat = SubstitutionMatrix(self.alphabet, self.scores)
        obs = mat.to_dict()
        exp = {'A': {'A': 1., 'C': -2., 'G': -2., 'T': -2., 'N': 0.},
               'C': {'A': -2., 'C': 1., 'G': -2., 'T': -2., 'N': 0.},
               'G': {'A': -2., 'C': -2., 'G': 1., 'T': -2., 'N': 0.},
               'T': {'A': -2., 'C': -2., 'G': -2., 'T': 1., 'N': 0.},
               'N': {'A': 0., 'C': 0., 'G': 0., 'T': 0., 'N': 0.}}
        self.assertDictEqual(obs, exp)

    def test_from_dict(self):
        d = {'a': {'a': 1, 'b': 0, 'c': 0},
             'b': {'a': 0, 'b': 1, 'c': 0},
             'c': {'a': 0, 'b': 0, 'c': 1}}
        obs = SubstitutionMatrix.from_dict(d)
        self.assertTrue(isinstance(obs, SubstitutionMatrix))
        self.assertTupleEqual(obs.alphabet, tuple('abc'))
        exp = np.array([[1., 0., 0.],
                        [0., 1., 0.],
                        [0., 0., 1.]])
        assert_array_equal(obs.data, exp)

        # alphabet is inconsistent
        msg = ('The outer and inner layers of the dictionary must have the '
               'same set of keys.')
        d['d'] = {'a': 0, 'b': 0, 'c': 0}
        with self.assertRaisesRegex(ValueError, msg):
            SubstitutionMatrix.from_dict(d)
        del d['d']
        d['a']['d'] = 2
        with self.assertRaisesRegex(ValueError, msg):
            SubstitutionMatrix.from_dict(d)
        del d['a']['d']

        # scores are not numbers
        d['a']['b'] = 'hello'
        with self.assertRaises(ValueError):
            SubstitutionMatrix.from_dict(d)
        d['a']['b'] = None
        with self.assertRaises(TypeError):
            SubstitutionMatrix.from_dict(d)

    def test_identity(self):
        # identity() builds a match/mismatch matrix from two scalar scores
        obs = SubstitutionMatrix.identity('ACGT', 1, -2)
        self.assertTrue(isinstance(obs, SubstitutionMatrix))
        self.assertTupleEqual(obs.alphabet, tuple('ACGT'))
        exp = np.array([[1., -2., -2., -2.],
                        [-2., 1., -2., -2.],
                        [-2., -2., 1., -2.],
                        [-2., -2., -2., 1.]])
        assert_array_equal(obs.scores, exp)

    def test_by_name(self):
        # look up bundled matrices by name (case-insensitive)
        obs = SubstitutionMatrix.by_name('NUC.4.4')
        self.assertEqual(len(obs.alphabet), 15)
        self.assertEqual(obs['A', 'T'], -4)
        obs = SubstitutionMatrix.by_name('BLOSUM50')
        self.assertEqual(len(obs.alphabet), 24)
        self.assertEqual(obs['M', 'K'], -2)
        obs = SubstitutionMatrix.by_name('blosum50')
        self.assertEqual(len(obs.alphabet), 24)
        self.assertEqual(obs['M', 'K'], -2)
        msg = 'Substitution matrix "hello" does not exist.'
        with self.assertRaisesRegex(ValueError, msg):
            SubstitutionMatrix.by_name('hello')

    def test_get_names(self):
        obs = SubstitutionMatrix.get_names()
        self.assertTrue('NUC.4.4' in obs)
        self.assertTrue('PAM250' in obs)
        self.assertTrue('BLOSUM62' in obs)


if __name__ == "__main__":
    main()
scikit-bio-0.6.2/skbio/stats/000077500000000000000000000000001464262511300160255ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/__init__.py000066400000000000000000000026711464262511300201440ustar00rootroot00000000000000r"""Multivariate Statistics (:mod:`skbio.stats`)
============================================

.. currentmodule:: skbio.stats

This module provides various statistical methods to support the analyses of
high-dimensional biological data to uncover the relationships among samples,
features and metadata. Examples include distance matrix-based statistics,
ordination methods, composition statistics, and data subsampling techniques.

Distance matrix statistics
--------------------------

.. autosummary::
   :toctree: generated/

   distance

Ordination methods
------------------

.. autosummary::
   :toctree: generated/

   ordination

Composition statistics
----------------------

.. autosummary::
   :toctree: generated/

   composition

Data subsampling
----------------

.. autosummary::
   :toctree: generated/

   subsample_counts
   isubsample

Other statistical methods
-------------------------

.. autosummary::
   :toctree: generated/

   evolve
   gradient
   power

"""  # noqa: D205, D415

# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

from ._subsample import subsample_counts, isubsample

__all__ = ["subsample_counts", "isubsample"]

scikit-bio-0.6.2/skbio/stats/_misc.py000066400000000000000000000015701464262511300174740ustar00rootroot00000000000000# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------


def _pprint_strs(
    strs,
    max_chars=80,
    delimiter=", ",
    suffix="...",
):
    """Pretty-print an iterable of strings, truncating if necessary."""
    # Adapted from http://stackoverflow.com/a/250373
    # Join the repr() of every string; if the result exceeds max_chars,
    # drop the (possibly partial) last element and append the suffix.
    joined_str = delimiter.join(repr(s) for s in strs)

    if len(joined_str) > max_chars:
        # Slice one char past the limit so a delimiter exactly at the
        # boundary is still detected, then discard the trailing fragment.
        truncated = joined_str[: max_chars + 1].split(delimiter)[0:-1]
        joined_str = delimiter.join(truncated)
        if joined_str:
            joined_str += delimiter
        joined_str += suffix

    return joined_str
scikit-bio-0.6.2/skbio/stats/_subsample.py000066400000000000000000000172171464262511300205410ustar00rootroot00000000000000# ----------------------------------------------------------------------------
# Copyright (c) 2013--, scikit-bio development team.
#
# Distributed under the terms of the Modified BSD License.
#
# The full license is in the file LICENSE.txt, distributed with this software.
# ----------------------------------------------------------------------------

import sys
from heapq import heappush, heappop
from collections import defaultdict
from copy import copy

import numpy as np
# NOTE(review): `sparse`, `get_rng` and `biom_subsample` are not referenced in
# the code visible here; presumably used further down this module — confirm.
import scipy.sparse as sparse

from skbio.util import get_rng

from biom import subsample as biom_subsample


def isubsample(items, maximum, minimum=1, buf_size=1000, bin_f=None):
    """Randomly subsample items from bins, without replacement.

    Randomly subsample items without replacement from an unknown number of
    input items, that may fall into an unknown number of bins. This method is
    intended for data that either a) cannot fit into memory or b) subsampling
    collections of arbitrary datatypes.

    Parameters
    ----------
    items : Iterable
        The items to evaluate.
    maximum : unsigned int
        The maximum number of items per bin.
    minimum : unsigned int, optional
        The minimum number of items per bin. The default is 1.
    buf_size : unsigned int, optional
        The size of the random value buffer. This buffer holds the random
        values assigned to each item from items. In practice, it is unlikely
        that this value will need to change. Increasing it will require more
        resident memory, but potentially reduce the number of function calls
        made to the PRNG, whereas decreasing it will result in more function
        calls and lower memory overhead. The default is 1000.
    bin_f : function, optional
        Method to determine what bin an item is associated with. If None (the
        default), then all items are considered to be part of the same bin.
        This function will be provided with each entry in items, and must
        return a hashable value indicating the bin that that entry should be
        placed in.

    Returns
    -------
    generator
        (bin, item)

    Raises
    ------
    ValueError
        If ``minimum`` is > ``maximum``.
    ValueError
        If ``minimum`` < 1 or if ``maximum`` < 1.

    See Also
    --------
    subsample_counts

    Notes
    -----
    Randomly get up to ``maximum`` items for each bin. If the bin has less
    than ``maximum``, only those bins that have >= ``minimum`` items are
    returned.

    This method will at most hold ``maximum`` * N data, where N is the number
    of bins.

    All items associated to a bin have an equal probability of being retained.

    Examples
    --------
    Randomly keep up to 2 sequences per sample from a set of demultiplexed
    sequences:

    >>> from skbio.stats import isubsample
    >>> import numpy as np
    >>> np.random.seed(123)
    >>> seqs = [('sampleA', 'AATTGG'),
    ...         ('sampleB', 'ATATATAT'),
    ...         ('sampleC', 'ATGGCC'),
    ...         ('sampleB', 'ATGGCT'),
    ...         ('sampleB', 'ATGGCG'),
    ...         ('sampleA', 'ATGGCA')]
    >>> bin_f = lambda item: item[0]
    >>> for bin_, item in sorted(isubsample(seqs, 2, bin_f=bin_f)):
    ...     print(bin_, item[1])
    sampleA AATTGG
    sampleA ATGGCA
    sampleB ATATATAT
    sampleB ATGGCG
    sampleC ATGGCC

    Now, let's set the minimum to 2:

    >>> bin_f = lambda item: item[0]
    >>> for bin_, item in sorted(isubsample(seqs, 2, 2, bin_f=bin_f)):
    ...     print(bin_, item[1])
    sampleA AATTGG
    sampleA ATGGCA
    sampleB ATATATAT
    sampleB ATGGCG

    """
    if minimum > maximum:
        raise ValueError("minimum cannot be > maximum.")
    if minimum < 1 or maximum < 1:
        raise ValueError("minimum and maximum must be > 0.")
    if bin_f is None:
        # default: every item falls into a single shared bin
        def bin_f(x):
            return True

    # buffer some random values
    # (one PRNG call yields buf_size keys; refilled lazily below)
    random_values = np.random.randint(0, sys.maxsize, buf_size,
                                      dtype=np.int64)
    random_idx = 0

    result = defaultdict(list)
    for item in items:
        bin_ = bin_f(item)
        heap = result[bin_]

        # pull a random value, and recompute random values if we've consumed
        # our buffer
        random_value = random_values[random_idx]
        random_idx += 1
        if random_idx >= buf_size:
            random_values = np.random.randint(0, sys.maxsize, buf_size,
                                              dtype=np.int64)
            random_idx = 0

        # push our item on to the heap and drop the smallest if necessary
        # (min-heap keyed by the random value: the surviving `maximum` items
        # are those with the largest random keys, i.e. a uniform sample)
        heappush(heap, (random_value, copy(item)))
        if len(heap) > maximum:
            heappop(heap)

    # yield items
    for bin_, heap in result.items():
        if len(heap) < minimum:
            continue

        for _, item in heap:
            yield (bin_, item)


def subsample_counts(counts, n, replace=False, seed=None):
    """Randomly subsample from a vector of counts, with or without replacement.

    Parameters
    ----------
    counts : 1-D array_like
        Vector of counts (integers or floats) to randomly subsample from.
    n : int
        Number of items to subsample from `counts`. Must be less than or equal
        to the sum of `counts`.
    replace : bool, optional
        If ``True``, subsample with replacement. If ``False`` (the default),
        subsample without replacement.
seed : int or np.random.Generator, optional A user-provided random seed or random generator instance. Returns ------- subsampled : ndarray Subsampled vector of counts where the sum of the elements equals `n` (i.e., ``subsampled.sum() == n``). Will have the same shape as `counts`. Raises ------ ValueError If `n` is less than zero or greater than the sum of `counts` when `replace=False`. EfficiencyWarning If the accelerated code isn't present or hasn't been compiled. See Also -------- isubsample skbio.diversity.alpha Notes ----- If subsampling is performed without replacement (``replace=False``), a copy of `counts` is returned if `n` is equal to the number of items in `counts`, as all items will be chosen from the original vector. If subsampling is performed with replacement (``replace=True``) and `n` is equal to the number of items in `counts`, the subsampled vector that is returned may not necessarily be the same vector as `counts`. Examples -------- Subsample 4 items (without replacement) from a vector of counts: >>> import numpy as np >>> from skbio.stats import subsample_counts >>> a = np.array([4, 5, 0, 2, 1]) >>> sub = subsample_counts(a, 4) >>> sub.sum() 4 >>> sub.shape (5,) Trying to subsample an equal number of items (without replacement) results in the same vector as our input: >>> subsample_counts([0, 3, 0, 1], 4) array([0, 3, 0, 1]) Subsample 5 items (with replacement): >>> sub = subsample_counts([1, 0, 1, 2, 2, 3, 0, 1], 5, replace=True) >>> sub.sum() 5 >>> sub.shape (8,) """ if n < 0: raise ValueError("n cannot be negative.") # cast to float as that's what the biom subsample method currently requires counts = np.asarray(counts, dtype=float) if counts.ndim != 1: raise ValueError("Only 1-D vectors are supported.") # csr_matrix will report ndim of 2 if vector counts = sparse.csr_matrix(counts) counts_sum = counts.sum() if n > counts_sum and not replace: raise ValueError( "Cannot subsample more items than exist in input " "counts vector when 
`replace=False`." ) rng = get_rng(seed) biom_subsample(counts, n, replace, rng) return np.atleast_1d(counts.astype(int).toarray().squeeze()) scikit-bio-0.6.2/skbio/stats/composition.py000066400000000000000000002053701464262511300207510ustar00rootroot00000000000000r"""Composition Statistics (:mod:`skbio.stats.composition`) ======================================================= .. currentmodule:: skbio.stats.composition This module provides functions for compositional data analysis. Many omics datasets are inherently compositional -- meaning that they are best interpreted as proportions or percentages rather than absolute counts. Formally, sample :math:`x` is a composition if :math:`\sum_{i=0}^D x_{i} = c` and :math:`x_{i} > 0`, :math:`1 \leq i \leq D` and :math:`c` is a real-valued constant and there are :math:`D` components (features) for this composition. In this module :math:`c=1`. Compositional data can be analyzed using **Aitchison geometry** [1]_. However, in this framework, standard real Euclidean operations such as addition and multiplication no longer apply. Only operations such as perturbation and power can be used to manipulate this data. This module allows two styles of manipulation of compositional data. Compositional data can be analyzed using perturbation and power operations, which can be useful for simulation studies. The alternative strategy is to transform compositional data into the real space. Right now, the centre log ratio transform (clr) and the isometric log ratio transform (ilr) [2]_ can be used to accomplish this. This transform can be useful for performing standard statistical methods such as parametric hypothesis testing, regression and more. The major caveat of using this framework is dealing with zeros. In Aitchison geometry, only compositions with non-zero components can be considered. 
The multiplicative replacement technique [3]_ can be used to substitute these zeros with small pseudocounts without introducing major distortions to the data. Functions --------- .. autosummary:: :toctree: closure multi_replace multiplicative_replacement perturb perturb_inv power inner clr clr_inv ilr ilr_inv alr alr_inv centralize vlr pairwise_vlr tree_basis ancom sbp_basis dirmult_ttest References ---------- .. [1] V. Pawlowsky-Glahn, J. J. Egozcue, R. Tolosana-Delgado (2015), Modeling and Analysis of Compositional Data, Wiley, Chichester, UK .. [2] J. J. Egozcue., "Isometric Logratio Transformations for Compositional Data Analysis" Mathematical Geology, 35.3 (2003) .. [3] J. A. Martin-Fernandez, "Dealing With Zeros and Missing Values in Compositional Data Sets Using Nonparametric Imputation", Mathematical Geology, 35.3 (2003) """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from warnings import warn, simplefilter import numpy as np import pandas as pd import scipy.stats from scipy.sparse import coo_matrix from scipy.stats import t, gmean from statsmodels.stats.weightstats import CompareMeans from skbio.stats.distance import DistanceMatrix from skbio.util import find_duplicates from skbio.util._misc import get_rng from skbio.util._warning import _warn_deprecated from statsmodels.stats.multitest import multipletests as sm_multipletests def closure(mat): """Perform closure to ensure that all elements add up to 1. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. 
Returns ------- ndarray of shape (n_compositions, n_components) The matrix where all of the values are non-zero and each composition (row) adds up to 1. Raises ------ ValueError If any values are negative. ValueError If the matrix has more than two dimensions. ValueError If there is a row that has all zeros. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import closure >>> X = np.array([[2, 2, 6], [4, 4, 2]]) >>> closure(X) array([[ 0.2, 0.2, 0.6], [ 0.4, 0.4, 0.2]]) """ mat = np.atleast_2d(mat) if np.any(mat < 0): raise ValueError("Cannot have negative proportions") if mat.ndim > 2: raise ValueError("Input matrix can only have two dimensions or less") if np.all(mat == 0, axis=1).sum() > 0: raise ValueError("Input matrix cannot have rows with all zeros") mat = mat / mat.sum(axis=1, keepdims=True) return mat.squeeze() def multi_replace(mat, delta=None): r"""Replace all zeros with small non-zero values. It uses the multiplicative replacement strategy [1]_, replacing zeros with a small positive :math:`\delta` and ensuring that the compositions still add up to 1. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. delta : float, optional A small number to be used to replace zeros. If not specified, the default value is :math:`\delta = \frac{1}{N^2}` where :math:`N` is the number of components. Returns ------- ndarray of shape (n_compositions, n_components) The matrix where all of the values are non-zero and each composition (row) adds up to 1. Raises ------ ValueError If negative proportions are created due to a large ``delta``. Notes ----- This method will result in negative proportions if a large delta is chosen. References ---------- .. [1] J. A. Martin-Fernandez. 
"Dealing With Zeros and Missing Values in Compositional Data Sets Using Nonparametric Imputation" Examples -------- >>> import numpy as np >>> from skbio.stats.composition import multi_replace >>> X = np.array([[.2, .4, .4, 0],[0, .5, .5, 0]]) >>> multi_replace(X) array([[ 0.1875, 0.375 , 0.375 , 0.0625], [ 0.0625, 0.4375, 0.4375, 0.0625]]) """ mat = closure(mat) z_mat = mat == 0 num_feats = mat.shape[-1] tot = z_mat.sum(axis=-1, keepdims=True) if delta is None: delta = (1.0 / num_feats) ** 2 zcnts = 1 - tot * delta if np.any(zcnts) < 0: raise ValueError( "The multiplicative replacement created negative " "proportions. Consider using a smaller `delta`." ) mat = np.where(z_mat, delta, zcnts * mat) return mat.squeeze() def multiplicative_replacement(mat, delta=None): r"""Replace all zeros with small non-zero values. This function is an alias for ``multi_replace``. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. delta : float, optional A small number to be used to replace zeros. If not specified, the default value is :math:`\delta = \frac{1}{N^2}` where :math:`N` is the number of components. Returns ------- ndarray of shape (n_compositions, n_components) The matrix where all of the values are non-zero and each composition (row) adds up to 1. Raises ------ ValueError If negative proportions are created due to a large ``delta``. Warnings -------- ``multiplicative_replacement`` is deprecated as of ``0.6.0`` in favor of ``multi_replace``. See Also -------- multi_replace """ _warn_deprecated(multiplicative_replacement, "0.6.0") return multi_replace(mat, delta) def perturb(x, y): r"""Perform the perturbation operation. This operation is defined as: .. math:: x \oplus y = C[x_1 y_1, \ldots, x_D y_D] :math:`C[x]` is the closure operation defined as: .. 
math:: C[x] = \left[\frac{x_1}{\sum_{i=1}^{D} x_i},\ldots, \frac{x_D}{\sum_{i=1}^{D} x_i} \right] for some :math:`D` dimensional real vector :math:`x` and :math:`D` is the number of components for every composition. Parameters ---------- x : array_like of shape (n_compositions, n_components) A matrix of proportions. y : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray of shape (n_compositions, n_components) A matrix of proportions where all of the values are non-zero and each composition (row) adds up to 1. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import perturb Consider a very simple environment with only three species. The species in the environment are evenly distributed and their proportions are equal: >>> before = np.array([1/3, 1/3, 1/3]) Suppose that an antibiotic kills off half of the population for the first two species, but doesn't harm the third species. Then the perturbation vector would be as follows: >>> after = np.array([1/2, 1/2, 1]) And the resulting perturbation would be: >>> perturb(before, after) array([ 0.25, 0.25, 0.5 ]) """ x, y = closure(x), closure(y) return closure(x * y) def perturb_inv(x, y): r"""Perform the inverse perturbation operation. This operation is defined as: .. math:: x \ominus y = C[x_1 y_1^{-1}, \ldots, x_D y_D^{-1}] :math:`C[x]` is the closure operation defined as: .. math:: C[x] = \left[\frac{x_1}{\sum_{i=1}^{D} x_i},\ldots, \frac{x_D}{\sum_{i=1}^{D} x_i} \right] for some :math:`D` dimensional real vector :math:`x` and :math:`D` is the number of components for every composition. Parameters ---------- x : array_like of shape (n_compositions, n_components) A matrix of proportions. y : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray of shape (n_compositions, n_components) A matrix of proportions where all of the values are non-zero and each composition (row) adds up to 1. 
Examples -------- >>> import numpy as np >>> from skbio.stats.composition import perturb_inv >>> x = np.array([.1, .3, .4, .2]) >>> y = np.array([1/6, 1/6, 1/3, 1/3]) >>> perturb_inv(x, y) array([ 0.14285714, 0.42857143, 0.28571429, 0.14285714]) """ x, y = closure(x), closure(y) return closure(x / y) def power(x, a): r"""Perform the power operation. This operation is defined as follows: .. math:: `x \odot a = C[x_1^a, \ldots, x_D^a] :math:`C[x]` is the closure operation defined as: .. math:: C[x] = \left[\frac{x_1}{\sum_{i=1}^{D} x_i},\ldots, \frac{x_D}{\sum_{i=1}^{D} x_i} \right] for some :math:`D` dimensional real vector :math:`x` and :math:`D` is the number of components for every composition. Parameters ---------- x : array_like of shape (n_compositions, n_components) A matrix of proportions. a : float A scalar exponent. Returns ------- ndarray of shape (n_compositions, n_components) The matrix where all of the values are non-zero and each composition (row) adds up to 1. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import power >>> x = np.array([.1, .3, .4, .2]) >>> power(x, .1) array([ 0.23059566, 0.25737316, 0.26488486, 0.24714631]) """ x = closure(x) return closure(x**a).squeeze() def inner(x, y): r"""Calculate the Aitchson inner product. This inner product is defined as follows: .. math:: \langle x, y \rangle_a = \frac{1}{2D} \sum\limits_{i=1}^{D} \sum\limits_{j=1}^{D} \ln\left(\frac{x_i}{x_j}\right) \ln\left(\frac{y_i}{y_j}\right) Parameters ---------- x : array_like of shape (n_compositions, n_components) A matrix of proportions. y : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray or scalar of shape (n_compositions, n_compositions) Inner product result. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import inner >>> x = np.array([.1, .3, .4, .2]) >>> y = np.array([.2, .4, .2, .2]) >>> inner(x, y) # doctest: +ELLIPSIS 0.2107852473... 
""" x = closure(x) y = closure(y) a, b = clr(x), clr(y) return a.dot(b.T) def clr(mat): r"""Perform centre log ratio transformation. This function transforms compositions from Aitchison geometry to the real space. The :math:`clr` transform is both an isometry and an isomorphism defined on the following spaces: .. math:: clr: S^D \rightarrow U where :math:`U= \{x :\sum\limits_{i=1}^D x = 0 \; \forall x \in \mathbb{R}^D\}` It is defined for a composition :math:`x` as follows: .. math:: clr(x) = \ln\left[\frac{x_1}{g_m(x)}, \ldots, \frac{x_D}{g_m(x)}\right] where :math:`g_m(x) = (\prod\limits_{i=1}^{D} x_i)^{1/D}` is the geometric mean of :math:`x`. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray of shape (n_compositions, n_components) Clr-transformed matrix. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import clr >>> x = np.array([.1, .3, .4, .2]) >>> clr(x) array([-0.79451346, 0.30409883, 0.5917809 , -0.10136628]) """ mat = closure(mat) lmat = np.log(mat) gm = lmat.mean(axis=-1, keepdims=True) return (lmat - gm).squeeze() def clr_inv(mat): r"""Perform inverse centre log ratio transformation. This function transforms compositions from the real space to Aitchison geometry. The :math:`clr^{-1}` transform is both an isometry, and an isomorphism defined on the following spaces: .. math:: clr^{-1}: U \rightarrow S^D where :math:`U= \{x :\sum\limits_{i=1}^D x = 0 \; \forall x \in \mathbb{R}^D\}` This transformation is defined as follows: .. math:: clr^{-1}(x) = C[\exp( x_1, \ldots, x_D)] Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of clr-transformed data. Returns ------- ndarray of shape (n_compositions, n_components) Inverse clr-transformed matrix. 
Examples -------- >>> import numpy as np >>> from skbio.stats.composition import clr_inv >>> x = np.array([.1, .3, .4, .2]) >>> clr_inv(x) array([ 0.21383822, 0.26118259, 0.28865141, 0.23632778]) """ # for numerical stability (aka softmax trick) mat = np.atleast_2d(mat) emat = np.exp(mat - mat.max(axis=-1, keepdims=True)) return closure(emat) def ilr(mat, basis=None, check=True): r"""Perform isometric log ratio transformation. This function transforms compositions from Aitchison simplex to the real space. The :math:`ilr` transform is both an isometry, and an isomorphism defined on the following spaces: .. math:: ilr: S^D \rightarrow \mathbb{R}^{D-1} The ilr transformation is defined as follows: .. math:: ilr(x) = [\langle x, e_1 \rangle_a, \ldots, \langle x, e_{D-1} \rangle_a] where :math:`[e_1,\ldots,e_{D-1}]` is an orthonormal basis in the simplex. If an orthornormal basis isn't specified, the J. J. Egozcue orthonormal basis derived from Gram-Schmidt orthogonalization will be used by default. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. basis : ndarray or sparse matrix, optional Orthonormal basis for Aitchison simplex. Defaults to J. J. Egozcue orthonormal basis. check : bool Check to see if basis is orthonormal. Returns ------- ndarray of shape (n_compositions, n_components - 1) Ilr-transformed matrix. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import ilr >>> x = np.array([.1, .3, .4, .2]) >>> ilr(x) array([-0.7768362 , -0.68339802, 0.11704769]) Notes ----- If the ``basis`` parameter is specified, it is expected to be a basis in the Aitchison simplex. If there are :math:`D - 1` elements specified in ``mat``, then the dimensions of the basis needs be :math:`(D-1) \times D`, where rows represent basis vectors, and the columns represent proportions. 
""" mat = closure(mat) if basis is None: d = mat.shape[-1] basis = _gram_schmidt_basis(d) # dimension (d-1) x d else: if len(basis.shape) != 2: raise ValueError( "Basis needs to be a 2D matrix, " "not a %dD matrix." % (len(basis.shape)) ) if check: _check_orthogonality(basis) return clr(mat) @ basis.T def ilr_inv(mat, basis=None, check=True): r"""Perform inverse isometric log ratio transform. This function transforms compositions from the real space to Aitchison geometry. The :math:`ilr^{-1}` transform is both an isometry, and an isomorphism defined on the following spaces: .. math:: ilr^{-1}: \mathbb{R}^{D-1} \rightarrow S^D The inverse ilr transformation is defined as follows: .. math:: ilr^{-1}(x) = \bigoplus\limits_{i=1}^{D-1} x \odot e_i where :math:`[e_1,\ldots, e_{D-1}]` is an orthonormal basis in the simplex. If an orthonormal basis isn't specified, the J. J. Egozcue orthonormal basis derived from Gram-Schmidt orthogonalization will be used by default. Parameters ---------- mat : array_like of shape (n_compositions, n_components - 1) A matrix of ilr-transformed data. basis : ndarray or sparse matrix, optional Orthonormal basis for Aitchison simplex. Defaults to J. J. Egozcue orthonormal basis. check : bool Check to see if basis is orthonormal. Returns ------- ndarray of shape (n_compositions, n_components) Inverse ilr-transformed matrix. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import ilr >>> x = np.array([.1, .3, .6,]) >>> ilr_inv(x) array([ 0.34180297, 0.29672718, 0.22054469, 0.14092516]) Notes ----- If the ``basis`` parameter is specified, it is expected to be a basis in the Aitchison simplex. If there are :math:`D - 1` elements specified in ``mat``, then the dimensions of the basis needs be :math:`(D-1) \times D`, where rows represent basis vectors, and the columns represent proportions. 
""" mat = np.atleast_2d(mat) if basis is None: # dimension d-1 x d basis basis = _gram_schmidt_basis(mat.shape[-1] + 1) else: if len(basis.shape) != 2: raise ValueError( "Basis needs to be a 2D matrix, " "not a %dD matrix." % (len(basis.shape)) ) if check: _check_orthogonality(basis) # this is necessary, since the clr function # performs np.squeeze() basis = np.atleast_2d(basis) return clr_inv(mat @ basis) def alr(mat, denominator_idx=0): r"""Perform additive log ratio transformation. This function transforms compositions from a D-part Aitchison simplex to a non-isometric real space of D-1 dimensions. The argument ``denominator_col`` defines the index of the column used as the common denominator. The :math:`alr` transformed data are amenable to multivariate analysis as long as statistics don't involve distances. .. math:: alr: S^D \rightarrow \mathbb{R}^{D-1} The alr transformation is defined as follows .. math:: alr(x) = \left[ \ln \frac{x_1}{x_D}, \ldots, \ln \frac{x_{D-1}}{x_D} \right] where :math:`D` is the index of the part used as common denominator. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. denominator_idx : int The index of the column (2-D matrix) or position (vector) of ``mat`` which should be used as the reference composition. Default is 0 which specifies the first column or position. Returns ------- ndarray of shape (n_compositions, n_components - 1) Alr-transformed data projected in a non-isometric real space of :math:`D - 1` dimensions for a *D*-parts composition. 
Examples -------- >>> import numpy as np >>> from skbio.stats.composition import alr >>> x = np.array([.1, .3, .4, .2]) >>> alr(x) array([ 1.09861229, 1.38629436, 0.69314718]) """ mat = closure(mat) if mat.ndim == 2: mat_t = mat.T numerator_idx = list(range(0, mat_t.shape[0])) del numerator_idx[denominator_idx] lr = np.log(mat_t[numerator_idx, :] / mat_t[denominator_idx, :]).T elif mat.ndim == 1: numerator_idx = list(range(0, mat.shape[0])) del numerator_idx[denominator_idx] lr = np.log(mat[numerator_idx] / mat[denominator_idx]) else: raise ValueError("mat must be either 1D or 2D") return lr def alr_inv(mat, denominator_idx=0): r"""Perform inverse additive log ratio transform. This function transforms compositions from the non-isometric real space of alrs to Aitchison geometry. .. math:: alr^{-1}: \mathbb{R}^{D-1} \rightarrow S^D The inverse alr transformation is defined as follows: .. math:: alr^{-1}(x) = C[exp([y_1, y_2, ..., y_{D-1}, 0])] where :math:`C[x]` is the closure operation defined as .. math:: C[x] = \left[\frac{x_1}{\sum_{i=1}^{D} x_i},\ldots, \frac{x_D}{\sum_{i=1}^{D} x_i} \right] for some :math:`D` dimensional real vector :math:`x` and :math:`D` is the number of components for every composition. Parameters ---------- mat : array_like of shape (n_compositions, n_components - 1) A matrix of alr-transformed data. denominator_idx : int The index of the column (2-D matrix) or position (vector) of ``mat`` which should be used as the reference composition. Default is 0 which specifies the first column or position. Returns ------- ndarray of shape (n_compositions, n_components) Inverse alr-transformed matrix or vector where rows sum to 1. 
Examples -------- >>> import numpy as np >>> from skbio.stats.composition import alr, alr_inv >>> x = np.array([.1, .3, .4, .2]) >>> alr_inv(alr(x)) array([ 0.1, 0.3, 0.4, 0.2]) """ mat = np.array(mat) if mat.ndim == 2: mat_idx = np.insert(mat, denominator_idx, np.repeat(0, mat.shape[0]), axis=1) comp = np.zeros(mat_idx.shape) comp[:, denominator_idx] = 1 / (np.exp(mat).sum(axis=1) + 1) numerator_idx = list(range(0, comp.shape[1])) del numerator_idx[denominator_idx] for i in numerator_idx: comp[:, i] = comp[:, denominator_idx] * np.exp(mat_idx[:, i]) elif mat.ndim == 1: mat_idx = np.insert(mat, denominator_idx, 0, axis=0) comp = np.zeros(mat_idx.shape) comp[denominator_idx] = 1 / (np.exp(mat).sum(axis=0) + 1) numerator_idx = list(range(0, comp.shape[0])) del numerator_idx[denominator_idx] for i in numerator_idx: comp[i] = comp[denominator_idx] * np.exp(mat_idx[i]) else: raise ValueError("mat must be either 1D or 2D") return comp def centralize(mat): r"""Center data around its geometric average. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray of shape (n_compositions, n_components) Centered composition matrix. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import centralize >>> X = np.array([[.1, .3, .4, .2], [.2, .2, .2, .4]]) >>> centralize(X) array([[ 0.17445763, 0.30216948, 0.34891526, 0.17445763], [ 0.32495488, 0.18761279, 0.16247744, 0.32495488]]) """ mat = closure(mat) cen = gmean(mat, axis=0) return perturb_inv(mat, cen) def _vlr(x, y, ddof): r"""Calculate variance log ratio. Parameters ---------- x : array_like of shape (n_components,) A vector of proportions. y : array_like of shape (n_components,) A vector of proportions. ddof : int Degrees of freedom. Returns ------- float Variance log ratio value. 
""" # Log transformation x = np.log(x) y = np.log(y) # Variance log ratio return np.var(x - y, ddof=ddof) def _robust_vlr(x, y, ddof): r"""Calculate variance log ratio while masking zeros. Parameters ---------- x : array_like of shape (n_components,) A vector of proportions. y : array_like of shape (n_components,) A vector of proportions. ddof : int Degrees of freedom. Returns ------- float Variance log ratio value. """ # Mask zeros x = np.ma.masked_array(x, mask=x == 0) y = np.ma.masked_array(y, mask=y == 0) # Log transformation x = np.ma.log(x) y = np.ma.log(y) # Variance log ratio return np.ma.var(x - y, ddof=ddof) def vlr(x, y, ddof=1, robust=False): r"""Calculate variance log ratio. Parameters ---------- x : array_like of shape (n_components,) A vector of proportions. y : array_like of shape (n_components,) A vector of proportions. ddof : int Degrees of freedom. robust : bool Whether to mask zeros at the cost of performance. Returns ------- float Variance log ratio value. Notes ----- Variance log ratio was described in [1]_ and [2]_. References ---------- .. [1] V. Lovell D, Pawlowsky-Glahn V, Egozcue JJ, Marguerat S, Bähler J (2015) Proportionality: A Valid Alternative to Correlation for Relative Data. PLoS Comput Biol 11(3): e1004075. https://doi.org/10.1371/journal.pcbi.1004075 .. [2] Erb, I., Notredame, C. How should we measure proportionality on relative gene expression data?. Theory Biosci. 135, 21-36 (2016). https://doi.org/10.1007/s12064-015-0220-8 Examples -------- >>> import numpy as np >>> from skbio.stats.composition import vlr >>> x = np.exp([1, 2, 3]) >>> y = np.exp([2, 3, 4]) >>> vlr(x, y) # no zeros 0.0 """ # Convert array_like to numpy array x = closure(x) y = closure(y) # Set up input and parameters kwargs = { "x": x, "y": y, "ddof": ddof, } # Run backend function if robust: return _robust_vlr(**kwargs) else: return _vlr(**kwargs) def _pairwise_vlr(mat, ddof): r"""Perform pairwise variance log ratio transformation. 
Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. ddof : int Degrees of freedom. Returns ------- ndarray of shape (n_compositions, n_compositions) Distance matrix of variance log ratio values. """ # Log Transform X_log = np.log(mat) # Variance Log Ratio covariance = np.cov(X_log.T, ddof=ddof) diagonal = np.diagonal(covariance) vlr_data = -2 * covariance + diagonal[:, np.newaxis] + diagonal return vlr_data def pairwise_vlr(mat, ids=None, ddof=1, robust=False, validate=True): r"""Perform pairwise variance log ratio transformation. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. ids : array_like of str of shape (n_components,) Component names. ddof : int Degrees of freedom. robust : bool Whether to mask zeros at the cost of performance. validate : bool Whether to validate the distance matrix after construction. Returns ------- skbio.DistanceMatrix if validate=True Distance matrix of variance log ratio values. skbio.DissimilarityMatrix if validate=False Dissimilarity matrix of variance log ratio values. Notes ----- Pairwise variance log ratio transformation was described in [1]_ and [2]_. References ---------- .. [1] V. Lovell D, Pawlowsky-Glahn V, Egozcue JJ, Marguerat S, Bähler J (2015) Proportionality: A Valid Alternative to Correlation for Relative Data. PLoS Comput Biol 11(3): e1004075. https://doi.org/10.1371/journal.pcbi.1004075 .. [2] Erb, I., Notredame, C. How should we measure proportionality on relative gene expression data?. Theory Biosci. 135, 21-36 (2016). https://doi.org/10.1007/s12064-015-0220-8 Examples -------- >>> import numpy as np >>> from skbio.stats.composition import pairwise_vlr >>> mat = np.array([np.exp([1, 2, 2]), ... np.exp([2, 3, 6]), ... 
np.exp([2, 3, 12])]).T >>> dism = pairwise_vlr(mat) >>> dism.redundant_form() array([[ 0., 3., 27.], [ 3., 0., 12.], [ 27., 12., 0.]]) """ # Mask zeros mat = closure(mat.astype(np.float64)) # Set up input and parameters kwargs = { "mat": mat, "ddof": ddof, } # Variance log ratio if robust: raise NotImplementedError("Pairwise version of robust VLR not implemented.") else: vlr_data = _pairwise_vlr(**kwargs) # Return distance matrix if validate: vlr_data = 0.5 * (vlr_data + vlr_data.T) return DistanceMatrix(vlr_data, ids=ids) # Return dissimilarity matrix else: return DistanceMatrix(vlr_data, ids=ids, validate=False) def tree_basis(tree): r"""Calculate the sparse representation of an ilr basis from a tree. This computes an orthonormal basis specified from a bifurcating tree. Parameters ---------- tree : skbio.TreeNode Input bifurcating tree. Must be strictly bifurcating (i.e. every internal node needs to have exactly two children). This is used to specify the ilr basis. Returns ------- scipy.sparse.coo_matrix The ilr basis required to perform the ilr_inv transform. This is also known as the sequential binary partition. Note that this matrix is represented in clr coordinates. list of str List of tree node names indicating the ordering in the basis. Raises ------ ValueError If the tree doesn't contain two branches. Examples -------- >>> from skbio import TreeNode >>> tree = u"((b,c)a, d)root;" >>> t = TreeNode.read([tree]) >>> basis, nodes = tree_basis(t) >>> basis.toarray() array([[-0.40824829, -0.40824829, 0.81649658], [-0.70710678, 0.70710678, 0. ]]) """ # Specifies which child is numerator and denominator # within any given node in a tree. 
def _calc_p_adjust(name, p):
    """Calculate the p-value adjustment for a given method.

    Parameters
    ----------
    name : str
        The name of the *p*-value correction function. This should match one
        of the method names available in
        `statsmodels.stats.multitest.multipletests`.
    p : ndarray of shape (n_tests,)
        Original *p*-values.

    Returns
    -------
    p : ndarray of shape (n_tests,)
        Corrected *p*-values.

    Raises
    ------
    ValueError
        If the given method name is not available.

    See Also
    --------
    statsmodels.stats.multitest.multipletests

    References
    ----------
    .. [1] https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html

    """
    # Map legacy spellings (kept for backwards compatibility) onto the
    # canonical statsmodels method names; anything else passes through as-is.
    aliases = {
        "holm-bonferroni": "holm",
        "bh": "fdr_bh",
        "benjamini-hochberg": "fdr_bh",
    }
    method = name.lower()
    method = aliases.get(method, method)

    try:
        res = sm_multipletests(pvals=p, alpha=0.05, method=method)
    except ValueError as e:
        # Distinguish an unknown method name from a failure during correction.
        if "method not recognized" in str(e):
            raise ValueError(f"{name} is not an available FDR correction method.")
        else:
            raise ValueError(f"Cannot perform FDR correction using the {name} method.")
    # multipletests returns (reject, pvals_corrected, ...); keep the
    # corrected p-values only.
    return res[1]
def ancom(
    table,
    grouping,
    alpha=0.05,
    tau=0.02,
    theta=0.1,
    p_adjust="holm",
    significance_test="f_oneway",
    percentiles=(0.0, 25.0, 50.0, 75.0, 100.0),
    multiple_comparisons_correction="holm-bonferroni",
):
    r"""Perform a differential abundance test using ANCOM.

    Analysis of composition of microbiomes (ANCOM) is done by calculating
    pairwise log ratios between all features and performing a significance
    test to determine if there is a significant difference in feature ratios
    with respect to the variable of interest.

    In an experiment with only two treatments, this tests the following
    hypothesis for feature :math:`i`:

    .. math::

        H_{0i}: \mathbb{E}[\ln(u_i^{(1)})] = \mathbb{E}[\ln(u_i^{(2)})]

    where :math:`u_i^{(1)}` is the mean abundance for feature :math:`i` in the
    first group and :math:`u_i^{(2)}` is the mean abundance for feature
    :math:`i` in the second group.

    Parameters
    ----------
    table : pd.DataFrame
        A 2-D matrix of strictly positive values (i.e. counts or proportions)
        where the rows correspond to samples and the columns correspond to
        features.
    grouping : pd.Series
        Vector indicating the assignment of samples to groups. For example,
        these could be strings or integers denoting which group a sample
        belongs to. It must be the same length as the samples in `table`.
        The index must be the same on `table` and `grouping` but need not be
        in the same order.
    alpha : float, optional
        Significance level for each of the statistical tests. This can be
        anywhere between 0 and 1 exclusive.
    tau : float, optional
        A constant used to determine an appropriate cutoff. A value close to
        zero indicates a conservative cutoff. This can be anywhere between 0
        and 1 exclusive.
    theta : float, optional
        Lower bound for the proportion for the *W*-statistic. If all
        *W*-statistics are lower than theta, then no features will be detected
        to be significantly different. This can be anywhere between 0 and 1
        exclusive.
    p_adjust : str or None, optional
        Method to correct *p*-values for multiple comparisons. Options are
        Holm-Bonferroni ("holm" or "holm-bonferroni") (default),
        Benjamini-Hochberg ("bh", "fdr_bh" or "benjamini-hochberg"), or any
        method supported by statsmodels' ``multipletests`` function.
        Case-insensitive. If None, no correction will be performed.

        .. versionchanged:: 0.6.0
            Replaces ``multiple_comparisons_correction`` for conciseness.

    significance_test : str or callable, optional
        A function to test for significance between classes. It must be able
        to accept at least two vectors of floats and returns a test statistic
        and a *p*-value. Functions under ``scipy.stats`` can be directly
        specified by name. The default is one-way ANOVA ("f_oneway").

        .. versionchanged:: 0.6.0
            Accepts test names in addition to functions.

    percentiles : iterable of floats, optional
        Percentile abundances to return for each feature in each group. By
        default, will return the minimum, 25th percentile, median, 75th
        percentile, and maximum abundances for each feature in each group.
    multiple_comparisons_correction : str or None, optional
        Alias for ``p_adjust``. For backward compatibility. Deprecated.

    Returns
    -------
    pd.DataFrame
        A table of features, their *W*-statistics and whether the null
        hypothesis is rejected.

        - ``W``: *W*-statistic, or the number of features that the current
          feature is tested to be significantly different against.

        - ``Reject null hypothesis``: Whether the feature is differentially
          abundant across groups (``True``) or not (``False``).

    pd.DataFrame
        A table of features and their percentile abundances in each group. If
        ``percentiles`` is empty, this will be an empty ``pd.DataFrame``. The
        rows in this object will be features, and the columns will be a
        multi-index where the first index is the percentile, and the second
        index is the group.

    See Also
    --------
    multi_replace
    scipy.stats.ttest_ind
    scipy.stats.f_oneway
    scipy.stats.wilcoxon
    scipy.stats.kruskal

    Warnings
    --------
    ``multiple_comparisons_correction`` is deprecated as of ``0.6.0``. It has
    been renamed to ``p_adjust``.

    ``significance_test=None`` is deprecated as of ``0.6.0``. The default
    value is now "f_oneway".

    Notes
    -----
    The developers of ANCOM recommend the following significance tests ([1]_,
    Supplementary File 1, top of page 11):

    - If there are two groups, use the standard parametric *t*-test
      (``ttest_ind``) or the non-parametric Mann-Whitney rank test
      (``mannwhitneyu``).

    - For paired samples, use the parametric paired *t*-test (``ttest_rel``)
      or the non-parametric Wilcoxon signed-rank test (``wilcoxon``).

    - If there are more than two groups, use the parametric one-way ANOVA
      (``f_oneway``) or the non-parametric Kruskal-Wallis test (``kruskal``).

    - If there are multiple measurements obtained from the individuals, use a
      Friedman test (``friedmanchisquare``).

    Because one-way ANOVA is equivalent to the standard *t*-test when the
    number of groups is two, we default to ``f_oneway`` here, which can be
    used when there are two or more groups.

    Users should refer to the documentation of these tests in SciPy to
    understand the assumptions made by each test.

    This method cannot handle any zero counts as input, since the logarithm
    of zero cannot be computed. While this is an unsolved problem, many
    studies, including [1]_, have shown promising results by adding
    pseudocounts to all values in the matrix. In [1]_, a pseudocount of 0.001
    was used, though the authors note that a pseudocount of 1.0 may also be
    useful. Zero counts can also be addressed using the ``multi_replace``
    method.

    References
    ----------
    .. [1] Mandal et al. "Analysis of composition of microbiomes: a novel
       method for studying microbial composition", Microbial Ecology in Health
       & Disease, (2015), 26.

    Examples
    --------
    >>> from skbio.stats.composition import ancom
    >>> import pandas as pd

    Let's load in a DataFrame with six samples and seven features (e.g.,
    these may be bacterial taxa):

    >>> table = pd.DataFrame([[12, 11, 10, 10, 10, 10, 10],
    ...                       [9, 11, 12, 10, 10, 10, 10],
    ...                       [1, 11, 10, 11, 10, 5, 9],
    ...                       [22, 21, 9, 10, 10, 10, 10],
    ...                       [20, 22, 10, 10, 13, 10, 10],
    ...                       [23, 21, 14, 10, 10, 10, 10]],
    ...                      index=['s1', 's2', 's3', 's4', 's5', 's6'],
    ...                      columns=['b1', 'b2', 'b3', 'b4', 'b5', 'b6',
    ...                               'b7'])

    Then create a grouping vector. In this example, there is a treatment
    group and a placebo group.

    >>> grouping = pd.Series(['treatment', 'treatment', 'treatment',
    ...                       'placebo', 'placebo', 'placebo'],
    ...                      index=['s1', 's2', 's3', 's4', 's5', 's6'])

    Now run ``ancom`` to determine if there are any features that are
    significantly different in abundance between the treatment and the
    placebo groups. The first DataFrame that is returned contains the ANCOM
    test results, and the second contains the percentile abundance data for
    each feature in each group.

    >>> ancom_df, percentile_df = ancom(table, grouping)
    >>> ancom_df['W'] # doctest: +ELLIPSIS
    b1    0
    b2    4
    b3    0
    b4    1
    b5    1
    b6    0
    b7    1
    Name: W, dtype: ...

    The *W*-statistic is the number of features that a single feature is
    tested to be significantly different against. In this scenario, ``b2``
    was detected to have significantly different abundances compared to four
    of the other features. To summarize the results from the *W*-statistic,
    let's take a look at the results from the hypothesis test. The
    ``Reject null hypothesis`` column in the table indicates whether the null
    hypothesis was rejected, and that a feature was therefore observed to be
    differentially abundant across the groups.

    >>> ancom_df['Reject null hypothesis']
    b1    False
    b2     True
    b3    False
    b4    False
    b5    False
    b6    False
    b7    False
    Name: Reject null hypothesis, dtype: bool

    From this we can conclude that only ``b2`` was significantly different in
    abundance between the treatment and the placebo. We still don't know, for
    example, in which group ``b2`` was more abundant. We therefore may next
    be interested in comparing the abundance of ``b2`` across the two groups.
    We can do that using the second DataFrame that was returned. Here we
    compare the median (50th percentile) abundance of ``b2`` in the treatment
    and placebo groups:

    >>> percentile_df[50.0].loc['b2']
    Group
    placebo      21.0
    treatment    11.0
    Name: b2, dtype: float64

    We can also look at a full five-number summary for ``b2`` in the
    treatment and placebo groups:

    >>> percentile_df.loc['b2'] # doctest: +NORMALIZE_WHITESPACE
    Percentile  Group
    0.0         placebo      21.0
    25.0        placebo      21.0
    50.0        placebo      21.0
    75.0        placebo      21.5
    100.0       placebo      22.0
    0.0         treatment    11.0
    25.0        treatment    11.0
    50.0        treatment    11.0
    75.0        treatment    11.0
    100.0       treatment    11.0
    Name: b2, dtype: float64

    Taken together, these data tell us that ``b2`` is present in
    significantly higher abundance in the placebo group samples than in the
    treatment group samples.

    """
    # --- Input validation ---
    if not isinstance(table, pd.DataFrame):
        raise TypeError(
            "`table` must be a `pd.DataFrame`, " "not %r." % type(table).__name__
        )
    if not isinstance(grouping, pd.Series):
        raise TypeError(
            "`grouping` must be a `pd.Series`," " not %r." % type(grouping).__name__
        )

    # Zeros/negatives are invalid: the method takes logarithms of the table.
    if np.any(table <= 0):
        raise ValueError(
            "Cannot handle zeros or negative values in `table`. "
            "Use pseudocounts or ``multi_replace``."
        )

    if not 0 < alpha < 1:
        raise ValueError("`alpha`=%f is not within 0 and 1." % alpha)

    if not 0 < tau < 1:
        raise ValueError("`tau`=%f is not within 0 and 1." % tau)

    if not 0 < theta < 1:
        raise ValueError("`theta`=%f is not within 0 and 1." % theta)

    # @deprecated
    # Any non-default value of the old parameter overrides `p_adjust`.
    if multiple_comparisons_correction != "holm-bonferroni":
        _warn_deprecated(ancom, "0.6.0")
        p_adjust = multiple_comparisons_correction

    if (grouping.isnull()).any():
        raise ValueError("Cannot handle missing values in `grouping`.")

    if (table.isnull()).any().any():
        raise ValueError("Cannot handle missing values in `table`.")

    percentiles = list(percentiles)
    for percentile in percentiles:
        if not 0.0 <= percentile <= 100.0:
            raise ValueError(
                "Percentiles must be in the range [0, 100], %r "
                "was provided." % percentile
            )

    duplicates = find_duplicates(percentiles)
    if duplicates:
        formatted_duplicates = ", ".join(repr(e) for e in duplicates)
        raise ValueError(
            "Percentile values must be unique. The following"
            " value(s) were duplicated: %s." % formatted_duplicates
        )

    groups = np.unique(grouping)
    num_groups = len(groups)

    # The test requires both within-group and between-group variance, so
    # every sample being its own group (or a single group) is rejected.
    if num_groups == len(grouping):
        raise ValueError(
            "All values in `grouping` are unique. This method cannot "
            "operate on a grouping vector with only unique values (e.g., "
            "there are no 'within' variance because each group of samples "
            "contains only a single sample)."
        )

    if num_groups == 1:
        raise ValueError(
            "All values the `grouping` are the same. This method cannot "
            "operate on a grouping vector with only a single group of samples"
            "(e.g., there are no 'between' variance because there is only a "
            "single group)."
        )

    # @deprecated
    if significance_test is None:
        significance_test = "f_oneway"

    # Align table and grouping on the sample index; inner join must not drop
    # anything, otherwise the two indices were inconsistent.
    table_index_len = len(table.index)
    grouping_index_len = len(grouping.index)
    mat, cats = table.align(grouping, axis=0, join="inner")
    if len(mat) != table_index_len or len(cats) != grouping_index_len:
        raise ValueError("`table` index and `grouping` " "index must be consistent.")

    n_feat = mat.shape[1]

    # Pairwise log-ratio p-values (upper triangle); mirror to a full matrix.
    _logratio_mat = _log_compare(mat.values, cats.values, significance_test)
    logratio_mat = _logratio_mat + _logratio_mat.T

    # Multiple comparisons: correct each feature's row of p-values.
    if p_adjust is not None:
        logratio_mat = np.apply_along_axis(
            lambda arr: _calc_p_adjust(p_adjust, arr), 1, logratio_mat
        )

    # Diagonal set to 1 so a feature never counts as significant vs. itself.
    np.fill_diagonal(logratio_mat, 1)
    # W = number of other features each feature differs from significantly.
    W = (logratio_mat < alpha).sum(axis=1)
    c_start = W.max() / n_feat

    if c_start < theta:
        reject = np.zeros_like(W, dtype=bool)
    else:
        # Select appropriate cutoff: scan candidate cutoffs below c_start and
        # pick one where the proportion of rejected features stabilizes
        # (successive differences fall below tau).
        cutoff = c_start - np.linspace(0.05, 0.25, 5)
        prop_cut = np.array([(W > n_feat * cut).mean() for cut in cutoff])
        dels = np.abs(prop_cut - np.roll(prop_cut, -1))
        dels[-1] = 0

        if (dels[0] < tau) and (dels[1] < tau) and (dels[2] < tau):
            nu = cutoff[1]
        elif (dels[0] >= tau) and (dels[1] < tau) and (dels[2] < tau):
            nu = cutoff[2]
        elif (dels[1] >= tau) and (dels[2] < tau) and (dels[3] < tau):
            nu = cutoff[3]
        else:
            nu = cutoff[4]
        reject = W >= nu * n_feat

    feat_ids = mat.columns
    ancom_df = pd.DataFrame(
        {
            "W": pd.Series(W, index=feat_ids),
            "Reject null hypothesis": pd.Series(reject, index=feat_ids),
        }
    )

    if len(percentiles) == 0:
        return ancom_df, pd.DataFrame()
    else:
        # Per-group, per-percentile feature abundances as a multi-indexed
        # (percentile, group) column table.
        data = []
        columns = []
        for group in groups:
            feat_dists = mat[cats == group]
            for percentile in percentiles:
                columns.append((percentile, group))
                data.append(np.percentile(feat_dists, percentile, axis=0))
        columns = pd.MultiIndex.from_tuples(columns, names=["Percentile", "Group"])
        percentile_df = pd.DataFrame(
            np.asarray(data).T, columns=columns, index=feat_ids
        )
        return ancom_df, percentile_df
columns=columns, index=feat_ids ) return ancom_df, percentile_df def _log_compare(mat, cats, test="ttest_ind"): """Calculate pairwise log ratios and perform a significance test. Calculate pairwise log ratios between all features and perform a significance test (i.e. *t*-test) to determine if there is a significant difference in feature ratios with respect to the variable of interest. Parameters ---------- mat : array_like of shape (n_samples, n_features) A matrix of proportions. cats : array_like of shape (n_samples,) A vector of categories. test : str or callable Statistical test to run. Returns ------- log_ratio : ndarray Log ratio *p*-value matrix. Raises ------ ValueError If specified test name is not a function under ``scipy.stats``. """ c = mat.shape[1] log_ratio = np.zeros((c, c)) log_mat = np.log(mat) cs = np.unique(cats) if isinstance(test, str): try: test = getattr(scipy.stats, test) except AttributeError: raise ValueError(f'Function "{test}" does not exist under scipy.stats.') def func(x): return test(*[x[cats == k] for k in cs]) for i in range(c - 1): ratio = (log_mat[:, i].T - log_mat[:, i + 1 :].T).T _, p = np.apply_along_axis(func, axis=0, arr=ratio) log_ratio[i, i + 1 :] = np.squeeze(np.array(p.T)) return log_ratio def _gram_schmidt_basis(n): """Build clr-transformed basis derived from Gram-Schmidt orthogonalization. Parameters ---------- n : int Dimension of the Aitchison simplex. Returns ------- basis : array_like of shape (n - 1, n) Basis matrix. """ basis = np.zeros((n, n - 1)) for j in range(n - 1): i = j + 1 e = np.array([(1 / i)] * i + [-1] + [0] * (n - i - 1)) * np.sqrt(i / (i + 1)) basis[:, j] = e return basis.T def sbp_basis(sbp): r"""Build an orthogonal basis from a sequential binary partition (SBP). A SBP is a hierarchical collection of binary divisions of compositional parts ([1]_). The child groups are divided again until all groups contain a single part. 
def sbp_basis(sbp):
    r"""Build an orthogonal basis from a sequential binary partition (SBP).

    A SBP is a hierarchical collection of binary divisions of compositional
    parts ([1]_). The child groups are divided again until all groups contain
    a single part. The SBP can be encoded in a :math:`(D - 1) \times D`
    matrix where, for each row, parts can be grouped by -1 and +1 tags, and 0
    for excluded parts. The *i*-th balance is computed as follows:

    .. math::
        b_i = \sqrt{ \frac{r_i s_i}{r_i+s_i} }
        \ln \left( \frac{g(x_{r_i})}{g(x_{s_i})} \right)

    where :math:`b_i` is the *i*-th balance corresponding to the *i*-th row
    in the SBP, :math:`r_i` and :math:`s_i` and the number of respectively
    ``+1`` and ``-1`` labels in the *i*-th row of the SBP and where
    :math:`g(x) = (\prod\limits_{i=1}^{D} x_i)^{1/D}` is the geometric mean
    of :math:`x`.

    Parameters
    ----------
    sbp : array_like of shape (n_partitions, n_features)
        A contrast matrix, also known as a sequential binary partition, where
        every row represents a partition between two groups of features. A
        part labelled ``+1`` would correspond to that feature being in the
        numerator of the given row partition, a part labelled ``-1`` would
        correspond to features being in the denominator of that given row
        partition, and ``0`` would correspond to features excluded in the
        row partition.

    Returns
    -------
    ndarray of shape (n_partitions, n_features)
        An orthonormal basis in the Aitchison simplex.

    Notes
    -----
    The ``sbp_basis`` method was derived from the ``gsi.buildilrBase()``
    function implemented in the R package "compositions" [2]_.

    Examples
    --------
    >>> import numpy as np
    >>> sbp = np.array([[1, 1,-1,-1,-1],
    ...                 [1,-1, 0, 0, 0],
    ...                 [0, 0, 1,-1,-1],
    ...                 [0, 0, 0, 1,-1]])
    ...
    >>> sbp_basis(sbp)
    array([[ 0.54772256,  0.54772256, -0.36514837, -0.36514837, -0.36514837],
           [ 0.70710678, -0.70710678,  0.        ,  0.        ,  0.        ],
           [ 0.        ,  0.        ,  0.81649658, -0.40824829, -0.40824829],
           [ 0.        ,  0.        ,  0.        ,  0.70710678, -0.70710678]])

    References
    ----------
    .. [1] Parent, S.É., Parent, L.E., Egozcue, J.J., Rozane, D.E.,
       Hernandes, A., Lapointe, L., Hébert-Gentile, V., Naess, K.,
       Marchand, S., Lafond, J., Mattos, D., Barlow, P., Natale, W., 2013.
       The plant ionome revisited by the nutrient balance concept.
       Front. Plant Sci. 4, 39.

    .. [2] van den Boogaart, K. Gerald, Tolosana-Delgado, Raimon and Bren,
       Matevz, 2014. `compositions`: Compositional Data Analysis. R package
       version 1.40-1. https://CRAN.R-project.org/package=compositions.

    """
    # Per-row counts of numerator (+1) and denominator (-1) parts.
    num_count = (sbp == 1).sum(axis=1)
    den_count = (sbp == -1).sum(axis=1)

    psi = np.zeros(sbp.shape)
    for row in range(sbp.shape[0]):
        parts = sbp[row, :]
        # Raising (s/r) to the part's own tag gives each +1 part weight s/r,
        # each -1 part weight r/s, and each excluded part weight 1; dividing
        # by the number of non-zero parts and taking the square root yields
        # the orthonormal balance coefficients.
        weights = (den_count[row] / num_count[row]) ** parts / np.abs(parts).sum()
        psi[row, :] = parts * np.sqrt(weights)
    return psi
[2] van den Boogaart, K. Gerald, Tolosana-Delgado, Raimon and Bren, Matevz, 2014. `compositions`: Compositional Data Analysis. R package version 1.40-1. https://CRAN.R-project.org/package=compositions. """ n_pos = (sbp == 1).sum(axis=1) n_neg = (sbp == -1).sum(axis=1) psi = np.zeros(sbp.shape) for i in range(0, sbp.shape[0]): psi[i, :] = sbp[i, :] * np.sqrt( (n_neg[i] / n_pos[i]) ** sbp[i, :] / np.sum(np.abs(sbp[i, :])) ) return psi def _check_orthogonality(basis): r"""Check to see if basis is truly orthonormal in the Aitchison simplex. Parameters ---------- basis : ndarray Basis in the Aitchison simplex of dimension :math:`(D - 1) \times D`. """ basis = np.atleast_2d(basis) if not np.allclose(basis @ basis.T, np.identity(len(basis)), rtol=1e-4, atol=1e-6): raise ValueError("Basis is not orthonormal.") def _welch_ttest(a, b): r"""Perform Welch's *t*-test on two samples of unequal variances. Parameters ---------- a, b : 1-D array_like Samples to test. Returns ------- pd.DataFrame Test result. Columns are: T statistic, df, pvalue, Difference, CI(2.5), CI(97.5). See Also -------- scipy.stats.ttest_ind statsmodels.stats.weightstats.CompareMeans Notes ----- Compared with ``scipy.stats.ttest_ind`` with ``equal_var=False``, this function additionally returns confidence intervals. This implementation uses the ``CompareMeans`` class from ``statsmodels.stats.weightstats``. 
""" # See https://stats.stackexchange.com/a/475345 # See https://www.statsmodels.org/dev/generated/statsmodels.stats.weightstats.CompareMeans.html # Creating a CompareMeans object to perform Welch's t-test statsmodel_cm_object = CompareMeans.from_data( data1=a, data2=b, weights1=None, weights2=None ) # Performing Welch's t-test using the object to obtain tstat, pvalue, and df ttest_cm_result = statsmodel_cm_object.ttest_ind( alternative="two-sided", usevar="unequal", value=0 ) tstat = ttest_cm_result[0] p = ttest_cm_result[1] df = ttest_cm_result[2] # Calculating difference between the two means m1 = np.mean(a) m2 = np.mean(b) delta = m1 - m2 # Calculating confidence intervals using the aformentioned CompareMeans object conf_int = statsmodel_cm_object.tconfint_diff( alpha=0.05, alternative="two-sided", usevar="unequal" ) lb = conf_int[0] ub = conf_int[1] return pd.DataFrame( np.array([tstat, df, p, delta, lb, ub]).reshape(1, -1), columns=["T statistic", "df", "pvalue", "Difference", "CI(2.5)", "CI(97.5)"], ) def dirmult_ttest( table, grouping, treatment, reference, pseudocount=0.5, draws=128, p_adjust="holm", seed=None, ): r"""*T*-test using Dirichlet-multinomial distribution. The Dirichlet-multinomial distribution is a compound distribution that combines a Dirichlet distribution over the probabilities of a multinomial distribution. This distribution is used to model the distribution of species abundances in a community. To perform the *t*-test, we first fit a Dirichlet-multinomial distribution for each sample, and then we compute the fold change and *p*-value for each feature. The fold change is computed as the difference between the samples of the two groups. *t*-tests are then performed on the posterior samples, drawn from each Dirichlet-multinomial distribution. The log-fold changes as well as their credible intervals, the *p*-values and the multiple comparison corrected *p*-values are reported. 
def dirmult_ttest(
    table,
    grouping,
    treatment,
    reference,
    pseudocount=0.5,
    draws=128,
    p_adjust="holm",
    seed=None,
):
    r"""*T*-test using Dirichlet-multinomial distribution.

    The Dirichlet-multinomial distribution is a compound distribution that
    combines a Dirichlet distribution over the probabilities of a multinomial
    distribution. This distribution is used to model the distribution of
    species abundances in a community.

    To perform the *t*-test, we first fit a Dirichlet-multinomial distribution
    for each sample, and then we compute the fold change and *p*-value for
    each feature. The fold change is computed as the difference between the
    samples of the two groups. *t*-tests are then performed on the posterior
    samples, drawn from each Dirichlet-multinomial distribution. The log-fold
    changes as well as their credible intervals, the *p*-values and the
    multiple comparison corrected *p*-values are reported.

    This process mirrors the approach performed by the R package "ALDEx2"
    [1]_.

    Parameters
    ----------
    table : pd.DataFrame
        Contingency table of counts where rows are features and columns are
        samples.
    grouping : pd.Series
        Vector indicating the assignment of samples to groups. For example,
        these could be strings or integers denoting which group a sample
        belongs to. It must be the same length as the samples in ``table``.
        The index must be the same on ``table`` and ``grouping`` but need not
        be in the same order. The *t*-test is computed between the
        ``treatment`` group and the ``reference`` group specified in the
        ``grouping`` vector.
    treatment : str
        Name of the treatment group.
    reference : str
        Name of the reference group.
    pseudocount : float, optional
        A non-zero value added to the input counts to ensure that all of the
        estimated abundances are strictly greater than zero.
    draws : int, optional
        The number of draws from the Dirichlet-multinomial posterior
        distribution. More draws give a more complete picture of the
        uncertainty surrounding the estimated log-fold changes and
        *p*-values.
    p_adjust : str or None, optional
        Method to correct *p*-values for multiple comparisons. Options are
        Holm-Bonferroni ("holm" or "holm-bonferroni") (default),
        Benjamini-Hochberg ("bh", "fdr_bh" or "benjamini-hochberg"), or any
        method supported by statsmodels' ``multipletests`` function.
        Case-insensitive. If None, no correction will be performed.
    seed : int or np.random.Generator, optional
        A user-provided random seed or random generator instance.

    Returns
    -------
    pd.DataFrame
        A table of features, their log-fold changes and other relevant
        statistics.

        ``T statistic`` is the *t*-statistic outputted from the *t*-test.
        *t*-statistics are generated from each posterior draw. The reported
        ``T statistic`` is the average across all of the posterior draws.

        ``df`` is the degrees of freedom from the *t*-test.

        ``Log2(FC)`` is the expected log2-fold change. Within each posterior
        draw the log2 fold-change is computed as the difference between the
        mean log-abundance the ``treatment`` group and the ``reference``
        group. All log2 fold changes are expressed in clr coordinates. The
        reported ``Log2(FC)`` is the average of all of the log2-fold changes
        computed from each of the posterior draws.

        ``CI(2.5)`` is the 2.5% quantile of the log2-fold change. The
        reported ``CI(2.5)`` is the 2.5% quantile of all of the log2-fold
        changes computed from each of the posterior draws.

        ``CI(97.5)`` is the 97.5% quantile of the log2-fold change. The
        reported ``CI(97.5)`` is the 97.5% quantile of all of the log2-fold
        changes computed from each of the posterior draws.

        ``pvalue`` is the *p*-value of the *t*-test. The reported values are
        the average of all of the *p*-values computed from the *t*-tests
        calculated across all of the posterior draws.

        ``qvalue`` is the *p*-value of the *t*-test after performing multiple
        comparison correction.

        ``Reject null hypothesis`` indicates if feature is differentially
        abundant across groups (``True``) or not (``False``). In order for a
        feature to be differentially abundant, the qvalue needs to be
        significant (i.e. <0.05) and the confidence intervals reported by
        ``CI(2.5)`` and ``CI(97.5)`` must not overlap with zero.

    See Also
    --------
    scipy.stats.ttest_ind

    Notes
    -----
    The confidence intervals are computed using the minimum 2.5% and maximum
    97.5% bounds computed across all of the posterior draws.

    The reference frame here is the geometric mean. Extracting absolute log
    fold changes from this test assumes that the average feature abundance
    between the ``treatment`` and the ``reference`` groups are the same. If
    this assumption is violated, then the log-fold changes will be biased,
    and the *p*-values will not be reliable. However, the bias is the same
    across each feature, as a result the ordering of the log-fold changes can
    still be useful.

    One benefit of using the Dirichlet-multinomial distribution is that the
    statistical power increases with regards to the abundance magnitude. More
    counts per sample will shrink the size of the confidence intervals, and
    can result in lower *p*-values.

    References
    ----------
    .. [1] Fernandes et al. "Unifying the analysis of high-throughput
       sequencing datasets: characterizing RNA-seq, 16S rRNA gene sequencing
       and selective growth experiments by compositional data analysis."
       Microbiome (2014).

    Examples
    --------
    >>> import pandas as pd
    >>> from skbio.stats.composition import dirmult_ttest
    >>> table = pd.DataFrame([[20,  110, 100, 101, 100, 103, 104],
    ...                       [33,  110, 120, 100, 101, 100, 102],
    ...                       [12,  110, 100, 110, 100, 50,  90],
    ...                       [202, 201, 9,  10, 10, 11, 11],
    ...                       [200, 202, 10, 10, 13, 10, 10],
    ...                       [203, 201, 14, 10, 10, 13, 12]],
    ...                      index=['s1', 's2', 's3', 's4', 's5', 's6'],
    ...                      columns=['b1', 'b2', 'b3', 'b4', 'b5', 'b6',
    ...                               'b7'])
    >>> grouping = pd.Series(['treatment', 'treatment', 'treatment',
    ...                       'placebo', 'placebo', 'placebo'],
    ...                      index=['s1', 's2', 's3', 's4', 's5', 's6'])
    >>> lfc_result = dirmult_ttest(table, grouping, 'treatment', 'placebo',
    ...                            seed=0)
    >>> lfc_result[["Log2(FC)", "CI(2.5)", "CI(97.5)", "qvalue"]]
        Log2(FC)   CI(2.5)  CI(97.5)    qvalue
    b1 -4.991987 -7.884498 -2.293463  0.020131
    b2 -2.533729 -3.594590 -1.462339  0.007446
    b3  1.627677 -1.048219  4.750792  0.068310
    b4  1.707221 -0.467481  4.164998  0.065613
    b5  1.528243 -1.036910  3.978387  0.068310
    b6  1.182343 -0.702656  3.556061  0.068310
    b7  1.480232 -0.601277  4.043888  0.068310

    """
    rng = get_rng(seed)

    # --- Input validation ---
    if not isinstance(table, pd.DataFrame):
        raise TypeError(
            "`table` must be a `pd.DataFrame`, " "not %r." % type(table).__name__
        )
    if not isinstance(grouping, pd.Series):
        raise TypeError(
            "`grouping` must be a `pd.Series`," " not %r." % type(grouping).__name__
        )

    if np.any(table < 0):
        raise ValueError("Cannot handle negative values in `table`. ")

    if (grouping.isnull()).any():
        raise ValueError("Cannot handle missing values in `grouping`.")

    if (table.isnull()).any().any():
        raise ValueError("Cannot handle missing values in `table`.")

    # Align table and grouping on sample index; inner join must not drop rows.
    table_index_len = len(table.index)
    grouping_index_len = len(grouping.index)
    mat, cats = table.align(grouping, axis=0, join="inner")
    if len(mat) != table_index_len or len(cats) != grouping_index_len:
        raise ValueError("`table` index and `grouping` " "index must be consistent.")

    trt_group = grouping.loc[grouping == treatment]
    ref_group = grouping.loc[grouping == reference]

    def _one_draw():
        # One posterior draw: for every sample, draw feature proportions from
        # a Dirichlet posterior (counts + pseudocount), clr-transform, and run
        # Welch's t-test per feature between the treatment and reference
        # groups. Returns one row per feature.
        posterior = [
            rng.dirichlet(table.values[i] + pseudocount)
            for i in range(table.shape[0])
        ]
        dir_table = pd.DataFrame(
            clr(posterior), index=table.index, columns=table.columns
        )
        return pd.concat(
            _welch_ttest(
                np.array(dir_table.loc[trt_group.index, x].values),
                np.array(dir_table.loc[ref_group.index, x].values),
            )
            for x in table.columns
        )

    res = _one_draw()
    for i in range(1, draws):
        ires = _one_draw()
        # online average to avoid holding all of the results in memory
        res["Difference"] = (i * res["Difference"] + ires["Difference"]) / (i + 1)
        res["pvalue"] = (i * res["pvalue"] + ires["pvalue"]) / (i + 1)
        # CI bounds: running min/max across draws (see Notes).
        res["CI(2.5)"] = np.minimum(res["CI(2.5)"], ires["CI(2.5)"])
        res["CI(97.5)"] = np.maximum(res["CI(97.5)"], ires["CI(97.5)"])
        res["T statistic"] = (i * res["T statistic"] + ires["T statistic"]) / (i + 1)
    res.index = table.columns

    # convert all log fold changes to base 2
    ln2 = np.log(2)
    res["Difference"] = res["Difference"] / ln2
    res["CI(2.5)"] = res["CI(2.5)"] / ln2
    res["CI(97.5)"] = res["CI(97.5)"] / ln2

    # multiple comparison
    if p_adjust is not None:
        qval = _calc_p_adjust(p_adjust, res["pvalue"])
    else:
        qval = res["pvalue"].values

    # test to see if confidence interval includes 0.
    sig = np.logical_or(
        np.logical_and(res["CI(2.5)"] > 0, res["CI(97.5)"] > 0),
        np.logical_and(res["CI(2.5)"] < 0, res["CI(97.5)"] < 0),
    )
    # Bug fix: previously `np.logical_and(qval[0], sig)` used only the
    # truthiness of the *first* q-value (a scalar broadcast over all
    # features), so per-feature significance was ignored. Per the documented
    # contract, a feature is rejected only if its own q-value is significant
    # (<0.05) AND its confidence interval excludes zero.
    reject = np.logical_and(np.asarray(qval) < 0.05, sig)

    res = res.rename(columns={"Difference": "Log2(FC)"})
    res["qvalue"] = qval
    res["Reject null hypothesis"] = reject

    col_order = [
        "T statistic",
        "df",
        "Log2(FC)",
        "CI(2.5)",
        "CI(97.5)",
        "pvalue",
        "qvalue",
        "Reject null hypothesis",
    ]
    return res[col_order]
`DissimilarityMatrix` can be used to store measures of dissimilarity between objects, and does not require that the dissimilarities are symmetric (e.g., dissimilarities obtained using the *Gain in PD* measure [1]_). `DissimilarityMatrix` is a more general container to store differences than `DistanceMatrix`. `DistanceMatrix` has the additional requirement that the differences it stores are symmetric (e.g., Euclidean or Hamming distances). .. note:: `DissimilarityMatrix` can be used to store distances, but it is recommended to use `DistanceMatrix` to store this type of data as it provides an additional check for symmetry. A distance matrix *is a* dissimilarity matrix; this is modeled in the class design by having `DistanceMatrix` subclass `DissimilarityMatrix`. Classes ^^^^^^^ .. autosummary:: :toctree: DissimilarityMatrix DistanceMatrix Functions ^^^^^^^^^ .. autosummary:: :toctree: randdm Exceptions ^^^^^^^^^^ .. autosummary:: DissimilarityMatrixError DistanceMatrixError MissingIDError Distance-based statistics ------------------------- In addition to the data structures described above, this package provides the following distance-based statistical methods. Categorical Variable Stats ^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: anosim permanova permdisp Continuous Variable Stats ^^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: bioenv Distance Matrix Comparisons ^^^^^^^^^^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: mantel pwmantel Examples -------- Assume we have the following delimited text file storing distances between three objects with IDs ``a``, ``b``, and ``c``:: \ta\tb\tc a\t0.0\t0.5\t1.0 b\t0.5\t0.0\t0.75 c\t1.0\t0.75\t0.0 Load a distance matrix from the file: >>> from io import StringIO >>> from skbio import DistanceMatrix >>> dm_fh = StringIO("\ta\tb\tc\n" ... "a\t0.0\t0.5\t1.0\n" ... "b\t0.5\t0.0\t0.75\n" ... "c\t1.0\t0.75\t0.0\n") >>> dm = DistanceMatrix.read(dm_fh) >>> print(dm) 3x3 distance matrix IDs: 'a', 'b', 'c' Data: [[ 0. 0.5 1. 
] [ 0.5 0. 0.75] [ 1. 0.75 0. ]] Access the distance (scalar) between objects ``'a'`` and ``'c'``: >>> dm['a', 'c'] 1.0 Get a row vector of distances between object ``'b'`` and all other objects: >>> dm['b'] array([ 0.5 , 0. , 0.75]) numpy indexing/slicing also works as expected. Extract the third column: >>> dm[:, 2] array([ 1. , 0.75, 0. ]) Serialize the distance matrix to delimited text file: >>> out_fh = StringIO() >>> _ = dm.write(out_fh) >>> out_fh.getvalue() == dm_fh.getvalue() True A distance matrix object can also be created from an existing ``numpy.array`` (or an array-like object, such as a nested Python list): >>> import numpy as np >>> data = np.array([[0.0, 0.5, 1.0], ... [0.5, 0.0, 0.75], ... [1.0, 0.75, 0.0]]) >>> ids = ["a", "b", "c"] >>> dm_from_np = DistanceMatrix(data, ids) >>> print(dm_from_np) 3x3 distance matrix IDs: 'a', 'b', 'c' Data: [[ 0. 0.5 1. ] [ 0.5 0. 0.75] [ 1. 0.75 0. ]] >>> dm_from_np == dm True IDs may be omitted when constructing a dissimilarity/distance matrix. Monotonically-increasing integers (cast as strings) will be automatically used: >>> dm = DistanceMatrix(data) >>> dm.ids ('0', '1', '2') References ---------- .. [1] Faith, D. P. (1992). "Conservation evaluation and phylogenetic diversity". """ # noqa: D407, D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._base import ( DissimilarityMatrixError, DistanceMatrixError, MissingIDError, DissimilarityMatrix, DistanceMatrix, randdm, ) from ._bioenv import bioenv from ._anosim import anosim from ._permanova import permanova from ._mantel import mantel, pwmantel from ._permdisp import permdisp __all__ = [ "DissimilarityMatrixError", "DistanceMatrixError", "MissingIDError", "DissimilarityMatrix", "DistanceMatrix", "randdm", "anosim", "permanova", "bioenv", "mantel", "pwmantel", "permdisp", ] scikit-bio-0.6.2/skbio/stats/distance/_anosim.py000066400000000000000000000203651464262511300216240ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from functools import partial import numpy as np from scipy.stats import rankdata from ._base import _preprocess_input, _run_monte_carlo_stats, _build_results def anosim(distance_matrix, grouping, column=None, permutations=999): """Test for significant differences between groups using ANOSIM. Analysis of Similarities (ANOSIM) is a non-parametric method that tests whether two or more groups of objects (e.g., samples) are significantly different based on a categorical factor. The ranks of the distances in the distance matrix are used to calculate an R statistic, which ranges between -1 (anti-grouping) to +1 (strong grouping), with an R value of 0 indicating random grouping. Statistical significance is assessed via a permutation test. The assignment of objects to groups (`grouping`) is randomly permuted a number of times (controlled via `permutations`). 
An R statistic is computed for each permutation and the p-value is the proportion of permuted R statisics that are equal to or greater than the original (unpermuted) R statistic. Parameters ---------- distance_matrix : DistanceMatrix Distance matrix containing distances between objects (e.g., distances between samples of microbial communities). grouping : 1-D array_like or pandas.DataFrame Vector indicating the assignment of objects to groups. For example, these could be strings or integers denoting which group an object belongs to. If `grouping` is 1-D ``array_like``, it must be the same length and in the same order as the objects in `distance_matrix`. If `grouping` is a ``DataFrame``, the column specified by `column` will be used as the grouping vector. The ``DataFrame`` must be indexed by the IDs in `distance_matrix` (i.e., the row labels must be distance matrix IDs), but the order of IDs between `distance_matrix` and the ``DataFrame`` need not be the same. All IDs in the distance matrix must be present in the ``DataFrame``. Extra IDs in the ``DataFrame`` are allowed (they are ignored in the calculations). column : str, optional Column name to use as the grouping vector if `grouping` is a ``DataFrame``. Must be provided if `grouping` is a ``DataFrame``. Cannot be provided if `grouping` is 1-D ``array_like``. permutations : int, optional Number of permutations to use when assessing statistical significance. Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and the p-value will be ``np.nan``. Returns ------- pandas.Series Results of the statistical test, including ``test statistic`` and ``p-value``. See Also -------- permanova Notes ----- See [1]_ for the original method reference. The general algorithm and interface are similar to ``vegan::anosim``, available in R's vegan package [2]_. The p-value will be ``np.nan`` if `permutations` is zero. References ---------- .. [1] Clarke, KR. 
"Non-parametric multivariate analyses of changes in community structure." Australian journal of ecology 18.1 (1993): 117-143. .. [2] http://cran.r-project.org/web/packages/vegan/index.html Examples -------- Load a 4x4 distance matrix and grouping vector denoting 2 groups of objects: >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0, 1, 1, 4], ... [1, 0, 3, 2], ... [1, 3, 0, 3], ... [4, 2, 3, 0]], ... ['s1', 's2', 's3', 's4']) >>> grouping = ['Group1', 'Group1', 'Group2', 'Group2'] Run ANOSIM using 99 permutations to calculate the p-value: >>> import numpy as np >>> # make output deterministic; not necessary for normal use >>> np.random.seed(0) >>> from skbio.stats.distance import anosim >>> anosim(dm, grouping, permutations=99) method name ANOSIM test statistic name R sample size 4 number of groups 2 test statistic 0.25 p-value 0.67 number of permutations 99 Name: ANOSIM results, dtype: object The return value is a ``pandas.Series`` object containing the results of the statistical test. To suppress calculation of the p-value and only obtain the R statistic, specify zero permutations: >>> anosim(dm, grouping, permutations=0) method name ANOSIM test statistic name R sample size 4 number of groups 2 test statistic 0.25 p-value NaN number of permutations 0 Name: ANOSIM results, dtype: object You can also provide a ``pandas.DataFrame`` and a column denoting the grouping instead of a grouping vector. The following ``DataFrame``'s ``Group`` column specifies the same grouping as the vector we used in the previous examples: >>> # make output deterministic; not necessary for normal use >>> np.random.seed(0) >>> import pandas as pd >>> df = pd.DataFrame.from_dict( ... {'Group': {'s2': 'Group1', 's3': 'Group2', 's4': 'Group2', ... 
's5': 'Group3', 's1': 'Group1'}}) >>> anosim(dm, df, column='Group', permutations=99) method name ANOSIM test statistic name R sample size 4 number of groups 2 test statistic 0.25 p-value 0.67 number of permutations 99 Name: ANOSIM results, dtype: object The results match the first example above. Note that when providing a ``DataFrame``, the ordering of rows and/or columns does not affect the grouping vector that is extracted. The ``DataFrame`` must be indexed by the distance matrix IDs (i.e., the row labels must be distance matrix IDs). If IDs (rows) are present in the ``DataFrame`` but not in the distance matrix, they are ignored. The previous example's ``s5`` ID illustrates this behavior: note that even though the ``DataFrame`` had 5 objects, only 4 were used in the test (see the "Sample size" row in the results above to confirm this). Thus, the ``DataFrame`` can be a superset of the distance matrix IDs. Note that the reverse is not true: IDs in the distance matrix *must* be present in the ``DataFrame`` or an error will be raised. """ sample_size, num_groups, grouping, tri_idxs, distances = _preprocess_input( distance_matrix, grouping, column ) divisor = sample_size * ((sample_size - 1) / 4) ranked_dists = rankdata(distances, method="average") test_stat_function = partial(_compute_r_stat, tri_idxs, ranked_dists, divisor) stat, p_value = _run_monte_carlo_stats(test_stat_function, grouping, permutations) return _build_results( "ANOSIM", "R", sample_size, num_groups, stat, p_value, permutations ) def _compute_r_stat(tri_idxs, ranked_dists, divisor, grouping): """Compute ANOSIM R statistic (between -1 and +1).""" # Create a matrix where True means that the two objects are in the same # group. This ufunc requires that grouping is a numeric vector (e.g., it # won't work with a grouping vector of strings). grouping_matrix = np.equal.outer(grouping, grouping) # Extract upper triangle from the grouping matrix. 
It is important to # extract the values in the same order that the distances are extracted # from the distance matrix (see ranked_dists). Extracting the upper # triangle (excluding the diagonal) preserves this order. grouping_tri = grouping_matrix[tri_idxs] # within r_W = np.mean(ranked_dists[grouping_tri]) # between r_B = np.mean(ranked_dists[np.invert(grouping_tri)]) return (r_B - r_W) / divisor scikit-bio-0.6.2/skbio/stats/distance/_base.py000066400000000000000000001366261464262511300212600ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import itertools from copy import deepcopy import numpy as np import pandas as pd from scipy.spatial.distance import squareform from skbio._base import SkbioObject from skbio.stats._misc import _pprint_strs from skbio.util import find_duplicates from skbio.util._decorator import classonlymethod from skbio.util._misc import resolve_key from skbio.util._plotting import PlottableMixin from ._utils import is_symmetric_and_hollow from ._utils import distmat_reorder, distmat_reorder_condensed class DissimilarityMatrixError(Exception): """General error for dissimilarity matrix validation failures.""" pass class DistanceMatrixError(DissimilarityMatrixError): """General error for distance matrix validation failures.""" pass class MissingIDError(DissimilarityMatrixError): """Error for ID lookup that doesn't exist in the dissimilarity matrix.""" def __init__(self, missing_id): super(MissingIDError, self).__init__() self.args = ("The ID '%s' is not in the dissimilarity matrix." % missing_id,) class DissimilarityMatrix(SkbioObject, PlottableMixin): """Store dissimilarities between objects. 
A `DissimilarityMatrix` instance stores a square, hollow, two-dimensional matrix of dissimilarities between objects. Objects could be, for example, samples or DNA sequences. A sequence of IDs accompanies the dissimilarities. Methods are provided to load and save dissimilarity matrices from/to disk, as well as perform common operations such as extracting dissimilarities based on object ID. Parameters ---------- data : array_like or DissimilarityMatrix Square, hollow, two-dimensional ``numpy.ndarray`` of dissimilarities (floats), or a structure that can be converted to a ``numpy.ndarray`` using ``numpy.asarray`` or a one-dimensional vector of dissimilarities (floats), as defined by `scipy.spatial.distance.squareform`. Can instead be a `DissimilarityMatrix` (or subclass) instance, in which case the instance's data will be used. Data will be converted to a float ``dtype`` if necessary. A copy will *not* be made if already a ``numpy.ndarray`` with a float ``dtype``. ids : sequence of str, optional Sequence of strings to be used as object IDs. Must match the number of rows/cols in `data`. If ``None`` (the default), IDs will be monotonically-increasing integers cast as strings, with numbering starting from zero, e.g., ``('0', '1', '2', '3', ...)``. validate : bool, optional If `validate` is ``True`` (the default) and data is not a DissimilarityMatrix object, the input data will be validated. See Also -------- DistanceMatrix scipy.spatial.distance.squareform Notes ----- The dissimilarities are stored in redundant (square-form) format [1]_. The data are not checked for symmetry, nor guaranteed/assumed to be symmetric. References ---------- .. 
[1] http://docs.scipy.org/doc/scipy/reference/spatial.distance.html """ default_write_format = "lsmat" # Used in __str__ _matrix_element_name = "dissimilarity" def __init__(self, data, ids=None, validate=True): validate_full = validate validate_shape = False validate_ids = False if isinstance(data, DissimilarityMatrix): if isinstance(data, self.__class__): # Never validate when copying from an object # of the same type # We should be able to assume it is already # in a good state. validate_full = False validate_shape = False # but do validate ids, if redefining them validate_ids = False if ids is None else True ids = data.ids if ids is None else ids data = data.data # It is necessary to standardize the representation of the .data # attribute of this object. The input types might be list, tuple, # np.array, or possibly some other object type. Generally, this # normalization of type will require a copy of data. For example, # moving from a Python type representation (e.g., [[0, 1], [1, 0]]) # requires casting all of the values to numpy types, which is handled # as an implicit copy via np.asarray. However, these copies are # unnecessary if the data object is already a numpy array. np.asarray # is smart enough to not copy the data, however if a dtype change is # requested it will. The following block of code limits the use of # np.asarray to situations where the data are (a) not already a numpy # array or (b) the data are not a single or double precision numpy # data type. _issue_copy = True if isinstance(data, np.ndarray): if data.dtype in (np.float32, np.float64): _issue_copy = False if _issue_copy: data = np.asarray(data, dtype="float") if data.ndim == 1: # We can assume squareform will return a symmetric square matrix # so no need for full validation. # Still do basic checks (e.g. 
zero length) # and id validation data = squareform(data, force="tomatrix", checks=False) validate_full = False validate_shape = True validate_ids = True if ids is None: ids = (str(i) for i in range(data.shape[0])) # I just created the ids, so no need to re-validate them validate_ids = False ids = tuple(ids) if validate_full: self._validate(data, ids) else: if validate_shape: self._validate_shape(data) if validate_ids: self._validate_ids(data, ids) self._data = data self._ids = ids self._id_index = self._index_list(self._ids) @classonlymethod def from_iterable(cls, iterable, metric, key=None, keys=None): """Create DissimilarityMatrix from an iterable given a metric. Parameters ---------- iterable : iterable Iterable containing objects to compute pairwise dissimilarities on. metric : callable A function that takes two arguments and returns a float representing the dissimilarity between the two arguments. key : callable or metadata key, optional A function that takes one argument and returns a string representing the id of the element in the dissimilarity matrix. Alternatively, a key to a `metadata` property if it exists for each element in the `iterable`. If None, then default ids will be used. keys : iterable, optional An iterable of the same length as `iterable`. Each element will be used as the respective key. Returns ------- DissimilarityMatrix The `metric` applied to all pairwise elements in the `iterable`. Raises ------ ValueError If `key` and `keys` are both provided. """ iterable = list(iterable) if key is not None and keys is not None: raise ValueError("Cannot use both `key` and `keys` at the same" " time.") keys_ = None if key is not None: keys_ = [resolve_key(e, key) for e in iterable] elif keys is not None: keys_ = keys dm = np.empty((len(iterable),) * 2) for i, a in enumerate(iterable): for j, b in enumerate(iterable): dm[i, j] = metric(a, b) return cls(dm, keys_) @property def data(self): """Array of dissimilarities. 
A square, hollow, two-dimensional ``numpy.ndarray`` of dissimilarities (floats). A copy is *not* returned. Notes ----- This property is not writeable. """ return self._data @property def ids(self): """Tuple of object IDs. A tuple of strings, one for each object in the dissimilarity matrix. Notes ----- This property is writeable, but the number of new IDs must match the number of objects in `data`. """ return self._ids @ids.setter def ids(self, ids_): ids_ = tuple(ids_) self._validate_ids(self.data, ids_) self._ids = ids_ self._id_index = self._index_list(self._ids) @property def dtype(self): """Data type of the dissimilarities.""" return self.data.dtype @property def shape(self): """Two-element tuple containing the dissimilarity matrix dimensions. Notes ----- As the dissimilarity matrix is guaranteed to be square, both tuple entries will always be equal. """ return self.data.shape @property def size(self): """Total number of elements in the dissimilarity matrix. Notes ----- Equivalent to ``self.shape[0] * self.shape[1]``. """ return self.data.size @property def T(self): """Transpose of the dissimilarity matrix. See Also -------- transpose """ return self.transpose() def transpose(self): """Return the transpose of the dissimilarity matrix. Notes ----- A deep copy is returned. Returns ------- DissimilarityMatrix Transpose of the dissimilarity matrix. Will be the same type as `self`. """ # Note: Skip validation, since we assume self was already validated return self.__class__(self.data.T.copy(), deepcopy(self.ids), validate=False) def index(self, lookup_id): """Return the index of the specified ID. Parameters ---------- lookup_id : str ID whose index will be returned. Returns ------- int Row/column index of `lookup_id`. Raises ------ MissingIDError If `lookup_id` is not in the dissimilarity matrix. 
""" if lookup_id in self: return self._id_index[lookup_id] else: raise MissingIDError(lookup_id) def redundant_form(self): """Return an array of dissimilarities in redundant format. As this is the native format that the dissimilarities are stored in, this is simply an alias for `data`. Returns ------- ndarray Two-dimensional ``numpy.ndarray`` of dissimilarities in redundant format. Notes ----- Redundant format is described in [1]_. Does *not* return a copy of the data. References ---------- .. [1] http://docs.scipy.org/doc/scipy/reference/spatial.distance.html """ return self.data def copy(self): """Return a deep copy of the dissimilarity matrix. Returns ------- DissimilarityMatrix Deep copy of the dissimilarity matrix. Will be the same type as `self`. """ # We deepcopy IDs in case the tuple contains mutable objects at some # point in the future. # Note: Skip validation, since we assume self was already validated return self.__class__(self.data.copy(), deepcopy(self.ids), validate=False) def rename(self, mapper, strict=True): """Rename IDs in the dissimilarity matrix. Parameters ---------- mapper : dict or callable A dictionary or function that maps current IDs to new IDs. strict : bool, optional If ``True`` (default), every ID in the matrix must be included in ``mapper``. If ``False``, only the specified IDs will be renamed. Raises ------ ValueError If ``mapper`` does not contain all of the same IDs in the matrix whereas in strict mode. Examples -------- >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0, 1], [1, 0]], ids=['a', 'b']) >>> dm.rename({'a': 'x', 'b': 'y'}) >>> print(dm.ids) ('x', 'y') """ if isinstance(mapper, dict): if strict and not set(self.ids).issubset(mapper): raise ValueError( "The IDs in mapper do not include all IDs in the matrix." ) new_ids = [mapper.get(x, x) for x in self.ids] else: new_ids = [mapper(x) for x in self.ids] self.ids = new_ids def filter(self, ids, strict=True): """Filter the dissimilarity matrix by IDs. 
Parameters ---------- ids : iterable of str IDs to retain. May not contain duplicates or be empty. Each ID must be present in the dissimilarity matrix. strict : bool, optional If `strict` is ``True`` and an ID that is not found in the distance matrix is found in `ids`, a ``MissingIDError`` exception will be raised, otherwise the ID will be ignored. Returns ------- DissimilarityMatrix Filtered dissimilarity matrix containing only the IDs specified in `ids`. IDs will be in the same order as they appear in `ids`. Raises ------ MissingIDError If an ID in `ids` is not in the object's list of IDs. """ if tuple(self._ids) == tuple(ids): return self.__class__(self._data, self._ids) if strict: idxs = [self.index(id_) for id_ in ids] else: # get the indices to slice the inner numpy array idxs = [] # save the IDs that were found in the distance matrix found_ids = [] for id_ in ids: try: idxs.append(self.index(id_)) found_ids.append(id_) except MissingIDError: pass ids = found_ids # Note: Skip validation, since we assume self was already validated # But ids are new, so validate them explicitly filtered_data = distmat_reorder(self._data, idxs) self._validate_ids(filtered_data, ids) return self.__class__(filtered_data, ids, validate=False) def _stable_order(self, ids): """Obtain a stable ID order with respect to self. Parameters ---------- ids : Iterable of ids The IDs to establish a stable ordering for. Returns ------- np.array, dtype=int The corresponding index values """ id_order = sorted(self._id_index[i] for i in ids) return np.array(id_order, dtype=int) def within(self, ids): """Obtain all the distances among the set of IDs. Parameters ---------- ids : Iterable of str The IDs to obtain distances for. All pairs of distances are returned such that, if provided ['a', 'b', 'c'], the distances for [('a', 'a'), ('a', 'b'), ('a', 'c'), ('b', 'a'), ('b', 'b'), ('b', 'c'), ('c', 'a'), ('c', 'b'), ('c', 'c')] are gathered. 
Returns ------- pd.DataFrame (i, j, value) representing the source ID ("i"), the target ID ("j") and the distance ("value"). Raises ------ MissingIDError If an ID(s) specified is not in the dissimilarity matrix. Notes ----- Order of the return items is stable, meaning that requesting IDs ['a', 'b'] is equivalent to ['b', 'a']. The order is with respect to the order of the .ids attribute of self. Examples -------- >>> from skbio.stats.distance import DissimilarityMatrix >>> dm = DissimilarityMatrix([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3], ... [2, 1, 0, 1, 2], [3, 2, 1, 0, 1], ... [4, 3, 2, 1, 0]], ... ['A', 'B', 'C', 'D', 'E']) >>> dm.within(['A', 'B', 'C']) i j value 0 A A 0.0 1 A B 1.0 2 A C 2.0 3 B A 1.0 4 B B 0.0 5 B C 1.0 6 C A 2.0 7 C B 1.0 8 C C 0.0 """ ids = set(ids) not_present = ids - set(self._id_index) if not_present: raise MissingIDError( "At least one ID (e.g., '%s') was not " "found." % not_present.pop() ) return self._subset_to_dataframe(ids, ids) def between(self, from_, to_, allow_overlap=False): """Obtain the distances between the two groups of IDs. Parameters ---------- from_ : Iterable of str The IDs to obtain distances from. Distances from all pairs of IDs in from and to will be obtained. to_ : Iterable of str The IDs to obtain distances to. Distances from all pairs of IDs in to and from will be obtained. allow_overlap : bool, optional If True, allow overlap in the IDs of from and to (which would in effect be collecting the within distances). Default is False. Returns ------- pd.DataFrame (i, j, value) representing the source ID ("i"), the target ID ("j") and the distance ("value"). Raises ------ MissingIDError If an ID(s) specified is not in the dissimilarity matrix. Notes ----- Order of the return items is stable, meaning that requesting IDs ['a', 'b'] is equivalent to ['b', 'a']. The order is with respect to the .ids attribute of self. 
Examples -------- >>> from skbio.stats.distance import DissimilarityMatrix >>> dm = DissimilarityMatrix([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3], ... [2, 1, 0, 1, 2], [3, 2, 1, 0, 1], ... [4, 3, 2, 1, 0]], ... ['A', 'B', 'C', 'D', 'E']) >>> dm.between(['A', 'B'], ['C', 'D', 'E']) i j value 0 A C 2.0 1 A D 3.0 2 A E 4.0 3 B C 1.0 4 B D 2.0 5 B E 3.0 """ from_ = set(from_) to_ = set(to_) all_ids = from_ | to_ not_present = all_ids - set(self._id_index) if not_present: raise MissingIDError( "At least one ID (e.g., '%s') was not " "found." % not_present.pop() ) overlapping = from_ & to_ if not allow_overlap and overlapping: raise KeyError( "At least one ID overlaps in from_ and to_ " "(e.g., '%s'). This constraint can removed with " "allow_overlap=True." % overlapping.pop() ) return self._subset_to_dataframe(from_, to_) def _subset_to_dataframe(self, i_ids, j_ids): """Extract a subset of self and express as a DataFrame. Parameters ---------- i_ids : Iterable of str The "from" IDs. j_ids : Iterable of str The "to" IDs. Notes ----- ID membership is not tested by this private method, and it is assumed the caller has asserted the IDs are present. Returns ------- pd.DataFrame (i, j, value) representing the source ID ("i"), the target ID ("j") and the distance ("value"). """ i_indices = self._stable_order(i_ids) j_indices = self._stable_order(j_ids) j_length = len(j_indices) j_labels = tuple([self.ids[j] for j in j_indices]) i = [] j = [] # np.hstack([]) throws a ValueError. However, np.hstack([np.array([])]) # is valid and returns an empty array. Accordingly, an empty array is # included here so that np.hstack works in the event that either i_ids # or j_ids is empty. 
values = [np.array([])] for i_idx in i_indices: i.extend([self.ids[i_idx]] * j_length) j.extend(j_labels) subset = self._data[i_idx, j_indices] values.append(subset) i = pd.Series(i, name="i", dtype=str) j = pd.Series(j, name="j", dtype=str) values = pd.Series(np.hstack(values), name="value") return pd.concat([i, j, values], axis=1) def plot(self, cmap=None, title=""): """Create a heatmap of the dissimilarity matrix. Parameters ---------- cmap: str or matplotlib.colors.Colormap, optional Sets the color scheme of the heatmap If ``None``, defaults to the colormap specified in the matplotlib rc file. title: str, optional Sets the title label of the heatmap (Default is blank) Returns ------- matplotlib.figure.Figure Figure containing the heatmap and colorbar of the plotted dissimilarity matrix. Examples -------- .. plot:: Define a dissimilarity matrix with five objects labeled A-E: >>> from skbio.stats.distance import DissimilarityMatrix >>> dm = DissimilarityMatrix([[0, 1, 2, 3, 4], [1, 0, 1, 2, 3], ... [2, 1, 0, 1, 2], [3, 2, 1, 0, 1], ... [4, 3, 2, 1, 0]], ... 
['A', 'B', 'C', 'D', 'E']) Plot the dissimilarity matrix as a heatmap: >>> fig = dm.plot(cmap='Reds', title='Example heatmap') # doctest: +SKIP """ self._get_mpl_plt() # based on http://stackoverflow.com/q/14391959/3776794 fig, ax = self.plt.subplots() # use pcolormesh instead of pcolor for performance heatmap = ax.pcolormesh(self.data, cmap=cmap) fig.colorbar(heatmap) # center labels within each cell ticks = np.arange(0.5, self.shape[0]) ax.set_xticks(ticks, minor=False) ax.set_yticks(ticks, minor=False) # Ensure there is no white border around the heatmap by manually # setting the limits ax.set_ylim(0, len(self.ids)) ax.set_xlim(0, len(self.ids)) # display data as it is stored in the dissimilarity matrix # (default is to have y-axis inverted) ax.invert_yaxis() ax.set_xticklabels(self.ids, rotation=90, minor=False) ax.set_yticklabels(self.ids, minor=False) ax.set_title(title) return fig def to_data_frame(self): """Create a ``pandas.DataFrame`` from this ``DissimilarityMatrix``. Returns ------- pd.DataFrame ``pd.DataFrame`` with IDs on index and columns. Examples -------- >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0, 1, 2], ... [1, 0, 3], ... [2, 3, 0]], ids=['a', 'b', 'c']) >>> df = dm.to_data_frame() >>> df a b c a 0.0 1.0 2.0 b 1.0 0.0 3.0 c 2.0 3.0 0.0 """ return pd.DataFrame(data=self.data, index=self.ids, columns=self.ids) def __str__(self): """Return a string representation of the dissimilarity matrix. Summary includes matrix dimensions, a (truncated) list of IDs, and (truncated) array of dissimilarities. Returns ------- str String representation of the dissimilarity matrix. """ return "%dx%d %s matrix\nIDs:\n%s\nData:\n" % ( self.shape[0], self.shape[1], self._matrix_element_name, _pprint_strs(self.ids), ) + str(self.data) def __eq__(self, other): """Compare this dissimilarity matrix to another for equality. Two dissimilarity matrices are equal if they have the same shape, IDs (in the same order!), and have data arrays that are equal. 
Checks are *not* performed to ensure that `other` is a `DissimilarityMatrix` instance. Parameters ---------- other : DissimilarityMatrix Dissimilarity matrix to compare to for equality. Returns ------- bool ``True`` if `self` is equal to `other`, ``False`` otherwise. """ equal = True # The order these checks are performed in is important to be as # efficient as possible. The check for shape equality is not strictly # necessary as it should be taken care of in np.array_equal, but I'd # rather explicitly bail before comparing IDs or data. Use array_equal # instead of (a == b).all() because of this issue: # http://stackoverflow.com/a/10582030 try: if self.shape != other.shape: equal = False elif self.ids != other.ids: equal = False elif not np.array_equal(self.data, other.data): equal = False except AttributeError: equal = False return equal def __ne__(self, other): """Determine whether two dissimilarity matrices are not equal. Parameters ---------- other : DissimilarityMatrix Dissimilarity matrix to compare to. Returns ------- bool ``True`` if `self` is not equal to `other`, ``False`` otherwise. See Also -------- __eq__ """ return not self == other def __contains__(self, lookup_id): """Check if the specified ID is in the dissimilarity matrix. Parameters ---------- lookup_id : str ID to search for. Returns ------- bool ``True`` if `lookup_id` is in the dissimilarity matrix, ``False`` otherwise. See Also -------- index """ return lookup_id in self._id_index def __getitem__(self, index): """Slice into dissimilarity data by object ID or numpy indexing. Extracts data from the dissimilarity matrix by object ID, a pair of IDs, or numpy indexing/slicing. Parameters ---------- index : str, two-tuple of str, or numpy index `index` can be one of the following forms: an ID, a pair of IDs, or a numpy index. If `index` is a string, it is assumed to be an ID and a ``numpy.ndarray`` row vector is returned for the corresponding ID. 
Note that the ID's row of dissimilarities is returned, *not* its column. If the matrix is symmetric, the two will be identical, but this makes a difference if the matrix is asymmetric. If `index` is a two-tuple of strings, each string is assumed to be an ID and the corresponding matrix element is returned that represents the dissimilarity between the two IDs. Note that the order of lookup by ID pair matters if the matrix is asymmetric: the first ID will be used to look up the row, and the second ID will be used to look up the column. Thus, ``dm['a', 'b']`` may not be the same as ``dm['b', 'a']`` if the matrix is asymmetric. Otherwise, `index` will be passed through to ``DissimilarityMatrix.data.__getitem__``, allowing for standard indexing of a ``numpy.ndarray`` (e.g., slicing). Returns ------- ndarray or scalar Indexed data, where return type depends on the form of `index` (see description of `index` for more details). Raises ------ MissingIDError If the ID(s) specified in `index` are not in the dissimilarity matrix. Notes ----- The lookup based on ID(s) is quick. """ if isinstance(index, str): return self.data[self.index(index)] elif self._is_id_pair(index): return self.data[self.index(index[0]), self.index(index[1])] else: return self.data.__getitem__(index) def _validate_ids(self, data, ids): """Validate the IDs. Checks that IDs are unique and that the number of IDs matches the number of rows/cols in the data array. Subclasses can override this method to perform different/more specific validation. Notes ----- Accepts arguments instead of inspecting instance attributes to avoid creating an invalid dissimilarity matrix before raising an error. Otherwise, the invalid dissimilarity matrix could be used after the exception is caught and handled. """ duplicates = find_duplicates(ids) if duplicates: formatted_duplicates = ", ".join(repr(e) for e in duplicates) raise DissimilarityMatrixError( "IDs must be unique. 
Found the " "following duplicate IDs: %s" % formatted_duplicates ) if 0 == len(ids): raise DissimilarityMatrixError("IDs must be at least 1 in " "size.") if len(ids) != data.shape[0]: raise DissimilarityMatrixError( "The number of IDs (%d) must match " "the number of rows/columns in the " "data (%d)." % (len(ids), data.shape[0]) ) def _validate_shape(self, data): """Validate the data array shape. Checks that the data is at least 1x1 in size, 2D, square, and contains only floats. Notes ----- Accepts arguments instead of inspecting instance attributes to avoid creating an invalid dissimilarity matrix before raising an error. Otherwise, the invalid dissimilarity matrix could be used after the exception is caught and handled. """ if 0 in data.shape: raise DissimilarityMatrixError("Data must be at least 1x1 in " "size.") if len(data.shape) != 2: raise DissimilarityMatrixError("Data must have exactly two " "dimensions.") if data.shape[0] != data.shape[1]: raise DissimilarityMatrixError( "Data must be square (i.e., have " "the same number of rows and " "columns)." ) if data.dtype not in (np.float32, np.float64): raise DissimilarityMatrixError( "Data must contain only floating " "point values." ) def _validate(self, data, ids): """Validate the data array and IDs. Checks that the data is at least 1x1 in size, 2D, square, and contains only floats. Also checks that IDs are unique and that the number of IDs matches the number of rows/cols in the data array. Subclasses can override this method to perform different/more specific validation (e.g., see `DistanceMatrix`). Notes ----- Accepts arguments instead of inspecting instance attributes to avoid creating an invalid dissimilarity matrix before raising an error. Otherwise, the invalid dissimilarity matrix could be used after the exception is caught and handled. 
""" self._validate_shape(data) self._validate_ids(data, ids) def _index_list(self, list_): return {id_: idx for idx, id_ in enumerate(list_)} def _is_id_pair(self, index): return ( isinstance(index, tuple) and len(index) == 2 and all(map(lambda e: isinstance(e, str), index)) ) class DistanceMatrix(DissimilarityMatrix): """Store distances between objects. A `DistanceMatrix` is a `DissimilarityMatrix` with the additional requirement that the matrix data is symmetric. There are additional methods made available that take advantage of this symmetry. See Also -------- DissimilarityMatrix Notes ----- The distances are stored in redundant (square-form) format [1]_. To facilitate use with other scientific Python routines (e.g., scipy), the distances can be retrieved in condensed (vector-form) format using `condensed_form`. `DistanceMatrix` only requires that the distances it stores are symmetric. Checks are *not* performed to ensure the other three metric properties hold (non-negativity, identity of indiscernibles, and triangle inequality) [2]_. Thus, a `DistanceMatrix` instance can store distances that are not metric. References ---------- .. [1] http://docs.scipy.org/doc/scipy/reference/spatial.distance.html .. [2] http://planetmath.org/metricspace """ # Override here, used in superclass __str__ _matrix_element_name = "distance" @classonlymethod def from_iterable(cls, iterable, metric, key=None, keys=None, validate=True): """Create DistanceMatrix from all pairs in an iterable given a metric. Parameters ---------- iterable : iterable Iterable containing objects to compute pairwise distances on. metric : callable A function that takes two arguments and returns a float representing the distance between the two arguments. key : callable or metadata key, optional A function that takes one argument and returns a string representing the id of the element in the distance matrix. Alternatively, a key to a `metadata` property if it exists for each element in the `iterable`. 
If None, then default ids will be used. keys : iterable, optional An iterable of the same length as `iterable`. Each element will be used as the respective key. validate : boolean, optional If ``True``, all pairwise distances are computed, including upper and lower triangles and the diagonal, and the resulting matrix is validated for symmetry and hollowness. If ``False``, `metric` is assumed to be hollow and symmetric and only the lower triangle (excluding the diagonal) is computed. Pass ``validate=False`` if you are sure `metric` is hollow and symmetric for improved performance. Returns ------- DistanceMatrix The `metric` applied to pairwise elements in the `iterable`. Raises ------ ValueError If `key` and `keys` are both provided. """ if validate: return super(DistanceMatrix, cls).from_iterable(iterable, metric, key, keys) iterable = list(iterable) if key is not None and keys is not None: raise ValueError("Cannot use both `key` and `keys` at the same" " time.") keys_ = None if key is not None: keys_ = [resolve_key(e, key) for e in iterable] elif keys is not None: keys_ = keys dm = np.zeros((len(iterable),) * 2) for i, a in enumerate(iterable): for j, b in enumerate(iterable[:i]): dm[i, j] = dm[j, i] = metric(a, b) return cls(dm, keys_) def condensed_form(self): """Return an array of distances in condensed format. Returns ------- ndarray One-dimensional ``numpy.ndarray`` of distances in condensed format. Notes ----- Condensed format is described in [1]_. The conversion is not a constant-time operation, though it should be relatively quick to perform. References ---------- .. [1] http://docs.scipy.org/doc/scipy/reference/spatial.distance.html """ return squareform(self._data, force="tovector", checks=False) def permute(self, condensed=False): """Randomly permute both rows and columns in the matrix. Randomly permutes the ordering of rows and columns in the matrix. The same permutation is applied to both rows and columns in order to maintain symmetry and hollowness. 
Only the rows/columns in the distance matrix are permuted; the IDs are *not* permuted. Parameters ---------- condensed : bool, optional If ``True``, return the permuted distance matrix in condensed format. Otherwise, return the permuted distance matrix as a new ``DistanceMatrix`` instance. Returns ------- DistanceMatrix or ndarray Permuted distances as a new ``DistanceMatrix`` or as a ``ndarray`` in condensed format. See Also -------- condensed_form Notes ----- This method does not modify the distance matrix that it is called on. It is more efficient to pass ``condensed=True`` than permuting the distance matrix and then converting to condensed format. """ order = np.random.permutation(self.shape[0]) if condensed: permuted_condensed = distmat_reorder_condensed(self._data, order) return permuted_condensed else: # Note: Skip validation, since we assume self was already validated permuted = distmat_reorder(self._data, order) return self.__class__(permuted, self.ids, validate=False) def _validate(self, data, ids): """Validate the data array and IDs. Overrides the superclass `_validate`. Performs a check for symmetry in addition to the checks performed in the superclass. """ super(DistanceMatrix, self)._validate(data, ids) data_sym, data_hol = is_symmetric_and_hollow(data) if not data_sym: raise DistanceMatrixError("Data must be symmetric and cannot contain NaNs.") if not data_hol: raise DistanceMatrixError( "Data must be hollow (i.e., the diagonal" " can only contain zeros)." ) def to_series(self): """Create a ``pandas.Series`` from this ``DistanceMatrix``. The series will contain distances in condensed form: only distances from one matrix triangle are included, and the diagonal is excluded. The series' index will be a ``pd.MultiIndex`` relating pairs of IDs to distances. The pairs of IDs will be in row-major order with respect to the upper matrix triangle. To obtain all distances (i.e. 
both upper and lower matrix triangles and the diagonal), use ``DistanceMatrix.to_data_frame``. To obtain *only* the distances in condensed form (e.g. for use with SciPy), use ``DistanceMatrix.condensed_form``. Returns ------- pd.Series ``pd.Series`` with pairs of IDs on the index. See Also -------- to_data_frame condensed_form scipy.spatial.distance.squareform Examples -------- >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0, 1, 2, 3], ... [1, 0, 4, 5], ... [2, 4, 0, 6], ... [3, 5, 6, 0]], ids=['a', 'b', 'c', 'd']) >>> dm.to_series() a b 1.0 c 2.0 d 3.0 b c 4.0 d 5.0 c d 6.0 dtype: float64 """ distances = self.condensed_form() # `id_pairs` will not be interpreted as a `pd.MultiIndex` if it is an # iterable returned by `itertools.combinations`. id_pairs = list(itertools.combinations(self.ids, 2)) index = pd.Index(id_pairs, tupleize_cols=True) return pd.Series(data=distances, index=index, dtype=float) def randdm(num_objects, ids=None, constructor=None, random_fn=None): """Generate a distance matrix populated with random distances. Using the default `random_fn`, distances are randomly drawn from a uniform distribution over ``[0, 1)``. Regardless of `random_fn`, the resulting distance matrix is guaranteed to be symmetric and hollow. Parameters ---------- num_objects : int The number of objects in the resulting distance matrix. For example, if `num_objects` is 3, a 3x3 distance matrix will be returned. ids : sequence of str or None, optional A sequence of strings to be used as IDs. ``len(ids)`` must be equal to `num_objects`. If not provided, IDs will be monotonically-increasing integers cast as strings (numbering starts at 1). For example, ``('1', '2', '3')``. constructor : type, optional `DissimilarityMatrix` or subclass constructor to use when creating the random distance matrix. The returned distance matrix will be of this type. If ``None`` (the default), a `DistanceMatrix` instance will be returned. 
random_fn : function, optional Function to generate random values. `random_fn` must accept two arguments (number of rows and number of columns) and return a 2D ``numpy.ndarray`` of floats (or something that can be cast to float). If ``None`` (the default), ``numpy.random.rand`` will be used. Returns ------- DissimilarityMatrix `DissimilarityMatrix` (or subclass) instance of random distances. Type depends on `constructor`. See Also -------- numpy.random.rand """ if constructor is None: constructor = DistanceMatrix if random_fn is None: random_fn = np.random.rand data = np.tril(random_fn(num_objects, num_objects), -1) data += data.T if not ids: ids = map(str, range(1, num_objects + 1)) return constructor(data, ids) # helper functions for anosim and permanova def _preprocess_input_sng(ids, sample_size, grouping, column): """Compute intermediate results not affected by permutations. These intermediate results can be computed a single time for efficiency, regardless of grouping vector permutations (i.e., when calculating the p-value). These intermediate results are used by both ANOSIM and PERMANOVA. Also validates and normalizes input (e.g., converting ``DataFrame`` column into grouping vector). """ if isinstance(grouping, pd.DataFrame): if column is None: raise ValueError("Must provide a column name if supplying a DataFrame.") else: grouping = _df_to_vector(ids, grouping, column) elif isinstance(grouping, pd.Series): if (column is not None) and (column != grouping.name): raise ValueError( "Column name does not match your Series name. Try not" " providing column at all." ) else: grouping = _df_to_vector(ids, grouping.to_frame(), column=grouping.name) elif column is not None: raise ValueError("Must provide a DataFrame if supplying a column name.") if len(grouping) != sample_size: raise ValueError( "Grouping vector size must match the number of IDs in the " "distance matrix." ) # Find the group labels and convert grouping to an integer vector # (factor). 
groups, grouping = np.unique(grouping, return_inverse=True) num_groups = len(groups) if num_groups == len(grouping): raise ValueError( "All values in the grouping vector are unique. This method cannot " "operate on a grouping vector with only unique values (e.g., " "there are no 'within' distances because each group of objects " "contains only a single object)." ) if num_groups == 1: raise ValueError( "All values in the grouping vector are the same. This method " "cannot operate on a grouping vector with only a single group of " "objects (e.g., there are no 'between' distances because there is " "only a single group)." ) return num_groups, grouping def _preprocess_input(distance_matrix, grouping, column): """Compute intermediate results not affected by permutations. These intermediate results can be computed a single time for efficiency, regardless of grouping vector permutations (i.e., when calculating the p-value). These intermediate results are used by both ANOSIM and PERMANOVA. Also validates and normalizes input (e.g., converting ``DataFrame`` column into grouping vector). """ if not isinstance(distance_matrix, DistanceMatrix): raise TypeError("Input must be a DistanceMatrix.") sample_size = distance_matrix.shape[0] num_groups, grouping = _preprocess_input_sng( distance_matrix.ids, sample_size, grouping, column ) tri_idxs = np.triu_indices(sample_size, k=1) distances = distance_matrix.condensed_form() return sample_size, num_groups, grouping, tri_idxs, distances def _df_to_vector(ids, df, column): """Return a grouping vector from a ``DataFrame`` column. Parameters ---------- ids : liat IDs that will be mapped to group labels. df : pandas.DataFrame ``DataFrame`` (indexed by distance matrix ID). column : str Column name in `df` containing group labels. Returns ------- list Grouping vector (vector of labels) based on the IDs in `ids`. Each ID's label is looked up in the ``DataFrame`` under the column specified by `column`. 
Raises ------ ValueError If `column` is not in the ``DataFrame``, or a distance matrix ID is not in the ``DataFrame``. """ if column not in df: raise ValueError("Column '%s' not in DataFrame." % column) grouping = df.reindex(ids, axis=0).loc[:, column] if grouping.isnull().any(): raise ValueError( "One or more IDs in the distance matrix are not in the data " "frame." ) return grouping.tolist() def _run_monte_carlo_stats(test_stat_function, grouping, permutations): """Run stat test and compute significance with Monte Carlo permutations.""" if permutations < 0: raise ValueError( "Number of permutations must be greater than or equal to zero." ) stat = test_stat_function(grouping) p_value = np.nan if permutations > 0: perm_stats = np.empty(permutations, dtype=np.float64) for i in range(permutations): perm_grouping = np.random.permutation(grouping) perm_stats[i] = test_stat_function(perm_grouping) p_value = ((perm_stats >= stat).sum() + 1) / (permutations + 1) return stat, p_value def _build_results( method_name, test_stat_name, sample_size, num_groups, stat, p_value, permutations ): """Return ``pandas.Series`` containing results of statistical test.""" return pd.Series( data=[ method_name, test_stat_name, sample_size, num_groups, stat, p_value, permutations, ], index=[ "method name", "test statistic name", "sample size", "number of groups", "test statistic", "p-value", "number of permutations", ], name="%s results" % method_name, ) scikit-bio-0.6.2/skbio/stats/distance/_bioenv.py000066400000000000000000000221661464262511300216210ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from itertools import combinations import numpy as np import pandas as pd from scipy.spatial.distance import pdist from scipy.stats import spearmanr from skbio.stats.distance import DistanceMatrix def bioenv(distance_matrix, data_frame, columns=None): r"""Find subset of variables maximally correlated with distances. Finds subsets of variables whose Euclidean distances (after scaling the variables; see Notes section below for details) are maximally rank-correlated with the distance matrix. For example, the distance matrix might contain distances between communities, and the variables might be numeric environmental variables (e.g., pH). Correlation between the community distance matrix and Euclidean environmental distance matrix is computed using Spearman's rank correlation coefficient (:math:`\\rho`). Subsets of environmental variables range in size from 1 to the total number of variables (inclusive). For example, if there are 3 variables, the "best" variable subsets will be computed for subset sizes 1, 2, and 3. The "best" subset is chosen by computing the correlation between the community distance matrix and all possible Euclidean environmental distance matrices at the given subset size. The combination of environmental variables with maximum correlation is chosen as the "best" subset. Parameters ---------- distance_matrix : DistanceMatrix Distance matrix containing distances between objects (e.g., distances between samples of microbial communities). data_frame : pandas.DataFrame Contains columns of variables (e.g., numeric environmental variables such as pH) associated with the objects in `distance_matrix`. Must be indexed by the IDs in `distance_matrix` (i.e., the row labels must be distance matrix IDs), but the order of IDs between `distance_matrix` and `data_frame` need not be the same. All IDs in the distance matrix must be present in `data_frame`. 
Extra IDs in `data_frame` are allowed (they are ignored in the calculations). columns : iterable of strs, optional Column names in `data_frame` to include as variables in the calculations. If not provided, defaults to all columns in `data_frame`. The values in each column must be numeric or convertible to a numeric type. Returns ------- pandas.DataFrame Data frame containing the "best" subset of variables at each subset size, as well as the correlation coefficient of each. Raises ------ TypeError If invalid input types are provided, or if one or more specified columns in `data_frame` are not numeric. ValueError If column name(s) or `distance_matrix` IDs cannot be found in `data_frame`, if there is missing data (``NaN``) in the environmental variables, or if the environmental variables cannot be scaled (e.g., due to zero variance). See Also -------- scipy.stats.spearmanr Notes ----- See [1]_ for the original method reference (originally called BIO-ENV). The general algorithm and interface are similar to ``vegan::bioenv``, available in R's vegan package [2]_. This method can also be found in PRIMER-E [3]_ (originally called BIO-ENV, but is now called BEST). .. warning:: This method can take a *long* time to run if a large number of variables are specified, as all possible subsets are evaluated at each subset size. The variables are scaled before computing the Euclidean distance: each column is centered and then scaled by its standard deviation. References ---------- .. [1] Clarke, K. R & Ainsworth, M. 1993. "A method of linking multivariate community structure to environmental variables". Marine Ecology Progress Series, 92, 205-219. .. [2] http://cran.r-project.org/web/packages/vegan/index.html .. 
[3] http://www.primer-e.com/primer.htm Examples -------- Import the functionality we'll use in the following examples: >>> import pandas as pd >>> from skbio import DistanceMatrix >>> from skbio.stats.distance import bioenv Load a 4x4 community distance matrix: >>> dm = DistanceMatrix([[0.0, 0.5, 0.25, 0.75], ... [0.5, 0.0, 0.1, 0.42], ... [0.25, 0.1, 0.0, 0.33], ... [0.75, 0.42, 0.33, 0.0]], ... ['A', 'B', 'C', 'D']) Load a ``pandas.DataFrame`` with two environmental variables, pH and elevation: >>> df = pd.DataFrame([[7.0, 400], ... [8.0, 530], ... [7.5, 450], ... [8.5, 810]], ... index=['A','B','C','D'], ... columns=['pH', 'Elevation']) Note that the data frame is indexed with the same IDs (``'A'``, ``'B'``, ``'C'``, and ``'D'``) that are in the distance matrix. This is necessary in order to link the environmental variables (metadata) to each of the objects in the distance matrix. In this example, the IDs appear in the same order in both the distance matrix and data frame, but this is not necessary. Find the best subsets of environmental variables that are correlated with community distances: >>> bioenv(dm, df) # doctest: +NORMALIZE_WHITESPACE size correlation vars pH 1 0.771517 pH, Elevation 2 0.714286 We see that in this simple example, pH alone is maximally rank-correlated with the community distances (:math:`\rho=0.771517`). """ if not isinstance(distance_matrix, DistanceMatrix): raise TypeError("Must provide a DistanceMatrix as input.") if not isinstance(data_frame, pd.DataFrame): raise TypeError("Must provide a pandas.DataFrame as input.") if columns is None: columns = data_frame.columns.values.tolist() if len(set(columns)) != len(columns): raise ValueError("Duplicate column names are not supported.") if len(columns) < 1: raise ValueError("Must provide at least one column.") for column in columns: if column not in data_frame: raise ValueError("Column '%s' not in data frame." 
% column) # Subset and order the vars data frame to match the IDs in the distance # matrix, only keeping the specified columns. vars_df = data_frame.reindex(distance_matrix.ids, axis=0).loc[:, columns] if vars_df.isnull().any().any(): raise ValueError( "One or more IDs in the distance matrix are not " "in the data frame, or there is missing data in the " "data frame." ) try: vars_df = vars_df.astype(float) except ValueError: raise TypeError("All specified columns in the data frame must be " "numeric.") # Scale the vars and extract the underlying numpy array from the data # frame. We mainly do this for performance as we'll be taking subsets of # columns within a tight loop and using a numpy array ends up being ~2x # faster. vars_array = _scale(vars_df).values dm_flat = distance_matrix.condensed_form() num_vars = len(columns) var_idxs = np.arange(num_vars) # For each subset size, store the best combination of variables: # (string identifying best vars, subset size, rho) max_rhos = np.empty( num_vars, dtype=[("vars", object), ("size", int), ("correlation", float)] ) for subset_size in range(1, num_vars + 1): max_rho = None for subset_idxs in combinations(var_idxs, subset_size): # Compute Euclidean distances using the current subset of # variables. pdist returns the distances in condensed form. vars_dm_flat = pdist(vars_array[:, subset_idxs], metric="euclidean") rho = spearmanr(dm_flat, vars_dm_flat)[0] # If there are ties for the best rho at a given subset size, choose # the first one in order to match vegan::bioenv's behavior. if max_rho is None or rho > max_rho[0]: max_rho = (rho, subset_idxs) vars_label = ", ".join([columns[i] for i in max_rho[1]]) max_rhos[subset_size - 1] = (vars_label, subset_size, max_rho[0]) return pd.DataFrame.from_records(max_rhos, index="vars") def _scale(df): """Center and scale each column in a data frame. Each column is centered (by subtracting the mean) and then scaled by its standard deviation. 
""" # Modified from http://stackoverflow.com/a/18005745 df = df.copy() df -= df.mean() df /= df.std() if df.isnull().any().any(): raise ValueError( "Column(s) in the data frame could not be scaled, " "likely because the column(s) had no variance." ) return df scikit-bio-0.6.2/skbio/stats/distance/_cutils.pyx000066400000000000000000000302361464262511300220270ustar00rootroot00000000000000# ----------------------------------------------------------------------------- # Copyright (c) 2021-2021, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ----------------------------------------------------------------------------- import numpy as np cimport numpy as cnp cnp.import_array() cimport cython from cython.parallel import prange from libc.math cimport sqrt, fabs ctypedef cnp.npy_intp intp_t ctypedef fused TReal: float double ctypedef fused floating: cnp.float64_t cnp.float32_t ctypedef cnp.float64_t float64_t ctypedef cnp.float32_t float32_t @cython.boundscheck(False) @cython.wraparound(False) def is_symmetric_and_hollow_cy(TReal[:, ::1] mat): """ Check if mat is symmetric and hollow. Equivalent to [not (mat.T != mat).any(), np.trace(mat) == 0] Parameters ---------- mat : 2D array_like Distance matrix. 
Result: ------- is_symmetric: Boolean not (mat.T != mat).any() is_hollow: Boolean np.trace(mat) == 0 """ cdef Py_ssize_t in_n = mat.shape[0] cdef Py_ssize_t in2 = mat.shape[1] assert in_n == in2 cdef Py_ssize_t trow,tcol cdef Py_ssize_t trow_max,tcol_max cdef Py_ssize_t row,col cdef TReal testval # use int instead of bool for portability cdef int is_sym = True cdef int is_hollow = True # use a tiled approach to maximize memory locality for trow in prange(0, in_n, 24, nogil=True): trow_max = min(trow+24, in_n) for tcol in range(0, in_n, 24): tcol_max = min(tcol+24, in_n) for row in range(trow, trow_max, 1): for col in range(tcol, tcol_max, 1): is_sym &= (mat[row,col]==mat[col,row]) if (trow==tcol): # diagonal block, only ones that can have col==row for col in range(tcol, tcol_max, 1): is_hollow &= (mat[col,col]==0) return [(is_sym==True), (is_hollow==True)] @cython.boundscheck(False) @cython.wraparound(False) def distmat_reorder_cy(TReal[:, ::1] in_mat, intp_t[::1] reorder_vec, TReal[:, ::1] out_mat): """ Reorder the rows and columns of a distance matrix given a reorder vector. Not all of the columns need to be used. For example: [ [0, 1, 2, 3] , [1, 0, 4, 5] , [2, 4, 0, 6] , [3, 5, 6, 0] ] with [1,0,3,2] will result in [ [0, 1, 5, 4] , [1, 0, 3, 2] , [5, 3, 0, 6] , [4, 2, 6, 0] ] Note: No error checking is performed. The caller must ensure that all values in reorder_vec are valid Parameters ---------- in_mat : 2D array_like Distance matrix. 
reorder_vec : 1D_array_like List of permutation indexes out_mat : 2D array_like Output, Distance matrix, must be same size as reorder_vec """ cdef Py_ssize_t in_n = in_mat.shape[0] cdef Py_ssize_t in2 = in_mat.shape[1] cdef Py_ssize_t out_n = reorder_vec.shape[0] cdef Py_ssize_t on2 = out_mat.shape[0] cdef Py_ssize_t on3 = out_mat.shape[1] assert in_n == in2 assert out_n == on2 assert out_n == on3 cdef Py_ssize_t row,col cdef Py_ssize_t vrow for row in prange(out_n, nogil=True): vrow = reorder_vec[row] for col in range(out_n): out_mat[row,col] = in_mat[vrow, reorder_vec[col]] @cython.boundscheck(False) @cython.wraparound(False) def distmat_reorder_condensed_cy(TReal[:, ::1] in_mat, intp_t[::1] reorder_vec, TReal[::1] out_mat_condensed): """ Reorder the rows and columns of a distance matrix given a reorder vector. Not all of the columns need to be used. For example: [ [0, 1, 2, 3] , [1, 0, 4, 5] , [2, 4, 0, 6] , [3, 5, 6, 0] ] with [1,0,3,2] will result in [ 1, 5, 4, 3, 2, 6 ] Note: No error checking is performed. The caller must ensure that all values in reorder_vec are valid Parameters ---------- in_mat : 2D array_like Distance matrix. 
reorder_vec : 1D_array_like List of permutation indexes out_mat_condensed : 1D array_like Output, condensed distance matrix """ cdef Py_ssize_t in_n = in_mat.shape[0] cdef Py_ssize_t in2 = in_mat.shape[1] cdef Py_ssize_t out_n = reorder_vec.shape[0] cdef Py_ssize_t on2 = out_mat_condensed.shape[0] assert in_n == in2 assert on2 == ((out_n-1)*out_n)/2 cdef Py_ssize_t row,col cdef Py_ssize_t vrow cdef Py_ssize_t idx for row in prange(out_n-1, nogil=True): vrow = reorder_vec[row] idx = row*(out_n-1) - ((row-1)*row)//2 for col in range(out_n-row-1): out_mat_condensed[idx+col] = in_mat[vrow, reorder_vec[col+row+1]] @cython.boundscheck(False) @cython.wraparound(False) def mantel_perm_pearsonr_cy(TReal[:, ::1] x_data, intp_t[:, ::1] perm_order, TReal xmean, TReal normxm, TReal[::1] ym_normalized, TReal[::1] permuted_stats): """ Fused permute, fma, pearsonr for mantel. Replaces the following python code: def _mantel_perm_pearsonr_one(x_flat, xmean, normxm, ym_normalized): # inline pearsonr, condensed from scipy.stats.pearsonr # and reusing some of the known values xm_normalized = (x_flat - xmean)/normxm one_stat = np.dot(xm_normalized, ym_normalized) one_stat = max(min(one_stat, 1.0), -1.0) return one_stat perm_gen = (_mantel_perm_pearsonr_one(distmat_reorder_condensed(x._data, perm_order[p,:]), xmean, normxm, ym_normalized) for p in range(permutations)) permuted_stats = np.fromiter(perm_gen, np.float, count=permutations) Parameters ---------- x_data : 2D array_like Distance matrix. perm_order : 2D array_like List of permutation orders. 
xmean: real Mean value of condensed x_data normxm: real Norm of pre-processed xm ym_normalized : 1D_array_like Normalized condensed y_data permuted_stats : 1D array_like Output, Pearson stats """ cdef Py_ssize_t in_n = x_data.shape[0] cdef Py_ssize_t in2 = x_data.shape[1] cdef Py_ssize_t perms_n = perm_order.shape[0] cdef Py_ssize_t out_n = perm_order.shape[1] cdef Py_ssize_t y_n = ym_normalized.shape[0] cdef Py_ssize_t on2 = permuted_stats.shape[0] assert in_n == in2 assert y_n == ((out_n-1)*out_n)/2 assert perms_n == on2 cdef Py_ssize_t p cdef Py_ssize_t row,col,icol cdef Py_ssize_t vrow cdef Py_ssize_t idx cdef TReal mul = 1.0/normxm cdef TReal add = -xmean/normxm cdef TReal my_ps cdef TReal yval cdef TReal xval for p in prange(perms_n, nogil=True): my_ps = 0.0 for row in range(out_n-1): vrow = perm_order[p, row] idx = row*(out_n-1) - ((row-1)*row)//2 for icol in range(out_n-row-1): col = icol+row+1 yval = ym_normalized[idx+icol] xval = x_data[vrow, perm_order[p, col]]*mul + add # do not use += to avoid having prange consider it for reduction my_ps = yval*xval + my_ps # Presumably, if abs(one_stat) > 1, then it is only some small artifact of # floating point arithmetic. 
if my_ps>1.0: my_ps = 1.0 elif my_ps<-1.0: my_ps = -1.0 permuted_stats[p] = my_ps @cython.boundscheck(False) @cython.wraparound(False) def permanova_f_stat_sW_cy(TReal[:, ::1] distance_matrix, Py_ssize_t[::1] group_sizes, Py_ssize_t[::1] grouping): """Compute PERMANOVA pseudo-F partial statistic.""" cdef Py_ssize_t in_n = distance_matrix.shape[0] cdef Py_ssize_t in2 = distance_matrix.shape[1] cdef Py_ssize_t in3 = grouping.shape[0] assert in_n == in2 assert in_n == in3 cdef double s_W = 0.0 cdef Py_ssize_t group_idx cdef double local_s_W cdef double val cdef Py_ssize_t row, col, rowi, coli cdef Py_ssize_t in_n_2 = in_n//2 for rowi in prange(in_n_2, nogil=True): # since columns get shorter, combine first and last row=rowi local_s_W = 0.0 group_idx = grouping[row] for coli in range(in_n-row-1): col = coli+row+1 if grouping[col] == group_idx: val = distance_matrix[row,col] local_s_W = local_s_W + val * val s_W += local_s_W/group_sizes[group_idx] row = in_n-rowi-2 if row!=rowi: # don't double count local_s_W = 0.0 group_idx = grouping[row] for coli in range(in_n-row-1): col = coli+row+1 if grouping[col] == group_idx: val = distance_matrix[row,col] local_s_W = local_s_W + val * val s_W += local_s_W/group_sizes[group_idx] return s_W @cython.boundscheck(False) @cython.wraparound(False) def geomedian_axis_one(floating[:, :] X, floating eps=1e-7, size_t maxiters=500): """Compute high dimensional median. This function, its helpers (dist_euclidean, norm_euclidean, sum), and necessary type definitions (floating) were ported from hdmedians v0.14.2. The only change was changing "cdef" to "def" on the line defining the function. See https://github.com/daleroberts/hdmedians for more information. 
""" cdef size_t p = X.shape[0] cdef size_t n = X.shape[1] cdef floating[:] y = np.mean(X, axis=1) if n == 1: return y if floating is cnp.float32_t: dtype = np.float32 else: dtype = np.float64 cdef floating[:] D = np.empty(n, dtype=dtype) cdef floating[:] Dinv = np.empty(n, dtype=dtype) cdef floating[:] W = np.empty(n, dtype=dtype) cdef floating[:] T = np.empty(p, dtype=dtype) cdef floating[:] y1 = np.empty(p, dtype=dtype) cdef floating[:] R = np.empty(p, dtype=dtype) cdef floating dist, Dinvs, total, r, rinv, tmp, Di cdef size_t nzeros = n cdef size_t iteration with nogil: iteration = 0 while iteration < maxiters: for i in range(n): Di = _dist_euclidean(X[:, i], y) D[i] = Di if fabs(Di) > eps: Dinv[i] = 1. / Di else: Dinv[i] = 0. Dinvs = _sum(Dinv) for i in range(n): W[i] = Dinv[i] / Dinvs for j in range(p): total = 0. for i in range(n): if fabs(D[i]) > eps: total += W[i] * X[j, i] T[j] = total nzeros = n for i in range(n): if fabs(D[i]) > eps: nzeros -= 1 if nzeros == 0: y1 = T elif nzeros == n: break else: for j in range(p): R[j] = (T[j] - y[j]) * Dinvs r = _norm_euclidean(R) if r > eps: rinv = nzeros/r else: rinv = 0. for j in range(p): y1[j] = max(0, 1-rinv)*T[j] + min(1, rinv)*y[j] dist = _dist_euclidean(y, y1) if dist < eps: break y[:] = y1 iteration = iteration + 1 return y cdef floating _dist_euclidean(floating[:] x, floating[:] y) nogil: cdef size_t n = x.shape[0] cdef float64_t d = 0. cdef float64_t tmp for i in range(n): tmp = x[i] - y[i] d += tmp * tmp return sqrt(d) cdef floating _norm_euclidean(floating[:] x) nogil: cdef size_t n = x.shape[0] cdef float64_t d = 0. for i in range(n): d += x[i] * x[i] return sqrt(d) cdef floating _sum(floating[:] x) nogil: cdef size_t n = x.shape[0] cdef float64_t total = 0. 
for i in range(n): total += x[i] return total scikit-bio-0.6.2/skbio/stats/distance/_mantel.py000066400000000000000000000625461464262511300216250ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from itertools import combinations import warnings import numpy as np import pandas as pd import scipy.special from scipy.stats import kendalltau from scipy.stats import ConstantInputWarning from scipy.stats import NearConstantInputWarning from skbio.stats.distance import DistanceMatrix from ._cutils import mantel_perm_pearsonr_cy def mantel( x, y, method="pearson", permutations=999, alternative="two-sided", strict=True, lookup=None, ): r"""Compute correlation between distance matrices using the Mantel test. The Mantel test compares two distance matrices by computing the correlation between the distances in the lower (or upper) triangular portions of the symmetric distance matrices. Correlation can be computed using Pearson's product-moment correlation coefficient or Spearman's rank correlation coefficient. As defined in [1]_, the Mantel test computes a test statistic :math:`r_M` given two symmetric distance matrices :math:`D_X` and :math:`D_Y`. :math:`r_M` is defined as .. math:: r_M=\frac{1}{d-1}\sum_{i=1}^{n-1}\sum_{j=i+1}^{n} stand(D_X)_{ij}stand(D_Y)_{ij} where .. math:: d=\frac{n(n-1)}{2} and :math:`n` is the number of rows/columns in each of the distance matrices. :math:`stand(D_X)` and :math:`stand(D_Y)` are distance matrices with their upper triangles containing standardized distances. 
Note that since :math:`D_X` and :math:`D_Y` are symmetric, the lower triangular portions of the matrices could equivalently have been used instead of the upper triangular portions (the current function behaves in this manner). If ``method='spearman'``, the above equation operates on ranked distances instead of the original distances. Statistical significance is assessed via a permutation test. The rows and columns of the first distance matrix (`x`) are randomly permuted a number of times (controlled via `permutations`). A correlation coefficient is computed for each permutation and the p-value is the proportion of permuted correlation coefficients that are equal to or more extreme than the original (unpermuted) correlation coefficient. Whether a permuted correlation coefficient is "more extreme" than the original correlation coefficient depends on the alternative hypothesis (controlled via `alternative`). Parameters ---------- x, y : DistanceMatrix or array_like Input distance matrices to compare. If `x` and `y` are both ``DistanceMatrix`` instances, they will be reordered based on matching IDs (see `strict` and `lookup` below for handling matching/mismatching IDs); thus they are not required to be in the same ID order. If `x` and `y` are ``array_like``, no reordering is applied and both matrices must have the same shape. In either case, `x` and `y` must be at least 3x3 in size *after* reordering and matching of IDs. method : {'pearson', 'spearman','kendalltau'} Method used to compute the correlation between distance matrices. permutations : int, optional Number of times to randomly permute `x` when assessing statistical significance. Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and the p-value will be ``np.nan``. alternative : {'two-sided', 'greater', 'less'} Alternative hypothesis to use when calculating statistical significance. 
The default ``'two-sided'`` alternative hypothesis calculates the proportion of permuted correlation coefficients whose magnitude (i.e. after taking the absolute value) is greater than or equal to the absolute value of the original correlation coefficient. ``'greater'`` calculates the proportion of permuted coefficients that are greater than or equal to the original coefficient. ``'less'`` calculates the proportion of permuted coefficients that are less than or equal to the original coefficient. strict : bool, optional If ``True``, raises a ``ValueError`` if IDs are found that do not exist in both distance matrices. If ``False``, any nonmatching IDs are discarded before running the test. See `n` (in Returns section below) for the number of matching IDs that were used in the test. This parameter is ignored if `x` and `y` are ``array_like``. lookup : dict, optional Maps each ID in the distance matrices to a new ID. Used to match up IDs across distance matrices prior to running the Mantel test. If the IDs already match between the distance matrices, this parameter is not necessary. This parameter is disallowed if `x` and `y` are ``array_like``. Returns ------- corr_coeff : float Correlation coefficient of the test (depends on `method`). p_value : float p-value of the test. n : int Number of rows/columns in each of the distance matrices, after any reordering/matching of IDs. If ``strict=False``, nonmatching IDs may have been discarded from one or both of the distance matrices prior to running the Mantel test, so this value may be important as it indicates the *actual* size of the matrices that were compared. Raises ------ ValueError If `x` and `y` are not at least 3x3 in size after reordering/matching of IDs, or an invalid `method`, number of `permutations`, or `alternative` are provided. TypeError If `x` and `y` are not both ``DistanceMatrix`` instances or ``array_like``. 
See Also -------- DistanceMatrix scipy.stats.pearsonr scipy.stats.spearmanr pwmantel Notes ----- The Mantel test was first described in [2]_. The general algorithm and interface are similar to ``vegan::mantel``, available in R's vegan package [3]_. ``np.nan`` will be returned for the p-value if `permutations` is zero or if the correlation coefficient is ``np.nan``. The correlation coefficient will be ``np.nan`` if one or both of the inputs does not have any variation (i.e. the distances are all constant) and ``method='spearman'``. References ---------- .. [1] Legendre, P. and Legendre, L. (2012) Numerical Ecology. 3rd English Edition. Elsevier. .. [2] Mantel, N. (1967). "The detection of disease clustering and a generalized regression approach". Cancer Research 27 (2): 209-220. PMID 6018555. .. [3] http://cran.r-project.org/web/packages/vegan/index.html Examples -------- Import the functionality we'll use in the following examples: >>> from skbio import DistanceMatrix >>> from skbio.stats.distance import mantel Define two 3x3 distance matrices: >>> x = DistanceMatrix([[0, 1, 2], ... [1, 0, 3], ... [2, 3, 0]]) >>> y = DistanceMatrix([[0, 2, 7], ... [2, 0, 6], ... [7, 6, 0]]) Compute the Pearson correlation between them and assess significance using a two-sided test with 999 permutations: >>> coeff, p_value, n = mantel(x, y) >>> print(round(coeff, 4)) 0.7559 Thus, we see a moderate-to-strong positive correlation (:math:`r_M=0.7559`) between the two matrices. In the previous example, the distance matrices (``x`` and ``y``) have the same IDs, in the same order: >>> x.ids ('0', '1', '2') >>> y.ids ('0', '1', '2') If necessary, ``mantel`` will reorder the distance matrices prior to running the test. The function also supports a ``lookup`` dictionary that maps distance matrix IDs to new IDs, providing a way to match IDs between distance matrices prior to running the Mantel test. 
    For example, let's reassign the distance matrices' IDs so that there are
    no matching IDs between them:

    >>> x.ids = ('a', 'b', 'c')
    >>> y.ids = ('d', 'e', 'f')

    If we rerun ``mantel``, we get the following error notifying us that there
    are nonmatching IDs (this is the default behavior with ``strict=True``):

    >>> mantel(x, y)
    Traceback (most recent call last):
        ...
    ValueError: IDs exist that are not in both distance matrices.

    If we pass ``strict=False`` to ignore/discard nonmatching IDs, we see that
    no matches exist between `x` and `y`, so the Mantel test still cannot be
    run:

    >>> mantel(x, y, strict=False)
    Traceback (most recent call last):
        ...
    ValueError: No matching IDs exist between the distance matrices.

    To work around this, we can define a ``lookup`` dictionary to specify how
    the IDs should be matched between distance matrices:

    >>> lookup = {'a': 'A', 'b': 'B', 'c': 'C',
    ...           'd': 'A', 'e': 'B', 'f': 'C'}

    ``lookup`` maps each ID to ``'A'``, ``'B'``, or ``'C'``. If we rerun
    ``mantel`` with ``lookup``, we get the same results as the original
    example where all distance matrix IDs matched:

    >>> coeff, p_value, n = mantel(x, y, lookup=lookup)
    >>> print(round(coeff, 4))
    0.7559

    ``mantel`` also accepts input that is ``array_like``. For example, if we
    redefine `x` and `y` as nested Python lists instead of ``DistanceMatrix``
    instances, we obtain the same result:

    >>> x = [[0, 1, 2],
    ...      [1, 0, 3],
    ...      [2, 3, 0]]
    >>> y = [[0, 2, 7],
    ...      [2, 0, 6],
    ...      [7, 6, 0]]
    >>> coeff, p_value, n = mantel(x, y)
    >>> print(round(coeff, 4))
    0.7559

    It is important to note that reordering/matching of IDs (and hence the
    ``strict`` and ``lookup`` parameters) do not apply when input is
    ``array_like`` because there is no notion of IDs.

    """
    special = False  # set to true, if we have a dedicated implementation
    if method == "pearson":
        special = True
    elif method == "spearman":
        special = True
    elif method == "kendalltau":
        corr_func = kendalltau
    else:
        raise ValueError("Invalid correlation method '%s'." % method)

    if permutations < 0:
        raise ValueError(
            "Number of permutations must be greater than or " "equal to zero."
        )
    if alternative not in ("two-sided", "greater", "less"):
        raise ValueError("Invalid alternative hypothesis '%s'." % alternative)

    # reorder/intersect IDs (DistanceMatrix inputs) or validate shapes
    x, y = _order_dms(x, y, strict=strict, lookup=lookup)

    n = x.shape[0]
    if n < 3:
        raise ValueError(
            "Distance matrices must have at least 3 matching IDs "
            "between them (i.e., minimum 3x3 in size)."
        )

    if special:
        # pearson and spearman use fast dedicated implementations that
        # compute all permuted statistics in one pass
        if method == "pearson":
            orig_stat, comp_stat, permuted_stats = _mantel_stats_pearson(
                x, y, permutations
            )
        elif method == "spearman":
            orig_stat, comp_stat, permuted_stats = _mantel_stats_spearman(
                x, y, permutations
            )
        else:
            # unreachable: `special` is only set for the two methods above
            raise ValueError("Invalid correlation method '%s'." % method)
    else:
        # generic path: correlate condensed forms, permuting x each round
        x_flat = x.condensed_form()
        y_flat = y.condensed_form()
        orig_stat = comp_stat = corr_func(x_flat, y_flat)[0]
        del x_flat

        permuted_stats = []
        if not (permutations == 0 or np.isnan(orig_stat)):
            perm_gen = (
                corr_func(x.permute(condensed=True), y_flat)[0]
                for _ in range(permutations)
            )
            permuted_stats = np.fromiter(perm_gen, float, count=permutations)
        del y_flat

    if permutations == 0 or np.isnan(orig_stat):
        p_value = np.nan
    else:
        # count permuted statistics at least as extreme as the observed one,
        # per the chosen alternative hypothesis; +1 accounts for the
        # observed statistic itself
        if alternative == "two-sided":
            count_better = (np.absolute(permuted_stats) >= np.absolute(comp_stat)).sum()
        elif alternative == "greater":
            count_better = (permuted_stats >= comp_stat).sum()
        else:
            count_better = (permuted_stats <= comp_stat).sum()

        p_value = (count_better + 1) / (permutations + 1)

    return orig_stat, p_value, n


def _mantel_stats_pearson_flat(x, y_flat, permutations):
    """Compute original and permuted stats using pearsonr.

    Parameters
    ----------
    x : DistanceMatrix
        Input distance matrix.
    y_flat: 1D array
        Compact representation of a distance matrix.
    permutations : int
        Number of times to randomly permute `x` when assessing statistical
        significance. Must be greater than or equal to zero. If zero,
        statistical significance calculations will be skipped and
        permuted_stats will be an empty array.
    Returns
    -------
    orig_stat : 1D array_like
        Correlation coefficient of the test.
    comp_stat : 1D array_like
        Correlation coefficient to compare against permuted_stats,
        usually the same as orig_stat, but on certain architectures
        it will differ. This should be used for any p-value calculation
        as it will match the values for any "self-permutations" in the
        permuted_stats.
    permuted_stats : 1D array_like
        Permuted correlation coefficients of the test.

    """
    x_flat = x.condensed_form()

    # If an input is constant, the correlation coefficient is not defined.
    if (x_flat == x_flat[0]).all() or (y_flat == y_flat[0]).all():
        warnings.warn(ConstantInputWarning())
        return np.nan, np.nan, []

    # inline pearsonr, condensed from scipy.stats.pearsonr
    xmean = x_flat.mean()
    xm = x_flat - xmean
    normxm = np.linalg.norm(xm)
    xm_normalized = xm / normxm
    del xm
    del x_flat

    ymean = y_flat.mean()
    ym = y_flat - ymean
    normym = np.linalg.norm(ym)
    ym_normalized = ym / normym
    del ym

    threshold = 1e-13
    if (normxm < threshold * abs(xmean)) or (normym < threshold * abs(ymean)):
        # If all the values in x (likewise y) are very close to the mean,
        # the loss of precision that occurs in the subtraction xm = x - xmean
        # might result in large errors in r.
        warnings.warn(NearConstantInputWarning())

    # correlation of the unpermuted data is the dot of the normalized vectors
    orig_stat = np.dot(xm_normalized, ym_normalized)
    # Presumably, if abs(orig_stat) > 1, then it is only some small artifact
    # of floating point arithmetic.
    orig_stat = max(min(orig_stat, 1.0), -1.0)

    mat_n = x._data.shape[0]
    # note: xmean and normxm do not change with permutations
    permuted_stats = []
    comp_stat = orig_stat
    if not (permutations == 0 or np.isnan(orig_stat)):
        # inline DistanceMatrix.permute, grouping them together
        x_data = x._data
        if not x_data.flags.c_contiguous:
            x_data = np.asarray(x_data, order="C")

        # compute all pearsonr permutations at once
        # create first the list of permutations
        perm_order = np.empty((permutations + 1, mat_n), dtype=np.intp)
        # first row/statistic will be comp_stat (identity permutation)
        perm_order[0, :] = np.arange(mat_n)
        for row in range(1, permutations + 1):
            perm_order[row, :] = np.random.permutation(mat_n)

        permuted_stats = np.empty(permutations + 1, dtype=x_data.dtype)
        mantel_perm_pearsonr_cy(
            x_data, perm_order, xmean, normxm, ym_normalized, permuted_stats
        )
        comp_stat = permuted_stats[0]
        permuted_stats = permuted_stats[1:]

    return orig_stat, comp_stat, permuted_stats


def _mantel_stats_pearson(x, y, permutations):
    """Compute original and permuted stats using pearsonr.

    Parameters
    ----------
    x, y : DistanceMatrix
        Input distance matrices to compare.
    permutations : int
        Number of times to randomly permute `x` when assessing statistical
        significance. Must be greater than or equal to zero. If zero,
        statistical significance calculations will be skipped and
        permuted_stats will be an empty array.

    Returns
    -------
    orig_stat : 1D array_like
        Correlation coefficient of the test.
    comp_stat : 1D array_like
        Correlation coefficient to compare against permuted_stats,
        usually the same as orig_stat, but on certain architectures
        it will differ. This should be used for any p-value calculation
        as it will match the values for any "self-permutations" in the
        permuted_stats.
    permuted_stats : 1D array_like
        Permuted correlation coefficients of the test.
""" y_flat = y.condensed_form() return _mantel_stats_pearson_flat(x, y_flat, permutations) def _mantel_stats_spearman(x, y, permutations): """Compute original and permuted stats using spearmanr. Parameters ---------- x, y : DistanceMatrix Input distance matrices to compare. permutations : int Number of times to randomly permute `x` when assessing statistical significance. Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and permuted_stats will be an empty array. Returns ------- orig_stat : 1D array_like Correlation coefficient of the test. comp_stat : 1D array_like Correlation coefficient to compare against permuted_stats, usually the same as orig_stat, but on certain architectures it will differ. This should be used for any p-value calculation as it will match the values for any "self-permutations" in the permuted_stats. permuted_stats : 1D array_like Permuted correlation coefficients of the test. """ x_flat = x.condensed_form() y_flat = y.condensed_form() # If an input is constant, the correlation coefficient is not defined. if (x_flat == x_flat[0]).all() or (y_flat == y_flat[0]).all(): warnings.warn(ConstantInputWarning()) return np.nan, np.nan, [] y_rank = scipy.stats.rankdata(y_flat) del y_flat x_rank = scipy.stats.rankdata(x_flat) del x_flat x_rank_matrix = DistanceMatrix(x_rank, x.ids) del x_rank # for our purposes, spearman is just pearson on rankdata return _mantel_stats_pearson_flat(x_rank_matrix, y_rank, permutations) def pwmantel( dms, labels=None, method="pearson", permutations=999, alternative="two-sided", strict=True, lookup=None, ): """Run Mantel tests for every pair of given distance matrices. Runs a Mantel test for each pair of distance matrices and collates the results in a ``DataFrame``. Distance matrices do not need to be in the same ID order if they are ``DistanceMatrix`` instances. 
    Distance matrices will be re-ordered prior to running each pairwise test,
    and if ``strict=False``, IDs that don't match between a pair of distance
    matrices will be dropped prior to running the test (otherwise a
    ``ValueError`` will be raised if there are nonmatching IDs between any
    pair of distance matrices).

    Parameters
    ----------
    dms : iterable of DistanceMatrix objects, array_like objects, or filepaths
        to distance matrices. If they are ``array_like``, no reordering or
        matching of IDs will be performed.
    labels : iterable of str or int, optional
        Labels for each distance matrix in `dms`. These are used in the
        results ``DataFrame`` to identify the pair of distance matrices used
        in a pairwise Mantel test. If ``None``, defaults to
        monotonically-increasing integers starting at zero.
    method : {'pearson', 'spearman', 'kendalltau'}
        Correlation method. This value is passed directly to ``mantel``; see
        the ``mantel`` function for more details.
    permutations : int, optional
        Number of permutations. See ``mantel`` function for more details.
    alternative : {'two-sided', 'greater', 'less'}
        Alternative hypothesis. See ``mantel`` function for more details.
    strict : bool, optional
        Handling of nonmatching IDs. See ``mantel`` function for more
        details.
    lookup : dict, optional
        Map existing IDs to new IDs. See ``mantel`` function for more
        details.

    Returns
    -------
    pandas.DataFrame
        ``DataFrame`` containing the results of each pairwise test (one per
        row). Includes the number of objects considered in each test as
        column ``n`` (after applying `lookup` and filtering nonmatching IDs
        if ``strict=False``). Column ``p-value`` will display p-values as
        ``NaN`` if p-values could not be computed (they are stored as
        ``np.nan`` within the ``DataFrame``; see ``mantel`` for more
        details).

    See Also
    --------
    mantel
    DistanceMatrix.read

    Notes
    -----
    Passing a list of filepaths can be useful as it allows for a smaller
    amount of memory consumption as it only loads two matrices at a time as
    opposed to loading all distance matrices into memory.

    Examples
    --------
    Import the functionality we'll use in the following examples:

    >>> from skbio import DistanceMatrix
    >>> from skbio.stats.distance import pwmantel

    Define three 3x3 distance matrices:

    >>> x = DistanceMatrix([[0, 1, 2],
    ...                     [1, 0, 3],
    ...                     [2, 3, 0]])
    >>> y = DistanceMatrix([[0, 2, 7],
    ...                     [2, 0, 6],
    ...                     [7, 6, 0]])
    >>> z = DistanceMatrix([[0, 5, 6],
    ...                     [5, 0, 1],
    ...                     [6, 1, 0]])

    Run Mantel tests for each pair of distance matrices (there are 3 possible
    pairs):

    >>> pwmantel((x, y, z), labels=('x', 'y', 'z'),
    ...          permutations=0) # doctest: +NORMALIZE_WHITESPACE
             statistic p-value  n   method  permutations alternative
    dm1 dm2
    x   y     0.755929     NaN  3  pearson             0   two-sided
        z    -0.755929     NaN  3  pearson             0   two-sided
    y   z    -0.142857     NaN  3  pearson             0   two-sided

    Note that we passed ``permutations=0`` to suppress significance tests;
    the p-values in the output are labelled ``NaN``.

    """
    num_dms = len(dms)

    if num_dms < 2:
        raise ValueError("Must provide at least two distance matrices.")

    if labels is None:
        labels = range(num_dms)
    else:
        if num_dms != len(labels):
            raise ValueError(
                "Number of labels must match the number of " "distance matrices."
            )
        if len(set(labels)) != len(labels):
            raise ValueError("Labels must be unique.")

    # one result row per unordered pair of matrices
    num_combs = scipy.special.comb(num_dms, 2, exact=True)
    results_dtype = [
        ("dm1", object),
        ("dm2", object),
        ("statistic", float),
        ("p-value", float),
        ("n", int),
        ("method", object),
        ("permutations", int),
        ("alternative", object),
    ]
    results = np.empty(num_combs, dtype=results_dtype)

    for i, pair in enumerate(combinations(zip(labels, dms), 2)):
        (xlabel, x), (ylabel, y) = pair
        # strings are treated as filepaths and loaded lazily, two at a time
        if isinstance(x, str):
            x = DistanceMatrix.read(x)
        if isinstance(y, str):
            y = DistanceMatrix.read(y)

        stat, p_val, n = mantel(
            x,
            y,
            method=method,
            permutations=permutations,
            alternative=alternative,
            strict=strict,
            lookup=lookup,
        )

        results[i] = (xlabel, ylabel, stat, p_val, n, method, permutations, alternative)

    return pd.DataFrame.from_records(results, index=("dm1", "dm2"))


def _order_dms(x, y, strict=True, lookup=None):
    """Intersect distance matrices and put them in the same order."""
    x_is_dm = isinstance(x, DistanceMatrix)
    y_is_dm = isinstance(y, DistanceMatrix)

    if (x_is_dm and not y_is_dm) or (y_is_dm and not x_is_dm):
        raise TypeError(
            "Mixing DistanceMatrix and array_like input types is not "
            "supported. Both x and y must either be DistanceMatrix instances "
            "or array_like, but not mixed."
        )
    elif x_is_dm and y_is_dm:
        if lookup is not None:
            x = _remap_ids(x, lookup, "x", "first")
            y = _remap_ids(y, lookup, "y", "second")

        # fast path: IDs already match in the same order
        if tuple(x.ids) == tuple(y.ids):
            return x, y

        # IDs common to both matrices, in x's order
        id_order = [id_ for id_ in x.ids if id_ in y]
        num_matches = len(id_order)

        if strict and ((num_matches != len(x.ids)) or (num_matches != len(y.ids))):
            raise ValueError("IDs exist that are not in both distance " "matrices.")

        if num_matches < 1:
            raise ValueError("No matching IDs exist between the distance " "matrices.")

        return x.filter(id_order), y.filter(id_order)
    else:
        # Both x and y aren't DistanceMatrix instances.
        if lookup is not None:
            raise ValueError(
                "ID lookup can only be provided if inputs are "
                "DistanceMatrix instances."
) x = DistanceMatrix(x) y = DistanceMatrix(y) if x.shape != y.shape: raise ValueError("Distance matrices must have the same shape.") return x, y def _remap_ids(dm, lookup, label, order): """Return a copy of `dm` with its IDs remapped based on `lookup`.""" try: remapped_ids = [lookup[id_] for id_ in dm.ids] except KeyError as e: raise KeyError( "All IDs in the %s distance matrix (%s) must be in " "the lookup. Missing ID: %s" % (order, label, str(e)) ) # Create a copy as we'll be modifying the IDs in place. dm_copy = dm.copy() dm_copy.ids = remapped_ids return dm_copy scikit-bio-0.6.2/skbio/stats/distance/_permanova.py000066400000000000000000000117751464262511300223330ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from functools import partial import numpy as np from ._base import ( _preprocess_input_sng, _run_monte_carlo_stats, _build_results, DistanceMatrix, ) from ._cutils import permanova_f_stat_sW_cy def permanova(distance_matrix, grouping, column=None, permutations=999): """Test for significant differences between groups using PERMANOVA. Permutational Multivariate Analysis of Variance (PERMANOVA) is a non-parametric method that tests whether two or more groups of objects (e.g., samples) are significantly different based on a categorical factor. It is conceptually similar to ANOVA except that it operates on a distance matrix, which allows for multivariate analysis. PERMANOVA computes a pseudo-F statistic. Statistical significance is assessed via a permutation test. The assignment of objects to groups (`grouping`) is randomly permuted a number of times (controlled via `permutations`). 
A pseudo-F statistic is computed for each permutation and the p-value is the proportion of permuted pseudo-F statisics that are equal to or greater than the original (unpermuted) pseudo-F statistic. Parameters ---------- distance_matrix : DistanceMatrix Distance matrix containing distances between objects (e.g., distances between samples of microbial communities). grouping : 1-D array_like or pandas.DataFrame Vector indicating the assignment of objects to groups. For example, these could be strings or integers denoting which group an object belongs to. If `grouping` is 1-D ``array_like``, it must be the same length and in the same order as the objects in `distance_matrix`. If `grouping` is a ``DataFrame``, the column specified by `column` will be used as the grouping vector. The ``DataFrame`` must be indexed by the IDs in `distance_matrix` (i.e., the row labels must be distance matrix IDs), but the order of IDs between `distance_matrix` and the ``DataFrame`` need not be the same. All IDs in the distance matrix must be present in the ``DataFrame``. Extra IDs in the ``DataFrame`` are allowed (they are ignored in the calculations). column : str, optional Column name to use as the grouping vector if `grouping` is a ``DataFrame``. Must be provided if `grouping` is a ``DataFrame``. Cannot be provided if `grouping` is 1-D ``array_like``. permutations : int, optional Number of permutations to use when assessing statistical significance. Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and the p-value will be ``np.nan``. Returns ------- pandas.Series Results of the statistical test, including ``test statistic`` and ``p-value``. See Also -------- anosim Notes ----- See [1]_ for the original method reference, as well as ``vegan::adonis``, available in R's vegan package [2]_. The p-value will be ``np.nan`` if `permutations` is zero. References ---------- .. [1] Anderson, Marti J. 
"A new method for non-parametric multivariate analysis of variance." Austral Ecology 26.1 (2001): 32-46. .. [2] http://cran.r-project.org/web/packages/vegan/index.html Examples -------- See :mod:`skbio.stats.distance.anosim` for usage examples (both functions provide similar interfaces). """ if not isinstance(distance_matrix, DistanceMatrix): raise TypeError("Input must be a DistanceMatrix.") sample_size = distance_matrix.shape[0] num_groups, grouping = _preprocess_input_sng( distance_matrix.ids, sample_size, grouping, column ) # Calculate number of objects in each group. group_sizes = np.bincount(grouping) s_T = (distance_matrix[:] ** 2).sum() / sample_size # we are going over the whole matrix, instead of just upper triangle # so cut in half s_T /= 2.0 test_stat_function = partial( _compute_f_stat, sample_size, num_groups, distance_matrix, group_sizes, s_T ) stat, p_value = _run_monte_carlo_stats(test_stat_function, grouping, permutations) return _build_results( "PERMANOVA", "pseudo-F", sample_size, num_groups, stat, p_value, permutations ) def _compute_f_stat( sample_size, num_groups, distance_matrix, group_sizes, s_T, grouping ): """Compute PERMANOVA pseudo-F statistic.""" # Calculate s_W for each group, accounting for different group sizes. s_W = permanova_f_stat_sW_cy(distance_matrix.data, group_sizes, grouping) s_A = s_T - s_W return (s_A / (num_groups - 1)) / (s_W / (sample_size - num_groups)) scikit-bio-0.6.2/skbio/stats/distance/_permdisp.py000066400000000000000000000304221464262511300221540ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from functools import partial import numpy as np import pandas as pd from scipy.stats import f_oneway from scipy.spatial.distance import cdist from ._cutils import geomedian_axis_one from ._base import ( _preprocess_input_sng, _run_monte_carlo_stats, _build_results, DistanceMatrix, ) from skbio.stats.ordination import pcoa, OrdinationResults def permdisp( distance_matrix, grouping, column=None, test="median", permutations=999, method="eigh", number_of_dimensions=10, ): """Test for Homogeneity of Multivariate Groups Disperisons. PERMDISP is a multivariate analogue of Levene's test for homogeneity of multivariate variances. Distances are handled by reducing the original distances to principal coordinates. PERMDISP calculates an F-statistic to assess whether the dispersions between groups is significant Parameters ---------- distance_matrix : DistanceMatrix or OrdinationResults Distance matrix containing distances between objects (e.g., distances between samples of microbial communities) or result of pcoa on such a matrix. grouping : 1-D array_like or pandas.DataFrame Vector indicating the assignment of objects to groups. For example, these could be strings or integers denoting which group an object belongs to. If `grouping` is 1-D ``array_like``, it must be the same length and in the same order as the objects in `distance_matrix`. If `grouping` is a ``DataFrame``, the column specified by `column` will be used as the grouping vector. The ``DataFrame`` must be indexed by the IDs in `distance_matrix` (i.e., the row labels must be distance matrix IDs), but the order of IDs between `distance_matrix` and the ``DataFrame`` need not be the same. All IDs in the distance matrix must be present in the ``DataFrame``. Extra IDs in the ``DataFrame`` are allowed (they are ignored in the calculations). column : str, optional Column name to use as the grouping vector if `grouping` is a ``DataFrame``. 
Must be provided if `grouping` is a ``DataFrame``. Cannot be provided if `grouping` is 1-D ``array_like``. test : {'centroid', 'median'} determines whether the analysis is done using centroid or spaitial median. permutations : int, optional Number of permutations to use when assessing statistical significance. Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and the p-value will be ``np.nan``. method : str, optional Eigendecomposition method to use in performing PCoA. By default, uses SciPy's `eigh`, which computes exact eigenvectors and eigenvalues for all dimensions. The alternate method, `fsvd`, uses faster heuristic eigendecomposition but loses accuracy. The magnitude of accuracy lost is dependent on dataset. Note that using `fsvd` is still considered experimental and should be used with care. Not used if distance_matrix is a OrdinationResults object. number_of_dimensions : int, optional Dimensions to reduce the distance matrix to if using the `fsvd` method. Not used if the `eigh` method is being selected. Returns ------- pandas.Series Results of the statistical test, including ``test statistic`` and ``p-value``. Raises ------ TypeError If, when using the spatial median test, the pcoa ordination is not of type np.float32 or np.float64, the spatial median function will fail and the centroid test should be used instead ValueError If the test is not centroid or median, or if method is not eigh or fsvd TypeError If the distance matrix is not an instance of a ``skbio.DistanceMatrix``. 
ValueError If there is only one group ValueError If a list and a column name are both provided ValueError If a list is provided for `grouping` and it's length does not match the number of ids in distance_matrix ValueError If all of the values in the grouping vector are unique KeyError If there are ids in grouping that are not in distance_matrix See Also -------- permanova anosim Notes ----- This function uses Marti Anderson's PERMDISP2 procedure. The significance of the results from this function will be the same as the results found in vegan's betadisper, however due to floating point variability the F-statistic results may vary slightly. See [1]_ for the original method reference, as well as ``vegan::betadisper``, available in R's vegan package [2]_. References ---------- .. [1] Anderson, M. J. (2006). Distance-based tests for homogeneity of multivariate dispersions. Biometrics, 62(1), 245-253. .. [2] http://cran.r-project.org/web/packages/vegan/index.html Examples -------- Load a 6x6 distance matrix and grouping vector denoting 2 groups of objects: >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0, 0.5, 0.75, 1, 0.66, 0.33], ... [0.5, 0, 0.25, 0.33, 0.77, 0.61], ... [0.75, 0.25, 0, 0.1, 0.44, 0.55], ... [1, 0.33, 0.1, 0, 0.75, 0.88], ... [0.66, 0.77, 0.44, 0.75, 0, 0.77], ... [0.33, 0.61, 0.55, 0.88, 0.77, 0]], ... ['s1', 's2', 's3', 's4', 's5', 's6']) >>> grouping = ['G1', 'G1', 'G1', 'G2', 'G2', 'G2'] Run PERMDISP using 99 permutations to caluculate the p-value: >>> from skbio.stats.distance import permdisp >>> import numpy as np >>> #make output deterministic, should not be included during normal use >>> np.random.seed(0) >>> permdisp(dm, grouping, permutations=99) method name PERMDISP test statistic name F-value sample size 6 number of groups 2 test statistic ... 1.03... p-value ... number of permutations 99 Name: PERMDISP results, dtype: object The return value is a ``pandas.Series`` object containing the results of the statistical test. 
To suppress calculation of the p-value and only obtain the F statistic, specify zero permutations: >>> permdisp(dm, grouping, permutations=0) method name PERMDISP test statistic name F-value sample size 6 number of groups 2 test statistic ... 1.03... p-value NaN number of permutations 0 Name: PERMDISP results, dtype: object PERMDISP computes variances based on two types of tests, using either centroids or spatial medians, also commonly referred to as a geometric median. The spatial median is thought to yield a more robust test statistic, and this test is used by default. Spatial medians are computed using an iterative algorithm to find the optimally minimum point from all other points in a group while centroids are computed using a deterministic formula. As such the two different tests yeild slightly different F statistics. >>> np.random.seed(0) >>> permdisp(dm, grouping, test='centroid', permutations=6) method name PERMDISP test statistic name F-value sample size 6 number of groups 2 test statistic ... 3.67... p-value ... 0.42... number of permutations 6 Name: PERMDISP results, dtype: object You can also provide a ``pandas.DataFrame`` and a column denoting the grouping instead of a grouping vector. The following DataFrame's Grouping column specifies the same grouping as the vector we used in the previous examples.: >>> import pandas as pd >>> df = pd.DataFrame.from_dict( ... {'Grouping': {'s1': 'G1', 's2': 'G1', 's3': 'G1', 's4': 'G2', ... 's5': 'G2', 's6': 'G2'}}) >>> permdisp(dm, df, 'Grouping', permutations=6, test='centroid') method name PERMDISP test statistic name F-value sample size 6 number of groups 2 test statistic ... 3.67... p-value ... 0.42... number of permutations 6 Name: PERMDISP results, dtype: object Note that when providing a ``DataFrame``, the ordering of rows and/or columns does not affect the grouping vector that is extracted. The ``DataFrame`` must be indexed by the distance matrix IDs (i.e., the row labels must be distance matrix IDs). 
If IDs (rows) are present in the ``DataFrame`` but not in the distance matrix, they are ignored. The previous example's ``s7`` ID illustrates this behavior: note that even though the ``DataFrame`` had 7 objects, only 6 were used in the test (see the "Sample size" row in the results above to confirm this). Thus, the ``DataFrame`` can be a superset of the distance matrix IDs. Note that the reverse is not true: IDs in the distance matrix *must* be present in the ``DataFrame`` or an error will be raised. PERMDISP should be used to determine whether the dispersions between the groups in your distance matrix are significantly separated. A non-significant test result indicates that group dispersions are similar to each other. PERMANOVA or ANOSIM should then be used in conjunction to determine whether clustering within groups is significant. """ if test not in ["centroid", "median"]: raise ValueError("Test must be centroid or median") if isinstance(distance_matrix, OrdinationResults): ordination = distance_matrix ids = ordination.samples.axes[0].to_list() sample_size = len(ids) distance_matrix = None # not used anymore, avoid using by mistake elif isinstance(distance_matrix, DistanceMatrix): if method == "eigh": # eigh does not natively support specifying number_of_dimensions # and pcoa expects it to be 0 number_of_dimensions = 0 elif method != "fsvd": raise ValueError("Method must be eigh or fsvd") ids = distance_matrix.ids sample_size = distance_matrix.shape[0] ordination = pcoa( distance_matrix, method=method, number_of_dimensions=number_of_dimensions ) else: raise TypeError("Input must be a DistanceMatrix or OrdinationResults.") samples = ordination.samples num_groups, grouping = _preprocess_input_sng(ids, sample_size, grouping, column) test_stat_function = partial(_compute_groups, samples, test) stat, p_value = _run_monte_carlo_stats(test_stat_function, grouping, permutations) return _build_results( "PERMDISP", "F-value", sample_size, num_groups, stat, p_value, 
permutations ) def _compute_groups(samples, test_type, grouping): groups = [] samples["grouping"] = grouping if test_type == "centroid": centroids = samples.groupby("grouping").aggregate("mean") elif test_type == "median": grouping_cols = samples.columns.to_list() centroids = samples.groupby("grouping")[grouping_cols].apply(_config_med) for label, df in samples.groupby("grouping"): groups.append( cdist( df.values[:, :-1].astype("float64"), [centroids.loc[label].values], metric="euclidean", ) ) stat, _ = f_oneway(*groups) stat = stat[0] return stat def _config_med(x): """Slice and transpose the vector. Slice the vector up to the last value to exclude grouping column and transpose the vector to be compatible with hd.geomedian. """ X = x.values[:, :-1] return pd.Series(np.array(geomedian_axis_one(X.T)), index=x.columns[:-1]) scikit-bio-0.6.2/skbio/stats/distance/_utils.py000066400000000000000000000131501464262511300214700ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from ._cutils import is_symmetric_and_hollow_cy from ._cutils import distmat_reorder_cy, distmat_reorder_condensed_cy def is_symmetric_and_hollow(mat): """Check if a Distance Matrix is symmetric and hollow. Equivalent to [not (mat.T != mat).any(), np.trace(mat) == 0] Parameters ---------- mat : 2D array_like Distance matrix. Result: ------- is_symmetric: Boolean not (mat.T != mat).any() is_hollow: Boolean np.trace(mat) == 0 """ # is_symmetric_and_hollow_cy is optimized # for the common cas of c_contiguous. # For all other cases, make a copy. 
if not mat.flags.c_contiguous: mat = np.asarray(mat, order="C") return is_symmetric_and_hollow_cy(mat) def is_symmetric(mat): """Check if a Distance Matrix is symmetric. Equivalent to not (mat.T != mat).any() Parameters ---------- mat : 2D array_like Distance matrix. Result: ------- is_symmetric: Boolean not (mat.T != mat).any() """ # the is_hollow check is really cheap, # so can reuse is_symmetric_and_hollow return is_symmetric_and_hollow(mat)[0] def is_hollow(mat): """Check if a Distance Matrix is hollow. Equivalent to np.trace(mat) == 0 Parameters ---------- mat : 2D array_like Distance matrix. Result: ------- is_hollow: Boolean np.trace(mat) == 0 """ # is_symmetric_and_hollow_cy spends most # of its time in symetry check, just use numpy return np.trace(mat) == 0 def distmat_reorder_buf(in_mat, reorder_vec, out_mat, validate=False): """Reorder the rows and columns of a distance matrix given a reorder vector. Not all of the columns need to be used. For example: [ [0, 1, 2, 3] , [1, 0, 4, 5] , [2, 4, 0, 6] , [3, 5, 6, 0] ] with [1,0,3,2] will result in [ [0, 1, 5, 4] , [1, 0, 3, 2] , [5, 3, 0, 6] , [4, 2, 6, 0] ] Parameters ---------- in_mat : 2D array_like Distance matrix reorder_vec : 1D_array_like List of permutation indexes out_mat : 2D array_like Output, Distance matrix, must be in c_order and same size as reorder_vec validate: boolean Optional, if True, validate reorder_vec content, detaults to False """ np_reorder = np.asarray(reorder_vec, dtype=np.intp) if validate: maxsize = in_mat.shape[0] bad_cnt = np.where((np_reorder < 0) or (np_reorder >= maxsize))[0].size if bad_cnt > 0: raise ValueError("Invalid reorder_vec") if not in_mat.flags.c_contiguous: in_mat = np.asarray(in_mat, order="C") distmat_reorder_cy(in_mat, np_reorder, out_mat) def distmat_reorder(in_mat, reorder_vec, validate=False): """Reorder the rows and columns of a distance matrix given a reorder vector. Not all of the columns need to be used. 
For example: [ [0, 1, 2, 3] , [1, 0, 4, 5] , [2, 4, 0, 6] , [3, 5, 6, 0] ] with [1,0,3,2] will result in [ [0, 1, 5, 4] , [1, 0, 3, 2] , [5, 3, 0, 6] , [4, 2, 6, 0] ] Parameters ---------- in_mat : 2D array_like Distance matrix, must be in c_order reorder_vec : 1D_array_like List of permutation indexes validate: boolean Optional, if True, validate reorder_vec content, detaults to False Returns ------- out_mat : 2D array_like Distance matrix """ np_reorder = np.asarray(reorder_vec, dtype=np.intp) if validate: maxsize = in_mat.shape[0] bad_cnt = np.where((np_reorder < 0) or (np_reorder >= maxsize))[0].size if bad_cnt > 0: raise ValueError("Invalid reorder_vec") if not in_mat.flags.c_contiguous: in_mat = np.asarray(in_mat, order="C") out_mat = np.empty([np_reorder.size, np_reorder.size], in_mat.dtype) distmat_reorder_cy(in_mat, np_reorder, out_mat) return out_mat def distmat_reorder_condensed(in_mat, reorder_vec, validate=False): """Reorder the rows and columns of a distance matrix given a reorder vector. Not all of the columns need to be used. 
For example: [ [0, 1, 2, 3] , [1, 0, 4, 5] , [2, 4, 0, 6] , [3, 5, 6, 0] ] with [1,0,3,2] will result in [ 1, 5, 4 , 3, 2, 6 ] Parameters ---------- in_mat : 2D array_like Distance matrix, must be in c_order reorder_vec : 1D_array_like List of permutation indexes validate: boolean Optional, if True, validate reorder_vec content, detaults to False Returns ------- out_mat_condensed : 1D array_like Condensed distance matrix """ np_reorder = np.asarray(reorder_vec, dtype=np.intp) if validate: maxsize = in_mat.shape[0] bad_cnt = np.where((np_reorder < 0) or (np_reorder >= maxsize))[0].size if bad_cnt > 0: raise ValueError("Invalid reorder_vec") if not in_mat.flags.c_contiguous: in_mat = np.asarray(in_mat, order="C") csize = ((np_reorder.size - 1) * np_reorder.size) // 2 out_mat_condensed = np.empty([csize], in_mat.dtype) distmat_reorder_condensed_cy(in_mat, np_reorder, out_mat_condensed) return out_mat_condensed scikit-bio-0.6.2/skbio/stats/distance/tests/000077500000000000000000000000001464262511300207615ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/distance/tests/__init__.py000066400000000000000000000005411464262511300230720ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/stats/distance/tests/data/000077500000000000000000000000001464262511300216725ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/distance/tests/data/bioenv_df_vegan.txt000066400000000000000000000021241464262511300255450ustar00rootroot00000000000000#SampleID log(N) P K Ca pH Al 18 2.98568193770049 42.1 139.9 519.4 2.7 39 15 2.59525470695687 39.1 167.3 356.7 2.8 88.1 24 3.00568260440716 67.7 207.1 973.3 3 138 27 3.02529107579554 60.8 233.7 834 2.8 15.4 23 3.16968558067743 54.5 180.6 777 2.7 24.2 19 3.1267605359604 40.9 171.4 691.8 2.7 104.8 22 3.28091121578765 36.7 171.4 738.6 2.8 20.7 16 3.18635263316264 31 138.2 394.6 2.8 74.2 28 3.39450839351136 73.5 260 748.6 2.8 17.9 13 3.3357695763397 40.5 313.8 540.7 2.8 329.7 14 3.08190996979504 38.1 146.8 512.2 2.7 92.3 20 3.26575941076705 61.9 202.2 741.2 2.9 124.3 25 3.1267605359604 50.6 151.7 648 2.9 12.1 7 3.41772668361337 24.6 78.7 188.5 3.1 294.9 5 3.49953328238302 22.7 43.6 240.3 3.1 39 6 2.94968833505258 26.4 61.1 259.1 3 155.1 3 3.43720781918519 32.3 73.7 219 3.3 304.6 4 2.89037175789616 64.9 224.5 517.6 2.9 435.1 2 3.10458667846607 47.4 165.9 436.1 2.9 316.5 9 2.70805020110221 48.4 127.4 499.6 3 227.1 12 2.77258872223978 32.7 126.4 471.4 2.9 108.8 10 2.66025953726586 62.8 215.2 709.7 3.2 168.2 11 2.81540871942271 55.8 205.3 1169.7 3.6 253.6 21 3.04452243772342 26.5 104.4 484.8 3 35.8 scikit-bio-0.6.2/skbio/stats/distance/tests/data/bioenv_dm_vegan.txt000066400000000000000000000234471464262511300255670ustar00rootroot00000000000000 18 15 24 27 23 19 22 16 28 13 14 20 25 7 5 6 3 4 2 9 12 10 11 21 18 0 0.452897212693772 0.715360803826621 0.624766305873718 0.493154219155204 0.43865006075341 0.432340187193151 0.440822999771334 0.698420319937299 0.46351050707088 0.523211654445006 0.408294556891784 0.604911324328014 0.434178076197442 0.545291972003212 0.393272103797605 0.330797959207167 0.563648430837013 
0.66523173799306 0.511183841155759 0.46804467412999 0.515485401604058 0.580143332456965 0.668700755517761 15 0.452897212693772 0 0.611610336420561 0.655733483106344 0.410315012072556 0.453712867932398 0.424523404131089 0.377720269664141 0.654941298927269 0.417480326012996 0.35798188425419 0.360483800701559 0.589951055270385 0.532758773890324 0.584210632971972 0.37822819560664 0.53024185913761 0.584066987502168 0.778451630131581 0.543847849995263 0.527747948281903 0.582043972050997 0.591017841508146 0.753206717859593 24 0.715360803826621 0.611610336420561 0 0.749908842875888 0.624415191219002 0.598837193048421 0.660837272556231 0.638184679569268 0.724777756165372 0.708236862111778 0.645430446977294 0.535631415741837 0.693070808707219 0.71138410637102 0.810870811602888 0.638724301482429 0.721802013418365 0.751900340516049 0.812623821498287 0.596958779537393 0.58038306362995 0.631041437421227 0.634765906326142 0.634430266921848 27 0.624766305873718 0.655733483106344 0.749908842875888 0 0.595583136283997 0.480812012211359 0.509183154263298 0.68272679039539 0.503151848497109 0.745346748197303 0.742254576600809 0.581672470591416 0.574612957968774 0.720436148000664 0.836899929519594 0.712346970098655 0.722493161303423 0.81059478353512 0.67179686908506 0.741190787334003 0.632934861831476 0.743347684665419 0.787761710107256 0.545259963579148 23 0.493154219155204 0.410315012072556 0.624415191219002 0.595583136283997 0 0.39798190453075 0.551027253590122 0.530245080842824 0.688639670633179 0.558293400969046 0.499126157677784 0.374655747006781 0.560198031134205 0.575636505169288 0.709011461350035 0.555007697254655 0.569632137079713 0.700292123849093 0.686164552961924 0.610923773488132 0.522084519099983 0.614601499571679 0.573249035174906 0.619889410480321 19 0.43865006075341 0.453712867932398 0.598837193048421 0.480812012211359 0.39798190453075 0 0.447090470008913 0.462780164587808 0.62210941999066 0.517037833858162 0.547198044950618 0.436336586468505 0.466295105039809 
0.533153195372235 0.649768050102464 0.468877587536534 0.489021376289978 0.59989451354897 0.600155810641532 0.482373873247893 0.455971043613941 0.485136325934296 0.561030342274039 0.514596020968001 22 0.432340187193151 0.424523404131089 0.660837272556231 0.509183154263298 0.551027253590122 0.447090470008913 0 0.318721801577202 0.560878982326328 0.493770323127832 0.484678838434732 0.466044129553462 0.540556449240843 0.537716889466216 0.700184066308371 0.472868628510347 0.525506721244604 0.632959267911546 0.74368261302233 0.514539804165303 0.555492307675902 0.545820329626384 0.605832461377114 0.55345791449267 16 0.440822999771334 0.377720269664141 0.638184679569268 0.68272679039539 0.530245080842824 0.462780164587808 0.318721801577202 0 0.664347219281937 0.418561808219052 0.408452070724878 0.358924933851484 0.5815245364665 0.515594594372192 0.532206785163483 0.4397020901552 0.529224758564709 0.566927472518065 0.753903949731256 0.515921987762518 0.494683547387077 0.495896853216952 0.583525904687767 0.658433720786117 28 0.698420319937299 0.654941298927269 0.724777756165372 0.503151848497109 0.688639670633179 0.62210941999066 0.560878982326328 0.664347219281937 0 0.733169240464629 0.745486578838407 0.618846839169478 0.713792541454381 0.74890906664669 0.853976950339078 0.756087305765939 0.762966538206308 0.791917677364091 0.81258384801323 0.69092466328736 0.711493718852431 0.72961696443748 0.777088600232844 0.628275585712577 13 0.46351050707088 0.417480326012996 0.708236862111778 0.745346748197303 0.558293400969046 0.517037833858162 0.493770323127832 0.418561808219052 0.733169240464629 0 0.434107073238778 0.495251428454508 0.659888526193112 0.482068398983308 0.561095050557647 0.335593085096115 0.482301178660398 0.475008544079799 0.738041440415793 0.50014768619111 0.58859107425281 0.560373485926146 0.58991583345561 0.703705376166457 14 0.523211654445006 0.35798188425419 0.645430446977294 0.742254576600809 0.499126157677784 0.547198044950618 0.484678838434732 
0.408452070724878 0.745486578838407 0.434107073238778 0 0.460778501195483 0.598177860722774 0.566437502573166 0.60997028881234 0.437494609608184 0.560278994397196 0.564565884853771 0.814568858546278 0.563078236117464 0.634425207383389 0.539163427744901 0.64719734947765 0.746247657025891 20 0.408294556891784 0.360483800701559 0.535631415741837 0.581672470591416 0.374655747006781 0.436336586468505 0.466044129553462 0.358924933851484 0.618846839169478 0.495251428454508 0.460778501195483 0 0.614834763166026 0.527001028768971 0.636029763289345 0.402589693145724 0.520623812716001 0.635315798842479 0.669112826680887 0.536517399269416 0.410136150776988 0.536831397119922 0.57987764447421 0.650019383574573 25 0.604911324328014 0.589951055270385 0.693070808707219 0.574612957968774 0.560198031134205 0.466295105039809 0.540556449240843 0.5815245364665 0.713792541454381 0.659888526193112 0.598177860722774 0.614834763166026 0 0.635642388269005 0.743838694259872 0.621437815990208 0.63532741224439 0.661420102186089 0.725910634940022 0.669519591989177 0.689034754247501 0.693127422615701 0.709588799419957 0.678749793333 7 0.434178076197442 0.532758773890324 0.71138410637102 0.720436148000664 0.575636505169288 0.533153195372235 0.537716889466216 0.515594594372192 0.74890906664669 0.482068398983308 0.566437502573166 0.527001028768971 0.635642388269005 0 0.510261674576661 0.323081424973504 0.332861861510004 0.588465187037358 0.640307330403348 0.582254042755471 0.59221097693992 0.57611902810367 0.682931566080637 0.694784212730897 5 0.545291972003212 0.584210632971972 0.810870811602888 0.836899929519594 0.709011461350035 0.649768050102464 0.700184066308371 0.532206785163483 0.853976950339078 0.561095050557647 0.60997028881234 0.636029763289345 0.743838694259872 0.510261674576661 0 0.476300258746816 0.526916663583772 0.593846530768998 0.758314271175518 0.691571027310973 0.761795894022086 0.702775696818748 0.581204902584864 0.85891984357451 6 0.393272103797605 0.37822819560664 
0.638724301482429 0.712346970098655 0.555007697254655 0.468877587536534 0.472868628510347 0.4397020901552 0.756087305765939 0.335593085096115 0.437494609608184 0.402589693145724 0.621437815990208 0.323081424973504 0.476300258746816 0 0.382921704843115 0.495001919452646 0.623361805111518 0.50370863979105 0.494400467755648 0.488742161380673 0.573395486170862 0.701044642267538 3 0.330797959207167 0.53024185913761 0.721802013418365 0.722493161303423 0.569632137079713 0.489021376289978 0.525506721244604 0.529224758564709 0.762966538206308 0.482301178660398 0.560278994397196 0.520623812716001 0.63532741224439 0.332861861510004 0.526916663583772 0.382921704843115 0 0.541116521910622 0.538929257690873 0.50363315957546 0.477667500339432 0.485484212285837 0.642536439557206 0.731616786185991 4 0.563648430837013 0.584066987502168 0.751900340516049 0.81059478353512 0.700292123849093 0.59989451354897 0.632959267911546 0.566927472518065 0.791917677364091 0.475008544079799 0.564565884853771 0.635315798842479 0.661420102186089 0.588465187037358 0.593846530768998 0.495001919452646 0.541116521910622 0 0.569041488360324 0.555333827555126 0.650999789659056 0.618391387774669 0.620122895019617 0.79004091039811 2 0.66523173799306 0.778451630131581 0.812623821498287 0.67179686908506 0.686164552961924 0.600155810641532 0.74368261302233 0.753903949731256 0.81258384801323 0.738041440415793 0.814568858546278 0.669112826680887 0.725910634940022 0.640307330403348 0.758314271175518 0.623361805111518 0.538929257690873 0.569041488360324 0 0.6473786039708 0.537771707661098 0.605751675531862 0.787300504670568 0.71419000634693 9 0.511183841155759 0.543847849995263 0.596958779537393 0.741190787334003 0.610923773488132 0.482373873247893 0.514539804165303 0.515921987762518 0.69092466328736 0.50014768619111 0.563078236117464 0.536517399269416 0.669519591989177 0.582254042755471 0.691571027310973 0.50370863979105 0.50363315957546 0.555333827555126 0.6473786039708 0 0.458910772686871 0.197543573969677 
0.453468643433417 0.589071475907458 12 0.46804467412999 0.527747948281903 0.58038306362995 0.632934861831476 0.522084519099983 0.455971043613941 0.555492307675902 0.494683547387077 0.711493718852431 0.58859107425281 0.634425207383389 0.410136150776988 0.689034754247501 0.59221097693992 0.761795894022086 0.494400467755648 0.477667500339432 0.650999789659056 0.537771707661098 0.458910772686871 0 0.405413621307249 0.521219574609625 0.6041813253885 10 0.515485401604058 0.582043972050997 0.631041437421227 0.743347684665419 0.614601499571679 0.485136325934296 0.545820329626384 0.495896853216952 0.72961696443748 0.560373485926146 0.539163427744901 0.536831397119922 0.693127422615701 0.57611902810367 0.702775696818748 0.488742161380673 0.485484212285837 0.618391387774669 0.605751675531862 0.197543573969677 0.405413621307249 0 0.438888571031113 0.584480238627684 11 0.580143332456965 0.591017841508146 0.634765906326142 0.787761710107256 0.573249035174906 0.561030342274039 0.605832461377114 0.583525904687767 0.777088600232844 0.58991583345561 0.64719734947765 0.57987764447421 0.709588799419957 0.682931566080637 0.581204902584864 0.573395486170862 0.642536439557206 0.620122895019617 0.787300504670568 0.453468643433417 0.521219574609625 0.438888571031113 0 0.747120312482333 21 0.668700755517761 0.753206717859593 0.634430266921848 0.545259963579148 0.619889410480321 0.514596020968001 0.55345791449267 0.658433720786117 0.628275585712577 0.703705376166457 0.746247657025891 0.650019383574573 0.678749793333 0.694784212730897 0.85891984357451 0.701044642267538 0.731616786185991 0.79004091039811 0.71419000634693 0.589071475907458 0.6041813253885 0.584480238627684 0.747120312482333 0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/bioenv_exp_results_vegan.txt000066400000000000000000000003441464262511300275330ustar00rootroot00000000000000vars size correlation P 1 0.25163022609618835 P, Al 2 0.4003778484896049 P, Ca, Al 3 0.4004805867496183 P, Ca, pH, Al 4 0.3618749732452448 log(N), P, 
Ca, pH, Al 5 0.3215524892624249 log(N), P, K, Ca, pH, Al 6 0.2821814757209515 scikit-bio-0.6.2/skbio/stats/distance/tests/data/df.txt000066400000000000000000000012041464262511300230210ustar00rootroot00000000000000#SampleID TOT_ORG_CARB SILT_CLAY ELEVATION SOIL_MOISTURE_DEFICIT CARB_NITRO_RATIO ANNUAL_SEASON_TEMP ANNUAL_SEASON_PRECPT PH CMIN_RATE LONGITUDE LATITUDE CA1.141704 16.7 73 2003 198 13 10.3 400 7.27 2.276 -111.7666667 36.05 MT2.141698 39.1 35 1000 70 23.087 7 450 6.66 19.7 -114 46.8 CO2.141657 18.1 24 2400 104 31.8 6.1 350 5.68 9.223 -105.3333333 40.58333333 TL3.141709 53.9 52 894 -212 24.6 -9.3 400 4.23 16.456 -149.5833333 68.63333333 SN3.141650 16.6 20 3000 -252 13.9 3.6 600 5.74 6.289 -118.1666667 36.45 ExtraSample 42.0 42 -42 breh 3.6 602 1.74 0.289 -122.7 36.45 BB2.141659 52.2 44 400 -680 21.4 6.1 1200 4.6 2.223 -68.1 44.86666667 scikit-bio-0.6.2/skbio/stats/distance/tests/data/df_extra_column.txt000066400000000000000000000012701464262511300256040ustar00rootroot00000000000000#SampleID SILT_CLAY ELEVATION ExtraColumn SOIL_MOISTURE_DEFICIT ANNUAL_SEASON_TEMP CARB_NITRO_RATIO ANNUAL_SEASON_PRECPT PH CMIN_RATE LONGITUDE LATITUDE TOT_ORG_CARB TL3.141709 52 894 42 -212 -9.3 24.6 400 4.23 16.456 -149.5833333 68.63333333 53.9 MT2.141698 35 1000 column 70 7 23.087 450 6.66 19.7 -114 46.8 39.1 CO2.141657 24 2400 with 104 6.1 31.8 350 5.68 9.223 -105.3333333 40.58333333 18.1 ExtraSample 42 some -42 3.6 breh 602 1.74 0.289 -122.7 36.45 42.0 SN3.141650 20 3000 really -252 3.6 13.9 600 5.74 6.289 -118.1666667 36.45 16.6 BB2.141659 44 400 invalid -680 6.1 21.4 1200 4.6 2.223 -68.1 44.86666667 52.2 CA1.141704 73 2003 data 198 10.3 13 400 7.27 2.276 -111.7666667 36.05 16.7 scikit-bio-0.6.2/skbio/stats/distance/tests/data/dm.txt000066400000000000000000000011371464262511300230350ustar00rootroot00000000000000 MT2.141698 CA1.141704 BB2.141659 CO2.141657 TL3.141709 SN3.141650 MT2.141698 0.0 0.623818643706 0.750015427505 0.585201193913 0.729023583672 0.622135587669 
CA1.141704 0.623818643706 0.0 0.774881224555 0.649822398416 0.777203137034 0.629507320436 BB2.141659 0.750015427505 0.774881224555 0.0 0.688845424001 0.567470311282 0.721707516043 CO2.141657 0.585201193913 0.649822398416 0.688845424001 0.0 0.658853575764 0.661223617505 TL3.141709 0.729023583672 0.777203137034 0.567470311282 0.658853575764 0.0 0.711173405838 SN3.141650 0.622135587669 0.629507320436 0.721707516043 0.661223617505 0.711173405838 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/dm2.txt000066400000000000000000000011371464262511300231170ustar00rootroot00000000000000 MT2.141698 CA1.141704 BB2.141659 CO2.141657 TL3.141709 SN3.141650 MT2.141698 0.0 0.623818643706 0.750015427505 0.585201193913 0.729023583672 0.822135587669 CA1.141704 0.623818643706 0.0 0.974881224555 0.649822398416 0.737203137034 0.629507320436 BB2.141659 0.750015427505 0.974881224555 0.0 0.688845424001 0.567470311282 0.721707516043 CO2.141657 0.585201193913 0.649822398416 0.688845424001 0.0 0.658853575764 0.661223617505 TL3.141709 0.729023583672 0.737203137034 0.567470311282 0.658853575764 0.0 0.711173405838 SN3.141650 0.822135587669 0.629507320436 0.721707516043 0.661223617505 0.711173405838 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/dm3.txt000066400000000000000000000011371464262511300231200ustar00rootroot00000000000000 MT2.141698 CA1.141704 BB2.141659 CO2.141657 TL3.141709 SN3.141650 MT2.141698 0.0 0.523818643706 0.750015427505 0.585201193913 0.729023583672 0.622135587669 CA1.141704 0.523818643706 0.0 0.774881224555 0.649822398416 0.777203137034 0.629507320436 BB2.141659 0.750015427505 0.774881224555 0.0 0.687745424001 0.567470311282 0.721707516043 CO2.141657 0.585201193913 0.649822398416 0.687745424001 0.0 0.658853575764 0.661223617505 TL3.141709 0.729023583672 0.777203137034 0.567470311282 0.658853575764 0.0 0.781173405838 SN3.141650 0.622135587669 0.629507320436 0.721707516043 0.661223617505 0.781173405838 0.0 
scikit-bio-0.6.2/skbio/stats/distance/tests/data/dm4.txt000066400000000000000000000011371464262511300231210ustar00rootroot00000000000000 MT2.141698 CA1.141704 BB2.141659 CO2.141657 TL3.141709 SN3.141650 MT2.141698 0.0 0.623818643706 0.750015427505 0.585201193913 0.729023583672 0.652135587669 CA1.141704 0.623818643706 0.0 0.974881224555 0.649822398416 0.737203137034 0.629507320436 BB2.141659 0.750015427505 0.974881224555 0.0 0.688845424001 0.657470311282 0.721707516043 CO2.141657 0.585201193913 0.649822398416 0.688845424001 0.0 0.658853575764 0.661223617505 TL3.141709 0.729023583672 0.737203137034 0.657470311282 0.658853575764 0.0 0.711173405838 SN3.141650 0.652135587669 0.629507320436 0.721707516043 0.661223617505 0.711173405838 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/dm_reordered.txt000066400000000000000000000011371464262511300250700ustar00rootroot00000000000000 MT2.141698 CO2.141657 BB2.141659 CA1.141704 TL3.141709 SN3.141650 MT2.141698 0.0 0.585201193913 0.750015427505 0.623818643706 0.729023583672 0.622135587669 CO2.141657 0.585201193913 0.0 0.688845424001 0.649822398416 0.658853575764 0.661223617505 BB2.141659 0.750015427505 0.688845424001 0.0 0.774881224555 0.567470311282 0.721707516043 CA1.141704 0.623818643706 0.649822398416 0.774881224555 0.0 0.777203137034 0.629507320436 TL3.141709 0.729023583672 0.658853575764 0.567470311282 0.777203137034 0.0 0.711173405838 SN3.141650 0.622135587669 0.661223617505 0.721707516043 0.629507320436 0.711173405838 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/exp_results.txt000066400000000000000000000022251464262511300250110ustar00rootroot00000000000000vars size correlation PH 1 0.75 SOIL_MOISTURE_DEFICIT, PH 2 0.7464285714285714 SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, PH 3 0.8107142857142857 TOT_ORG_CARB, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, PH 4 0.8107142857142857 ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, PH 5 0.7892857142857144 TOT_ORG_CARB, ELEVATION, 
SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, PH 6 0.775 TOT_ORG_CARB, ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, ANNUAL_SEASON_PRECPT, PH 7 0.7285714285714285 TOT_ORG_CARB, ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, ANNUAL_SEASON_PRECPT, PH, CMIN_RATE 8 0.675 TOT_ORG_CARB, SILT_CLAY, ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, ANNUAL_SEASON_PRECPT, PH, CMIN_RATE 9 0.6392857142857143 TOT_ORG_CARB, SILT_CLAY, ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, ANNUAL_SEASON_PRECPT, PH, CMIN_RATE, LATITUDE 10 0.6071428571428571 TOT_ORG_CARB, SILT_CLAY, ELEVATION, SOIL_MOISTURE_DEFICIT, CARB_NITRO_RATIO, ANNUAL_SEASON_TEMP, ANNUAL_SEASON_PRECPT, PH, CMIN_RATE, LONGITUDE, LATITUDE 11 0.5392857142857144 scikit-bio-0.6.2/skbio/stats/distance/tests/data/exp_results_different_column_order.txt000066400000000000000000000022251464262511300316070ustar00rootroot00000000000000vars size correlation PH 1 0.75 PH, SOIL_MOISTURE_DEFICIT 2 0.7464285714285714 PH, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT 3 0.8107142857142857 PH, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, TOT_ORG_CARB 4 0.8107142857142857 PH, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION 5 0.7892857142857144 PH, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, TOT_ORG_CARB 6 0.775 PH, ANNUAL_SEASON_PRECPT, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, TOT_ORG_CARB 7 0.7285714285714285 CMIN_RATE, PH, ANNUAL_SEASON_PRECPT, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, TOT_ORG_CARB 8 0.675 CMIN_RATE, PH, ANNUAL_SEASON_PRECPT, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, SILT_CLAY, TOT_ORG_CARB 9 0.6392857142857143 LATITUDE, CMIN_RATE, PH, ANNUAL_SEASON_PRECPT, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, SILT_CLAY, TOT_ORG_CARB 10 0.6071428571428571 
LATITUDE, LONGITUDE, CMIN_RATE, PH, ANNUAL_SEASON_PRECPT, ANNUAL_SEASON_TEMP, CARB_NITRO_RATIO, SOIL_MOISTURE_DEFICIT, ELEVATION, SILT_CLAY, TOT_ORG_CARB 11 0.5392857142857144 scikit-bio-0.6.2/skbio/stats/distance/tests/data/exp_results_single_column.txt000066400000000000000000000000401464262511300277200ustar00rootroot00000000000000vars size correlation PH 1 0.75 scikit-bio-0.6.2/skbio/stats/distance/tests/data/frameSeries_dm.tsv000066400000000000000000001035771464262511300253720ustar00rootroot00000000000000 zr5156_10V3V4_R1 zr5156_11V3V4_R1 zr5156_12V3V4_R1 zr5156_13V3V4_R1 zr5156_14V3V4_R1 zr5156_15V3V4_R1 zr5156_16V3V4_R1 zr5156_17V3V4_R1 zr5156_18V3V4_R1 zr5156_19V3V4_R1 zr5156_1V3V4_R1 zr5156_20V3V4_R1 zr5156_21V3V4_R1 zr5156_22V3V4_R1 zr5156_23V3V4_R1 zr5156_24V3V4_R1 zr5156_25V3V4_R1 zr5156_26V3V4_R1 zr5156_27V3V4_R1 zr5156_28V3V4_R1 zr5156_29V3V4_R1 zr5156_2V3V4_R1 zr5156_30V3V4_R1 zr5156_31V3V4_R1 zr5156_32V3V4_R1 zr5156_33V3V4_R1 zr5156_34V3V4_R1 zr5156_35V3V4_R1 zr5156_36V3V4_R1 zr5156_37V3V4_R1 zr5156_38V3V4_R1 zr5156_39V3V4_R1 zr5156_3V3V4_R1 zr5156_40V3V4_R1 zr5156_41V3V4_R1 zr5156_42V3V4_R1 zr5156_4V3V4_R1 zr5156_5V3V4_R1 zr5156_6V3V4_R1 zr5156_7V3V4_R1 zr5156_8V3V4_R1 zr5156_9V3V4_R1 zr5156_10V3V4_R1 0.0 0.1262357062761839 0.1193865365457798 0.08369713736578445 0.1659985845782383 0.2413835508626568 0.09222586294019872 0.1332458611895229 0.1530725883054793 0.1030425072609837 0.1507649170302824 0.1787555671808348 0.1597616025172856 0.1898693942662075 0.1360359317680881 0.1049860511593583 0.1746896194906567 0.161129211588336 0.165837544051112 0.1752147914800797 0.1480519516874988 0.1112465247679185 0.1692371646842593 0.1252088150316864 0.08268117179980342 0.1742274829101774 0.1496018871532361 0.1500606848826225 0.0912813927832417 0.144559101309172 0.05810838197701153 0.1171949649446682 0.1380765955687044 0.1182590284387932 0.1356949184522072 0.1492389709105595 0.1719400665956264 0.1291549435784331 0.1468766374442677 0.1091956031010886 
0.1143689310080433 0.09455770835222979 zr5156_11V3V4_R1 0.1262357062761839 0.0 0.1051514077284487 0.1436161968850221 0.2326146299642306 0.3219897477207319 0.1306845763682954 0.1726909080063596 0.2094546841590949 0.1785200181052513 0.1752774412896001 0.2154764037924464 0.1105956384726629 0.2579305923909834 0.159789071992378 0.1401926653555514 0.253467238411307 0.1946535754346474 0.1888771836724367 0.2275422217301586 0.2115565064058448 0.1783473469005498 0.2007589770499096 0.2181750700928601 0.1577708345699125 0.2414698314172274 0.1670926567844305 0.1808713654953637 0.1546989219851927 0.1088772819983929 0.1167616325358589 0.1891301816506275 0.1775778470758768 0.1349910326716275 0.1588695258762032 0.1882842047156271 0.1991533541817128 0.1721427946534862 0.2244956129113895 0.1670845888464353 0.2052035117066867 0.1903787489339654 zr5156_12V3V4_R1 0.1193865365457798 0.1051514077284487 0.0 0.1093122761611469 0.1987041345338616 0.2777605474410032 0.09575160285950673 0.1403344910533776 0.1770438457407355 0.1400693448388553 0.1364153324857176 0.1870373808998719 0.09101061477732886 0.2260548001840255 0.1172769090821173 0.1072421916820582 0.2087469669032011 0.1733522824178857 0.1696871005126365 0.187931858353296 0.162882215924352 0.1553874410694003 0.1764760473412724 0.167489650486269 0.1519273675868082 0.1973084855519143 0.1480477158740635 0.1431819486645803 0.1069299038720177 0.1332297613857648 0.1091566636633538 0.1492424223061563 0.152491287745897 0.1083567819591257 0.1427214101618832 0.1611511306235009 0.1595761509757549 0.1500966809337344 0.1781044627881863 0.1159089070519137 0.1687749540518114 0.1445521299410403 zr5156_13V3V4_R1 0.08369713736578445 0.1436161968850221 0.1093122761611469 0.0 0.2099081302071139 0.2547902145261156 0.1183371783778977 0.1595841200777173 0.1877367852297378 0.1389454310660076 0.1728683959814479 0.2188698322004292 0.1554574052267208 0.2346773645328648 0.1586484644018412 0.1308941061459895 0.2012966580779219 0.1917774480379034 0.1982756055583835 
0.1921503882730374 0.1557335645633633 0.1629089780904155 0.2032389001242923 0.131159826282788 0.1012771564034674 0.1973138613804784 0.1758042169438702 0.1815975767536929 0.1326513138992096 0.1166921858969914 0.100028511360236 0.1468560396558712 0.1715386724734005 0.1460793979744434 0.1844397746403332 0.1851404905894722 0.2035008838651794 0.1756409562316918 0.1844940306823346 0.1310043927080653 0.1274660625319308 0.1210815382689097 zr5156_14V3V4_R1 0.1659985845782383 0.2326146299642306 0.1987041345338616 0.2099081302071139 0.0 0.1368761603689816 0.1549363528889497 0.1026266319009563 0.06375393548141987 0.1061964577740717 0.1213465082848441 0.07231770914919781 0.2107012493298301 0.057900260661145 0.1424163593326227 0.1471611534836962 0.04812334702685623 0.09078969190065313 0.1086189565288966 0.05872593113570378 0.09151669454001352 0.07821269786940772 0.06996212339862032 0.1368410645527744 0.1756972879054398 0.04427944492288369 0.1206901971025525 0.1140531027519637 0.1152803819279663 0.2484366499552812 0.1479235418939771 0.09430963935200642 0.1121609301391732 0.1397797338573526 0.102358713414537 0.08604434738251315 0.09680238766551985 0.07981215038454734 0.05161595501220772 0.1309676378366985 0.1736693926661831 0.1278738876176231 zr5156_15V3V4_R1 0.2413835508626568 0.3219897477207319 0.2777605474410032 0.2547902145261156 0.1368761603689816 0.0 0.2420450180602975 0.1907140876094187 0.1640753724994711 0.195309117326092 0.2109650211311947 0.1624040520528475 0.2960016006630153 0.1332445557925075 0.2191940344010214 0.2278414102839472 0.1372424420935046 0.1841649631042498 0.2074798245699567 0.1418995799966432 0.1590265675013176 0.1841790746735276 0.1650967973752359 0.1607704526607447 0.2343947977674807 0.1448627995068159 0.2077583468932165 0.202723110815676 0.2119801976204397 0.328921866244096 0.2413729053962549 0.1932601053934642 0.2116749344567443 0.2343824623829839 0.2084371690181891 0.190025213701608 0.184979573702458 0.1814099370308643 0.147337106494635 
0.2100482721957215 0.2008409170468961 0.1876003493219673 zr5156_16V3V4_R1 0.09222586294019872 0.1306845763682954 0.09575160285950673 0.1183371783778977 0.1549363528889497 0.2420450180602975 0.0 0.08263613637407852 0.1249797932713108 0.09870148787969783 0.102176599081265 0.1365276515293163 0.116439510078064 0.1898848340513761 0.07786822802993801 0.05401145288005917 0.1608441923525799 0.12045107182917 0.1213421821687465 0.148512405859414 0.1289832913735298 0.1095169693535252 0.1245630108816215 0.1327209909999326 0.1116237507126911 0.1555075513525551 0.1044792964275772 0.1096892560630082 0.07831354584298185 0.1573140686222914 0.08551518076451084 0.1061046095352267 0.09936325979161925 0.07361150317912567 0.1107051637876943 0.1058724170650332 0.1228288320488214 0.1037345909087888 0.1342508169187087 0.07394194432267176 0.1357310753392193 0.09874232058267786 zr5156_17V3V4_R1 0.1332458611895229 0.1726909080063596 0.1403344910533776 0.1595841200777173 0.1026266319009563 0.1907140876094187 0.08263613637407852 0.0 0.08303520928891407 0.06985979390369568 0.07174115638796125 0.08057183456765618 0.1429028601943221 0.1329634637216814 0.0740927879837481 0.07418478958392143 0.1100300707087181 0.07008865816197911 0.07929519403596762 0.09201463976355255 0.08314086994757977 0.07587333147399185 0.06920007949144595 0.112561577121862 0.1351865948955731 0.1001349594748285 0.05998740222523407 0.06601518804953017 0.07115231158863734 0.2064501976046656 0.1229157997755125 0.07753559725691035 0.06881803748322107 0.0820846987106403 0.06395163415235482 0.07172432182235257 0.07559215505254606 0.06027907145870632 0.07871286915773762 0.07430033371847444 0.1330161099409772 0.08857449130841386 zr5156_18V3V4_R1 0.1530725883054793 0.2094546841590949 0.1770438457407355 0.1877367852297378 0.06375393548141987 0.1640753724994711 0.1249797932713108 0.08303520928891407 0.0 0.0832255363968509 0.09796527139450667 0.05608936279497666 0.1755865429819896 0.08740310762140162 0.1193713860376948 0.1145587407339521 
0.07865635506232996 0.07683918177728462 0.08384703972285515 0.0654764926759044 0.08287220015736114 0.06969073853841316 0.06692467160975255 0.1238846903158282 0.167732914442687 0.07224645743270168 0.1030843783407304 0.08225899574587957 0.09645584208844683 0.222334873242736 0.1466127868580658 0.07222266017834206 0.09172516908628203 0.1162768066095267 0.07905265996221633 0.0567104313060754 0.07543050314656505 0.0660325232357851 0.05072631112103963 0.09949584542379035 0.1522582519137235 0.1107092109216859 zr5156_19V3V4_R1 0.1030425072609837 0.1785200181052513 0.1400693448388553 0.1389454310660076 0.1061964577740717 0.195309117326092 0.09870148787969783 0.06985979390369568 0.0832255363968509 0.0 0.09753610290549938 0.1017346420142612 0.1644783872622975 0.1251234274252112 0.1115062704977202 0.08954636025289847 0.1098613401946223 0.09871639363275223 0.1028380618525378 0.1084763047550623 0.08667891705872721 0.0609989726364468 0.1037837460743364 0.09207793857497111 0.1192469688205176 0.1093112721543453 0.08860707657380694 0.09312238939191526 0.06703878855206988 0.1785201686029534 0.1013136603848457 0.05611783879110421 0.07991271618393228 0.1016673356245131 0.07696853985358303 0.08080498078476354 0.1052781884103818 0.07799275478850214 0.08496026819850905 0.0787607762755338 0.1123099056561567 0.070619182570014 zr5156_1V3V4_R1 0.1507649170302824 0.1752774412896001 0.1364153324857176 0.1728683959814479 0.1213465082848441 0.2109650211311947 0.102176599081265 0.07174115638796125 0.09796527139450667 0.09753610290549938 0.0 0.1058795883794136 0.1265643434969865 0.155743644118801 0.06725291003387059 0.09267213929408216 0.1381552217190008 0.09799732995730347 0.09099430012236363 0.1143484501154464 0.1103350912367392 0.09941983080123719 0.086317365683536 0.1389912666262949 0.1561556350671514 0.1248455745183124 0.06899021103431674 0.04491924818433251 0.08853138818432028 0.2150451203669508 0.141020216879513 0.09776379105212425 0.08602353922419155 0.07643882594134431 0.06559889924995091 
0.08577151478557646 0.06141240424591926 0.08088966140739555 0.1147481134136431 0.08839165726498709 0.1473847349914805 0.1081185183786368 zr5156_20V3V4_R1 0.1787555671808348 0.2154764037924464 0.1870373808998719 0.2188698322004292 0.07231770914919781 0.1624040520528475 0.1365276515293163 0.08057183456765618 0.05608936279497666 0.1017346420142612 0.1058795883794136 0.0 0.1856215824235532 0.08124894543727004 0.1203346033797456 0.1210423617877242 0.08678691470162736 0.07878321710685555 0.08251367293404212 0.07431170062957689 0.1012643196396393 0.09394507079725747 0.05876448547475446 0.1413412612940679 0.1841899618245461 0.07403227741159515 0.1010815612105938 0.08596708777539988 0.1196311645508068 0.2561636268431229 0.1686669900374906 0.1038716112494349 0.104780687821961 0.121413112290244 0.08363180629625183 0.0738963402814959 0.06333925762127142 0.08108268378100926 0.06524006389939632 0.1262928005887744 0.1813701927393017 0.1383471986853116 zr5156_21V3V4_R1 0.1597616025172856 0.1105956384726629 0.09101061477732886 0.1554574052267208 0.2107012493298301 0.2960016006630153 0.116439510078064 0.1429028601943221 0.1755865429819896 0.1644783872622975 0.1265643434969865 0.1856215824235532 0.0 0.2422305212056287 0.1191151038625533 0.1169683527831099 0.2226713215858389 0.1725616625016525 0.1674819007527343 0.2000242854405594 0.1893928451337126 0.1651755734154186 0.1696921049902433 0.2038597221154419 0.1761684912419932 0.2128256620069059 0.1444194353155745 0.1334576325902176 0.1363377877720563 0.1545135185562693 0.1363237846992682 0.1673044393176452 0.1526877722173743 0.105266118247574 0.1397791086721317 0.1602177138243876 0.146685116887321 0.1591529090740322 0.1936066013830202 0.1364861137771962 0.2008336857804045 0.1725630164924993 zr5156_22V3V4_R1 0.1898693942662075 0.2579305923909834 0.2260548001840255 0.2346773645328648 0.057900260661145 0.1332445557925075 0.1898848340513761 0.1329634637216814 0.08740310762140162 0.1251234274252112 0.155743644118801 0.08124894543727004 
0.2422305212056287 0.0 0.1755267816641163 0.1756754639634829 0.073606405409486 0.1180374369948224 0.1235755058514064 0.08494248874020731 0.1174110944530593 0.1123675569281386 0.1074794677126668 0.1595554892825317 0.1984663371830582 0.06210691037205323 0.1481196194399604 0.1388728467906852 0.1418026627986534 0.2768515563562317 0.1825557453516704 0.1196410278389074 0.1409395006935493 0.1669410671259341 0.1214607606546082 0.110851678484599 0.1206105207768592 0.1122631081988461 0.08220976258824157 0.1646064624803332 0.2042014882530901 0.1646434831272248 zr5156_23V3V4_R1 0.1360359317680881 0.159789071992378 0.1172769090821173 0.1586484644018412 0.1424163593326227 0.2191940344010214 0.07786822802993801 0.0740927879837481 0.1193713860376948 0.1115062704977202 0.06725291003387059 0.1203346033797456 0.1191151038625533 0.1755267816641163 0.0 0.06531911833602928 0.1447300758373689 0.1045145823019871 0.1027061087038626 0.1327128900376398 0.1255703871944387 0.1152778230783219 0.1060029076129174 0.1370131392756702 0.137458916857107 0.1438363335802246 0.07624260366493245 0.07524276786752448 0.08764847433444532 0.1967450721390073 0.1191018090830029 0.1151892353153167 0.09909052502969176 0.05931195788646768 0.08339582346188876 0.09811943883304841 0.08949239010667165 0.09259485726203597 0.1209557771688734 0.08013308923594474 0.1365335693351368 0.110998504901641 zr5156_24V3V4_R1 0.1049860511593583 0.1401926653555514 0.1072421916820582 0.1308941061459895 0.1471611534836962 0.2278414102839472 0.05401145288005917 0.07418478958392143 0.1145587407339521 0.08954636025289847 0.09267213929408216 0.1210423617877242 0.1169683527831099 0.1756754639634829 0.06531911833602928 0.0 0.1502832416013179 0.1067710181941987 0.1061290633013017 0.1379781501203592 0.12515638131281 0.09913849387830026 0.1164588445229939 0.1317057656664396 0.1169260600375063 0.1462928674062135 0.09185636105489747 0.08804859487181674 0.07108646529012899 0.1679802590670332 0.09921407494202536 0.09812719761369507 
0.08820298476069452 0.06010303997262437 0.09020751232207111 0.09837968550510195 0.1104348923324209 0.09720692343733198 0.1229139652860976 0.07079867089860219 0.1327854232072213 0.09746813415503264 zr5156_25V3V4_R1 0.1746896194906567 0.253467238411307 0.2087469669032011 0.2012966580779219 0.04812334702685623 0.1372424420935046 0.1608441923525799 0.1100300707087181 0.07865635506232996 0.1098613401946223 0.1381552217190008 0.08678691470162736 0.2226713215858389 0.073606405409486 0.1447300758373689 0.1502832416013179 0.0 0.1043792539148186 0.1236315321827428 0.0709620550790759 0.08794307691682986 0.09904525329564624 0.0946654938685631 0.1302099875593525 0.1760172303189602 0.05135724925564918 0.1329067510430049 0.1281675140352214 0.1339038795483324 0.2533810718223353 0.1661064734830484 0.1052777804781497 0.1313728749046526 0.1615327641223157 0.1288630226916564 0.1074080498446083 0.1119841331692077 0.1029044559270918 0.05698861093521006 0.1373696060893588 0.1707707847040014 0.133037686536992 zr5156_26V3V4_R1 0.161129211588336 0.1946535754346474 0.1733522824178857 0.1917774480379034 0.09078969190065313 0.1841649631042498 0.12045107182917 0.07008865816197911 0.07683918177728462 0.09871639363275223 0.09799732995730347 0.07878321710685555 0.1725616625016525 0.1180374369948224 0.1045145823019871 0.1067710181941987 0.1043792539148186 0.0 0.04443749532122632 0.07526343284271764 0.08130402562282243 0.07338911020025321 0.06687342391905679 0.1234011810329324 0.125784220632367 0.09212529243856292 0.05543553476911059 0.08517475025845339 0.1005496762930421 0.2344744444268831 0.1436209415497395 0.08525782962378581 0.06135349734319131 0.09851630396881918 0.07490410326865986 0.06042391936866871 0.0709047818626396 0.0567232774169169 0.08118965111940799 0.1105407079365012 0.1642047146947 0.1225829219859804 zr5156_27V3V4_R1 0.165837544051112 0.1888771836724367 0.1696871005126365 0.1982756055583835 0.1086189565288966 0.2074798245699567 0.1213421821687465 0.07929519403596762 
0.08384703972285515 0.1028380618525378 0.09099430012236363 0.08251367293404212 0.1674819007527343 0.1235755058514064 0.1027061087038626 0.1061290633013017 0.1236315321827428 0.04443749532122632 0.0 0.09843983697412532 0.1041635308603416 0.08500801186774593 0.0790653247825966 0.1420473050122881 0.1370681319561848 0.1092685826269802 0.04868840896146547 0.08040003540536432 0.104127688547458 0.2377777064433007 0.1572225496111248 0.08756271162253361 0.05713884128694471 0.09426314487323312 0.06132904768143902 0.0632648239213944 0.07590344590544494 0.07088252545912956 0.1043985706380321 0.116331928374334 0.1773828114798063 0.1316397181630972 zr5156_28V3V4_R1 0.1752147914800797 0.2275422217301586 0.187931858353296 0.1921503882730374 0.05872593113570378 0.1418995799966432 0.148512405859414 0.09201463976355255 0.0654764926759044 0.1084763047550623 0.1143484501154464 0.07431170062957689 0.2000242854405594 0.08494248874020731 0.1327128900376398 0.1379781501203592 0.0709620550790759 0.07526343284271764 0.09843983697412532 0.0 0.05864280757027062 0.09013980716622781 0.06596833655200689 0.1162127451167597 0.1607511973768047 0.05341404063172048 0.101482891446088 0.1117912976759633 0.1127973840937417 0.2399144610940579 0.1602139154980649 0.08871939095328259 0.1051599736026621 0.1264384414435232 0.1034236138857928 0.08663455480160505 0.09383929438294789 0.08360393754612507 0.05782839463975919 0.124745846611633 0.1693508749517296 0.1323556664653223 zr5156_29V3V4_R1 0.1480519516874988 0.2115565064058448 0.162882215924352 0.1557335645633633 0.09151669454001352 0.1590265675013176 0.1289832913735298 0.08314086994757977 0.08287220015736114 0.08667891705872721 0.1103350912367392 0.1012643196396393 0.1893928451337126 0.1174110944530593 0.1255703871944387 0.12515638131281 0.08794307691682986 0.08130402562282243 0.1041635308603416 0.05864280757027062 0.0 0.07630875662648294 0.07919833370291939 0.08696090717071786 0.1376305771883671 0.0808309425513034 0.09510023724624679 0.1132338807012602 
0.09791462900397319 0.2131426008999406 0.1367106274269204 0.06827977668864908 0.1034362939298921 0.1202407917718846 0.1096511231485762 0.08790458984672868 0.1023554591348991 0.08145892335326672 0.06929952689801679 0.1040491012469253 0.1377790116599498 0.1084175346524984 zr5156_2V3V4_R1 0.1112465247679185 0.1783473469005498 0.1553874410694003 0.1629089780904155 0.07821269786940772 0.1841790746735276 0.1095169693535252 0.07587333147399185 0.06969073853841316 0.0609989726364468 0.09941983080123719 0.09394507079725747 0.1651755734154186 0.1123675569281386 0.1152778230783219 0.09913849387830026 0.09904525329564624 0.07338911020025321 0.08500801186774593 0.09013980716622781 0.07630875662648294 0.0 0.08687578609232423 0.1009315579237222 0.1326620868190357 0.1012494507527034 0.08844108777355007 0.09186086909218714 0.07272671088484615 0.2040286684103269 0.1013769576742951 0.05531367056656115 0.07350886607183753 0.1034727958228171 0.07613494436349319 0.0674369517807583 0.0789257782507407 0.0420327233925696 0.0698463677619778 0.08622211236971568 0.1277098088342294 0.08309564574942276 zr5156_30V3V4_R1 0.1692371646842593 0.2007589770499096 0.1764760473412724 0.2032389001242923 0.06996212339862032 0.1650967973752359 0.1245630108816215 0.06920007949144595 0.06692467160975255 0.1037837460743364 0.086317365683536 0.05876448547475446 0.1696921049902433 0.1074794677126668 0.1060029076129174 0.1164588445229939 0.0946654938685631 0.06687342391905679 0.0790653247825966 0.06596833655200689 0.07919833370291939 0.08687578609232423 0.0 0.1238984522112872 0.1644876473395898 0.08307955480904008 0.08343015904621844 0.08490608946949572 0.1068360768335047 0.235647394116081 0.1486211714669667 0.09084471310296907 0.09160424651084875 0.1027820712620189 0.07592115588638786 0.06792796390297036 0.06625077353928581 0.0644417469616961 0.0714730621416056 0.1156778361865623 0.165536508403527 0.1245190784169049 zr5156_31V3V4_R1 0.1252088150316864 0.2181750700928601 0.167489650486269 0.131159826282788 
0.1368410645527744 0.1607704526607447 0.1327209909999326 0.112561577121862 0.1238846903158282 0.09207793857497111 0.1389912666262949 0.1413412612940679 0.2038597221154419 0.1595554892825317 0.1370131392756702 0.1317057656664396 0.1302099875593525 0.1234011810329324 0.1420473050122881 0.1162127451167597 0.08696090717071786 0.1009315579237222 0.1238984522112872 0.0 0.1240010381044835 0.1373375601400495 0.1299328250599692 0.1388719144556062 0.1146529538980112 0.2028384589348753 0.1337494407386485 0.0979591241135474 0.134027827423915 0.1413658681223808 0.1360229809862496 0.116912696834271 0.1406802119790954 0.1094172671246521 0.1082413342006877 0.1060190194170001 0.08482542432750666 0.07455250371809519 zr5156_32V3V4_R1 0.08268117179980342 0.1577708345699125 0.1519273675868082 0.1012771564034674 0.1756972879054398 0.2343947977674807 0.1116237507126911 0.1351865948955731 0.167732914442687 0.1192469688205176 0.1561556350671514 0.1841899618245461 0.1761684912419932 0.1984663371830582 0.137458916857107 0.1169260600375063 0.1760172303189602 0.125784220632367 0.1370681319561848 0.1607511973768047 0.1376305771883671 0.1326620868190357 0.1644876473395898 0.1240010381044835 0.0 0.1774916894187259 0.1138407775271362 0.1686100181194675 0.1178957857066783 0.1690434364297126 0.08383457056928388 0.1333717746469479 0.1189228624913752 0.1206213225244056 0.1521659068500048 0.1549336344833185 0.1788617068567274 0.1471852797083001 0.1564186804877641 0.1280145613178406 0.1222913335609594 0.1108203144145568 zr5156_33V3V4_R1 0.1742274829101774 0.2414698314172274 0.1973084855519143 0.1973138613804784 0.04427944492288369 0.1448627995068159 0.1555075513525551 0.1001349594748285 0.07224645743270168 0.1093112721543453 0.1248455745183124 0.07403227741159515 0.2128256620069059 0.06210691037205323 0.1438363335802246 0.1462928674062135 0.05135724925564918 0.09212529243856292 0.1092685826269802 0.05341404063172048 0.0808309425513034 0.1012494507527034 0.08307955480904008 0.1373375601400495 
0.1774916894187259 0.0 0.1187013834079619 0.1131705830938557 0.1192823377168671 0.2433961939289964 0.1675122668634584 0.0912750400418213 0.1174453107218831 0.1461492540159815 0.1157637817868849 0.09502672361548307 0.09987904226520145 0.09505323762032163 0.06074165405775561 0.1332955653137387 0.1790707952239549 0.138269372102398 zr5156_34V3V4_R1 0.1496018871532361 0.1670926567844305 0.1480477158740635 0.1758042169438702 0.1206901971025525 0.2077583468932165 0.1044792964275772 0.05998740222523407 0.1030843783407304 0.08860707657380694 0.06899021103431674 0.1010815612105938 0.1444194353155745 0.1481196194399604 0.07624260366493245 0.09185636105489747 0.1329067510430049 0.05543553476911059 0.04868840896146547 0.101482891446088 0.09510023724624679 0.08844108777355007 0.08343015904621844 0.1299328250599692 0.1138407775271362 0.1187013834079619 0.0 0.07253031963137985 0.08955207791470898 0.2209310967325532 0.1338806374398232 0.09196437930107422 0.05556145297834826 0.07373480062868552 0.0569631231348962 0.08129464309027375 0.08027885251518388 0.07336079523131014 0.1078632904903195 0.09944659213735112 0.1529778505728458 0.1133863079229603 zr5156_35V3V4_R1 0.1500606848826225 0.1808713654953637 0.1431819486645803 0.1815975767536929 0.1140531027519637 0.202723110815676 0.1096892560630082 0.06601518804953017 0.08225899574587957 0.09312238939191526 0.04491924818433251 0.08596708777539988 0.1334576325902176 0.1388728467906852 0.07524276786752448 0.08804859487181674 0.1281675140352214 0.08517475025845339 0.08040003540536432 0.1117912976759633 0.1132338807012602 0.09186086909218714 0.08490608946949572 0.1388719144556062 0.1686100181194675 0.1131705830938557 0.07253031963137985 0.0 0.08929317958961092 0.2205530923235972 0.148931361561393 0.08952210500788307 0.08116302034006075 0.0796186162812691 0.05635847656736118 0.0741297127837791 0.0559730905798909 0.07726305230758866 0.1035251812645182 0.09140527550336644 0.1547068425897874 0.1100166471315029 zr5156_36V3V4_R1 0.0912813927832417 
0.1546989219851927 0.1069299038720177 0.1326513138992096 0.1152803819279663 0.2119801976204397 0.07831354584298185 0.07115231158863734 0.09645584208844683 0.06703878855206988 0.08853138818432028 0.1196311645508068 0.1363377877720563 0.1418026627986534 0.08764847433444532 0.07108646529012899 0.1339038795483324 0.1005496762930421 0.104127688547458 0.1127973840937417 0.09791462900397319 0.07272671088484615 0.1068360768335047 0.1146529538980112 0.1178957857066783 0.1192823377168671 0.08955207791470898 0.08929317958961092 0.0 0.1688428724669093 0.08794329654551179 0.06931938640236024 0.07379289887134234 0.07116679305509278 0.0728436935296748 0.08522666507916347 0.1044775375989856 0.0731090544920879 0.1010511511905997 0.05019906030334988 0.1157586709622803 0.0794395863043969 zr5156_37V3V4_R1 0.144559101309172 0.1088772819983929 0.1332297613857648 0.1166921858969914 0.2484366499552812 0.328921866244096 0.1573140686222914 0.2064501976046656 0.222334873242736 0.1785201686029534 0.2150451203669508 0.2561636268431229 0.1545135185562693 0.2768515563562317 0.1967450721390073 0.1679802590670332 0.2533810718223353 0.2344744444268831 0.2377777064433007 0.2399144610940579 0.2131426008999406 0.2040286684103269 0.235647394116081 0.2028384589348753 0.1690434364297126 0.2433961939289964 0.2209310967325532 0.2205530923235972 0.1688428724669093 0.0 0.1352007578348229 0.180999355245605 0.2115009040682549 0.1835499498731898 0.2132309477093111 0.2128737552097922 0.2468673382678022 0.2132182939304277 0.22535850721593 0.1682463117714778 0.1874393102781126 0.18003132611175 zr5156_38V3V4_R1 0.05810838197701153 0.1167616325358589 0.1091566636633538 0.100028511360236 0.1479235418939771 0.2413729053962549 0.08551518076451084 0.1229157997755125 0.1466127868580658 0.1013136603848457 0.141020216879513 0.1686669900374906 0.1363237846992682 0.1825557453516704 0.1191018090830029 0.09921407494202536 0.1661064734830484 0.1436209415497395 0.1572225496111248 0.1602139154980649 0.1367106274269204 
0.1013769576742951 0.1486211714669667 0.1337494407386485 0.08383457056928388 0.1675122668634584 0.1338806374398232 0.148931361561393 0.08794329654551179 0.1352007578348229 0.0 0.1139162857600752 0.1258731045422016 0.1002566675964894 0.1233097624859415 0.1353396237386873 0.158419444876432 0.113078898847648 0.1318772629319251 0.1025817862547318 0.1188094866787605 0.1040218736336048 zr5156_39V3V4_R1 0.1171949649446682 0.1891301816506275 0.1492424223061563 0.1468560396558712 0.09430963935200642 0.1932601053934642 0.1061046095352267 0.07753559725691035 0.07222266017834206 0.05611783879110421 0.09776379105212425 0.1038716112494349 0.1673044393176452 0.1196410278389074 0.1151892353153167 0.09812719761369507 0.1052777804781497 0.08525782962378581 0.08756271162253361 0.08871939095328259 0.06827977668864908 0.05531367056656115 0.09084471310296907 0.0979591241135474 0.1333717746469479 0.0912750400418213 0.09196437930107422 0.08952210500788307 0.06931938640236024 0.180999355245605 0.1139162857600752 0.0 0.0793363748306886 0.09667707293725775 0.07500183477834294 0.05998668523430459 0.09603735891837441 0.06407993221728969 0.07364087102953018 0.0841988972355663 0.1253477686721348 0.08425718423704588 zr5156_3V3V4_R1 0.1380765955687044 0.1775778470758768 0.152491287745897 0.1715386724734005 0.1121609301391732 0.2116749344567443 0.09936325979161925 0.06881803748322107 0.09172516908628203 0.07991271618393228 0.08602353922419155 0.104780687821961 0.1526877722173743 0.1409395006935493 0.09909052502969176 0.08820298476069452 0.1313728749046526 0.06135349734319131 0.05713884128694471 0.1051599736026621 0.1034362939298921 0.07350886607183753 0.09160424651084875 0.134027827423915 0.1189228624913752 0.1174453107218831 0.05556145297834826 0.08116302034006075 0.07379289887134234 0.2115009040682549 0.1258731045422016 0.0793363748306886 0.0 0.08373480193011155 0.06327159323423624 0.07347913747915685 0.08887165680412651 0.0638807158669014 0.104625081999776 0.0867735093246497 0.1510897179571402 
0.1039641165049727 zr5156_40V3V4_R1 0.1182590284387932 0.1349910326716275 0.1083567819591257 0.1460793979744434 0.1397797338573526 0.2343824623829839 0.07361150317912567 0.0820846987106403 0.1162768066095267 0.1016673356245131 0.07643882594134431 0.121413112290244 0.105266118247574 0.1669410671259341 0.05931195788646768 0.06010303997262437 0.1615327641223157 0.09851630396881918 0.09426314487323312 0.1264384414435232 0.1202407917718846 0.1034727958228171 0.1027820712620189 0.1413658681223808 0.1206213225244056 0.1461492540159815 0.07373480062868552 0.0796186162812691 0.07116679305509278 0.1835499498731898 0.1002566675964894 0.09667707293725775 0.08373480193011155 0.0 0.0646901863579918 0.0841188930266444 0.09307281344584185 0.08932489766976665 0.1277825808297082 0.07926549125591863 0.149082220904614 0.1114119040038509 zr5156_41V3V4_R1 0.1356949184522072 0.1588695258762032 0.1427214101618832 0.1844397746403332 0.102358713414537 0.2084371690181891 0.1107051637876943 0.06395163415235482 0.07905265996221633 0.07696853985358303 0.06559889924995091 0.08363180629625183 0.1397791086721317 0.1214607606546082 0.08339582346188876 0.09020751232207111 0.1288630226916564 0.07490410326865986 0.06132904768143902 0.1034236138857928 0.1096511231485762 0.07613494436349319 0.07592115588638786 0.1360229809862496 0.1521659068500048 0.1157637817868849 0.0569631231348962 0.05635847656736118 0.0728436935296748 0.2132309477093111 0.1233097624859415 0.07500183477834294 0.06327159323423624 0.0646901863579918 0.0 0.05346196830622852 0.06874532295201652 0.05704439051065374 0.09660602733837584 0.09668633348787234 0.1544663798090666 0.1111660506442167 zr5156_42V3V4_R1 0.1492389709105595 0.1882842047156271 0.1611511306235009 0.1851404905894722 0.08604434738251315 0.190025213701608 0.1058724170650332 0.07172432182235257 0.0567104313060754 0.08080498078476354 0.08577151478557646 0.0738963402814959 0.1602177138243876 0.110851678484599 0.09811943883304841 0.09837968550510195 0.1074080498446083 
0.06042391936866871 0.0632648239213944 0.08663455480160505 0.08790458984672868 0.0674369517807583 0.06792796390297036 0.116912696834271 0.1549336344833185 0.09502672361548307 0.08129464309027375 0.0741297127837791 0.08522666507916347 0.2128737552097922 0.1353396237386873 0.05998668523430459 0.07347913747915685 0.0841188930266444 0.05346196830622852 0.0 0.07072936610639428 0.05044240463618335 0.0746529662660631 0.09413368674655981 0.148989690647545 0.104263380286519 zr5156_4V3V4_R1 0.1719400665956264 0.1991533541817128 0.1595761509757549 0.2035008838651794 0.09680238766551985 0.184979573702458 0.1228288320488214 0.07559215505254606 0.07543050314656505 0.1052781884103818 0.06141240424591926 0.06333925762127142 0.146685116887321 0.1206105207768592 0.08949239010667165 0.1104348923324209 0.1119841331692077 0.0709047818626396 0.07590344590544494 0.09383929438294789 0.1023554591348991 0.0789257782507407 0.06625077353928581 0.1406802119790954 0.1788617068567274 0.09987904226520145 0.08027885251518388 0.0559730905798909 0.1044775375989856 0.2468673382678022 0.158419444876432 0.09603735891837441 0.08887165680412651 0.09307281344584185 0.06874532295201652 0.07072936610639428 0.0 0.06182863394483688 0.08518471960424352 0.1128638042424907 0.1703999656298808 0.1262217229766436 zr5156_5V3V4_R1 0.1291549435784331 0.1721427946534862 0.1500966809337344 0.1756409562316918 0.07981215038454734 0.1814099370308643 0.1037345909087888 0.06027907145870632 0.0660325232357851 0.07799275478850214 0.08088966140739555 0.08108268378100926 0.1591529090740322 0.1122631081988461 0.09259485726203597 0.09720692343733198 0.1029044559270918 0.0567232774169169 0.07088252545912956 0.08360393754612507 0.08145892335326672 0.0420327233925696 0.0644417469616961 0.1094172671246521 0.1471852797083001 0.09505323762032163 0.07336079523131014 0.07726305230758866 0.0731090544920879 0.2132182939304277 0.113078898847648 0.06407993221728969 0.0638807158669014 0.08932489766976665 0.05704439051065374 0.05044240463618335 
0.06182863394483688 0.0 0.07205782034366952 0.08667352924759916 0.1403661231953189 0.09708488328178215 zr5156_6V3V4_R1 0.1468766374442677 0.2244956129113895 0.1781044627881863 0.1844940306823346 0.05161595501220772 0.147337106494635 0.1342508169187087 0.07871286915773762 0.05072631112103963 0.08496026819850905 0.1147481134136431 0.06524006389939632 0.1936066013830202 0.08220976258824157 0.1209557771688734 0.1229139652860976 0.05698861093521006 0.08118965111940799 0.1043985706380321 0.05782839463975919 0.06929952689801679 0.0698463677619778 0.0714730621416056 0.1082413342006877 0.1564186804877641 0.06074165405775561 0.1078632904903195 0.1035251812645182 0.1010511511905997 0.22535850721593 0.1318772629319251 0.07364087102953018 0.104625081999776 0.1277825808297082 0.09660602733837584 0.0746529662660631 0.08518471960424352 0.07205782034366952 0.0 0.102948784071646 0.1398437820759513 0.1022986047425159 zr5156_7V3V4_R1 0.1091956031010886 0.1670845888464353 0.1159089070519137 0.1310043927080653 0.1309676378366985 0.2100482721957215 0.07394194432267176 0.07430033371847444 0.09949584542379035 0.0787607762755338 0.08839165726498709 0.1262928005887744 0.1364861137771962 0.1646064624803332 0.08013308923594474 0.07079867089860219 0.1373696060893588 0.1105407079365012 0.116331928374334 0.124745846611633 0.1040491012469253 0.08622211236971568 0.1156778361865623 0.1060190194170001 0.1280145613178406 0.1332955653137387 0.09944659213735112 0.09140527550336644 0.05019906030334988 0.1682463117714778 0.1025817862547318 0.0841988972355663 0.0867735093246497 0.07926549125591863 0.09668633348787234 0.09413368674655981 0.1128638042424907 0.08667352924759916 0.102948784071646 0.0 0.09303708683358188 0.06804559561595384 zr5156_8V3V4_R1 0.1143689310080433 0.2052035117066867 0.1687749540518114 0.1274660625319308 0.1736693926661831 0.2008409170468961 0.1357310753392193 0.1330161099409772 0.1522582519137235 0.1123099056561567 0.1473847349914805 0.1813701927393017 0.2008336857804045 
0.2042014882530901 0.1365335693351368 0.1327854232072213 0.1707707847040014 0.1642047146947 0.1773828114798063 0.1693508749517296 0.1377790116599498 0.1277098088342294 0.165536508403527 0.08482542432750666 0.1222913335609594 0.1790707952239549 0.1529778505728458 0.1547068425897874 0.1157586709622803 0.1874393102781126 0.1188094866787605 0.1253477686721348 0.1510897179571402 0.149082220904614 0.1544663798090666 0.148989690647545 0.1703999656298808 0.1403661231953189 0.1398437820759513 0.09303708683358188 0.0 0.07605080457182967 zr5156_9V3V4_R1 0.09455770835222979 0.1903787489339654 0.1445521299410403 0.1210815382689097 0.1278738876176231 0.1876003493219673 0.09874232058267786 0.08857449130841386 0.1107092109216859 0.070619182570014 0.1081185183786368 0.1383471986853116 0.1725630164924993 0.1646434831272248 0.110998504901641 0.09746813415503264 0.133037686536992 0.1225829219859804 0.1316397181630972 0.1323556664653223 0.1084175346524984 0.08309564574942276 0.1245190784169049 0.07455250371809519 0.1108203144145568 0.138269372102398 0.1133863079229603 0.1100166471315029 0.0794395863043969 0.18003132611175 0.1040218736336048 0.08425718423704588 0.1039641165049727 0.1114119040038509 0.1111660506442167 0.104263380286519 0.1262217229766436 0.09708488328178215 0.1022986047425159 0.06804559561595384 0.07605080457182967 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/frameSeries_grouping.tsv000066400000000000000000000027301464262511300266110ustar00rootroot00000000000000sample_name tumor zr5156_1V3V4_R1 no-tumor mice zr5156_2V3V4_R1 no-tumor mice zr5156_3V3V4_R1 no-tumor mice zr5156_4V3V4_R1 no-tumor mice zr5156_5V3V4_R1 no-tumor mice zr5156_6V3V4_R1 tumor-bearing mice zr5156_7V3V4_R1 tumor-bearing mice zr5156_8V3V4_R1 tumor-bearing mice zr5156_33V3V4_R1 tumor-bearing mice zr5156_10V3V4_R1 tumor-bearing mice zr5156_11V3V4_R1 tumor-bearing mice zr5156_12V3V4_R1 tumor-bearing mice zr5156_13V3V4_R1 tumor-bearing mice zr5156_14V3V4_R1 tumor-bearing mice zr5156_15V3V4_R1 
tumor-bearing mice zr5156_16V3V4_R1 tumor-bearing mice zr5156_17V3V4_R1 tumor-bearing mice zr5156_18V3V4_R1 tumor-bearing mice zr5156_19V3V4_R1 tumor-bearing mice zr5156_20V3V4_R1 tumor-bearing mice zr5156_21V3V4_R1 tumor-bearing mice zr5156_22V3V4_R1 tumor-bearing mice zr5156_23V3V4_R1 tumor-bearing mice zr5156_24V3V4_R1 tumor-bearing mice zr5156_25V3V4_R1 tumor-bearing mice zr5156_26V3V4_R1 tumor-bearing mice zr5156_27V3V4_R1 tumor-bearing mice zr5156_28V3V4_R1 tumor-bearing mice zr5156_29V3V4_R1 tumor-bearing mice zr5156_30V3V4_R1 tumor-bearing mice zr5156_31V3V4_R1 tumor-bearing mice zr5156_32V3V4_R1 tumor-bearing mice zr5156_9V3V4_R1 tumor-bearing mice zr5156_34V3V4_R1 tumor-bearing mice zr5156_35V3V4_R1 tumor-bearing mice zr5156_36V3V4_R1 tumor-bearing mice zr5156_37V3V4_R1 tumor-bearing mice zr5156_38V3V4_R1 tumor-bearing mice zr5156_39V3V4_R1 tumor-bearing mice zr5156_40V3V4_R1 tumor-bearing mice zr5156_41V3V4_R1 tumor-bearing mice zr5156_42V3V4_R1 tumor-bearing mice scikit-bio-0.6.2/skbio/stats/distance/tests/data/mantel_env_dm_vegan.txt000066400000000000000000000222401464262511300264230ustar00rootroot000000000000000 2.07679533239178 6.88145502389971 4.47292990349355 2.50815747366843 3.58036212296744 2.6591292750976 2.29553760953804 4.71171667874897 5.16305059040855 2.21426134242764 3.92961213866286 3.13399015473536 4.90270725079791 5.49476199174931 4.24083600731234 6.11441501785041 6.05615282291975 4.95222984315722 4.21090492925648 2.85939902504818 4.97193013540636 6.55517500441812 2.54101803759533 2.07679533239178 0 6.90291144667953 4.64003084174476 3.72544116897342 4.23057909059422 3.32236571813861 2.52327233791657 5.13562550988481 5.1437583829781 2.50862666200143 4.72257599677909 3.43881324942989 4.85150831682718 5.50342943536056 3.52222099128756 5.87491943777315 5.49066317297654 4.27208092800677 3.15270231033824 1.74819033890028 4.12374288054719 6.15931692104792 3.35050234347791 6.88145502389971 6.90291144667953 0 5.48014130249836 5.71422767011491 
5.44458516207639 6.21128694887343 7.57897769844682 5.72787031540342 5.50347777475312 6.05215034685817 5.13756253132332 6.81750519634229 8.76568212120474 9.04444618731187 8.33992506606364 9.11412838063313 5.67503738892289 6.94693258686523 5.30131540387761 6.85740763239976 5.5325902108287 6.30781548936649 7.84284307315349 4.47292990349355 4.64003084174476 5.48014130249836 0 3.48549610587919 3.96397362332123 3.14459815680532 5.59791469171996 2.25198909783448 5.18311849821211 4.54741248463763 3.71058624754798 2.69797588499755 7.82504504220559 7.61406607135237 7.13381545095109 8.44384430044265 6.71088081591996 6.35910146136893 5.18001442223706 5.2381960545819 4.7094220651569 6.46329094926891 5.88708881996361 2.50815747366843 3.72544116897342 5.71422767011491 3.48549610587919 0 2.09064557523382 2.35007842477788 3.84437623072582 3.37982049066074 4.82948860655148 2.61344414995761 3.08212085298007 3.39826001248126 6.34083256759322 6.78044024394629 5.9882682085243 7.368182657586 6.29229240618139 5.93365009305124 4.74001147278235 4.16875045563175 5.19193126631916 6.60549692940372 3.75170839493609 3.58036212296744 4.23057909059422 5.44458516207639 3.96397362332123 2.09064557523382 0 2.91550590841545 4.53693024483879 4.16796766816088 4.70318440048524 2.79541837375649 3.79457424274492 4.23335312544269 6.52562476479155 7.28216604836819 6.15919058556992 7.48332275942444 6.29601002300864 6.13547796850531 4.6792323698443 4.45343001677936 5.90808732531762 7.12563061433389 4.29741620103145 2.6591292750976 3.32236571813861 6.21128694887343 3.14459815680532 2.35007842477788 2.91550590841545 0 3.03537232143609 3.19717141237262 4.78031309163852 2.41753325811849 3.59607995374352 2.12854912039549 5.46704031619679 5.33701156479808 4.96043060818422 6.44826304723108 6.36381765874823 5.33050747387333 4.28625407689455 3.30334570818273 4.94667231796753 6.31277712868333 3.58610710419189 2.29553760953804 2.52327233791657 7.57897769844682 5.59791469171996 3.84437623072582 4.53693024483879 
3.03537232143609 0 5.42581905199784 5.5382425777602 2.21628461900339 4.91569734557852 3.76898373844346 3.66958270784948 3.7596809535214 3.09087676998529 5.16423793264611 6.19348191138961 4.67007617153741 4.11204634372634 2.12921147954829 5.20716627366845 6.64769970583513 2.40407490833653 4.71171667874897 5.13562550988481 5.72787031540342 2.25198909783448 3.37982049066074 4.16796766816088 3.19717141237262 5.42581905199784 0 4.78731905692567 4.50479586376459 3.38670838656427 3.20693742829686 7.42683056394166 7.34570046678601 7.17015799953149 7.98002716787638 6.41561622183692 6.2124182195955 5.39657010741707 5.54110800674231 5.15776751659351 6.8516874226519 6.10574081766257 5.16305059040855 5.1437583829781 5.50347777475312 5.18311849821211 4.82948860655148 4.70318440048524 4.78031309163852 5.5382425777602 4.78731905692567 0 4.67746569480341 4.08051856974203 5.63341628143736 6.13993687315028 7.7127998868603 6.72017341542958 6.71492819699427 4.33113554849634 4.44684649984502 4.82408553908334 5.61768442520245 5.26230208160398 6.54992709860496 6.42845550946653 2.21426134242764 2.50862666200143 6.05215034685817 4.54741248463763 2.61344414995761 2.79541837375649 2.41753325811849 2.21628461900339 4.50479586376459 4.67746569480341 0 3.67536647198733 3.50853972062287 4.8908007524505 5.29902826407876 4.05541661524461 6.11473384050956 5.23487714375643 4.87089053925741 3.21806378630338 2.37451207574614 5.0909474774308 6.73970215554685 3.03452592937357 3.92961213866286 4.72257599677909 5.13756253132332 3.71058624754798 3.08212085298007 3.79457424274492 3.59607995374352 4.91569734557852 3.38670838656427 4.08051856974203 3.67536647198733 0 3.67506523963379 6.73163254348659 7.18535849292866 6.60015854488732 7.50539564845393 4.98323825896227 5.96071394777962 4.63371782472599 5.21021128432828 5.3351470840398 6.70375146848393 5.02891238126826 3.13399015473536 3.43881324942989 6.81750519634229 2.69797588499755 3.39826001248126 4.23335312544269 2.12854912039549 3.76898373844346 
3.20693742829686 5.63341628143736 3.50853972062287 3.67506523963379 0 6.07359964931452 5.55282592144639 5.28753056505203 6.86459352948227 6.60873730627615 5.6314567316324 4.5625504604801 3.80862359775727 4.74234966729958 6.39367296383052 4.14114063107224 4.90270725079791 4.85150831682718 8.76568212120474 7.82504504220559 6.34083256759322 6.52562476479155 5.46704031619679 3.66958270784948 7.42683056394166 6.13993687315028 4.8908007524505 6.73163254348659 6.07359964931452 0 3.95581421039689 2.89070054815662 2.00584491640009 6.4169416509523 3.98054063154574 5.12176871169235 4.25495681827209 6.42605641292207 6.84353264931 4.64487610678987 5.49476199174931 5.50342943536056 9.04444618731187 7.61406607135237 6.78044024394629 7.28216604836819 5.33701156479808 3.7596809535214 7.34570046678601 7.7127998868603 5.29902826407876 7.18535849292866 5.55282592144639 3.95581421039689 0 4.04435991796256 5.01228422881506 8.11947221764963 5.86295047339197 5.99126094814824 4.58960572590818 6.61131730349409 7.40381675001318 4.84021195384958 4.24083600731234 3.52222099128756 8.33992506606364 7.13381545095109 5.9882682085243 6.15919058556992 4.96043060818422 3.09087676998529 7.17015799953149 6.72017341542958 4.05541661524461 6.60015854488732 5.28753056505203 2.89070054815662 4.04435991796256 0 3.64256798987379 6.01870458325898 4.26211882754801 3.82653767294834 2.5630508080946 5.89444500689165 6.82255468001791 4.23311465341886 6.11441501785041 5.87491943777315 9.11412838063313 8.44384430044265 7.368182657586 7.48332275942444 6.44826304723108 5.16423793264611 7.98002716787638 6.71492819699427 6.11473384050956 7.50539564845393 6.86459352948227 2.00584491640009 5.01228422881506 3.64256798987379 0 6.60501226761683 3.94055918447939 5.63216883653706 5.30106624617202 6.83112869949915 6.80321310613232 6.05396442753773 6.05615282291975 5.49066317297654 5.67503738892289 6.71088081591996 6.29229240618139 6.29601002300864 6.36381765874823 6.19348191138961 6.41561622183692 4.33113554849634 
5.23487714375643 4.98323825896227 6.60873730627615 6.4169416509523 8.11947221764963 6.01870458325898 6.60501226761683 0 4.58594627180039 3.55328319122345 5.54504004833869 5.61499654775589 6.828621895957 7.2430575511398 4.95222984315722 4.27208092800677 6.94693258686523 6.35910146136893 5.93365009305124 6.13547796850531 5.33050747387333 4.67007617153741 6.2124182195955 4.44684649984502 4.87089053925741 5.96071394777962 5.6314567316324 3.98054063154574 5.86295047339197 4.26211882754801 3.94055918447939 4.58594627180039 0 3.90515299381786 4.28027849695426 4.38635292373633 5.39036567068314 6.01887662312089 4.21090492925648 3.15270231033824 5.30131540387761 5.18001442223706 4.74001147278235 4.6792323698443 4.28625407689455 4.11204634372634 5.39657010741707 4.82408553908334 3.21806378630338 4.63371782472599 4.5625504604801 5.12176871169235 5.99126094814824 3.82653767294834 5.63216883653706 3.55328319122345 3.90515299381786 0 2.65742337348713 3.85104238688071 5.57317463010724 5.08393769559955 2.85939902504818 1.74819033890028 6.85740763239976 5.2381960545819 4.16875045563175 4.45343001677936 3.30334570818273 2.12921147954829 5.54110800674231 5.61768442520245 2.37451207574614 5.21021128432828 3.80862359775727 4.25495681827209 4.58960572590818 2.5630508080946 5.30106624617202 5.54504004833869 4.28027849695426 2.65742337348713 0 4.28103353313767 5.86614162833534 3.27887996130286 4.97193013540636 4.12374288054719 5.5325902108287 4.7094220651569 5.19193126631916 5.90808732531762 4.94667231796753 5.20716627366845 5.15776751659351 5.26230208160398 5.0909474774308 5.3351470840398 4.74234966729958 6.42605641292207 6.61131730349409 5.89444500689165 6.83112869949915 5.61499654775589 4.38635292373633 3.85104238688071 4.28103353313767 0 3.33721452854301 5.88678552682047 6.55517500441812 6.15931692104792 6.30781548936649 6.46329094926891 6.60549692940372 7.12563061433389 6.31277712868333 6.64769970583513 6.8516874226519 6.54992709860496 6.73970215554685 6.70375146848393 
6.39367296383052 6.84353264931 7.40381675001318 6.82255468001791 6.80321310613232 6.828621895957 5.39036567068314 5.57317463010724 5.86614162833534 3.33721452854301 0 6.7434003020233 2.54101803759533 3.35050234347791 7.84284307315349 5.88708881996361 3.75170839493609 4.29741620103145 3.58610710419189 2.40407490833653 6.10574081766257 6.42845550946653 3.03452592937357 5.02891238126826 4.14114063107224 4.64487610678987 4.84021195384958 4.23311465341886 6.05396442753773 7.2430575511398 6.01887662312089 5.08393769559955 3.27887996130286 5.88678552682047 6.7434003020233 0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/mantel_veg_dm_vegan.txt000066400000000000000000000233161464262511300264210ustar00rootroot000000000000000 0.531002122667858 0.668066081456845 0.562124668311531 0.3747077813648 0.509473807708101 0.623441897868918 0.533760972316003 0.841820910455228 0.345334659061175 0.544980977465613 0.387906855391038 0.631889119284077 0.360369716481462 0.495569929879871 0.338230884557721 0.527747996119875 0.46940182727116 0.572409158546537 0.658356940509915 0.468803827751196 0.624899560429172 0.445852306473965 0.556086425651643 0.531002122667858 0 0.359778297016791 0.405560971081094 0.365209673840692 0.456075749605471 0.35795169856699 0.397667376920489 0.522541392379812 0.60638464347191 0.48037557590249 0.378418761774833 0.337611485751577 0.671739130434783 0.717861205915813 0.635512156237545 0.757850272278487 0.684397366160893 0.820626944245034 0.776103945014594 0.679419902681042 0.764456383429945 0.471627356163032 0.760728080850839 0.668066081456845 0.359778297016791 0 0.493494677463379 0.502030649266258 0.509231821290175 0.50100504976222 0.590762326930444 0.573666487726319 0.757674655787863 0.653360627772091 0.434689236988378 0.336909757263505 0.793106938610254 0.856175256640914 0.744137292184681 0.83821186778176 0.830987488829312 0.837255085778569 0.759051704257184 0.689453763842811 0.78428294394681 0.567737261531031 0.727272727272727 0.562124668311531 
0.405560971081094 0.493494677463379 0 0.428611098255195 0.487819025522041 0.465522369035986 0.568393032270116 0.302780246286657 0.754373626373627 0.746791469651645 0.495783259540375 0.500159329903947 0.77929167212542 0.873218985388874 0.749693459050357 0.809023592216291 0.841379965785972 0.758192367410313 0.741589782845171 0.625361639704983 0.70965400435519 0.632291885091214 0.54560014236142 0.3747077813648 0.365209673840692 0.502030649266258 0.428611098255195 0 0.360624235718861 0.481270602337428 0.409431221365516 0.697951879101213 0.622147127631921 0.564580791354366 0.287701365077311 0.425861695209191 0.639083780437474 0.729525454839929 0.625248311481923 0.712879825495866 0.711791887728906 0.724986879144997 0.669388904533934 0.538476172358033 0.662547573180473 0.471028037383178 0.495122079839688 0.509473807708101 0.456075749605471 0.509231821290175 0.487819025522041 0.360624235718861 0 0.47264827514003 0.449673125625773 0.643173412512372 0.573924437650142 0.633194205745151 0.395377631035906 0.431129869389152 0.695857026807473 0.789820495341968 0.568402976531196 0.530275619878684 0.517760385310054 0.538922155688623 0.539314268896846 0.428855622420053 0.505990964447063 0.329349338004224 0.531589448015961 0.623441897868918 0.35795169856699 0.50100504976222 0.465522369035986 0.481270602337428 0.47264827514003 0 0.267803106717773 0.598566633402885 0.694873612095441 0.535760932030719 0.46270201874964 0.382298106172113 0.745988552125364 0.861145064293746 0.724916160609081 0.802615164256341 0.801531425047536 0.832146445891039 0.772508176966776 0.70517511761631 0.787532847973119 0.581221922731357 0.677116737663332 0.533760972316003 0.397667376920489 0.590762326930444 0.568393032270116 0.409431221365516 0.449673125625773 0.267803106717773 0 0.701535990362414 0.551494144215115 0.482635012046777 0.373779728755815 0.430605761857816 0.659614382490881 0.718478913197665 0.650987864807943 0.683795276397356 0.646264782232478 0.735420178184445 0.818586585827965 0.634216589861751 
0.765659822656361 0.517282479141836 0.747455911343173 0.841820910455228 0.522541392379812 0.573666487726319 0.302780246286657 0.697951879101213 0.643173412512372 0.598566633402885 0.701535990362414 0 0.860012230114305 0.823966727319989 0.696356043704593 0.608614963860129 0.896020179372198 0.953959215457244 0.9014440433213 0.923448497952799 0.938116913005677 0.905321324457207 0.868666952421774 0.854316702819956 0.901660445359763 0.754406370224568 0.724877304998332 0.345334659061175 0.60638464347191 0.757674655787863 0.754373626373627 0.622147127631921 0.573924437650142 0.694873612095441 0.551494144215115 0.860012230114305 0 0.554756456358287 0.57855421686747 0.741260525644297 0.453305402640425 0.514898810129157 0.351567272898162 0.496547756041427 0.388174807197943 0.596869061623382 0.729252973406388 0.590238621498489 0.716043877642022 0.427280771201043 0.72127723067741 0.544980977465613 0.48037557590249 0.653360627772091 0.746791469651645 0.564580791354366 0.633194205745151 0.535760932030719 0.482635012046777 0.823966727319989 0.554756456358287 0 0.511525795828759 0.554151727277901 0.655082959520078 0.725768053508672 0.622747338353674 0.783666100254885 0.673474291206151 0.859248929176213 0.828249694002448 0.750707371556218 0.830408783452603 0.674327728932078 0.809644950779781 0.387906855391038 0.378418761774833 0.434689236988378 0.495783259540375 0.287701365077311 0.395377631035906 0.46270201874964 0.373779728755815 0.696356043704593 0.57855421686747 0.511525795828759 0 0.451855598406777 0.595916174099946 0.715382678751259 0.543911776469916 0.669047897880314 0.677185443802873 0.695153896529142 0.698248620855873 0.518242618672176 0.67063492063492 0.446171234812094 0.632043116119549 0.631889119284077 0.337611485751577 0.336909757263505 0.500159329903947 0.425861695209191 0.431129869389152 0.382298106172113 0.430605761857816 0.608614963860129 0.741260525644297 0.554151727277901 0.451855598406777 0 0.755672609400324 0.860085791452629 0.734387197501951 0.816868430229662 
0.840013416065739 0.817908907547258 0.788424259207976 0.70625642943982 0.784595481217946 0.617593040115998 0.746623195156032 0.360369716481462 0.671739130434783 0.793106938610254 0.77929167212542 0.639083780437474 0.695857026807473 0.745988552125364 0.659614382490881 0.896020179372198 0.453305402640425 0.655082959520078 0.595916174099946 0.755672609400324 0 0.32374463790058 0.175471345902557 0.515448702689836 0.560172148132405 0.646577681901937 0.83184352597575 0.699166591988529 0.769745293466224 0.526223291092406 0.793334979017526 0.495569929879871 0.717861205915813 0.856175256640914 0.873218985388874 0.729525454839929 0.789820495341968 0.861145064293746 0.718478913197665 0.953959215457244 0.514898810129157 0.725768053508672 0.715382678751259 0.860085791452629 0.32374463790058 0 0.398453833811446 0.563443208895949 0.537750556792873 0.725759700794764 0.901458342917606 0.780864053688773 0.850419084461638 0.556379750493714 0.888831597401258 0.338230884557721 0.635512156237545 0.744137292184681 0.749693459050357 0.625248311481923 0.568402976531196 0.724916160609081 0.650987864807943 0.9014440433213 0.351567272898162 0.622747338353674 0.543911776469916 0.734387197501951 0.175471345902557 0.398453833811446 0 0.451762651100795 0.466509988249118 0.555275398861018 0.722312633832977 0.576246207195492 0.656792557980023 0.407794761777063 0.672014090541248 0.527747996119875 0.757850272278487 0.83821186778176 0.809023592216291 0.712879825495866 0.530275619878684 0.802615164256341 0.683795276397356 0.923448497952799 0.496547756041427 0.783666100254885 0.669047897880314 0.816868430229662 0.515448702689836 0.563443208895949 0.451762651100795 0 0.359268929503916 0.20992028343667 0.388581148863785 0.264185101779485 0.341337757596926 0.300259669263359 0.750777302759425 0.46940182727116 0.684397366160893 0.830987488829312 0.841379965785972 0.711791887728906 0.517760385310054 0.801531425047536 0.646264782232478 0.938116913005677 0.388174807197943 0.673474291206151 0.677185443802873 
0.840013416065739 0.560172148132405 0.537750556792873 0.466509988249118 0.359268929503916 0 0.484114532261228 0.622234006652847 0.487074231539852 0.577606177606177 0.321596600394597 0.764130434782609 0.572409158546537 0.820626944245034 0.837255085778569 0.758192367410313 0.724986879144997 0.538922155688623 0.832146445891039 0.735420178184445 0.905321324457207 0.596869061623382 0.859248929176213 0.695153896529142 0.817908907547258 0.646577681901937 0.725759700794764 0.555275398861018 0.20992028343667 0.484114532261228 0 0.233028585612046 0.184614740719039 0.145672877846791 0.420959606845713 0.677966101694915 0.658356940509915 0.776103945014594 0.759051704257184 0.741589782845171 0.669388904533934 0.539314268896846 0.772508176966776 0.818586585827965 0.868666952421774 0.729252973406388 0.828249694002448 0.698248620855873 0.788424259207976 0.83184352597575 0.901458342917606 0.722312633832977 0.388581148863785 0.622234006652847 0.233028585612046 0 0.227722772277228 0.111727966689799 0.514525993883792 0.595256346665464 0.468803827751196 0.679419902681042 0.689453763842811 0.625361639704983 0.538476172358033 0.428855622420053 0.70517511761631 0.634216589861751 0.854316702819956 0.590238621498489 0.750707371556218 0.518242618672176 0.70625642943982 0.699166591988529 0.780864053688773 0.576246207195492 0.264185101779485 0.487074231539852 0.184614740719039 0.227722772277228 0 0.179336829499938 0.368810178817056 0.560213727907933 0.624899560429172 0.764456383429945 0.78428294394681 0.70965400435519 0.662547573180473 0.505990964447063 0.787532847973119 0.765659822656361 0.901660445359763 0.716043877642022 0.830408783452603 0.67063492063492 0.784595481217946 0.769745293466224 0.850419084461638 0.656792557980023 0.341337757596926 0.577606177606177 0.145672877846791 0.111727966689799 0.179336829499938 0 0.50435780791633 0.614787397309741 0.445852306473965 0.471627356163032 0.567737261531031 0.632291885091214 0.471028037383178 0.329349338004224 0.581221922731357 0.517282479141836 
0.754406370224568 0.427280771201043 0.674327728932078 0.446171234812094 0.617593040115998 0.526223291092406 0.556379750493714 0.407794761777063 0.300259669263359 0.321596600394597 0.420959606845713 0.514525993883792 0.368810178817056 0.50435780791633 0 0.671336257723692 0.556086425651643 0.760728080850839 0.727272727272727 0.54560014236142 0.495122079839688 0.531589448015961 0.677116737663332 0.747455911343173 0.724877304998332 0.72127723067741 0.809644950779781 0.632043116119549 0.746623195156032 0.793334979017526 0.888831597401258 0.672014090541248 0.750777302759425 0.764130434782609 0.677966101694915 0.595256346665464 0.560213727907933 0.614787397309741 0.671336257723692 0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/moving_pictures_dm.tsv000066400000000000000000000502311464262511300263260ustar00rootroot00000000000000 L1S105 L1S140 L1S208 L1S257 L1S281 L1S57 L1S76 L1S8 L2S155 L2S175 L2S204 L2S222 L2S240 L2S309 L2S357 L2S382 L3S294 L3S313 L3S341 L3S360 L3S378 L4S112 L4S137 L4S63 L5S104 L5S155 L5S174 L5S203 L5S222 L5S240 L6S20 L6S68 L6S93 L1S105 0.0 0.5390370298457626 0.5958380256061412 0.5768343811669847 0.5923046539353664 0.44786237128294343 0.39489596582130626 0.5545584734774127 0.8628589351714164 0.8434498336351259 0.8592370856547886 0.8506877032999233 0.7889826076831608 0.8359411856255106 0.8200856211804968 0.8426252468809847 0.8588300792888636 0.7569476110839489 0.7737531468949157 0.8944159171736104 0.6033584308249973 0.8470795512030315 0.821242904747876 0.8652874911410461 0.7549182333398126 0.7223305078861331 0.7614916771466183 0.7384823167933402 0.7685949288454377 0.7217074962665071 0.7245343115615573 0.7830441925323288 0.7712136041691006 L1S140 0.5390370298457626 0.0 0.5799280564700269 0.5498337267462643 0.5487499420209432 0.5617578336407074 0.5274803712970444 0.5842935186320399 0.8425845430236847 0.7832827608957819 0.832335123190656 0.83707961792672 0.7311087382394217 0.7969572429804197 0.74931933992796 0.791784337383268 0.7891927506641839 
0.6721331780898249 0.7493321619468443 0.8773425730913526 0.3943371737754427 0.8380071824429106 0.7797415033649835 0.8604626784828243 0.7389385379102201 0.713927118153038 0.7409814677579339 0.6893827188871452 0.739005471405395 0.7134211109446568 0.7210216228178491 0.7548606893628177 0.75650714031249 L1S208 0.5958380256061412 0.5799280564700269 0.0 0.3514478448812178 0.3099429863936709 0.4908395118886828 0.5060147104062029 0.5921114631534051 0.8899108348691316 0.871025922389371 0.882455510304791 0.8798738374168472 0.8153291321982421 0.8616764262370401 0.8215036618481708 0.8762874977082885 0.8826136551026731 0.8218942402038897 0.8270103157643566 0.8918833194204532 0.5595063305032598 0.8896853519676307 0.8701383470494592 0.9064920956741176 0.8245445313262268 0.7952347925702751 0.8252913142216206 0.7959214036381839 0.8153225533087411 0.7836084181624914 0.791415482862234 0.8107398191951546 0.8044123679011221 L1S257 0.5768343811669847 0.5498337267462643 0.3514478448812178 0.0 0.25929060246530955 0.47255177559300116 0.43222880359346527 0.5884066995223238 0.8795468292082718 0.8633179655119932 0.8688146996880899 0.8629186587174017 0.8190468684000036 0.8492352934610714 0.8120267566814204 0.8666693265827448 0.8777562843225813 0.802227485419459 0.8160964503323095 0.8817724113978568 0.5576854601824073 0.8747926182003918 0.858838497674028 0.8925129391873841 0.828703636427477 0.8010491792499632 0.8063815782189153 0.8092051698995067 0.806495056831578 0.8032210970007331 0.7958200880953574 0.8161661389998109 0.8303929119421523 L1S281 0.5923046539353664 0.5487499420209432 0.3099429863936709 0.25929060246530955 0.0 0.4609450181065116 0.4768241494226335 0.6336173247999576 0.8985838283907426 0.8902364311520621 0.8972881153423283 0.8878063096662511 0.842173339458122 0.8796280371215733 0.8476576569873849 0.8898292712501708 0.9039077039484619 0.8485533231569067 0.852633569868443 0.9096947429872373 0.5498250250306412 0.9022662022069642 0.8832755726058907 0.9169203743757105 0.8572110146660553 
0.833803405339537 0.8394041604332061 0.8290963638641423 0.8384368165385457 0.8247152739488143 0.832089497492543 0.8365271234666928 0.8483699050146564 L1S57 0.44786237128294343 0.5617578336407074 0.4908395118886828 0.47255177559300116 0.4609450181065116 0.0 0.31082469034707255 0.5024096303529431 0.9113813328862811 0.9014185555334876 0.9014865706960162 0.8945638042848107 0.8714620677099987 0.8806337056845028 0.8481649374200068 0.9084204945482022 0.913489362275528 0.852617211132442 0.8535720478866394 0.9122868711391121 0.5758415360633006 0.9020100495580599 0.9027332494336695 0.9186219021297435 0.853072903040757 0.8327332298901452 0.8629325424179554 0.838123751985925 0.8659733546560768 0.8346642658091125 0.8289313199906942 0.8703637908123152 0.8629123642673379 L1S76 0.39489596582130626 0.5274803712970444 0.5060147104062029 0.43222880359346527 0.4768241494226335 0.31082469034707255 0.0 0.4003824579445799 0.894830907945713 0.8782305450289447 0.8768848673183877 0.8694496317335333 0.834400640278251 0.8645838229241534 0.8020621529524625 0.8821339436331491 0.8821873080715426 0.8108074314407202 0.8071422086314133 0.8931657453933083 0.5741407822604754 0.8800714967479438 0.8737823256177506 0.8981103792379882 0.8193019560519106 0.7921706586389919 0.8200532532128909 0.8020963162128805 0.8249980087726344 0.7949183972892307 0.7923436575946494 0.8310029142646927 0.8322683023877507 L1S8 0.5545584734774127 0.5842935186320399 0.5921114631534051 0.5884066995223238 0.6336173247999576 0.5024096303529431 0.4003824579445799 0.0 0.8984532053428095 0.8752689122693623 0.892740827989152 0.8922691853549297 0.8314474877203453 0.8600974905243383 0.8016996748131581 0.8870353495912263 0.8805255216909961 0.7980602019240324 0.829887986255906 0.8798293357284456 0.5576624158659516 0.8948846831679798 0.8816102418936791 0.9112039786090779 0.8148374423400107 0.7791385980867408 0.8123406708143702 0.8036845403456274 0.8182429195392156 0.786233773105675 0.785902469716853 0.8323093879065956 0.8268942255033314 
L2S155 0.8628589351714164 0.8425845430236847 0.8899108348691316 0.8795468292082718 0.8985838283907426 0.9113813328862811 0.894830907945713 0.8984532053428095 0.0 0.46760603559570263 0.4249569948218847 0.43461957349574365 0.7136184283615585 0.3913207521607729 0.6347075539715223 0.7018474049205657 0.48529937944992957 0.6445967049513042 0.6384694268897185 0.4614095269789821 0.8793414820335794 0.49283220104466596 0.6490593420832039 0.47563246735903575 0.7320469060900457 0.8046481081885503 0.7992253539435805 0.7608659584888517 0.761349180972591 0.8017496796623877 0.7844856344015433 0.7648448360194874 0.7728251640763542 L2S175 0.8434498336351259 0.7832827608957819 0.871025922389371 0.8633179655119932 0.8902364311520621 0.9014185555334876 0.8782305450289447 0.8752689122693623 0.46760603559570263 0.0 0.4521351118370733 0.47450759266616915 0.6496968752812166 0.4163459132801876 0.5642253392315422 0.6035203651725982 0.4127343227632307 0.5009839698168453 0.5720541940600513 0.5238301647807445 0.8519601477875526 0.45057960516670714 0.6012619962408408 0.5452699437274425 0.7004211741288418 0.7254489332170817 0.7241658196524235 0.6728749977683892 0.7120770444662118 0.7067601324302817 0.7154192599371452 0.6790044090495644 0.6670904831025087 L2S204 0.8592370856547886 0.832335123190656 0.882455510304791 0.8688146996880899 0.8972881153423283 0.9014865706960162 0.8768848673183877 0.892740827989152 0.4249569948218847 0.4521351118370733 0.0 0.34707557932356076 0.7317461021420034 0.32542423370295653 0.5940997497062588 0.7202722055635978 0.46284470929748595 0.6399875240134363 0.5427529698531464 0.41546588368326215 0.8806954039760829 0.3819763193450203 0.6864606389600179 0.4541251117472289 0.7433650255074369 0.8140552153241122 0.8171702240197111 0.7787482342908187 0.7789238492726243 0.8145023991202386 0.797521853405542 0.7823436739920101 0.7825923091798144 L2S222 0.8506877032999233 0.83707961792672 0.8798738374168472 0.8629186587174017 0.8878063096662511 0.8945638042848107 0.8694496317335333 
0.8922691853549297 0.43461957349574365 0.47450759266616915 0.34707557932356076 0.0 0.7373605415524003 0.38933892308526385 0.6131841736075622 0.7071347655059155 0.5297960404918108 0.6776613871707217 0.614146849517274 0.41910689292075193 0.8807526952301693 0.43694693261646017 0.6992608535946145 0.4512186030792239 0.7657709194380475 0.8264506476297601 0.8235579667989716 0.7929926795664424 0.7900065630791773 0.8272758828816869 0.8177374352486667 0.7787324503994212 0.7937870099703493 L2S240 0.7889826076831608 0.7311087382394217 0.8153291321982421 0.8190468684000036 0.842173339458122 0.8714620677099987 0.834400640278251 0.8314474877203453 0.7136184283615585 0.6496968752812166 0.7317461021420034 0.7373605415524003 0.0 0.7032990730427625 0.5742088858732407 0.5358811200052477 0.6113293724148627 0.5506881270264428 0.5515051266923414 0.7398454298135383 0.7579742017535656 0.7115018729258218 0.48083389190461734 0.7661067183222632 0.36560695464990867 0.4412731503394013 0.41030162605879844 0.38926922412434417 0.3758838249617733 0.43476390441875573 0.4013288162093646 0.29292532554452677 0.35832282922330977 L2S309 0.8359411856255106 0.7969572429804197 0.8616764262370401 0.8492352934610714 0.8796280371215733 0.8806337056845028 0.8645838229241534 0.8600974905243383 0.3913207521607729 0.4163459132801876 0.32542423370295653 0.38933892308526385 0.7032990730427625 0.0 0.5284516343093534 0.7093129471508985 0.41393505549147774 0.6241411216254367 0.5766719921218544 0.36061963338100667 0.8395112062787546 0.4200940359303599 0.6691543813483889 0.43557544512779306 0.7065436739270591 0.7784024482700062 0.7846395088210678 0.7393917740028452 0.7397915081982174 0.7798934785055419 0.7729246906860544 0.763695527435426 0.7629847033876873 L2S357 0.8200856211804968 0.74931933992796 0.8215036618481708 0.8120267566814204 0.8476576569873849 0.8481649374200068 0.8020621529524625 0.8016996748131581 0.6347075539715223 0.5642253392315422 0.5940997497062588 0.6131841736075622 0.5742088858732407 
0.5284516343093534 0.0 0.6247944934502809 0.5283785180280727 0.44838682641972477 0.375128958725295 0.6121711834124891 0.8188660070261687 0.5950936496526126 0.542689027968325 0.6689211737896941 0.5421787179552776 0.6601284562072681 0.6793754491062178 0.5918194682678256 0.6055221091296257 0.6631138650570884 0.6559008959245738 0.6414849884779625 0.6348752854444591 L2S382 0.8426252468809847 0.791784337383268 0.8762874977082885 0.8666693265827448 0.8898292712501708 0.9084204945482022 0.8821339436331491 0.8870353495912263 0.7018474049205657 0.6035203651725982 0.7202722055635978 0.7071347655059155 0.5358811200052477 0.7093129471508985 0.6247944934502809 0.0 0.680488155634992 0.5879411676849041 0.6300513986721148 0.7798117418893309 0.8451713307652351 0.7053126728281367 0.29452174278276233 0.7387926847081081 0.6760422527308095 0.6989286770361658 0.6126816214335168 0.6413026519516322 0.6319640587005974 0.6916898221776602 0.6638186733377722 0.5262410574638753 0.5609030584455355 L3S294 0.8588300792888636 0.7891927506641839 0.8826136551026731 0.8777562843225813 0.9039077039484619 0.913489362275528 0.8821873080715426 0.8805255216909961 0.48529937944992957 0.4127343227632307 0.46284470929748595 0.5297960404918108 0.6113293724148627 0.41393505549147774 0.5283785180280727 0.680488155634992 0.0 0.5793151362285137 0.532519942057993 0.4417588676636619 0.8329083701065813 0.5166391190254049 0.6344485180123575 0.5880518426822319 0.6166977936514635 0.6976139122442497 0.6965206807593246 0.6533170380479183 0.6465722615751633 0.7007111257543401 0.697083374921763 0.7001623873009802 0.7062166839383266 L3S313 0.7569476110839489 0.6721331780898249 0.8218942402038897 0.802227485419459 0.8485533231569067 0.852617211132442 0.8108074314407202 0.7980602019240324 0.6445967049513042 0.5009839698168453 0.6399875240134363 0.6776613871707217 0.5506881270264428 0.6241411216254367 0.44838682641972477 0.5879411676849041 0.5793151362285137 0.0 0.45248307788083786 0.7165487737423945 0.76582414397238 
0.6574105445715698 0.560831971453598 0.7171733441832148 0.5902731094836756 0.5532059333861852 0.5853434250505234 0.5025483834097694 0.561609978051379 0.5582681387584417 0.5694158492427435 0.6032070350377756 0.5911945286328084 L3S341 0.7737531468949157 0.7493321619468443 0.8270103157643566 0.8160964503323095 0.852633569868443 0.8535720478866394 0.8071422086314133 0.829887986255906 0.6384694268897185 0.5720541940600513 0.5427529698531464 0.614146849517274 0.5515051266923414 0.5766719921218544 0.375128958725295 0.6300513986721148 0.532519942057993 0.45248307788083786 0.0 0.6196019964860159 0.783720015310488 0.5829777947168483 0.5534737998316789 0.6520900532121093 0.5493281969319693 0.6308837007391367 0.652197593637747 0.6025364874432444 0.6084801413450426 0.6341899420458715 0.6354966927534835 0.6495259871270996 0.6352447858014029 L3S360 0.8944159171736104 0.8773425730913526 0.8918833194204532 0.8817724113978568 0.9096947429872373 0.9122868711391121 0.8931657453933083 0.8798293357284456 0.4614095269789821 0.5238301647807445 0.41546588368326215 0.41910689292075193 0.7398454298135383 0.36061963338100667 0.6121711834124891 0.7798117418893309 0.4417588676636619 0.7165487737423945 0.6196019964860159 0.0 0.886859306656774 0.5343546555847905 0.7414678143333879 0.5400460290177217 0.7495782488980336 0.8108767861286648 0.8149762338575348 0.7802652073770237 0.7846158292767554 0.81147352370555 0.8005244117641637 0.8006143679532055 0.8014989491687546 L3S378 0.6033584308249973 0.3943371737754427 0.5595063305032598 0.5576854601824073 0.5498250250306412 0.5758415360633006 0.5741407822604754 0.5576624158659516 0.8793414820335794 0.8519601477875526 0.8806954039760829 0.8807526952301693 0.7579742017535656 0.8395112062787546 0.8188660070261687 0.8451713307652351 0.8329083701065813 0.76582414397238 0.783720015310488 0.886859306656774 0.0 0.8878531310256906 0.8359164049140062 0.8993931924960518 0.7252856380797102 0.6746582064114732 0.7140756986649465 0.7000095154033142 0.7254146177064372 
0.6747284675539041 0.694536665024702 0.750964397649065 0.7478762997708954 L4S112 0.8470795512030315 0.8380071824429106 0.8896853519676307 0.8747926182003918 0.9022662022069642 0.9020100495580599 0.8800714967479438 0.8948846831679798 0.49283220104466596 0.45057960516670714 0.3819763193450203 0.43694693261646017 0.7115018729258218 0.4200940359303599 0.5950936496526126 0.7053126728281367 0.5166391190254049 0.6574105445715698 0.5829777947168483 0.5343546555847905 0.8878531310256906 0.0 0.6481421375149321 0.3575409039402891 0.7357714051501275 0.809583225542944 0.8029186032245896 0.7654724305797693 0.7692411532770428 0.8100058186769521 0.7895581361870725 0.7651442703099545 0.7646440275794989 L4S137 0.821242904747876 0.7797415033649835 0.8701383470494592 0.858838497674028 0.8832755726058907 0.9027332494336695 0.8737823256177506 0.8816102418936791 0.6490593420832039 0.6012619962408408 0.6864606389600179 0.6992608535946145 0.48083389190461734 0.6691543813483889 0.542689027968325 0.29452174278276233 0.6344485180123575 0.560831971453598 0.5534737998316789 0.7414678143333879 0.8359164049140062 0.6481421375149321 0.0 0.6908078779515394 0.6039667084060777 0.680281107717286 0.5936509777076375 0.6119432909211573 0.6028821638318781 0.6703353838958876 0.6453016109359295 0.5078212308619812 0.5169165993321925 L4S63 0.8652874911410461 0.8604626784828243 0.9064920956741176 0.8925129391873841 0.9169203743757105 0.9186219021297435 0.8981103792379882 0.9112039786090779 0.47563246735903575 0.5452699437274425 0.4541251117472289 0.4512186030792239 0.7661067183222632 0.43557544512779306 0.6689211737896941 0.7387926847081081 0.5880518426822319 0.7171733441832148 0.6520900532121093 0.5400460290177217 0.8993931924960518 0.3575409039402891 0.6908078779515394 0.0 0.7771055341254081 0.8401318632573123 0.8278791663557197 0.8033319145222341 0.8021181534933208 0.8397629806795364 0.8174036603850671 0.8034029168891261 0.8047837943956528 L5S104 0.7549182333398126 0.7389385379102201 0.8245445313262268 
0.828703636427477 0.8572110146660553 0.853072903040757 0.8193019560519106 0.8148374423400107 0.7320469060900457 0.7004211741288418 0.7433650255074369 0.7657709194380475 0.36560695464990867 0.7065436739270591 0.5421787179552776 0.6760422527308095 0.6166977936514635 0.5902731094836756 0.5493281969319693 0.7495782488980336 0.7252856380797102 0.7357714051501275 0.6039667084060777 0.7771055341254081 0.0 0.299746565246475 0.3717459070188454 0.30611431267503736 0.2687293485232606 0.29488133279702533 0.35282575882032935 0.4059272692757093 0.3546188970208585 L5S155 0.7223305078861331 0.713927118153038 0.7952347925702751 0.8010491792499632 0.833803405339537 0.8327332298901452 0.7921706586389919 0.7791385980867408 0.8046481081885503 0.7254489332170817 0.8140552153241122 0.8264506476297601 0.4412731503394013 0.7784024482700062 0.6601284562072681 0.6989286770361658 0.6976139122442497 0.5532059333861852 0.6308837007391367 0.8108767861286648 0.6746582064114732 0.809583225542944 0.680281107717286 0.8401318632573123 0.299746565246475 0.0 0.24482970831325948 0.21540954545570457 0.28453959248410193 0.08866970541518727 0.2086655812403446 0.42817224711244256 0.35580061290923976 L5S174 0.7614916771466183 0.7409814677579339 0.8252913142216206 0.8063815782189153 0.8394041604332061 0.8629325424179554 0.8200532532128909 0.8123406708143702 0.7992253539435805 0.7241658196524235 0.8171702240197111 0.8235579667989716 0.41030162605879844 0.7846395088210678 0.6793754491062178 0.6126816214335168 0.6965206807593246 0.5853434250505234 0.652197593637747 0.8149762338575348 0.7140756986649465 0.8029186032245896 0.5936509777076375 0.8278791663557197 0.3717459070188454 0.24482970831325948 0.0 0.2877214934590952 0.25347139824912546 0.2605730633285677 0.2559815010487754 0.27892381163313434 0.3053187587048309 L5S203 0.7384823167933402 0.6893827188871452 0.7959214036381839 0.8092051698995067 0.8290963638641423 0.838123751985925 0.8020963162128805 0.8036845403456274 0.7608659584888517 0.6728749977683892 
0.7787482342908187 0.7929926795664424 0.38926922412434417 0.7393917740028452 0.5918194682678256 0.6413026519516322 0.6533170380479183 0.5025483834097694 0.6025364874432444 0.7802652073770237 0.7000095154033142 0.7654724305797693 0.6119432909211573 0.8033319145222341 0.30611431267503736 0.21540954545570457 0.2877214934590952 0.0 0.24311981207597314 0.2201431600708006 0.2346438472041132 0.333150002145841 0.2850632669863039 L5S222 0.7685949288454377 0.739005471405395 0.8153225533087411 0.806495056831578 0.8384368165385457 0.8659733546560768 0.8249980087726344 0.8182429195392156 0.761349180972591 0.7120770444662118 0.7789238492726243 0.7900065630791773 0.3758838249617733 0.7397915081982174 0.6055221091296257 0.6319640587005974 0.6465722615751633 0.561609978051379 0.6084801413450426 0.7846158292767554 0.7254146177064372 0.7692411532770428 0.6028821638318781 0.8021181534933208 0.2687293485232606 0.28453959248410193 0.25347139824912546 0.24311981207597314 0.0 0.31116489519567736 0.30117438587754664 0.29331724421227606 0.2904033216874486 L5S240 0.7217074962665071 0.7134211109446568 0.7836084181624914 0.8032210970007331 0.8247152739488143 0.8346642658091125 0.7949183972892307 0.786233773105675 0.8017496796623877 0.7067601324302817 0.8145023991202386 0.8272758828816869 0.43476390441875573 0.7798934785055419 0.6631138650570884 0.6916898221776602 0.7007111257543401 0.5582681387584417 0.6341899420458715 0.81147352370555 0.6747284675539041 0.8100058186769521 0.6703353838958876 0.8397629806795364 0.29488133279702533 0.08866970541518727 0.2605730633285677 0.2201431600708006 0.31116489519567736 0.0 0.21735399370958108 0.3819989872862365 0.3241682922704345 L6S20 0.7245343115615573 0.7210216228178491 0.791415482862234 0.7958200880953574 0.832089497492543 0.8289313199906942 0.7923436575946494 0.785902469716853 0.7844856344015433 0.7154192599371452 0.797521853405542 0.8177374352486667 0.4013288162093646 0.7729246906860544 0.6559008959245738 0.6638186733377722 0.697083374921763 
0.5694158492427435 0.6354966927534835 0.8005244117641637 0.694536665024702 0.7895581361870725 0.6453016109359295 0.8174036603850671 0.35282575882032935 0.2086655812403446 0.2559815010487754 0.2346438472041132 0.30117438587754664 0.21735399370958108 0.0 0.36633132081192926 0.3477206555495855 L6S68 0.7830441925323288 0.7548606893628177 0.8107398191951546 0.8161661389998109 0.8365271234666928 0.8703637908123152 0.8310029142646927 0.8323093879065956 0.7648448360194874 0.6790044090495644 0.7823436739920101 0.7787324503994212 0.29292532554452677 0.763695527435426 0.6414849884779625 0.5262410574638753 0.7001623873009802 0.6032070350377756 0.6495259871270996 0.8006143679532055 0.750964397649065 0.7651442703099545 0.5078212308619812 0.8034029168891261 0.4059272692757093 0.42817224711244256 0.27892381163313434 0.333150002145841 0.29331724421227606 0.3819989872862365 0.36633132081192926 0.0 0.19042209104190255 L6S93 0.7712136041691006 0.75650714031249 0.8044123679011221 0.8303929119421523 0.8483699050146564 0.8629123642673379 0.8322683023877507 0.8268942255033314 0.7728251640763542 0.6670904831025087 0.7825923091798144 0.7937870099703493 0.35832282922330977 0.7629847033876873 0.6348752854444591 0.5609030584455355 0.7062166839383266 0.5911945286328084 0.6352447858014029 0.8014989491687546 0.7478762997708954 0.7646440275794989 0.5169165993321925 0.8047837943956528 0.3546188970208585 0.35580061290923976 0.3053187587048309 0.2850632669863039 0.2904033216874486 0.3241682922704345 0.3477206555495855 0.19042209104190255 0.0 scikit-bio-0.6.2/skbio/stats/distance/tests/data/moving_pictures_mf.tsv000066400000000000000000000071261464262511300263350ustar00rootroot00000000000000#SampleID BarcodeSequence LinkerPrimerSequence BodySite Year Month Day Subject ReportedAntibioticUsage DaysSinceExperimentStart Description L1S8 AGCTGACTAGTC GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-1 Yes 0 subject-1.gut.2008-10-28 L1S57 ACACACTATGGC GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-1 No 84 
subject-1.gut.2009-1-20 L1S76 ACTACGTGTGGT GTGCCAGCMGCCGCGGTAA gut 2009 2 17 subject-1 No 112 subject-1.gut.2009-2-17 L1S105 AGTGCGATGCGT GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-1 No 140 subject-1.gut.2009-3-17 L2S155 ACGATGCGACCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-1 No 84 subject-1.left-palm.2009-1-20 L2S175 AGCTATCCACGA GTGCCAGCMGCCGCGGTAA left palm 2009 2 17 subject-1 No 112 subject-1.left-palm.2009-2-17 L2S204 ATGCAGCTCAGT GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-1 No 140 subject-1.left-palm.2009-3-17 L2S222 CACGTGACATGT GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-1 No 168 subject-1.left-palm.2009-4-14 L3S242 ACAGTTGCGCGA GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-1 Yes 0 subject-1.right-palm.2008-10-28 L3S294 CACGACAGGCTA GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-1 No 84 subject-1.right-palm.2009-1-20 L3S313 AGTGTCACGGTG GTGCCAGCMGCCGCGGTAA right palm 2009 2 17 subject-1 No 112 subject-1.right-palm.2009-2-17 L3S341 CAAGTGAGAGAG GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-1 No 140 subject-1.right-palm.2009-3-17 L3S360 CATCGTATCAAC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-1 No 168 subject-1.right-palm.2009-4-14 L5S104 CAGTGTCAGGAC GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-1 Yes 0 subject-1.tongue.2008-10-28 L5S155 ATCTTAGACTGC GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-1 No 84 subject-1.tongue.2009-1-20 L5S174 CAGACATTGCGT GTGCCAGCMGCCGCGGTAA tongue 2009 2 17 subject-1 No 112 subject-1.tongue.2009-2-17 L5S203 CGATGCACCAGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-1 No 140 subject-1.tongue.2009-3-17 L5S222 CTAGAGACTCTT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-1 No 168 subject-1.tongue.2009-4-14 L1S140 ATGGCAGCTCTA GTGCCAGCMGCCGCGGTAA gut 2008 10 28 subject-2 Yes 0 subject-2.gut.2008-10-28 L1S208 CTGAGATACGCG GTGCCAGCMGCCGCGGTAA gut 2009 1 20 subject-2 No 84 subject-2.gut.2009-1-20 L1S257 CCGACTGAGATG GTGCCAGCMGCCGCGGTAA gut 2009 3 17 subject-2 No 140 subject-2.gut.2009-3-17 L1S281 CCTCTCGTGATC 
GTGCCAGCMGCCGCGGTAA gut 2009 4 14 subject-2 No 168 subject-2.gut.2009-4-14 L2S240 CATATCGCAGTT GTGCCAGCMGCCGCGGTAA left palm 2008 10 28 subject-2 Yes 0 subject-2.left-palm.2008-10-28 L2S309 CGTGCATTATCA GTGCCAGCMGCCGCGGTAA left palm 2009 1 20 subject-2 No 84 subject-2.left-palm.2009-1-20 L2S357 CTAACGCAGTCA GTGCCAGCMGCCGCGGTAA left palm 2009 3 17 subject-2 No 140 subject-2.left-palm.2009-3-17 L2S382 CTCAATGACTCA GTGCCAGCMGCCGCGGTAA left palm 2009 4 14 subject-2 No 168 subject-2.left-palm.2009-4-14 L3S378 ATCGATCTGTGG GTGCCAGCMGCCGCGGTAA right palm 2008 10 28 subject-2 Yes 0 subject-2.right-palm.2008-10-28 L4S63 CTCGTGGAGTAG GTGCCAGCMGCCGCGGTAA right palm 2009 1 20 subject-2 No 84 subject-2.right-palm.2009-1-20 L4S112 GCGTTACACACA GTGCCAGCMGCCGCGGTAA right palm 2009 3 17 subject-2 No 140 subject-2.right-palm.2009-3-17 L4S137 GAACTGTATCTC GTGCCAGCMGCCGCGGTAA right palm 2009 4 14 subject-2 No 168 subject-2.right-palm.2009-4-14 L5S240 CTGGACTCATAG GTGCCAGCMGCCGCGGTAA tongue 2008 10 28 subject-2 Yes 0 subject-2.tongue.2008-10-28 L6S20 GAGGCTCATCAT GTGCCAGCMGCCGCGGTAA tongue 2009 1 20 subject-2 No 84 subject-2.tongue.2009-1-20 L6S68 GATACGTCCTGA GTGCCAGCMGCCGCGGTAA tongue 2009 3 17 subject-2 No 140 subject-2.tongue.2009-3-17 L6S93 GATTAGCACTCT GTGCCAGCMGCCGCGGTAA tongue 2009 4 14 subject-2 No 168 subject-2.tongue.2009-4-14scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_all_dms.txt000066400000000000000000000005721464262511300304160ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative 0 1 0.7020310705446676 0.001 6 pearson 999 two-sided 0 2 0.8633966325233801 0.002 6 pearson 999 two-sided 0 3 0.6476901774685102 0.006 6 pearson 999 two-sided 1 2 0.7784836464659731 0.003 6 pearson 999 two-sided 1 3 0.9206880242368882 0.004 6 pearson 999 two-sided 2 3 0.7172972179393844 0.003 6 pearson 999 two-sided 
scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_dm_dm2.txt000066400000000000000000000001471464262511300301430ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative 0 1 0.702031 0.001 6 pearson 999 two-sided scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_duplicate_dms.txt000066400000000000000000000002371464262511300316160ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative 0 1 1.0 1.000 3 pearson 999 less 0 2 1.0 1.000 3 pearson 999 less 1 2 1.0 1.000 3 pearson 999 less scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_minimal.txt000066400000000000000000000003261464262511300304260ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative 0 1 0.7559289460184544 0.324 3 pearson 999 greater 0 2 -0.989743318610787 1.000 3 pearson 999 greater 1 2 -0.8416975766245421 0.835 3 pearson 999 greater scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_minimal_with_labels.txt000066400000000000000000000003501464262511300330000ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative minx miny 0.7559289460184544 0.324 3 pearson 999 greater minx minz -0.989743318610787 1.000 3 pearson 999 greater miny minz -0.8416975766245421 0.835 3 pearson 999 greater scikit-bio-0.6.2/skbio/stats/distance/tests/data/pwmantel_exp_results_na_p_value.txt000066400000000000000000000001371464262511300311110ustar00rootroot00000000000000dm1 dm2 statistic p-value n method permutations alternative 0 1 0.5 NaN 3 spearman 0 two-sided pwmantel_exp_results_reordered_distance_matrices.txt000066400000000000000000000003271464262511300344360ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/distance/tests/datadm1 dm2 statistic p-value n method permutations alternative 0 1 0.7559289460184544 0.324 3 pearson 999 greater 0 2 -0.9897433186107871 1.000 3 pearson 999 greater 1 2 -0.8416975766245421 
0.822 3 pearson 999 greater scikit-bio-0.6.2/skbio/stats/distance/tests/test_anosim.py000066400000000000000000000113501464262511300236600ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from functools import partial from unittest import TestCase, main import numpy as np import pandas as pd from pandas.testing import assert_series_equal from skbio import DistanceMatrix from skbio.stats.distance import anosim class TestANOSIM(TestCase): """All results were verified with R (vegan::anosim).""" def setUp(self): # Distance matrices with and without ties in the ranks, with 2 groups # of equal size. dm_ids = ['s1', 's2', 's3', 's4'] self.grouping_equal = ['Control', 'Control', 'Fast', 'Fast'] self.df = pd.read_csv( io.StringIO('ID,Group\ns2,Control\ns3,Fast\ns4,Fast\ns5,Control\n' 's1,Control'), index_col=0) self.dm_ties = DistanceMatrix([[0, 1, 1, 4], [1, 0, 3, 2], [1, 3, 0, 3], [4, 2, 3, 0]], dm_ids) self.dm_no_ties = DistanceMatrix([[0, 1, 5, 4], [1, 0, 3, 2], [5, 3, 0, 3], [4, 2, 3, 0]], dm_ids) # Test with 3 groups of unequal size. This data also generates a # negative R statistic. self.grouping_unequal = ['Control', 'Treatment1', 'Treatment2', 'Treatment1', 'Control', 'Control'] # Equivalent grouping but with different labels -- groups should be # assigned different integer labels but results should be the same. 
self.grouping_unequal_relabeled = ['z', 42, 'abc', 42, 'z', 'z'] self.dm_unequal = DistanceMatrix( [[0.0, 1.0, 0.1, 0.5678, 1.0, 1.0], [1.0, 0.0, 0.002, 0.42, 0.998, 0.0], [0.1, 0.002, 0.0, 1.0, 0.123, 1.0], [0.5678, 0.42, 1.0, 0.0, 0.123, 0.43], [1.0, 0.998, 0.123, 0.123, 0.0, 0.5], [1.0, 0.0, 1.0, 0.43, 0.5, 0.0]], ['s1', 's2', 's3', 's4', 's5', 's6']) # Expected series index is the same across all tests. self.exp_index = ['method name', 'test statistic name', 'sample size', 'number of groups', 'test statistic', 'p-value', 'number of permutations'] # Stricter series equality testing than the default. self.assert_series_equal = partial(assert_series_equal, check_index_type=True, check_series_type=True) def test_ties(self): # Ensure we get the same results if we rerun the method using the same # inputs. Also ensure we get the same results if we run the method # using a grouping vector or a data frame with equivalent groupings. exp = pd.Series(index=self.exp_index, data=['ANOSIM', 'R', 4, 2, 0.25, 0.671, 999], name='ANOSIM results') for _ in range(2): np.random.seed(0) obs = anosim(self.dm_ties, self.grouping_equal) self.assert_series_equal(obs, exp) for _ in range(2): np.random.seed(0) obs = anosim(self.dm_ties, self.df, column='Group') self.assert_series_equal(obs, exp) def test_no_ties(self): exp = pd.Series(index=self.exp_index, data=['ANOSIM', 'R', 4, 2, 0.625, 0.332, 999], name='ANOSIM results') np.random.seed(0) obs = anosim(self.dm_no_ties, self.grouping_equal) self.assert_series_equal(obs, exp) def test_no_permutations(self): exp = pd.Series(index=self.exp_index, data=['ANOSIM', 'R', 4, 2, 0.625, np.nan, 0], name='ANOSIM results') obs = anosim(self.dm_no_ties, self.grouping_equal, permutations=0) self.assert_series_equal(obs, exp) def test_unequal_group_sizes(self): exp = pd.Series(index=self.exp_index, data=['ANOSIM', 'R', 6, 3, -0.363636, 0.878, 999], name='ANOSIM results') np.random.seed(0) obs = anosim(self.dm_unequal, self.grouping_unequal) 
self.assert_series_equal(obs, exp) np.random.seed(0) obs = anosim(self.dm_unequal, self.grouping_unequal_relabeled) self.assert_series_equal(obs, exp) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/distance/tests/test_base.py000066400000000000000000001316601464262511300233130ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import unittest from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd import pandas.testing as pdt import scipy.spatial.distance try: import matplotlib as mpl except ImportError: has_matplotlib = False else: has_matplotlib = True import skbio.sequence.distance from skbio import DistanceMatrix, Sequence from skbio.stats.distance import ( DissimilarityMatrixError, DistanceMatrixError, MissingIDError, DissimilarityMatrix, randdm) from skbio.stats.distance._base import (_preprocess_input, _run_monte_carlo_stats) from skbio.stats.distance._utils import is_symmetric_and_hollow from skbio.util import assert_data_frame_almost_equal from skbio.util._testing import assert_series_almost_equal class DissimilarityMatrixTestData: def setUp(self): self.dm_1x1_data = [[0.0]] self.dm_2x2_data = [[0.0, 0.123], [0.123, 0.0]] self.dm_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]] self.dm_3x3_data = [[0.0, 0.01, 4.2], [0.01, 0.0, 12.0], [4.2, 12.0, 0.0]] self.dm_5x5_data = [[0, 1, 2, 3, 4], [5, 0, 6, 7, 8], [9, 1, 0, 2, 3], [4, 5, 6, 0, 7], [8, 9, 1, 2, 0]] class DissimilarityMatrixTestBase(DissimilarityMatrixTestData): matobj = None def setUp(self): super(DissimilarityMatrixTestBase, self).setUp() self.dm_1x1 = self.matobj(self.dm_1x1_data, ['a']) self.dm_2x2 = 
self.matobj(self.dm_2x2_data, ['a', 'b']) self.dm_2x2_asym = self.matobj(self.dm_2x2_asym_data, ['a', 'b']) self.dm_3x3 = self.matobj(self.dm_3x3_data, ['a', 'b', 'c']) self.dm_5x5 = self.matobj(self.dm_5x5_data, list('abcde')) self.dms = [self.dm_1x1, self.dm_2x2, self.dm_2x2_asym, self.dm_3x3] self.dm_shapes = [(1, 1), (2, 2), (2, 2), (3, 3)] self.dm_sizes = [1, 4, 4, 9] self.dm_transposes = [ self.dm_1x1, self.dm_2x2, self.matobj([[0, -2], [1, 0]], ['a', 'b']), self.dm_3x3] self.dm_redundant_forms = [np.array(self.dm_1x1_data), np.array(self.dm_2x2_data), np.array(self.dm_2x2_asym_data), np.array(self.dm_3x3_data)] def test_avoid_copy_on_construction(self): # ((data, expect_copy)) tests = (([[0, 1], [1, 0]], True), ([(0, 1), (1, 0)], True), (((0, 1), (1, 0)), True), (np.array([[0, 1], [1, 0]], dtype='int'), True), (np.array([[0, 1], [1, 0]], dtype='float'), False), (np.array([[0, 1], [1, 0]], dtype=np.float32), False), (np.array([[0, 1], [1, 0]], dtype=np.float64), False), (np.array([[0, 1], [1, 0]], dtype='double'), False)) for data, expect in tests: obj = DissimilarityMatrix(data) self.assertEqual(id(obj.data) != id(data), expect) def test_within(self): exp = pd.DataFrame([['a', 'a', 0.0], ['a', 'c', 4.2], ['c', 'a', 4.2], ['c', 'c', 0.0]], columns=['i', 'j', 'value']) obs = self.dm_3x3.within(['a', 'c']) pdt.assert_frame_equal(obs, exp) def test_within_order_stability(self): exp = pd.DataFrame([['a', 'a', 0.0], ['a', 'c', 4.2], ['c', 'a', 4.2], ['c', 'c', 0.0]], columns=['i', 'j', 'value']) # NOTE: order was changed from ['a', 'c'] to ['c', 'a'] # but the output order in exp is consistent with # test_within obs = self.dm_3x3.within(['c', 'a']) pdt.assert_frame_equal(obs, exp) obs = self.dm_3x3.within(['a', 'c']) pdt.assert_frame_equal(obs, exp) def test_within_missing_id(self): with self.assertRaisesRegex(MissingIDError, "not found."): self.dm_3x3.within(['x', 'a']) def test_between(self): exp = pd.DataFrame([['b', 'a', 5.], ['b', 'c', 6.], ['b', 'e', 8.], 
['d', 'a', 4.], ['d', 'c', 6.], ['d', 'e', 7.]], columns=['i', 'j', 'value']) obs = self.dm_5x5.between(['b', 'd'], ['a', 'c', 'e']) pdt.assert_frame_equal(obs, exp) def test_between_order_stability(self): exp = pd.DataFrame([['b', 'a', 5.], ['b', 'c', 6.], ['b', 'e', 8.], ['d', 'a', 4.], ['d', 'c', 6.], ['d', 'e', 7.]], columns=['i', 'j', 'value']) # varying the order of the "i" values, result remains consistent # with the test_between result obs = self.dm_5x5.between(['d', 'b'], ['a', 'c', 'e']) pdt.assert_frame_equal(obs, exp) # varying the order of the "j" values, result remains consistent # with the test_between result obs = self.dm_5x5.between(['b', 'd'], ['a', 'e', 'c']) pdt.assert_frame_equal(obs, exp) # varying the order of the "i" and "j" values, result remains # consistent with the test_between result obs = self.dm_5x5.between(['d', 'b'], ['a', 'e', 'c']) pdt.assert_frame_equal(obs, exp) def test_between_overlap(self): exp = pd.DataFrame([['b', 'a', 5.], ['b', 'd', 7.], ['b', 'e', 8.], ['d', 'a', 4.], ['d', 'd', 0.], ['d', 'e', 7.]], columns=['i', 'j', 'value']) # 'd' in i and j overlap with self.assertRaisesRegex(KeyError, ("This constraint can " "removed with " "allow_overlap=True.")): self.dm_5x5.between(['b', 'd'], ['a', 'd', 'e']) obs = self.dm_5x5.between(['b', 'd'], ['a', 'd', 'e'], allow_overlap=True) pdt.assert_frame_equal(obs, exp) def test_between_missing_id(self): with self.assertRaisesRegex(MissingIDError, "not found."): self.dm_3x3.between(['x', 'a'], ['a', 'b', 'c']) with self.assertRaisesRegex(MissingIDError, "not found."): self.dm_3x3.between(['a', 'b'], ['a', 'x', 'c']) with self.assertRaisesRegex(MissingIDError, "not found."): self.dm_3x3.between(['a', 'y'], ['a', 'x', 'c']) def test_stable_order(self): exp = np.array([1, 3, 4], dtype=int) obs = self.dm_5x5._stable_order(['d', 'e', 'b']) npt.assert_equal(obs, exp) def test_subset_to_dataframe(self): exp = pd.DataFrame([['b', 'a', 5.], ['b', 'd', 7.], ['b', 'e', 8.], ['d', 'a', 4.], 
['d', 'd', 0.], ['d', 'e', 7.]], columns=['i', 'j', 'value']) obs = self.dm_5x5._subset_to_dataframe(['b', 'd'], ['a', 'd', 'e']) pdt.assert_frame_equal(obs, exp) # and the empty edge cases exp = pd.DataFrame([], columns=['i', 'j', 'value'], index=pd.RangeIndex(start=0, stop=0)) obs = self.dm_5x5._subset_to_dataframe([], ['a', 'd', 'e']) pdt.assert_frame_equal(obs, exp, check_dtype=False) obs = self.dm_5x5._subset_to_dataframe(['b', 'd'], []) pdt.assert_frame_equal(obs, exp, check_dtype=False) obs = self.dm_5x5._subset_to_dataframe([], []) pdt.assert_frame_equal(obs, exp, check_dtype=False) def test_init_from_dm(self): ids = ['foo', 'bar', 'baz'] # DissimilarityMatrix -> DissimilarityMatrix exp = self.matobj(self.dm_3x3_data, ids) obs = self.matobj(self.dm_3x3, ids) self.assertEqual(obs, exp) # Test that copy of data is not made. self.assertTrue(obs.data is self.dm_3x3.data) obs.data[0, 1] = 424242 self.assertTrue(np.array_equal(obs.data, self.dm_3x3.data)) # DistanceMatrix -> DissimilarityMatrix exp = self.matobj(self.dm_3x3_data, ids) obs = self.matobj( self.matobj(self.dm_3x3_data, ('a', 'b', 'c')), ids) self.assertEqual(obs, exp) # DissimilarityMatrix -> DistanceMatrix with self.assertRaises(DistanceMatrixError): DistanceMatrix(self.dm_2x2_asym, ['foo', 'bar']) def test_init_non_hollow_dm(self): data = [[1, 1], [1, 1]] obs = self.matobj(data, ['a', 'b']) self.assertTrue(np.array_equal(obs.data, data)) data_hollow = skbio.stats.distance._utils.is_hollow(obs.data) self.assertEqual(data_hollow, False) def test_init_no_ids(self): exp = self.matobj(self.dm_3x3_data, ('0', '1', '2')) obs = self.matobj(self.dm_3x3_data) self.assertEqual(obs, exp) self.assertEqual(obs['1', '2'], 12.0) def test_init_invalid_input(self): # Empty data. with self.assertRaises(DissimilarityMatrixError): self.matobj([], []) # Another type of empty data. with self.assertRaises(DissimilarityMatrixError): self.matobj(np.empty((0, 0)), []) # Invalid number of dimensions. 
with self.assertRaises(DissimilarityMatrixError): self.matobj([1, 2, 3], ['a']) # Dimensions don't match. with self.assertRaises(DissimilarityMatrixError): self.matobj([[1, 2, 3]], ['a']) data = [[0, 1], [1, 0]] # Duplicate IDs. with self.assertRaises(DissimilarityMatrixError): self.matobj(data, ['a', 'a']) # Number of IDs don't match dimensions. with self.assertRaises(DissimilarityMatrixError): self.matobj(data, ['a', 'b', 'c']) with self.assertRaises(DissimilarityMatrixError): self.matobj(data, []) def test_from_iterable_non_hollow_data(self): iterable = (x for x in range(4)) exp = self.matobj([[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]) res = self.matobj.from_iterable(iterable, lambda a, b: 1) self.assertEqual(res, exp) def test_from_iterable_asymmetric_data(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [-1, 0, 1, 2], [-2, -1, 0, 1], [-3, -2, -1, 0]]) res = self.matobj.from_iterable(iterable, lambda a, b: b - a) self.assertEqual(res, exp) def test_from_iterable_no_key(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]]) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a)) self.assertEqual(res, exp) def test_from_iterable_with_key(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]], ['0', '1', '4', '9']) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), key=lambda x: str(x ** 2)) self.assertEqual(res, exp) def test_from_iterable_empty(self): with self.assertRaises(DissimilarityMatrixError): self.matobj.from_iterable([], lambda x: x) def test_from_iterable_single(self): exp = self.matobj([[100]]) res = self.matobj.from_iterable(["boo"], lambda a, b: 100) self.assertEqual(res, exp) def test_from_iterable_with_keys(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]], ['0', '1', '4', '9']) res = 
self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), keys=iter(['0', '1', '4', '9']) ) self.assertEqual(res, exp) def test_from_iterable_with_key_and_keys(self): iterable = (x for x in range(4)) with self.assertRaises(ValueError): self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), key=str, keys=['1', '2', '3', '4']) def test_from_iterable_scipy_hamming_metric_with_metadata(self): # test for #1254 seqs = [ Sequence('ACGT'), Sequence('ACGA', metadata={'id': 'seq1'}), Sequence('AAAA', metadata={'id': 'seq2'}), Sequence('AAAA', positional_metadata={'qual': range(4)}) ] exp = self.matobj([ [0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5], [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd']) dm = self.matobj.from_iterable( seqs, metric=scipy.spatial.distance.hamming, keys=['a', 'b', 'c', 'd']) self.assertEqual(dm, exp) def test_from_iterable_skbio_hamming_metric_with_metadata(self): # test for #1254 seqs = [ Sequence('ACGT'), Sequence('ACGA', metadata={'id': 'seq1'}), Sequence('AAAA', metadata={'id': 'seq2'}), Sequence('AAAA', positional_metadata={'qual': range(4)}) ] exp = self.matobj([ [0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5], [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd']) dm = self.matobj.from_iterable( seqs, metric=skbio.sequence.distance.hamming, keys=['a', 'b', 'c', 'd']) self.assertEqual(dm, exp) def test_data(self): for dm, exp in zip(self.dms, self.dm_redundant_forms): obs = dm.data self.assertTrue(np.array_equal(obs, exp)) with self.assertRaises(AttributeError): self.dm_3x3.data = 'foo' def test_ids(self): obs = self.dm_3x3.ids self.assertEqual(obs, ('a', 'b', 'c')) # Test that we overwrite the existing IDs and that the ID index is # correctly rebuilt. 
new_ids = ['foo', 'bar', 'baz'] self.dm_3x3.ids = new_ids obs = self.dm_3x3.ids self.assertEqual(obs, tuple(new_ids)) self.assertTrue(np.array_equal(self.dm_3x3['bar'], np.array([0.01, 0.0, 12.0]))) with self.assertRaises(MissingIDError): self.dm_3x3['b'] def test_ids_invalid_input(self): with self.assertRaises(DissimilarityMatrixError): self.dm_3x3.ids = ['foo', 'bar'] # Make sure that we can still use the dissimilarity matrix after trying # to be evil. obs = self.dm_3x3.ids self.assertEqual(obs, ('a', 'b', 'c')) def test_dtype(self): for dm in self.dms: self.assertEqual(dm.dtype, np.float64) def test_shape(self): for dm, shape in zip(self.dms, self.dm_shapes): self.assertEqual(dm.shape, shape) def test_size(self): for dm, size in zip(self.dms, self.dm_sizes): self.assertEqual(dm.size, size) def test_transpose(self): for dm, transpose in zip(self.dms, self.dm_transposes): self.assertEqual(dm.T, transpose) self.assertEqual(dm.transpose(), transpose) # We should get a reference to a different object back, even if the # transpose is the same as the original. self.assertTrue(dm.transpose() is not dm) def test_index(self): self.assertEqual(self.dm_3x3.index('a'), 0) self.assertEqual(self.dm_3x3.index('b'), 1) self.assertEqual(self.dm_3x3.index('c'), 2) with self.assertRaises(MissingIDError): self.dm_3x3.index('d') with self.assertRaises(MissingIDError): self.dm_3x3.index(1) def test_redundant_form(self): for dm, redundant in zip(self.dms, self.dm_redundant_forms): obs = dm.redundant_form() self.assertTrue(np.array_equal(obs, redundant)) def test_copy(self): copy = self.dm_2x2.copy() self.assertEqual(copy, self.dm_2x2) self.assertFalse(copy.data is self.dm_2x2.data) # deepcopy doesn't actually create a copy of the IDs because it is a # tuple of strings, which is fully immutable. 
self.assertTrue(copy.ids is self.dm_2x2.ids) new_ids = ['hello', 'world'] copy.ids = new_ids self.assertNotEqual(copy.ids, self.dm_2x2.ids) copy = self.dm_2x2.copy() copy.data[0, 1] = 0.0001 self.assertFalse(np.array_equal(copy.data, self.dm_2x2.data)) def test_filter_no_filtering(self): # Don't actually filter anything -- ensure we get back a different # object. obs = self.dm_3x3.filter(['a', 'b', 'c']) self.assertEqual(obs, self.dm_3x3) self.assertFalse(obs is self.dm_3x3) def test_filter_reorder(self): # Don't filter anything, but reorder the distance matrix. order = ['c', 'a', 'b'] exp = self.matobj( [[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], order) obs = self.dm_3x3.filter(order) self.assertEqual(obs, exp) def test_filter_single_id(self): ids = ['b'] exp = self.matobj([[0]], ids) obs = self.dm_2x2_asym.filter(ids) self.assertEqual(obs, exp) def test_filter_asymmetric(self): # 2x2 ids = ['b', 'a'] exp = self.matobj([[0, -2], [1, 0]], ids) obs = self.dm_2x2_asym.filter(ids) self.assertEqual(obs, exp) # 3x3 dm = self.matobj([[0, 10, 53], [42, 0, 22.5], [53, 1, 0]], ('bro', 'brah', 'breh')) ids = ['breh', 'brah'] exp = self.matobj([[0, 1], [22.5, 0]], ids) obs = dm.filter(ids) self.assertEqual(obs, exp) def test_filter_subset(self): ids = ('c', 'a') exp = self.matobj([[0, 4.2], [4.2, 0]], ids) obs = self.dm_3x3.filter(ids) self.assertEqual(obs, exp) ids = ('b', 'a') exp = self.matobj([[0, 0.01], [0.01, 0]], ids) obs = self.dm_3x3.filter(ids) self.assertEqual(obs, exp) # 4x4 dm = self.matobj([[0, 1, 55, 7], [1, 0, 16, 1], [55, 16, 0, 23], [7, 1, 23, 0]]) ids = np.asarray(['3', '0', '1']) exp = self.matobj([[0, 7, 1], [7, 0, 1], [1, 1, 0]], ids) obs = dm.filter(ids) self.assertEqual(obs, exp) def test_filter_duplicate_ids(self): with self.assertRaises(DissimilarityMatrixError): self.dm_3x3.filter(['c', 'a', 'c']) def test_filter_missing_ids(self): with self.assertRaises(MissingIDError): self.dm_3x3.filter(['c', 'bro']) def 
test_filter_missing_ids_strict_false(self): # no exception should be raised ids = ('c', 'a') exp = self.matobj([[0, 4.2], [4.2, 0]], ids) obs = self.dm_3x3.filter(['c', 'a', 'not found'], strict=False) self.assertEqual(obs, exp) def test_filter_empty_ids(self): with self.assertRaises(DissimilarityMatrixError): self.dm_3x3.filter([]) @unittest.skipUnless(has_matplotlib, "Matplotlib not available.") def test_plot_default(self): fig = self.dm_1x1.plot() self.assertIsInstance(fig, mpl.figure.Figure) axes = fig.get_axes() self.assertEqual(len(axes), 2) ax = axes[0] self.assertEqual(ax.get_title(), '') xticks = [] for tick in ax.get_xticklabels(): xticks.append(tick.get_text()) self.assertEqual(xticks, ['a']) yticks = [] for tick in ax.get_yticklabels(): yticks.append(tick.get_text()) self.assertEqual(yticks, ['a']) @unittest.skipUnless(has_matplotlib, "Matplotlib not available.") def test_plot_no_default(self): ids = ['0', 'one', '2', 'three', '4.000'] data = ([0, 1, 2, 3, 4], [1, 0, 1, 2, 3], [2, 1, 0, 1, 2], [3, 2, 1, 0, 1], [4, 3, 2, 1, 0]) dm = self.matobj(data, ids) fig = dm.plot(cmap='Reds', title='Testplot') self.assertIsInstance(fig, mpl.figure.Figure) axes = fig.get_axes() self.assertEqual(len(axes), 2) ax = axes[0] self.assertEqual(ax.get_title(), 'Testplot') xticks = [] for tick in ax.get_xticklabels(): xticks.append(tick.get_text()) self.assertEqual(xticks, ['0', 'one', '2', 'three', '4.000']) yticks = [] for tick in ax.get_yticklabels(): yticks.append(tick.get_text()) self.assertEqual(yticks, ['0', 'one', '2', 'three', '4.000']) def test_to_data_frame_1x1(self): df = self.dm_1x1.to_data_frame() exp = pd.DataFrame([[0.0]], index=['a'], columns=['a']) assert_data_frame_almost_equal(df, exp) def test_to_data_frame_3x3(self): df = self.dm_3x3.to_data_frame() exp = pd.DataFrame([[0.0, 0.01, 4.2], [0.01, 0.0, 12.0], [4.2, 12.0, 0.0]], index=['a', 'b', 'c'], columns=['a', 'b', 'c']) assert_data_frame_almost_equal(df, exp) def test_to_data_frame_default_ids(self): 
df = self.matobj(self.dm_2x2_data).to_data_frame() exp = pd.DataFrame([[0.0, 0.123], [0.123, 0.0]], index=['0', '1'], columns=['0', '1']) assert_data_frame_almost_equal(df, exp) def test_str(self): for dm in self.dms: obs = str(dm) # Do some very light testing here to make sure we're getting a # non-empty string back. We don't want to test the exact # formatting. self.assertTrue(obs) def test_eq(self): for dm in self.dms: copy = dm.copy() self.assertTrue(dm == dm) self.assertTrue(copy == copy) self.assertTrue(dm == copy) self.assertTrue(copy == dm) self.assertFalse(self.dm_1x1 == self.dm_3x3) def test_ne(self): # Wrong class. self.assertTrue(self.dm_3x3 != 'foo') # Wrong shape. self.assertTrue(self.dm_3x3 != self.dm_1x1) # Wrong IDs. other = self.dm_3x3.copy() other.ids = ['foo', 'bar', 'baz'] self.assertTrue(self.dm_3x3 != other) # Wrong data. other = self.dm_3x3.copy() other.data[1, 0] = 42.42 self.assertTrue(self.dm_3x3 != other) self.assertFalse(self.dm_2x2 != self.dm_2x2) def test_contains(self): self.assertTrue('a' in self.dm_3x3) self.assertTrue('b' in self.dm_3x3) self.assertTrue('c' in self.dm_3x3) self.assertFalse('d' in self.dm_3x3) def test_getslice(self): # Slice of first dimension only. Test that __getslice__ defers to # __getitem__. obs = self.dm_2x2[1:] self.assertTrue(np.array_equal(obs, np.array([[0.123, 0.0]]))) self.assertEqual(type(obs), np.ndarray) def test_getitem_by_id(self): obs = self.dm_1x1['a'] self.assertTrue(np.array_equal(obs, np.array([0.0]))) obs = self.dm_2x2_asym['b'] self.assertTrue(np.array_equal(obs, np.array([-2.0, 0.0]))) obs = self.dm_3x3['c'] self.assertTrue(np.array_equal(obs, np.array([4.2, 12.0, 0.0]))) with self.assertRaises(MissingIDError): self.dm_2x2['c'] def test_getitem_by_id_pair(self): # Same object. self.assertEqual(self.dm_1x1['a', 'a'], 0.0) # Different objects (symmetric). self.assertEqual(self.dm_3x3['b', 'c'], 12.0) self.assertEqual(self.dm_3x3['c', 'b'], 12.0) # Different objects (asymmetric). 
self.assertEqual(self.dm_2x2_asym['a', 'b'], 1.0) self.assertEqual(self.dm_2x2_asym['b', 'a'], -2.0) with self.assertRaises(MissingIDError): self.dm_2x2['a', 'c'] def test_getitem_ndarray_indexing(self): # Single element access. obs = self.dm_3x3[0, 1] self.assertEqual(obs, 0.01) # Single element access (via two __getitem__ calls). obs = self.dm_3x3[0][1] self.assertEqual(obs, 0.01) # Row access. obs = self.dm_3x3[1] self.assertTrue(np.array_equal(obs, np.array([0.01, 0.0, 12.0]))) self.assertEqual(type(obs), np.ndarray) # Grab all data. obs = self.dm_3x3[:, :] self.assertTrue(np.array_equal(obs, self.dm_3x3.data)) self.assertEqual(type(obs), np.ndarray) with self.assertRaises(IndexError): self.dm_3x3[:, 3] def test_validate_invalid_dtype(self): with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate(np.array([[0, 42], [42, 0]]), ['a', 'b']) def test_validate_invalid_shape(self): # first check it actually likes good matrices self.dm_3x3._validate_shape(np.array([[0., 42.], [42., 0.]])) # it checks just the shape, not the content self.dm_3x3._validate_shape(np.array([[1., 2.], [3., 4.]])) # empty array with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_shape(np.array([])) # invalid shape with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_shape(np.array([[0., 42.], [42., 0.], [22., 22.]])) with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_shape(np.array([[[0., 42.], [42., 0.]], [[0., 24.], [24., 0.]]])) def test_validate_invalid_ids(self): # repeated ids with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_ids(self.dm_3x3.data, ['a', 'a']) # empty ids with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_ids(self.dm_3x3.data, []) # invalid shape with self.assertRaises(DissimilarityMatrixError): self.dm_3x3._validate_ids(self.dm_3x3.data, ['a', 'b', 'c', 'd']) class DistanceMatrixTestBase(DissimilarityMatrixTestData): matobj = None def setUp(self): 
super(DistanceMatrixTestBase, self).setUp() self.dm_1x1 = self.matobj(self.dm_1x1_data, ['a']) self.dm_2x2 = self.matobj(self.dm_2x2_data, ['a', 'b']) self.dm_3x3 = self.matobj(self.dm_3x3_data, ['a', 'b', 'c']) self.dms = [self.dm_1x1, self.dm_2x2, self.dm_3x3] self.dm_condensed_forms = [np.array([]), np.array([0.123]), np.array([0.01, 4.2, 12.0])] def test_init_from_condensed_form(self): data = [1, 2, 3] exp = self.matobj([[0, 1, 2], [1, 0, 3], [2, 3, 0]], ['0', '1', '2']) res = self.matobj(data) self.assertEqual(exp, res) def test_init_invalid_input(self): # Asymmetric. data = [[0.0, 2.0], [1.0, 0.0]] with self.assertRaises(DistanceMatrixError): self.matobj(data, ['a', 'b']) # Non-hollow data = [[1.0, 2.0], [2.0, 1.0]] with self.assertRaises(DistanceMatrixError): self.matobj(data, ['a', 'b']) # Ensure that the superclass validation is still being performed. with self.assertRaises(DissimilarityMatrixError): self.matobj([[1, 2, 3]], ['a']) def test_init_nans(self): with self.assertRaisesRegex(DistanceMatrixError, r'NaNs'): self.matobj([[0.0, np.nan], [np.nan, 0.0]], ['a', 'b']) def test_from_iterable_no_key(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]]) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a)) self.assertEqual(res, exp) def test_from_iterable_validate_equal_valid_data(self): validate_true = self.matobj.from_iterable((x for x in range(4)), lambda a, b: abs(b - a), validate=True) validate_false = self.matobj.from_iterable((x for x in range(4)), lambda a, b: abs(b - a), validate=False) self.assertEqual(validate_true, validate_false) def test_from_iterable_validate_false(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]]) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), validate=False) self.assertEqual(res, exp) def test_from_iterable_validate_non_hollow(self): iterable = (x for x in range(4)) 
with self.assertRaises(DistanceMatrixError): self.matobj.from_iterable(iterable, lambda a, b: 1) def test_from_iterable_validate_false_non_symmetric(self): exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]]) res = self.matobj.from_iterable((x for x in range(4)), lambda a, b: a - b, validate=False) self.assertEqual(res, exp) def test_from_iterable_validate_asym(self): iterable = (x for x in range(4)) with self.assertRaises(DistanceMatrixError): self.matobj.from_iterable(iterable, lambda a, b: b - a) def test_from_iterable_with_key(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]], ['0', '1', '4', '9']) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), key=lambda x: str(x**2)) self.assertEqual(res, exp) def test_from_iterable_empty(self): with self.assertRaises(DissimilarityMatrixError): self.matobj.from_iterable([], lambda x: x) def test_from_iterable_single(self): exp = self.matobj([[0]]) res = self.matobj.from_iterable(["boo"], lambda a, b: 0) self.assertEqual(res, exp) def test_from_iterable_with_keys(self): iterable = (x for x in range(4)) exp = self.matobj([[0, 1, 2, 3], [1, 0, 1, 2], [2, 1, 0, 1], [3, 2, 1, 0]], ['0', '1', '4', '9']) res = self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), keys=iter(['0', '1', '4', '9'])) self.assertEqual(res, exp) def test_from_iterable_with_key_and_keys(self): iterable = (x for x in range(4)) with self.assertRaises(ValueError): self.matobj.from_iterable(iterable, lambda a, b: abs(b - a), key=str, keys=['1', '2', '3', '4']) def test_from_iterable_scipy_hamming_metric_with_metadata(self): # test for #1254 seqs = [ Sequence('ACGT'), Sequence('ACGA', metadata={'id': 'seq1'}), Sequence('AAAA', metadata={'id': 'seq2'}), Sequence('AAAA', positional_metadata={'qual': range(4)}) ] exp = self.matobj([ [0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5], [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd']) dm = 
self.matobj.from_iterable( seqs, metric=scipy.spatial.distance.hamming, keys=['a', 'b', 'c', 'd']) self.assertEqual(dm, exp) def test_from_iterable_skbio_hamming_metric_with_metadata(self): # test for #1254 seqs = [ Sequence('ACGT'), Sequence('ACGA', metadata={'id': 'seq1'}), Sequence('AAAA', metadata={'id': 'seq2'}), Sequence('AAAA', positional_metadata={'qual': range(4)}) ] exp = self.matobj([ [0, 0.25, 0.75, 0.75], [0.25, 0.0, 0.5, 0.5], [0.75, 0.5, 0.0, 0.0], [0.75, 0.5, 0.0, 0.0]], ['a', 'b', 'c', 'd']) dm = self.matobj.from_iterable( seqs, metric=skbio.sequence.distance.hamming, keys=['a', 'b', 'c', 'd']) self.assertEqual(dm, exp) def test_condensed_form(self): for dm, condensed in zip(self.dms, self.dm_condensed_forms): obs = dm.condensed_form() self.assertTrue(np.array_equal(obs, condensed)) def test_permute_condensed(self): # Can't really permute a 1x1 or 2x2... for _ in range(2): obs = self.dm_1x1.permute(condensed=True) npt.assert_equal(obs, np.array([])) for _ in range(2): obs = self.dm_2x2.permute(condensed=True) npt.assert_equal(obs, np.array([0.123])) dm_copy = self.dm_3x3.copy() np.random.seed(0) obs = self.dm_3x3.permute(condensed=True) npt.assert_equal(obs, np.array([12.0, 4.2, 0.01])) obs = self.dm_3x3.permute(condensed=True) npt.assert_equal(obs, np.array([4.2, 12.0, 0.01])) # Ensure dm hasn't changed after calling permute() on it a couple of # times. 
self.assertEqual(self.dm_3x3, dm_copy) def test_permute_not_condensed(self): obs = self.dm_1x1.permute() self.assertEqual(obs, self.dm_1x1) self.assertFalse(obs is self.dm_1x1) obs = self.dm_2x2.permute() self.assertEqual(obs, self.dm_2x2) self.assertFalse(obs is self.dm_2x2) np.random.seed(0) exp = self.matobj([[0, 12, 4.2], [12, 0, 0.01], [4.2, 0.01, 0]], self.dm_3x3.ids) obs = self.dm_3x3.permute() self.assertEqual(obs, exp) exp = self.matobj([[0, 4.2, 12], [4.2, 0, 0.01], [12, 0.01, 0]], self.dm_3x3.ids) obs = self.dm_3x3.permute() self.assertEqual(obs, exp) def test_eq(self): # Compare DistanceMatrix to DissimilarityMatrix, where both have the # same data and IDs. eq_dm = DissimilarityMatrix(self.dm_3x3_data, ['a', 'b', 'c']) self.assertTrue(self.dm_3x3 == eq_dm) self.assertTrue(eq_dm == self.dm_3x3) def test_to_series_1x1(self): series = self.dm_1x1.to_series() exp = pd.Series([], index=[], dtype='float64') assert_series_almost_equal(series, exp) def test_to_series_2x2(self): series = self.dm_2x2.to_series() exp = pd.Series([0.123], index=pd.Index([('a', 'b')])) assert_series_almost_equal(series, exp) def test_to_series_4x4(self): dm = self.matobj([ [0.0, 0.2, 0.3, 0.4], [0.2, 0.0, 0.5, 0.6], [0.3, 0.5, 0.0, 0.7], [0.4, 0.6, 0.7, 0.0]], ['a', 'b', 'c', 'd']) series = dm.to_series() exp = pd.Series([0.2, 0.3, 0.4, 0.5, 0.6, 0.7], index=pd.Index([('a', 'b'), ('a', 'c'), ('a', 'd'), ('b', 'c'), ('b', 'd'), ('c', 'd')])) assert_series_almost_equal(series, exp) def test_to_series_default_ids(self): series = self.matobj(self.dm_2x2_data).to_series() exp = pd.Series([0.123], index=pd.Index([('0', '1')])) assert_series_almost_equal(series, exp) def test_validate_asym_shape(self): # first check it actually likes good matrices data_good = np.array([[0., 42.], [42., 0.]]) data_sym, data_hollow = is_symmetric_and_hollow(data_good) self.assertEqual(data_sym, True) del data_sym self.assertEqual(data_hollow, True) del data_hollow data_sym = 
skbio.stats.distance._utils.is_symmetric(data_good) self.assertEqual(data_sym, True) del data_sym data_hollow = skbio.stats.distance._utils.is_hollow(data_good) self.assertEqual(data_hollow, True) del data_hollow self.dm_3x3._validate_shape(data_good) del data_good # _validate_shap checks just the shape, not the content bad_data = np.array([[1., 2.], [3., 4.]]) data_sym, data_hollow = is_symmetric_and_hollow(bad_data) self.assertEqual(data_sym, False) del data_sym self.assertEqual(data_hollow, False) del data_hollow data_sym = skbio.stats.distance._utils.is_symmetric(bad_data) self.assertEqual(data_sym, False) del data_sym data_hollow = skbio.stats.distance._utils.is_hollow(bad_data) self.assertEqual(data_hollow, False) del data_hollow self.dm_3x3._validate_shape(bad_data) del bad_data # re-try with partially bad data bad_data = np.array([[0., 2.], [3., 0.]]) data_sym, data_hollow = is_symmetric_and_hollow(bad_data) self.assertEqual(data_sym, False) del data_sym self.assertEqual(data_hollow, True) del data_hollow data_sym = skbio.stats.distance._utils.is_symmetric(bad_data) self.assertEqual(data_sym, False) del data_sym data_hollow = skbio.stats.distance._utils.is_hollow(bad_data) self.assertEqual(data_hollow, True) del data_hollow self.dm_3x3._validate_shape(bad_data) del bad_data def test_rename(self): # Test successful renaming with a dictionary in strict mode (default) dm = DistanceMatrix([[0, 1], [1, 0]], ids=['a', 'b']) rename_dict = {'a': 'x', 'b': 'y'} dm.rename(rename_dict) exp = ('x', 'y') self.assertEqual(dm.ids, exp) # Test successful renaming with a function in strict mode (default) dm = DistanceMatrix([[0, 1], [1, 0]], ids=['a', 'b']) rename_func = lambda x: x + '_1' dm.rename(rename_func) exp = ('a_1', 'b_1') self.assertEqual(dm.ids, exp) # Test renaming in non-strict mode where one ID is not included in the dictionary dm = DistanceMatrix([[0, 1], [1, 0]], ids=['a', 'b']) rename_dict = {'a': 'x'} # 'b' will retain its original ID 
dm.rename(rename_dict, strict=False) exp = ('x', 'b') self.assertEqual(dm.ids, exp) # Test that renaming with strict=True raises an error if not all IDs are included dm = DistanceMatrix([[0, 1], [1, 0]], ids=['a', 'b']) rename_dict = {'a': 'x'} # Missing 'b' with self.assertRaises(ValueError): dm.rename(rename_dict, strict=True) class RandomDistanceMatrixTests(TestCase): def test_default_usage(self): exp = DistanceMatrix(np.asarray([[0.0]]), ['1']) obs = randdm(1) self.assertEqual(obs, exp) obs = randdm(2) self.assertEqual(obs.shape, (2, 2)) self.assertEqual(obs.ids, ('1', '2')) obs1 = randdm(5) num_trials = 10 found_diff = False for _ in range(num_trials): obs2 = randdm(5) if obs1 != obs2: found_diff = True break self.assertTrue(found_diff) def test_large_matrix_for_symmetry(self): obs3 = randdm(100) self.assertEqual(obs3, obs3.T) def test_ids(self): ids = ['foo', 'bar', 'baz'] obs = randdm(3, ids=ids) self.assertEqual(obs.shape, (3, 3)) self.assertEqual(obs.ids, tuple(ids)) def test_constructor(self): exp = DissimilarityMatrix(np.asarray([[0.0]]), ['1']) obs = randdm(1, constructor=DissimilarityMatrix) self.assertEqual(obs, exp) self.assertEqual(type(obs), DissimilarityMatrix) def test_random_fn(self): def myrand(num_rows, num_cols): # One dm to rule them all... data = np.empty((num_rows, num_cols)) data.fill(42) return data exp = DistanceMatrix(np.asarray([[0, 42, 42], [42, 0, 42], [42, 42, 0]]), ['1', '2', '3']) obs = randdm(3, random_fn=myrand) self.assertEqual(obs, exp) def test_invalid_input(self): # Invalid dimensions. with self.assertRaises(DissimilarityMatrixError): randdm(0) # Invalid dimensions. with self.assertRaises(ValueError): randdm(-1) # Invalid number of IDs. 
with self.assertRaises(DissimilarityMatrixError): randdm(2, ids=['foo']) class CategoricalStatsHelperFunctionTests(TestCase): def setUp(self): self.dm = DistanceMatrix([[0.0, 1.0, 2.0], [1.0, 0.0, 3.0], [2.0, 3.0, 0.0]], ['a', 'b', 'c']) self.grouping = [1, 2, 1] # Ordering of IDs shouldn't matter, nor should extra IDs. self.df = pd.read_csv( io.StringIO('ID,Group\nb,Group2\na,Group1\nc,Group1\nd,Group3'), index_col=0) self.df_missing_id = pd.read_csv( io.StringIO('ID,Group\nb,Group2\nc,Group1'), index_col=0) def test_preprocess_input_with_valid_input(self): # Should obtain same result using grouping vector or data frame. exp = (3, 2, np.array([0, 1, 0]), (np.array([0, 0, 1]), np.array([1, 2, 2])), np.array([1., 2., 3.])) obs = _preprocess_input(self.dm, self.grouping, None) npt.assert_equal(obs, exp) obs = _preprocess_input(self.dm, self.df, 'Group') npt.assert_equal(obs, exp) def test_preprocess_input_raises_error(self): # Requires a DistanceMatrix. with self.assertRaises(TypeError): _preprocess_input( DissimilarityMatrix([[0, 2], [3, 0]], ['a', 'b']), [1, 2], None) # Requires column if DataFrame. with self.assertRaises(ValueError): _preprocess_input(self.dm, self.df, None) # Cannot provide column if not data frame. with self.assertRaises(ValueError): _preprocess_input(self.dm, self.grouping, 'Group') # Column must exist in data frame. with self.assertRaises(ValueError): _preprocess_input(self.dm, self.df, 'foo') # All distance matrix IDs must be in data frame. with self.assertRaises(ValueError): _preprocess_input(self.dm, self.df_missing_id, 'Group') # Grouping vector length must match number of objects in dm. with self.assertRaises(ValueError): _preprocess_input(self.dm, [1, 2], None) # Grouping vector cannot have only unique values. with self.assertRaises(ValueError): _preprocess_input(self.dm, [1, 2, 3], None) # Grouping vector cannot have only a single group. 
with self.assertRaises(ValueError): _preprocess_input(self.dm, [1, 1, 1], None) def test_run_monte_carlo_stats_with_permutations(self): obs = _run_monte_carlo_stats(lambda e: 42, self.grouping, 50) npt.assert_equal(obs, (42, 1.0)) def test_run_monte_carlo_stats_no_permutations(self): obs = _run_monte_carlo_stats(lambda e: 42, self.grouping, 0) npt.assert_equal(obs, (42, np.nan)) def test_run_monte_carlo_stats_invalid_permutations(self): with self.assertRaises(ValueError): _run_monte_carlo_stats(lambda e: 42, self.grouping, -1) class DissimilarityMatrixTests(DissimilarityMatrixTestBase, TestCase): matobj = DissimilarityMatrix def setUp(self): super(DissimilarityMatrixTests, self).setUp() class DistanceMatrixTests(DistanceMatrixTestBase, TestCase): matobj = DistanceMatrix def setUp(self): super(DistanceMatrixTests, self).setUp() if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/distance/tests/test_bioenv.py000066400000000000000000000242541464262511300236630ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import pandas as pd from skbio import DistanceMatrix from skbio.stats.distance import bioenv from skbio.stats.distance._bioenv import _scale from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.util._testing import _data_frame_to_default_int_type class BIOENVTests(TestCase): """Results were verified with R 3.0.2 and vegan 2.0-10 (vegan::bioenv).""" def setUp(self): # The test dataset used here is a subset of the Lauber et al. 2009 # "88 Soils" dataset. 
It has been altered to exercise various aspects # of the code, including (but not limited to): # # - order of distance matrix IDs and IDs in data frame (metadata) are # not exactly the same # - data frame has an extra sample that is not in the distance matrix # - this extra sample has non-numeric and missing values in some of its # cells # # Additional variations of the distance matrix and data frame are used # to test different orderings of rows/columns, extra non-numeric data # frame columns, etc. # # This dataset is also useful because it is non-trivial in size (6 # samples, 11 environment variables) and it includes positive/negative # floats and integers in the data frame. self.dm = DistanceMatrix.read(get_data_path('dm.txt')) # Reordered rows and columns (i.e., different ID order). Still # conceptually the same distance matrix. self.dm_reordered = DistanceMatrix.read( get_data_path('dm_reordered.txt')) self.df = pd.read_csv(get_data_path('df.txt'), sep='\t', index_col=0) # Similar to the above data frame, except that it has an extra # non-numeric column, and some of the other rows and columns have been # reordered. self.df_extra_column = pd.read_csv( get_data_path('df_extra_column.txt'), sep='\t', index_col=0) # All columns in the original data frame (these are all numeric # columns). self.cols = self.df.columns.tolist() # This second dataset is derived from vegan::bioenv's example dataset # (varespec and varechem). The original dataset includes a site x # species table (e.g., OTU table) and a data frame of environmental # variables. Since the bioenv function defined here accepts a distance # matrix, we use a Bray-Curtis distance matrix that is derived from the # site x species table (this matches what is done by vegan::bioenv when # provided an OTU table, using their default distance measure). 
The # data frame only includes the numeric environmental variables we're # interested in for these tests: log(N), P, K, Ca, pH, Al self.dm_vegan = DistanceMatrix.read( get_data_path('bioenv_dm_vegan.txt')) self.df_vegan = pd.read_csv( get_data_path('bioenv_df_vegan.txt'), sep='\t', converters={0: str}) self.df_vegan.set_index('#SampleID', inplace=True) # Load expected results. self.exp_results = pd.read_csv(get_data_path('exp_results.txt'), sep='\t', index_col=0) _data_frame_to_default_int_type(self.exp_results) self.exp_results_single_column = pd.read_csv( get_data_path('exp_results_single_column.txt'), sep='\t', index_col=0 ) _data_frame_to_default_int_type(self.exp_results_single_column) self.exp_results_different_column_order = pd.read_csv( get_data_path('exp_results_different_column_order.txt'), sep='\t', index_col=0 ) _data_frame_to_default_int_type(self.exp_results_different_column_order) self.exp_results_vegan = pd.read_csv( get_data_path('bioenv_exp_results_vegan.txt'), sep='\t', index_col=0 ) _data_frame_to_default_int_type(self.exp_results_vegan) def test_bioenv_all_columns_implicit(self): # Test with all columns in data frame (implicitly). obs = bioenv(self.dm, self.df) assert_data_frame_almost_equal(obs, self.exp_results) # Should get the same results if order of rows/cols in distance matrix # is changed. obs = bioenv(self.dm_reordered, self.df) assert_data_frame_almost_equal(obs, self.exp_results) def test_bioenv_all_columns_explicit(self): # Test with all columns being specified. obs = bioenv(self.dm, self.df, columns=self.cols) assert_data_frame_almost_equal(obs, self.exp_results) # Test against a data frame that has an extra non-numeric column and # some of the rows and columns reordered (we should get the same # result since we're specifying the same columns in the same order). 
obs = bioenv(self.dm, self.df_extra_column, columns=self.cols) assert_data_frame_almost_equal(obs, self.exp_results) def test_bioenv_single_column(self): obs = bioenv(self.dm, self.df, columns=['PH']) assert_data_frame_almost_equal(obs, self.exp_results_single_column) def test_bioenv_different_column_order(self): # Specifying columns in a different order will change the row labels in # the results data frame as the column subsets will be reordered, but # the actual results (e.g., correlation coefficients) shouldn't change. obs = bioenv(self.dm, self.df, columns=self.cols[::-1]) assert_data_frame_almost_equal( obs, self.exp_results_different_column_order) def test_bioenv_no_side_effects(self): # Deep copies of both primary inputs. dm_copy = self.dm.copy() df_copy = self.df.copy(deep=True) bioenv(self.dm, self.df) # Make sure we haven't modified the primary input in some way (e.g., # with scaling, type conversions, etc.). self.assertEqual(self.dm, dm_copy) assert_data_frame_almost_equal(self.df, df_copy) def test_bioenv_vegan_example(self): # The correlation coefficient in the first row of the # results (rho=0.2516) is different from the correlation coefficient # computed by vegan (rho=0.2513). This seems to occur due to # differences in numerical precision when calculating the Euclidean # distances, which affects the rank calculations in Spearman # (specifically, dealing with ties). The ranked distances end up being # slightly different between vegan and our implementation because some # distances are treated as ties in vegan but treated as distinct values # in our implementation. This explains the difference in rho values. I # verified that using Pearson correlation instead of Spearman on the # same distances yields *very* similar results. Thus, the discrepancy # seems to stem from differences when computing ranks/ties. 
obs = bioenv(self.dm_vegan, self.df_vegan) assert_data_frame_almost_equal( obs, self.exp_results_vegan, rtol=1e-3 ) def test_bioenv_no_distance_matrix(self): with self.assertRaises(TypeError): bioenv('breh', self.df) def test_bioenv_no_data_frame(self): with self.assertRaises(TypeError): bioenv(self.dm, None) def test_bioenv_duplicate_columns(self): with self.assertRaises(ValueError): bioenv(self.dm, self.df, columns=self.cols + ['PH']) def test_bioenv_no_columns(self): with self.assertRaises(ValueError): bioenv(self.dm, self.df, columns=[]) def test_bioenv_missing_columns(self): with self.assertRaises(ValueError): bioenv(self.dm, self.df, columns=self.cols + ['brofist']) def test_bioenv_missing_distance_matrix_ids(self): df = self.df[1:] with self.assertRaises(ValueError): bioenv(self.dm, df) def test_bioenv_nans(self): df = self.df.replace(53.9, np.nan) with self.assertRaises(ValueError): bioenv(self.dm, df) def test_bioenv_nonnumeric_columns(self): df = self.df.replace(2400, 'no cog yay') with self.assertRaises(TypeError): bioenv(self.dm, df) with self.assertRaises(TypeError): bioenv(self.dm, self.df_extra_column) def test_scale_single_column(self): df = pd.DataFrame([[1], [0], [2]], index=['A', 'B', 'C'], columns=['foo']) exp = pd.DataFrame([[0.0], [-1.0], [1.0]], index=['A', 'B', 'C'], columns=['foo']) obs = _scale(df) assert_data_frame_almost_equal(obs, exp) def test_scale_multiple_columns(self): # Floats and ints, including positives and negatives. 
df = pd.DataFrame([[7.0, 400, -1], [8.0, 530, -5], [7.5, 450, 1], [8.5, 810, -4]], index=['A', 'B', 'C', 'D'], columns=['pH', 'Elevation', 'negatives']) exp = pd.DataFrame([[-1.161895, -0.805979, 0.453921], [0.387298, -0.095625, -0.998625], [-0.387298, -0.532766, 1.180194], [1.161895, 1.434369, -0.635489]], index=['A', 'B', 'C', 'D'], columns=['pH', 'Elevation', 'negatives']) obs = _scale(df) assert_data_frame_almost_equal(obs, exp) def test_scale_no_variance(self): df = pd.DataFrame([[-7.0, -1.2], [6.2, -1.2], [2.9, -1.2]], index=['A', 'B', 'C'], columns=['foo', 'bar']) with self.assertRaises(ValueError): _scale(df) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/distance/tests/test_mantel.py000066400000000000000000000743551464262511300236700ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd import scipy from scipy.spatial.distance import squareform from scipy.stats import pearsonr, spearmanr from skbio import DistanceMatrix from skbio.stats.distance import (DissimilarityMatrixError, DistanceMatrixError, mantel, pwmantel) from skbio.stats.distance._mantel import _order_dms from skbio.stats.distance._mantel import _mantel_stats_pearson from skbio.stats.distance._mantel import _mantel_stats_spearman from skbio.stats.distance._cutils import mantel_perm_pearsonr_cy from skbio.stats.distance._utils import distmat_reorder_condensed from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.util._testing import _data_frame_to_default_int_type class MantelTestData(TestCase): def setUp(self): # Small dataset of minimal size (3x3). Mix of floats and ints in a # native Python nested list structure. self.minx = [[0, 1, 2], [1, 0, 3], [2, 3, 0]] self.miny = [[0, 2, 7], [2, 0, 6], [7, 6, 0]] self.minz = [[0, 0.5, 0.25], [0.5, 0, 0.1], [0.25, 0.1, 0]] # Version of the above dataset stored as DistanceMatrix instances. self.minx_dm = DistanceMatrix(self.minx) self.miny_dm = DistanceMatrix(self.miny) self.minz_dm = DistanceMatrix(self.minz) # Versions of self.minx_dm and self.minz_dm that each have an extra ID # on the end. 
self.minx_dm_extra = DistanceMatrix([[0, 1, 2, 7], [1, 0, 3, 2], [2, 3, 0, 4], [7, 2, 4, 0]], ['0', '1', '2', 'foo']) self.minz_dm_extra = DistanceMatrix([[0, 0.5, 0.25, 3], [0.5, 0, 0.1, 24], [0.25, 0.1, 0, 5], [3, 24, 5, 0]], ['0', '1', '2', 'bar']) class InternalMantelTests(MantelTestData): def setUp(self): super(InternalMantelTests, self).setUp() def _compute_perf_one(self, x_data, order, xmean, normxm, ym_normalized): x_flat = distmat_reorder_condensed(x_data, order) xm_normalized = (x_flat - xmean)/normxm one_stat = np.dot(xm_normalized, ym_normalized) one_stat = max(min(one_stat, 1.0), -1.0) return one_stat def test_perm_pearsonr3(self): # data pre-computed using released code x_data = np.asarray([[0., 1., 3.], [1., 0., 2.], [3., 2., 0.]]) perm_order = np.asarray([[2, 1, 0], [2, 0, 1], [0, 2, 1], [2, 0, 1]], dtype=np.intp) xmean = 2.0 normxm = 1.4142135623730951 ym_normalized = np.asarray([-0.80178373, 0.26726124, 0.53452248]) permuted_stats = np.empty(len(perm_order), dtype=x_data.dtype) mantel_perm_pearsonr_cy(x_data, perm_order, xmean, normxm, ym_normalized, permuted_stats) for i in range(len(perm_order)): exp_res = self._compute_perf_one(x_data, perm_order[i, :], xmean, normxm, ym_normalized) self.assertAlmostEqual(permuted_stats[i], exp_res) def test_perm_pearsonr6(self): # data pre-computed using released code x_data = np.asarray([[0., 0.62381864, 0.75001543, 0.58520119, 0.72902358, 0.65213559], [0.62381864, 0., 0.97488122, 0.6498224, 0.73720314, 0.62950732], [0.75001543, 0.97488122, 0., 0.68884542, 0.65747031, 0.72170752], [0.58520119, 0.6498224, 0.68884542, 0., 0.65885358, 0.66122362], [0.72902358, 0.73720314, 0.65747031, 0.65885358, 0., 0.71117341], [0.65213559, 0.62950732, 0.72170752, 0.66122362, 0.71117341, 0.]]) perm_order = np.asarray([[0, 2, 3, 4, 1, 5], [4, 3, 2, 5, 0, 1], [2, 5, 3, 1, 0, 4], [3, 5, 4, 1, 2, 0], [4, 3, 5, 2, 0, 1]], dtype=np.intp) xmean = 0.6953921578226 normxm = 0.3383126690576294 ym_normalized = np.asarray([-0.4999711, 
0.24980825, -0.29650504, 0.18022614, -0.17407781, 0.33223145, -0.08230374, 0.33992794, -0.14964257, 0.04340053, -0.35527798, 0.15597541, -0.0523679, -0.04451187, 0.35308828]) permuted_stats = np.empty(len(perm_order), dtype=x_data.dtype) mantel_perm_pearsonr_cy(x_data, perm_order, xmean, normxm, ym_normalized, permuted_stats) for i in range(len(perm_order)): exp_res = self._compute_perf_one(x_data, perm_order[i, :], xmean, normxm, ym_normalized) self.assertAlmostEqual(permuted_stats[i], exp_res) def test_perm_pearsonr_full(self): x = DistanceMatrix.read(get_data_path('dm2.txt')) y = DistanceMatrix.read(get_data_path('dm3.txt')) x_data = x._data y_data = y._data x_flat = squareform(x_data, force='tovector', checks=False) y_flat = squareform(y_data, force='tovector', checks=False) xmean = x_flat.mean() ymean = y_flat.mean() xm = x_flat - xmean ym = y_flat - ymean normxm_la = scipy.linalg.norm(xm) normym_la = scipy.linalg.norm(ym) normxm = np.linalg.norm(xm) normym = np.linalg.norm(ym) self.assertAlmostEqual(normxm, normxm_la) self.assertAlmostEqual(normym, normym_la) perm_order = np.asarray([[0, 2, 3, 4, 1, 5], [4, 3, 2, 5, 0, 1], [2, 5, 3, 1, 0, 4], [3, 5, 4, 1, 2, 0], [4, 3, 5, 2, 0, 1], [4, 5, 1, 2, 0, 3], [3, 5, 1, 0, 4, 2], [4, 5, 3, 1, 2, 0], [2, 1, 5, 4, 0, 3], [4, 1, 0, 5, 2, 3], [1, 2, 5, 4, 0, 3], [5, 4, 0, 1, 3, 2], [3, 0, 1, 5, 4, 2], [5, 0, 2, 3, 1, 4]], dtype=np.intp) ym_normalized = ym/normym permuted_stats = np.empty(len(perm_order), dtype=x_data.dtype) mantel_perm_pearsonr_cy(x_data, perm_order, xmean, normxm, ym_normalized, permuted_stats) for i in range(len(perm_order)): exp_res = self._compute_perf_one(x_data, perm_order[i, :], xmean, normxm, ym_normalized) self.assertAlmostEqual(permuted_stats[i], exp_res) def test_pearsonr_full(self): """ Compare the optimized version of pearson mantel with the naive loop implementation """ x = DistanceMatrix.read(get_data_path('dm2.txt')) y = DistanceMatrix.read(get_data_path('dm3.txt')) num_perms = 12 
np.random.seed(0) orig_stat_fast, comp_stat, permuted_stats_fast = \ _mantel_stats_pearson(x, y, num_perms) # compute the traditional way np.random.seed(0) x_flat = x.condensed_form() y_flat = y.condensed_form() orig_stat = pearsonr(x_flat, y_flat)[0] perm_gen = (pearsonr(x.permute(condensed=True), y_flat)[0] for _ in range(num_perms)) permuted_stats = np.fromiter(perm_gen, float, count=num_perms) self.assertAlmostEqual(orig_stat_fast, orig_stat) for i in range(num_perms): self.assertAlmostEqual(permuted_stats_fast[i], permuted_stats[i]) def test_spearmanr_full(self): """ Compare the optimized version of spearman mantel with the naive loop implementation """ x = DistanceMatrix.read(get_data_path('dm2.txt')) y = DistanceMatrix.read(get_data_path('dm3.txt')) num_perms = 12 np.random.seed(0) orig_stat_fast, comp_stat, permuted_stats_fast = \ _mantel_stats_spearman(x, y, num_perms) # compute the traditional way np.random.seed(0) x_flat = x.condensed_form() y_flat = y.condensed_form() orig_stat = spearmanr(x_flat, y_flat)[0] perm_gen = (spearmanr(x.permute(condensed=True), y_flat)[0] for _ in range(num_perms)) permuted_stats = np.fromiter(perm_gen, float, count=num_perms) self.assertAlmostEqual(orig_stat_fast, orig_stat) for i in range(num_perms): self.assertAlmostEqual(permuted_stats_fast[i], permuted_stats[i]) class MantelTests(MantelTestData): """Results were verified with R 3.1.0 and vegan 2.0-10 (vegan::mantel). vegan::mantel performs a one-sided (greater) test and does not have the option to specify different alternative hypotheses. In order to test the other alternative hypotheses, I modified vegan::mantel to perform the appropriate test, source()'d the file and verified the output. """ def setUp(self): super(MantelTests, self).setUp() self.methods = ('pearson', 'spearman', 'kendalltau') self.alternatives = ('two-sided', 'greater', 'less') # No variation in distances. Taken from Figure 10.20(b), pg. 603 in L&L # 3rd edition. 
        # (continuation of MantelTests.setUp, cut at the chunk boundary)
        # Their example is 4x4 but using 3x3 here for easy
        # comparison to the minimal dataset above.
        self.no_variation = [[0, 0.667, 0.667],
                             [0.667, 0, 0.667],
                             [0.667, 0.667, 0]]

        # This second dataset is derived from vegan::mantel's example dataset.
        # The "veg" distance matrix contains Bray-Curtis distances derived from
        # the varespec data (named "veg.dist" in the example). The "env"
        # distance matrix contains Euclidean distances derived from scaled
        # varechem data (named "env.dist" in the example).
        self.veg_dm_vegan = np.loadtxt(
            get_data_path('mantel_veg_dm_vegan.txt'))
        self.env_dm_vegan = np.loadtxt(
            get_data_path('mantel_env_dm_vegan.txt'))

        # Expected test statistic when comparing x and y with method='pearson'.
        self.exp_x_vs_y = 0.7559289

        # Expected test statistic when comparing x and z with method='pearson'.
        self.exp_x_vs_z = -0.9897433

    def assert_mantel_almost_equal(self, left, right):
        """Compare two (statistic, p-value, n) mantel results leniently.

        The p-value is a permutation count ratio obtained by comparing real
        values, so it is very sensitive to minor rounding errors.  When
        extreme counts are rare, a tiny rounding difference can produce a
        large proportional error, so we compare p-values with low precision
        (decimal=2).  The +0.5 shift keeps the comparison well away from
        zero, where relative "almost equal" checks misbehave.
        """
        # statistic: full default precision
        self.assertAlmostEqual(left[0], right[0])
        # p-value: coarse comparison (see docstring)
        npt.assert_almost_equal(left[1] + 0.5, right[1] + 0.5, decimal=2)
        # sample size must match exactly
        self.assertEqual(left[2], right[2])

    def test_statistic_same_across_alternatives_and_permutations(self):
        # Varying permutations and alternative hypotheses shouldn't affect the
        # computed test statistics.
        # (body of test_statistic_same_across_alternatives_and_permutations,
        # whose header sits on the previous collapsed line)
        for n in (0, 99, 999):
            for alt in self.alternatives:
                for method, exp in (('pearson', self.exp_x_vs_y),
                                    ('spearman', 0.5),
                                    ('kendalltau', 0.33333333333333337)):
                    obs = mantel(self.minx, self.miny, method=method,
                                 permutations=n, alternative=alt)[0]
                    self.assertAlmostEqual(obs, exp)

    def test_comparing_same_matrices(self):
        """A matrix compared with itself yields a perfect correlation of 1."""
        for method in self.methods:
            obs = mantel(self.minx, self.minx, method=method)[0]
            self.assertAlmostEqual(obs, 1)

            obs = mantel(self.miny, self.miny, method=method)[0]
            self.assertAlmostEqual(obs, 1)

    def test_negative_correlation(self):
        """x and z are anticorrelated; statistic should be negative."""
        for method, exp in (('pearson', self.exp_x_vs_z), ('spearman', -1)):
            obs = mantel(self.minx, self.minz, method=method)[0]
            self.assertAlmostEqual(obs, exp)

    def test_zero_permutations(self):
        """With permutations=0, the p-value is NaN but the statistic holds."""
        for alt in self.alternatives:
            for method, exp in (('pearson', self.exp_x_vs_y),
                                ('spearman', 0.5),
                                ('kendalltau', 0.33333333333333337)):
                obs = mantel(self.minx, self.miny, permutations=0,
                             method=method, alternative=alt)
                self.assertAlmostEqual(obs[0], exp)
                npt.assert_equal(obs[1], np.nan)
                self.assertEqual(obs[2], 3)

                # swapping order of matrices should give same result
                obs = mantel(self.miny, self.minx, permutations=0,
                             method=method, alternative=alt)
                self.assertAlmostEqual(obs[0], exp)
                npt.assert_equal(obs[1], np.nan)
                self.assertEqual(obs[2], 3)

    def test_distance_matrix_instances_as_input(self):
        # Matrices with all matching IDs in the same order.
np.random.seed(0) obs = mantel(self.minx_dm, self.miny_dm, alternative='less') self.assert_mantel_almost_equal(obs, [self.exp_x_vs_y, 0.843, 3]) def test_distance_matrix_instances_with_reordering_and_nonmatching(self): x = self.minx_dm_extra.filter(['1', '0', 'foo', '2']) y = self.miny_dm.filter(['0', '2', '1']) # strict=True should disallow IDs that aren't found in both matrices with self.assertRaises(ValueError): mantel(x, y, alternative='less', strict=True) np.random.seed(0) # strict=False should ignore IDs that aren't found in both matrices obs = mantel(x, y, alternative='less', strict=False) self.assert_mantel_almost_equal(obs, [self.exp_x_vs_y, 0.843, 3]) def test_distance_matrix_instances_with_lookup(self): self.minx_dm.ids = ('a', 'b', 'c') self.miny_dm.ids = ('d', 'e', 'f') lookup = {'a': 'A', 'b': 'B', 'c': 'C', 'd': 'A', 'e': 'B', 'f': 'C'} np.random.seed(0) obs = mantel(self.minx_dm, self.miny_dm, alternative='less', lookup=lookup) self.assert_mantel_almost_equal(obs, [self.exp_x_vs_y, 0.843, 3]) def test_one_sided_greater(self): np.random.seed(0) obs = mantel(self.minx, self.miny, alternative='greater') self.assertAlmostEqual(obs[0], self.exp_x_vs_y) self.assertAlmostEqual(obs[1], 0.324) self.assertEqual(obs[2], 3) obs = mantel(self.minx, self.minx, alternative='greater') self.assert_mantel_almost_equal(obs, [1, 0.172, 3]) def test_one_sided_less(self): # no need to seed here as permuted test statistics will all be less # than or equal to the observed test statistic (1.0) for method in self.methods: obs = mantel(self.minx, self.minx, method=method, alternative='less') npt.assert_almost_equal(obs, (1, 1, 3)) np.random.seed(0) obs = mantel(self.minx, self.miny, alternative='less') self.assert_mantel_almost_equal(obs, [self.exp_x_vs_y, 0.843, 3]) obs = mantel(self.minx, self.minz, alternative='less') self.assert_mantel_almost_equal(obs, [self.exp_x_vs_z, 0.172, 3]) def test_two_sided(self): np.random.seed(0) obs = mantel(self.minx, self.minx, 
method='spearman', alternative='two-sided') self.assert_mantel_almost_equal(obs, [1.0, 0.328, 3]) obs = mantel(self.minx, self.miny, method='spearman', alternative='two-sided') self.assert_mantel_almost_equal(obs, [0.5, 1.0, 3]) obs = mantel(self.minx, self.minz, method='spearman', alternative='two-sided') self.assert_mantel_almost_equal(obs, [-1, 0.322, 3]) def test_vegan_example(self): np.random.seed(0) # pearson obs = mantel(self.veg_dm_vegan, self.env_dm_vegan, alternative='greater') self.assert_mantel_almost_equal(obs, [0.3047454, 0.002, 24]) # spearman obs = mantel(self.veg_dm_vegan, self.env_dm_vegan, alternative='greater', method='spearman') self.assert_mantel_almost_equal(obs, [0.283791, 0.003, 24]) def test_no_variation_pearson(self): for alt in self.alternatives: # test one or both inputs having no variation in their # distances obs = mantel(self.miny, self.no_variation, method='pearson', alternative=alt) npt.assert_equal(obs, (np.nan, np.nan, 3)) obs = mantel(self.no_variation, self.miny, method='pearson', alternative=alt) npt.assert_equal(obs, (np.nan, np.nan, 3)) obs = mantel(self.no_variation, self.no_variation, method='pearson', alternative=alt) npt.assert_equal(obs, (np.nan, np.nan, 3)) def test_no_variation_spearman(self): exp = (np.nan, np.nan, 3) for alt in self.alternatives: obs = mantel(self.miny, self.no_variation, method='spearman', alternative=alt) npt.assert_equal(obs, exp) obs = mantel(self.no_variation, self.miny, method='spearman', alternative=alt) npt.assert_equal(obs, exp) obs = mantel(self.no_variation, self.no_variation, method='spearman', alternative=alt) npt.assert_equal(obs, exp) def test_no_side_effects(self): minx = np.asarray(self.minx, dtype='float') miny = np.asarray(self.miny, dtype='float') minx_copy = np.copy(minx) miny_copy = np.copy(miny) mantel(minx, miny) # Make sure we haven't modified the input. 
npt.assert_equal(minx, minx_copy) npt.assert_equal(miny, miny_copy) def test_invalid_distance_matrix(self): # Single asymmetric, non-hollow distance matrix. with self.assertRaises(DissimilarityMatrixError): mantel([[1, 2], [3, 4]], [[0, 0], [0, 0]]) # Two asymmetric distance matrices. with self.assertRaises(DistanceMatrixError): mantel([[0, 2], [3, 0]], [[0, 1], [0, 0]]) def test_invalid_input(self): # invalid correlation method with self.assertRaises(ValueError): mantel([[1]], [[1]], method='brofist') # invalid permutations with self.assertRaises(ValueError): mantel([[1]], [[1]], permutations=-1) # invalid alternative with self.assertRaises(ValueError): mantel([[1]], [[1]], alternative='no cog yay') # too small dms with self.assertRaises(ValueError): mantel([[0, 3], [3, 0]], [[0, 2], [2, 0]]) class PairwiseMantelTests(MantelTestData): def setUp(self): super(PairwiseMantelTests, self).setUp() self.min_dms = (self.minx_dm, self.miny_dm, self.minz_dm) self.exp_results_minimal = pd.read_csv( get_data_path('pwmantel_exp_results_minimal.txt'), sep='\t', index_col=(0, 1) ) _data_frame_to_default_int_type(self.exp_results_minimal) self.exp_results_minimal_with_labels = pd.read_csv( get_data_path('pwmantel_exp_results_minimal_with_labels.txt'), sep='\t', index_col=(0, 1) ) _data_frame_to_default_int_type(self.exp_results_minimal_with_labels) self.exp_results_duplicate_dms = pd.read_csv( get_data_path('pwmantel_exp_results_duplicate_dms.txt'), sep='\t', index_col=(0, 1) ) _data_frame_to_default_int_type(self.exp_results_duplicate_dms) self.exp_results_na_p_value = pd.read_csv( get_data_path('pwmantel_exp_results_na_p_value.txt'), sep='\t', index_col=(0, 1) ) _data_frame_to_default_int_type(self.exp_results_na_p_value) self.exp_results_reordered_distance_matrices = pd.read_csv( get_data_path('pwmantel_exp_results_reordered_distance_matrices.txt'), sep='\t', index_col=(0, 1) ) _data_frame_to_default_int_type(self.exp_results_reordered_distance_matrices) 
        # (continuation of PairwiseMantelTests.setUp: expected-result fixtures
        # loaded from data files shipped with the test suite)
        self.exp_results_dm_dm2 = pd.read_csv(
            get_data_path('pwmantel_exp_results_dm_dm2.txt'),
            sep='\t', index_col=(0, 1))

        self.exp_results_all_dms = pd.read_csv(
            get_data_path('pwmantel_exp_results_all_dms.txt'),
            sep='\t', index_col=(0, 1))

    def assert_pwmantel_almost_equal(self, left, right):
        """Compare two pwmantel result frames leniently on p-values.

        The p-value is a permutation count ratio obtained by comparing real
        values, so it is very sensitive to minor rounding errors.  When
        extreme counts are rare, a tiny rounding difference can produce a
        large proportional error, so we compare p-values with low precision
        while statistics use the normal precision.
        """
        # column 0 holds the statistics: full precision comparison
        npt.assert_almost_equal(left.values[:, 0], right.values[:, 0])
        # column 1 holds the p-values: coarse comparison, shifted by +0.5 to
        # stay away from zero
        npt.assert_almost_equal(left.values[:, 1] + 0.5,
                                right.values[:, 1] + 0.5, decimal=2)

    def test_minimal_compatible_input(self):
        """DistanceMatrix and array_like inputs should agree."""
        # Matrices are already in the correct order and have matching IDs.
        np.random.seed(0)

        # input as DistanceMatrix instances
        obs = pwmantel(self.min_dms, alternative='greater')
        assert_data_frame_almost_equal(obs, self.exp_results_minimal)

        np.random.seed(0)

        # input as array_like
        obs = pwmantel((self.minx, self.miny, self.minz),
                       alternative='greater')
        assert_data_frame_almost_equal(obs, self.exp_results_minimal)

    def test_minimal_compatible_input_with_labels(self):
        """Custom labels should appear in the result index."""
        np.random.seed(0)

        obs = pwmantel(self.min_dms, alternative='greater',
                       labels=('minx', 'miny', 'minz'))
        assert_data_frame_almost_equal(
            obs,
            self.exp_results_minimal_with_labels)

    def test_duplicate_dms(self):
        """Identical matrices compared pairwise still produce valid output."""
        obs = pwmantel((self.minx_dm, self.minx_dm, self.minx_dm),
                       alternative='less')
        assert_data_frame_almost_equal(obs, self.exp_results_duplicate_dms)

    def test_na_p_value(self):
        """permutations=0 yields NaN p-values in the result frame."""
        obs = pwmantel((self.miny_dm, self.minx_dm), method='spearman',
                       permutations=0)
        assert_data_frame_almost_equal(obs, self.exp_results_na_p_value)

    def test_reordered_distance_matrices(self):
        # Matrices have matching IDs but they all have different ordering.
x = self.minx_dm.filter(['1', '0', '2']) y = self.miny_dm.filter(['0', '2', '1']) z = self.minz_dm.filter(['1', '2', '0']) np.random.seed(0) obs = pwmantel((x, y, z), alternative='greater') assert_data_frame_almost_equal( obs, self.exp_results_reordered_distance_matrices) def test_strict(self): # Matrices have some matching and nonmatching IDs, with different # ordering. x = self.minx_dm_extra.filter(['1', '0', 'foo', '2']) y = self.miny_dm.filter(['0', '2', '1']) z = self.minz_dm_extra.filter(['bar', '1', '2', '0']) np.random.seed(0) # strict=False should discard IDs that aren't found in both matrices obs = pwmantel((x, y, z), alternative='greater', strict=False) assert_data_frame_almost_equal( obs, self.exp_results_reordered_distance_matrices) def test_id_lookup(self): # Matrices have mismatched IDs but a lookup is provided. self.minx_dm_extra.ids = ['a', 'b', 'c', 'foo'] self.minz_dm_extra.ids = ['d', 'e', 'f', 'bar'] lookup = {'a': '0', 'b': '1', 'c': '2', 'foo': 'foo', 'd': '0', 'e': '1', 'f': '2', 'bar': 'bar', '0': '0', '1': '1', '2': '2'} x = self.minx_dm_extra.filter(['b', 'a', 'foo', 'c']) y = self.miny_dm.filter(['0', '2', '1']) z = self.minz_dm_extra.filter(['bar', 'e', 'f', 'd']) x_copy = x.copy() y_copy = y.copy() z_copy = z.copy() np.random.seed(0) obs = pwmantel((x, y, z), alternative='greater', strict=False, lookup=lookup) assert_data_frame_almost_equal( obs, self.exp_results_reordered_distance_matrices) # Make sure the inputs aren't modified. 
        # (tail of PairwiseMantelTests.test_id_lookup: verify pwmantel did not
        # mutate its inputs)
        self.assertEqual(x, x_copy)
        self.assertEqual(y, y_copy)
        self.assertEqual(z, z_copy)

    def test_too_few_dms(self):
        """At least two matrices are required for pairwise comparison."""
        with self.assertRaises(ValueError):
            pwmantel([self.miny_dm])

    def test_wrong_number_of_labels(self):
        """Label count must match the number of matrices (3 here)."""
        with self.assertRaises(ValueError):
            pwmantel(self.min_dms, labels=['foo', 'bar'])

    def test_duplicate_labels(self):
        """Labels must be unique."""
        with self.assertRaises(ValueError):
            pwmantel(self.min_dms, labels=['foo', 'bar', 'foo'])

    def test_mixed_input_types(self):
        """Mixing DistanceMatrix and array_like inputs is rejected."""
        # DistanceMatrix, DistanceMatrix, array_like
        with self.assertRaises(TypeError):
            pwmantel((self.miny_dm, self.minx_dm, self.minz))

    def test_filepaths_as_input(self):
        """Matrices may be given as file paths and loaded on demand."""
        dms = [
            get_data_path('dm.txt'),
            get_data_path('dm2.txt'),
        ]
        np.random.seed(0)

        obs = pwmantel(dms)
        self.assert_pwmantel_almost_equal(obs, self.exp_results_dm_dm2)

    def test_many_filepaths_as_input(self):
        """Four file-path inputs produce all six pairwise comparisons."""
        dms = [
            get_data_path('dm2.txt'),
            get_data_path('dm.txt'),
            get_data_path('dm4.txt'),
            get_data_path('dm3.txt')
        ]
        np.random.seed(0)

        obs = pwmantel(dms)
        self.assert_pwmantel_almost_equal(obs, self.exp_results_all_dms)


class OrderDistanceMatricesTests(MantelTestData):
    """Tests for the private _order_dms helper (ID matching/reordering)."""

    def setUp(self):
        super(OrderDistanceMatricesTests, self).setUp()

    def test_array_like_input(self):
        # array_like inputs are promoted to DistanceMatrix instances
        obs = _order_dms(self.minx, self.miny)
        self.assertEqual(obs, (self.minx_dm, self.miny_dm))

    def test_reordered_distance_matrices(self):
        # All matching IDs but with different orderings.
        x = self.minx_dm.filter(['1', '0', '2'])
        y = self.miny_dm.filter(['0', '2', '1'])

        # y should be reordered to x's ID order
        exp = (x, y.filter(['1', '0', '2']))
        obs = _order_dms(x, y)
        self.assertEqual(obs, exp)

    def test_reordered_and_nonmatching_distance_matrices(self):
        # Some matching and nonmatching IDs, with different ordering.
        x = self.minx_dm_extra.filter(['1', '0', 'foo', '2'])
        z = self.minz_dm_extra.filter(['bar', '0', '2', '1'])

        # with strict=False, non-shared IDs ('foo', 'bar') are dropped
        exp = (x.filter(['1', '0', '2']), z.filter(['1', '0', '2']))
        obs = _order_dms(x, z, strict=False)
        self.assertEqual(obs, exp)

    def test_id_lookup(self):
        # Matrices have mismatched IDs but a lookup is provided.
self.minx_dm_extra.ids = ['a', 'b', 'c', 'foo'] self.minz_dm_extra.ids = ['d', 'e', 'f', 'bar'] lookup = {'a': '0', 'b': '1', 'c': '2', 'foo': 'foo', 'd': '0', 'e': '1', 'f': '2', 'bar': 'bar'} x = self.minx_dm_extra.filter(['b', 'a', 'foo', 'c']) z = self.minz_dm_extra.filter(['bar', 'e', 'f', 'd']) x_copy = x.copy() z_copy = z.copy() exp = (self.minx_dm.filter(['1', '0', '2']), self.minz_dm.filter(['1', '0', '2'])) obs = _order_dms(x, z, strict=False, lookup=lookup) self.assertEqual(obs, exp) # Make sure the inputs aren't modified. self.assertEqual(x, x_copy) self.assertEqual(z, z_copy) def test_lookup_with_array_like(self): lookup = {'0': 'a', '1': 'b', '2': 'c'} with self.assertRaises(ValueError): _order_dms(self.minx, self.miny, lookup=lookup) def test_shape_mismatch(self): with self.assertRaises(ValueError): _order_dms(self.minx, [[0, 2], [2, 0]]) def test_missing_ids_in_lookup(self): # Mapping for '1' is missing. Should get an error while remapping IDs # for the first distance matrix. lookup = {'0': 'a', '2': 'c'} with self.assertRaisesRegex(KeyError, r"first.*(x).*'1'\"$"): _order_dms(self.minx_dm, self.miny_dm, lookup=lookup) # Mapping for 'bar' is missing. Should get an error while remapping IDs # for the second distance matrix. 
lookup = {'0': 'a', '1': 'b', '2': 'c', 'foo': 'a', 'baz': 'c'} self.miny_dm.ids = ('foo', 'bar', 'baz') with self.assertRaisesRegex(KeyError, r"second.*(y).*'bar'\"$"): _order_dms(self.minx_dm, self.miny_dm, lookup=lookup) def test_nonmatching_ids_strict_true(self): with self.assertRaises(ValueError): _order_dms(self.minx_dm, self.minz_dm_extra, strict=True) def test_no_matching_ids(self): self.minx_dm.ids = ['foo', 'bar', 'baz'] self.miny_dm.ids = ['a', 'b', 'c'] with self.assertRaises(ValueError): _order_dms(self.minx_dm, self.miny_dm, strict=False) def test_mixed_input_types(self): with self.assertRaises(TypeError): _order_dms(self.minx, self.minz_dm) with self.assertRaises(TypeError): _order_dms(self.minz_dm, self.minx) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/distance/tests/test_permanova.py000066400000000000000000000171551464262511300243730ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from functools import partial from unittest import TestCase, main import numpy as np import pandas as pd from pandas.testing import assert_series_equal from skbio import DistanceMatrix from skbio.stats.distance import permanova from skbio.util import get_data_path from skbio.stats.distance._base import _preprocess_input_sng class TestPERMANOVA(TestCase): """All results were verified with R (vegan::adonis).""" def setUp(self): # Distance matrices with and without ties in the ranks, with 2 groups # of equal size. 
dm_ids = ['s1', 's2', 's3', 's4'] self.grouping_equal = ['Control', 'Control', 'Fast', 'Fast'] self.df = pd.read_csv( io.StringIO('ID,Group\ns2,Control\ns3,Fast\ns4,Fast\ns5,Control\n' 's1,Control'), index_col=0) self.dm_ties = DistanceMatrix([[0, 1, 1, 4], [1, 0, 3, 2], [1, 3, 0, 3], [4, 2, 3, 0]], dm_ids) self.dm_no_ties = DistanceMatrix([[0, 1, 5, 4], [1, 0, 3, 2], [5, 3, 0, 3], [4, 2, 3, 0]], dm_ids) # Test with 3 groups of unequal size. self.grouping_unequal = ['Control', 'Treatment1', 'Treatment2', 'Treatment1', 'Control', 'Control'] # Equivalent grouping but with different labels -- groups should be # assigned different integer labels but results should be the same. self.grouping_unequal_relabeled = ['z', 42, 'abc', 42, 'z', 'z'] self.dm_unequal = DistanceMatrix( [[0.0, 1.0, 0.1, 0.5678, 1.0, 1.0], [1.0, 0.0, 0.002, 0.42, 0.998, 0.0], [0.1, 0.002, 0.0, 1.0, 0.123, 1.0], [0.5678, 0.42, 1.0, 0.0, 0.123, 0.43], [1.0, 0.998, 0.123, 0.123, 0.0, 0.5], [1.0, 0.0, 1.0, 0.43, 0.5, 0.0]], ['s1', 's2', 's3', 's4', 's5', 's6']) # Expected series index is the same across all tests. self.exp_index = ['method name', 'test statistic name', 'sample size', 'number of groups', 'test statistic', 'p-value', 'number of permutations'] # Stricter series equality testing than the default. self.assert_series_equal = partial(assert_series_equal, check_index_type=True, check_series_type=True) def test_call_ties(self): # Ensure we get the same results if we rerun the method using the same # inputs. Also ensure we get the same results if we run the method # using a grouping vector or a data frame with equivalent groupings. 
        # (body of TestPERMANOVA.test_call_ties; header and comment are on the
        # previous collapsed line)
        exp = pd.Series(index=self.exp_index,
                        data=['PERMANOVA', 'pseudo-F', 4, 2, 2.0, 0.671, 999],
                        name='PERMANOVA results')

        # rerunning with the same seed must reproduce the same result
        for _ in range(2):
            np.random.seed(0)
            obs = permanova(self.dm_ties, self.grouping_equal)
            self.assert_series_equal(obs, exp)

        # a data frame with an equivalent grouping column gives the same
        # result as the plain grouping vector
        for _ in range(2):
            np.random.seed(0)
            obs = permanova(self.dm_ties, self.df, column='Group')
            self.assert_series_equal(obs, exp)

    def test_call_no_ties(self):
        """Distance matrix without rank ties; verified against vegan."""
        exp = pd.Series(index=self.exp_index,
                        data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, 0.332, 999],
                        name='PERMANOVA results')
        np.random.seed(0)
        obs = permanova(self.dm_no_ties, self.grouping_equal)
        self.assert_series_equal(obs, exp)

    def test_call_no_permutations(self):
        """permutations=0 reports a NaN p-value."""
        exp = pd.Series(index=self.exp_index,
                        data=['PERMANOVA', 'pseudo-F', 4, 2, 4.4, np.nan, 0],
                        name='PERMANOVA results')
        obs = permanova(self.dm_no_ties, self.grouping_equal, permutations=0)
        self.assert_series_equal(obs, exp)

    def test_call_unequal_group_sizes(self):
        """Three groups of unequal size; relabeling must not change results."""
        exp = pd.Series(
            index=self.exp_index,
            data=['PERMANOVA', 'pseudo-F', 6, 3, 0.578848, 0.645, 999],
            name='PERMANOVA results')

        np.random.seed(0)
        obs = permanova(self.dm_unequal, self.grouping_unequal)
        self.assert_series_equal(obs, exp)

        np.random.seed(0)
        obs = permanova(self.dm_unequal, self.grouping_unequal_relabeled)
        self.assert_series_equal(obs, exp)

    def test_call_via_series(self):
        """Regression test for scikit-bio issue #1877.

        permanova gave different results depending on whether the grouping
        was passed as a pd.DataFrame (with column=) or as a pd.Series.
        """
        dm = DistanceMatrix.read(get_data_path('frameSeries_dm.tsv'))
        grouping = pd.read_csv(get_data_path("frameSeries_grouping.tsv"),
                               sep="\t", index_col=0)

        np.random.seed(0)
        obs_frame = permanova(dm, grouping, column='tumor')

        np.random.seed(0)
        obs_series = permanova(dm, grouping['tumor'])

        # in principle, both tests - if seed is the same - should return the
        # exact same results. However, they don't for the current example ...
        self.assert_series_equal(obs_frame, obs_series)

        # ...
which is due to different result in computing "unique" values for # the grouping, which is illustrated with the following test grp_frame = _preprocess_input_sng( dm.ids, dm.shape[0], grouping, 'tumor' # grouping as a pd.DataFrame )[-1] grp_series = _preprocess_input_sng( dm.ids, dm.shape[0], grouping['tumor'], column=None # grouping as a pd.Series, note # that user of permanova do not # have to explicitly set # column=None )[-1] # convert np.array to tuple to ease comparison for equality self.assertEqual(tuple(grp_frame), tuple(grp_series)) # to better illustrate what is going wrong, we compare the computed # grouping labels (0 or 1) with the original user provided data, here # "no-tumor mice" and "tumor-bearing mice". We expect a one-to-one # correspondens, i.e. if we group on both columns at the same time, we # expect exactly two groups, like # tumor series # no-tumor mice 0 5 # tumor-bearing mice 1 37 # dtype: int64 # which is not the case of the pd.Series case g = pd.DataFrame(data={'series': list(grp_series), 'dataframe': list(grp_frame), 'tumor': grouping.loc[list(dm.ids), 'tumor']}, index=dm.ids) self.assertEqual(g.groupby(['tumor', 'dataframe']).size().shape[0], 2) self.assertEqual(g.groupby(['tumor', 'series']).size().shape[0], 2) # test that ValueError is raised, if use provided column does not match # the provided pd.Series name for grouping with self.assertRaises(ValueError): _preprocess_input_sng(dm.ids, dm.shape[0], grouping['tumor'], 'foo') if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/distance/tests/test_permdisp.py000066400000000000000000000301321464262511300242140ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from functools import partial from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd from pandas.testing import assert_series_equal from scipy.stats import f_oneway from skbio import DistanceMatrix from skbio.stats.ordination import pcoa from skbio.stats.distance import permdisp from skbio.stats.distance._permdisp import _compute_groups from skbio.stats.distance._cutils import geomedian_axis_one from skbio.util import get_data_path class testPERMDISP(TestCase): def setUp(self): # test with 2 groups of equal size # when assigned different labels, results should be the same self.grouping_eq = ['foo', 'foo', 'foo', 'bar', 'bar', 'bar'] self.grouping_eq_relab = ['pyt', 'pyt', 'pyt', 'hon', 'hon', 'hon'] self.exp_index = ['method name', 'test statistic name', 'sample size', 'number of groups', 'test statistic', 'p-value', 'number of permutations'] # test with 3 groups of different sizes # when assigned different labels results should be the same self.grouping_uneq = ['foo', 'foo', 'bar', 'bar', 'bar', 'qw', 'qw', 'qw', 'qw'] self.grouping_uneq_relab = [12, 12, 7, 7, 7, 23, 23, 23, 23] self.grouping_un_mixed = ['a', 'a', 7, 7, 7, 'b', 'b', 'b', 'b'] eq_ids = ['s1', 's2', 's3', 's4', 's5', 's6'] uneq_ids = ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9'] # matrix for equal grouping self.eq_mat = DistanceMatrix([[0, 4, 0, 0, 4, 2], [4, 0, 2, 0, 3, 1], [0, 2, 0, 5, 2, 5], [0, 0, 5, 0, 0, 2], [4, 3, 2, 0, 0, 2], [2, 1, 5, 2, 2, 0]], eq_ids) # matrix for unequal grouping self.uneq_mat = DistanceMatrix([[0, 0, 4, 0, 0, 3, 5, 3, 0], [0, 0, 0, 3, 4, 5, 3, 0, 3], [4, 0, 0, 4, 3, 1, 0, 5, 2], [0, 3, 4, 0, 0, 2, 1, 3, 5], [0, 4, 3, 0, 0, 1, 1, 5, 0], [3, 5, 1, 2, 1, 0, 2, 0, 5], [5, 3, 0, 1, 1, 2, 0, 4, 3], [3, 0, 5, 3, 5, 0, 4, 0, 4], [0, 3, 2, 5, 0, 5, 3, 4, 0]], uneq_ids) # null matrix for equal grouping self.null_mat = DistanceMatrix([[0, 0, 0, 0, 
0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]], eq_ids) unif_ids = ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593', 'PC.607', 'PC.634', 'PC.635', 'PC.636'] self.unifrac_dm = DistanceMatrix( [[0.0, 0.595483768391, 0.618074717633, 0.582763100909, 0.566949022108, 0.714717232268, 0.772001731764, 0.690237118413, 0.740681707488], [0.595483768391, 0.0, 0.581427669668, 0.613726772383, 0.65945132763, 0.745176523638, 0.733836123821, 0.720305073505, 0.680785600439], [0.618074717633, 0.581427669668, 0.0, 0.672149021573, 0.699416863323, 0.71405573754, 0.759178215168, 0.689701276341, 0.725100672826], [0.582763100909, 0.613726772383, 0.672149021573, 0.0, 0.64756120797, 0.666018240373, 0.66532968784, 0.650464714994, 0.632524644216], [0.566949022108, 0.65945132763, 0.699416863323, 0.64756120797, 0.0, 0.703720200713, 0.748240937349, 0.73416971958, 0.727154987937], [0.714717232268, 0.745176523638, 0.71405573754, 0.666018240373, 0.703720200713, 0.0, 0.707316869557, 0.636288883818, 0.699880573956], [0.772001731764, 0.733836123821, 0.759178215168, 0.66532968784, 0.748240937349, 0.707316869557, 0.0, 0.565875193399, 0.560605525642], [0.690237118413, 0.720305073505, 0.689701276341, 0.650464714994, 0.73416971958, 0.636288883818, 0.565875193399, 0.0, 0.575788039321], [0.740681707488, 0.680785600439, 0.725100672826, 0.632524644216, 0.727154987937, 0.699880573956, 0.560605525642, 0.575788039321, 0.0]], unif_ids) self.unif_grouping = ['Control', 'Control', 'Control', 'Control', 'Control', 'Fast', 'Fast', 'Fast', 'Fast'] self.assert_series_equal = partial(assert_series_equal, check_index_type=True, check_series_type=True) def test_centroids_eq_groups(self): exp = [[1.2886811963240687, 1.890538910062923, 1.490527658097728], [2.17349240061718, 2.3192679626679946, 2.028338553903792]] exp_stat, _ = f_oneway(*exp) dm = pcoa(self.eq_mat) dm = dm.samples obs = _compute_groups(dm, 'centroid', self.grouping_eq) self.assertAlmostEqual(obs, 
exp_stat, places=6) obs_relab = _compute_groups(dm, 'centroid', self.grouping_eq_relab) self.assertAlmostEqual(obs_relab, obs, places=6) def test_centroids_uneq_groups(self): """ the expected result here was calculated by hand """ exp = [[2.5847022428144935, 2.285624595858895, 1.7022431146340287], [1.724817266046108, 1.724817266046108], [2.4333280644972795, 2.389000390879655, 2.8547180589306036, 3.218568759338847]] exp_stat, _ = f_oneway(*exp) dm = pcoa(self.uneq_mat) dm = dm.samples obs = _compute_groups(dm, 'centroid', self.grouping_uneq) self.assertAlmostEqual(obs, exp_stat, places=6) obs_relab = _compute_groups(dm, 'centroid', self.grouping_uneq_relab) self.assertAlmostEqual(obs, obs_relab, places=6) def test_centroids_mixedgroups(self): exp = [[2.5847022428144935, 2.285624595858895, 1.7022431146340287], [1.724817266046108, 1.724817266046108], [2.4333280644972795, 2.389000390879655, 2.8547180589306036, 3.218568759338847]] dm = pcoa(self.uneq_mat) dm = dm.samples exp_stat, _ = f_oneway(*exp) obs_mixed = _compute_groups(dm, 'centroid', self.grouping_un_mixed) self.assertAlmostEqual(exp_stat, obs_mixed, places=6) def test_centroids_null(self): dm = pcoa(self.null_mat) dm = dm.samples obs_null = _compute_groups(dm, 'centroid', self.grouping_eq) np.isnan(obs_null) def test_centroid_normal(self): exp = pd.Series(index=self.exp_index, data=['PERMDISP', 'F-value', 9, 2, 0.244501519876, 0.63, 99], name='PERMDISP results') grouping = ['Control', 'Control', 'Control', 'Control', 'Control', 'Fast', 'Fast', 'Fast', 'Fast'] np.random.seed(0) obs = permdisp(self.unifrac_dm, grouping, test='centroid', permutations=99) self.assert_series_equal(obs, exp) def test_median_normal(self): exp = pd.Series(index=self.exp_index, data=['PERMDISP', 'F-value', 9, 2, 0.139475441876, 0.61, 99], name='PERMDISP results') np.random.seed(0) obs = permdisp(self.unifrac_dm, self.unif_grouping, test='median', permutations=99) self.assert_series_equal(obs, exp) np.random.seed(0) po = 
pcoa(self.unifrac_dm) obs2 = permdisp(po, self.unif_grouping, test='median', permutations=99) self.assert_series_equal(obs2, exp) def test_median_fsvd(self): exp = pd.Series(index=self.exp_index, data=['PERMDISP', 'F-value', 9, 2, 0.04078077215673714, 0.8, 99], name='PERMDISP results') np.random.seed(0) obs = permdisp(self.unifrac_dm, self.unif_grouping, test='median', permutations=99, method='fsvd', number_of_dimensions=3) self.assert_series_equal(obs, exp) np.random.seed(0) po = pcoa(self.unifrac_dm, method='fsvd', number_of_dimensions=3) obs = permdisp(po, self.unif_grouping, test='median', permutations=99) self.assert_series_equal(obs, exp) def test_not_distance_matrix(self): dm = [] grouping = ['Control', 'Control', 'Control', 'Control', 'Control', 'Fast', 'Fast', 'Fast', 'Fast'] npt.assert_raises(TypeError, permdisp, dm, grouping, permutations=0) def test_mismatched_group(self): gr = ['foo', 'bar'] npt.assert_raises(ValueError, permdisp, self.unifrac_dm, gr) def test_single_group(self): gr = ['f', 'f', 'f', 'f', 'f', 'f', 'f', 'f', 'f'] npt.assert_raises(ValueError, permdisp, self.unifrac_dm, gr) def test_no_permuations(self): obs = permdisp(self.eq_mat, self.grouping_eq, permutations=0) pval = obs['p-value'] np.isnan(pval) def test_geomedian(self): exp = np.array([2.01956244, 1.53164546, 2.60571752, 0.91424179, 1.76214416, 1.69943057]) obs = np.array(geomedian_axis_one(self.eq_mat.data)) npt.assert_almost_equal(obs, exp, decimal=6) def test_confirm_betadispr_results(self): mp_dm = DistanceMatrix.read(get_data_path('moving_pictures_dm.tsv')) mp_mf = pd.read_csv(get_data_path('moving_pictures_mf.tsv'), sep='\t') mp_mf.set_index('#SampleID', inplace=True) obs_med_mp = permdisp(mp_dm, mp_mf, column='BodySite') obs_cen_mp = permdisp(mp_dm, mp_mf, column='BodySite', test='centroid') exp_data_m = ['PERMDISP', 'F-value', 33, 4, 10.1956, 0.001, 999] exp_data_c = ['PERMDISP', 'F-value', 33, 4, 17.4242, 0.001, 999] exp_ind = ['method name', 'test statistic name', 
'sample size', 'number of groups', 'test statistic', 'p-value', 'number of permutations'] exp_med_mp = pd.Series(data=exp_data_m, index=exp_ind, dtype='object', name='PERMDISP results') exp_cen_mp = pd.Series(data=exp_data_c, index=exp_ind, dtype='object', name='PERMDISP results') self.assert_series_equal(exp_med_mp, obs_med_mp) self.assert_series_equal(exp_cen_mp, obs_cen_mp) def test_call_via_series(self): # test https://github.com/scikit-bio/scikit-bio/issues/1877 # actual issue is with _base._preprocess_input_sng but permdisp is # indirectly affected dm = DistanceMatrix.read(get_data_path('frameSeries_dm.tsv')) grouping = pd.read_csv(get_data_path("frameSeries_grouping.tsv"), sep="\t", index_col=0) np.random.seed(0) obs_frame = permdisp(dm, grouping, column='tumor') np.random.seed(0) obs_series = permdisp(dm, grouping['tumor']) # in principle, both tests - if seed is the same - should return the # exact same results. However, they don't for the current example ... self.assert_series_equal(obs_frame, obs_series) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/evolve/000077500000000000000000000000001464262511300173255ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/evolve/__init__.py000066400000000000000000000016641464262511300214450ustar00rootroot00000000000000"""Evolutionary statistics (:mod:`skbio.stats.evolve`) =================================================== .. currentmodule:: skbio.stats.evolve This package contains statistics pertaining to phylogenies and evolution. Cophylogenetic methods ---------------------- These functions test for correlation between phylogenies or representations of evolutionary distance (for example, genetic distance matrices). Functions ^^^^^^^^^ .. autosummary:: :toctree: hommola_cospeciation """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from ._hommola import hommola_cospeciation __all__ = ["hommola_cospeciation"] scikit-bio-0.6.2/skbio/stats/evolve/_hommola.py000066400000000000000000000253231464262511300214770ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from scipy.stats import pearsonr from skbio import DistanceMatrix def hommola_cospeciation(host_dist, par_dist, interaction, permutations=999): """Perform Hommola et al (2009) host/parasite cospeciation test. This test for host/parasite cospeciation is as described in [1]_. This test is a modification of a Mantel test, expanded to accept the case where multiple hosts map to a single parasite (and vice versa). For a basic Mantel test, the distance matrices being compared must have the same number of values. To determine the significance of the correlations between distances in the two matrices, the correlation coefficient of those distances is calculated and compared to the correlation coefficients calculated from a set of matrices in which rows and columns have been permuted. In this test, rather than comparing host-host to parasite-parasite distances directly (requiring one host per parasite), the distances are compared for each interaction edge between host and parasite. Thus, a host interacting with two different parasites will be represented in two different edges, with the host-host distance for the comparison between those edges equal to zero, and the parasite-parasite distance equal to the distance between those two parasites. 
Like in the Mantel test, significance of the interaction is assessed by permutation, in this case permutation of the host-symbiont interaction links. Note that the null hypothesis being tested here is that the hosts and parasites have evolved independently of one another. The alternative to this is a somewhat weaker case than what is often implied with the term 'cospeciation,' which is that each incidence of host speciation is recapitulated in an incidence of symbiont speciation (strict co-cladogenesis). Although there may be many factors that could contribute to non-independence of host and symbiont phylogenies, this loss of explanatory specificity comes with increased robustness to phylogenetic uncertainty. Thus, this test may be especially useful for cases where host and/or symbiont phylogenies are poorly resolved, or when simple correlation between host and symbiont evolution is of more interest than strict co-cladogenesis. This test requires pairwise distance matrices for hosts and symbionts, as well as an interaction matrix specifying links between hosts (in columns) and symbionts (in rows). This interaction matrix should have the same number of columns as the host distance matrix, and the same number of rows as the symbiont distance matrix. Interactions between hosts and symbionts should be indicated by values of ``1`` or ``True``, with non-interactions indicated by values of ``0`` or ``False``. Parameters ---------- host_dist : 2-D array_like or DistanceMatrix Symmetric matrix of m x m pairwise distances between hosts. par_dist : 2-D array_like or DistanceMatrix Symmetric matrix of n x n pairwise distances between parasites. interaction : 2-D array_like, bool n x m binary matrix of parasite x host interactions. Order of hosts (columns) should be identical to order of hosts in `host_dist`, as should order of parasites (rows) be identical to order of parasites in `par_dist`. permutations : int, optional Number of permutations used to compute p-value. 
Must be greater than or equal to zero. If zero, statistical significance calculations will be skipped and the p-value will be ``np.nan``. Returns ------- corr_coeff : float Pearson correlation coefficient of host : parasite association. p_value : float Significance of host : parasite association computed using `permutations` and a one-sided (greater) alternative hypothesis. perm_stats : 1-D numpy.ndarray, float Correlation coefficients observed using permuted host : parasite interactions. Length will be equal to the number of permutations used to compute p-value (see `permutations` parameter above). See Also -------- skbio.stats.distance.mantel scipy.stats.pearsonr Notes ----- It is assumed that the ordering of parasites in `par_dist` and hosts in `host_dist` are identical to their ordering in the rows and columns, respectively, of the interaction matrix. This code is loosely based on the original R code from [1]_. References ---------- .. [1] Hommola K, Smith JE, Qiu Y, Gilks WR (2009) A Permutation Test of Host-Parasite Cospeciation. Molecular Biology and Evolution, 26, 1457-1468. Examples -------- >>> from skbio.stats.evolve import hommola_cospeciation Create arrays for host distances, parasite distances, and their interactions (data taken from example in [1]_): >>> hdist = [[0,3,8,8,9], [3,0,7,7,8], [8,7,0,6,7], [8,7,6,0,3], ... [9,8,7,3,0]] >>> pdist = [[0,5,8,8,8], [5,0,7,7,7], [8,7,0,4,4], [8,7,4,0,2], ... [8,7,4,2,0]] >>> interaction = [[1,0,0,0,0], [0,1,0,0,0], [0,0,1,0,0], [0,0,0,1,0], ... [0,0,0,1,1]] Run the cospeciation test with 99 permutations. Note that the correlation coefficient for the observed values counts against the final reported p-value: >>> corr_coeff, p_value, perm_stats = hommola_cospeciation( ... hdist, pdist, interaction, permutations=99) >>> print("%.3f" % corr_coeff) 0.832 In this case, the host distances have a fairly strong positive correlation with the symbiont distances. 
However, this may also reflect structure inherent in the phylogeny, and is not itself indicative of significance. >>> p_value <= 0.05 True After permuting host : parasite interactions, we find that the observed correlation is indeed greater than we would expect by chance. """ host_dist = DistanceMatrix(host_dist) par_dist = DistanceMatrix(par_dist) interaction = np.asarray(interaction, dtype=bool) num_hosts = host_dist.shape[0] num_pars = par_dist.shape[0] if num_hosts < 3 or num_pars < 3: raise ValueError("Distance matrices must be a minimum of 3x3 in size.") if num_hosts != interaction.shape[1]: raise ValueError( "Number of interaction matrix columns must match " "number of hosts in `host_dist`." ) if num_pars != interaction.shape[0]: raise ValueError( "Number of interaction matrix rows must match " "number of parasites in `par_dist`." ) if permutations < 0: raise ValueError( "Number of permutations must be greater than or " "equal to zero." ) if interaction.sum() < 3: raise ValueError( "Must have at least 3 host-parasite interactions in " "`interaction`." ) # shortcut to eliminate nested for-loops specifying pairwise interaction # partners as randomizeable indices pars, hosts = np.nonzero(interaction) pars_k_labels, pars_t_labels = _gen_lists(pars) hosts_k_labels, hosts_t_labels = _gen_lists(hosts) # get a vector of pairwise distances for each interaction edge x = _get_dist(hosts_k_labels, hosts_t_labels, host_dist.data, np.arange(num_hosts)) y = _get_dist(pars_k_labels, pars_t_labels, par_dist.data, np.arange(num_pars)) # calculate the observed correlation coefficient for these hosts/symbionts corr_coeff = pearsonr(x, y)[0] # now do permutatitons. 
initialize index lists of the appropriate size mp = np.arange(num_pars) mh = np.arange(num_hosts) # initialize list of shuffled correlation vals perm_stats = np.empty(permutations) if permutations == 0 or np.isnan(corr_coeff): p_value = np.nan perm_stats.fill(np.nan) else: for i in range(permutations): # generate a shuffled list of indexes for each permutation. this # effectively randomizes which host is associated with which # symbiont, but maintains the distribution of genetic distances np.random.shuffle(mp) np.random.shuffle(mh) # get pairwise distances in shuffled order y_p = _get_dist(pars_k_labels, pars_t_labels, par_dist.data, mp) x_p = _get_dist(hosts_k_labels, hosts_t_labels, host_dist.data, mh) # calculate shuffled correlation coefficient perm_stats[i] = pearsonr(x_p, y_p)[0] p_value = ((perm_stats >= corr_coeff).sum() + 1) / (permutations + 1) return corr_coeff, p_value, perm_stats def _get_dist(k_labels, t_labels, dists, index): """Subset a distance matrix using a set of (randomizable) index labels. Parameters ---------- k_labels : numpy.array index labels specifying row-wise member of pairwise interaction t_labels : numpy.array index labels specifying column-wise member of pairwise interaction dists : numpy.array pairwise distance matrix index : numpy.array of int permutable indices for changing order in pairwise distance matrix Returns ------- vec : list of float List of distances associated with host:parasite edges. """ return dists[index[k_labels], index[t_labels]] def _gen_lists(labels): """Generate matched lists of row and column index labels. Shortcut function for generating matched lists of row and col index labels for the set of pairwise comparisons specified by the list of those indices recovered using ``np.nonzero(interaction)``. Reproduces values of iterated indices from the nested for-loops contained in ``get_dist`` function in original code from [1]_. 
Parameters ---------- labels : numpy.array array containing the indices of nonzero elements in one dimension of an interaction matrix Returns ------- k_labels : numpy.array index labels specifying row-wise member of pairwise interaction t_labels : numpy.array index labels specifying column-wise member of pairwise interaction References ---------- .. [1] Hommola K, Smith JE, Qiu Y, Gilks WR (2009) A Permutation Test of Host-Parasite Cospeciation. Molecular Biology and Evolution, 26, 1457-1468. """ i_array, j_array = np.transpose(np.tri(len(labels) - 1)).nonzero() j_array += 1 return labels[i_array], labels[j_array] scikit-bio-0.6.2/skbio/stats/evolve/tests/000077500000000000000000000000001464262511300204675ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/evolve/tests/__init__.py000066400000000000000000000005411464262511300226000ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/stats/evolve/tests/test_hommola.py000066400000000000000000000174711464262511300235460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import numpy as np import numpy.testing as npt from skbio.stats.distance import mantel from skbio.stats.evolve import hommola_cospeciation from skbio.stats.evolve._hommola import _get_dist, _gen_lists class HommolaCospeciationTests(unittest.TestCase): def setUp(self): # Test matrices, as presented in original paper by Hommola et al. self.hdist = np.array([[0, 3, 8, 8, 9], [3, 0, 7, 7, 8], [ 8, 7, 0, 6, 7], [8, 7, 6, 0, 3], [9, 8, 7, 3, 0]]) self.pdist = np.array([[0, 5, 8, 8, 8], [5, 0, 7, 7, 7], [ 8, 7, 0, 4, 4], [8, 7, 4, 0, 2], [8, 7, 4, 2, 0]]) self.interact = np.array([[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [ 0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 1, 1]]) # Reduced-size host matrix for testing asymmetric interaction matrix self.hdist_4x4 = np.array([[0, 3, 8, 8], [3, 0, 7, 7], [8, 7, 0, 6], [8, 7, 6, 0]]) self.interact_5x4 = np.array([[1, 0, 0, 0], [0, 1, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1], [0, 0, 0, 1]]) # One to one interaction matrix for comparing against Mantel output self.interact_1to1 = np.array([[1, 0, 0, 0, 0], [0, 1, 0, 0, 0], [ 0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]]) # interaction matrix yielding non-significant results. 
# this matrix was picked because it will generate an r value that's # less than a standard deviation away from the mean of the normal # distribution of r vals self.interact_ns = np.array( [[0, 0, 0, 1, 0], [0, 0, 0, 0, 1], [1, 0, 0, 0, 0], [1, 0, 0, 0, 0], [0, 0, 0, 0, 1]]) # minimal size matrices for sanity checks of inputs self.h_dist_3x3 = np.array([[0, 1, 2], [1, 0, 1], [2, 1, 0]]) self.h_dist_2x2 = np.array([[0, 3], [3, 0]]) self.p_dist_3x3 = np.array([[0, 3, 2], [3, 0, 1], [2, 1, 0]]) self.interact_3x3 = np.array([[0, 1, 1], [1, 0, 1], [0, 0, 1]]) self.interact_3x2 = np.array([[0, 1], [1, 0], [1, 1]]) self.interact_2x3 = np.array([[0, 1, 1], [1, 0, 1]]) self.interact_zero = np.array([[0, 0, 0], [0, 0, 0], [0, 0, 0]]) def test_hommola_cospeciation_sig(self): np.random.seed(1) obs_r, obs_p, obs_perm_stats = hommola_cospeciation( self.hdist, self.pdist, self.interact, 9) exp_p = .1 exp_r = 0.83170965463247915 exp_perm_stats = np.array([-0.14928122, 0.26299538, -0.21125858, 0.24143838, 0.61557855, -0.24258293, 0.09885203, 0.02858, 0.42742399]) self.assertAlmostEqual(obs_p, exp_p) self.assertAlmostEqual(obs_r, exp_r) npt.assert_allclose(obs_perm_stats, exp_perm_stats) def test_hommola_cospeciation_asymmetric(self): np.random.seed(1) obs_r, obs_p, obs_perm_stats = hommola_cospeciation( self.hdist_4x4, self.pdist, self.interact_5x4, 9) exp_p = 0.2 exp_r = 0.85732140997411233 exp_perm_stats = np.array([-0.315244162496, -0.039405520312, 0.093429386594, -0.387835875941, 0.183711730709, 0.056057631956, 0.945732487487, 0.056057631956, -0.020412414523]) self.assertAlmostEqual(obs_p, exp_p) self.assertAlmostEqual(obs_r, exp_r) npt.assert_allclose(obs_perm_stats, exp_perm_stats) def test_hommola_cospeciation_no_sig(self): np.random.seed(1) obs_r, obs_p, obs_perm_stats = hommola_cospeciation( self.hdist, self.pdist, self.interact_ns, 9) exp_p = .6 exp_r = -0.013679391379114569 exp_perm_stats = np.array([-0.22216543, -0.14836061, -0.04434843, 0.1478281, -0.29105645, 
0.56395839, 0.47304992, 0.79125657, 0.06804138]) self.assertAlmostEqual(obs_p, exp_p) self.assertAlmostEqual(obs_r, exp_r) npt.assert_allclose(obs_perm_stats, exp_perm_stats) def test_hommola_vs_mantel(self): # we don't compare p-values because the two methods use different # permutation strategies r_mantel, p_mantel, _ = mantel( self.hdist, self.pdist, method='pearson', permutations=0, alternative='greater' ) r_hommola, p_hommola, _ = hommola_cospeciation( self.hdist, self.pdist, self.interact_1to1, permutations=0 ) self.assertAlmostEqual(r_hommola, r_mantel) npt.assert_equal(p_hommola, p_mantel) def test_zero_permutations(self): obs_r, obs_p, obs_perm_stats = hommola_cospeciation( self.hdist, self.pdist, self.interact, 0) exp_p = np.nan exp_r = 0.83170965463247915 exp_perm_stats = np.array([]) npt.assert_equal(obs_p, exp_p) self.assertAlmostEqual(obs_r, exp_r) npt.assert_equal(obs_perm_stats, exp_perm_stats) def test_get_dist(self): labels = np.array([0, 1, 1, 2, 3]) k_labels, t_labels = _gen_lists(labels) dists = np.array([[0, 2, 6, 3], [2, 0, 5, 4], [6, 5, 0, 7], [3, 4, 7, 0]]) index = np.array([2, 3, 1, 0]) expected_vec = np.array([7, 7, 5, 6, 0, 4, 3, 4, 3, 2]) actual_vec = _get_dist(k_labels, t_labels, dists, index) npt.assert_allclose(actual_vec, expected_vec) def test_gen_lists(self): exp_pars_k_labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 4]) exp_pars_t_labels = np.array([1, 2, 3, 4, 4, 2, 3, 4, 4, 3, 4, 4, 4, 4, 4]) exp_host_k_labels = np.array([0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3]) exp_host_t_labels = np.array([1, 2, 3, 3, 4, 2, 3, 3, 4, 3, 3, 4, 3, 4, 4]) pars, hosts = np.nonzero(self.interact) obs_pars_k_labels, obs_pars_t_labels = _gen_lists(pars) obs_hosts_k_labels, obs_hosts_t_labels = _gen_lists(hosts) npt.assert_allclose(exp_pars_k_labels, obs_pars_k_labels) npt.assert_allclose(exp_pars_t_labels, obs_pars_t_labels) npt.assert_allclose(exp_host_k_labels, obs_hosts_k_labels) npt.assert_allclose(exp_host_t_labels, 
obs_hosts_t_labels) def test_dm_too_small(self): with self.assertRaises(ValueError): hommola_cospeciation(self.h_dist_2x2, self.p_dist_3x3, self.interact_3x3) def test_host_interaction_not_equal(self): with self.assertRaises(ValueError): hommola_cospeciation(self.h_dist_3x3, self.p_dist_3x3, self.interact_2x3) def test_par_interaction_not_equal(self): with self.assertRaises(ValueError): hommola_cospeciation(self.h_dist_3x3, self.p_dist_3x3, self.interact_3x2) def test_interaction_too_few(self): with self.assertRaises(ValueError): hommola_cospeciation(self.h_dist_3x3, self.p_dist_3x3, self.interact_zero) def test_permutations_too_few(self): with self.assertRaises(ValueError): hommola_cospeciation(self.h_dist_3x3, self.p_dist_3x3, self.interact_3x3, -1) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/stats/gradient.py000066400000000000000000000770031464262511300202030ustar00rootroot00000000000000r"""Gradient analyses (:mod:`skbio.stats.gradient`) =============================================== .. currentmodule:: skbio.stats.gradient This module provides functionality for performing gradient analyses. The algorithms included in this module mainly allows performing analysis of volatility on time series data, but they can be applied to any data that contains a gradient. Classes ------- .. autosummary:: :toctree: GradientANOVA AverageGradientANOVA TrajectoryGradientANOVA FirstDifferenceGradientANOVA WindowDifferenceGradientANOVA GroupResults CategoryResults GradientANOVAResults Examples -------- Assume we have the following coordinates: >>> import numpy as np >>> import pandas as pd >>> from skbio.stats.gradient import AverageGradientANOVA >>> coord_data = {'PC.354': np.array([0.2761, -0.0341, 0.0633, 0.1004]), ... 'PC.355': np.array([0.2364, 0.2186, -0.0301, -0.0225]), ... 'PC.356': np.array([0.2208, 0.0874, -0.3519, -0.0031]), ... 'PC.607': np.array([-0.1055, -0.4140, -0.15, -0.116]), ... 
'PC.634': np.array([-0.3716, 0.1154, 0.0721, 0.0898])} >>> coords = pd.DataFrame.from_dict(coord_data, orient='index') the following metadata map: >>> metadata_map = {'PC.354': {'Treatment': 'Control', 'Weight': '60'}, ... 'PC.355': {'Treatment': 'Control', 'Weight': '55'}, ... 'PC.356': {'Treatment': 'Control', 'Weight': '50'}, ... 'PC.607': {'Treatment': 'Fast', 'Weight': '65'}, ... 'PC.634': {'Treatment': 'Fast', 'Weight': '68'}} >>> metadata_map = pd.DataFrame.from_dict(metadata_map, orient='index') and the following array with the proportion explained of each coord: >>> prop_expl = np.array([25.6216, 15.7715, 14.1215, 11.6913, 9.8304]) Then to compute the average trajectory of this data: >>> av = AverageGradientANOVA(coords, prop_expl, metadata_map, ... trajectory_categories=['Treatment'], ... sort_category='Weight') >>> trajectory_results = av.get_trajectories() Check the algorithm used to compute the trajectory_results: >>> print(trajectory_results.algorithm) avg Check if we weighted the data or not: >>> print(trajectory_results.weighted) False Check the results of one of the categories: >>> print(trajectory_results.categories[0].category) Treatment >>> print(trajectory_results.categories[0].probability) 0.0118478282382 Check the results of one group of one of the categories: >>> print(trajectory_results.categories[0].groups[0].name) Control >>> print(trajectory_results.categories[0].groups[0].trajectory) [ 3.52199973 2.29597001 3.20309816] >>> print(trajectory_results.categories[0].groups[0].info) {'avg': 3.007022633956606} """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from copy import deepcopy from collections import defaultdict from numbers import Integral import numpy as np from natsort import realsorted from scipy.stats import f_oneway def _weight_by_vector(trajectories, w_vector): r"""Weight the values of `trajectories` given a weighting vector `w_vector`. Each value in `trajectories` will be weighted by the 'rate of change' to 'optimal rate of change' ratio. The 'rate of change' of a vector measures how each point in the vector changes with respect to its predecessor point. The 'optimal rate of change' is the rate of change in which each point in the vector performs the same change than its predecessor, meaning that when calling this function over evenly spaced `w_vector` values, no change will be reflected on the output. Parameters ---------- trajectories: pandas.DataFrame Values to weight w_vector: pandas.Series Values used to weight `trajectories` Returns ------- pandas.DataFrame A weighted version of `trajectories`. 
Raises ------ ValueError If `trajectories` and `w_vector` don't have equal lengths If `w_vector` is not a gradient TypeError If `trajectories` and `w_vector` are not iterables """ try: if len(trajectories) != len(w_vector): raise ValueError( "trajectories (%d) & w_vector (%d) must be equal " "lengths" % (len(trajectories), len(w_vector)) ) except TypeError: raise TypeError("trajectories and w_vector must be iterables") # check no repeated values are passed in the weighting vector if len(set(w_vector)) != len(w_vector): raise ValueError("The weighting vector must be a gradient") # no need to weight in case of a one element vector if len(w_vector) == 1: return trajectories # Cast to float so divisions have a floating point resolution total_length = float(max(w_vector) - min(w_vector)) # Reflects the expected gradient between subsequent values in w_vector # the first value isn't weighted so subtract one from the number of # elements optimal_gradient = total_length / (len(w_vector) - 1) # for all elements apply the weighting function for i, idx in enumerate(trajectories.index): # Skipping the first element is it doesn't need to be weighted if i != 0: trajectories.loc[idx] = ( trajectories.loc[idx] * optimal_gradient / np.abs((w_vector.iloc[i] - w_vector.iloc[i - 1])) ) return trajectories.astype("float64") def _ANOVA_trajectories(category, res_by_group): r"""Run ANOVA over `res_by_group`. 
If ANOVA cannot be run in the current category (because either there is only one group in category or there is a group with only one member) the result CategoryResults instance has `probability` and `groups` set to None and message is set to a string explaining why ANOVA was not run Returns ------- CategoryResults An instance of CategoryResults holding the results of the trajectory analysis applied on `category` """ # If there is only one group under category we cannot run ANOVA if len(res_by_group) == 1: return CategoryResults(category, None, None, "Only one value in the group.") # Check if groups can be tested using ANOVA. ANOVA testing requires # all elements to have at least size greater to one. values = [res.trajectory.astype(float) for res in res_by_group] if any([len(value) == 1 for value in values]): return CategoryResults( category, None, None, "This group can not be used. All groups " "should have more than 1 element.", ) # We are ok to run ANOVA _, p_val = f_oneway(*values) return CategoryResults(category, p_val, res_by_group, None) class GroupResults: """Store the trajectory results of a group of a metadata category. Attributes ---------- name : str The name of the group within the metadata category trajectory : array like The result trajectory in an 1-D numpy array mean : float The mean of the trajectory info : dict Any extra information computed by the trajectory algorithm. Depends on the algorithm message : str A message with information of the execution of the algorithm """ def __init__(self, name, trajectory, mean, info, message): """Construct all the necessary attributes for the GroupResults object.""" self.name = name self.trajectory = trajectory self.mean = mean self.info = info self.message = message def to_files(self, out_f, raw_f): r"""Save trajectory analysis results for a category group to text format files. Parameters ---------- out_f : file-like object File-like object to write trajectory analysis data to. Must have a `write` method. 
It is the caller's responsibility to close `out_f` when done (if necessary) raw_f : file-like object File-like object to write trajectories trajectory values. Must have a `write` method. It is the caller's responsibility to close `out_f` when done (if necessary) """ out_f.write('For group "%s", the group means is: %f\n' % (self.name, self.mean)) raw_f.write('For group "%s":\n' % self.name) if self.message: out_f.write("%s\n" % self.message) raw_f.write("%s\n" % self.message) out_f.write( "The info is: %s\n" % sorted(((k, v) for k, v in self.info.items())) ) raw_f.write("The trajectory is:\n[%s]\n" % ", ".join(map(str, self.trajectory))) class CategoryResults: """Store the trajectory results of a metadata category. Attributes ---------- category : str The name of the category probability : float The ANOVA probability that the category groups are independent groups : list of GroupResults The trajectory results for each group in the category message : str A message with information of the execution of the algorithm """ def __init__(self, category, probability, groups, message): """Construct all the necessary attributes for the CategoryResults object.""" self.category = category self.probability = probability self.groups = groups self.message = message def to_files(self, out_f, raw_f): r"""Save trajectory analysis results for a category to files in text format. Parameters ---------- out_f : file-like object File-like object to write trajectory analysis data to. Must have a `write` method. It is the caller's responsibility to close `out_f` when done (if necessary) raw_f : file-like object File-like object to write trajectory raw values. Must have a `write` method. 
It is the caller's responsibility to close `out_f` when done (if necessary) """ if self.probability is None: out_f.write('Grouped by "%s": %s\n' % (self.category, self.message)) else: out_f.write( 'Grouped by "%s", probability: %f\n' % (self.category, self.probability) ) raw_f.write('Grouped by "%s"\n' % self.category) for group in self.groups: group.to_files(out_f, raw_f) class GradientANOVAResults: """Store the trajectory results. Attributes ---------- algorithm : str The algorithm used to compute trajectories weighted : bool If true, a weighting vector was used categories : list of CategoryResults The trajectory results for each metadata category """ def __init__(self, algorithm, weighted, categories): """Construct all the attributes for the GradientANOVAResults object.""" self.algorithm = algorithm self.weighted = weighted self.categories = categories def to_files(self, out_f, raw_f): r"""Save the trajectory analysis results to files in text format. Parameters ---------- out_f : file-like object File-like object to write trajectories analysis data to. Must have a `write` method. It is the caller's responsibility to close `out_f` when done (if necessary) raw_f : file-like object File-like object to write trajectories raw values. Must have a `write` method. It is the caller's responsibility to close `out_f` when done (if necessary) """ out_f.write("Trajectory algorithm: %s\n" % self.algorithm) raw_f.write("Trajectory algorithm: %s\n" % self.algorithm) if self.weighted: out_f.write("** This output is weighted **\n") raw_f.write("** This output is weighted **\n") out_f.write("\n") raw_f.write("\n") for cat_results in self.categories: cat_results.to_files(out_f, raw_f) out_f.write("\n") raw_f.write("\n") class GradientANOVA: r"""Base class for the Trajectory algorithms. 
Parameters ---------- coords : pandas.DataFrame The coordinates for each sample id prop_expl : array like The numpy 1-D array with the proportion explained by each axis in coords metadata_map : pandas.DataFrame The metadata map, indexed by sample ids and columns are metadata categories trajectory_categories : list of str, optional A list of metadata categories to use to create the trajectories. If None is passed, the trajectories for all metadata categories are computed. Default: None, compute all of them sort_category : str, optional The metadata category to use to sort the trajectories. Default: None axes : int, optional The number of axes to account while doing the trajectory specific calculations. Pass 0 to compute all of them. Default: 3 weighted : bool, optional If true, the output is weighted by the space between samples in the `sort_category` column Raises ------ ValueError If any category of `trajectory_categories` is not present in `metadata_map` If `sort_category` is not present in `metadata_map` If `axes` is not between 0 and the maximum number of axes available If `weighted` is True and no `sort_category` is provided If `weighted` is True and the values under `sort_category` are not numerical If `coords` and `metadata_map` does not have samples in common """ # Should be defined by the derived classes _alg_name = None def __init__( self, coords, prop_expl, metadata_map, trajectory_categories=None, sort_category=None, axes=3, weighted=False, ): """Construct all the necessary attributes for the GradientANOVA object.""" if not trajectory_categories: # If trajectory_categories is not provided, use all the categories # present in the metadata map trajectory_categories = metadata_map.keys() else: # Check that trajectory_categories are in metadata_map for category in trajectory_categories: if category not in metadata_map: raise ValueError("Category %s not present in metadata." 
% category) # Check that sort_categories is in metadata_map if sort_category and sort_category not in metadata_map: raise ValueError( "Sort category %s not present in metadata." % sort_category ) if axes == 0: # If axes == 0, we should compute the trajectories for all axes axes = len(prop_expl) elif axes > len(prop_expl) or axes < 0: # Axes should be 0 <= axes <= len(prop_expl) raise ValueError( "axes should be between 0 and the max number of " "axes available (%d), found: %d " % (len(prop_expl), axes) ) # Restrict coordinates to those axes that we actually need to compute self._coords = coords.loc[:, : axes - 1] self._prop_expl = prop_expl[:axes] self._metadata_map = metadata_map self._weighted = weighted # Remove any samples from coords not present in mapping file # and remove any samples from metadata_map not present in coords self._normalize_samples() # Create groups self._make_groups(trajectory_categories, sort_category) # Compute the weighting_vector self._weighting_vector = None if weighted: if not sort_category: raise ValueError( "You should provide a sort category if you " "want to weight the trajectories" ) try: self._weighting_vector = self._metadata_map[sort_category].astype( np.float64 ) except ValueError: raise ValueError("The sorting category must be numeric") # Initialize the message buffer self._message_buffer = [] def get_trajectories(self): r"""Compute the trajectories for each group and category and run ANOVA. More specifically, compute the trajectories for each group in each category and run ANOVA over the results to test group independence. Returns ------- GradientANOVAResults An instance of GradientANOVAResults holding the results. 
""" result = GradientANOVAResults(self._alg_name, self._weighted, []) # Loop through all the categories that we should compute # the trajectories for cat, cat_groups in self._groups.items(): # Loop through all the category values present in the current # category and compute the trajectory for each of them res_by_group = [] for group in sorted(cat_groups, key=lambda k: str(k)): res_by_group.append( self._get_group_trajectories(group, cat_groups[group]) ) result.categories.append(_ANOVA_trajectories(cat, res_by_group)) return result def _normalize_samples(self): r"""Ensure `self._coords` and `self._metadata_map` have the same sample ids. Raises ------ ValueError If `coords` and `metadata_map` does not have samples in common """ # Figure out the sample ids in common coords_sample_ids = set(self._coords.index) mm_sample_ids = set(self._metadata_map.index) sample_ids = coords_sample_ids.intersection(mm_sample_ids) # Check if they actually have sample ids in common if not sample_ids: raise ValueError("Coordinates and metadata map had no samples " "in common") # pandas no longer allows use of set with .loc sample_ids = list(sample_ids) # Need to take a subset of coords if coords_sample_ids != sample_ids: self._coords = self._coords.loc[sample_ids] # Need to take a subset of metadata_map if mm_sample_ids != sample_ids: self._metadata_map = self._metadata_map.loc[sample_ids] def _make_groups(self, trajectory_categories, sort_category): r"""Group sample ids in `self._metadata_map` by `trajectory_categories` values. Creates `self._groups`, a dictionary keyed by category and values are dictionaries in which the keys represent the group name within the category and values are ordered lists of sample ids If `sort_category` is not None, the sample ids are sorted based on the values under this category in the metadata map. Otherwise, they are sorted using the sample id. 
Parameters ---------- trajectory_categories : list of str A list of metadata categories to use to create the groups. Default: None, compute all of them sort_category : str or None The category from self._metadata_map to use to sort groups """ # If sort_category is provided, we used the value of such category to # sort. Otherwise, we use the sample id. if sort_category: def sort_val(sid): return self._metadata_map[sort_category][sid] else: def sort_val(sid): return sid self._groups = defaultdict(dict) for cat in trajectory_categories: # Group samples by category gb = self._metadata_map.groupby(cat) for g, df in gb: self._groups[cat][g] = realsorted(df.index, key=sort_val) def _get_group_trajectories(self, group_name, sids): r"""Compute trajectory results for `group_name` containing the samples `sids`. Weights the data if `self._weighted` is True and ``len(sids) > 1`` Parameters ---------- group_name : str The name of the group sids : list of str The sample ids in the group Returns ------- GroupResults The trajectory results for the given group Raises ------ RuntimeError If sids is an empty list """ # We multiply the coord values with the prop_expl trajectories = self._coords.loc[sids] * self._prop_expl if trajectories.empty: # Raising a RuntimeError since in a usual execution this should # never happen. 
The only way this can happen is if the user # directly calls this method, which shouldn't be done # (that's why the method is private) raise RuntimeError( "No samples to process, an empty list cannot " "be processed" ) # The weighting can only be done over trajectories with a length # greater than 1 if self._weighted and len(sids) > 1: trajectories_copy = deepcopy(trajectories) try: trajectories = _weight_by_vector( trajectories_copy, self._weighting_vector[sids] ) except (FloatingPointError, ValueError): self._message_buffer.append( "Could not weight group, no " "gradient in the the " "weighting vector.\n" ) trajectories = trajectories_copy return self._compute_trajectories_results(group_name, trajectories.loc[sids]) def _compute_trajectories_results(self, group_name, trajectories): r"""Do the actual trajectories computation over trajectories. Parameters ---------- group_name : str The name of the group trajectories : pandas.DataFrame The sorted trajectories for each sample in the group Raises ------ NotImplementedError This is the base class """ raise NotImplementedError("No algorithm is implemented on the base " "class.") class AverageGradientANOVA(GradientANOVA): r"""Perform trajectory analysis using the RMS average algorithm. For each group in a category, it computes the average point among the samples in such group and then computes the norm of each sample from the averaged one. See Also -------- GradientANOVA """ _alg_name = "avg" def _compute_trajectories_results(self, group_name, trajectories): r"""Do the actual trajectory computation over trajectories. 
Parameters ---------- group_name : str The name of the group trajectories : pandas.DataFrame The sorted trajectories for each sample in the group Returns ------- GroupResults The trajectory results for `group_name` using the average trajectories method """ center = np.average(trajectories, axis=0) if len(trajectories) == 1: trajectory = np.array([np.linalg.norm(center)]) calc = {"avg": trajectory[0]} else: trajectory = np.array( [ np.linalg.norm(row[1].to_numpy() - center) for row in trajectories.iterrows() ] ) calc = {"avg": np.average(trajectory)} msg = "".join(self._message_buffer) if self._message_buffer else None # Reset the message buffer self._message_buffer = [] return GroupResults(group_name, trajectory, np.mean(trajectory), calc, msg) class TrajectoryGradientANOVA(GradientANOVA): r"""Perform trajectory analysis using the RMS trajectory algorithm. For each group in a category, each component of the result trajectory is computed as taking the sorted list of samples in the group and taking the norm of the coordinates of the 2nd sample minus 1st sample, 3rd sample minus 2nd sample and so on. See Also -------- GradientANOVA """ _alg_name = "trajectory" def _compute_trajectories_results(self, group_name, trajectories): r"""Do the actual trajectory computation over trajectories. Parameters ---------- group_name : str The name of the group trajectories : pandas.DataFrame The sorted trajectories for each sample in the group Returns ------- GroupResults The trajectory results for `group_name` using the trajectory method """ if len(trajectories) == 1: trajectory = np.array([np.linalg.norm(trajectories)]) calc = {"2-norm": trajectory[0]} else: # Loop through all the rows in trajectories and create '2-norm' # by taking the norm of the 2nd row - 1st row, 3rd row - 2nd row... 
trajectory = np.array( [ np.linalg.norm( trajectories.iloc[i + 1].to_numpy() - trajectories.iloc[i].to_numpy() ) for i in range(len(trajectories) - 1) ] ) calc = {"2-norm": np.linalg.norm(trajectory)} msg = "".join(self._message_buffer) if self._message_buffer else None # Reset the message buffer self._message_buffer = [] return GroupResults(group_name, trajectory, np.mean(trajectory), calc, msg) class FirstDifferenceGradientANOVA(GradientANOVA): r"""Perform trajectory analysis using the first difference algorithm. It calculates the norm for all the time-points and then calculates the first difference for each resulting point See Also -------- GradientANOVA """ _alg_name = "diff" def _compute_trajectories_results(self, group_name, trajectories): r"""Do the actual trajectory computation over trajectories. Parameters ---------- group_name : str The name of the group trajectories : pandas.DataFrame The sorted trajectories for each sample in the group Returns ------- GroupResults The trajectory results for `group_name` using the first difference method """ if len(trajectories) == 1: trajectory = np.array([np.linalg.norm(trajectories)]) calc = {"mean": trajectory[0], "std": 0} elif len(trajectories) == 2: trajectory = np.array([np.linalg.norm(trajectories[1] - trajectories[0])]) calc = {"mean": trajectory[0], "std": 0} else: vec_norm = np.array( [ np.linalg.norm( trajectories.iloc[i + 1].to_numpy() - trajectories.iloc[i].to_numpy() ) for i in range(len(trajectories) - 1) ] ) trajectory = np.diff(vec_norm) calc = {"mean": np.mean(trajectory), "std": np.std(trajectory)} msg = "".join(self._message_buffer) if self._message_buffer else None # Reset the message buffer self._message_buffer = [] return GroupResults(group_name, trajectory, np.mean(trajectory), calc, msg) class WindowDifferenceGradientANOVA(GradientANOVA): r"""Perform trajectory analysis using the modified first difference algorithm. 
It calculates the norm for all the time-points and subtracts the mean of the next number of elements specified in `window_size` and the current element. Parameters ---------- coords : pandas.DataFrame The coordinates for each sample id prop_expl : array like The numpy 1-D array with the proportion explained by each axis in coords metadata_map : pandas.DataFrame The metadata map, indexed by sample ids and columns are metadata categories window_size : int or long The window size to use while computing the differences Raises ------ ValueError If the window_size is not a positive integer See Also -------- GradientANOVA """ _alg_name = "wdiff" def __init__(self, coords, prop_expl, metadata_map, window_size, **kwargs): """Build all the attributes for the WindowDifferenceGradientANOVA object.""" super(WindowDifferenceGradientANOVA, self).__init__( coords, prop_expl, metadata_map, **kwargs ) if not isinstance(window_size, Integral) or window_size < 1: raise ValueError("The window_size must be a positive integer") self._window_size = window_size def _compute_trajectories_results(self, group_name, trajectories): r"""Do the actual trajectory computation over trajectories. If the first difference cannot be calculated of the provided window size, no difference is applied and a message is added to the results. 
Parameters ---------- group_name : str The name of the group trajectories : pandas.DataFrame The sorted trajectories for each sample in the group Returns ------- GroupResults The trajectory results for `group_name` using the windowed difference method """ if len(trajectories) == 1: trajectory = np.array([np.linalg.norm(trajectories)]) calc = {"mean": trajectory, "std": 0} elif len(trajectories) == 2: trajectory = np.array([np.linalg.norm(trajectories[1] - trajectories[0])]) calc = {"mean": trajectory, "std": 0} else: vec_norm = np.array( [ np.linalg.norm( trajectories.iloc[i + 1].to_numpy() - trajectories.iloc[i].to_numpy() ) for i in range(len(trajectories) - 1) ] ) # windowed first differences won't be able on every group, # specially given the variation of size that a trajectory tends # to have if len(vec_norm) <= self._window_size: trajectory = vec_norm self._message_buffer.append( "Cannot calculate the first " "difference with a window of size " "(%d)." % self._window_size ) else: # Replicate the last element as many times as required for idx in range(0, self._window_size): vec_norm = np.append(vec_norm, vec_norm[-1:], axis=0) trajectory = [] for idx in range(0, len(vec_norm) - self._window_size): # Meas has to be over axis 0 so it handles arrays of arrays element = np.mean( vec_norm[(idx + 1) : (idx + 1 + self._window_size)], axis=0 ) trajectory.append(element - vec_norm[idx]) trajectory = np.array(trajectory) calc = {"mean": np.mean(trajectory), "std": np.std(trajectory)} msg = "".join(self._message_buffer) if self._message_buffer else None # Reset the message buffer self._message_buffer = [] return GroupResults(group_name, trajectory, np.mean(trajectory), calc, msg) scikit-bio-0.6.2/skbio/stats/ordination/000077500000000000000000000000001464262511300201735ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/ordination/__init__.py000066400000000000000000000117151464262511300223110ustar00rootroot00000000000000r"""Ordination methods 
(:mod:`skbio.stats.ordination`) ================================================== .. currentmodule:: skbio.stats.ordination This module provides functions for ordination -- a category of methods that aim at arranging data so that similar data points are proximate to each other. Ordination can preserve and represent the structure of high-dimensional data within a low-dimensional space, thereby facilitating visual exploration and statistical analysis. Mathematically, ordination shares similarities with, and is in multiple respects equivalent to, embedding and dimensionality reduction. While all three aim to represent high-dimensional data in a lower-dimensional space, the term "ordination" is mainly used in the field of ecology to reveal patterns such as groups or gradients underlying community data. However, the ordination methods implemented in scikit-bio are versatile, serving not only ecological studies but also broader applications in scientific computing. Multidimensional scaling ------------------------ .. autosummary:: :toctree: pcoa pcoa_biplot Correspondence analysis ----------------------- .. autosummary:: :toctree: ca Canonical analysis ------------------ .. autosummary:: :toctree: cca rda Ordination results ------------------ .. autosummary:: :toctree: OrdinationResults Utility functions ----------------- .. autosummary:: :toctree: mean_and_std corr scale svd_rank e_matrix f_matrix Examples -------- This is an artificial dataset (table 11.3 in [1]_) that represents fish abundance in different sites (`Y`, the response variables) and environmental variables (`X`, the explanatory variables). >>> import numpy as np >>> import pandas as pd First we need to construct our explanatory variable dataset `X`. >>> X = np.array([[1.0, 0.0, 1.0, 0.0], ... [2.0, 0.0, 1.0, 0.0], ... [3.0, 0.0, 1.0, 0.0], ... [4.0, 0.0, 0.0, 1.0], ... [5.0, 1.0, 0.0, 0.0], ... [6.0, 0.0, 0.0, 1.0], ... [7.0, 1.0, 0.0, 0.0], ... [8.0, 0.0, 0.0, 1.0], ... [9.0, 1.0, 0.0, 0.0], ... 
[10.0, 0.0, 0.0, 1.0]]) >>> transects = ['depth', 'substrate_coral', 'substrate_sand', ... 'substrate_other'] >>> sites = ['site1', 'site2', 'site3', 'site4', 'site5', 'site6', 'site7', ... 'site8', 'site9', 'site10'] >>> X = pd.DataFrame(X, sites, transects) Then we need to create a dataframe with the information about the species observed at different sites. >>> species = ['specie1', 'specie2', 'specie3', 'specie4', 'specie5', ... 'specie6', 'specie7', 'specie8', 'specie9'] >>> Y = np.array([[1, 0, 0, 0, 0, 0, 2, 4, 4], ... [0, 0, 0, 0, 0, 0, 5, 6, 1], ... [0, 1, 0, 0, 0, 0, 0, 2, 3], ... [11, 4, 0, 0, 8, 1, 6, 2, 0], ... [11, 5, 17, 7, 0, 0, 6, 6, 2], ... [9, 6, 0, 0, 6, 2, 10, 1, 4], ... [9, 7, 13, 10, 0, 0, 4, 5, 4], ... [7, 8, 0, 0, 4, 3, 6, 6, 4], ... [7, 9, 10, 13, 0, 0, 6, 2, 0], ... [5, 10, 0, 0, 2, 4, 0, 1, 3]]) >>> Y = pd.DataFrame(Y, sites, species) We can now perform canonical correspondence analysis. Matrix `X` contains a continuous variable (depth) and a categorical one (substrate type) encoded using a one-hot encoding. >>> from skbio.stats.ordination import cca We explicitly need to avoid perfect collinearity, so we'll drop one of the substrate types (the last column of `X`). >>> del X['substrate_other'] >>> ordination_result = cca(Y, X, scaling=2) Exploring the results we see that the first three axes explain about 80% of all the variance. >>> ordination_result.proportion_explained CCA1 0.466911 CCA2 0.238327 CCA3 0.100548 CCA4 0.104937 CCA5 0.044805 CCA6 0.029747 CCA7 0.012631 CCA8 0.001562 CCA9 0.000532 dtype: float64 References ---------- .. [1] Legendre P. and Legendre L. 1998. Numerical Ecology. Elsevier, Amsterdam. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._redundancy_analysis import rda from ._correspondence_analysis import ca from ._canonical_correspondence_analysis import cca from ._principal_coordinate_analysis import pcoa, pcoa_biplot from ._ordination_results import OrdinationResults from ._utils import ( mean_and_std, scale, svd_rank, corr, e_matrix, f_matrix, center_distance_matrix, ) __all__ = [ "ca", "rda", "cca", "pcoa", "pcoa_biplot", "OrdinationResults", "mean_and_std", "scale", "svd_rank", "corr", "e_matrix", "f_matrix", "center_distance_matrix", ] scikit-bio-0.6.2/skbio/stats/ordination/_canonical_correspondence_analysis.py000066400000000000000000000176711464262511300276430ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np import pandas as pd from scipy.linalg import svd, lstsq from ._ordination_results import OrdinationResults from ._utils import corr, svd_rank, scale def cca(y, x, scaling=1): r"""Compute canonical (also known as constrained) correspondence analysis. Canonical (or constrained) correspondence analysis is a multivariate ordination technique. It appeared in community ecology [1]_ and relates community composition to the variation in the environment (or in other factors). It works from data on abundances or counts of samples and constraints variables, and outputs ordination axes that maximize sample separation among species. It is better suited to extract the niches of taxa than linear multivariate methods because it assumes unimodal response curves (habitat preferences are often unimodal functions of habitat variables [2]_). 
As more environmental variables are added, the result gets more similar to unconstrained ordination, so only the variables that are deemed explanatory should be included in the analysis. Parameters ---------- y : DataFrame Samples by features table (n, m) x : DataFrame Samples by constraints table (n, q) scaling : int, {1, 2}, optional Scaling type 1 maintains :math:`\chi^2` distances between rows. Scaling type 2 preserves :math:`\chi^2` distances between columns. For a more detailed explanation of the interpretation, check Legendre & Legendre 1998, section 9.4.3. Returns ------- OrdinationResults Object that stores the cca results. Raises ------ ValueError If `x` and `y` have different number of rows If `y` contains negative values If `y` contains a row of only 0's. NotImplementedError If scaling is not 1 or 2. See Also -------- ca rda OrdinationResults Notes ----- The algorithm is based on [3]_, \S 11.2, and is expected to give the same results as ``cca(y, x)`` in R's package vegan, except that this implementation won't drop constraining variables due to perfect collinearity: the user needs to choose which ones to input. Canonical *correspondence* analysis shouldn't be confused with canonical *correlation* analysis (CCorA, but sometimes called CCA), a different technique to search for multivariate relationships between two datasets. Canonical correlation analysis is a statistical tool that, given two vectors of random variables, finds linear combinations that have maximum correlation with each other. In some sense, it assumes linear responses of "species" to "environmental variables" and is not well suited to analyze ecological data. References ---------- .. [1] Cajo J. F. Ter Braak, "Canonical Correspondence Analysis: A New Eigenvector Technique for Multivariate Direct Gradient Analysis", Ecology 67.5 (1986), pp. 1167-1179. .. [2] Cajo J.F. Braak and Piet F.M. 
Verdonschot, "Canonical correspondence analysis and related multivariate methods in aquatic ecology", Aquatic Sciences 57.3 (1995), pp. 255-289. .. [3] Legendre P. and Legendre L. 1998. Numerical Ecology. Elsevier, Amsterdam. """ Y = y.values X = x.values # Perform parameter sanity checks if X.shape[0] != Y.shape[0]: raise ValueError( "The samples by features table 'y' and the samples by" " constraints table 'x' must have the same number of " " rows. 'y': {0} 'x': {1}".format(X.shape[0], Y.shape[0]) ) if Y.min() < 0: raise ValueError("The samples by features table 'y' must be nonnegative") row_max = Y.max(axis=1) if np.any(row_max <= 0): # Or else the lstsq call to compute Y_hat breaks raise ValueError( "The samples by features table 'y' cannot contain a " "row with only 0's" ) if scaling not in {1, 2}: raise NotImplementedError("Scaling {0} not implemented.".format(scaling)) # Step 1 (similar to Pearson chi-square statistic) grand_total = Y.sum() Q = Y / grand_total # Relative frequencies of Y (contingency table) # Features and sample weights (marginal totals) column_marginals = Q.sum(axis=0) row_marginals = Q.sum(axis=1) # Formula 9.32 in Lagrange & Lagrange (1998). Notice that it's an # scaled version of the contribution of each cell towards Pearson # chi-square statistic. expected = np.outer(row_marginals, column_marginals) Q_bar = (Q - expected) / np.sqrt(expected) # Step 2. Standardize columns of X with respect to sample weights, # using the maximum likelihood variance estimator (Legendre & # Legendre 1998, p. 595) X = scale(X, weights=row_marginals, ddof=0) # Step 3. Weighted multiple regression. X_weighted = row_marginals[:, None] ** 0.5 * X B, _, rank_lstsq, _ = lstsq(X_weighted, Q_bar) Y_hat = X_weighted.dot(B) Y_res = Q_bar - Y_hat # Step 4. Eigenvalue decomposition u, s, vt = svd(Y_hat, full_matrices=False) rank = svd_rank(Y_hat.shape, s) s = s[:rank] u = u[:, :rank] vt = vt[:rank] U = vt.T # Step 5. Eq. 
9.38 U_hat = Q_bar.dot(U) * s**-1 # Residuals analysis u_res, s_res, vt_res = svd(Y_res, full_matrices=False) rank = svd_rank(Y_res.shape, s_res) s_res = s_res[:rank] u_res = u_res[:, :rank] vt_res = vt_res[:rank] U_res = vt_res.T U_hat_res = Y_res.dot(U_res) * s_res**-1 eigenvalues = np.r_[s, s_res] ** 2 # Scalings (p. 596 L&L 1998): # feature scores, scaling 1 V = (column_marginals**-0.5)[:, None] * U # sample scores, scaling 2 V_hat = (row_marginals**-0.5)[:, None] * U_hat # sample scores, scaling 1 F = V_hat * s # feature scores, scaling 2 F_hat = V * s # Sample scores which are linear combinations of constraint # variables Z_scaling1 = (row_marginals**-0.5)[:, None] * Y_hat.dot(U) Z_scaling2 = Z_scaling1 * s**-1 # Feature residual scores, scaling 1 V_res = (column_marginals**-0.5)[:, None] * U_res # Sample residual scores, scaling 2 V_hat_res = (row_marginals**-0.5)[:, None] * U_hat_res # Sample residual scores, scaling 1 F_res = V_hat_res * s_res # Feature residual scores, scaling 2 F_hat_res = V_res * s_res eigvals = eigenvalues if scaling == 1: features_scores = np.hstack((V, V_res)) sample_scores = np.hstack((F, F_res)) sample_constraints = np.hstack((Z_scaling1, F_res)) elif scaling == 2: features_scores = np.hstack((F_hat, F_hat_res)) sample_scores = np.hstack((V_hat, V_hat_res)) sample_constraints = np.hstack((Z_scaling2, V_hat_res)) biplot_scores = corr(X_weighted, u) pc_ids = ["CCA%d" % (i + 1) for i in range(len(eigenvalues))] sample_ids = y.index feature_ids = y.columns eigvals = pd.Series(eigenvalues, index=pc_ids) samples = pd.DataFrame(sample_scores, columns=pc_ids, index=sample_ids) features = pd.DataFrame(features_scores, columns=pc_ids, index=feature_ids) biplot_scores = pd.DataFrame( biplot_scores, index=x.columns, columns=pc_ids[: biplot_scores.shape[1]] ) sample_constraints = pd.DataFrame( sample_constraints, index=sample_ids, columns=pc_ids ) return OrdinationResults( "CCA", "Canonical Correspondence Analysis", eigvals, samples, 
features=features, biplot_scores=biplot_scores, sample_constraints=sample_constraints, proportion_explained=eigvals / eigvals.sum(), ) scikit-bio-0.6.2/skbio/stats/ordination/_correspondence_analysis.py000066400000000000000000000167471464262511300256370ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np import pandas as pd from scipy.linalg import svd from ._ordination_results import OrdinationResults from ._utils import svd_rank def ca(X, scaling=1): r"""Compute correspondence analysis. Correspondence analysis is a multivariate statistical technique for ordination. In general, rows in the data table will correspond to samples and columns to features, but the method is symmetric. In order to measure the correspondence between rows and columns, the :math:`\chi^2` distance is used, and those distances are preserved in the transformed space. The :math:`\chi^2` distance doesn't take double zeros into account, and so it is expected to produce better ordination that PCA when the data has lots of zero values. It is related to Principal Component Analysis (PCA) but it should be preferred in the case of steep or long gradients, that is, when there are many zeros in the input data matrix. Parameters ---------- X : pd.DataFrame Samples by features table (n, m). It can be applied to different kinds of data tables but data must be non-negative and dimensionally homogeneous (quantitative or binary). The rows correspond to the samples and the columns correspond to the features. scaling : {1, 2} For a more detailed explanation of the interpretation, check Legendre & Legendre 1998, section 9.4.3. 
The notes that follow are quick recommendations. Scaling type 1 maintains :math:`\chi^2` distances between rows (samples): in the transformed space, the euclidean distances between rows are equal to the :math:`\chi^2` distances between rows in the original space. It should be used when studying the ordination of samples. Rows (samples) that are near a column (features) have high contributions from it. Scaling type 2 preserves :math:`\chi^2` distances between columns (features), so euclidean distance between columns after transformation is equal to :math:`\chi^2` distance between columns in the original space. It is best used when we are interested in the ordination of features. A column (features) that is next to a row (sample) means that it is more abundant there. Other types of scalings are currently not implemented, as they're less used by ecologists (Legendre & Legendre 1998, p. 456). In general, features appearing far from the center of the biplot and far from its edges will probably exhibit better relationships than features either in the center (may be multimodal features, not related to the shown ordination axes...) or the edges (sparse features...). Returns ------- OrdinationResults Object that stores the computed eigenvalues, the transformed sample coordinates, the transformed features coordinates and the proportion explained. Raises ------ NotImplementedError If the scaling value is not either `1` or `2`. ValueError If any of the input matrix elements are negative. See Also -------- cca rda OrdinationResults Notes ----- The algorithm is based on [1]_, \S 9.4.1., and is expected to give the same results as ``cca(X)`` in R's package vegan. References ---------- .. [1] Legendre P. and Legendre L. 1998. Numerical Ecology. Elsevier, Amsterdam. 
""" if scaling not in {1, 2}: raise NotImplementedError("Scaling {0} not implemented.".format(scaling)) short_method_name = "CA" long_method_name = "Correspondance Analysis" # we deconstruct the dataframe to avoid duplicating the data and be able # to perform operations on the matrix row_ids = X.index column_ids = X.columns X = np.asarray(X.values, dtype=np.float64) # Correspondance Analysis r, c = X.shape if X.min() < 0: raise ValueError("Input matrix elements must be non-negative.") # Step 1 (similar to Pearson chi-square statistic) grand_total = X.sum() Q = X / grand_total column_marginals = Q.sum(axis=0) row_marginals = Q.sum(axis=1) # Formula 9.32 in Lagrange & Lagrange (1998). Notice that it's # an scaled version of the contribution of each cell towards # Pearson chi-square statistic. expected = np.outer(row_marginals, column_marginals) Q_bar = (Q - expected) / np.sqrt(expected) # Eq. 9.32 # Step 2 (Singular Value Decomposition) U_hat, W, Ut = svd(Q_bar, full_matrices=False) # Due to the centering, there are at most min(r, c) - 1 non-zero # eigenvalues (which are all positive) rank = svd_rank(Q_bar.shape, W) assert rank <= min(r, c) - 1 U_hat = U_hat[:, :rank] W = W[:rank] U = Ut[:rank].T # Both scalings are a bit intertwined, so we'll compute both and # then choose V = column_marginals[:, None] ** -0.5 * U V_hat = row_marginals[:, None] ** -0.5 * U_hat F = V_hat * W # According to Formula 9.43, this should hold # assert np.allclose(F, (row_marginals**-1)[:, None] * Q.dot(V)) # but it doesn't (notice that W**2==Lambda): # (9.43a) F = V_hat W = D(p_i+)^{-1/2} U_hat W # = D(p_i+)^{-1/2} Q_bar U W^{-1} W (substituting 9.38) # = D(p_i+)^{-1/2} Q_bar U # (9.43b) F = D(p_i+)^{-1} Q V # = D(p_i+)^{-1} Q D(p_+j)^{-1/2} U (substituting 9.41) # = D(p_i+)^{-1/2} D(p_i+)^{-1/2} Q D(p_+j)^{-1/2} U # = D(p_i+)^{-1/2} Q_tilde U (using 9.40) # It holds if we replace Q in 9.43b with Q after centering, ie # assert np.allclose( # F, # (row_marginals**-1)[:, None] * (Q - 
expected).dot(V)) # Comparing results with vegan and the examples in the book, 9.43a # is the right one. The same issue happens in 9.44, where also # 9.44a is the one that matches vegan's output. # (9.44a) F_hat = V W = D(p_+j)^{-1/2} U W # = D(p_+j)^{-1/2} Q_bar' U_hat W^{-1} W (using 9.39) # = D(p_+j)^{-1/2} Q_bar' U_hat # (9.44b) F_hat = D(p_+j)^{-1} Q' V_hat # = D(p_+j)^{-1/2} Q_tilde' U_hat (using 9.40 and 9.42) F_hat = V * W # Eigenvalues eigvals = W**2 # features scores features_scores = [V, F_hat][scaling - 1] # sample scores (weighted averages of features scores) sample_scores = [F, V_hat][scaling - 1] # build the OrdinationResults object sample_columns = [ "%s%d" % (short_method_name, i + 1) for i in range(sample_scores.shape[1]) ] feature_columns = [ "%s%d" % (short_method_name, i + 1) for i in range(features_scores.shape[1]) ] eigvals = pd.Series( eigvals, ["%s%d" % (short_method_name, i + 1) for i in range(eigvals.shape[0])] ) samples = pd.DataFrame(sample_scores, row_ids, sample_columns) features = pd.DataFrame(features_scores, column_ids, feature_columns) proportion_explained = eigvals / eigvals.sum() return OrdinationResults( short_method_name, long_method_name, eigvals, samples=samples, features=features, proportion_explained=proportion_explained, ) scikit-bio-0.6.2/skbio/stats/ordination/_cutils.pyx000066400000000000000000000121021464262511300223730ustar00rootroot00000000000000# ----------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ----------------------------------------------------------------------------- import numpy as np cimport cython from cython.parallel import prange from cpython.mem cimport PyMem_Malloc, PyMem_Realloc, PyMem_Free ctypedef fused TReal: float double @cython.boundscheck(False) @cython.wraparound(False) def e_matrix_means_cy(TReal[:, ::1] mat, TReal[:, ::1] centered, TReal[::1] row_means): """ Compute E matrix from a distance matrix, and also compute the means in the process. Squares and divides by -2 the input elementwise. Eq. 9.20 in Legendre & Legendre 1998. Parameters ---------- mat : 2D array_like Distance matrix. centered : 2D array_like Output, E matrix. Must be pre-allocated and same shape as mat. Can point to mat (i.e. in-place) row_means : 1D_array_like Output, Mean values of each row in `centered` Returns ------- global_mean : real Global mean value """ cdef Py_ssize_t n_samples = mat.shape[0] cdef Py_ssize_t d2 = mat.shape[1] cdef Py_ssize_t d3 = centered.shape[1] cdef Py_ssize_t d4 = centered.shape[1] cdef Py_ssize_t d5 = row_means.shape[0] assert n_samples == d2 assert n_samples == d3 assert n_samples == d4 assert n_samples == d5 cdef Py_ssize_t row,col cdef long double row_sum cdef TReal el0 cdef long double global_sum = 0.0 for row in prange(n_samples, nogil=True): row_sum = 0.0 for col in range(n_samples): el0 = mat[row,col] el0 = -0.5*el0*el0 centered[row,col] = el0 # Note: do not use +=, so it is not flagged as a global reduction row_sum = row_sum + el0 global_sum += row_sum row_means[row] = row_sum/n_samples cdef TReal global_mean = (global_sum/n_samples)/n_samples return global_mean @cython.boundscheck(False) @cython.wraparound(False) def f_matrix_inplace_cy(TReal[::1] row_means, TReal global_mean, TReal[:, ::1] centered): """ Compute F matrix from E matrix inplace. Centering step: for each element, the mean of the corresponding row and column are subtracted, and the mean of the whole matrix is added. Eq. 9.21 in Legendre & Legendre 1998. 
Modified from :func:`skbio.stats.ordination.f_matrix_inplace` function, Parameters ---------- row_means : 1D_array_like Mean values of each row in `centered` global_mean : real Global mean value in `centered` centered : 2D array_like, must be symmetric In, a matrix representing the "E matrix" as described above. Out, the centered matrix """ cdef Py_ssize_t n_samples = centered.shape[0] cdef Py_ssize_t d2 = centered.shape[1] cdef Py_ssize_t d3 = row_means.shape[0] assert n_samples == d2 assert n_samples == d3 cdef Py_ssize_t trow,tcol,row,col cdef Py_ssize_t trow_max,tcol_max cdef TReal gr_mean # use a tiled pattern to maximize locality of row_means for trow in prange(0, n_samples, 24, nogil=True): trow_max = min(trow+24, n_samples) for tcol in range(0, n_samples, 24): tcol_max = min(tcol+24, n_samples) for row in range(trow, trow_max, 1): gr_mean = global_mean - row_means[row] for col in range(tcol, tcol_max, 1): # Note: do not use +=, so it is not flagged as a global reduction centered[row,col] = centered[row,col] + (gr_mean - row_means[col]) @cython.boundscheck(False) @cython.wraparound(False) def center_distance_matrix_cy(TReal[:, ::1] mat, TReal[:, ::1] centered): """ Centers a distance matrix. Note: If the used distance was euclidean, pairwise distances needn't be computed from the data table Y because F_matrix = Y.dot(Y.T) (if Y has been centered). But since we're expecting distance_matrix to be non-euclidian, we do the following computation as per Numerical Ecology (Legendre & Legendre 1998). Parameters ---------- mat : 2D array_like Distance matrix. centered : 2D array_like Output centered matrix. Must be pre-allocated and same shape as mat. Can point to mat (i.e. 
in-place) """ cdef Py_ssize_t n_samples = mat.shape[0] cdef Py_ssize_t d2 = mat.shape[1] cdef Py_ssize_t d3 = centered.shape[1] cdef Py_ssize_t d4 = centered.shape[1] assert n_samples == d2 assert n_samples == d3 assert n_samples == d4 cdef TReal global_mean if TReal is float: dtype_real = np.float32 else: dtype_real = np.float64 row_means_np = np.zeros((n_samples,), dtype=dtype_real) cdef TReal[::1] row_means = row_means_np global_mean = e_matrix_means_cy(mat, centered, row_means) f_matrix_inplace_cy(row_means, global_mean, centered) scikit-bio-0.6.2/skbio/stats/ordination/_ordination_results.py000066400000000000000000000407241464262511300246420ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import functools import numpy as np from skbio._base import SkbioObject from skbio.stats._misc import _pprint_strs from skbio.util._plotting import PlottableMixin class OrdinationResults(SkbioObject, PlottableMixin): """Store ordination results, providing serialization and plotting support. Stores various components of ordination results. Provides methods for serializing/deserializing results, as well as generation of basic matplotlib 3-D scatterplots. Will automatically display PNG/SVG representations of itself within the IPython Notebook. Attributes ---------- short_method_name : str Abbreviated ordination method name. long_method_name : str Ordination method name. eigvals : pd.Series The resulting eigenvalues. The index corresponds to the ordination axis labels samples : pd.DataFrame The position of the samples in the ordination space, row-indexed by the sample id. 
features : pd.DataFrame The position of the features in the ordination space, row-indexed by the feature id. biplot_scores : pd.DataFrame Correlation coefficients of the samples with respect to the features. sample_constraints : pd.DataFrame Site constraints (linear combinations of constraining variables): coordinates of the sites in the space of the explanatory variables X. These are the fitted site scores proportion_explained : pd.Series Proportion explained by each of the dimensions in the ordination space. The index corresponds to the ordination axis labels See Also -------- ca cca pcoa rda """ default_write_format = "ordination" def __init__( self, short_method_name, long_method_name, eigvals, samples, features=None, biplot_scores=None, sample_constraints=None, proportion_explained=None, ): self.short_method_name = short_method_name self.long_method_name = long_method_name self.eigvals = eigvals self.samples = samples self.features = features self.biplot_scores = biplot_scores self.sample_constraints = sample_constraints self.proportion_explained = proportion_explained def __str__(self): """Return a string representation of the ordination results. String representation lists ordination results attributes and indicates whether or not they are present. If an attribute is present, its dimensions are listed. A truncated list of features and sample IDs are included (if they are present). Returns ------- str String representation of the ordination results. 
""" lines = ["Ordination results:"] method = "%s (%s)" % (self.long_method_name, self.short_method_name) lines.append(self._format_attribute(method, "Method", str)) attrs = [ (self.eigvals, "Eigvals"), (self.proportion_explained, "Proportion explained"), (self.features, "Features"), (self.samples, "Samples"), (self.biplot_scores, "Biplot Scores"), (self.sample_constraints, "Sample constraints"), ] for attr, attr_label in attrs: def formatter(e): return "x".join(["%d" % s for s in e.shape]) lines.append(self._format_attribute(attr, attr_label, formatter)) lines.append( self._format_attribute( self.features, "Feature IDs", lambda e: _pprint_strs(e.index.tolist()) ) ) lines.append( self._format_attribute( self.samples, "Sample IDs", lambda e: _pprint_strs(e.index.tolist()) ) ) return "\n".join(lines) def plot( self, df=None, column=None, axes=(0, 1, 2), axis_labels=None, title="", cmap=None, s=20, ): """Create a 3-D scatterplot of ordination results colored by metadata. Creates a 3-D scatterplot of the ordination results, where each point represents a sample. Optionally, these points can be colored by metadata (see `df` and `column` below). Parameters ---------- df : pd.DataFrame, optional ``DataFrame`` containing sample metadata. Must be indexed by sample ID, and all sample IDs in the ordination results must exist in the ``DataFrame``. If ``None``, samples (i.e., points) will not be colored by metadata. column : str, optional Column name in `df` to color samples (i.e., points in the plot) by. Cannot have missing data (i.e., ``np.nan``). `column` can be numeric or categorical. If numeric, all values in the column will be cast to ``float`` and mapped to colors using `cmap`. A colorbar will be included to serve as a legend. If categorical (i.e., not all values in `column` could be cast to ``float``), colors will be chosen for each category using evenly-spaced points along `cmap`. A legend will be included. 
If ``None``, samples (i.e., points) will not be colored by metadata. axes : iterable of int, optional Indices of sample coordinates to plot on the x-, y-, and z-axes. For example, if plotting PCoA results, ``axes=(0, 1, 2)`` will plot PC 1 on the x-axis, PC 2 on the y-axis, and PC 3 on the z-axis. Must contain exactly three elements. axis_labels : iterable of str, optional Labels for the x-, y-, and z-axes. If ``None``, labels will be the values of `axes` cast as strings. title : str, optional Plot title. cmap : str or matplotlib.colors.Colormap, optional Name or instance of matplotlib colormap to use for mapping `column` values to colors. If ``None``, defaults to the colormap specified in the matplotlib rc file. Qualitative colormaps (e.g., ``Set1``) are recommended for categorical data, while sequential colormaps (e.g., ``Greys``) are recommended for numeric data. See [1]_ for these colormap classifications. s : scalar or iterable of scalars, optional Size of points. See matplotlib's ``Axes3D.scatter`` documentation for more details. Returns ------- matplotlib.figure.Figure Figure containing the scatterplot and legend/colorbar if metadata were provided. Raises ------ ValueError Raised on invalid input, including the following situations: - there are not at least three dimensions to plot - there are not exactly three values in `axes`, they are not unique, or are out of range - there are not exactly three values in `axis_labels` - either `df` or `column` is provided without the other - `column` is not in the ``DataFrame`` - sample IDs in the ordination results are not in `df` or have missing data in `column` Notes ----- This method creates basic plots of ordination results, and is intended to provide a quick look at the results in the context of metadata (e.g., from within the Jupyter Lab). For more customization and to generate publication-quality figures, we recommend EMPeror [2]_. References ---------- .. 
[1] http://matplotlib.org/examples/color/colormaps_reference.html .. [2] EMPeror: a tool for visualizing high-throughput microbial community data. Vazquez-Baeza Y, Pirrung M, Gonzalez A, Knight R. Gigascience. 2013 Nov 26;2(1):16. http://biocore.github.io/emperor/ Examples -------- .. plot:: Define a distance matrix with four samples labelled A-D: >>> from skbio import DistanceMatrix >>> dm = DistanceMatrix([[0., 0.21712454, 0.5007512, 0.91769271], ... [0.21712454, 0., 0.45995501, 0.80332382], ... [0.5007512, 0.45995501, 0., 0.65463348], ... [0.91769271, 0.80332382, 0.65463348, 0.]], ... ['A', 'B', 'C', 'D']) Define metadata for each sample in a ``pandas.DataFrame``: >>> import pandas as pd >>> metadata = { ... 'A': {'body_site': 'skin'}, ... 'B': {'body_site': 'gut'}, ... 'C': {'body_site': 'gut'}, ... 'D': {'body_site': 'skin'}} >>> df = pd.DataFrame.from_dict(metadata, orient='index') Run principal coordinate analysis (PCoA) on the distance matrix: >>> from skbio.stats.ordination import pcoa >>> pcoa_results = pcoa(dm) Plot the ordination results, where each sample is colored by body site (a categorical variable): >>> fig = pcoa_results.plot( ... df=df, column='body_site', ... title='Samples colored by body site', ... cmap='Set1', s=50 ... ) # doctest: +SKIP """ # Note: New features should not be added to this method and should # instead be added to EMPeror (http://biocore.github.io/emperor/). # Only bug fixes and minor updates should be made to this method. 
self._get_mpl_plt() coord_matrix = self.samples.values.T self._validate_plot_axes(coord_matrix, axes) fig = self.plt.figure() ax = fig.add_subplot(projection="3d") xs = coord_matrix[axes[0]] ys = coord_matrix[axes[1]] zs = coord_matrix[axes[2]] point_colors, category_to_color = self._get_plot_point_colors( df, column, self.samples.index, cmap ) scatter_fn = functools.partial(ax.scatter, xs, ys, zs, s=s) if point_colors is None: plot = scatter_fn() else: plot = scatter_fn(c=point_colors) if axis_labels is None: axis_labels = ["%d" % axis for axis in axes] elif len(axis_labels) != 3: raise ValueError( "axis_labels must contain exactly three elements " "(found %d elements)." % len(axis_labels) ) ax.set_xlabel(axis_labels[0]) ax.set_ylabel(axis_labels[1]) ax.set_zlabel(axis_labels[2]) ax.set_xticklabels([]) ax.set_yticklabels([]) ax.set_zticklabels([]) ax.set_title(title) # create legend/colorbar if point_colors is not None: if category_to_color is None: fig.colorbar(plot) else: self._plot_categorical_legend(ax, category_to_color) return fig def _validate_plot_axes(self, coord_matrix, axes): """Validate `axes` against coordinates matrix.""" num_dims = coord_matrix.shape[0] if num_dims < 3: raise ValueError( "At least three dimensions are required to plot " "ordination results. There are only %d " "dimension(s)." % num_dims ) if len(axes) != 3: raise ValueError( "`axes` must contain exactly three elements " "(found %d elements)." % len(axes) ) if len(set(axes)) != 3: raise ValueError("The values provided for `axes` must be unique.") for idx, axis in enumerate(axes): if axis < 0 or axis >= num_dims: raise ValueError("`axes[%d]` must be >= 0 and < %d." % (idx, num_dims)) def _get_plot_point_colors(self, df, column, ids, cmap): """Return a list of colors for each plot point given a metadata column. If `column` is categorical, additionally returns a dictionary mapping each category (str) to color (used for legend creation). 
""" if (df is None and column is not None) or (df is not None and column is None): raise ValueError( "Both df and column must be provided, or both " "must be None." ) elif df is None and column is None: point_colors, category_to_color = None, None else: if column not in df: raise ValueError("Column '%s' not in data frame." % column) col_vals = df.reindex(ids, axis=0).loc[:, column] if col_vals.isnull().any(): raise ValueError( "One or more IDs in the ordination results " "are not in the data frame, or there is " "missing data in the data frame's '%s' " "column." % column ) category_to_color = None try: point_colors = col_vals.astype(float) except ValueError: # we have categorical data, so choose a color for each # category, where colors are evenly spaced across the # colormap. # derived from http://stackoverflow.com/a/14887119 categories = col_vals.unique() cmap = self.plt.get_cmap(cmap) category_colors = cmap(np.linspace(0, 1, len(categories))) category_to_color = dict(zip(categories, category_colors)) point_colors = col_vals.apply(lambda x: category_to_color[x]) point_colors = point_colors.tolist() return point_colors, category_to_color def _plot_categorical_legend(self, ax, color_dict): """Add legend to plot using specified mapping of category to color.""" # derived from http://stackoverflow.com/a/20505720 proxies = [] labels = [] for category in color_dict: proxy = self.mpl.lines.Line2D( [0], [0], linestyle="none", c=color_dict[category], marker="o" ) proxies.append(proxy) labels.append(category) # place legend outside of the axes (centered) # derived from http://matplotlib.org/users/legend_guide.html ax.legend( proxies, labels, numpoints=1, loc=6, bbox_to_anchor=(1.05, 0.5), borderaxespad=0.0, ) def _format_attribute(self, attr, attr_label, formatter): if attr is None: formatted_attr = "N/A" else: formatted_attr = formatter(attr) return "\t%s: %s" % (attr_label, formatted_attr) def rename(self, mapper, matrix="samples", strict=True): r"""Rename sample or 
feature IDs in the data matrix. Parameters ---------- mapper : dict or callable A dictionary or function that maps current IDs to new IDs. matrix : str, optional Specifies which matrix contains the IDs to be renamed. Either "samples" (default) or "features". strict : bool, optional If ``True`` (default), every ID in the matrix must be included in ``mapper``. If ``False``, only the specified IDs will be renamed. Raises ------ ValueError If ``mapper`` does not contain all of the same IDs in the matrix whereas in strict mode. ValueError If renaming features but self does not contain features. ValueError If ``matrix`` is neither "samples" nor "features". """ if matrix not in ("samples", "features"): raise ValueError('Matrix must be either "samples" or "features".') df = getattr(self, matrix) if matrix == "features" and df is None: raise ValueError( "`features` were not provided on the construction of this object." ) if strict and isinstance(mapper, dict) and not set(df.index).issubset(mapper): raise ValueError( "The IDs in mapper do not include all IDs in the %s matrix." % matrix ) df.rename(index=mapper, inplace=True) scikit-bio-0.6.2/skbio/stats/ordination/_principal_coordinate_analysis.py000066400000000000000000000407021464262511300270020ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import pandas as pd from numpy import dot, hstack from numpy.linalg import qr, svd from numpy.random import standard_normal from scipy.linalg import eigh from warnings import warn from skbio.stats.distance import DistanceMatrix from ._ordination_results import OrdinationResults from ._utils import center_distance_matrix, scale def pcoa(distance_matrix, method="eigh", number_of_dimensions=0, inplace=False): r"""Perform Principal Coordinate Analysis. Principal Coordinate Analysis (PCoA) is a method similar to Principal Components Analysis (PCA) with the difference that PCoA operates on distance matrices, typically with non-euclidian and thus ecologically meaningful distances like UniFrac in microbiome research. In ecology, the euclidean distance preserved by Principal Component Analysis (PCA) is often not a good choice because it deals poorly with double zeros (Species have unimodal distributions along environmental gradients, so if a species is absent from two sites at the same site, it can't be known if an environmental variable is too high in one of them and too low in the other, or too low in both, etc. On the other hand, if an species is present in two sites, that means that the sites are similar.). Note that the returned eigenvectors are not normalized to unit length. Parameters ---------- distance_matrix : DistanceMatrix A distance matrix. method : str, optional Eigendecomposition method to use in performing PCoA. By default, uses SciPy's `eigh`, which computes exact eigenvectors and eigenvalues for all dimensions. The alternate method, `fsvd`, uses faster heuristic eigendecomposition but loses accuracy. The magnitude of accuracy lost is dependent on dataset. number_of_dimensions : int, optional Dimensions to reduce the distance matrix to. This number determines how many eigenvectors and eigenvalues will be returned. 
By default, equal to the number of dimensions of the distance matrix, as default eigendecomposition using SciPy's `eigh` method computes all eigenvectors and eigenvalues. If using fast heuristic eigendecomposition through `fsvd`, a desired number of dimensions should be specified. Note that the default eigendecomposition method `eigh` does not natively support a specifying number of dimensions to reduce a matrix to, so if this parameter is specified, all eigenvectors and eigenvalues will be simply be computed with no speed gain, and only the number specified by `number_of_dimensions` will be returned. Specifying a value of `0`, the default, will set `number_of_dimensions` equal to the number of dimensions of the specified `distance_matrix`. inplace : bool, optional If true, centers a distance matrix in-place in a manner that reduces memory consumption. Returns ------- OrdinationResults Object that stores the PCoA results, including eigenvalues, the proportion explained by each of them, and transformed sample coordinates. See Also -------- OrdinationResults Notes ----- .. note:: If the distance is not euclidean (for example if it is a semimetric and the triangle inequality doesn't hold), negative eigenvalues can appear. There are different ways to deal with that problem (see Legendre & Legendre 1998, \S 9.2.3), but none are currently implemented here. However, a warning is raised whenever negative eigenvalues appear, allowing the user to decide if they can be safely ignored. 
""" distance_matrix = DistanceMatrix(distance_matrix) # Center distance matrix, a requirement for PCoA here matrix_data = center_distance_matrix(distance_matrix.data, inplace=inplace) # If no dimension specified, by default will compute all eigenvectors # and eigenvalues if number_of_dimensions == 0: if method == "fsvd" and matrix_data.shape[0] > 10: warn( "FSVD: since no value for number_of_dimensions is specified, " "PCoA for all dimensions will be computed, which may " "result in long computation time if the original " "distance matrix is large.", RuntimeWarning, ) # distance_matrix is guaranteed to be square number_of_dimensions = matrix_data.shape[0] elif number_of_dimensions < 0: raise ValueError( "Invalid operation: cannot reduce distance matrix " "to negative dimensions using PCoA. Did you intend " 'to specify the default value "0", which sets ' "the number_of_dimensions equal to the " "dimensionality of the given distance matrix?" ) # Perform eigendecomposition if method == "eigh": # eigh does not natively support specifying number_of_dimensions, i.e. # there are no speed gains unlike in FSVD. Later, we slice off unwanted # dimensions to conform the result of eigh to the specified # number_of_dimensions. eigvals, eigvecs = eigh(matrix_data) long_method_name = "Principal Coordinate Analysis" elif method == "fsvd": eigvals, eigvecs = _fsvd(matrix_data, number_of_dimensions) long_method_name = "Approximate Principal Coordinate Analysis " "using FSVD" else: raise ValueError( "PCoA eigendecomposition method {} not supported.".format(method) ) # cogent makes eigenvalues positive by taking the # abs value, but that doesn't seem to be an approach accepted # by L&L to deal with negative eigenvalues. We raise a warning # in that case. First, we make values close to 0 equal to 0. negative_close_to_zero = np.isclose(eigvals, 0) eigvals[negative_close_to_zero] = 0 if np.any(eigvals < 0): warn( "The result contains negative eigenvalues." 
" Please compare their magnitude with the magnitude of some" " of the largest positive eigenvalues. If the negative ones" " are smaller, it's probably safe to ignore them, but if they" " are large in magnitude, the results won't be useful. See the" " Notes section for more details. The smallest eigenvalue is" " {0} and the largest is {1}.".format(eigvals.min(), eigvals.max()), RuntimeWarning, ) # eigvals might not be ordered, so we first sort them, then analogously # sort the eigenvectors by the ordering of the eigenvalues too idxs_descending = eigvals.argsort()[::-1] eigvals = eigvals[idxs_descending] eigvecs = eigvecs[:, idxs_descending] # If we return only the coordinates that make sense (i.e., that have a # corresponding positive eigenvalue), then Jackknifed Beta Diversity # won't work as it expects all the OrdinationResults to have the same # number of coordinates. In order to solve this issue, we return the # coordinates that have a negative eigenvalue as 0 num_positive = (eigvals >= 0).sum() eigvecs[:, num_positive:] = np.zeros(eigvecs[:, num_positive:].shape) eigvals[num_positive:] = np.zeros(eigvals[num_positive:].shape) if method == "fsvd": # Since the dimension parameter, hereafter referred to as 'd', # restricts the number of eigenvalues and eigenvectors that FSVD # computes, we need to use an alternative method to compute the sum # of all eigenvalues, used to compute the array of proportions # explained. Otherwise, the proportions calculated will only be # relative to d number of dimensions computed; whereas we want # it to be relative to the entire dimensionality of the # centered distance matrix. # An alternative method of calculating th sum of eigenvalues is by # computing the trace of the centered distance matrix. 
# See proof outlined here: https://goo.gl/VAYiXx sum_eigenvalues = np.trace(matrix_data) else: # Calculate proportions the usual way sum_eigenvalues = np.sum(eigvals) proportion_explained = eigvals / sum_eigenvalues # In case eigh is used, eigh computes all eigenvectors and -values. # So if number_of_dimensions was specified, we manually need to ensure # only the requested number of dimensions # (number of eigenvectors and eigenvalues, respectively) are returned. eigvecs = eigvecs[:, :number_of_dimensions] eigvals = eigvals[:number_of_dimensions] proportion_explained = proportion_explained[:number_of_dimensions] # Scale eigenvalues to have length = sqrt(eigenvalue). This # works because np.linalg.eigh returns normalized # eigenvectors. Each row contains the coordinates of the # objects in the space of principal coordinates. Note that at # least one eigenvalue is zero because only n-1 axes are # needed to represent n points in a euclidean space. coordinates = eigvecs * np.sqrt(eigvals) axis_labels = ["PC%d" % i for i in range(1, number_of_dimensions + 1)] return OrdinationResults( short_method_name="PCoA", long_method_name=long_method_name, eigvals=pd.Series(eigvals, index=axis_labels), samples=pd.DataFrame( coordinates, index=distance_matrix.ids, columns=axis_labels ), proportion_explained=pd.Series(proportion_explained, index=axis_labels), ) def _fsvd(centered_distance_matrix, number_of_dimensions=10): """Perform singular value decomposition. More specifically in this case eigendecomposition, using fast heuristic algorithm nicknamed "FSVD" (FastSVD), adapted and optimized from the algorithm described by Halko et al (2011). Parameters ---------- centered_distance_matrix : np.array Numpy matrix representing the distance matrix for which the eigenvectors and eigenvalues shall be computed number_of_dimensions : int Number of dimensions to keep. Must be lower than or equal to the rank of the given distance_matrix. 
Returns ------- np.array Array of eigenvectors, each with number_of_dimensions length. np.array Array of eigenvalues, a total number of number_of_dimensions. Notes ----- The algorithm is based on 'An Algorithm for the Principal Component analysis of Large Data Sets' by N. Halko, P.G. Martinsson, Y. Shkolnisky, and M. Tygert. Original Paper: https://arxiv.org/abs/1007.5510 Ported from MATLAB implementation described here: https://stats.stackexchange.com/a/11934/211065 """ m, n = centered_distance_matrix.shape # Number of levels of the Krylov method to use. # For most applications, num_levels=1 or num_levels=2 is sufficient. num_levels = 1 # Changes the power of the spectral norm, thus minimizing the error). use_power_method = False # Note: a (conjugate) transpose is removed for performance, since we # only expect square matrices. if m != n: raise ValueError("FSVD expects square distance matrix") if number_of_dimensions > m or number_of_dimensions > n: raise ValueError( "FSVD: number_of_dimensions cannot be larger than" " the dimensionality of the given distance matrix." ) if number_of_dimensions < 0: raise ValueError( "Invalid operation: cannot reduce distance matrix " "to negative dimensions using PCoA. Did you intend " 'to specify the default value "0", which sets ' "the number_of_dimensions equal to the " "dimensionality of the given distance matrix?" 
) k = number_of_dimensions + 2 # Form a real nxl matrix G whose entries are independent, identically # distributed Gaussian random variables of zero mean and unit variance G = standard_normal(size=(n, k)) if use_power_method: # use only the given exponent H = dot(centered_distance_matrix, G) for x in range(2, num_levels + 2): # enhance decay of singular values # note: distance_matrix is no longer transposed, saves work # since we're expecting symmetric, square matrices anyway # (Daniel McDonald's changes) H = dot(centered_distance_matrix, dot(centered_distance_matrix, H)) else: # compute the m x l matrices H^{(0)}, ..., H^{(i)} # Note that this is done implicitly in each iteration below. H = dot(centered_distance_matrix, G) # to enhance performance H = hstack((H, dot(centered_distance_matrix, dot(centered_distance_matrix, H)))) for x in range(3, num_levels + 2): tmp = dot(centered_distance_matrix, dot(centered_distance_matrix, H)) H = hstack( (H, dot(centered_distance_matrix, dot(centered_distance_matrix, tmp))) ) # Using the pivoted QR-decomposition, form a real m * ((i+1)l) matrix Q # whose columns are orthonormal, s.t. there exists a real # ((i+1)l) * ((i+1)l) matrix R for which H = QR Q, R = qr(H) # Compute the n * ((i+1)l) product matrix T = A^T Q T = dot(centered_distance_matrix, Q) # step 3 # Form an SVD of T Vt, St, W = svd(T, full_matrices=False) W = W.transpose() # Compute the m * ((i+1)l) product matrix Ut = dot(Q, W) U_fsvd = Ut[:, :number_of_dimensions] S = St[:number_of_dimensions] # drop imaginary component, if we got one # Note: # In cogent, after computing eigenvalues/vectors, the imaginary part # is dropped, if any. 
We know for a fact that the eigenvalues are # real, so that's not necessary, but eigenvectors can in principle # be complex (see for example # http://math.stackexchange.com/a/47807/109129 for details) eigenvalues = S.real eigenvectors = U_fsvd.real return eigenvalues, eigenvectors def pcoa_biplot(ordination, y): """Compute the projection of descriptors into a PCoA matrix. This implementation is as described in Chapter 9 of Legendre & Legendre, Numerical Ecology 3rd edition. Parameters ---------- ordination: OrdinationResults The computed principal coordinates analysis of dimensions (n, c) where the matrix ``y`` will be projected onto. y: DataFrame Samples by features table of dimensions (n, m). These can be environmental features or abundance counts. This table should be normalized in cases of dimensionally heterogenous physical variables. Returns ------- OrdinationResults The modified input object that includes projected features onto the ordination space in the ``features`` attribute. """ # acknowledge that most saved ordinations lack a name, however if they have # a name, it should be PCoA if ordination.short_method_name != "" and ordination.short_method_name != "PCoA": raise ValueError( "This biplot computation can only be performed in a " "PCoA matrix." ) if set(y.index) != set(ordination.samples.index): raise ValueError( "The eigenvectors and the descriptors must describe " "the same samples." ) eigvals = ordination.eigvals.values coordinates = ordination.samples N = coordinates.shape[0] # align the descriptors and eigenvectors in a sample-wise fashion y = y.reindex(coordinates.index) # S_pc from equation 9.44 # Represents the covariance matrix between the features matrix and the # column-centered eigenvectors of the pcoa. spc = (1 / (N - 1)) * y.values.T.dot(scale(coordinates, ddof=1)) # U_proj from equation 9.55, is the matrix of descriptors to be projected. # # Only get the power of non-zero values, otherwise this will raise a # divide by zero warning. 
There shouldn't be negative eigenvalues(?) Uproj = np.sqrt(N - 1) * spc.dot( np.diag(np.power(eigvals, -0.5, where=eigvals > 0)) ) ordination.features = pd.DataFrame( data=Uproj, index=y.columns.copy(), columns=coordinates.columns.copy() ) ordination.features.fillna(0.0, inplace=True) return ordination scikit-bio-0.6.2/skbio/stats/ordination/_redundancy_analysis.py000066400000000000000000000220521464262511300247440ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np import pandas as pd from scipy.linalg import svd, lstsq from ._ordination_results import OrdinationResults from ._utils import corr, svd_rank, scale def rda(y, x, scale_Y=False, scaling=1): r"""Compute redundancy analysis, a type of canonical analysis. It is related to PCA and multiple regression because the explained variables `y` are fitted to the explanatory variables `x` and PCA is then performed on the fitted values. A similar process is performed on the residuals. RDA should be chosen if the studied gradient is small, and CCA when it's large, so that the contingency table is sparse. Parameters ---------- y : pd.DataFrame :math:`n \times p` response matrix, where :math:`n` is the number of samples and :math:`p` is the number of features. Its columns need be dimensionally homogeneous (or you can set `scale_Y=True`). This matrix is also referred to as the community matrix that commonly stores information about species abundances x : pd.DataFrame :math:`n \times m, n \geq m` matrix of explanatory variables, where :math:`n` is the number of samples and :math:`m` is the number of metadata variables. 
Its columns need not be standardized, but doing so turns regression coefficients into standard regression coefficients. scale_Y : bool, optional Controls whether the response matrix columns are scaled to have unit standard deviation. Defaults to `False`. scaling : int Scaling type 1 produces a distance biplot. It focuses on the ordination of rows (samples) because their transformed distances approximate their original euclidean distances. Especially interesting when most explanatory variables are binary. Scaling type 2 produces a correlation biplot. It focuses on the relationships among explained variables (`y`). It is interpreted like scaling type 1, but taking into account that distances between objects don't approximate their euclidean distances. See more details about distance and correlation biplots in [1]_, \S 9.1.4. Returns ------- OrdinationResults Object that stores the computed eigenvalues, the proportion explained by each of them (per unit), transformed coordinates for feature and samples, biplot scores, sample constraints, etc. See Also -------- ca cca OrdinationResults Notes ----- The algorithm is based on [1]_, \S 11.1, and is expected to give the same results as ``rda(y, x)`` in R's package vegan. The eigenvalues reported in vegan are re-normalized to :math:`\sqrt{\frac{s}{n-1}}` `n` is the number of samples, and `s` is the original eigenvalues. Here we will only return the original eigenvalues, as recommended in [1]_. References ---------- .. [1] Legendre P. and Legendre L. 1998. Numerical Ecology. Elsevier, Amsterdam. 
""" Y = y.values X = x.values n, p = y.shape n_, m = x.shape if n != n_: raise ValueError("Both data matrices must have the same number of rows.") if n < m: # Mmm actually vegan is able to do this case, too raise ValueError("Explanatory variables cannot have less rows than columns.") sample_ids = y.index feature_ids = y.columns # Centre response variables (they must be dimensionally # homogeneous) Y = scale(Y, with_std=scale_Y) # Centre explanatory variables X = scale(X, with_std=False) # Distribution of variables should be examined and transformed # if necessary (see paragraph 4 in p. 580 L&L 1998) # Compute Y_hat (fitted values by multivariate linear # regression, that is, linear least squares). Formula 11.6 in # L&L 1998 involves solving the normal equations, but that fails # when cond(X) ~ eps**(-0.5). A more expensive but much more # stable solution (fails when cond(X) ~ eps**-1) is computed # using the QR decomposition of X = QR: # (11.6) Y_hat = X [X' X]^{-1} X' Y # = QR [R'Q' QR]^{-1} R'Q' Y # = QR [R' R]^{-1} R'Q' Y # = QR R^{-1} R'^{-1} R' Q' Y # = Q Q' Y # and B (matrix of regression coefficients) # (11.4) B = [X' X]^{-1} X' Y # = R^{-1} R'^{-1} R' Q' Y # = R^{-1} Q' # Q, R = np.linalg.qr(X) # Y_hat = Q.dot(Q.T).dot(Y) # B = scipy.linalg.solve_triangular(R, Q.T.dot(Y)) # This works provided X has full rank. When not, you can still # fix it using R's pseudoinverse or partitioning R. To avoid any # issues, like the numerical instability when trying to # reproduce an example in L&L where X was rank-deficient, we'll # just use `np.linalg.lstsq`, which uses the SVD decomposition # under the hood and so it's also more expensive. B, _, rank_X, _ = lstsq(X, Y) Y_hat = X.dot(B) # Now let's perform PCA on the fitted values from the multiple # regression u, s, vt = svd(Y_hat, full_matrices=False) # vt are the right eigenvectors, which is what we need to # perform PCA. 
That is, we're changing points in Y_hat from the # canonical basis to the orthonormal basis given by the right # eigenvectors of Y_hat (or equivalently, the eigenvectors of # the covariance matrix Y_hat.T.dot(Y_hat)) # See 3) in p. 583 in L&L 1998 rank = svd_rank(Y_hat.shape, s) # Theoretically, there're at most min(p, m, n - 1) non-zero eigenvalues U = vt[:rank].T # U as in Fig. 11.2 # Ordination in the space of response variables. Its columns are # sample scores. (Eq. 11.12) F = Y.dot(U) # Ordination in the space of explanatory variables. Its columns # are fitted sample scores. (Eq. 11.13) Z = Y_hat.dot(U) # Canonical coefficients (formula 11.14) # C = B.dot(U) # Not used Y_res = Y - Y_hat # PCA on the residuals u_res, s_res, vt_res = svd(Y_res, full_matrices=False) # See 9) in p. 587 in L&L 1998 rank_res = svd_rank(Y_res.shape, s_res) # Theoretically, there're at most min(p, n - 1) non-zero eigenvalues as U_res = vt_res[:rank_res].T F_res = Y_res.dot(U_res) # Ordination in the space of residuals eigenvalues = np.r_[s[:rank], s_res[:rank_res]] # Compute scores if scaling not in {1, 2}: raise NotImplementedError("Only scalings 1, 2 available for RDA.") # According to the vegan-FAQ.pdf, the scaling factor for scores # is (notice that L&L 1998 says in p. 
586 that such scaling # doesn't affect the interpretation of a biplot): eigvals = pd.Series( eigenvalues, index=["RDA%d" % (i + 1) for i in range(len(eigenvalues))] ) const = np.sum(eigenvalues**2) ** 0.25 if scaling == 1: scaling_factor = const elif scaling == 2: scaling_factor = eigenvalues / const feature_scores = np.hstack((U, U_res)) * scaling_factor sample_scores = np.hstack((F, F_res)) / scaling_factor feature_scores = pd.DataFrame( feature_scores, index=feature_ids, columns=["RDA%d" % (i + 1) for i in range(feature_scores.shape[1])], ) sample_scores = pd.DataFrame( sample_scores, index=sample_ids, columns=["RDA%d" % (i + 1) for i in range(sample_scores.shape[1])], ) # TODO not yet used/displayed sample_constraints = np.hstack((Z, F_res)) / scaling_factor sample_constraints = pd.DataFrame( sample_constraints, index=sample_ids, columns=["RDA%d" % (i + 1) for i in range(sample_constraints.shape[1])], ) # Vegan seems to compute them as corr(X[:, :rank_X], # u) but I don't think that's a good idea. 
In fact, if # you take the example shown in Figure 11.3 in L&L 1998 you # can see that there's an arrow for each of the 4 # environmental variables (depth, coral, sand, other) even if # other = not(coral or sand) biplot_scores = corr(X, u) biplot_scores = pd.DataFrame( biplot_scores, index=x.columns, columns=["RDA%d" % (i + 1) for i in range(biplot_scores.shape[1])], ) # The "Correlations of environmental variables with sample # scores" from table 11.4 are quite similar to vegan's biplot # scores, but they're computed like this: # corr(X, F)) p_explained = pd.Series( eigenvalues / eigenvalues.sum(), index=["RDA%d" % (i + 1) for i in range(len(eigenvalues))], ) return OrdinationResults( "RDA", "Redundancy Analysis", eigvals=eigvals, proportion_explained=p_explained, features=feature_scores, samples=sample_scores, biplot_scores=biplot_scores, sample_constraints=sample_constraints, ) scikit-bio-0.6.2/skbio/stats/ordination/_utils.py000066400000000000000000000233041464262511300220460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import numpy as np from ._cutils import center_distance_matrix_cy def mean_and_std(a, axis=None, weights=None, with_mean=True, with_std=True, ddof=0): """Compute the weighted average and standard deviation along the specified axis. Parameters ---------- a : array_like Calculate average and standard deviation of these values. axis : int, optional Axis along which the statistics are computed. The default is to compute them on the flattened array. weights : array_like, optional An array of weights associated with the values in `a`. Each value in `a` contributes to the average according to its associated weight. 
The weights array can either be 1-D (in which case its length must be the size of `a` along the given axis) or of the same shape as `a`. If `weights=None`, then all data in `a` are assumed to have a weight equal to one. with_mean : bool, optional, defaults to True Compute average if True. with_std : bool, optional, defaults to True Compute standard deviation if True. ddof : int, optional, defaults to 0 It means delta degrees of freedom. Variance is calculated by dividing by `n - ddof` (where `n` is the number of elements). By default it computes the maximum likelyhood estimator. Returns ------- average, std Return the average and standard deviation along the specified axis. If any of them was not required, returns `None` instead """ if not (with_mean or with_std): raise ValueError( "Either the mean or standard deviation need to be" " computed." ) a = np.asarray(a) if weights is None: avg = a.mean(axis=axis) if with_mean else None std = a.std(axis=axis, ddof=ddof) if with_std else None else: avg = np.average(a, axis=axis, weights=weights) if with_std: if axis is None: variance = np.average((a - avg) ** 2, weights=weights) else: # Make sure that the subtraction to compute variance works for # multidimensional arrays a_rolled = np.rollaxis(a, axis) # Numpy doesn't have a weighted std implementation, but this is # stable and fast variance = np.average((a_rolled - avg) ** 2, axis=0, weights=weights) if ddof != 0: # Don't waste time if variance doesn't need scaling if axis is None: variance *= a.size / (a.size - ddof) else: variance *= a.shape[axis] / (a.shape[axis] - ddof) std = np.sqrt(variance) else: std = None avg = avg if with_mean else None return avg, std def scale(a, weights=None, with_mean=True, with_std=True, ddof=0, copy=True): """Scale array by columns to have weighted average 0 and standard deviation 1. Parameters ---------- a : array_like 2D array whose columns are standardized according to the weights. 
weights : array_like, optional Array of weights associated with the columns of `a`. By default, the scaling is unweighted. with_mean : bool, optional, defaults to True Center columns to have 0 weighted mean. with_std : bool, optional, defaults to True Scale columns to have unit weighted std. ddof : int, optional, defaults to 0 If with_std is True, variance is calculated by dividing by `n - ddof` (where `n` is the number of elements). By default it computes the maximum likelyhood stimator. copy : bool, optional, defaults to True Whether to perform the standardization in place, or return a new copy of `a`. Returns ------- 2D ndarray Scaled array. Notes ----- Wherever std equals 0, it is replaced by 1 in order to avoid division by zero. """ if copy: a = a.copy() a = np.asarray(a, dtype=np.float64) avg, std = mean_and_std( a, axis=0, weights=weights, with_mean=with_mean, with_std=with_std, ddof=ddof ) if with_mean: a -= avg if with_std: std[std == 0] = 1.0 a /= std return a def svd_rank(M_shape, S, tol=None): """Matrix rank of `M` given its singular values `S`. See `np.linalg.matrix_rank` for a rationale on the tolerance (we're not using that function because it doesn't let us reuse a precomputed SVD). """ if tol is None: tol = S.max() * max(M_shape) * np.finfo(S.dtype).eps return np.sum(S > tol) def corr(x, y=None): """Compute correlation between columns of `x`, or `x` and `y`. Correlation is covariance of (columnwise) standardized matrices, so each matrix is first centered and scaled to have variance one, and then their covariance is computed. Parameters ---------- x : 2D array_like Matrix of shape (n, p). Correlation between its columns will be computed. y : 2D array_like, optional Matrix of shape (n, q). If provided, the correlation is computed between the columns of `x` and the columns of `y`. Else, it's computed between the columns of `x`. Returns ------- correlation Matrix of computed correlations. Has shape (p, p) if `y` is not provided, else has shape (p, q). 
""" x = np.asarray(x) if y is not None: y = np.asarray(y) if y.shape[0] != x.shape[0]: raise ValueError("Both matrices must have the same number of rows") x, y = scale(x), scale(y) else: x = scale(x) y = x # Notice that scaling was performed with ddof=0 (dividing by n, # the default), so now we need to remove it by also using ddof=0 # (dividing by n) return x.T.dot(y) / x.shape[0] def e_matrix(distance_matrix): """Compute E matrix from a distance matrix. Squares and divides by -2 the input elementwise. Eq. 9.20 in Legendre & Legendre 1998. """ return distance_matrix * distance_matrix / -2 def f_matrix(E_matrix): """Compute F matrix from E matrix. Centring step: for each element, the mean of the corresponding row and column are substracted, and the mean of the whole matrix is added. Eq. 9.21 in Legendre & Legendre 1998. """ row_means = E_matrix.mean(axis=1, keepdims=True) col_means = E_matrix.mean(axis=0, keepdims=True) matrix_mean = E_matrix.mean() return E_matrix - row_means - col_means + matrix_mean def center_distance_matrix(distance_matrix, inplace=False): """Centers a distance matrix. Note: If the used distance was euclidean, pairwise distances needn't be computed from the data table Y because F_matrix = Y.dot(Y.T) (if Y has been centered). But since we're expecting distance_matrix to be non-euclidian, we do the following computation as per Numerical Ecology (Legendre & Legendre 1998). Parameters ---------- distance_matrix : 2D array_like Distance matrix. inplace : bool, optional Whether or not to center the given distance matrix in-place, which is more efficient in terms of memory and computation. 
""" if not distance_matrix.flags.c_contiguous: # center_distance_matrix_cy requires c_contiguous, so make a copy distance_matrix = np.asarray(distance_matrix, order="C") if inplace: center_distance_matrix_cy(distance_matrix, distance_matrix) return distance_matrix else: centered = np.empty(distance_matrix.shape, distance_matrix.dtype) center_distance_matrix_cy(distance_matrix, centered) return centered def _e_matrix_inplace(distance_matrix): """Compute E matrix from a distance matrix inplace. Squares and divides by -2 the input element-wise. Eq. 9.20 in Legendre & Legendre 1998. Modified from :func:`skbio.stats.ordination.e_matrix` function, performing row-wise operations to avoid excessive memory allocations. Parameters ---------- distance_matrix : 2D array_like Distance matrix. """ distance_matrix = distance_matrix.astype(float) for i in np.arange(len(distance_matrix)): distance_matrix[i] = (distance_matrix[i] * distance_matrix[i]) / -2 return distance_matrix def _f_matrix_inplace(e_matrix): """Compute F matrix from E matrix inplace. Centering step: for each element, the mean of the corresponding row and column are subtracted, and the mean of the whole matrix is added. Eq. 9.21 in Legendre & Legendre 1998. Modified from :func:`skbio.stats.ordination.f_matrix` function, performing row-wise operations to avoid excessive memory allocations. Parameters ---------- e_matrix : 2D array_like A matrix representing the "E matrix" as described above. 
""" e_matrix = e_matrix.astype(float) row_means = np.zeros(len(e_matrix), dtype=float) col_means = np.zeros(len(e_matrix), dtype=float) matrix_mean = 0.0 for i in np.arange(len(e_matrix)): row_means[i] = e_matrix[i].mean() matrix_mean += e_matrix[i].sum() col_means += e_matrix[i] matrix_mean /= len(e_matrix) ** 2 col_means /= len(e_matrix) for i in np.arange(len(e_matrix)): v = e_matrix[i] v -= row_means[i] v -= col_means v += matrix_mean e_matrix[i] = v return e_matrix scikit-bio-0.6.2/skbio/stats/ordination/tests/000077500000000000000000000000001464262511300213355ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/ordination/tests/__init__.py000066400000000000000000000005411464262511300234460ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/stats/ordination/tests/data/000077500000000000000000000000001464262511300222465ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/ordination/tests/data/L&L_CA_data000066400000000000000000000000301464262511300240730ustar00rootroot0000000000000010 10 20 10 15 10 15 5 5scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_biplot_descriptors000066400000000000000000000021751464262511300267520ustar00rootroot00000000000000Taxon PC.636 PC.635 PC.356 PC.481 PC.354 PC.593 PC.355 PC.607 PC.634 Root;k__Bacteria;Other 0.0202702702703 0.0469798657718 0.0266666666667 0.027397260274 0.0134228187919 0.0134228187919 0.0136054421769 0.0469798657718 0.02 Root;k__Bacteria;p__Actinobacteria 0.00675675675676 0.00671140939597 0.0 0.00684931506849 0.0 0.0 0.0 0.0201342281879 0.02 Root;k__Bacteria;p__Bacteroidetes 0.695945945946 0.523489932886 0.193333333333 0.143835616438 0.0738255033557 0.389261744966 0.285714285714 0.275167785235 0.653333333333 Root;k__Bacteria;p__Deferribacteres 0.0472972972973 0.0134228187919 0.0 0.0 0.0 0.0 0.0 0.0201342281879 0.0333333333333 Root;k__Bacteria;p__Firmicutes 0.209459459459 0.395973154362 0.773333333333 0.787671232877 0.89932885906 0.41610738255 0.700680272109 0.456375838926 0.22 Root;k__Bacteria;p__Proteobacteria 0.00675675675676 0.00671140939597 0.0 0.0 0.0 0.0335570469799 0.0 0.0201342281879 0.0133333333333 Root;k__Bacteria;p__TM7 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0133333333333 Root;k__Bacteria;p__ 0.0135135135135 0.00671140939597 0.00666666666667 0.0342465753425 0.0134228187919 0.147651006711 0.0 0.161073825503 0.0266666666667 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_biplot_projected_descriptors000066400000000000000000000030511464262511300310030ustar00rootroot00000000000000"","Axis.1","Axis.2","Axis.3","Axis.4","Axis.5","Axis.6","Axis.7","Axis.8" 
"Root;k__Bacteria;Other",-0.0251703458354009,-0.0387419997798285,0.0370307944752097,-0.0182485866183309,-0.00129741030252842,0.000814106793625313,-0.0173701207829898,0.0361211055519348 "Root;k__Bacteria;p__Actinobacteria",-0.0243896442435075,-0.0173750855733946,0.00412122918478027,-0.00122921164491863,-0.0133154056835269,-0.0154481346200809,0.0110025326657988,-0.0193889181281414 "Root;k__Bacteria;p__Bacteroidetes",-0.715120229604841,0.317840715450945,-0.183247320427738,0.616597812375811,0.261720590975174,-0.0233954969961052,0.0235684811385711,0.108576064612194 "Root;k__Bacteria;p__Deferribacteres",-0.0581057801759818,0.0109676872979955,0.000973434160884227,0.0403394413149601,-0.0126657203131676,0.0312522995897358,0.00445295277091873,-0.0391828157435708 "Root;k__Bacteria;p__Firmicutes",0.826514889978526,0.0338669291769973,0.258683679610306,-0.820041925817023,-0.236331008329088,0.0820371240664409,-0.146993916645359,-0.114613637622136 "Root;k__Bacteria;p__Proteobacteria",-0.00665357958782869,-0.0351957821869525,-0.0269691957766513,0.0384070851104123,0.019915981256556,-0.0152225877254676,0.019285238587548,0.0101464789573946 "Root;k__Bacteria;p__TM7",-0.00936607225354557,0.00504668520861811,-0.00329562583232389,-0.00189853964818609,0.00398022745872819,-0.0160400884328152,0.0128760393792986,-0.0162949930598132 "Root;k__Bacteria;p__",0.0122907617222583,-0.276409149593715,-0.0872969953941216,0.146073924926225,-0.022007255063595,-0.0439972226754927,0.0931787928855206,0.0346367154320729 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data000066400000000000000000000021131464262511300253020ustar00rootroot000000000000000 0.099 0.033 0.183 0.148 0.198 0.462 0.628 0.113 0.173 0.434 0.762 0.53 0.586 0.099 0 0.022 0.114 0.224 0.039 0.266 0.442 0.07 0.119 0.419 0.633 0.389 0.435 0.033 0.022 0 0.042 0.059 0.053 0.322 0.444 0.046 0.162 0.339 0.781 0.482 0.55 0.183 0.114 0.042 0 0.068 0.085 0.435 0.406 0.047 0.331 0.505 0.7 0.579 0.53 0.148 0.224 0.059 0.068 0 0.051 
0.268 0.24 0.034 0.177 0.469 0.758 0.597 0.552 0.198 0.039 0.053 0.085 0.051 0 0.025 0.129 0.002 0.039 0.39 0.625 0.498 0.509 0.462 0.266 0.322 0.435 0.268 0.025 0 0.014 0.106 0.089 0.315 0.469 0.374 0.369 0.628 0.442 0.444 0.406 0.24 0.129 0.014 0 0.129 0.237 0.349 0.618 0.562 0.471 0.113 0.07 0.046 0.047 0.034 0.002 0.106 0.129 0 0.071 0.151 0.44 0.247 0.234 0.173 0.119 0.162 0.331 0.177 0.039 0.089 0.237 0.071 0 0.43 0.538 0.383 0.346 0.434 0.419 0.339 0.505 0.469 0.39 0.315 0.349 0.151 0.43 0 0.607 0.387 0.456 0.762 0.633 0.781 0.7 0.758 0.625 0.469 0.618 0.44 0.538 0.607 0 0.084 0.09 0.53 0.389 0.482 0.579 0.597 0.498 0.374 0.562 0.247 0.383 0.387 0.084 0 0.038 0.586 0.435 0.55 0.53 0.552 0.509 0.369 0.471 0.234 0.346 0.456 0.09 0.038 0 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data_12dim000066400000000000000000000051271464262511300263060ustar00rootroot00000000000000 1 2 3 4 5 6 7 8 9 10 11 12 1 0.0 0.909636950834479 0.9869428157422727 0.7190893636533138 0.6960053958431593 0.6477238596907942 0.30557161358653495 0.8946966124829346 0.12780699944110363 0.39915339691165386 0.7641239434153432 0.6248070796484706 2 0.909636950834479 0.0 0.3871354292850083 0.6881160960373599 0.5550593584027527 0.5855786600007656 0.4843215561734061 0.20448304199758327 0.4067028703340123 0.2754701840044086 0.6269219445617967 0.05629366991581264 3 0.9869428157422727 0.3871354292850083 0.0 0.6088130886750466 0.8611896463567201 0.2815827949525225 0.6500535832888426 0.8196046614443331 0.356410088497226 0.05164123821334954 0.7110953188954077 0.32855281988632934 4 0.7190893636533138 0.6881160960373599 0.6088130886750466 0.0 0.7453215102240474 0.9916540031629704 0.14394284428694282 0.8388378539413649 0.15115603934799038 0.13871462268568635 0.1934605692727246 0.9804118301943398 5 0.6960053958431593 0.5550593584027527 0.8611896463567201 0.7453215102240474 0.0 0.7996611932937304 0.30579824243478326 0.5227960305398314 0.8564730629853469 0.7786384040043949 0.06843106040159719 
0.7715973816221341 6 0.6477238596907942 0.5855786600007656 0.2815827949525225 0.9916540031629704 0.7996611932937304 0.0 0.8869204659721949 0.1619378942802252 0.10200764546980268 0.17805335055828198 0.8796559972720953 0.20933243431218862 7 0.30557161358653495 0.4843215561734061 0.6500535832888426 0.14394284428694282 0.30579824243478326 0.8869204659721949 0.0 0.9489770186788868 0.9531210051205121 0.5834385348164726 0.31984891724102216 0.5852822100268925 8 0.8946966124829346 0.20448304199758327 0.8196046614443331 0.8388378539413649 0.5227960305398314 0.1619378942802252 0.9489770186788868 0.0 0.16681381452925792 0.9281238242741929 0.604480007052297 0.43978806925866687 9 0.12780699944110363 0.4067028703340123 0.356410088497226 0.15115603934799038 0.8564730629853469 0.10200764546980268 0.9531210051205121 0.16681381452925792 0.0 0.10766387594368387 0.9552101788877516 0.6135541732435132 10 0.39915339691165386 0.2754701840044086 0.05164123821334954 0.13871462268568635 0.7786384040043949 0.17805335055828198 0.5834385348164726 0.9281238242741929 0.10766387594368387 0.0 0.4157921207508062 0.31997143485314194 11 0.7641239434153432 0.6269219445617967 0.7110953188954077 0.1934605692727246 0.06843106040159719 0.8796559972720953 0.31984891724102216 0.604480007052297 0.9552101788877516 0.4157921207508062 0.0 0.11189976154475101 12 0.6248070796484706 0.05629366991581264 0.32855281988632934 0.9804118301943398 0.7715973816221341 0.20933243431218862 0.5852822100268925 0.43978806925866687 0.6135541732435132 0.31997143485314194 0.11189976154475101 0.0 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data_2000066400000000000000000000016041464262511300255270ustar00rootroot000000000000000.000000000000000000e+00 7.123610999999999693e-01 7.684919799999999634e-01 8.001856299999999811e-01 6.824852399999999930e-01 7.463462899999999678e-01 7.123610999999999693e-01 0.000000000000000000e+00 8.664569100000000246e-01 8.048528200000000243e-01 8.338130099999999656e-01 
7.388172600000000312e-01 7.684919799999999634e-01 8.664569100000000246e-01 0.000000000000000000e+00 8.230839599999999479e-01 7.745174599999999909e-01 7.649887199999999554e-01 8.001856299999999811e-01 8.048528200000000243e-01 8.230839599999999479e-01 0.000000000000000000e+00 8.416736500000000509e-01 7.761436600000000130e-01 6.824852399999999930e-01 8.338130099999999656e-01 7.745174599999999909e-01 8.416736500000000509e-01 0.000000000000000000e+00 7.266116299999999528e-01 7.463462899999999678e-01 7.388172600000000312e-01 7.649887199999999554e-01 7.761436600000000130e-01 7.266116299999999528e-01 0.000000000000000000e+00 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data_3000066400000000000000000000023141464262511300255270ustar00rootroot00000000000000 PC.636 PC.635 PC.356 PC.481 PC.354 PC.593 PC.355 PC.607 PC.634 PC.636 0.0 0.60737953696 0.748276547061 0.669767446745 0.747947798007 0.7377413135 0.70062016649 0.722772498498 0.581124366673 PC.635 0.60737953696 0.0 0.72197480583 0.655979250216 0.70672669146 0.745154628257 0.745509981365 0.659269044234 0.558850879936 PC.356 0.748276547061 0.72197480583 0.0 0.697796152694 0.610208881707 0.718770654329 0.615310834175 0.735768463546 0.788355481145 PC.481 0.669767446745 0.655979250216 0.697796152694 0.0 0.595087696488 0.657351659549 0.647258584304 0.686601452856 0.663208945591 PC.354 0.747947798007 0.70672669146 0.610208881707 0.595087696488 0.0 0.585097582096 0.599541095803 0.726258935536 0.781832289452 PC.593 0.7377413135 0.745154628257 0.718770654329 0.657351659549 0.585097582096 0.0 0.660981656067 0.713741192899 0.763317384716 PC.355 0.70062016649 0.745509981365 0.615310834175 0.647258584304 0.599541095803 0.660981656067 0.0 0.77062394744 0.753575681654 PC.607 0.722772498498 0.659269044234 0.735768463546 0.686601452856 0.726258935536 0.713741192899 0.77062394744 0.0 0.726387392797 PC.634 0.581124366673 0.558850879936 0.788355481145 0.663208945591 0.781832289452 0.763317384716 0.753575681654 0.726387392797 
0.0scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data_3_eigh_ref_3dim000066400000000000000000000014761464262511300303030ustar00rootroot00000000000000Eigvals 3 0.5123672604605051 0.30071909442702155 0.26791206600414047 Proportion explained 3 0.26757383277657976 0.15704469604990076 0.13991186377402365 Species 0 0 Site 9 3 PC.636 -0.2584654611828421 0.17399954688273872 -0.03828757925519412 PC.635 -0.27100113539100934 -0.018595131906339258 0.08648419263485663 PC.356 0.23507789817473093 0.09625192544887005 0.34579272671386985 PC.481 0.026140766432533755 -0.011145967653319655 -0.14766060301460787 PC.354 0.2850075522831216 -0.019254988848331687 -0.062326337538532166 PC.593 0.20463632624145514 -0.13936115093164073 -0.29151381962286704 PC.355 0.23348240321199026 0.22525797406849954 0.018862309626814944 PC.607 -0.09496319113225934 -0.4209748024953033 0.15486945486941445 PC.634 -0.35991515863772167 0.11382259543482587 -0.06622034441375392 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_sample_data_3_fsvd_ref_3dim000066400000000000000000000014651464262511300303270ustar00rootroot00000000000000Eigvals 3 0.5123672604605054 0.3007190944270222 0.2679120660041405 Proportion explained 3 0.2675738327765797 0.15704469604990098 0.13991186377402356 Species 0 0 Site 9 3 PC.636 -0.2584654611828421 0.17399954688273822 -0.03828757925519378 PC.635 -0.2710011353910087 -0.018595131906339074 0.08648419263485721 PC.356 0.23507789817473107 0.0962519254488692 0.3457927267138707 PC.481 0.026140766432533644 -0.011145967653318808 -0.14766060301460796 PC.354 0.28500755228312136 -0.019254988848331694 -0.06232633753853236 PC.593 0.20463632624145525 -0.13936115093164014 -0.2915138196228672 PC.355 0.23348240321199093 0.22525797406849954 0.018862309626815132 PC.607 -0.09496319113225955 -0.4209748024953044 0.1548694548694131 PC.634 -0.35991515863772144 0.1138225954348267 -0.06622034441375402 Biplot 0 0 Site constraints 0 0 
scikit-bio-0.6.2/skbio/stats/ordination/tests/data/PCoA_skbio000066400000000000000000000037101464262511300241430ustar00rootroot00000000000000Eigvals 9 0.5123672604605048 0.30071909442702155 0.2679120660041405 0.20898868107836532 0.19169895326008157 0.16054234528018121 0.1501769571197782 0.12245774816668883 0.0 Proportion explained 9 0.2675738327765797 0.1570446960499008 0.1399118637740237 0.109140272453788 0.10011104850250738 0.08384011619116792 0.07842699390110737 0.06395117635092507 0.0 Species 0 0 Site 9 9 PC.636 -0.25846546118284214 0.17399954688273864 -0.03828757925519388 0.1944775056196811 -0.08311760208440848 0.2624303332005185 -0.023163639223487725 0.018479403958060633 -0.0 PC.635 -0.27100113539100923 -0.01859513190633933 0.08648419263485602 -0.11806424531525102 0.19880835843691802 -0.021172359953453967 -0.19102402756537798 -0.1556465923768283 -0.0 PC.356 0.2350778981747308 0.0962519254488697 0.3457927267138699 0.003208625776186833 0.09637776755188007 0.04570253869527276 0.1854728132858981 -0.04040939717932225 -0.0 PC.481 0.026140766432533637 -0.011145967653319279 -0.14766060301460832 -0.29087660852960373 -0.20394547280060757 0.06197123847575147 0.10164132870879576 -0.1056909987185146 -0.0 PC.354 0.2850075522831214 -0.019254988848331458 -0.062326337538532166 -0.13812679985198154 0.10479860242276298 0.09517207306283292 -0.12963609754202599 0.22068717037162142 -0.0 PC.593 0.20463632624145503 -0.13936115093164061 -0.2915138196228669 0.18156678682059035 0.1595801327151684 -0.024641213016179383 0.08662524044412884 -0.09962214768709698 -0.0 PC.355 0.23348240321199026 0.22525797406849948 0.018862309626814986 0.10772998183109622 -0.1771089995718839 -0.19290583515124646 -0.14981947140833857 -0.03835490374645002 -0.0 PC.607 -0.09496319113225948 -0.42097480249530345 0.1548694548694144 0.08984275092805967 -0.15261819448811528 -0.0334232691500879 -0.02512247773026579 0.05089885364091516 -0.0 PC.634 -0.35991515863772167 0.11382259543482588 -0.06622034441375366 
-0.029757997278778266 0.05722540781828547 -0.1931335061634077 0.14502633103067314 0.14965861173761527 -0.0 Biplot 0 0 Site constraints 0 0 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_X000066400000000000000000000017501464262511300242000ustar00rootroot000000000000001.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 2.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 3.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 4.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 6.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 7.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 8.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 9.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+01 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_Y000066400000000000000000000002001464262511300241660ustar00rootroot000000000000001 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 11 4 0 0 8 1 11 5 17 7 0 0 9 6 0 0 6 2 9 7 13 10 0 0 7 8 0 0 4 3 7 9 10 13 0 0 5 10 0 0 2 4 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_biplot_scaling1000066400000000000000000000004221464262511300270360ustar00rootroot000000000000000.422650 -0.559143 -0.713251 1.165734e-16 1.471046e-16 1.831868e-16 0.988496 0.150787 -0.011785 6.106227e-17 6.661338e-17 8.326673e-17 -0.556517 0.817600 0.147714 -4.996004e-17 4.440892e-17 -7.216450e-17 -0.404080 -0.905843 -0.127150 2.775558e-18 -2.220446e-17 0.000000e+00 
scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_biplot_scaling2000066400000000000000000000004231464262511300270400ustar00rootroot00000000000000 0.422650 -0.559143 -0.713251 1.165734e-16 1.471046e-16 1.831868e-16 0.988496 0.150787 -0.011785 6.106227e-17 6.661338e-17 8.326673e-17 -0.556517 0.817600 0.147714 -4.996004e-17 4.440892e-17 -7.216450e-17 -0.404080 -0.905843 -0.127150 2.775558e-18 -2.220446e-17 0.000000e+00scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_sample_constraints_scaling1000066400000000000000000000015021464262511300314550ustar00rootroot00000000000000-1.203552 0.973291 0.398346 -4.377164e-02 -2.025459e-01 -4.174845e-02 2.251712e-03 -1.233129 1.048075 0.112959 1.946350e-16 -3.553872e-16 8.349689e-02 -1.554395e-16 -1.262706 1.122859 -0.172429 4.377164e-02 2.025459e-01 -4.174845e-02 -2.251712e-03 -0.629153 -1.155379 0.778203 -3.794874e-01 5.000171e-02 3.937851e-16 2.503876e-04 2.249463 0.043725 0.561763 6.747053e-01 2.580938e-02 6.726139e-16 1.835041e-02 -0.688307 -1.005810 0.207427 -1.264958e-01 1.666724e-02 -6.333665e-17 8.346252e-05 2.190309 0.193293 -0.009012 -4.068089e-02 -1.574523e-02 -6.651371e-18 -3.978716e-02 -0.747462 -0.856242 -0.363348 1.264958e-01 -1.666724e-02 -4.098446e-16 -8.346252e-05 2.131154 0.342861 -0.579787 -6.340244e-01 -1.006415e-02 -4.849801e-16 2.143675e-02 -0.806617 -0.706674 -0.934123 3.794874e-01 -5.000171e-02 -7.280846e-16 -2.503876e-04 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_sample_constraints_scaling2000066400000000000000000000014751464262511300314670ustar00rootroot00000000000000-1.481311 2.070632 1.420611 -2.272346e-01 -3.841304e+00 -2.304877e+00 2.600617e-01 -1.517714 2.229732 0.402842 1.010421e-15 -6.739956e-15 4.609755e+00 -1.795250e-14 -1.554117 2.388832 -0.614928 2.272346e-01 3.841304e+00 -2.304877e+00 -2.600617e-01 -0.774350 -2.458015 2.775281 -1.970058e+00 9.482876e-01 2.174036e-14 2.891853e-02 2.768601 0.093023 2.003399 3.502642e+00 4.894777e-01 3.713413e-14 
2.119383e+00 -0.847157 -2.139816 0.739742 -6.566859e-01 3.160959e-01 -3.496734e-15 9.639511e-03 2.695794 0.411223 -0.032139 -2.111894e-01 -2.986100e-01 -3.672135e-16 -4.595222e+00 -0.919963 -1.821616 -1.295796 6.566859e-01 -3.160959e-01 -2.262699e-14 -9.639511e-03 2.622988 0.729422 -2.067677 -3.291452e+00 -1.908677e-01 -2.677512e-14 2.475840e+00 -0.992770 -1.503417 -3.331334 1.970058e+00 -9.482876e-01 -4.019660e-14 -2.891853e-02 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_site_scaling1_from_vegan000066400000000000000000000014231464262511300307160ustar00rootroot00000000000000-1.209385 0.9996717 0.2040803 -0.04377164 -0.2025459 -0.04174845 0.002251712 -1.262748 1.114136 0.1333392 -5.659699e-17 2.478790e-16 0.0834969 -1.146481e-16 -1.227255 1.030418 0.001456920 0.04377164 0.2025459 -0.04174845 -0.002251712 -0.7091313 -1.234889 0.7539303 -0.3794874 0.05000171 6.953961e-17 0.0002503876 2.414956 0.1515006 0.7018385 0.6747053 0.02580938 2.771828e-16 0.01835041 -0.7149669 -1.032314 0.1993367 -0.1264958 0.01666724 5.423912e-17 8.346252e-05 2.146560 0.1833668 -0.02417943 -0.04068089 -0.01574523 -3.100586e-16 -0.03978716 -0.7208024 -0.8297385 -0.3552569 0.1264958 -0.01666724 7.539371e-18 -8.346252e-05 2.009409 0.2450122 -0.7046952 -0.6340244 -0.01006415 2.966077e-16 0.02143675 -0.726638 -0.6271632 -0.9098504 0.3794874 -0.05000171 9.260122e-17 -0.0002503876 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_site_scaling2_from_vegan000066400000000000000000000013461464262511300307230ustar00rootroot00000000000000-1.488490 2.126756 0.7278053 -0.2272346 -3.841304 -2.304877 0.2600617 -1.554168 2.370273 0.4755236 -2.938157e-16 4.701051e-15 4.609755 -1.324129e-14 -1.510485 2.192167 0.005195769 0.2272346 3.841304 -2.304877 -0.2600617 -0.8727866 -2.627171 2.688719 -1.970058 0.9482876 3.839191e-15 0.02891853 2.972287 0.3223107 2.502946 3.502642 0.4894777 1.53029e-14 2.119383 -0.8799689 -2.196201 0.7108885 -0.6566859 0.3160959 2.994471e-15 0.009639511 2.641949 
0.3901046 -0.08623036 -0.2111894 -0.2986100 -1.711793e-14 -4.595222 -0.8871512 -1.765231 -1.266942 0.6566859 -0.3160959 4.162388e-16 -0.009639511 2.473146 0.5212524 -2.513133 -3.291452 -0.1908677 1.637533e-14 2.475840 -0.8943335 -1.334261 -3.244772 1.970058 -0.9482876 5.112392e-15 -0.02891853 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_species_scaling1_from_vegan000066400000000000000000000006501464262511300314060ustar00rootroot000000000000001.700926 -3.648509 2.254854 0.03700915 2.285531 -3.992165 -0.9423084 1.131309 -2.668483 -4.203715 -0.03700915 -2.285531 -3.992165 0.9423084 4.183397 0.9492028 1.450372 3.89008 1.505626 2.78929e-14 3.804618 3.105920 0.9508293 -1.474339 -3.319613 -1.214393 3.792181e-14 3.87476 -0.6542268 -2.856405 1.655304 -2.139057 3.761467 -1.190933e-14 0.6985568 -0.3552263 -1.215823 -1.449786 1.069529 -1.880733 -6.738172e-15 -0.3492784 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example2_species_scaling2_from_vegan000066400000000000000000000007131464262511300314070ustar00rootroot000000000000001.381987 -1.714964 0.6322725 0.007128982 0.1205124 -0.07231043 -0.00815886 0.9191784 -1.254308 -1.178743 -0.007128982 -0.1205124 -0.07231043 0.00815886 3.398972 0.4461683 0.4066916 0.7493367 0.07938928 5.052266e-16 0.03294182 2.523533 0.4469328 -0.4134120 -0.639449 -0.064033 6.868811e-16 0.03354913 -0.5315534 -1.342640 0.4641556 -0.4120414 0.1983362 -2.157147e-16 0.006048367 -0.2886182 -0.5714919 -0.4065273 0.2060207 -0.0991681 -1.220491e-16 -0.003024184 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_X000066400000000000000000000017501464262511300242010ustar00rootroot000000000000001.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 2.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 3.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 4.000000000000000000e+00 
0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 5.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 6.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 7.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 8.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 9.000000000000000000e+00 1.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+01 0.000000000000000000e+00 0.000000000000000000e+00 1.000000000000000000e+00 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_Y000066400000000000000000000003041464262511300241740ustar00rootroot000000000000001 0 0 0 0 0 2 4 4 0 0 0 0 0 0 5 6 1 0 1 0 0 0 0 0 2 3 11 4 0 0 8 1 6 2 0 11 5 17 7 0 0 6 6 2 9 6 0 0 6 2 10 1 4 9 7 13 10 0 0 4 5 4 7 8 0 0 4 3 6 6 4 7 9 10 13 0 0 6 2 0 5 10 0 0 2 4 0 1 3 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_biplot_scaling1000066400000000000000000000001261464262511300270400ustar00rootroot00000000000000-0.169747 0.630691 0.760769 -0.994017 0.060953 -0.044937 0.184353 -0.974868 0.030987 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_biplot_scaling2000066400000000000000000000001261464262511300270410ustar00rootroot00000000000000-0.169747 0.630691 0.760769 -0.994017 0.060953 -0.044937 0.184353 -0.974868 0.030987 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_sample_constraints_scaling1000066400000000000000000000015401464262511300314600ustar00rootroot00000000000000 0.418807 -1.331731 -0.092311 -0.357222 -0.201114 -0.077319 0.024296 -0.127088 0.023765 0.402119 -1.323781 0.064653 0.774418 0.400533 0.124249 0.046928 0.031790 -0.027526 0.385431 -1.315831 0.221617 -0.893930 -0.432358 -0.106748 -0.138400 0.169415 0.011485 0.670971 0.216324 -0.436938 0.191154 -0.206474 0.219195 -0.110093 0.000480 -0.000760 
-0.586949 0.028310 -0.314662 -0.175743 0.184258 0.048211 0.057137 0.011500 0.017735 0.637595 0.232225 -0.123010 0.082373 -0.107580 -0.221430 0.169356 0.010714 -0.009033 -0.620325 0.044211 -0.000734 -0.120892 -0.020910 -0.060213 -0.067075 -0.013264 -0.035699 0.604219 0.248126 0.190919 -0.001622 0.236689 -0.162758 -0.145629 -0.005405 0.015899 -0.653701 0.060112 0.313195 0.335670 -0.188566 0.011227 0.008564 0.001462 0.019120 0.570843 0.264027 0.504847 -0.367420 0.068042 0.303396 0.104854 -0.008683 -0.009463 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_sample_constraints_scaling2000066400000000000000000000015431464262511300314640ustar00rootroot00000000000000 0.692139 -3.080537 -0.328747 -1.245288 -1.072935 -0.506242 0.244127 -3.631648 1.163119 0.664560 -3.062146 0.230249 2.699651 2.136829 0.813520 0.471530 0.908423 -1.347244 0.636980 -3.043755 0.789246 -3.116275 -2.306609 -0.698930 -1.390626 4.841176 0.562103 1.108876 0.500397 -1.556068 0.666370 -1.101532 1.435176 -1.106200 0.013703 -0.037180 -0.970016 0.065487 -1.120607 -0.612647 0.983007 0.315662 0.574110 0.328630 0.868028 1.053717 0.537179 -0.438075 0.287157 -0.573935 -1.449806 1.701670 0.306164 -0.442116 -1.025175 0.102269 -0.002614 -0.421433 -0.111552 -0.394242 -0.673964 -0.379019 -1.747250 0.998559 0.573961 0.679918 -0.005653 1.262724 -1.065657 -1.463266 -0.154459 0.778140 -1.080333 0.139050 1.115379 1.170159 -1.005992 0.073507 0.086046 0.041765 0.935820 0.943400 0.610742 1.797911 -1.280840 0.363003 1.986480 1.053561 -0.248131 -0.463165 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_site_scaling1_from_vegan000066400000000000000000000017301464262511300307200ustar00rootroot000000000000000.4299704 -1.332221 0.06167733 -0.3572215 -0.2011142 -0.07731854 0.02429630 -0.1270878 0.02376451 0.3538402 -1.299809 -0.2660402 0.7744182 0.4005334 0.1242493 0.04692828 0.03178983 -0.02752649 0.4615241 -1.362879 0.6006922 -0.8939302 -0.4323576 -0.1067479 -0.1383998 0.1694146 0.01148472 0.6730478 
0.4632174 -0.5265713 0.191154 -0.2064744 0.2191950 -0.1100927 0.0004795276 -0.000759659 -0.5924553 -0.02607423 -0.1955122 -0.1757430 0.1842576 0.04821126 0.05713738 0.01150025 0.01773529 0.631247 0.1986126 -0.1796544 0.08237334 -0.1075801 -0.2214295 0.1693559 0.01071407 -0.009033185 -0.5775539 -0.03661717 0.03720809 -0.1208918 -0.02090970 -0.06021282 -0.06707516 -0.01326357 -0.03569931 0.5731844 -0.04684914 0.1477290 -0.001621561 0.2366887 -0.1627582 -0.1456291 -0.005405227 0.01589872 -0.6946946 0.2120237 0.1343200 0.3356701 -0.1885661 0.01122678 0.008563613 0.001461538 0.01912039 0.6250085 0.4474569 0.7713248 -0.3674199 0.06804227 0.3033959 0.1048540 -0.008683241 -0.009463257 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_site_scaling2_from_vegan000066400000000000000000000016131464262511300307210ustar00rootroot000000000000000.7105873 -3.081668 0.2196514 -1.245288 -1.072935 -0.5062419 0.2441267 -3.631648 1.163119 0.5847714 -3.006693 -0.9474487 2.699651 2.136829 0.81352 0.4715303 0.908423 -1.347244 0.7627343 -3.152586 2.139244 -3.116275 -2.306609 -0.6989299 -1.390626 4.841176 0.562103 1.112307 1.071506 -1.875277 0.6663702 -1.101532 1.435176 -1.106200 0.01370293 -0.03718039 -0.9791168 -0.06031443 -0.6962774 -0.6126467 0.9830066 0.3156624 0.5741102 0.3286300 0.8680277 1.043226 0.459427 -0.6398028 0.2871566 -0.5739354 -1.449806 1.70167 0.3061643 -0.442116 -0.9544901 -0.08470217 0.1325091 -0.4214334 -0.1115523 -0.3942425 -0.673964 -0.3790186 -1.747250 0.9472688 -0.1083706 0.5261072 -0.005652824 1.262724 -1.065657 -1.463266 -0.1544592 0.7781397 -1.148082 0.4904493 0.4783537 1.170159 -1.005992 0.07350714 0.08604627 0.04176476 0.9358196 1.032916 1.035049 2.746918 -1.280840 0.3630026 1.986480 1.053561 -0.2481314 -0.4631652 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_species_scaling1_from_vegan000066400000000000000000000014641464262511300314130ustar00rootroot000000000000000.1823706 0.6532424 -0.7230468 -0.006709309 -0.4387088 0.5613251 -0.1226234 
-1.215044 0.2284292 0.2336159 0.7020405 1.408284 -0.4924596 -0.1434352 0.937947 0.4323209 1.361292 -0.1116756 -1.678295 0.2216763 -0.7060727 -0.3653404 0.6936791 0.1597882 0.4669268 0.7694895 1.713529 -1.712479 0.2535825 0.7869925 0.7796179 -1.300374 -0.1696223 -0.5366468 -0.9018874 1.255149 1.741418 1.242592 -1.560116 0.7790618 -1.728251 0.8161103 -1.198472 1.189485 -1.869424 1.650260 1.327675 2.421391 -1.359427 1.595568 2.150528 2.131749 -2.371434 -2.151822 0.4218299 -0.4121355 -0.7269577 1.510841 0.3772344 -1.23206 1.275166 0.1284123 -0.5996156 0.2422044 -1.983235 -0.05430946 0.1839251 1.891113 -0.2729081 -1.999644 -0.06107332 -0.3832015 0.6837066 -1.637614 0.7681627 -2.406462 -0.7918731 -2.188509 -0.06317185 -0.1040515 -0.6006456 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/example3_species_scaling2_from_vegan000066400000000000000000000015611464262511300314120ustar00rootroot000000000000000.1103509 0.2824 -0.2030290 -0.001924623 -0.08223286 0.08573143 -0.01220389 -0.04251988 0.004667199 0.1413590 0.3034956 0.3954412 -0.1412663 -0.02688592 0.1432531 0.04302603 0.04763777 -0.002281724 -1.015522 0.09583179 -0.1982627 -0.1048010 0.1300252 0.02440453 0.04647012 0.02692792 0.03501032 -1.036207 0.1096250 0.2209847 0.2236401 -0.2437459 -0.02590649 -0.05340889 -0.03156112 0.02564484 1.053717 0.5371787 -0.4380751 0.2234806 -0.3239485 0.1246449 -0.1192759 0.04162547 -0.03819552 0.9985587 0.5739606 0.6799181 -0.3899634 0.2990779 0.328451 0.2121588 -0.08298719 -0.0439654 0.2552457 -0.1781683 -0.2041272 0.4333976 0.07070992 -0.1881731 0.1269088 0.004493729 -0.01225117 0.1465559 -0.8573625 -0.01524991 0.0527605 0.3544758 -0.04168137 -0.1990112 -0.002137232 -0.007829461 0.4137051 -0.707949 0.2156974 -0.6903142 -0.148431 -0.3342519 -0.006287074 -0.003641234 -0.01227222 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/exp_PCoAEigenResults_site000066400000000000000000000040161464262511300272060ustar00rootroot00000000000000-2.584654599999999802e-01 1.739995500000000028e-01 
3.828758000000000161e-02 -1.944775099999999923e-01 8.311759999999999982e-02 2.624303299999999894e-01 -2.316363999999999923e-02 -1.847939999999999999e-02 0.000000000000000000e+00 -2.710011399999999737e-01 -1.859513000000000138e-02 -8.648419000000000245e-02 1.180642499999999956e-01 -1.988083599999999895e-01 -2.117236000000000118e-02 -1.910240300000000113e-01 1.556465900000000013e-01 0.000000000000000000e+00 2.350779000000000063e-01 9.625192999999999943e-02 -3.457927299999999926e-01 -3.208630000000000076e-03 -9.637777000000000149e-02 4.570253999999999983e-02 1.854728099999999880e-01 4.040939999999999810e-02 0.000000000000000000e+00 2.614077000000000067e-02 -1.114596999999999984e-02 1.476606000000000030e-01 2.908766099999999799e-01 2.039454699999999898e-01 6.197123999999999688e-02 1.016413300000000020e-01 1.056909999999999933e-01 0.000000000000000000e+00 2.850075499999999984e-01 -1.925498999999999961e-02 6.232634000000000091e-02 1.381267999999999940e-01 -1.047986000000000056e-01 9.517206999999999750e-02 -1.296361000000000041e-01 -2.206871699999999881e-01 0.000000000000000000e+00 2.046363300000000052e-01 -1.393611499999999892e-01 2.915138199999999791e-01 -1.815667900000000057e-01 -1.595801299999999867e-01 -2.464121000000000020e-02 8.662524000000000612e-02 9.962215000000000653e-02 0.000000000000000000e+00 2.334824000000000066e-01 2.252579700000000018e-01 -1.886231000000000019e-02 -1.077299800000000030e-01 1.771089999999999887e-01 -1.929058399999999951e-01 -1.498194700000000101e-01 3.835489999999999733e-02 0.000000000000000000e+00 -9.496319000000000288e-02 -4.209747999999999823e-01 -1.548694499999999918e-01 -8.984274999999999900e-02 1.526181899999999869e-01 -3.342326999999999798e-02 -2.512247999999999912e-02 -5.089885000000000242e-02 0.000000000000000000e+00 -3.599151600000000117e-01 1.138225999999999960e-01 6.622034000000000253e-02 2.975799999999999973e-02 -5.722540999999999739e-02 -1.931335100000000082e-01 1.450263300000000088e-01 -1.496586099999999975e-01 
0.000000000000000000e+00 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/exp_PCoAzeros_site000066400000000000000000000114441464262511300257420ustar00rootroot000000000000002.407881329999999875e-01 2.336771619999999938e-01 2.142755920000000147e-02 2.933648560000000002e-02 2.323964740000000057e-02 7.204402660000000513e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.136560330000000035e-01 1.167860260000000011e-01 4.047749600000000170e-02 4.171719389999999777e-02 5.458618300000000340e-02 5.687754019999999994e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2.393598090000000067e-01 7.600313170000000396e-02 8.108063910000000429e-02 2.004822419999999911e-02 4.581686060000000373e-03 6.111682289999999934e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2.129341230000000029e-01 6.047901669999999896e-02 1.108399510000000060e-01 1.568068159999999878e-01 4.018964929999999797e-02 1.244166470000000052e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 2.494895480000000054e-01 6.933176589999999473e-02 6.654290229999999806e-02 1.767019729999999914e-02 6.618815479999999585e-02 1.519420850000000045e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.487285489999999877e-01 7.783569120000000119e-02 5.321890979999999854e-02 
3.189647279999999890e-02 8.112682020000000493e-02 2.872780789999999917e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 5.139397400000000199e-02 1.623059860000000132e-01 2.992259149999999986e-03 1.115089660000000010e-01 3.296634109999999757e-02 2.308574460000000091e-04 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 1.153621120000000028e-02 3.446314899999999848e-01 1.169549180000000084e-02 3.466400010000000170e-02 1.169378239999999992e-02 1.366916789999999930e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.932616380000000106e-03 5.908732849999999946e-03 2.629546940000000027e-02 7.368117420000000473e-02 1.939265150000000007e-02 2.257115020000000083e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.856929369999999996e-02 8.874156730000000784e-03 8.037018469999999970e-02 1.252655340000000117e-01 6.270869140000000230e-02 9.891935890000000051e-03 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 4.211582109999999907e-02 5.655546539999999761e-02 3.122599229999999948e-01 4.320305709999999744e-02 3.457770230000000149e-03 2.089115019999999989e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 
0.000000000000000000e+00 5.158303490000000213e-01 2.909775239999999943e-02 9.484347120000000131e-02 2.340326009999999998e-02 3.819325810000000210e-02 4.608673130000000284e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.180272690000000013e-01 1.500964499999999924e-01 6.675638650000000063e-02 3.818534149999999738e-02 1.598195930000000098e-02 3.005567279999999883e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 3.237616709999999731e-01 4.748628319999999986e-02 5.282383000000000217e-02 4.852971259999999676e-02 7.869736770000000659e-02 4.101528460000000015e-02 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 0.000000000000000000e+00 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_biplot_from_vegan.csv000066400000000000000000000033071464262511300303250ustar00rootroot00000000000000"RDA1","RDA2","RDA3","RDA4","RDA5","RDA6" "N",-0.374511707101879,0.279141775958977,-0.113262015862774,0.0291604466058106,-0.00381582755421537,0.0424863529236013 "P",-0.0788656563464819,-0.472632677542489,-0.0651887823463463,-0.0513067111958969,-0.0626909273376982,-0.00110889996021487 "K",-0.201461843066864,-0.381022612236819,-0.0251340010064783,0.0598912302377648,-0.0959494580327501,-0.0171435160344495 "Ca",-0.235421762881426,-0.415332965840277,0.0156842418693768,-0.0304645964200932,-0.0193425068993681,0.0307301502356508 "Mg",-0.258006129373729,-0.351149186002239,0.0198906297437563,-0.0603137013913655,-0.062802597760679,-0.0064268580320466 
"S",0.110423012128282,-0.327827092711384,0.0273964647390279,0.0128822505351356,-0.131322241122595,-0.0161293619330301 "Al",0.622794612442927,0.223726801861401,-0.0774758868765043,0.0133921880598012,-0.0975006207689577,-0.0375948643932326 "Fe",0.538546175590668,0.252910024481109,-0.128206469586592,-0.00122698444588682,-0.0370755089903238,-0.0301862362243074 "Mn",-0.53181428311314,-0.387983018850023,-0.0369812755533416,0.028195676589086,0.0139082147231988,-0.010802072964436 "Zn",-0.189256949293269,-0.321619350980509,-0.0296694989201669,-0.0516020128947528,-0.100811021654885,0.0631043516762351 "Mo",0.0825455222136991,0.105558125042181,0.0444498655868074,-0.013149221839644,-0.129692423248781,-0.0279316900201847 "Baresoil",-0.584523854747717,0.0175764195098462,0.249347810472773,-0.0433132869859633,-0.0219797931388049,-0.0115874538804695 "Humdepth",-0.528249123722545,-0.348123615804582,0.139315047829644,0.0255225451725187,-0.00106320905614231,-0.0391497797327695 "pH",0.485293188422395,0.145941294383608,-0.144156426452744,-0.0680477191104673,0.017879488382429,0.028893908227529 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_constraints_from_vegan.csv000066400000000000000000000053541464262511300314070ustar00rootroot00000000000000"RDA1","RDA2","RDA3","RDA4","RDA5","RDA6" "18",-1.94540324526486,1.35619693843417,0.703939871143434,-0.273017904915305,0.430167111988789,-0.48660737084197 "15",0.141838231206451,-0.829758087643046,0.173141397491083,-0.0156875043824831,0.0136619216795438,-0.408528269058105 "24",-1.43248927964489,-0.54425653333442,-0.306236786536847,-0.979876701440387,-0.731804914672737,0.321212796309646 "27",-2.80798626244276,-1.82787096826377,-0.915538594054524,-0.10113999881667,0.165371808326588,-0.272482540524553 "23",-1.32166553682556,-0.588228004227234,0.79516884314486,-0.100577754862558,0.312457897704011,-0.162743742127975 "19",-0.525042043686618,-1.34621680872358,0.603541116383373,-0.350740907365001,-0.10559130781142,-0.222556236954221 
"22",-1.6970783295333,-0.00841159288220255,0.350604237250266,1.06598318305367,0.329079144655896,0.216254552790676 "16",-1.02617620074073,1.16222063659566,0.276350890008082,0.508682734424414,0.110047918087361,0.229152398042771 "28",-2.81539141691889,-2.87330917539132,-1.4254501909053,0.278788463109637,-0.0715764088962425,0.0383464804901234 "13",-0.56793332760884,0.978469150560303,-0.0102101554071479,1.15471089199982,-0.893165637481288,-0.179567246057747 "14",0.123702948593575,-0.279611839517576,0.857490241180555,0.288565237262797,-0.0962678825382923,0.386913110717701 "20",-0.889372681662648,-0.172942499513205,1.20564982758643,-0.384640065668512,-0.329123722655264,0.40082339960546 "25",-1.25287864077043,-0.862513780744027,-0.209243578856711,-0.147987569283365,0.455041774670079,0.00853808793906775 "7",0.836595155865492,3.14264733462783,-0.552874592253968,-0.309850905157291,0.156192445586091,-0.608122966237938 "5",-1.03320113118202,2.71896020759242,-1.00737497595167,-0.217427871913452,0.20643732081764,0.836448050498042 "6",0.261185057560469,1.0060253513401,-0.216278932347733,0.191968910927705,0.205015879586198,-0.202134531184565 "3",2.0251525928349,0.838200337107143,-0.890575565259071,-0.221678715814349,0.0510468211613041,-0.177568585625492 "4",0.304232766529097,1.06258912371904,-0.346193660344245,-0.0766334378573768,-0.817513397231726,-0.386397972418401 "2",3.8927613225141,-0.214065093420055,-0.421802196571513,0.0641876476814947,0.0763106413897994,0.455052488664798 "9",4.49140913947389,-1.24888267627012,0.716772000188893,0.0761750818231677,-0.0738996910301799,-0.0631749857609068 "12",1.22495554555309,-0.302280829527584,0.345679740337339,0.568930336840597,0.189701343000118,-0.0324863384828909 "10",3.8583881810421,-1.69957275456566,-0.30263129042612,-0.244748829936061,0.13487999114786,-0.0898553381384633 "11",1.48652810795229,-0.077774982953761,-0.500871495640448,-0.0865802555774825,0.246429358792296,0.261018343487596 
"21",-1.33213095284393,0.610386547000902,1.07694384984097,-0.687404064133014,0.0371015837235694,0.138466414867343 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_eigvals_from_vegan.csv000066400000000000000000000010501464262511300304570ustar00rootroot00000000000000"x" "RDA1",820.104210710977 "RDA2",399.28474306136 "RDA3",102.561678139934 "RDA4",47.6316939665912 "RDA5",26.8382218045756 "RDA6",24.0480874768085 "RDA7",19.0643755874934 "RDA8",10.1669953619382 "RDA9",4.42878597644952 "RDA10",2.27203567668609 "RDA11",1.5353256650454 "RDA12",0.925527669731662 "RDA13",0.715510164471211 "RDA14",0.311861227346199 "PC1",186.191732488638 "PC2",88.4641941356934 "PC3",38.1882774045901 "PC4",18.4020745527723 "PC5",12.8394334391492 "PC6",10.551972429882 "PC7",5.51940479440112 "PC8",4.52105681330178 "PC9",1.09220616230999 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_propexpl_from_vegan.csv000066400000000000000000000005261464262511300307050ustar00rootroot00000000000000"x" "RDA1",0.44921 "RDA2",0.21871 "RDA3",0.05618 "RDA4",0.02609 "RDA5",0.0147 "RDA6",0.01317 "RDA7",0.01044 "RDA8",0.00557 "RDA9",0.00243 "RDA10",0.00124 "RDA11",0.00084 "RDA12",0.00051 "RDA13",0.00039 "RDA14",0.00017 "PC1",0.10199 "PC2",0.04846 "PC3",0.02092 "PC4",0.01008 "PC5",0.00703 "PC6",0.00578 "PC7",0.00302 "PC8",0.00248 "PC9",6e-04 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_sites_from_vegan.csv000066400000000000000000000053561464262511300301710ustar00rootroot00000000000000"RDA1","RDA2","RDA3","RDA4","RDA5","RDA6" "18",-0.725180341740889,1.32304743578072,0.536735571384135,-0.39768741234737,0.233754657058704,-0.561316258413971 "15",-1.93529754271851,-0.766164733988552,-0.273348455556674,0.390736768713259,0.0177606558885408,-0.169349974419623 "24",-1.81353741848749,-0.970245493575222,-0.0374209701843787,-0.872025399986459,-0.789699995023076,0.493449832483016 "27",-2.33510121426858,-2.13826666457436,-0.985623024073038,-0.145132098331367,0.583205282290284,-0.313874765445323 
"23",-1.41317135065566,-0.1177033753694,0.766850507492372,-0.501223786646214,0.401907289450705,-0.278342018223195 "19",-0.0575697569183517,-0.821681038610851,0.206606955195771,-0.182752763015451,-0.130617003087652,-0.244482299124097 "22",-1.84743663448918,-0.634524208599755,0.802635258758064,1.66371301803693,0.685511124204389,0.639715037447257 "16",-1.50266104356433,0.202771968994942,0.343618483724244,1.10930062109802,0.325281324139236,0.681725828414122 "28",-2.88734608017533,-2.8973024244037,-1.64121631383754,0.0549907838317931,-0.142301693170976,-0.220948911082786 "13",-0.216157151714584,1.4060194258153,-0.0466575925757337,0.799855819301344,-1.0946823960693,-0.416466375199514 "14",-1.20534703154947,0.418683116827144,1.3597696084289,0.513403260974061,-0.776146451075579,0.28265707897677 "20",-1.22313804827702,0.22033914415852,0.616483727488239,-0.430533982693562,-0.160467249867719,0.177868834807142 "25",-1.77661950967122,-0.825044182048943,0.361003390009536,-0.107164123598687,-0.148676491945586,0.237023999173496 "7",-0.678511723219697,2.89559652272012,-0.547824463296798,-0.235728554420752,0.165597328353004,-0.710828270319712 "5",-0.487596122987802,3.18852947968577,-1.41737309023615,-0.422865480680066,0.253132238647981,0.727755243784674 "6",-0.0243012352784496,2.50490917263305,-0.360296245554514,-0.0702120962654929,0.0909124415194519,-0.936567113418783 "3",2.80827785868984,0.455143313772751,-0.613132585731335,0.00451399728070384,0.0225377341796233,0.258222116793284 "4",0.963314315837764,0.84576412181764,-0.066781701948557,0.0887932875301872,-0.700374853742517,-0.116961021287795 "2",3.99917033463046,-0.482115663123818,-0.454217823172445,-0.0665100409733586,0.266626884554847,0.381471681881249 "9",4.36400742802721,-1.736534702518,0.405907326952517,-0.0368661340355564,0.0832432640777509,-0.136409557861582 "12",3.03564636652536,-0.827580749873411,0.102186438399489,-0.171696624931404,0.288862751499394,-0.0309441831749462 
"10",4.48325878385124,-1.33798990585379,0.229165601207832,0.0330101538089084,0.11942053442103,-0.0256496604321859 "11",0.817850354912684,0.213402853934839,-0.88018842594682,-0.127740767588512,0.086264341451757,0.125072918411801 "21",-0.342553236757974,-0.119053413601001,1.59311782307288,-0.890178445060962,0.318948282245704,0.157177836230702 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/vare_rda_species_from_vegan.csv000066400000000000000000000126071464262511300304720ustar00rootroot00000000000000"RDA1","RDA2","RDA3","RDA4","RDA5","RDA6" "Callvulg",-0.226273730144909,0.693800038650831,0.338218922488515,6.67088802095297,-7.21896322666435,-0.779857240089175 "Empenigr",-0.238899164401276,-0.631301245805765,2.60480743178841,-1.55330692069587,5.61834172767659,-2.20332774370264 "Rhodtome",-0.17142303305129,-0.0654248519425288,0.185549293155117,-0.217665325317797,0.322372965190883,0.0853972606062568 "Vaccmyrt",-1.09428970452297,-1.23571473681342,-0.940502994941989,0.218545935413684,0.657932487096486,0.837134528181776 "Vaccviti",-0.162201143615588,-1.38843399510951,4.76670106297968,-3.77408048235071,5.43168646890937,-2.79074856413388 "Pinusylv",0.0369862347468032,-0.0238456401875,0.0478650516626519,0.0763337150565842,0.0639052442139435,0.0825651654004402 "Descflex",-0.147574817002457,-0.167722789057799,-0.27793799775637,0.0344024518701595,0.288412456210204,-0.299444024885208 "Betupube",-0.00425890842744653,0.00301626626963975,0.023633639469157,-0.0287099967609287,0.008285119424158,0.0144904045944417 "Vacculig",-0.00617517925092514,0.586329345378346,-0.424343136945186,-0.267181701965188,0.899708457788426,-2.10623236401806 "Diphcomp",-0.0193891282832212,0.0848409769290093,0.0886832373456673,-0.125925466951118,0.302273874297346,-0.479439541135259 "Dicrsp",-0.584662822766602,-0.554381688683121,-0.76501102421792,-4.62706081016606,-3.61916960506924,3.01061244828378 
"Dicrfusc",-1.28109719764106,0.103470480549643,2.67976193286694,10.2747384364546,6.22585244770623,4.2615539016749 "Dicrpoly",-0.0527337929905133,-0.00090081484981692,0.221880786125258,-0.683724436531866,-0.345681664095091,0.394727452877124 "Hylosple",-0.522637153726359,-0.918065056916316,-1.62638166301937,0.295469951213255,-0.0135580694056817,-0.334186893959993 "Pleuschr",-5.50878076569324,-8.8886978183423,-8.37459165372512,1.41924983472743,1.57437830231155,-2.29995876920357 "Polypili",0.000632590233791712,0.0102615110484168,-0.0159075161955889,-0.018096445320708,0.031500746301268,0.0786160198924179 "Polyjuni",-0.12340157237603,-0.171463376700033,0.0728402465477063,-0.446428512892154,1.22564488464636,-0.0622820830373054 "Polycomm",-0.0103528544039597,-0.00993084811579174,0.0216129382454449,-0.0489313397543105,0.0229047076022281,-0.011862059575897 "Pohlnuta",0.00970583089927341,-0.018349828466775,0.0527104964420516,-0.0571342448078905,-0.020164288017587,0.0418920415970837 "Ptilcili",-0.159464075362797,0.10006784787628,1.05136092046601,-1.2908733474554,0.33545113524691,0.404450545385935 "Barbhatc",-0.0466890634925938,0.0382613228575175,0.276662021782069,-0.373812730583367,0.0219595583756616,0.138309628865647 "Cladarbu",-0.416846189233898,5.31641622718912,-0.930205800774416,2.81811168990724,-0.554814634260353,-10.4595841461921 "Cladrang",1.29592368856734,8.36409911724857,-8.96098432978265,-0.969135607167511,4.09265280992759,3.38742710658704 "Cladstel",12.9993162839447,-4.60357024457592,-2.45511784152944,1.63313879335609,0.811266886992104,-1.04082533645468 "Cladunci",-0.139665650154971,-0.200952644353237,2.01080043905276,0.112942947894908,-2.80683515519727,4.71782772042145 "Cladcocc",0.0140440451319673,0.0120659834096241,0.00879969627655237,0.0504748534035485,0.0439656404180021,0.0166991824468257 "Cladcorn",-0.0171421483490833,-0.0318891939249154,0.0851353096044147,-0.0712924559399341,0.247868762857871,-0.0985092679795218 
"Cladgrac",-0.014291516155796,0.0207918228721213,0.0478283724736316,-0.0599532273483947,-0.0244356304170684,-0.0119910291097221 "Cladfimb",0.00461344583647884,0.00630264328769572,0.0615207799865213,0.0490653816228835,0.0599873314053227,-0.0755759161054924 "Cladcris",-0.00958758815249997,-0.0295565794036219,0.243789050898361,0.130760954099805,0.0658965803482975,-0.231256073310349 "Cladchlo",0.0187488915228099,-0.00928114626307454,0.0285623673752264,-0.0587116824010891,-0.00900681474395845,-0.0135004250016505 "Cladbotr",-0.00660812579632819,0.00125815962588916,0.0253237320949088,-0.0237179502029999,-0.0196117567973272,0.00711937194787014 "Cladamau",-0.00136745162702629,0.00480205186223178,-5.41472575201337e-05,-0.00654936924677102,0.0139883033489021,-0.00709922460632658 "Cladsp",0.00992686294206172,-0.00373711675547203,0.0158895402267187,0.0215400183589053,-0.0307100869494753,-0.0192034215304599 "Cetreric",0.00737025600460116,0.00496065101699207,0.0252933788488033,-0.0709698476248171,-0.295480904895558,0.15202504773877 "Cetrisla",0.0168171538461378,-0.00955018178379953,0.064611315838899,-0.0807719773413275,0.00582196133393798,0.0526210231757201 "Flavniva",0.0758578271997996,0.25014312602853,-0.305377423821448,-0.181549650656412,-2.66963212661746,-1.46522484606432 "Nepharct",-0.061783073424573,-0.0763675615694388,-0.106412306793283,-0.155075628433089,0.7492348600785,-0.0401582665032977 "Stersp",-0.110643408686953,0.675909058528055,-0.847389185822014,-0.584961498181175,0.824411871660421,2.59432085604071 "Peltapht",-0.00433509537730518,-0.00792550879085397,0.00628297623647816,-0.0102203772879107,0.0660236532497632,-0.00750239908018623 "Icmaeric",-0.001603267524481,0.00986109695445918,-0.00702150205760372,0.0019342085956161,0.010326600170813,0.0330394729689556 "Cladcerv",0.00116172819790058,0.000657287114342154,-0.00296642933466362,-0.000910130438819726,-0.00978815736822817,-0.00203673989458803 
"Claddefo",-0.0471902500908342,-0.0319259473091497,0.429430035290953,0.0132849661625517,-0.0669419782195097,0.146247353879757 "Cladphyl",0.0267818636482677,-0.0171932277857551,0.00184089496193447,-0.00923266735869818,0.0239144294161575,0.0171690545470201 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/varechem.csv000066400000000000000000000033521464262511300245600ustar00rootroot00000000000000"ID","N","P","K","Ca","Mg","S","Al","Fe","Mn","Zn","Mo","Baresoil","Humdepth","pH" "18",19.8,42.1,139.9,519.4,90,32.3,39,40.9,58.1,4.5,0.3,43.9,2.2,2.7 "15",13.4,39.1,167.3,356.7,70.7,35.2,88.1,39,52.4,5.4,0.3,23.6,2.2,2.8 "24",20.2,67.7,207.1,973.3,209.1,58.1,138,35.4,32.1,16.8,0.8,21.2,2,3 "27",20.6,60.8,233.7,834,127.2,40.7,15.4,4.4,132,10.7,0.2,18.7,2.9,2.8 "23",23.8,54.5,180.6,777,125.8,39.5,24.2,3,50.1,6.6,0.3,46,3,2.7 "19",22.8,40.9,171.4,691.8,151.4,40.8,104.8,17.6,43.6,9.1,0.4,40.5,3.8,2.7 "22",26.6,36.7,171.4,738.6,94.9,33.8,20.7,2.5,77.6,7.4,0.3,23,2.8,2.8 "16",24.2,31,138.2,394.6,45.3,27.1,74.2,9.8,24.4,5.2,0.3,29.8,2,2.8 "28",29.8,73.5,260,748.6,105.3,42.5,17.9,2.4,106.6,9.3,0.3,17.6,3,2.8 "13",28.1,40.5,313.8,540.7,118.9,60.2,329.7,109.9,61.7,9.1,0.5,29.9,2.2,2.8 "14",21.8,38.1,146.8,512.2,75,36.6,92.3,4.6,29,8.1,0.5,33.3,2.7,2.7 "20",26.2,61.9,202.2,741.2,86.3,48.6,124.3,23.6,94.5,10.2,0.6,56.9,2.5,2.9 "25",22.8,50.6,151.7,648,64.8,30.2,12.1,2.3,122.9,8.1,0.2,23.7,2.6,2.9 "7",30.5,24.6,78.7,188.5,55.5,25.3,294.9,123.8,10.1,3,0.4,18.6,1.7,3.1 "5",33.1,22.7,43.6,240.3,25.7,14.9,39,8.4,26.8,8.4,0.2,8.1,1,3.1 "6",19.1,26.4,61.1,259.1,37,21.4,155.1,81.4,20.6,4,0.6,5.8,1.9,3 "3",31.1,32.3,73.7,219,52.5,25.5,304.6,204.4,14.2,2.6,0.5,3.6,1.8,3.3 "4",18,64.9,224.5,517.6,59.7,52.9,435.1,101.2,38,9.5,1.1,21.3,1.8,2.9 "2",22.3,47.4,165.9,436.1,64.3,42.3,316.5,200.1,28.2,7.2,0.3,0.01,1.5,2.9 "9",15,48.4,127.4,499.6,75.1,46.9,227.1,32.2,35.1,8.9,0.7,6.07,2.2,3 "12",16,32.7,126.4,471.4,61.3,31.1,108.8,9.5,26.4,6,0.4,11.2,2.2,2.9 
"10",14.3,62.8,215.2,709.7,102.5,48.6,168.2,32,46.9,8.7,0.05,4.4,1.2,3.2 "11",16.7,55.8,205.3,1169.7,126.3,35.9,253.6,96.4,25.1,8.2,0.05,7.6,1.1,3.6 "21",21,26.5,104.4,484.8,74.4,22.2,35.8,5.9,27.5,5.3,0.2,56,2.5,3 scikit-bio-0.6.2/skbio/stats/ordination/tests/data/varespec.csv000066400000000000000000000105731464262511300246010ustar00rootroot00000000000000"ID","Callvulg","Empenigr","Rhodtome","Vaccmyrt","Vaccviti","Pinusylv","Descflex","Betupube","Vacculig","Diphcomp","Dicrsp","Dicrfusc","Dicrpoly","Hylosple","Pleuschr","Polypili","Polyjuni","Polycomm","Pohlnuta","Ptilcili","Barbhatc","Cladarbu","Cladrang","Cladstel","Cladunci","Cladcocc","Cladcorn","Cladgrac","Cladfimb","Cladcris","Cladchlo","Cladbotr","Cladamau","Cladsp","Cetreric","Cetrisla","Flavniva","Nepharct","Stersp","Peltapht","Icmaeric","Cladcerv","Claddefo","Cladphyl" "18",0.55,11.13,0,0,17.8,0.07,0,0,1.6,2.07,0,1.62,0,0,4.67,0.02,0.13,0,0.13,0.12,0,21.73,21.47,3.5,0.3,0.18,0.23,0.25,0.25,0.23,0,0,0.08,0.02,0.02,0,0.12,0.02,0.62,0.02,0,0,0.25,0 "15",0.67,0.17,0,0.35,12.13,0.12,0,0,0,0,0.33,10.92,0.02,0,37.75,0.02,0.23,0,0.03,0.02,0,12.05,8.13,0.18,2.65,0.13,0.18,0.23,0.25,1.23,0,0,0,0,0.15,0.03,0,0,0.85,0,0,0,1,0 "24",0.1,1.55,0,0,13.47,0.25,0,0,0,0,23.43,0,1.68,0,32.92,0,0.23,0,0.32,0.03,0,3.58,5.52,0.07,8.93,0,0.2,0.48,0,0.07,0.1,0.02,0,0,0.78,0.12,0,0,0.03,0,0,0,0.33,0 "27",0,15.13,2.42,5.92,15.97,0,3.7,0,1.12,0,0,3.63,0,6.7,58.07,0,0,0.13,0.02,0.08,0.08,1.42,7.63,2.55,0.15,0,0.38,0.12,0.1,0.03,0,0.02,0,0.02,0,0,0,0,0,0.07,0,0,0.15,0 "23",0,12.68,0,0,23.73,0.03,0,0,0,0,0,3.42,0.02,0,19.42,0.02,2.12,0,0.17,1.8,0.02,9.08,9.22,0.05,0.73,0.08,1.42,0.5,0.17,1.78,0.05,0.05,0,0,0,0,0.02,0,1.58,0.33,0,0,1.97,0 "19",0,8.92,0,2.42,10.28,0.12,0.02,0,0,0,0,0.32,0.02,0,21.03,0.02,1.58,0.18,0.07,0.27,0.02,7.23,4.95,22.08,0.25,0.1,0.25,0.18,0.1,0.12,0.05,0.02,0,0,0,0,0.02,0,0.28,0,0,0,0.37,0 
"22",4.73,5.12,1.55,6.05,12.4,0.1,0.78,0.02,2,0,0.03,37.07,0,0,26.38,0,0,0,0.1,0.03,0,6.1,3.6,0.23,2.38,0.17,0.13,0.18,0.2,0.2,0.02,0,0,0.02,0.02,0,0,0,0,0,0,0,0.15,0 "16",4.47,7.33,0,2.15,4.33,0.1,0,0,0,0,1.02,25.8,0.23,0,18.98,0,0.02,0,0.13,0.1,0,7.13,14.03,0.02,0.82,0.15,0.05,0.22,0.22,0.17,0,0,0,0.02,0.18,0.08,0,0,0.03,0,0.07,0,0.67,0 "28",0,1.63,0.35,18.27,7.13,0.05,0.4,0,0.2,0,0.3,0.52,0.2,9.97,70.03,0,0.08,0,0.07,0.03,0,0.17,0.87,0,0.05,0.02,0.03,0.07,0.1,0.02,0,0.02,0,0,0,0.02,0,0,0.02,0,0,0,0.08,0 "13",24.13,1.9,0.07,0.22,5.3,0.12,0,0,0,0.07,0.02,2.5,0,0,5.52,0,0.02,0,0.03,0.25,0.07,23.07,23.67,11.9,0.95,0.17,0.05,0.23,0.18,0.57,0.02,0.07,0,0.07,0.18,0.02,0,0,0.03,0.02,0,0,0.47,0 "14",3.75,5.65,0,0.08,5.3,0.1,0,0,0,0,0,11.32,0,0,7.75,0,0.3,0.02,0.07,0,0,17.45,1.32,0.12,23.68,0.22,0.5,0.15,0.23,0.97,0,0,0,0,0.68,0.02,0,0,0.33,0,0.02,0,1.57,0.05 "20",0.02,6.45,0,0,14.13,0.07,0,0,0.47,0,0.85,1.87,0.08,1.35,13.73,0.07,0.05,0,0.12,0,0,6.8,11.22,0.05,4.75,0.03,0.12,0.22,0.18,0.07,0,0.02,0,0.02,0.15,0,0,0.02,0,0,0,0,1.2,0 "25",0,6.93,0,0,10.6,0.02,0.1,0.02,0.05,0.07,14.02,10.82,0,0.02,28.77,0,6.98,0.13,0,0.22,0,6,2.25,0,0.8,0.12,0.57,0.17,0.15,0.07,0,0,0,0.02,0.03,0.02,0,4.87,0.1,0.07,0,0.02,0.05,0 "7",0,5.3,0,0,8.2,0,0.05,0,8.1,0.28,0,0.45,0.03,0,0.1,0,0.25,0,0.03,0,0,35,42.5,0.28,0.35,0.08,0.2,0.25,0.18,0.13,0.08,0,0,0,0.05,0,0.23,0.2,0.93,0,0.03,0,0.1,0 "5",0,0.13,0,0,2.75,0.03,0,0,0,0,0,0.25,0.03,0,0.03,0.18,0.65,0,0,0,0,18.5,59,0.98,0.28,0.23,0.23,0.23,0.1,0.05,0,0,0.03,0,0.18,0,0.28,0,10.28,0,0.1,0,0.25,0 "6",0.3,5.75,0,0,10.5,0.1,0,0,0,0,0,0.85,0,0,0.05,0.03,0.08,0,0,0.08,0,39,37.5,11.3,3.45,0.18,0.2,0.25,0.25,0.23,0.03,0,0,0.03,0.35,0,0.08,0,0.03,0,0,0,0.28,0 "3",0.03,3.65,0,0,4.43,0,0,0,1.65,0.5,0,0.55,0,0,0.05,0,0,0,0.03,0.03,0,8.8,29.5,55.6,0.25,0.08,0.25,0.25,0.15,0.1,0.03,0,0.03,0,0.05,0,0.15,0.15,0.28,0,0,0,0.08,0 
"4",3.4,0.63,0,0,1.98,0.05,0.05,0,0.03,0,0,0.2,0,0,1.53,0,0.1,0,0.05,0,0,15.73,20.03,28.2,0.73,0.1,0.15,0.13,0.1,0.15,0,0,0,0.05,0.28,0.05,10.03,0,0.95,0,0,0.05,0.08,0 "2",0.05,9.3,0,0,8.5,0.03,0,0,0,0,0,0.03,0,0,0.75,0,0.03,0,0,0.03,0,0.48,24.5,75,0.2,0,0.03,0.03,0.05,0.03,0.03,0,0,0,0,0,0,0,0,0,0,0.03,0.03,0 "9",0,3.47,0,0.25,20.5,0.25,0,0,0,0.25,0,0.38,0.25,0,4.07,0,0.25,0,0.25,0.25,0,0.46,4,84.3,0.25,0.25,0.25,0.25,0.25,0.25,0.25,0,0,0.25,0.25,0.25,0.67,0,0,0,0,0,0.25,0.25 "12",0.25,11.5,0,0,15.8,1.2,0,0,0,0,0.25,0.25,0,0,6,0,0,0,0.25,0,0,3.6,14.6,63.3,1.3,0,0.25,0.25,0.25,0.25,0,0,0,0,0,0.25,0,0,0,0,0,0,0.25,0 "10",0.25,11,0,0,11.9,0.25,0,0,0,0,0,0.25,0.25,0,0.67,0,0.25,0,0.25,0,0,1.3,8.7,84.3,0.25,0.25,0.25,0,0.25,0.25,0.25,0,0,0,0.25,0.25,0.25,0,0.25,0,0,0,0.25,0.25 "11",2.37,0.67,0,0,12.9,0.8,0,0,0,0,0,0.25,0.25,0,17.7,0.25,0.25,0,0.25,0.67,0,9.67,29.8,31.8,2.53,0.25,0.25,0.25,0,0.25,0,0,0,0,0,0.25,0,0,0.93,0.25,0,0,0,0.25 "21",0,16,4,15,25,0.25,0.5,0.25,0,0,0.25,0.25,3,0,2,0,0.25,0.25,0.25,10,3,0.7,4.7,10.9,0.25,0,0.05,0.25,0.25,0.25,0.25,0.25,0,0,0,0.67,0,0,0,0,0,0,0.4,0 scikit-bio-0.6.2/skbio/stats/ordination/tests/test_canonical_correspondence_analysis.py000066400000000000000000000147621464262511300317030ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import numpy.testing as npt import pandas as pd from unittest import TestCase, main from skbio import OrdinationResults from skbio.stats.ordination import cca from skbio.util import get_data_path, assert_ordination_results_equal class TestCCAErrors(TestCase): def setUp(self): """Data from table 11.3 in Legendre & Legendre 1998.""" self.Y = pd.DataFrame(np.loadtxt(get_data_path('example3_Y'))) self.X = pd.DataFrame(np.loadtxt(get_data_path('example3_X'))) def test_shape(self): X, Y = self.X, self.Y with npt.assert_raises(ValueError): cca(Y, X[:-1]) def test_Y_values(self): X, Y = self.X, self.Y Y.iloc[0, 0] = -1 with npt.assert_raises(ValueError): cca(Y, X) Y.iloc[0] = 0 with npt.assert_raises(ValueError): cca(Y, X) def test_scaling(self): X, Y = self.X, self.Y with npt.assert_raises(NotImplementedError): cca(Y, X, 3) def test_all_zero_row(self): X, Y = pd.DataFrame(np.zeros((3, 3))), pd.DataFrame(np.zeros((3, 3))) with npt.assert_raises(ValueError): cca(X, Y) class TestCCAResults1(TestCase): def setUp(self): """Data from table 11.3 in Legendre & Legendre 1998 (p. 590). 
Loaded results as computed with vegan 2.0-8 and compared with table 11.5 if also there.""" self.feature_ids = ['Feature0', 'Feature1', 'Feature2', 'Feature3', 'Feature4', 'Feature5', 'Feature6', 'Feature7', 'Feature8'] self.sample_ids = ['Sample0', 'Sample1', 'Sample2', 'Sample3', 'Sample4', 'Sample5', 'Sample6', 'Sample7', 'Sample8', 'Sample9'] self.env_ids = ['Constraint0', 'Constraint1', 'Constraint2'] self.pc_ids = ['CCA1', 'CCA2', 'CCA3', 'CCA4', 'CCA5', 'CCA6', 'CCA7', 'CCA8', 'CCA9'] self.Y = pd.DataFrame( np.loadtxt(get_data_path('example3_Y')), columns=self.feature_ids, index=self.sample_ids) self.X = pd.DataFrame( np.loadtxt(get_data_path('example3_X'))[:, :-1], columns=self.env_ids, index=self.sample_ids ) def test_scaling1(self): scores = cca(self.Y, self.X, scaling=1) # Load data as computed with vegan 2.0-8 vegan_features = pd.DataFrame( np.loadtxt(get_data_path( 'example3_species_scaling1_from_vegan')), index=self.feature_ids, columns=self.pc_ids) vegan_samples = pd.DataFrame( np.loadtxt(get_data_path( 'example3_site_scaling1_from_vegan')), index=self.sample_ids, columns=self.pc_ids) sample_constraints = pd.DataFrame( np.loadtxt(get_data_path( 'example3_sample_constraints_scaling1')), index=self.sample_ids, columns=self.pc_ids) mat = np.loadtxt(get_data_path( 'example3_biplot_scaling1')) cropped_pcs = self.pc_ids[:mat.shape[1]] biplot_scores = pd.DataFrame(mat, index=self.env_ids, columns=cropped_pcs) proportion_explained = pd.Series([0.466911, 0.238327, 0.100548, 0.104937, 0.044805, 0.029747, 0.012631, 0.001562, 0.000532], index=self.pc_ids) eigvals = pd.Series([0.366136, 0.186888, 0.078847, 0.082288, 0.035135, 0.023327, 0.009905, 0.001225, 0.000417], index=self.pc_ids) exp = OrdinationResults( 'CCA', 'Canonical Correspondence Analysis', samples=vegan_samples, features=vegan_features, sample_constraints=sample_constraints, biplot_scores=biplot_scores, proportion_explained=proportion_explained, eigvals=eigvals) assert_ordination_results_equal(scores, 
exp, decimal=6) def test_scaling2(self): scores = cca(self.Y, self.X, scaling=2) # Load data as computed with vegan 2.0-8 vegan_features = pd.DataFrame( np.loadtxt(get_data_path( 'example3_species_scaling2_from_vegan')), index=self.feature_ids, columns=self.pc_ids) vegan_samples = pd.DataFrame( np.loadtxt(get_data_path( 'example3_site_scaling2_from_vegan')), index=self.sample_ids, columns=self.pc_ids) sample_constraints = pd.DataFrame( np.loadtxt(get_data_path( 'example3_sample_constraints_scaling2')), index=self.sample_ids, columns=self.pc_ids) mat = np.loadtxt(get_data_path( 'example3_biplot_scaling2')) cropped_pc_ids = self.pc_ids[:mat.shape[1]] biplot_scores = pd.DataFrame(mat, index=self.env_ids, columns=cropped_pc_ids) proportion_explained = pd.Series([0.466911, 0.238327, 0.100548, 0.104937, 0.044805, 0.029747, 0.012631, 0.001562, 0.000532], index=self.pc_ids) eigvals = pd.Series([0.366136, 0.186888, 0.078847, 0.082288, 0.035135, 0.023327, 0.009905, 0.001225, 0.000417], index=self.pc_ids) exp = OrdinationResults( 'CCA', 'Canonical Correspondence Analysis', samples=vegan_samples, features=vegan_features, sample_constraints=sample_constraints, biplot_scores=biplot_scores, proportion_explained=proportion_explained, eigvals=eigvals) assert_ordination_results_equal(scores, exp, decimal=6) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/ordination/tests/test_correspondence_analysis.py000066400000000000000000000200011464262511300276530ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import numpy.testing as npt import pandas as pd from scipy.spatial.distance import pdist from unittest import TestCase, main from skbio import OrdinationResults from skbio.stats.ordination import ca from skbio.util import get_data_path, assert_ordination_results_equal def chi_square_distance(data_table, between_rows=True): """Computes the chi-square distance between two rows or columns of input. It is a measure that has no upper limit, and it excludes double-zeros. Parameters ---------- data_table : 2D array_like An array_like object of shape (n, p). The input must be a frequency table (so that the sum of all cells equals 1, and all values are non-negative). between_rows : bool (defaults to True) Indicates whether distance is computed between rows (default) or columns. Returns ------- Y : ndarray Returns a condensed distance matrix. For each i and j (where i>> import numpy as np >>> np.random.seed(20) >>> ind = np.random.randint(0, 20, 15) >>> ind # doctest: +ELLIPSIS array([ 3, 15, 9, 11, 7, 2, 0, 8, 19, 16, 6, 6, 16, 9, 5]... >>> dep = (3 * ind + 5 + np.random.randn(15) * 5).round(3) >>> dep array([ 15.617, 47.533, 28.04 , 33.788, 19.602, 12.229, 4.779, 36.838, 67.256, 55.032, 22.157, 7.051, 58.601, 38.664, 18.783]) Let's define a test that will draw a list of sample pairs and determine if they're correlated. We'll use `scipy.stats.pearsonr` which takes two arrays and returns a correlation coefficient and a p-value representing the probability the two distributions are correlated. >>> from scipy.stats import pearsonr >>> f = lambda x: pearsonr(x[0], x[1])[1] Now, let's use random sampling to estimate the power of our test on the first distribution. >>> samples = [ind, dep] >>> print("%.3e" % f(samples)) 3.646e-08 In `subsample_power`, we can maintain a paired relationship between samples by setting `draw_mode` to "matched". 
We can also set our critical value, so that we estimate power for a critical value of :math:`\alpha = 0.05`, an estimate for the critical value of 0.01, and a critical value of 0.001. >>> from skbio.stats.power import subsample_power >>> pwr_100, counts_100 = subsample_power(test=f, ... samples=samples, ... max_counts=10, ... min_counts=3, ... counts_interval=1, ... draw_mode="matched", ... alpha_pwr=0.1, ... num_iter=25) >>> pwr_010, counts_010 = subsample_power(test=f, ... samples=samples, ... max_counts=10, ... min_counts=3, ... counts_interval=1, ... draw_mode="matched", ... alpha_pwr=0.01, ... num_iter=25) >>> pwr_001, counts_001 = subsample_power(test=f, ... samples=samples, ... max_counts=10, ... min_counts=3, ... counts_interval=1, ... draw_mode="matched", ... alpha_pwr=0.001, ... num_iter=25) >>> counts_100 array([3, 4, 5, 6, 7, 8, 9]) >>> pwr_100.mean(0) array([ 0.484, 0.844, 0.932, 0.984, 1. , 1. , 1. ]) >>> pwr_010.mean(0) array([ 0.044, 0.224, 0.572, 0.836, 0.928, 0.996, 1. ]) >>> pwr_001.mean(0) array([ 0. , 0.016, 0.108, 0.332, 0.572, 0.848, 0.956]) Based on this power estimate, as we increase our confidence that we have not committed a type I error and identified a false positive, the number of samples we need to be confident that we have not committed a type II error increases. """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import collections import copy import numpy as np import scipy.stats def subsample_power( test, samples, draw_mode="ind", alpha_pwr=0.05, ratio=None, max_counts=50, counts_interval=10, min_counts=None, num_iter=500, num_runs=10, ): r"""Subsample data to iteratively calculate power. 
Parameters ---------- test : function The statistical test which accepts a list of arrays of values (sample ids or numeric values) and returns a p value or one-dimensional array of p values. samples : array_like `samples` can be a list of lists or a list of arrays where each sublist or row in the array corresponds to a sampled group. draw_mode : {"ind", "matched"}, optional "matched" samples should be used when observations in samples have corresponding observations in other groups. For instance, this may be useful when working with regression data where :math:`x_{1}, x_{2}, ..., x_{n}` maps to :math:`y_{1}, y_{2}, ..., y_{n}`. Sample vectors must be the same length in "matched" mode. If there is no reciprocal relationship between samples, then "ind" mode should be used. alpha_pwr : float, optional The critical value used to calculate the power. ratio : 1-D array, optional The fraction of the sample counts which should be assigned to each group. If this is a 1-D array, it must be the same length as `samples`. If no value is supplied (`ratio` is None), then an equal number of observations will be drawn for each sample. In `matched` mode, this will be set to one. max_counts : positive int, optional The maximum number of samples per group to draw for effect size calculation. counts_interval : positive int, optional The difference between each subsampling count. min_counts : positive int, optional How many samples should be drawn for the smallest subsample. If this is None, the `counts_interval` will be used. num_iter : positive int, optional The number of p-values to generate for each point on the curve. num_runs : positive int, optional The number of times to calculate each curve. Returns ------- power : array The power calculated for each subsample at each count. The array has `num_runs` rows, a length with the same number of elements as `sample_counts` and a depth equal to the number of p values returned by `test`. 
If `test` returns a float, the returned array will be two-dimensional instead of three. sample_counts : array The number of samples drawn at each power calculation. Raises ------ ValueError If the `mode` is "matched", an error will occur if the arrays in `samples` are not the same length. ValueError There is a ValueError if there are fewer samples than the minimum count. ValueError If the `counts_interval` is greater than the difference between the sample start and the max value, the function raises a ValueError. ValueError There are not an equal number of groups in `samples` and in `ratios`. TypeError `test` does not return a float or a 1-dimensional numpy array. Examples -------- Let's say we wanted to look at the relationship between the presence of a specific bacteria, *Gardnerella vaginalis* in the vaginal community, and the probability of a pre or post menopausal woman experiencing a urinary tract infection (UTI). Healthy women were enrolled in the study either before or after menopause, and followed for eight weeks. Participants submitted fecal samples at the beginning of the study, and were then followed for clinical symptoms of a UTI. A confirmed UTI was an endpoint in the study. Using available literature and 16S sequencing, a set of candidate taxa were identified as correlated with UTIs, including *G. vaginalis*. In the 100 women (50 premenopausal and 50 postmenopausal samples) who had UTIs, the presence or absence of *G. vaginalis* was confirmed with quantitative PCR. We can model the probability that detectable *G. vaginalis* was found in these samples using a binomial model. (*Note that this is a simulation.*) >>> import numpy as np >>> np.random.seed(25) >>> pre_rate = np.random.binomial(1, 0.85, size=(50,)) >>> pre_rate.sum() 45 >>> pos_rate = np.random.binomial(1, 0.40, size=(50,)) >>> pos_rate.sum() 21 Let's set up a test function, so we can test the probability of finding a difference in frequency between the two groups. 
We'll use `scipy.stats.chisquare` to look for the difference in frequency between groups. >>> from scipy.stats import chisquare >>> test = lambda x: chisquare(np.array([x[i].sum() for i in ... range(len(x))]))[1] Let's make sure that our two distributions are different. >>> print(round(test([pre_rate, pos_rate]), 3)) 0.003 Since there are an even number of samples, and we don't have enough information to try controlling the data, we'll use `skbio.stats.power.subsample_power` to compare the two groups. If we had metadata about other risk factors, like a reproductive history, BMI, tobacco use, we might want to use `skbio.stats.power.subsample_paired_power`. We'll also use "ind" `draw_mode`, since there is no linkage between the two groups of samples. >>> from skbio.stats.power import subsample_power >>> pwr_est, counts = subsample_power(test=test, ... samples=[pre_rate, pos_rate], ... num_iter=100, ... num_runs=5, ... counts_interval=5) >>> counts array([ 5, 10, 15, 20, 25, 30, 35, 40, 45]) >>> np.nanmean(pwr_est, axis=0) # doctest: +NORMALIZE_WHITESPACE array([ 0.056, 0.074, 0.226, 0.46 , 0.61 , 0.806, 0.952, 1. , 1. ]) >>> counts[np.nanmean(pwr_est, axis=0) > 0.8].min() 30 So, we can estimate that we will see a significant difference in the presence of *G. vaginalis* in the stool of pre and post women with UTIs if we have at least 30 samples per group. If we wanted to test the relationship of a second candidate taxa which is more rare in the population, but may have a similar effect, based on available literature, we might also start by trying to identify 30 samples per group where the second candidate taxa is present. Suppose, now, that we want to test that a secondary metabolite seen only in the presence of *G vaginalis* to see if it is also correlated with UTIs. We can model the abundance of the metabolite as a normal distribution. >>> met_pos = (np.random.randn(pre_rate.sum() + pos_rate.sum()) * 2000 + ... 
2500) >>> met_pos[met_pos < 0] = 0 >>> met_neg = met_neg = (np.random.randn(100 - (pre_rate.sum() + ... pos_rate.sum())) * 2000 + 500) >>> met_neg[met_neg < 0] = 0 Let's compare the populations with a kruskal-wallis test. Physically, there cannot be a negative concentration of a chemical, so we've set the lower bound at 0. This means that we can no longer assume our distribution is normal. >>> from scipy.stats import kruskal >>> def metabolite_test(x): ... return kruskal(x[0], x[1])[1] >>> print(round(metabolite_test([met_pos, met_neg]), 3)) 0.005 When we go to perform the statistical test on all the data, you might notice that there are twice as many samples from women with *G. vaginalis* than those without. It might make sense to account for this difference when we're testing power. So, we're going to set the `ratio` parameter, which lets us draw twice as many samples from women with *G. vaginalis*. >>> pwr_est2, counts2 = subsample_power(test=metabolite_test, ... samples=[met_pos, met_neg], ... counts_interval=5, ... num_iter=100, ... num_runs=5, ... ratio=[2, 1]) >>> counts2 array([ 5., 10., 15., 20., 25., 30.]) >>> np.nanmean(pwr_est2, axis=0) array([ 0.14 , 0.272, 0.426, 0.646, 0.824, 0.996]) >>> counts2[np.nanmean(pwr_est2, axis=0) > 0.8].min() 25.0 When we consider the number of samples per group needed in the power analysis, we need to look at the ratio. The analysis says that we need 25 samples in the smallest group, in this case, the group of women without *G. vaginalis* and 50 samples from women with *G. vaginalis* to see a significant difference in the abundance of our secondary metabolite at 80% power. 
""" # Checks the inputs ratio, num_p, sample_counts = _check_subsample_power_inputs( test=test, samples=samples, draw_mode=draw_mode, ratio=ratio, min_counts=min_counts, max_counts=max_counts, counts_interval=counts_interval, ) # Prealocates the power array power = np.zeros((num_runs, len(sample_counts), num_p)) # Calculates the power instances for id2, c in enumerate(sample_counts): count = np.round(c * ratio, 0).astype(int) for id1 in range(num_runs): ps = _compare_distributions( test=test, samples=samples, num_p=num_p, counts=count, num_iter=num_iter, mode=draw_mode, ) power[id1, id2, :] = _calculate_power(ps, alpha_pwr) power = power.squeeze() return power, sample_counts def subsample_paired_power( test, meta, cat, control_cats, order=None, strict_match=True, alpha_pwr=0.05, max_counts=50, counts_interval=10, min_counts=None, num_iter=500, num_runs=10, ): r"""Estimate power iteratively using samples with matching metadata. Parameters ---------- test : function The statistical test which accepts a list of arrays sample ids and returns a p value. meta : pandas.DataFrame The metadata associated with the samples. cat : str The metadata category being varied between samples. control_cats : list The metadata categories to be used as controls. For example, if you wanted to vary age (`cat` = "AGE"), you might want to control for gender and health status (i.e. `control_cats` = ["SEX", "HEALTHY"]). order : list, optional The order of groups in the category. This can be used to limit the groups selected. For example, if there's a category with groups 'A', 'B' and 'C', and you only want to look at A vs B, `order` would be set to ['A', 'B']. strict_match : bool, optional This determines how data is grouped using `control_cats`. If a sample within `meta` has an undefined value (NaN) for any of the columns in `control_cats`, the sample will not be considered as having a match and will be ignored when `strict_match` is True. 
If `strict_match` is False, missing values (NaN) in the `control_cats` can be considered matches. alpha_pwr : float, optional The critical value used to calculate the power. max_counts : positive int, optional The maximum number of observations per sample to draw for effect size calculation. counts_interval : positive int, optional The difference between each subsampling count. min_counts : positive int, optional How many samples should be drawn for the smallest subsample. If this is None, the `counts_interval` will be used. num_iter : positive int, optional The number of p-values to generate for each point on the curve. num_runs : positive int, optional The number of times to calculate each curve. Returns ------- power : array The power calculated for each subsample at each count. The array is `num_runs` rows, a length with the same number of elements as `sample_counts` and a depth equal to the number of p values returned by `test`. If `test` returns a float, the returned array will be two-dimensional instead of three. sample_counts : array The number of samples drawn at each power calculation. Raises ------ ValueError There is a ValueError if there are fewer samples than the minimum count. ValueError If the `counts_interval` is greater than the difference between the sample start and the max value, the function raises a ValueError. TypeError `test` does not return a float or a 1-dimensional numpy array. Examples -------- Assume you are interested in the role of a specific cytokine of protein translocation in myeloid-lineage cells. You are able to culture two macrophage lineages (bone marrow derived phagocytes and peritoneally-derived macrophages). Due to unfortunate circumstances, your growth media must be acquired from multiple sources (lab, company A, company B). Also unfortunate, you must use labor-intensive low throughput assays. You have some preliminary measurements, and you'd like to predict how many (more) cells you need to analyze for 80% power. 
You have information about 60 cells, which we'll simulate below. Note that we are setting a random seed value for consistency. >>> import numpy as np >>> import pandas as pd >>> np.random.seed(25) >>> data = pd.DataFrame.from_dict({ ... 'CELL_LINE': np.random.binomial(1, 0.5, size=(60,)), ... 'SOURCE': np.random.binomial(2, 0.33, size=(60,)), ... 'TREATMENT': np.hstack((np.zeros((30)), np.ones((30)))), ... 'INCUBATOR': np.random.binomial(1, 0.2, size=(60,))}) >>> data['OUTCOME'] = (0.25 + data.TREATMENT * 0.25) + \ ... np.random.randn(60) * (0.1 + data.SOURCE/10 + data.CELL_LINE/5) >>> data.loc[data.OUTCOME < 0, 'OUTCOME'] = 0 >>> data.loc[data.OUTCOME > 1, 'OUTCOME'] = 1 We will approach this by assuming that the distribution of our outcome is not normally distributed, and apply a kruskal-wallis test to compare between the cytokine treated and untreated cells. >>> from scipy.stats import kruskal >>> f = lambda x: kruskal(*[data.loc[i, 'OUTCOME'] for i in x])[1] Let's check that cytokine treatment has a significant effect across all the cells. >>> treatment_stat = [g for g in data.groupby('TREATMENT').groups.values()] >>> round(f(treatment_stat), 17) 0.00193863362662502 Now, let's pick the control categories. It seems reasonable to assume there may be an effect of cell line on the treatment outcome, which may be attributed to differences in receptor expression. It may also be possible that there are differences due cytokine source. Incubators were maintained under the same conditions throughout the experiment, within one degree of temperature difference at any given time, and the same level of CO2. So, at least initially, let's ignore differences due to the incubator. It's recommended that as a first pass analysis, control variables be selected based on an idea of what may be biologically relevant to the system, although further iteration might encourage the consideration of variable with effect sizes similar, or larger than the variable of interest. 
>>> control_cats = ['SOURCE', 'CELL_LINE'] >>> from skbio.stats.power import subsample_paired_power >>> pwr, cnt = subsample_paired_power(test=f, ... meta=data, ... cat='TREATMENT', ... control_cats=control_cats, ... counts_interval=5, ... num_iter=25, ... num_runs=5) >>> cnt array([ 5., 10., 15., 20.]) >>> pwr.mean(0) array([ 0.24 , 0.528, 0.68 , 0.88 ]) >>> pwr.std(0).round(3) array([ 0.088, 0.127, 0.168, 0.08 ]) Estimating off the power curve, it looks like 20 cells per group may provide adequate power for this experiment, although the large variance in power might suggest extending the curves or increasing the number of samples per group. """ # Handles the order argument if order is None: order = sorted(meta.groupby(cat).groups.keys()) order = np.array(order) # Checks for the number of sampling pairs available meta_pairs, index = _identify_sample_groups( meta, cat, control_cats, order, strict_match ) min_obs = min( [ _get_min_size(meta, cat, control_cats, order, strict_match), np.floor(len(index) * 0.9), ] ) sub_ids = _draw_paired_samples(meta_pairs, index, min_obs) ratio, num_p, sample_counts = _check_subsample_power_inputs( test=test, samples=sub_ids, draw_mode="matched", min_counts=min_counts, max_counts=max_counts, counts_interval=counts_interval, ) # Prealocates the power array power = np.zeros((num_runs, len(sample_counts), num_p)) # Calculates power instances for id2, c in enumerate(sample_counts): for id1 in range(num_runs): ps = np.zeros((num_p, num_iter)) for id3 in range(num_iter): subs = _draw_paired_samples(meta_pairs, index, c) ps[:, id3] = test(subs) power[id1, id2, :] = _calculate_power(ps, alpha_pwr) power = power.squeeze() return power, sample_counts def confidence_bound(vec, alpha=0.05, df=None, axis=None): r"""Calculate a confidence bound assuming a normal distribution. Parameters ---------- vec : array_like The array of values to use in the bound calculation. 
alpha : float, optional The critical value, used for the confidence bound calculation. df : float, optional The degrees of freedom associated with the distribution. If None is given, df is assumed to be the number of elements in specified axis. axis : positive int, optional The axis over which to take the deviation. When axis is None, a single value will be calculated for the whole matrix. Returns ------- bound : float The confidence bound around the mean. The confidence interval is [mean - bound, mean + bound]. """ # Determines the number of non-nan counts vec = np.asarray(vec) vec_shape = vec.shape if axis is None and len(vec_shape) == 1: num_counts = vec_shape[0] - np.isnan(vec).sum() elif axis is None: num_counts = vec_shape[0] * vec_shape[1] - np.isnan(vec).sum() else: num_counts = vec_shape[axis] - np.isnan(vec).sum() / ( vec_shape[0] * vec_shape[1] ) # Gets the df if not supplied if df is None: df = num_counts - 1 # Calculates the bound # In the conversion from scipy.stats.nanstd -> np.nanstd `ddof=1` had to be # added to match the scipy default of `bias=False`. bound = ( np.nanstd(vec, axis=axis, ddof=1) / np.sqrt(num_counts - 1) * scipy.stats.t.ppf(1 - alpha / 2, df) ) return bound def paired_subsamples(meta, cat, control_cats, order=None, strict_match=True): r"""Draw a list of samples varied by `cat` and matched for `control_cats`. This function is designed to provide controlled samples, based on a metadata category. For example, one could control for age, sex, education level, and diet type while measuring exercise frequency. Parameters ---------- meta : pandas.DataFrame The metadata associated with the samples. cat : str, list The metadata category (or a list of categories) for comparison. control_cats : list The metadata categories to be used as controls. For example, if you wanted to vary age (`cat` = "AGE"), you might want to control for gender and health status (i.e. 
`control_cats` = ["SEX", "HEALTHY"]) order : list, optional The order of groups in the category. This can be used to limit the groups selected. For example, if there's a category with groups 'A', 'B' and 'C', and you only want to look at A vs B, `order` would be set to ['A', 'B']. strict_match: bool, optional This determines how data is grouped using `control_cats`. If a sample within `meta` has an undefined value (`NaN`) for any of the columns in `control_cats`, the sample will not be considered as having a match and will be ignored when `strict_match` is True. If `strict_match` is False, missing values (NaN) in the `control_cats` can be considered matches. Returns ------- ids : array a set of ids which satisfy the criteria. These are not grouped by `cat`. An empty array indicates there are no sample ids which satisfy the requirements. Examples -------- If we have a mapping file for a set of random individuals looking at housing, sex, age and antibiotic use. >>> import pandas as pd >>> import numpy as np >>> meta = {'SW': {'HOUSING': '2', 'SEX': 'M', 'AGE': np.nan, 'ABX': 'Y'}, ... 'TS': {'HOUSING': '2', 'SEX': 'M', 'AGE': '40s', 'ABX': 'Y'}, ... 'CB': {'HOUSING': '3', 'SEX': 'M', 'AGE': '40s', 'ABX': 'Y'}, ... 'BB': {'HOUSING': '1', 'SEX': 'M', 'AGE': '40s', 'ABX': 'Y'}} >>> meta = pd.DataFrame.from_dict(meta, orient="index") >>> meta #doctest: +SKIP ABX HOUSING AGE SEX BB Y 1 40s M CB Y 3 40s M SW Y 2 NaN M TS Y 2 40s M We may want to vary an individual's housing situation, while holding constant their age, sex and antibiotic use so we can estimate the effect size for housing, and later compare it to the effects of other variables. >>> from skbio.stats.power import paired_subsamples >>> ids = paired_subsamples(meta, 'HOUSING', ['SEX', 'AGE', 'ABX']) >>> np.hstack(ids) #doctest: +ELLIPSIS array(['BB', 'TS', 'CB']...) So, for this set of data, we can match TS, CB, and BB based on their age, sex, and antibiotic use. 
SW cannot be matched in either group because `strict_match` was true, and there is missing AGE data for this sample. """ # Handles the order argument if order is None: order = sorted(meta.groupby(cat).groups.keys()) order = np.array(order) # Checks the groups in the category min_obs = _get_min_size(meta, cat, control_cats, order, strict_match) # Identifies all possible subsamples meta_pairs, index = _identify_sample_groups( meta=meta, cat=cat, control_cats=control_cats, order=order, strict_match=strict_match, ) # Draws paired ids ids = _draw_paired_samples(meta_pairs=meta_pairs, index=index, num_samps=min_obs) return ids def _get_min_size(meta, cat, control_cats, order, strict_match): """Determine the smallest group represented.""" if strict_match: all_cats = copy.deepcopy(control_cats) all_cats.append(cat) meta = meta[all_cats].dropna() return meta.groupby(cat).count().loc[order, control_cats[0]].min() def _check_nans(x, switch=False): r"""Return False if x is a nan and True is x is a string or number.""" if isinstance(x, str): return True elif isinstance(x, (float, int)): return not np.isnan(x) elif switch and isinstance(x, (list, tuple)) and np.nan in x: return False elif switch and isinstance(x, (list, tuple)): return True else: raise TypeError("input must be a string, float or a nan") def _calculate_power(p_values, alpha=0.05): r"""Calculate statistical power empirically. Parameters ---------- p_values : 1-D array A 1-D numpy array with the test results. alpha : float The critical value for the power calculation. Returns ------- power : float The empirical power, or the fraction of observed p values below the critical value. """ p_values = np.atleast_2d(p_values) w = (p_values < alpha).sum(axis=1) / p_values.shape[1] return w def _compare_distributions(test, samples, num_p, counts=5, mode="ind", num_iter=100): r"""Compare two distribution arrays iteratively. 
Parameters ---------- test : function The statistical test which accepts an array_like of sample ids (list of lists) and returns a p-value. This can be a one-dimensional array, or a float. samples : list of arrays A list where each 1-d array represents a sample. If `mode` is "matched", there must be an equal number of observations in each sample. num_p : positive int, optional The number of p-values returned by the test. counts : positive int or 1-D array, optional The number of samples to draw from each distribution. If this is a 1-D array, the length must correspond to the number of samples. The function will not draw more observations than are in a sample. In "matched" `mode`, the same number of observations will be drawn from each group. mode : {"ind", "matched", "paired"}, optional "matched" samples should be used when observations in samples have corresponding observations in other groups. For instance, this may be useful when working with regression data where :math:`x_{1}, x_{2}, ..., x_{n}` maps to :math:`y_{1}, y_{2}, ... , y_{n}`. num_iter : positive int, optional Default 1000. The number of p-values to generate for each point on the curve. Returns ------- p_values : array The p-values for the subsampled tests. If `test` returned a single p value, p_values is a one-dimensional array. If `test` returned an array, `p_values` has dimensions `num_iter` x `num_p` Raises ------ ValueError If mode is not "ind" or "matched". ValueError If the arrays in samples are not the same length in "matched" mode. ValueError If counts is a 1-D array and counts and samples are different lengths. 
""" # Prealocates the pvalue matrix p_values = np.zeros((num_p, num_iter)) # Determines the number of samples per group num_groups = len(samples) samp_lens = [len(sample) for sample in samples] if isinstance(counts, int): counts = np.array([counts] * num_groups) for idx in range(num_iter): if mode == "matched": pos = np.random.choice(np.arange(0, samp_lens[0]), counts[0], replace=False) subs = [sample[pos] for sample in samples] else: subs = [ np.random.choice(np.array(pop), counts[i], replace=False) for i, pop in enumerate(samples) ] p_values[:, idx] = test(subs) if num_p == 1: p_values = p_values.squeeze() return p_values def _check_subsample_power_inputs( test, samples, draw_mode="ind", ratio=None, max_counts=50, counts_interval=10, min_counts=None, ): r"""Make sure that everything is sane before power calculations. Parameters ---------- test : function The statistical test which accepts a list of arrays of values (sample ids or numeric values) and returns a p value or one-dimensional array of p values. samples : array_like `samples` can be a list of lists or a list of arrays where each sublist or row in the array corresponds to a sampled group. draw_mode : {"ind", "matched"}, optional "matched" samples should be used when observations in samples have corresponding observations in other groups. For instance, this may be useful when working with regression data where :math:`x_{1}, x_{2}, ..., x_{n}` maps to :math:`y_{1}, y_{2}, ..., y_{n}`. Sample vectors must be the same length in "matched" mode. If there is no reciprocal relationship between samples, then "ind" mode should be used. ratio : 1-D array, optional The fraction of the sample counts which should be assigned to each group. If this is a 1-D array, it must be the same length as `samples`. If no value is supplied (`ratio` is None), then an equal number of observations will be drawn for each sample. In `matched` mode, this will be set to one. 
max_counts : positive int, optional The maximum number of samples per group to draw for effect size calculation. counts_interval : positive int, optional The difference between each subsampling count. min_counts : positive int, optional How many samples should be drawn for the smallest subsample. If this is None, the `counts_interval` will be used. Returns ------- ratio : 1-D array The fraction of the sample counts which should be assigned to each group. num_p : positive integer The number of p values returned by `test`. sample_counts : array The number of samples drawn at each power calculation. Raises ------ ValueError If the `mode` is "matched", an error will occur if the arrays in `samples` are not the same length. ValueError There is a ValueError if there are fewer samples than the minimum count. ValueError If the `counts_interval` is greater than the difference between the sample start and the max value, the function raises a ValueError. ValueError There are not an equal number of groups in `samples` and in `ratios`. TypeError `test` does not return a float or a 1-dimensional numpy array. """ if draw_mode not in {"ind", "matched"}: raise ValueError('mode must be "matched" or "ind".') # Determines the minimum number of ids in a category id_counts = np.array([len(id_) for id_ in samples]) num_ids = id_counts.min() # Determines the number of groups num_groups = len(samples) # Checks that "matched" mode is handled appropriately if draw_mode == "matched": for id_ in samples: if not len(id_) == num_ids: raise ValueError( "Each vector in samples must be the same " 'length in "matched" draw_mode.' 
) # Checks the number of counts is appropriate if min_counts is None: min_counts = counts_interval if (max_counts - min_counts) < counts_interval: raise ValueError("No subsamples of the specified size can be drawn.") # Checks the ratio argument is sane if ratio is None or draw_mode == "matched": ratio = np.ones((num_groups)) else: ratio = np.asarray(ratio) if not ratio.shape == (num_groups,): raise ValueError("There must be a ratio for each group.") ratio_counts = np.array([id_counts[i] / ratio[i] for i in range(num_groups)]) largest = ratio_counts.min() # Determines the number of p values returned by the test p_return = test(samples) if isinstance(p_return, float): num_p = 1 elif isinstance(p_return, np.ndarray) and len(p_return.shape) == 1: num_p = p_return.shape[0] else: raise TypeError("test must return a float or one-dimensional array.") # Calculates the same counts sample_counts = np.arange(min_counts, min(max_counts, largest), counts_interval) return ratio, num_p, sample_counts def _identify_sample_groups(meta, cat, control_cats, order, strict_match): """Aggregate samples matches for `control_cats` that vary by `cat`. Parameters ---------- meta : pandas.DataFrame The metadata associated with the samples. cat : str, list The metadata category (or a list of categories) for comparison. control_cats : list The metadata categories to be used as controls. For example, if you wanted to vary age (`cat` = "AGE"), you might want to control for gender and health status (i.e. `control_cats` = ["SEX", "HEALTHY"]) order : list The order of groups in the category. This can be used to limit the groups selected. For example, if there's a category with groups 'A', 'B' and 'C', and you only want to look at A vs B, `order` would be set to ['A', 'B']. ctrl_pos : int The location of the smallest group in `order`. strict_match: bool, optional This determines how data is grouped using `control_cats`. 
If a sample within `meta` has an undefined value (`NaN`) for any of the columns in `control_cats`, the sample will not be considered as having a match and will be ignored when `strict_match` is True. If `strict_match` is False, missing values (NaN) in the `control_cats` can be considered matches. Returns ------- meta_pairs : dict Describes the categories matched for metadata. The `control_cat`-grouped samples are numbered, corresponding to the second list in `index`. The group is keyed to the list of sample arrays with the same length of `order`. index : list A list of numpy arrays describing the positions of samples to be drawn. The first array is an index array. The second gives an integer corresponding to the `control_cat`-group, and the third lists the position of the reference group sample in the list of samples. """ # Sets up variables to be filled meta_pairs = {} index = [] i1 = 0 # Groups the data by the control groups ctrl_groups = meta.groupby(control_cats).groups # Identifies the samples that satisfy the control pairs. Keys are iterated # in sorted order so that results don't change with different dictionary # ordering. 
for g in sorted(ctrl_groups, key=lambda k: str(k)): ids = ctrl_groups[g] # If strict_match, Skips over data that has nans if not _check_nans(g, switch=True) and strict_match: continue # Draws the samples that are matched for control cats m_ids = meta.loc[ids].groupby(cat).groups # Checks if samples from the cat groups are represented in those # Samples id_vecs = [sorted(m_ids[o]) for o in order if o in m_ids] # If all groups are represented, the index and results are retained if len(id_vecs) == len(order): min_vec = np.array([len(v) for v in id_vecs]) loc_vec = np.arange(0, min_vec.min()) meta_pairs[i1] = id_vecs index.append(np.zeros(loc_vec.shape) + i1) i1 += 1 # If the groups are not represented, an empty array gets passed else: index.append(np.array([])) # Converts index to a 1d array index = np.hstack(index) # If index is empty, sets up meta_paris with a no key. if not meta_pairs: meta_pairs["no"] = order return meta_pairs, index def _draw_paired_samples(meta_pairs, index, num_samps): """Draw a random set of ids from a matched list. Parameters ---------- meta_pairs : dict Describes the categories matched for metadata. The `control_cat`-grouped samples are numbered, corresponding to the second list in `index`. The group is keyed to the list of sample arrays with the same length of `order`. index : list A list of numpy arrays describing the positions of samples to be drawn. The first array is an index array. The second gives an integer corresponding to the `control_cat`-group, and the third lists the position of the reference group sample in the list of samples. num_samps : int The number of samples. Returns ------- ids : list A set of randomly selected ids groups from each group. 
""" # Handles an empty paired vector if "no" in meta_pairs: return [np.array([]) for o in meta_pairs["no"]] # Identifies the absolute positions of the control group being drawn set_pos = np.random.choice(index, int(num_samps), replace=False).astype(int) subs = [] # Draws the other groups. Get a collection.Counter object for simplicity counter = collections.Counter(set_pos) # counter.items() order isn't guaranteed in python 3.6 and then the random # choice isn't reproducible between python version, even specifying seed; # so we access such values through sets. set_list = set(set_pos) # then, as stated by @RNAer, since we can't assure that items in sets are # ordered, we choose to order set_list before accessing values set_list = sorted(set_list) # now set_list is ordered and we can iterate over it to get counter obj for set_ in set_list: num_ = counter[set_] r2 = [np.random.choice(col, num_, replace=False) for col in meta_pairs[set_]] subs.append(r2) ids = [np.hstack(ids) for ids in zip(*subs)] return ids def _calculate_power_curve( test, samples, sample_counts, ratio=None, mode="ind", num_iter=1000, alpha=0.05 ): r"""Generate an empirical power curve for the samples. Parameters ---------- test : function The statistical test which accepts a list of arrays of values and returns a p value. samples : array_like `samples` can be a list of lists or an array where each sublist or row in the array corresponds to a sampled group. sample_counts : 1-D array A vector of the number of samples which should be sampled in each curve. mode : {"ind", "matched"}, optional "matched" samples should be used when observations in samples have corresponding observations in other groups. For instance, this may be useful when working with regression data where :math:`x_{1}, x_{2}, ..., x_{n}` maps to :math:`y_{1}, y_{2}, ... , y_{n}`. ratio : 1-D array, optional The fraction of the sample counts which should be assigned to each group. 
If this is a 1-D array, it must be the same length as `samples`. If no value is supplied (`ratio` is None), then an equal number of observations will be drawn for each sample. num_iter : int The default is 1000. The number of p-values to generate for each point on the curve. alpha : float, optional The significance level for the statistical test. Defaults to 0.05. Returns ------- p_values : array The p-values associated with the input sample counts. Raises ------ ValueError If ratio is an array and ratio is not the same length as samples """ # Casts array-likes to arrays sample_counts = np.asarray(sample_counts) # Determines the number of groups num_groups = len(samples) num_samps = len(sample_counts) if isinstance(alpha, float): vec = True pwr = np.zeros((num_samps)) alpha = np.array([alpha]) else: vec = False num_crit = alpha.shape[0] pwr = np.zeros((num_crit, num_samps)) # Checks the ratio argument if ratio is None: ratio = np.ones((num_groups)) ratio = np.asarray(ratio) if not ratio.shape == (num_groups,): raise ValueError("There must be a ratio for each group.") # Loops through the sample sizes for id2, s in enumerate(sample_counts): count = np.round(s * ratio, 0).astype(int) for id1, a in enumerate(alpha): ps = _compare_distributions( test=test, samples=samples, counts=count, num_p=1, num_iter=num_iter, mode=mode, ) if vec: pwr[id2] = (_calculate_power(ps, a)).item() else: pwr[id1, id2] = _calculate_power(ps, a) return pwr scikit-bio-0.6.2/skbio/stats/tests/000077500000000000000000000000001464262511300171675ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/tests/__init__.py000066400000000000000000000005411464262511300213000ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/stats/tests/data/000077500000000000000000000000001464262511300201005ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/tests/data/cr_data_out000066400000000000000000000004511464262511300223070ustar00rootroot00000000000000Grouped by "bar", probability: 0.011000 For group "Foo", the group means is: -1.439800 The info is: [('mean', -1.4398), ('std', 1.3184)] For group "Bar", the group means is: 5.958900 Cannot calculate the first difference with a window of size (3). The info is: [('mean', 5.9589), ('std', 2.7942)] scikit-bio-0.6.2/skbio/stats/tests/data/cr_data_raw000066400000000000000000000003221464262511300222660ustar00rootroot00000000000000Grouped by "bar" For group "Foo": The trajectory is: [-2.675, -0.251, -2.8322, 0.0] For group "Bar": Cannot calculate the first difference with a window of size (3). The trajectory is: [9.6823, 2.9511, 5.2434] scikit-bio-0.6.2/skbio/stats/tests/data/cr_no_data_out000066400000000000000000000001321464262511300227770ustar00rootroot00000000000000Grouped by "foo": This group can not be used. All groups should have more than 1 element. scikit-bio-0.6.2/skbio/stats/tests/data/cr_no_data_raw000066400000000000000000000000001464262511300227530ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/stats/tests/data/gr_w_msg_out000066400000000000000000000002401464262511300225120ustar00rootroot00000000000000For group "Bar", the group means is: 5.958900 Cannot calculate the first difference with a window of size (3). The info is: [('mean', 5.9589), ('std', 2.7942)] scikit-bio-0.6.2/skbio/stats/tests/data/gr_w_msg_raw000066400000000000000000000001761464262511300225040ustar00rootroot00000000000000For group "Bar": Cannot calculate the first difference with a window of size (3). 
The trajectory is: [9.6823, 2.9511, 5.2434] scikit-bio-0.6.2/skbio/stats/tests/data/gr_wo_msg_out000066400000000000000000000001411464262511300226710ustar00rootroot00000000000000For group "Foo", the group means is: -1.439800 The info is: [('mean', -1.4398), ('std', 1.3184)] scikit-bio-0.6.2/skbio/stats/tests/data/gr_wo_msg_raw000066400000000000000000000001031464262511300226510ustar00rootroot00000000000000For group "Foo": The trajectory is: [-2.675, -0.251, -2.8322, 0.0] scikit-bio-0.6.2/skbio/stats/tests/data/vr_out000066400000000000000000000007001464262511300213360ustar00rootroot00000000000000Trajectory algorithm: wdiff ** This output is weighted ** Grouped by "foo": This group can not be used. All groups should have more than 1 element. Grouped by "bar", probability: 0.011000 For group "Foo", the group means is: -1.439800 The info is: [('mean', -1.4398), ('std', 1.3184)] For group "Bar", the group means is: 5.958900 Cannot calculate the first difference with a window of size (3). The info is: [('mean', 5.9589), ('std', 2.7942)] scikit-bio-0.6.2/skbio/stats/tests/data/vr_raw000066400000000000000000000004171464262511300213250ustar00rootroot00000000000000Trajectory algorithm: wdiff ** This output is weighted ** Grouped by "bar" For group "Foo": The trajectory is: [-2.675, -0.251, -2.8322, 0.0] For group "Bar": Cannot calculate the first difference with a window of size (3). The trajectory is: [9.6823, 2.9511, 5.2434] scikit-bio-0.6.2/skbio/stats/tests/data/vr_real_out000066400000000000000000000010051464262511300223400ustar00rootroot00000000000000Trajectory algorithm: avg Grouped by "Description": This group can not be used. All groups should have more than 1 element. Grouped by "DOB": This group can not be used. All groups should have more than 1 element. Grouped by "Weight": This group can not be used. All groups should have more than 1 element. 
Grouped by "Treatment", probability: 0.933100 For group "Control", the group means is: 4.050800 The info is: [('avg', 4.0508)] For group "Fast", the group means is: 4.159600 The info is: [('avg', 4.1596)] scikit-bio-0.6.2/skbio/stats/tests/data/vr_real_raw000066400000000000000000000003151464262511300223250ustar00rootroot00000000000000Trajectory algorithm: avg Grouped by "Treatment" For group "Control": The trajectory is: [2.3694, 3.3716, 5.4452, 4.5704, 4.4972] For group "Fast": The trajectory is: [7.2220, 4.2726, 1.1169, 4.0271] scikit-bio-0.6.2/skbio/stats/tests/test_composition.py000066400000000000000000001750131464262511300231520ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main import copy import numpy as np import numpy.testing as npt from numpy.random import normal import pandas as pd import pandas.testing as pdt from scipy.sparse import coo_matrix from scipy.stats import f_oneway from skbio import TreeNode from skbio.util import assert_data_frame_almost_equal from skbio.stats.distance import DistanceMatrixError from skbio.stats.composition import ( closure, multi_replace, multiplicative_replacement, perturb, perturb_inv, power, inner, clr, clr_inv, ilr, ilr_inv, alr, alr_inv, sbp_basis, _gram_schmidt_basis, centralize, _calc_p_adjust, ancom, vlr, pairwise_vlr, tree_basis, dirmult_ttest) def assert_coo_allclose(res, exp, rtol=1e-7, atol=1e-7): res_data = np.vstack((res.row, res.col, res.data)).T exp_data = np.vstack((exp.row, exp.col, exp.data)).T # sort by row and col res_data = res_data[res_data[:, 1].argsort()] res_data = res_data[res_data[:, 0].argsort()] exp_data = exp_data[exp_data[:, 
1].argsort()] exp_data = exp_data[exp_data[:, 0].argsort()] npt.assert_allclose(res_data, exp_data, rtol=rtol, atol=atol) class CompositionTests(TestCase): def setUp(self): # Compositional data self.cdata1 = np.array([[2, 2, 6], [4, 4, 2]]) self.cdata2 = np.array([2, 2, 6]) self.cdata3 = np.array([[1, 2, 3, 0, 5], [1, 0, 0, 4, 5], [1, 2, 3, 4, 5]]) self.cdata4 = np.array([1, 2, 3, 0, 5]) self.cdata5 = [[2, 2, 6], [4, 4, 2]] self.cdata6 = [[1, 2, 3, 0, 5], [1, 0, 0, 4, 5], [1, 2, 3, 4, 5]] self.cdata7 = [np.exp(1), 1, 1] self.cdata8 = [np.exp(1), 1, 1, 1] # Simplicial orthonormal basis obtained from Gram-Schmidt self.ortho1 = [[0.44858053, 0.10905743, 0.22118102, 0.22118102], [0.3379924, 0.3379924, 0.0993132, 0.22470201], [0.3016453, 0.3016453, 0.3016453, 0.09506409]] # Real data self.rdata1 = [[0.70710678, -0.70710678, 0., 0.], [0.40824829, 0.40824829, -0.81649658, 0.], [0.28867513, 0.28867513, 0.28867513, -0.8660254]] # Bad datasets # negative count self.bad1 = np.array([1, 2, -1]) # zero count self.bad2 = np.array([[[1, 2, 3, 0, 5]]]) def test_closure(self): npt.assert_allclose(closure(self.cdata1), np.array([[.2, .2, .6], [.4, .4, .2]])) npt.assert_allclose(closure(self.cdata2), np.array([.2, .2, .6])) npt.assert_allclose(closure(self.cdata5), np.array([[.2, .2, .6], [.4, .4, .2]])) with self.assertRaises(ValueError): closure(self.bad1) with self.assertRaises(ValueError): closure(self.bad2) # make sure that inplace modification is not occurring closure(self.cdata2) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) def test_closure_warning(self): with self.assertRaises(ValueError): closure([0., 0., 0.]) with self.assertRaises(ValueError): closure([[0., 0., 0.], [0., 5., 5.]]) def test_perturb(self): pmat = perturb(closure(self.cdata1), closure(np.array([1, 1, 1]))) npt.assert_allclose(pmat, np.array([[.2, .2, .6], [.4, .4, .2]])) pmat = perturb(closure(self.cdata1), closure(np.array([10, 10, 20]))) npt.assert_allclose(pmat, np.array([[.125, .125, .75], [1./3, 
1./3, 1./3]])) pmat = perturb(closure(self.cdata1), closure(np.array([10, 10, 20]))) npt.assert_allclose(pmat, np.array([[.125, .125, .75], [1./3, 1./3, 1./3]])) pmat = perturb(closure(self.cdata2), closure([1, 2, 1])) npt.assert_allclose(pmat, np.array([1./6, 2./6, 3./6])) pmat = perturb(closure(self.cdata5), closure(np.array([1, 1, 1]))) npt.assert_allclose(pmat, np.array([[.2, .2, .6], [.4, .4, .2]])) with self.assertRaises(ValueError): perturb(closure(self.cdata5), self.bad1) # make sure that inplace modification is not occurring perturb(self.cdata2, [1, 2, 3]) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) def test_power(self): pmat = power(closure(self.cdata1), 2) npt.assert_allclose(pmat, np.array([[.04/.44, .04/.44, .36/.44], [.16/.36, .16/.36, .04/.36]])) pmat = power(closure(self.cdata2), 2) npt.assert_allclose(pmat, np.array([.04, .04, .36])/.44) pmat = power(closure(self.cdata5), 2) npt.assert_allclose(pmat, np.array([[.04/.44, .04/.44, .36/.44], [.16/.36, .16/.36, .04/.36]])) with self.assertRaises(ValueError): power(self.bad1, 2) # make sure that inplace modification is not occurring power(self.cdata2, 4) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) def test_perturb_inv(self): pmat = perturb_inv(closure(self.cdata1), closure([.1, .1, .1])) imat = perturb(closure(self.cdata1), closure([10, 10, 10])) npt.assert_allclose(pmat, imat) pmat = perturb_inv(closure(self.cdata1), closure([1, 1, 1])) npt.assert_allclose(pmat, closure([[.2, .2, .6], [.4, .4, .2]])) pmat = perturb_inv(closure(self.cdata5), closure([.1, .1, .1])) imat = perturb(closure(self.cdata1), closure([10, 10, 10])) npt.assert_allclose(pmat, imat) with self.assertRaises(ValueError): perturb_inv(closure(self.cdata1), self.bad1) # make sure that inplace modification is not occurring perturb_inv(self.cdata2, [1, 2, 3]) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) def test_inner(self): a = inner(self.cdata5, self.cdata5) npt.assert_allclose(a, np.array([[0.80463264, 
-0.50766667], [-0.50766667, 0.32030201]])) b = inner(self.cdata7, self.cdata7) npt.assert_allclose(b, 0.66666666666666663) # Make sure that orthogonality holds npt.assert_allclose(inner(self.ortho1, self.ortho1), np.identity(3), rtol=1e-04, atol=1e-06) with self.assertRaises(ValueError): inner(self.cdata1, self.cdata8) # make sure that inplace modification is not occurring inner(self.cdata1, self.cdata1) npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]])) def test_multi_replace(self): amat = multi_replace(closure(self.cdata3)) npt.assert_allclose(amat, np.array([[0.087273, 0.174545, 0.261818, 0.04, 0.436364], [0.092, 0.04, 0.04, 0.368, 0.46], [0.066667, 0.133333, 0.2, 0.266667, 0.333333]]), rtol=1e-5, atol=1e-5) amat = multi_replace(closure(self.cdata4)) npt.assert_allclose(amat, np.array([0.087273, 0.174545, 0.261818, 0.04, 0.436364]), rtol=1e-5, atol=1e-5) amat = multi_replace(closure(self.cdata6)) npt.assert_allclose(amat, np.array([[0.087273, 0.174545, 0.261818, 0.04, 0.436364], [0.092, 0.04, 0.04, 0.368, 0.46], [0.066667, 0.133333, 0.2, 0.266667, 0.333333]]), rtol=1e-5, atol=1e-5) with self.assertRaises(ValueError): multi_replace(self.bad1) with self.assertRaises(ValueError): multi_replace(self.bad2) # make sure that inplace modification is not occurring multi_replace(self.cdata4) npt.assert_allclose(self.cdata4, np.array([1, 2, 3, 0, 5])) def multi_replace_warning(self): with self.assertRaises(ValueError): multi_replace([0, 1, 2], delta=1) def test_multiplicative_replacement(self): mat = closure(self.cdata3) npt.assert_allclose(multiplicative_replacement(mat), multi_replace(mat)) def test_clr(self): cmat = clr(closure(self.cdata1)) A = np.array([.2, .2, .6]) B = np.array([.4, .4, .2]) npt.assert_allclose(cmat, [np.log(A / np.exp(np.log(A).mean())), np.log(B / np.exp(np.log(B).mean()))]) cmat = clr(closure(self.cdata2)) A = np.array([.2, .2, .6]) npt.assert_allclose(cmat, np.log(A / np.exp(np.log(A).mean()))) cmat = clr(closure(self.cdata5)) A = 
np.array([.2, .2, .6]) B = np.array([.4, .4, .2]) npt.assert_allclose(cmat, [np.log(A / np.exp(np.log(A).mean())), np.log(B / np.exp(np.log(B).mean()))]) with self.assertRaises(ValueError): clr(self.bad1) with self.assertRaises(ValueError): clr(self.bad2) # make sure that inplace modification is not occurring clr(self.cdata2) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) def test_clr_inv(self): npt.assert_allclose(clr_inv(self.rdata1), self.ortho1) npt.assert_allclose(clr(clr_inv(self.rdata1)), self.rdata1, rtol=1e-4, atol=1e-5) # make sure that inplace modification is not occurring clr_inv(self.rdata1) npt.assert_allclose(self.rdata1, np.array([[0.70710678, -0.70710678, 0., 0.], [0.40824829, 0.40824829, -0.81649658, 0.], [0.28867513, 0.28867513, 0.28867513, -0.8660254]])) def test_centralize(self): cmat = centralize(closure(self.cdata1)) npt.assert_allclose(cmat, np.array([[0.22474487, 0.22474487, 0.55051026], [0.41523958, 0.41523958, 0.16952085]])) cmat = centralize(closure(self.cdata5)) npt.assert_allclose(cmat, np.array([[0.22474487, 0.22474487, 0.55051026], [0.41523958, 0.41523958, 0.16952085]])) with self.assertRaises(ValueError): centralize(self.bad1) with self.assertRaises(ValueError): centralize(self.bad2) # make sure that inplace modification is not occurring centralize(self.cdata1) npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]])) def test_ilr(self): mat = closure(self.cdata7) npt.assert_array_almost_equal(ilr(mat), np.array([0.70710678, 0.40824829])) # Should give same result as inner npt.assert_allclose(ilr(self.ortho1), np.identity(3), rtol=1e-04, atol=1e-06) # no check npt.assert_array_almost_equal(ilr(mat, check=False), np.array([0.70710678, 0.40824829])) with self.assertRaises(ValueError): ilr(self.cdata1, basis=self.cdata1) # make sure that inplace modification is not occurring ilr(self.cdata1) npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]])) def test_ilr_basis(self): table = np.array([[1., 10.], 
[1.14141414, 9.90909091], [1.28282828, 9.81818182], [1.42424242, 9.72727273], [1.56565657, 9.63636364]]) basis = np.atleast_2d(clr([[0.80442968, 0.19557032]])) res = ilr(table, basis=basis) exp = np.array([[np.log(1/10)*np.sqrt(1/2)], [np.log(1.14141414 / 9.90909091)*np.sqrt(1/2)], [np.log(1.28282828 / 9.81818182)*np.sqrt(1/2)], [np.log(1.42424242 / 9.72727273)*np.sqrt(1/2)], [np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]) npt.assert_allclose(res, exp) def test_ilr_basis_one_dimension_error(self): table = np.array([[1., 10.], [1.14141414, 9.90909091], [1.28282828, 9.81818182], [1.42424242, 9.72727273], [1.56565657, 9.63636364]]) basis = np.array([0.80442968, 0.19557032]) with self.assertRaises(ValueError): ilr(table, basis=basis) def test_ilr_inv(self): mat = closure(self.cdata7) npt.assert_array_almost_equal(ilr_inv(ilr(mat)), mat) npt.assert_allclose(ilr_inv(np.identity(3)), self.ortho1, rtol=1e-04, atol=1e-06) # no check npt.assert_array_almost_equal(ilr_inv(ilr(mat), check=False), mat) with self.assertRaises(ValueError): ilr_inv(self.cdata1, basis=self.cdata1) # make sure that inplace modification is not occurring ilr_inv(self.cdata1) npt.assert_allclose(self.cdata1, np.array([[2, 2, 6], [4, 4, 2]])) def test_ilr_basis_isomorphism(self): # tests to make sure that the isomorphism holds # with the introduction of the basis. 
basis = np.atleast_2d(clr([[0.80442968, 0.19557032]])) table = np.array([[np.log(1/10)*np.sqrt(1/2), np.log(1.14141414 / 9.90909091)*np.sqrt(1/2), np.log(1.28282828 / 9.81818182)*np.sqrt(1/2), np.log(1.42424242 / 9.72727273)*np.sqrt(1/2), np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]).T lr = ilr_inv(table, basis=basis) res = ilr(lr, basis=basis) npt.assert_allclose(res, table) table = np.array([[1., 10.], [1.14141414, 9.90909091], [1.28282828, 9.81818182], [1.42424242, 9.72727273], [1.56565657, 9.63636364]]) res = ilr_inv(ilr(table, basis=basis), basis=basis) npt.assert_allclose(res, closure(table.squeeze())) def test_ilr_inv_basis(self): exp = closure(np.array([[1., 10.], [1.14141414, 9.90909091], [1.28282828, 9.81818182], [1.42424242, 9.72727273], [1.56565657, 9.63636364]])) basis = np.atleast_2d(clr([[0.80442968, 0.19557032]])) table = np.array([[np.log(1/10)*np.sqrt(1/2), np.log(1.14141414 / 9.90909091)*np.sqrt(1/2), np.log(1.28282828 / 9.81818182)*np.sqrt(1/2), np.log(1.42424242 / 9.72727273)*np.sqrt(1/2), np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]).T res = ilr_inv(table, basis=basis) npt.assert_allclose(res, exp) def test_ilr_inv_basis_one_dimension_error(self): basis = clr([0.80442968, 0.19557032]) table = np.array([[np.log(1/10)*np.sqrt(1/2), np.log(1.14141414 / 9.90909091)*np.sqrt(1/2), np.log(1.28282828 / 9.81818182)*np.sqrt(1/2), np.log(1.42424242 / 9.72727273)*np.sqrt(1/2), np.log(1.56565657 / 9.63636364)*np.sqrt(1/2)]]).T with self.assertRaises(ValueError): ilr_inv(table, basis=basis) def test_alr(self): # 2d-composition comp1 = closure(self.cdata1) alr2d_byhand = np.array([np.log(comp1[:, 0]/comp1[:, 1]), np.log(comp1[:, 2]/comp1[:, 1])]).T alr2d_method = alr(comp1, denominator_idx=1) npt.assert_allclose(alr2d_byhand, alr2d_method) # 1d-composition comp2 = closure(self.cdata2) alr1d_byhand = np.array([np.log(comp2[0]/comp2[1]), np.log(comp2[2]/comp2[1])]).T alr1d_method = alr(comp2, denominator_idx=1) npt.assert_allclose(alr1d_byhand, 
alr1d_method) with self.assertRaises(ValueError): alr(self.bad1) with self.assertRaises(ValueError): alr(self.bad2) # make sure that inplace modification is not occurring alr(self.cdata2) npt.assert_allclose(self.cdata2, np.array([2, 2, 6])) # matrix must be 1d or 2d with self.assertRaises(ValueError): alr(np.atleast_3d(self.cdata2)) def test_alr_inv(self): # 2d-composition comp1 = closure(self.cdata1) alr2d_byhand = np.array([np.log(comp1[:, 0]/comp1[:, 1]), np.log(comp1[:, 2]/comp1[:, 1])]).T alr2d_method = alr(comp1, denominator_idx=1) B = 1/(1 + np.exp(alr2d_byhand[:, 0]) + np.exp(alr2d_byhand[:, 1])) A = B * np.exp(alr2d_byhand[:, 0]) C = B * np.exp(alr2d_byhand[:, 1]) alrinv2d_byhand = np.column_stack((A, B, C)) alrinv2d_method = alr_inv(alr2d_method, denominator_idx=1) npt.assert_allclose(alrinv2d_byhand, alrinv2d_method) # 1d-composition comp2 = closure(self.cdata2) alr1d_byhand = np.array([np.log(comp2[0]/comp2[1]), np.log(comp2[2]/comp2[1])]).T alr1d_method = alr(comp2, denominator_idx=1) B = 1/(1 + np.exp(alr1d_byhand[0]) + np.exp(alr1d_byhand[1])) A = B * np.exp(alr1d_byhand[0]) C = B * np.exp(alr1d_byhand[1]) alrinv1d_byhand = np.column_stack((A, B, C))[0, :] alrinv1d_method = alr_inv(alr1d_method, denominator_idx=1) npt.assert_allclose(alrinv1d_byhand, alrinv1d_method) # make sure that inplace modification is not occurring alr_inv(self.rdata1) npt.assert_allclose(self.rdata1, np.array([[0.70710678, -0.70710678, 0., 0.], [0.40824829, 0.40824829, -0.81649658, 0.], [0.28867513, 0.28867513, 0.28867513, -0.8660254]])) with self.assertRaises(ValueError): alr_inv(self.bad2) def test_sbp_basis_gram_schmidt(self): gsbasis = _gram_schmidt_basis(5) sbp = np.array([[1, -1, 0, 0, 0], [1, 1, -1, 0, 0], [1, 1, 1, -1, 0], [1, 1, 1, 1, -1]]) sbpbasis = sbp_basis(sbp) npt.assert_allclose(gsbasis, sbpbasis) def test_sbp_basis_elementwise(self): sbp = np.array([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1], [1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, 0], [1, 1, 1, 1, 1, 1, -1, 0, 0, 
0, 0, 0], [1, 1, -1, -1, -1, 1, 0, 0, 0, 0, 0, 0], [1, -1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0], [0, 0, 1, -1, -1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, -1, -1, 1, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, -1, 0, 0]]) sbpbasis = sbp_basis(sbp) # by hand, element-wise r = np.apply_along_axis(func1d=lambda x: np.sum(x > 0), axis=1, arr=sbp) s = np.apply_along_axis(func1d=lambda x: np.sum(x < 0), axis=1, arr=sbp) psi = np.zeros(sbp.shape) for i in range(0, sbp.shape[0]): for j in range(0, sbp.shape[1]): if sbp[i, j] == 1: psi[i, j] = np.sqrt(s[i]/(r[i]*(r[i]+s[i]))) elif sbp[i, j] == -1: psi[i, j] = -np.sqrt(r[i]/(s[i]*(r[i]+s[i]))) npt.assert_allclose(psi, sbpbasis) class TestTreeBasis(TestCase): def test_tree_basis_base_case(self): tree = u"(a,b);" t = TreeNode.read([tree]) exp_basis = coo_matrix( np.array([[-np.sqrt(1. / 2), np.sqrt(1. / 2)]])) exp_keys = [t.name] res_basis, res_keys = tree_basis(t) assert_coo_allclose(exp_basis, res_basis) self.assertListEqual(exp_keys, res_keys) def test_tree_basis_invalid(self): with self.assertRaises(ValueError): tree = u"(a,b,c);" t = TreeNode.read([tree]) tree_basis(t) def test_tree_basis_unbalanced(self): tree = u"((a,b)c, d);" t = TreeNode.read([tree]) exp_basis = coo_matrix(np.array( [[-np.sqrt(1. / 6), -np.sqrt(1. / 6), np.sqrt(2. / 3)], [-np.sqrt(1. / 2), np.sqrt(1. / 2), 0]] )) exp_keys = [t.name, t[0].name] res_basis, res_keys = tree_basis(t) assert_coo_allclose(exp_basis, res_basis) self.assertListEqual(exp_keys, res_keys) def test_tree_basis_unbalanced2(self): tree = u"(d, (a,b)c);" t = TreeNode.read([tree]) exp_basis = coo_matrix(np.array( [ [-np.sqrt(2. / 3), np.sqrt(1. / 6), np.sqrt(1. / 6)], [0, -np.sqrt(1. / 2), np.sqrt(1. 
/ 2)] ] )) exp_keys = [t.name, t[1].name] res_basis, res_keys = tree_basis(t) assert_coo_allclose(exp_basis, res_basis, atol=1e-7, rtol=1e-7) self.assertListEqual(exp_keys, res_keys) class AncomTests(TestCase): def setUp(self): # Basic count data with 2 groupings self.table1 = pd.DataFrame([ [10, 10, 10, 20, 20, 20], [11, 12, 11, 21, 21, 21], [10, 11, 10, 10, 11, 10], [10, 11, 10, 10, 10, 9], [10, 11, 10, 10, 10, 10], [10, 11, 10, 10, 10, 11], [10, 13, 10, 10, 10, 12]]).T self.cats1 = pd.Series([0, 0, 0, 1, 1, 1]) # Real valued data with 2 groupings D, L = 40, 80 np.random.seed(0) self.table2 = np.vstack((np.concatenate((normal(10, 1, D), normal(200, 1, D))), np.concatenate((normal(20, 1, D), normal(100000, 1, D))), normal(10, 1, L), normal(10, 1, L), np.concatenate((normal(20, 1, D), normal(100000, 1, D))), normal(10, 1, L), normal(10, 1, L), normal(10, 1, L), normal(10, 1, L))) self.table2 = np.absolute(self.table2) self.table2 = pd.DataFrame(self.table2.astype(int).T) self.cats2 = pd.Series([0]*D + [1]*D) # Real valued data with 2 groupings and no significant difference self.table3 = pd.DataFrame([ [10, 10.5, 10, 10, 10.5, 10.3], [11, 11.5, 11, 11, 11.5, 11.3], [10, 10.5, 10, 10, 10.5, 10.2], [10, 10.5, 10, 10, 10.5, 10.3], [10, 10.5, 10, 10, 10.5, 10.1], [10, 10.5, 10, 10, 10.5, 10.6], [10, 10.5, 10, 10, 10.5, 10.4]]).T self.cats3 = pd.Series([0, 0, 0, 1, 1, 1]) # Real valued data with 3 groupings D, L = 40, 120 np.random.seed(0) self.table4 = np.vstack((np.concatenate((normal(10, 1, D), normal(200, 1, D), normal(400, 1, D))), np.concatenate((normal(20, 1, D), normal(100000, 1, D), normal(2000, 1, D))), normal(10, 1, L), normal(10, 1, L), np.concatenate((normal(20, 1, D), normal(100000, 1, D), normal(2000, 1, D))), normal(10, 1, L), normal(10, 1, L), normal(10, 1, L), normal(10, 1, L))) self.table4 = np.absolute(self.table4) self.table4 = pd.DataFrame(self.table4.astype(int).T) self.cats4 = pd.Series([0]*D + [1]*D + [2]*D) # Noncontiguous case self.table5 = 
pd.DataFrame([ [11, 12, 21, 11, 21, 21], [10, 11, 10, 10, 11, 10], [10, 11, 10, 10, 10, 9], [10, 11, 10, 10, 10, 10], [10, 11, 10, 10, 10, 11], [10, 10, 20, 9, 20, 20], [10, 13, 10, 10, 10, 12]]).T self.cats5 = pd.Series([0, 0, 1, 0, 1, 1]) # Different number of classes case self.table6 = pd.DataFrame([ [11, 12, 9, 11, 21, 21], [10, 11, 10, 10, 11, 10], [10, 11, 10, 10, 10, 9], [10, 11, 10, 10, 10, 10], [10, 11, 10, 10, 10, 11], [10, 10, 10, 9, 20, 20], [10, 13, 10, 10, 10, 12]]).T self.cats6 = pd.Series([0, 0, 0, 0, 1, 1]) # Categories are letters self.table7 = pd.DataFrame([ [11, 12, 9, 11, 21, 21], [10, 11, 10, 10, 11, 10], [10, 11, 10, 10, 10, 9], [10, 11, 10, 10, 10, 10], [10, 11, 10, 10, 10, 11], [10, 10, 10, 9, 20, 20], [10, 13, 10, 10, 10, 12]]).T self.cats7 = pd.Series(['a', 'a', 'a', 'a', 'b', 'b']) # Swap samples self.table8 = pd.DataFrame([ [10, 10, 10, 20, 20, 20], [11, 12, 11, 21, 21, 21], [10, 11, 10, 10, 11, 10], [10, 11, 10, 10, 10, 9], [10, 11, 10, 10, 10, 10], [10, 11, 10, 10, 10, 11], [10, 13, 10, 10, 10, 12]]).T self.table8.index = ['a', 'b', 'c', 'd', 'e', 'f'] self.cats8 = pd.Series([0, 0, 1, 0, 1, 1], index=['a', 'b', 'd', 'c', 'e', 'f']) # Real valued data with 3 groupings D, L = 40, 120 np.random.seed(0) self.table9 = np.vstack((np.concatenate((normal(10, 1, D), normal(200, 1, D), normal(400, 1, D))), np.concatenate((normal(200000, 1, D), normal(10, 1, D), normal(2000, 1, D))), normal(10, 10, L), normal(10, 10, L), np.concatenate((normal(2000, 1, D), normal(100000, 1, D), normal(2000, 1, D))), normal(10000, 1000, L), normal(10, 10, L), normal(10, 10, L), normal(10, 10, L), normal(10000, 1000, L), normal(10, 10, L), normal(10, 10, L), normal(10, 10, L), np.concatenate((normal(2000, 1, D), normal(100000, 1, D), normal(2000, 1, D))), normal(10000, 1000, L), normal(10, 10, L), normal(10, 10, L), normal(10, 10, L))) self.table9 = np.absolute(self.table9)+1 self.table9 = pd.DataFrame(self.table9.astype(int).T) self.cats9 = pd.Series([0]*D + 
[1]*D + [2]*D) # Real valued data with 2 groupings D, L = 40, 80 np.random.seed(0) self.table10 = np.vstack((np.concatenate((normal(10, 1, D), normal(200, 1, D))), np.concatenate((normal(10, 1, D), normal(200, 1, D))), np.concatenate((normal(20, 10, D), normal(100, 10, D))), normal(10, 1, L), np.concatenate((normal(200, 100, D), normal(100000, 100, D))), np.concatenate((normal(200000, 100, D), normal(300, 100, D))), np.concatenate((normal(200000, 100, D), normal(300, 100, D))), np.concatenate((normal(20, 20, D), normal(40, 10, D))), np.concatenate((normal(20, 20, D), normal(40, 10, D))), np.concatenate((normal(20, 20, D), normal(40, 10, D))), normal(100, 10, L), normal(100, 10, L), normal(1000, 10, L), normal(1000, 10, L), normal(10, 10, L), normal(10, 10, L), normal(10, 10, L), normal(10, 10, L))) self.table10 = np.absolute(self.table10) + 1 self.table10 = pd.DataFrame(self.table10.astype(int).T) self.cats10 = pd.Series([0]*D + [1]*D) # zero count self.bad1 = pd.DataFrame(np.array([ [10, 10, 10, 20, 20, 0], [11, 11, 11, 21, 21, 21], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10]]).T) # negative count self.bad2 = pd.DataFrame(np.array([ [10, 10, 10, 20, 20, 1], [11, 11, 11, 21, 21, 21], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, -1], [10, 10, 10, 10, 10, 10]]).T) # missing count self.bad3 = pd.DataFrame(np.array([ [10, 10, 10, 20, 20, 1], [11, 11, 11, 21, 21, 21], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, 10], [10, 10, 10, 10, 10, np.nan], [10, 10, 10, 10, 10, 10]]).T) self.badcats1 = pd.Series([0, 0, 0, 1, np.nan, 1]) self.badcats2 = pd.Series([0, 0, 0, 0, 0, 0]) self.badcats3 = pd.Series([0, 0, 1, 1]) self.badcats4 = pd.Series(range(len(self.table1))) self.badcats5 = pd.Series([1]*len(self.table1)) def test_ancom_basic_counts(self): test_table = pd.DataFrame(self.table1) original_table = 
copy.deepcopy(test_table) test_cats = pd.Series(self.cats1) original_cats = copy.deepcopy(test_cats) result = ancom(test_table, test_cats, p_adjust=None) # Test to make sure that the input table hasn't be altered assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_percentiles(self): table = pd.DataFrame([[12, 11], [9, 11], [1, 11], [22, 100], [20, 53], [23, 1]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1', 'b2']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) percentiles = [0.0, 25.0, 50.0, 75.0, 100.0] groups = ['a', 'b'] tuples = [(p, g) for g in groups for p in percentiles] exp_mi = pd.MultiIndex.from_tuples(tuples, names=['Percentile', 'Group']) exp_data = np.array( [[1.0, 11.0], [5.0, 11.0], [9.0, 11.0], [10.5, 11.0], [12.0, 11.0], [20.0, 1.0], [21.0, 27.0], [22.0, 53.0], [22.5, 76.5], [23.0, 100.0]]) exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1', 'b2']) result = ancom(table, grouping)[1] assert_data_frame_almost_equal(result, exp) def test_ancom_percentiles_alt_categories(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'c', 'b', 'b', 'c'], index=['s1', 's2', 's3', 's4', 's5', 's6']) percentiles = [0.0, 25.0, 50.0, 75.0, 100.0] groups = ['a', 'b', 'c'] tuples = [(p, g) for g in groups for p in percentiles] exp_mi = pd.MultiIndex.from_tuples(tuples, names=['Percentile', 'Group']) exp_data = np.array([[9.0], [9.75], [10.5], [11.25], [12.0], # a [20.0], [20.5], [21.0], [21.5], [22.0], # b [1.0], [6.5], [12.0], [17.5], [23.0]]) # c exp = 
pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) result = ancom(table, grouping, percentiles=percentiles)[1] assert_data_frame_almost_equal(result, exp) def test_ancom_alt_percentiles(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) percentiles = [42.0, 50.0] groups = ['a', 'b'] tuples = [(p, g) for g in groups for p in percentiles] exp_mi = pd.MultiIndex.from_tuples(tuples, names=['Percentile', 'Group']) exp_data = np.array([[7.71999999], [9.0], # a [21.68], [22.0]]) # b exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) result = ancom(table, grouping, percentiles=percentiles)[1] assert_data_frame_almost_equal(result, exp) def test_ancom_percentiles_swapped(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'b', 'a', 'b', 'b'], index=['s1', 's2', 's4', 's3', 's5', 's6']) percentiles = [42.0, 50.0] groups = ['a', 'b'] tuples = [(p, g) for g in groups for p in percentiles] exp_mi = pd.MultiIndex.from_tuples(tuples, names=['Percentile', 'Group']) exp_data = np.array([[7.71999999], [9.0], # a [21.68], [22.0]]) # b exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) result = ancom(table, grouping, percentiles=percentiles)[1] assert_data_frame_almost_equal(result, exp) def test_ancom_percentile_order_unimportant(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) # order of percentiles in unimportant after sorting result1 = ancom(table, grouping, percentiles=[50.0, 42.0])[1] result2 = ancom(table, grouping, percentiles=[42.0, 50.0])[1] assert_data_frame_almost_equal( result1.sort_index(axis=1), 
result2.sort_index(axis=1)) def test_ancom_percentiles_iterator(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) percentiles = [42.0, 50.0] groups = ['a', 'b'] tuples = [(p, g) for g in groups for p in percentiles] exp_mi = pd.MultiIndex.from_tuples(tuples, names=['Percentile', 'Group']) exp_data = np.array([[7.71999999], [9.0], # a [21.68], [22.0]]) # b exp = pd.DataFrame(exp_data.T, columns=exp_mi, index=['b1']) result = ancom(table, grouping, percentiles=iter(percentiles))[1] assert_data_frame_almost_equal(result, exp) def test_ancom_no_percentiles(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) result = ancom(table, grouping, percentiles=[])[1] assert_data_frame_almost_equal(result, pd.DataFrame()) def test_ancom_percentile_out_of_range(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) with self.assertRaises(ValueError): ancom(table, grouping, percentiles=[-1.0]) with self.assertRaises(ValueError): ancom(table, grouping, percentiles=[100.1]) with self.assertRaises(ValueError): ancom(table, grouping, percentiles=[10.0, 3.0, 101.0, 100]) def test_ancom_duplicate_percentiles(self): table = pd.DataFrame([[12], [9], [1], [22], [20], [23]], index=['s1', 's2', 's3', 's4', 's5', 's6'], columns=['b1']) grouping = pd.Series(['a', 'a', 'a', 'b', 'b', 'b'], index=['s1', 's2', 's3', 's4', 's5', 's6']) with self.assertRaises(ValueError): ancom(table, grouping, percentiles=[10.0, 10.0]) def test_ancom_basic_proportions(self): # Converts from counts to proportions 
test_table = pd.DataFrame(closure(self.table1)) original_table = copy.deepcopy(test_table) test_cats = pd.Series(self.cats1) original_cats = copy.deepcopy(test_cats) result = ancom(test_table, test_cats, p_adjust=None) # Test to make sure that the input table hasn't be altered assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_multiple_groups(self): test_table = pd.DataFrame(self.table4) original_table = copy.deepcopy(test_table) test_cats = pd.Series(self.cats4) original_cats = copy.deepcopy(test_cats) result = ancom(test_table, test_cats) # Test to make sure that the input table hasn't be altered assert_data_frame_almost_equal(original_table, test_table) # Test to make sure that the input table hasn't be altered pdt.assert_series_equal(original_cats, test_cats) exp = pd.DataFrame( {'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), 'Reject null hypothesis': np.array([True, True, False, False, True, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_noncontiguous(self): result = ancom(self.table5, self.cats5, p_adjust=None) exp = pd.DataFrame( {'W': np.array([6, 2, 2, 2, 2, 6, 2]), 'Reject null hypothesis': np.array([True, False, False, False, False, True, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_unbalanced(self): result = ancom(self.table6, self.cats6, p_adjust=None) exp = pd.DataFrame( {'W': np.array([5, 3, 3, 2, 2, 5, 2]), 'Reject null hypothesis': np.array([True, False, False, False, False, True, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_letter_categories(self): result = ancom(self.table7, self.cats7, 
p_adjust=None) exp = pd.DataFrame( {'W': np.array([5, 3, 3, 2, 2, 5, 2]), 'Reject null hypothesis': np.array([True, False, False, False, False, True, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_significance_test_none(self): exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) result = ancom(self.table1, self.cats1, significance_test=None) assert_data_frame_almost_equal(result[0], exp) def test_ancom_significance_test_callable(self): exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) result = ancom(self.table1, self.cats1, significance_test=f_oneway) assert_data_frame_almost_equal(result[0], exp) def test_ancom_multiple_comparisons(self): exp = pd.DataFrame( {'W': np.array([0] * 7), 'Reject null hypothesis': np.array([False] * 7, dtype=bool)}) for method in 'holm', 'bh': result = ancom(self.table1, self.cats1, p_adjust=method, significance_test='mannwhitneyu') assert_data_frame_almost_equal(result[0], exp) def test_ancom_multiple_comparisons_deprecated(self): # @deprecated exp = pd.DataFrame( {'W': np.array([0] * 7), 'Reject null hypothesis': np.array([False] * 7, dtype=bool)}) result = ancom(self.table1, self.cats1, significance_test='mannwhitneyu', multiple_comparisons_correction=None) assert_data_frame_almost_equal(result[0], exp) def test_ancom_alternative_test(self): result = ancom(self.table1, self.cats1, p_adjust=None, significance_test="ttest_ind") exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_incorrect_test(self): with self.assertRaises(ValueError) as cm: ancom(self.table1, self.cats1, significance_test="not_a_test") msg = 'Function 
"not_a_test" does not exist under scipy.stats.' self.assertEqual(str(cm.exception), msg) def test_ancom_normal_data(self): result = ancom(self.table2, self.cats2, p_adjust=None, significance_test="ttest_ind") exp = pd.DataFrame( {'W': np.array([8, 8, 3, 3, 8, 3, 3, 3, 3]), 'Reject null hypothesis': np.array([True, True, False, False, True, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_basic_counts_swapped(self): result = ancom(self.table8, self.cats8) exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_no_signal(self): result = ancom(self.table3, self.cats3, p_adjust=None) exp = pd.DataFrame( {'W': np.array([0]*7), 'Reject null hypothesis': np.array([False]*7, dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_tau(self): exp1 = pd.DataFrame( {'W': np.array([8, 7, 3, 3, 7, 3, 3, 3, 3]), 'Reject null hypothesis': np.array([True, False, False, False, False, False, False, False, False], dtype=bool)}) exp2 = pd.DataFrame( {'W': np.array([17, 17, 5, 6, 16, 5, 7, 5, 4, 5, 8, 4, 5, 16, 5, 11, 4, 6]), 'Reject null hypothesis': np.array([True, True, False, False, True, False, False, False, False, False, False, False, False, True, False, False, False, False], dtype=bool)}) exp3 = pd.DataFrame( {'W': np.array([16, 16, 17, 10, 17, 16, 16, 15, 15, 15, 13, 10, 10, 10, 9, 9, 9, 9]), 'Reject null hypothesis': np.array([True, True, True, False, True, True, True, True, True, True, True, False, False, False, False, False, False, False], dtype=bool)}) result1 = ancom(self.table4, self.cats4, p_adjust=None, tau=0.25) result2 = ancom(self.table9, self.cats9, p_adjust=None, tau=0.02) result3 = ancom(self.table10, self.cats10, p_adjust=None, tau=0.02) assert_data_frame_almost_equal(result1[0], exp1) assert_data_frame_almost_equal(result2[0], 
exp2) assert_data_frame_almost_equal(result3[0], exp3) def test_ancom_theta(self): result = ancom(self.table1, self.cats1, theta=0.3) exp = pd.DataFrame( {'W': np.array([5, 5, 2, 2, 2, 2, 2]), 'Reject null hypothesis': np.array([True, True, False, False, False, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_alpha(self): result = ancom(self.table1, self.cats1, p_adjust=None, alpha=0.5) exp = pd.DataFrame( {'W': np.array([6, 6, 4, 5, 5, 4, 2]), 'Reject null hypothesis': np.array([True, True, False, True, True, False, False], dtype=bool)}) assert_data_frame_almost_equal(result[0], exp) def test_ancom_fail_type(self): with self.assertRaises(TypeError): ancom(self.table1.values, self.cats1) with self.assertRaises(TypeError): ancom(self.table1, self.cats1.values) def test_ancom_fail_zeros(self): with self.assertRaises(ValueError): ancom(self.bad1, self.cats2, p_adjust=None) def test_ancom_fail_negative(self): with self.assertRaises(ValueError): ancom(self.bad2, self.cats2, p_adjust=None) def test_ancom_fail_not_implemented_p_adjust(self): with self.assertRaises(ValueError): ancom(self.table2, self.cats2, p_adjust='fdr') def test_ancom_fail_missing(self): with self.assertRaises(ValueError): ancom(self.bad3, self.cats1) with self.assertRaises(ValueError): ancom(self.table1, self.badcats1) def test_ancom_fail_groups(self): with self.assertRaises(ValueError): ancom(self.table1, self.badcats2) def test_ancom_fail_size_mismatch(self): with self.assertRaises(ValueError): ancom(self.table1, self.badcats3) def test_ancom_fail_group_unique(self): with self.assertRaises(ValueError): ancom(self.table1, self.badcats4) def test_ancom_fail_1_group(self): with self.assertRaises(ValueError): ancom(self.table1, self.badcats5) def test_ancom_fail_tau(self): with self.assertRaises(ValueError): ancom(self.table1, self.cats1, tau=-1) with self.assertRaises(ValueError): ancom(self.table1, self.cats1, tau=1.1) def test_ancom_fail_theta(self): with 
self.assertRaises(ValueError): ancom(self.table1, self.cats1, theta=-1) with self.assertRaises(ValueError): ancom(self.table1, self.cats1, theta=1.1) def test_ancom_fail_alpha(self): with self.assertRaises(ValueError): ancom(self.table1, self.cats1, alpha=-1) with self.assertRaises(ValueError): ancom(self.table1, self.cats1, alpha=1.1) def test_ancom_fail_multiple_groups(self): # np.exceptions was introduced in NumPy 1.25, before which errors were # members of np. The following code is for backward compatibility. npe = getattr(np, 'exceptions', np) with self.assertRaises((TypeError, npe.AxisError)): ancom(self.table4, self.cats4, significance_test="ttest_ind") class FDRTests(TestCase): def test_holm_bonferroni(self): p = [0.005, 0.011, 0.02, 0.04, 0.13] obs = _calc_p_adjust("holm-bonferroni", p) exp = p * np.arange(1, 6)[::-1] for a, b in zip(obs, exp): self.assertAlmostEqual(a, b) def test_benjamini_hochberg(self): p = [0.005, 0.011, 0.02, 0.04, 0.13] obs = _calc_p_adjust("benjamini-hochberg", p) exp = [0.025, 0.0275, 0.03333333, 0.05, 0.13] for a, b in zip(obs, exp): self.assertAlmostEqual(a, b) class VLRTests(TestCase): def setUp(self): self.mat = np.array([[1, 1, 2], [3, 5, 8], [13, 21, 55]]) self.mat_neg = np.array([[-1, 1, 2], [3, -5, 8], [13, 21, -55]]) self.mat_with_zero = np.array([[0, 1, 2], [3, 5, 8], [13, 21, 55]]) def test_vlr(self): # No zeros output = vlr( x=self.mat[0], y=self.mat[1], ddof=1, robust=False, ) self.assertAlmostEqual(output, 0.0655828061998637) # With zeros output = vlr( x=self.mat_with_zero[0], y=self.mat_with_zero[1], ddof=1, robust=False, ) assert np.isnan(output) # assert raises error with self.assertRaises(ValueError): vlr( x=self.mat_neg[0], y=self.mat_neg[1], ddof=1, robust=False, ) def test_robust_vlr(self): # No zeros output = vlr( x=self.mat[0], y=self.mat[1], ddof=1, robust=True, ) self.assertAlmostEqual(output, 0.0655828061998637) # With zeros output = vlr( x=self.mat_with_zero[0], y=self.mat_with_zero[1], ddof=1, 
robust=True, ) self.assertAlmostEqual(output, 0.024896522246558722) def test_pairwise_vlr(self): # No zeros dism = pairwise_vlr(self.mat, ids=None, ddof=1, robust=False) output = dism.condensed_form().sum() self.assertAlmostEqual(output, 0.2857382286903922) # With zeros with self.assertRaises(DistanceMatrixError): pairwise_vlr(self.mat_with_zero, ids=None, ddof=1, robust=False) # no validation dism = pairwise_vlr(self.mat, ids=None, ddof=1, robust=False, validate=False) output = dism.data.sum() / 2 self.assertAlmostEqual(output, 0.2857382286903922) class DirMultTTestTests(TestCase): def setUp(self): np.random.seed(0) # Create sample data for testing self.data = { 'feature1': [5, 8, 12, 15, 20], 'feature2': [3, 6, 9, 12, 15], 'feature3': [10, 15, 20, 25, 30], } self.table = pd.DataFrame(self.data) self.grouping = pd.Series(['Group1', 'Group1', 'Group2', 'Group2', 'Group2']) self.treatment = 'Group2' self.reference = 'Group1' d = 50 n = 200 self.depth = depth = 1000 p1 = np.random.lognormal(0, 1, size=d) * 10 p2 = np.random.lognormal(0.01, 1, size=d) * 10 self.p1, self.p2 = p1 / p1.sum(), p2 / p2.sum() self.data2 = np.vstack( ( [np.random.multinomial(depth, self.p1) for _ in range(n)], [np.random.multinomial(depth, self.p2) for _ in range(n)] ) ) self.table2 = pd.DataFrame(self.data2) self.grouping2 = pd.Series(['Group1'] * n + ['Group2'] * n) def test_dirmult_ttest_toy(self): p1 = np.array([5, 6, 7]) p2 = np.array([4, 7, 7]) p1, p2 = p1 / p1.sum(), p2 / p2.sum() depth = 1000 n = 100 data = np.vstack( ( [np.random.multinomial(depth, p1) for _ in range(n)], [np.random.multinomial(depth, p2) for _ in range(n)] ) ) table = pd.DataFrame(data) grouping = pd.Series(['Group1'] * n + ['Group2'] * n) exp_lfc = np.log2([4/5, 7/6, 7/7]) exp_lfc = (exp_lfc - exp_lfc.mean()) # convert to CLR coordinates res = dirmult_ttest(table, grouping, self.treatment, self.reference) npt.assert_array_less(exp_lfc, res['CI(97.5)']) npt.assert_array_less(res['CI(2.5)'], exp_lfc) def 
test_dirmult_ttest_toy_depth(self): p1 = np.array([5, 6, 7, 8, 9, 4]) p2 = np.array([4, 7, 7, 6, 5, 7]) p1, p2 = p1 / p1.sum(), p2 / p2.sum() depth = 100 n = 100 data = np.vstack( ( [np.random.multinomial(depth, p1) for _ in range(n)], [np.random.multinomial(depth, p2) for _ in range(n)] ) ) table = pd.DataFrame(data) grouping = pd.Series(['Group1'] * n + ['Group2'] * n) exp_lfc = np.log2([4/5, 7/6, 7/7, 6/8, 5/9, 7/4]) exp_lfc = (exp_lfc - exp_lfc.mean()) # convert to CLR coordinates res_100 = dirmult_ttest(table, grouping, self.treatment, self.reference) # increase sequencing depth by 100 fold depth = 10000 data = np.vstack( ( [np.random.multinomial(depth, p1) for _ in range(n)], [np.random.multinomial(depth, p2) for _ in range(n)] ) ) table = pd.DataFrame(data) res_10000 = dirmult_ttest(table, grouping, self.treatment, self.reference) # when the sequencing depth increases, the confidence intervals # should also shrink npt.assert_array_less(res_100['CI(2.5)'], res_10000['CI(2.5)']) npt.assert_array_less(res_10000['CI(97.5)'], res_100['CI(97.5)']) def test_dirmult_ttest_output(self): exp_lfc = np.log2(self.p2 / self.p1) exp_lfc = exp_lfc - exp_lfc.mean() res = dirmult_ttest(self.table2, self.grouping2, self.treatment, self.reference) npt.assert_array_less(res['Log2(FC)'], res['CI(97.5)']) npt.assert_array_less(res['CI(2.5)'], res['Log2(FC)']) # a couple of things that complicate the tests # first, there is going to be a little bit of a fudge factor due # to the pseudocount, so we will define it via log2(0.5) eps = np.abs(np.log2(0.5)) # second, the confidence interval is expected to be inaccurate # for (1/20) of the tests. 
So we should double check to # see if the confidence intervals were able to capture # 95% of the log-fold changes correctly self.assertGreater(np.mean(res['CI(2.5)'] - eps < exp_lfc), 0.95) self.assertGreater(np.mean(res['CI(97.5)'] + eps > exp_lfc), 0.95) def test_dirmult_ttest_valid_input(self): result = dirmult_ttest(self.table, self.grouping, self.treatment, self.reference) self.assertIsInstance(result, pd.DataFrame) self.assertEqual(result.shape[1], 8) # Expected number of columns pdt.assert_index_equal(result.index, pd.Index(['feature1', 'feature2', 'feature3'])) def test_dirmult_ttest_no_p_adjust(self): result = dirmult_ttest(self.table, self.grouping, self.treatment, self.reference, p_adjust=None) pdt.assert_series_equal(result['pvalue'], result['qvalue'], check_names=False) def test_dirmult_ttest_invalid_table_type(self): with self.assertRaises(TypeError): dirmult_ttest("invalid_table", self.grouping, self.treatment, self.reference) def test_dirmult_ttest_invalid_grouping_type(self): with self.assertRaises(TypeError): dirmult_ttest(self.table, "invalid_grouping", self.treatment, self.reference) def test_dirmult_ttest_negative_values_in_table(self): self.table.iloc[0, 0] = -5 # Modify a value to be negative with self.assertRaises(ValueError): dirmult_ttest(self.table, self.grouping, self.treatment, self.reference) def test_dirmult_ttest_missing_values_in_grouping(self): self.grouping[1] = np.nan # Introduce a missing value in grouping with self.assertRaises(ValueError): dirmult_ttest(self.table, self.grouping, self.treatment, self.reference) def test_dirmult_ttest_missing_values_in_table(self): self.table.iloc[2, 1] = np.nan # Introduce a missing value in the table with self.assertRaises(ValueError): dirmult_ttest(self.table, self.grouping, self.treatment, self.reference) def test_dirmult_ttest_inconsistent_indexes(self): self.table.index = ['a', 'b', 'c', 'd', 'e'] # Change table index with self.assertRaises(ValueError): dirmult_ttest(self.table, 
self.grouping, self.treatment, self.reference) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/stats/tests/test_gradient.py000066400000000000000000001542321464262511300224040ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from operator import attrgetter from unittest import TestCase, main import numpy as np import pandas as pd import numpy.testing as npt import pandas.testing as pdt from skbio.util import get_data_path, assert_data_frame_almost_equal from skbio.stats.gradient import (GradientANOVA, AverageGradientANOVA, TrajectoryGradientANOVA, FirstDifferenceGradientANOVA, WindowDifferenceGradientANOVA, GroupResults, CategoryResults, GradientANOVAResults, _weight_by_vector, _ANOVA_trajectories) class BaseTests(TestCase): def setUp(self): """Initializes some data for testing""" coord_data = { 'PC.636': np.array([-0.212230626531, 0.216034194368, 0.03532727349, -0.254450494129, -0.0687468542543, 0.231895596562, 0.00496549154314, -0.0026246871695, 9.73837390723e-10]), 'PC.635': np.array([-0.277487312135, -0.0295483215975, -0.0744173437992, 0.0957182357964, 0.204714844022, -0.0055407341857, -0.190287966833, 0.16307126638, 9.73837390723e-10]), 'PC.356': np.array([0.220886492631, 0.0874848360559, -0.351990132198, -0.00316535032886, 0.114635191853, -0.00019194106125, 0.188557853937, 0.030002427212, 9.73837390723e-10]), 'PC.481': np.array([0.0308923744062, -0.0446295973489, 0.133996451689, 0.29318228566, -0.167812539312, 0.130996149793, 0.113551017379, 0.109987942454, 9.73837390723e-10]), 'PC.354': np.array([0.27616778138, -0.0341866951102, 0.0633000238256, 0.100446653327, 0.123802521199, 0.1285839664, -0.132852841046, 
-0.217514322505, 9.73837390723e-10]), 'PC.593': np.array([0.202458130052, -0.115216120518, 0.301820871723, -0.18300251046, 0.136208248567, -0.0989435556722, 0.0927738484879, 0.0909429797672, 9.73837390723e-10]), 'PC.355': np.array([0.236467470907, 0.21863434374, -0.0301637746424, -0.0225473129718, -0.205287183891, -0.180224615141, -0.165277751908, 0.0411933458557, 9.73837390723e-10]), 'PC.607': np.array([-0.105517545144, -0.41405687433, -0.150073017617, -0.116066751485, -0.158763393475, -0.0223918378516, -0.0263068046112, -0.0501209518091, 9.73837390723e-10]), 'PC.634': np.array([-0.371636765565, 0.115484234741, 0.0721996475289, 0.0898852445906, 0.0212491652909, -0.184183028843, 0.114877153051, -0.164938000185, 9.73837390723e-10]) } self.coords = pd.DataFrame.from_dict(coord_data, orient='index') coord_data = { 'PC.636': np.array([-0.212230626531, 0.216034194368, 0.03532727349]), 'PC.635': np.array([-0.277487312135, -0.0295483215975, -0.0744173437992]), 'PC.356': np.array([0.220886492631, 0.0874848360559, -0.351990132198]), 'PC.481': np.array([0.0308923744062, -0.0446295973489, 0.133996451689]), 'PC.354': np.array([0.27616778138, -0.0341866951102, 0.0633000238256]), 'PC.593': np.array([0.202458130052, -0.115216120518, 0.301820871723]), 'PC.355': np.array([0.236467470907, 0.21863434374, -0.0301637746424]), 'PC.607': np.array([-0.105517545144, -0.41405687433, -0.150073017617]), 'PC.634': np.array([-0.371636765565, 0.115484234741, 0.0721996475289]) } self.coords_3axes = pd.DataFrame.from_dict(coord_data, orient='index') metadata_map = {'PC.354': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '60', 'Description': 'Control_mouse_I.D._354'}, 'PC.355': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '55', 'Description': 'Control_mouse_I.D._355'}, 'PC.356': {'Treatment': 'Control', 'DOB': '20061126', 'Weight': '50', 'Description': 'Control_mouse_I.D._356'}, 'PC.481': {'Treatment': 'Control', 'DOB': '20070314', 'Weight': '52', 'Description': 
'Control_mouse_I.D._481'}, 'PC.593': {'Treatment': 'Control', 'DOB': '20071210', 'Weight': '57', 'Description': 'Control_mouse_I.D._593'}, 'PC.607': {'Treatment': 'Fast', 'DOB': '20071112', 'Weight': '65', 'Description': 'Fasting_mouse_I.D._607'}, 'PC.634': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '68', 'Description': 'Fasting_mouse_I.D._634'}, 'PC.635': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '70', 'Description': 'Fasting_mouse_I.D._635'}, 'PC.636': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '72', 'Description': 'Fasting_mouse_I.D._636'}} self.metadata_map = pd.DataFrame.from_dict(metadata_map, orient='index') self.prop_expl = np.array([25.6216900347, 15.7715955926, 14.1215046787, 11.6913885817, 9.83044890697, 8.51253468595, 7.88775505332, 6.56308246609, 4.42499350906e-16]) gr_wo_msg = GroupResults('Foo', np.array([-2.6750, -0.2510, -2.8322, 0.]), -1.4398, {'mean': -1.4398, 'std': 1.3184}, None) gr_w_msg = GroupResults('Bar', np.array([9.6823, 2.9511, 5.2434]), 5.9589, {'mean': 5.9589, 'std': 2.7942}, "Cannot calculate the first difference " "with a window of size (3).") self.groups = [gr_wo_msg, gr_w_msg] cr_no_data = CategoryResults('foo', None, None, 'This group can not be used. All groups ' 'should have more than 1 element.') cr_data = CategoryResults('bar', 0.0110, self.groups, None) self.categories = [cr_no_data, cr_data] vr = GradientANOVAResults('wdiff', True, self.categories) description = CategoryResults('Description', None, None, 'This group can not be used. All groups ' 'should have more than 1 element.') weight = CategoryResults('Weight', None, None, 'This group can not be used. All groups ' 'should have more than 1 element.') dob = CategoryResults('DOB', None, None, 'This group can not be used. 
All groups ' 'should have more than 1 element.') control_group = GroupResults('Control', np.array([2.3694, 3.3716, 5.4452, 4.5704, 4.4972]), 4.0508, {'avg': 4.0508}, None) fast_group = GroupResults('Fast', np.array([7.2220, 4.2726, 1.1169, 4.0271]), 4.1596, {'avg': 4.1596}, None) treatment = CategoryResults('Treatment', 0.9331, [control_group, fast_group], None) vr_real = GradientANOVAResults('avg', False, [description, weight, dob, treatment]) self.vec_results = [vr, vr_real] # This function makes the comparisons between the results classes easier def assert_group_results_almost_equal(self, obs, exp): """Tests that obs and exp are almost equal""" self.assertEqual(obs.name, exp.name) npt.assert_almost_equal(obs.trajectory, exp.trajectory) npt.assert_almost_equal(obs.mean, exp.mean) self.assertEqual(obs.info.keys(), exp.info.keys()) for key in obs.info: npt.assert_almost_equal(obs.info[key], exp.info[key]) self.assertEqual(obs.message, exp.message) def assert_category_results_almost_equal(self, obs, exp): """Tests that obs and exp are almost equal""" self.assertEqual(obs.category, exp.category) if exp.probability is None: self.assertTrue(obs.probability is None) self.assertTrue(obs.groups is None) else: npt.assert_almost_equal(obs.probability, exp.probability) for o, e in zip(sorted(obs.groups, key=attrgetter('name')), sorted(exp.groups, key=attrgetter('name'))): self.assert_group_results_almost_equal(o, e) def assert_gradientANOVA_results_almost_equal(self, obs, exp): """Tests that obs and exp are almost equal""" self.assertEqual(obs.algorithm, exp.algorithm) self.assertEqual(obs.weighted, exp.weighted) for o, e in zip(sorted(obs.categories, key=attrgetter('category')), sorted(exp.categories, key=attrgetter('category'))): self.assert_category_results_almost_equal(o, e) class GradientTests(BaseTests): def test_weight_by_vector(self): """Correctly weights the vectors""" trajectory = pd.DataFrame.from_dict({'s1': np.array([1]), 's2': np.array([2]), 's3': 
np.array([3]), 's4': np.array([4]), 's5': np.array([5]), 's6': np.array([6]), 's7': np.array([7]), 's8': np.array([8])}, orient='index') trajectory.sort_values(by=0, inplace=True) w_vector = pd.Series(np.array([1, 5, 8, 12, 45, 80, 85, 90]), ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']).astype(np.float64) exp = pd.DataFrame.from_dict({'s1': np.array([1]), 's2': np.array([6.3571428571]), 's3': np.array([12.7142857142]), 's4': np.array([12.7142857142]), 's5': np.array([1.9264069264]), 's6': np.array([2.1795918367]), 's7': np.array([17.8]), 's8': np.array([20.3428571428])}, orient='index').astype(np.float64) obs = _weight_by_vector(trajectory, w_vector) assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index()) trajectory = pd.DataFrame.from_dict({'s1': np.array([1]), 's2': np.array([2]), 's3': np.array([3]), 's4': np.array([4]), 's5': np.array([5]), 's6': np.array([6]), 's7': np.array([7]), 's8': np.array([8])}, orient='index') trajectory.sort_values(by=0, inplace=True) w_vector = pd.Series(np.array([1, 2, 3, 4, 5, 6, 7, 8]), ['s1', 's2', 's3', 's4', 's5', 's6', 's7', 's8']).astype(np.float64) exp = pd.DataFrame.from_dict({'s1': np.array([1]), 's2': np.array([2]), 's3': np.array([3]), 's4': np.array([4]), 's5': np.array([5]), 's6': np.array([6]), 's7': np.array([7]), 's8': np.array([8]) }, orient='index').astype(np.float64) obs = _weight_by_vector(trajectory, w_vector) assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index()) trajectory = pd.DataFrame.from_dict({'s2': np.array([2]), 's3': np.array([3]), 's4': np.array([4]), 's5': np.array([5]), 's6': np.array([6])}, orient='index') trajectory.sort_values(by=0, inplace=True) w_vector = pd.Series(np.array([25, 30, 35, 40, 45]), ['s2', 's3', 's4', 's5', 's6']).astype(np.float64) exp = pd.DataFrame.from_dict({'s2': np.array([2]), 's3': np.array([3]), 's4': np.array([4]), 's5': np.array([5]), 's6': np.array([6])}, orient='index').astype(np.float64) obs = _weight_by_vector(trajectory, w_vector) 
assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index()) trajectory = pd.DataFrame.from_dict({'s1': np.array([1, 2, 3]), 's2': np.array([2, 3, 4]), 's3': np.array([5, 6, 7]), 's4': np.array([8, 9, 10])}, orient='index') trajectory.sort_values(by=0, inplace=True) w_vector = pd.Series(np.array([1, 2, 3, 4]), ['s1', 's2', 's3', 's4']).astype(np.float64) exp = pd.DataFrame.from_dict({'s1': np.array([1, 2, 3]), 's2': np.array([2, 3, 4]), 's3': np.array([5, 6, 7]), 's4': np.array([8, 9, 10])}, orient='index').astype(np.float64) obs = _weight_by_vector(trajectory, w_vector) assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index()) sample_ids = ['PC.356', 'PC.481', 'PC.355', 'PC.593', 'PC.354'] trajectory = pd.DataFrame.from_dict({'PC.356': np.array([5.65948525, 1.37977545, -4.9706303]), 'PC.481': np.array([0.79151484, -0.70387996, 1.89223152]), 'PC.355': np.array([6.05869624, 3.44821245, -0.42595788]), 'PC.593': np.array([5.18731945, -1.81714206, 4.26216485]), 'PC.354': np.array([7.07588529, -0.53917873, 0.89389158]) }, orient='index') w_vector = pd.Series(np.array([50, 52, 55, 57, 60]), sample_ids).astype(np.float64) exp = pd.DataFrame.from_dict({'PC.356': np.array([5.65948525, 1.37977545, -4.9706303]), 'PC.481': np.array([0.98939355, -0.87984995, 2.3652894]), 'PC.355': np.array([5.04891353, 2.87351038, -0.3549649]), 'PC.593': np.array([6.48414931, -2.27142757, 5.32770606]), 'PC.354': np.array([5.89657108, -0.44931561, 0.74490965]) }, orient='index') obs = _weight_by_vector(trajectory.loc[sample_ids], w_vector[sample_ids]) assert_data_frame_almost_equal(obs.sort_index(), exp.sort_index()) def test_weight_by_vector_single_element(self): trajectory = pd.DataFrame.from_dict({'s1': np.array([42])}, orient='index') w_vector = pd.Series(np.array([5]), ['s1']).astype(np.float64) obs = _weight_by_vector(trajectory, w_vector) assert_data_frame_almost_equal(obs, trajectory) def test_weight_by_vector_error(self): """Raises an error with erroneous inputs""" # 
Different vector lengths with self.assertRaises(ValueError): _weight_by_vector([1, 2, 3, 4], [1, 2, 3]) # Inputs are not iterables with self.assertRaises(TypeError): _weight_by_vector(9, 1) # Weighting vector is not a gradient with self.assertRaises(ValueError): _weight_by_vector([1, 2, 3, 4], [1, 2, 3, 3]) def test_ANOVA_trajectories(self): """Correctly performs the check before running ANOVA""" # Only one group in a given category group = GroupResults('Bar', np.array([2.3694943596755276, 3.3716388181385781, 5.4452089176253367, 4.5704258453173559, 4.4972603724478377]), 4.05080566264, {'avg': 4.0508056626409275}, None) obs = _ANOVA_trajectories('Foo', [group]) exp = CategoryResults('Foo', None, None, 'Only one value in the group.') self.assert_category_results_almost_equal(obs, exp) # One element have only one element group2 = GroupResults('FooBar', np.array([4.05080566264]), 4.05080566264, {'avg': 4.05080566264}, None) obs = _ANOVA_trajectories('Foo', [group, group2]) exp = CategoryResults('Foo', None, None, 'This group can not be used. 
All groups ' 'should have more than 1 element.') self.assert_category_results_almost_equal(obs, exp) gr1 = GroupResults('Foo', np.array([-0.219044992, 0.079674486, 0.09233683]), -0.015677892, {'avg': -0.015677892}, None) gr2 = GroupResults('Bar', np.array([-0.042258081, 0.000204041, 0.024837603]), -0.0732878716, {'avg': -0.0732878716}, None) gr3 = GroupResults('FBF', np.array([0.080504323, -0.212014503, -0.088353435]), -0.0057388123, {'avg': -0.0057388123}, None) obs = _ANOVA_trajectories('Cat', [gr1, gr2, gr3]) exp = CategoryResults('Cat', 0.8067456876, [gr1, gr2, gr3], None) self.assert_category_results_almost_equal(obs, exp) class GroupResultsTests(BaseTests): def test_to_file(self): out_paths = ['gr_wo_msg_out', 'gr_w_msg_out'] raw_paths = ['gr_wo_msg_raw', 'gr_w_msg_raw'] for gr, out_fp, raw_fp in zip(self.groups, out_paths, raw_paths): obs_out_f = io.StringIO() obs_raw_f = io.StringIO() gr.to_files(obs_out_f, obs_raw_f) obs_out = obs_out_f.getvalue() obs_raw = obs_raw_f.getvalue() obs_out_f.close() obs_raw_f.close() with open(get_data_path(out_fp)) as f: exp_out = f.read() with open(get_data_path(raw_fp)) as f: exp_raw = f.read() self.assertEqual(obs_out, exp_out) self.assertEqual(obs_raw, exp_raw) class CategoryResultsTests(BaseTests): def test_to_file(self): out_paths = ['cr_no_data_out', 'cr_data_out'] raw_paths = ['cr_no_data_raw', 'cr_data_raw'] for cat, out_fp, raw_fp in zip(self.categories, out_paths, raw_paths): obs_out_f = io.StringIO() obs_raw_f = io.StringIO() cat.to_files(obs_out_f, obs_raw_f) obs_out = obs_out_f.getvalue() obs_raw = obs_raw_f.getvalue() obs_out_f.close() obs_raw_f.close() with open(get_data_path(out_fp)) as f: exp_out = f.read() with open(get_data_path(raw_fp)) as f: exp_raw = f.read() self.assertEqual(obs_out, exp_out) self.assertEqual(obs_raw, exp_raw) class GradientANOVAResultsTests(BaseTests): def test_to_file(self): out_paths = ['vr_out'] raw_paths = ['vr_raw'] for vr, out_fp, raw_fp in zip(self.vec_results, out_paths, 
raw_paths): obs_out_f = io.StringIO() obs_raw_f = io.StringIO() vr.to_files(obs_out_f, obs_raw_f) obs_out = obs_out_f.getvalue() obs_raw = obs_raw_f.getvalue() obs_out_f.close() obs_raw_f.close() with open(get_data_path(out_fp)) as f: exp_out = f.read() with open(get_data_path(raw_fp)) as f: exp_raw = f.read() self.assertEqual(obs_out, exp_out) self.assertEqual(obs_raw, exp_raw) class GradientANOVATests(BaseTests): def test_init(self): """Correctly initializes the class attributes""" # Note self._groups is tested on test_make_groups # so we are not testing it here # Test with weighted = False bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) assert_data_frame_almost_equal(bv._coords.loc[self.coords_3axes.index], self.coords_3axes) exp_prop_expl = np.array([25.6216900347, 15.7715955926, 14.1215046787]) npt.assert_equal(bv._prop_expl, exp_prop_expl) assert_data_frame_almost_equal(bv._metadata_map.loc[self.metadata_map.index], # noqa self.metadata_map) self.assertTrue(bv._weighting_vector is None) self.assertFalse(bv._weighted) # Test with weighted = True bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map, sort_category='Weight', weighted=True) assert_data_frame_almost_equal(bv._coords.loc[self.coords_3axes.index], self.coords_3axes) npt.assert_equal(bv._prop_expl, exp_prop_expl) assert_data_frame_almost_equal(bv._metadata_map.loc[self.metadata_map.index], # noqa self.metadata_map) exp_weighting_vector = pd.Series( np.array([60, 55, 50, 52, 57, 65, 68, 70, 72]), ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593', 'PC.607', 'PC.634', 'PC.635', 'PC.636'], name='Weight' ).astype(np.float64) pdt.assert_series_equal(bv._weighting_vector.loc[exp_weighting_vector.index], # noqa exp_weighting_vector) self.assertTrue(bv._weighted) def test_init_error(self): """Raises an error with erroneous inputs""" # Raises ValueError if any category in trajectory_categories is not # present in metadata_map with self.assertRaises(ValueError): 
GradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['foo']) with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Weight', 'Treatment', 'foo']) # Raises ValueError if sort_category is not present in metadata_map with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, sort_category='foo') # Raises ValueError if weighted == True and sort_category == None with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, weighted=True) # Raises ValueError if weighted == True and the values under # sort_category are not numerical with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, sort_category='Treatment', weighted=True) # Raises ValueError if axes > len(prop_expl) with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, axes=10) # Raises ValueError if axes < 0 with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, self.metadata_map, axes=-1) def test_normalize_samples(self): """Correctly normalizes the samples between coords and metadata_map""" coord_data = { 'PC.636': np.array([-0.212230626531, 0.216034194368, 0.03532727349]), 'PC.635': np.array([-0.277487312135, -0.0295483215975, -0.0744173437992]), 'PC.355': np.array([0.236467470907, 0.21863434374, -0.0301637746424]), 'PC.607': np.array([-0.105517545144, -0.41405687433, -0.150073017617]), 'PC.634': np.array([-0.371636765565, 0.115484234741, 0.0721996475289]) } subset_coords = pd.DataFrame.from_dict(coord_data, orient='index') metadata_map = {'PC.355': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '55', 'Description': 'Control_mouse_I.D._355'}, 'PC.607': {'Treatment': 'Fast', 'DOB': '20071112', 'Weight': '65', 'Description': 'Fasting_mouse_I.D._607'}, 'PC.634': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '68', 
'Description': 'Fasting_mouse_I.D._634'}, 'PC.635': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '70', 'Description': 'Fasting_mouse_I.D._635'}, 'PC.636': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '72', 'Description': 'Fasting_mouse_I.D._636'}} subset_metadata_map = pd.DataFrame.from_dict(metadata_map, orient='index') # Takes a subset from metadata_map bv = GradientANOVA(subset_coords, self.prop_expl, self.metadata_map) assert_data_frame_almost_equal( bv._coords.sort_index(), subset_coords.sort_index()) assert_data_frame_almost_equal( bv._metadata_map.sort_index(), subset_metadata_map.sort_index()) # Takes a subset from coords bv = GradientANOVA(self.coords, self.prop_expl, subset_metadata_map) assert_data_frame_almost_equal( bv._coords.sort_index(), subset_coords.sort_index()) assert_data_frame_almost_equal( bv._metadata_map.sort_index(), subset_metadata_map.sort_index()) # Takes a subset from metadata_map and coords at the same time coord_data = { 'PC.636': np.array([-0.212230626531, 0.216034194368, 0.03532727349]), 'PC.635': np.array([-0.277487312135, -0.0295483215975, -0.0744173437992]), 'PC.355': np.array([0.236467470907, 0.21863434374, -0.0301637746424]) } subset_coords = pd.DataFrame.from_dict(coord_data, orient='index') metadata_map = {'PC.355': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '55', 'Description': 'Control_mouse_I.D._355'}, 'PC.607': {'Treatment': 'Fast', 'DOB': '20071112', 'Weight': '65', 'Description': 'Fasting_mouse_I.D._607'}, 'PC.634': {'Treatment': 'Fast', 'DOB': '20080116', 'Weight': '68', 'Description': 'Fasting_mouse_I.D._634'}} subset_metadata_map = pd.DataFrame.from_dict(metadata_map, orient='index') bv = GradientANOVA(subset_coords, self.prop_expl, subset_metadata_map) exp_coords = pd.DataFrame.from_dict( {'PC.355': np.array([0.236467470907, 0.21863434374, -0.0301637746424])}, orient='index') assert_data_frame_almost_equal( bv._coords.sort_index(), exp_coords.sort_index()) exp_metadata_map = 
pd.DataFrame.from_dict( {'PC.355': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '55', 'Description': 'Control_mouse_I.D._355'}}, orient='index') assert_data_frame_almost_equal( bv._metadata_map.sort_index(), exp_metadata_map.sort_index()) def test_normalize_samples_error(self): """Raises an error if coords and metadata_map does not have samples in common""" error_metadata_map = pd.DataFrame.from_dict( {'Foo': {'Treatment': 'Control', 'DOB': '20061218', 'Weight': '55', 'Description': 'Control_mouse_I.D._355'}, 'Bar': {'Treatment': 'Fast', 'DOB': '20071112', 'Weight': '65', 'Description': 'Fasting_mouse_I.D._607'}}, orient='index') with self.assertRaises(ValueError): GradientANOVA(self.coords, self.prop_expl, error_metadata_map) def test_make_groups(self): """Correctly generates the groups for trajectory_categories""" # Test with all categories bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) exp_groups = {'Treatment': {'Control': ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593'], 'Fast': ['PC.607', 'PC.634', 'PC.635', 'PC.636']}, 'DOB': {'20061218': ['PC.354', 'PC.355'], '20061126': ['PC.356'], '20070314': ['PC.481'], '20071210': ['PC.593'], '20071112': ['PC.607'], '20080116': ['PC.634', 'PC.635', 'PC.636']}, 'Weight': {'60': ['PC.354'], '55': ['PC.355'], '50': ['PC.356'], '52': ['PC.481'], '57': ['PC.593'], '65': ['PC.607'], '68': ['PC.634'], '70': ['PC.635'], '72': ['PC.636']}, 'Description': {'Control_mouse_I.D._354': ['PC.354'], 'Control_mouse_I.D._355': ['PC.355'], 'Control_mouse_I.D._356': ['PC.356'], 'Control_mouse_I.D._481': ['PC.481'], 'Control_mouse_I.D._593': ['PC.593'], 'Fasting_mouse_I.D._607': ['PC.607'], 'Fasting_mouse_I.D._634': ['PC.634'], 'Fasting_mouse_I.D._635': ['PC.635'], 'Fasting_mouse_I.D._636': ['PC.636']}} self.assertEqual(bv._groups, exp_groups) # Test with user-defined categories bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment', 'DOB']) exp_groups = 
{'Treatment': {'Control': ['PC.354', 'PC.355', 'PC.356', 'PC.481', 'PC.593'], 'Fast': ['PC.607', 'PC.634', 'PC.635', 'PC.636']}, 'DOB': {'20061218': ['PC.354', 'PC.355'], '20061126': ['PC.356'], '20070314': ['PC.481'], '20071210': ['PC.593'], '20071112': ['PC.607'], '20080116': ['PC.634', 'PC.635', 'PC.636']}} self.assertEqual(bv._groups, exp_groups) def test_make_groups_natural_sorting(self): # Ensure sample IDs are sorted using a natural sorting algorithm. df = pd.DataFrame.from_dict({ 'a2': {'Col1': 'foo', 'Col2': '1.0'}, 'a1': {'Col1': 'bar', 'Col2': '-42.0'}, 'a11.0': {'Col1': 'foo', 'Col2': '2e-5'}, 'a-10': {'Col1': 'foo', 'Col2': '5'}, 'a10': {'Col1': 'bar', 'Col2': '5'}}, orient='index') coords = pd.DataFrame.from_dict({ 'a10': np.array([-0.212230626531, 0.216034194368, 0.03532727349]), 'a11.0': np.array([-0.277487312135, -0.0295483215975, -0.0744173437992]), 'a1': np.array([0.220886492631, 0.0874848360559, -0.351990132198]), 'a2': np.array([0.0308923744062, -0.0446295973489, 0.133996451689]), 'a-10': np.array([0.27616778138, -0.0341866951102, 0.0633000238256])}, orient='index') prop_expl = np.array([25.6216900347, 15.7715955926, 14.1215046787, 11.6913885817, 9.83044890697]) # Sort by sample IDs. ga = GradientANOVA(coords, prop_expl, df) exp_groups = { 'Col1': { 'foo': ['a-10', 'a2', 'a11.0'], 'bar': ['a1', 'a10'] }, 'Col2': { '1.0': ['a2'], '-42.0': ['a1'], '2e-5': ['a11.0'], '5': ['a-10', 'a10'] } } self.assertEqual(ga._groups, exp_groups) # Sort sample IDs by Col2. 
ga = GradientANOVA(coords, prop_expl, df, trajectory_categories=['Col1'], sort_category='Col2') exp_groups = { 'Col1': { 'foo': ['a11.0', 'a2', 'a-10'], 'bar': ['a1', 'a10'] } } self.assertEqual(ga._groups, exp_groups) def test_get_trajectories(self): """Should raise a NotImplementedError as this is a base class""" bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) with self.assertRaises(NotImplementedError): bv.get_trajectories() def test_get_group_trajectories(self): """Should raise a NotImplementedError in usual execution as this is a base class""" bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) with self.assertRaises(NotImplementedError): bv.get_trajectories() def test_get_group_trajectories_error(self): """Should raise a RuntimeError if the user call _get_group_trajectories with erroneous inputs""" bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) with self.assertRaises(KeyError): bv._get_group_trajectories("foo", ['foo']) with self.assertRaises(RuntimeError): bv._get_group_trajectories("bar", []) def test_compute_trajectories_results(self): """Should raise a NotImplementedError as this is a base class""" bv = GradientANOVA(self.coords, self.prop_expl, self.metadata_map) with self.assertRaises(NotImplementedError): bv._compute_trajectories_results("foo", []) class AverageGradientANOVATests(BaseTests): def test_get_trajectories_all(self): """get_trajectories returns the results of all categories""" av = AverageGradientANOVA(self.coords, self.prop_expl, self.metadata_map) obs = av.get_trajectories() exp_description = CategoryResults('Description', None, None, 'This group can not be used. All ' 'groups should have more than 1 ' 'element.') exp_weight = CategoryResults('Weight', None, None, 'This group can not be used. All groups ' 'should have more than 1 element.') exp_dob = CategoryResults('DOB', None, None, 'This group can not be used. 
All groups ' 'should have more than 1 element.') exp_control_group = GroupResults('Control', np.array([2.3694943596755276, 3.3716388181385781, 5.4452089176253367, 4.5704258453173559, 4.4972603724478377]), 4.05080566264, {'avg': 4.0508056626409275}, None) exp_fast_group = GroupResults('Fast', np.array([7.2220488239279126, 4.2726021564374372, 1.1169097274372082, 4.02717600030876]), 4.15968417703, {'avg': 4.1596841770278292}, None) exp_treatment = CategoryResults('Treatment', 0.93311555, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('avg', False, [exp_description, exp_weight, exp_dob, exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) def test_get_trajectories_single(self): """get_trajectories returns the results of the provided category""" av = AverageGradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment']) obs = av.get_trajectories() exp_control_group = GroupResults('Control', np.array([2.3694943596755276, 3.3716388181385781, 5.4452089176253367, 4.5704258453173559, 4.4972603724478377]), 4.05080566264, {'avg': 4.0508056626409275}, None) exp_fast_group = GroupResults('Fast', np.array([7.2220488239279126, 4.2726021564374372, 1.1169097274372082, 4.02717600030876]), 4.15968417703, {'avg': 4.1596841770278292}, None) exp_treatment = CategoryResults('Treatment', 0.93311555, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('avg', False, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) def test_get_trajectories_weighted(self): """get_trajectories returns the correct weighted results""" av = AverageGradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment'], sort_category='Weight', weighted=True) obs = av.get_trajectories() exp_control_group = GroupResults('Control', np.array([5.7926887872, 4.3242308936, 2.9212403501, 5.5400792151, 1.2326804315]), 3.9621839355, {'avg': 3.9621839355}, None) exp_fast_group = 
GroupResults('Fast', np.array([7.2187223286, 2.5522161282, 2.2349795861, 4.5278215248]), 4.1334348919, {'avg': 4.1334348919}, None) exp_treatment = CategoryResults('Treatment', 0.9057666800, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('avg', True, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) class TrajectoryGradientANOVATests(BaseTests): def test_get_trajectories(self): tv = TrajectoryGradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment'], sort_category='Weight') obs = tv.get_trajectories() exp_control_group = GroupResults('Control', np.array([8.6681963576, 7.0962717982, 7.1036434615, 4.0675712674]), 6.73392072123, {'2-norm': 13.874494152}, None) exp_fast_group = GroupResults('Fast', np.array([11.2291654905, 3.9163741156, 4.4943507388]), 6.5466301150, {'2-norm': 12.713431181}, None) exp_treatment = CategoryResults('Treatment', 0.9374500147, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('trajectory', False, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) def test_get_trajectories_weighted(self): tv = TrajectoryGradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment'], sort_category='Weight', weighted=True) obs = tv.get_trajectories() exp_control_group = GroupResults('Control', np.array([8.9850643421, 6.1617529749, 7.7989125908, 4.9666249268]), 6.9780887086, {'2-norm': 14.2894710091}, None) exp_fast_group = GroupResults('Fast', np.array([9.6823682852, 2.9511115209, 5.2434091953]), 5.9589630005, {'2-norm': 11.3995901159}, None) exp_treatment = CategoryResults('Treatment', 0.6248157720, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('trajectory', True, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) class FirstDifferenceGradientANOVATests(BaseTests): def test_get_trajectories(self): dv = FirstDifferenceGradientANOVA(self.coords, 
self.prop_expl, self.metadata_map, trajectory_categories=['Treatment'], sort_category='Weight') obs = dv.get_trajectories() exp_control_group = GroupResults('Control', np.array([-1.5719245594, 0.0073716633, -3.0360721941]), -1.5335416967, {'mean': -1.5335416967, 'std': 1.2427771485}, None) exp_fast_group = GroupResults('Fast', np.array([-7.3127913749, 0.5779766231]), -3.3674073758, {'mean': -3.3674073758, 'std': 3.9453839990}, None) exp_treatment = CategoryResults('Treatment', 0.6015260608, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('diff', False, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) def test_get_trajectories_weighted(self): dv = FirstDifferenceGradientANOVA(self.coords, self.prop_expl, self.metadata_map, trajectory_categories=['Treatment'], sort_category='Weight', weighted=True) obs = dv.get_trajectories() exp_control_group = GroupResults('Control', np.array([-2.8233113671, 1.6371596158, -2.8322876639]), -1.3394798050, {'mean': -1.3394798050, 'std': 2.1048051097}, None) exp_fast_group = GroupResults('Fast', np.array([-6.7312567642, 2.2922976743]), -2.2194795449, {'mean': -2.2194795449, 'std': 4.5117772193}, None) exp_treatment = CategoryResults('Treatment', 0.8348644420, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('diff', True, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) class WindowDifferenceGradientANOVATests(BaseTests): def test_get_trajectories(self): wdv = WindowDifferenceGradientANOVA( self.coords, self.prop_expl, self.metadata_map, 3, trajectory_categories=['Treatment'], sort_category='Weight') obs = wdv.get_trajectories() exp_control_group = GroupResults('Control', np.array([-2.5790341819, -2.0166764661, -3.0360721941, 0.]), -1.9079457105, {'mean': -1.9079457105, 'std': 1.1592139913}, None) exp_fast_group = GroupResults('Fast', np.array([11.2291654905, 3.9163741156, 4.4943507388]), 6.5466301150, {'mean': 6.5466301150, 'std': 
3.3194494926}, "Cannot calculate the first difference " "with a window of size (3).") exp_treatment = CategoryResults('Treatment', 0.0103976830, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('wdiff', False, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) def test_get_trajectories_weighted(self): wdv = WindowDifferenceGradientANOVA( self.coords, self.prop_expl, self.metadata_map, 3, trajectory_categories=['Treatment'], sort_category='Weight', weighted=True) obs = wdv.get_trajectories() exp_control_group = GroupResults('Control', np.array([-2.6759675112, -0.2510321601, -2.8322876639, 0.]), -1.4398218338, {'mean': -1.4398218338, 'std': 1.31845790844}, None) exp_fast_group = GroupResults('Fast', np.array([9.6823682852, 2.9511115209, 5.2434091953]), 5.9589630005, {'mean': 5.9589630005, 'std': 2.7942163293}, "Cannot calculate the first difference " "with a window of size (3).") exp_treatment = CategoryResults('Treatment', 0.0110675605, [exp_control_group, exp_fast_group], None) exp = GradientANOVAResults('wdiff', True, [exp_treatment]) self.assert_gradientANOVA_results_almost_equal(obs, exp) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/tests/test_misc.py000066400000000000000000000033701464262511300215360ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from unittest import TestCase, main from skbio.stats._misc import _pprint_strs class PPrintStrsTests(TestCase): def test_truncation(self): # truncation between items (on comma) exp = "'a', ..." obs = _pprint_strs(['a', 'b', 'c'], max_chars=4) self.assertEqual(obs, exp) # truncation between items (on space) exp = "'a', ..." 
obs = _pprint_strs(['a', 'b', 'c'], max_chars=5) self.assertEqual(obs, exp) # truncation on item exp = "'a', ..." obs = _pprint_strs(['a', 'b', 'c'], max_chars=6) self.assertEqual(obs, exp) # truncation (no items) exp = "..." obs = _pprint_strs(['a', 'b', 'c'], max_chars=2) self.assertEqual(obs, exp) def test_no_truncation(self): exp = "'a'" obs = _pprint_strs(['a'], max_chars=3) self.assertEqual(obs, exp) exp = "'a', 'b', 'c'" obs = _pprint_strs(['a', 'b', 'c']) self.assertEqual(obs, exp) exp = "'a', 'b', 'c'" obs = _pprint_strs(['a', 'b', 'c'], max_chars=13) self.assertEqual(obs, exp) def test_non_default_delimiter_and_suffix(self): exp = "'abc','defg',...." obs = _pprint_strs(['abc', 'defg', 'hi', 'jklmno'], max_chars=14, delimiter=',', suffix='....') self.assertEqual(obs, exp) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/tests/test_power.py000066400000000000000000000525071464262511300217450ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main import numpy as np import numpy.testing as npt import pandas as pd from scipy.stats import kruskal from skbio.stats.power import (subsample_power, subsample_paired_power, _check_nans, confidence_bound, _calculate_power, _compare_distributions, _calculate_power_curve, _check_subsample_power_inputs, _identify_sample_groups, _draw_paired_samples, _get_min_size, paired_subsamples ) class PowerAnalysisTest(TestCase): def setUp(self): # Defines a testing functions def test_meta(ids, meta, cat, div): """Checks thhe div metric with a kruskal wallis""" out = [meta.loc[id_, div] for id_ in ids] return kruskal(*out)[1] def meta_f(x): """Applies `test_meta` to a result""" return test_meta(x, self.meta, 'INT', 'DIV') def f(x): """returns the p value of a kruskal wallis test""" return kruskal(*x)[1] self.test_meta = test_meta self.f = f self.meta_f = meta_f self.num_p = 1 # Sets the random seed np.random.seed(5) # Sets up the distributions of data for use self.s1 = np.arange(0, 10, 1) # Sets up two distributions which will never be equal by a rank-sum # test. 
self.samps = [np.ones((10))/10., np.ones((10))] self.pop = [np.arange(0, 10, 0.1), np.arange(0, 20, 0.2)] # Sets up a vector of alpha values self.alpha = np.power(10, np.array([-1, -1.301, -2, -3])).round(3) # Sets up a vector of samples self.num_samps = np.arange(10, 100, 10) # Sets up a mapping file meta = {'GW': {'INT': 'N', 'ABX': np.nan, 'DIV': 19.5, 'AGE': '30s', 'SEX': 'M'}, 'CB': {'INT': 'Y', 'ABX': np.nan, 'DIV': 42.7, 'AGE': '30s', 'SEX': 'M'}, 'WM': {'INT': 'N', 'ABX': 'N', 'DIV': 27.5, 'AGE': '20s', 'SEX': 'F'}, 'MH': {'INT': 'Y', 'ABX': 'N', 'DIV': 62.3, 'AGE': '30s', 'SEX': 'F'}, 'CD': {'INT': 'Y', 'ABX': 'Y', 'DIV': 36.4, 'AGE': '40s', 'SEX': 'F'}, 'LF': {'INT': 'Y', 'ABX': 'N', 'DIV': 50.2, 'AGE': '20s', 'SEX': 'M'}, 'PP': {'INT': 'N', 'ABX': 'Y', 'DIV': 10.8, 'AGE': '30s', 'SEX': 'F'}, 'MM': {'INT': 'N', 'ABX': 'N', 'DIV': 55.6, 'AGE': '40s', 'SEX': 'F'}, 'SR': {'INT': 'N', 'ABX': 'Y', 'DIV': 2.2, 'AGE': '20s', 'SEX': 'M'}, 'TS': {'INT': 'N', 'ABX': 'Y', 'DIV': 16.1, 'AGE': '40s', 'SEX': 'M'}, 'PC': {'INT': 'Y', 'ABX': 'N', 'DIV': 82.6, 'AGE': '40s', 'SEX': 'M'}, 'NR': {'INT': 'Y', 'ABX': 'Y', 'DIV': 15.7, 'AGE': '20s', 'SEX': 'F'}} self.meta = pd.DataFrame.from_dict(meta, orient='index') self.meta_pairs = {0: [['GW', 'SR', 'TS'], ['CB', 'LF', 'PC']], 1: [['MM', 'PP', 'WM'], ['CD', 'MH', 'NR']]} self.pair_index = np.array([0, 0, 0, 1, 1, 1]) self.counts = np.array([5, 15, 25, 35, 45]) self.powers = [np.array([[0.105, 0.137, 0.174, 0.208, 0.280], [0.115, 0.135, 0.196, 0.204, 0.281], [0.096, 0.170, 0.165, 0.232, 0.256], [0.122, 0.157, 0.202, 0.250, 0.279], [0.132, 0.135, 0.173, 0.203, 0.279]]), np.array([[0.157, 0.345, 0.522, 0.639, 0.739], [0.159, 0.374, 0.519, 0.646, 0.757], [0.161, 0.339, 0.532, 0.634, 0.745], [0.169, 0.372, 0.541, 0.646, 0.762], [0.163, 0.371, 0.522, 0.648, 0.746]]), np.array([[0.276, 0.626, 0.865, 0.927, 0.992], [0.267, 0.667, 0.848, 0.937, 0.978], [0.236, 0.642, 0.850, 0.935, 0.977], [0.249, 0.633, 0.828, 0.955, 0.986], [0.249, 
0.663, 0.869, 0.951, 0.985]])] self.power_alpha = 0.1 self.effects = np.array([0.15245, 0.34877, 0.55830]) self.bounds = np.array([0.01049, 0.00299, 0.007492]) self.labels = np.array(['Age', 'Intervenption', 'Antibiotics']) self.cats = np.array(['AGE', 'INT', 'ABX']) self.cat = "AGE" self.control_cats = ['INT', 'ABX'] def test_subsample_power_defaults(self): test_p, test_c = subsample_power(self.f, self.pop, num_iter=10, num_runs=5) self.assertEqual(test_p.shape, (5, 4)) npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c) def test_subsample_power_counts(self): test_p, test_c = subsample_power(self.f, samples=self.pop, num_iter=10, num_runs=2, min_counts=5) self.assertEqual(test_p.shape, (2, 5)) npt.assert_array_equal(np.arange(5, 50, 10), test_c) def test_subsample_power_matches(self): test_p, test_c = subsample_power(self.f, samples=self.pop, num_iter=10, num_runs=5, draw_mode="matched") self.assertEqual(test_p.shape, (5, 4)) npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c) def test_subsample_power_multi_p(self): test_p, test_c = subsample_power(lambda x: np.array([0.5, 0.5]), samples=self.pop, num_iter=10, num_runs=5) self.assertEqual(test_p.shape, (5, 4, 2)) npt.assert_array_equal(np.array([10, 20, 30, 40]), test_c) def test_subsample_paired_power(self): known_c = np.array([1, 2, 3, 4]) # Sets up the handling values cat = 'INT' control_cats = ['SEX'] # Tests for the control cats test_p, test_c = subsample_paired_power(self.meta_f, meta=self.meta, cat=cat, control_cats=control_cats, counts_interval=1, num_iter=10, num_runs=2) # Test the output shapes are sane self.assertEqual(test_p.shape, (2, 4)) npt.assert_array_equal(known_c, test_c) def test_subsample_paired_power_multi_p(self): def f(x): return np.array([0.5, 0.5, 0.005]) cat = 'INT' control_cats = ['SEX'] # Tests for the control cats test_p, test_c = subsample_paired_power(f, meta=self.meta, cat=cat, control_cats=control_cats, counts_interval=1, num_iter=10, num_runs=2) 
self.assertEqual(test_p.shape, (2, 4, 3)) def test_check_nans_str(self): self.assertTrue(_check_nans('string')) def test_check_nans_num(self): self.assertTrue(_check_nans(4.2)) def test__check_nans_nan(self): self.assertFalse(_check_nans(np.nan)) def test__check_nans_clean_list(self): self.assertTrue(_check_nans(['foo', 'bar'], switch=True)) def test__check_nans_list_nan(self): self.assertFalse(_check_nans(['foo', np.nan], switch=True)) def test__check_str_error(self): with self.assertRaises(TypeError): _check_nans(self.f) def test__get_min_size_strict(self): known = 5 test = _get_min_size(self.meta, 'INT', ['ABX', 'SEX'], ['Y', 'N'], True) self.assertEqual(test, known) def test__get_min_size_relaxed(self): known = 5 test = _get_min_size(self.meta, 'INT', ['ABX', 'SEX'], ['Y', 'N'], False) self.assertEqual(known, test) def test_confidence_bound_default(self): # Sets the know confidence bound known = 2.2830070 test = confidence_bound(self.s1) npt.assert_almost_equal(test, known, 3) def test_confidence_bound_df(self): known = 2.15109 test = confidence_bound(self.s1, df=15) npt.assert_almost_equal(known, test, 3) def test_confidence_bound_alpha(self): known = 3.2797886 test = confidence_bound(self.s1, alpha=0.01) npt.assert_almost_equal(known, test, 3) def test_confidence_bound_nan(self): # Sets the value to test samples = np.array([[4, 3.2, 3.05], [2, 2.8, 2.95], [5, 2.9, 3.07], [1, 3.1, 2.93], [3, np.nan, 3.00]]) # Sets the know value known = np.array([2.2284, 0.2573, 0.08573]) # Tests the function test = confidence_bound(samples, axis=0) npt.assert_almost_equal(known, test, 3) def test_confidence_bound_axis_none(self): # Sets the value to test samples = np.array([[4, 3.2, 3.05], [2, 2.8, 2.95], [5, 2.9, 3.07], [1, 3.1, 2.93], [3, np.nan, 3.00]]) # Sest the known value known = 0.52852 # Tests the output test = confidence_bound(samples, axis=None) npt.assert_almost_equal(known, test, 3) def test__calculate_power(self): # Sets up the values to test crit = 0.025 # Sets 
the known value known = 0.5 # Calculates the test value test = _calculate_power(self.alpha, crit) # Checks the test value npt.assert_almost_equal(known, test) def test__calculate_power_n(self): crit = 0.025 known = np.array([0.5, 0.5]) alpha = np.vstack((self.alpha, self.alpha)) test = _calculate_power(alpha, crit) npt.assert_almost_equal(known, test) def test__compare_distributions_sample_counts_error(self): with self.assertRaises(ValueError): _compare_distributions(self.f, [self.pop[0][:5], self.pop[1]], 1, counts=25) def test__compare_distributions_all_mode(self): known = np.ones((100))*0.0026998 test = _compare_distributions(self.f, self.samps, 1, num_iter=100) npt.assert_allclose(known, test, 5) def test__compare_distributions_matched_mode(self): # Sets the known value known_mean = 0.162195 known_std = 0.121887 known_shape = (100,) # Tests the sample value test = _compare_distributions(self.f, self.pop, self.num_p, mode='matched', num_iter=100) npt.assert_allclose(known_mean, test.mean(), rtol=0.1, atol=0.02) npt.assert_allclose(known_std, test.std(), rtol=0.1, atol=0.02) self.assertEqual(known_shape, test.shape) def test__compare_distributions_draw_mode(self): draw_mode = 'Ultron' with self.assertRaises(ValueError): _check_subsample_power_inputs(self.f, self.pop, draw_mode, self.num_p) def test__compare_distributions_multiple_returns(self): known = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]]) def f(x): return np.array([1, 2, 3]) test = _compare_distributions(f, self.pop, 3, mode='matched', num_iter=3) npt.assert_array_equal(known, test) def test_check_subsample_power_inputs_matched_mode(self): with self.assertRaises(ValueError): _check_subsample_power_inputs(self.f, samples=[np.ones((2)), np.ones((5))], draw_mode="matched") def test_check_subsample_power_inputs_counts(self): with self.assertRaises(ValueError): _check_subsample_power_inputs(self.f, samples=[np.ones((3)), np.ones((5))], min_counts=5, counts_interval=1000, max_counts=7) def 
test_check_subsample_power_inputs_ratio(self): with self.assertRaises(ValueError): _check_subsample_power_inputs(self.f, self.samps, ratio=np.array([1, 2, 3])) def test_check_subsample_power_inputs_test(self): # Defines a test function def test(x): return 'Hello World!' with self.assertRaises(TypeError): _check_subsample_power_inputs(test, self.samps) def test_check_sample_power_inputs(self): # Defines the know returns known_num_p = 1 known_ratio = np.ones((2)) known_counts = np.arange(2, 10, 2) # Runs the code for the returns test_ratio, test_num_p, test_counts = \ _check_subsample_power_inputs(self.f, self.samps, counts_interval=2, max_counts=10) # Checks the returns are sane self.assertEqual(known_num_p, test_num_p) npt.assert_array_equal(known_ratio, test_ratio) npt.assert_array_equal(known_counts, test_counts) def test__calculate_power_curve_ratio_error(self): with self.assertRaises(ValueError): _calculate_power_curve(self.f, self.pop, self.num_samps, ratio=np.array([0.1, 0.2, 0.3]), num_iter=100) def test__calculate_power_curve_default(self): # Sets the known output known = np.array([0.509, 0.822, 0.962, 0.997, 1.000, 1.000, 1.000, 1.000, 1.000]) # Generates the test values test = _calculate_power_curve(self.f, self.pop, self.num_samps, num_iter=100) # Checks the samples returned sanely npt.assert_allclose(test, known, rtol=0.1, atol=0.01) def test__calculate_power_curve_alpha(self): # Sets the know output known = np.array([0.31, 0.568, 0.842, 0.954, 0.995, 1.000, 1.000, 1.000, 1.000]) # Generates the test values test = _calculate_power_curve(self.f, self.pop, self.num_samps, alpha=0.01, num_iter=100) # Checks the samples returned sanely npt.assert_allclose(test, known, rtol=0.1, atol=0.1) def test__calculate_power_curve_ratio(self): # Sets the know output known = np.array([0.096, 0.333, 0.493, 0.743, 0.824, 0.937, 0.969, 0.996, 0.998]) # Generates the test values test = _calculate_power_curve(self.f, self.pop, self.num_samps, ratio=np.array([0.25, 0.75]), 
num_iter=100) # Checks the samples returned sanely npt.assert_allclose(test, known, rtol=0.1, atol=0.1) def test_paired_subsamples_default(self): # Sets the known np.array set known_array = [{'MM', 'SR', 'TS', 'GW', 'PP', 'WM'}, {'CD', 'LF', 'PC', 'CB', 'MH', 'NR'}] # Gets the test value cat = 'INT' control_cats = ['SEX', 'AGE'] test_array = paired_subsamples(self.meta, cat, control_cats) self.assertEqual(known_array[0], set(test_array[0])) self.assertEqual(known_array[1], set(test_array[1])) def test_paired_subsamples_break(self): # Sets known np.array set known_array = [np.array([]), np.array([])] # Gets the test value cat = 'ABX' control_cats = ['SEX', 'AGE', 'INT'] test_array = paired_subsamples(self.meta, cat, control_cats) npt.assert_array_equal(known_array, test_array) def test_paired_subsample_undefined(self): known_array = np.zeros((2, 0)) cat = 'INT' order = ['Y', 'N'] control_cats = ['AGE', 'ABX', 'SEX'] test_array = paired_subsamples(self.meta, cat, control_cats, order=order) npt.assert_array_equal(test_array, known_array) def test_paired_subsample_fewer(self): # Set known value known_array = {'PP', 'MH', 'CD', 'PC', 'TS', 'MM'} # Sets up test values cat = 'AGE' order = ['30s', '40s'] control_cats = ['ABX'] test_array = paired_subsamples(self.meta, cat, control_cats, order=order) for v in test_array[0]: self.assertTrue(v in known_array) for v in test_array[1]: self.assertTrue(v in known_array) def test_paired_subsamples_not_strict(self): known_array = [{'WM', 'MM', 'GW', 'SR', 'TS'}, {'LF', 'PC', 'CB', 'NR', 'CD'}] # Gets the test values cat = 'INT' control_cats = ['ABX', 'AGE'] test_array = paired_subsamples(self.meta, cat, control_cats, strict_match=False) self.assertEqual(set(test_array[0]), known_array[0]) self.assertEqual(set(test_array[1]), known_array[1]) def test__identify_sample_groups(self): # Defines the know values known_pairs = {0: [['MM'], ['CD']], 1: [['SR'], ['LF']], 2: [['TS'], ['PC']], 3: [['GW'], ['CB']], 4: [['PP'], ['MH']], 5: 
[['WM'], ['NR']]} known_index = np.array([0, 1, 2, 3, 4, 5]) test_pairs, test_index = _identify_sample_groups(self.meta, 'INT', ['SEX', 'AGE'], order=['N', 'Y'], strict_match=True) self.assertEqual(known_pairs.keys(), test_pairs.keys()) self.assertEqual(sorted(known_pairs.values()), sorted(test_pairs.values())) npt.assert_array_equal(known_index, test_index) def test__identify_sample_groups_not_strict(self): # Defines the know values known_pairs = {1: [np.array(['PP'], dtype=object), np.array(['CD', 'NR'], dtype=object)], 0: [np.array(['MM', 'WM'], dtype=object), np.array(['MH'], dtype=object)], 2: [np.array(['GW'], dtype=object), np.array(['CB'], dtype=object)]} known_index = np.array([0, 1, 2]) test_pairs, test_index = _identify_sample_groups(self.meta, 'INT', ['SEX', 'ABX'], order=['N', 'Y'], strict_match=False) self.assertEqual(known_pairs.keys(), test_pairs.keys()) for k in known_pairs: for i in range(2): npt.assert_array_equal(known_pairs[k][i], test_pairs[k][i]) npt.assert_array_equal(known_index, test_index) def test__draw_paired_samples(self): num_samps = 3 known_sets = [{'GW', 'SR', 'TS', 'MM', 'PP', 'WM'}, {'CB', 'LF', 'PC', 'CD', 'MH', 'NR'}] test_samps = _draw_paired_samples(self.meta_pairs, self.pair_index, num_samps) for i, t in enumerate(test_samps): self.assertTrue(set(t).issubset(known_sets[i])) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/stats/tests/test_subsample.py000066400000000000000000000213401464262511300225730ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import warnings import numpy as np import numpy.testing as npt from skbio.stats import subsample_counts, isubsample def setup(): """Ignore warnings during tests.""" warnings.simplefilter("ignore") def teardown(): """Clear the list of warning filters, so that no filters are active.""" warnings.resetwarnings() class SubsampleCountsTests(unittest.TestCase): def test_subsample_counts_nonrandom(self): a = np.array([0, 5, 0]) # Subsample same number of items that are in input (without # replacement). npt.assert_equal(subsample_counts(a, 5), a) # Can only choose from one bin. exp = np.array([0, 2, 0]) npt.assert_equal(subsample_counts(a, 2), exp) npt.assert_equal( subsample_counts(a, 2, replace=True), exp) # Subsample zero items. a = [3, 0, 1] exp = np.array([0, 0, 0]) npt.assert_equal(subsample_counts(a, 0), exp) npt.assert_equal(subsample_counts(a, 0, replace=True), exp) def test_subsample_counts_without_replacement(self): # Selecting 2 counts from the vector 1000 times yields each of the two # possible results at least once each. a = np.array([2, 0, 1]) actual = set() for i in range(1000): obs = subsample_counts(a, 2) actual.add(tuple(obs)) self.assertEqual(actual, {(1, 0, 1), (2, 0, 0)}) obs = subsample_counts(a, 2) self.assertTrue(np.array_equal(obs, np.array([1, 0, 1])) or np.array_equal(obs, np.array([2, 0, 0]))) def test_subsample_counts_with_replacement(self): # Can choose from all in first bin, all in last bin (since we're # sampling with replacement), or split across bins. a = np.array([2, 0, 1]) actual = set() for i in range(1000): obs = subsample_counts(a, 2, replace=True) actual.add(tuple(obs)) self.assertEqual(actual, {(1, 0, 1), (2, 0, 0), (0, 0, 2)}) # Test that selecting 35 counts from a 36-count vector 1000 times # yields more than 10 different subsamples. 
If we were subsampling # *without* replacement, there would be only 10 possible subsamples # because there are 10 nonzero bins in array a. However, there are more # than 10 possibilities when sampling *with* replacement. a = np.array([2, 0, 1, 2, 1, 8, 6, 0, 3, 3, 5, 0, 0, 0, 5]) actual = set() for i in range(1000): obs = subsample_counts(a, 35, replace=True) self.assertEqual(obs.sum(), 35) actual.add(tuple(obs)) self.assertTrue(len(actual) > 10) def test_subsample_counts_with_replacement_equal_n(self): # test when n == counts.sum() a = np.array([0, 0, 3, 4, 2, 1]) actual = set() for i in range(1000): obs = subsample_counts(a, 10, replace=True) self.assertEqual(obs.sum(), 10) actual.add(tuple(obs)) self.assertTrue(len(actual) > 1) def test_subsample_counts_invalid_input(self): # Negative n. with self.assertRaises(ValueError): subsample_counts([1, 2, 3], -1) # Wrong number of dimensions. with self.assertRaises(ValueError): subsample_counts([[1, 2, 3], [4, 5, 6]], 2) # Input has too few counts. 
with self.assertRaises(ValueError): subsample_counts([0, 5, 0], 6, replace=False) # Input has too counts, but should work with bootstrap subsample_counts([0, 5, 0], 6, replace=True) class ISubsampleTests(unittest.TestCase): def setUp(self): np.random.seed(123) # comment indicates the expected random value self.sequences = [ ('a_1', 'AATTGGCC-a1'), # 2, 3624216819017203053 ('a_2', 'AATTGGCC-a2'), # 5, 5278339153051796802 ('b_1', 'AATTGGCC-b1'), # 4, 4184670734919783522 ('b_2', 'AATTGGCC-b2'), # 0, 946590342492863505 ('a_4', 'AATTGGCC-a4'), # 3, 4048487933969823850 ('a_3', 'AATTGGCC-a3'), # 7, 7804936597957240377 ('c_1', 'AATTGGCC-c1'), # 8, 8868534167180302049 ('a_5', 'AATTGGCC-a5'), # 1, 3409506807702804593 ('c_2', 'AATTGGCC-c2'), # 9, 8871627813779918895 ('c_3', 'AATTGGCC-c3') # 6, 7233291490207274528 ] def mock_sequence_iter(self, items): return ({'SequenceID': sid, 'Sequence': seq} for sid, seq in items) def test_isubsample_simple(self): maximum = 10 def bin_f(x): return x['SequenceID'].rsplit('_', 1)[0] # note, the result here is sorted by sequence_id but is in heap order # by the random values associated to each sequence exp = sorted([('a', {'SequenceID': 'a_5', 'Sequence': 'AATTGGCC-a5'}), ('a', {'SequenceID': 'a_1', 'Sequence': 'AATTGGCC-a1'}), ('a', {'SequenceID': 'a_4', 'Sequence': 'AATTGGCC-a4'}), ('a', {'SequenceID': 'a_3', 'Sequence': 'AATTGGCC-a3'}), ('a', {'SequenceID': 'a_2', 'Sequence': 'AATTGGCC-a2'}), ('b', {'SequenceID': 'b_2', 'Sequence': 'AATTGGCC-b2'}), ('b', {'SequenceID': 'b_1', 'Sequence': 'AATTGGCC-b1'}), ('c', {'SequenceID': 'c_3', 'Sequence': 'AATTGGCC-c3'}), ('c', {'SequenceID': 'c_2', 'Sequence': 'AATTGGCC-c2'}), ('c', {'SequenceID': 'c_1', 'Sequence': 'AATTGGCC-c1'})], key=lambda x: x[0]) obs = isubsample(self.mock_sequence_iter(self.sequences), maximum, bin_f=bin_f) self.assertEqual(sorted(obs, key=lambda x: x[0]), exp) def test_per_sample_sequences_min_seqs(self): maximum = 10 minimum = 3 def bin_f(x): return 
x['SequenceID'].rsplit('_', 1)[0] # note, the result here is sorted by sequence_id but is in heap order # by the random values associated to each sequence exp = sorted([('a', {'SequenceID': 'a_5', 'Sequence': 'AATTGGCC-a5'}), ('a', {'SequenceID': 'a_1', 'Sequence': 'AATTGGCC-a1'}), ('a', {'SequenceID': 'a_4', 'Sequence': 'AATTGGCC-a4'}), ('a', {'SequenceID': 'a_3', 'Sequence': 'AATTGGCC-a3'}), ('a', {'SequenceID': 'a_2', 'Sequence': 'AATTGGCC-a2'}), ('c', {'SequenceID': 'c_3', 'Sequence': 'AATTGGCC-c3'}), ('c', {'SequenceID': 'c_2', 'Sequence': 'AATTGGCC-c2'}), ('c', {'SequenceID': 'c_1', 'Sequence': 'AATTGGCC-c1'})], key=lambda x: x[0]) obs = isubsample(self.mock_sequence_iter(self.sequences), maximum, minimum, bin_f=bin_f) self.assertEqual(sorted(obs, key=lambda x: x[0]), exp) def test_per_sample_sequences_complex(self): maximum = 2 def bin_f(x): return x['SequenceID'].rsplit('_', 1)[0] exp = sorted([('a', {'SequenceID': 'a_2', 'Sequence': 'AATTGGCC-a2'}), ('a', {'SequenceID': 'a_3', 'Sequence': 'AATTGGCC-a3'}), ('b', {'SequenceID': 'b_2', 'Sequence': 'AATTGGCC-b2'}), ('b', {'SequenceID': 'b_1', 'Sequence': 'AATTGGCC-b1'}), ('c', {'SequenceID': 'c_1', 'Sequence': 'AATTGGCC-c1'}), ('c', {'SequenceID': 'c_2', 'Sequence': 'AATTGGCC-c2'})], key=lambda x: x[0]) obs = isubsample(self.mock_sequence_iter(self.sequences), maximum, bin_f=bin_f, buf_size=1) self.assertEqual(sorted(obs, key=lambda x: x[0]), exp) def test_min_gt_max(self): gen = isubsample([1, 2, 3], maximum=2, minimum=10) with self.assertRaises(ValueError): next(gen) def test_min_lt_zero(self): gen = isubsample([1, 2, 3], maximum=0, minimum=-10) with self.assertRaises(ValueError): next(gen) def test_max_lt_zero(self): gen = isubsample([1, 2, 3], maximum=-10) with self.assertRaises(ValueError): next(gen) def test_binf_is_none(self): maximum = 2 items = [1, 2] exp = [(True, 1), (True, 2)] obs = isubsample(items, maximum) self.assertEqual(list(obs), exp) if __name__ == '__main__': unittest.main() 
scikit-bio-0.6.2/skbio/table/000077500000000000000000000000001464262511300157565ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/table/__init__.py000066400000000000000000000017131464262511300200710ustar00rootroot00000000000000r"""Data Table (:mod:`skbio.table`) ================================== .. currentmodule:: skbio.table This module provides support for interaction with data tables in the Biological Observation Matrix (BIOM) format. Please refer to the `BIOM documentation `__ for the instructions on working with BIOM tables. BIOM table ---------- .. autosummary:: :toctree: generated/ Table Example data ^^^^^^^^^^^^ .. autosummary:: :toctree: generated/ example_table """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from skbio.table._base import Table, example_table __all__ = ["Table", "example_table"] scikit-bio-0.6.2/skbio/table/_base.py000066400000000000000000000006551464262511300174070ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from biom import Table, example_table Table.default_write_format = "biom" scikit-bio-0.6.2/skbio/table/tests/000077500000000000000000000000001464262511300171205ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/table/tests/__init__.py000066400000000000000000000005411464262511300212310ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/table/tests/test_base.py000066400000000000000000000037621464262511300214530ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main from skbio.table import Table import numpy as np class TableTests(TestCase): def test_table(self): data = np.arange(40).reshape(10, 4) sample_ids = ['S%d' % i for i in range(4)] observ_ids = ['O%d' % i for i in range(10)] sample_metadata = [{'environment': 'A'}, {'environment': 'B'}, {'environment': 'A'}, {'environment': 'B'}] observ_metadata = [{'taxonomy': ['Bacteria', 'Firmicutes']}, {'taxonomy': ['Bacteria', 'Firmicutes']}, {'taxonomy': ['Bacteria', 'Proteobacteria']}, {'taxonomy': ['Bacteria', 'Proteobacteria']}, {'taxonomy': ['Bacteria', 'Proteobacteria']}, {'taxonomy': ['Bacteria', 'Bacteroidetes']}, {'taxonomy': ['Bacteria', 'Bacteroidetes']}, {'taxonomy': ['Bacteria', 'Firmicutes']}, {'taxonomy': ['Bacteria', 'Firmicutes']}, {'taxonomy': ['Bacteria', 'Firmicutes']}] table = Table(data, observ_ids, sample_ids, observ_metadata, sample_metadata, table_id='Example Table') self.assertEqual(list(table.ids()), ['S0', 'S1', 'S2', 'S3']) self.assertEqual(list(table.ids(axis='observation')), ['O0', 'O1', 'O2', 'O3', 'O4', 'O5', 'O6', 'O7', 'O8', 'O9']) self.assertEqual(int(table.nnz), 39) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/test.py000066400000000000000000000007321464262511300162220ustar00rootroot00000000000000"""Run pytest in scikit-bio configuration. ---------------------------------------------------------------------------- Copyright (c) 2013--, scikit-bio development team. Distributed under the terms of the Modified BSD License. The full license is in the file LICENSE.txt, distributed with this software. 
---------------------------------------------------------------------------- """ from skbio.util import pytestrunner if __name__ == "__main__": pytestrunner() scikit-bio-0.6.2/skbio/tests/000077500000000000000000000000001464262511300160315ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/tests/__init__.py000066400000000000000000000005411464262511300201420ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/tests/test_base.py000066400000000000000000000030211464262511300203500ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest from skbio._base import SkbioObject, ElasticLines class TestSkbioObject(unittest.TestCase): def test_no_instantiation(self): class Foo(SkbioObject): pass with self.assertRaises(TypeError): Foo() class TestElasticLines(unittest.TestCase): def setUp(self): self.el = ElasticLines() def test_empty(self): self.assertEqual(self.el.to_str(), '') def test_add_line(self): self.el.add_line('foo') self.assertEqual(self.el.to_str(), 'foo') def test_add_lines(self): self.el = ElasticLines() self.el.add_lines(['alice', 'bob', 'carol']) self.assertEqual(self.el.to_str(), 'alice\nbob\ncarol') def test_add_separator(self): self.el.add_separator() self.assertEqual(self.el.to_str(), '') self.el.add_line('foo') self.assertEqual(self.el.to_str(), '---\nfoo') self.el.add_separator() self.el.add_lines(['bar', 'bazzzz']) self.el.add_separator() self.assertEqual(self.el.to_str(), '------\nfoo\n------\nbar\nbazzzz\n------') if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/tests/test_workflow.py000066400000000000000000000315571464262511300213270ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from collections import defaultdict from skbio.workflow import (Exists, NotExecuted, NotNone, Workflow, not_none, requires, method) from unittest import TestCase, main def construct_iterator(**kwargs): """make an iterator for testing purposes""" to_gen = [] for k in sorted(kwargs): if k.startswith('iter'): to_gen.append(kwargs[k]) if len(to_gen) == 1: return (x for x in to_gen[0]) else: return zip(*to_gen) class MockWorkflow(Workflow): def initialize_state(self, item): self.state[0] = None self.state[1] = item @method(priority=90) @requires(option='A', values=True) def wf_groupA(self): self.methodA1() self.methodA2() @method() @requires(option='B', values=True) def wf_groupB(self): self.methodB1() self.methodB2() @method(priority=10) @requires(option='C', values=True) def wf_groupC(self): self.methodC1() self.methodC2() def methodA1(self): name = 'A1' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] def methodA2(self): name = 'A2' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] def methodB1(self): name = 'B1' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = 'failed' else: self.state = [name, self.state[-1]] @requires(option='foo', values=[1, 2, 3]) def methodB2(self): name = 'B2' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = 'failed' else: self.state = [name, self.state[-1]] def methodC1(self): name = 'C1' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] @requires(option='C2', values=[1, 2, 3]) def methodC2(self): name = 'C2' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] class WorkflowTests(TestCase): def setUp(self): opts = {'A': 
True, 'C': True} self.obj_short = MockWorkflow([None, None], options=opts, stats=defaultdict(int)) self.obj_debug = MockWorkflow([None, None], debug=True, options=opts, stats=defaultdict(int)) self.obj_noshort = MockWorkflow([None, None], short_circuit=False, options=opts, stats=defaultdict(int)) def test_debug_trace(self): gen = construct_iterator(**{'iter_x': [1, 2, 3, 4, 5]}) obj = self.obj_debug(gen) exp = ['C1', 1] obs = next(obj) self.assertEqual(obs, exp) exp_trace = set([('wf_groupA', 0), ('methodA1', 1), ('methodA2', 2), ('wf_groupC', 3), ('methodC1', 4)]) exp_pre_state = {('wf_groupA', 0): [None, 1], ('methodA1', 1): [None, 1], ('methodA2', 2): ['A1', 1], ('wf_groupC', 3): ['A2', 1], ('methodC1', 4): ['A2', 1]} exp_post_state = {('wf_groupA', 0): ['A2', 1], ('methodA1', 1): ['A1', 1], ('methodA2', 2): ['A2', 1], ('wf_groupC', 3): ['C1', 1], ('methodC1', 4): ['C1', 1]} obs_trace = self.obj_debug.debug_trace obs_pre_state = self.obj_debug.debug_pre_state obs_post_state = self.obj_debug.debug_post_state self.assertEqual(obs_trace, exp_trace) self.assertEqual(obs_pre_state, exp_pre_state) self.assertEqual(obs_post_state, exp_post_state) def test_init(self): self.assertEqual(self.obj_short.options, {'A': True, 'C': True}) self.assertEqual(self.obj_short.stats, {}) self.assertTrue(self.obj_short.short_circuit) self.assertEqual(self.obj_noshort.options, {'A': True, 'C': True}) self.assertEqual(self.obj_noshort.stats, {}) self.assertFalse(self.obj_noshort.short_circuit) def test_init_reserved_attributes(self): with self.assertRaises(AttributeError): Workflow('foo', failed=True) def test_all_wf_methods(self): # note on priority: groupA:90, groupC:10, groupB:0 (default) exp = [self.obj_short.wf_groupA, self.obj_short.wf_groupC, self.obj_short.wf_groupB] obs = self.obj_short._all_wf_methods() self.assertEqual(obs, exp) def test_call_AC_no_fail(self): iter_ = construct_iterator(**{'iter_x': [1, 2, 3, 4, 5]}) # success function def sf(x): return x.state[:] exp_stats = 
{'A1': 5, 'A2': 5, 'C1': 5} # C2 isn't executed as its requirements aren't met in the options exp_result = [['C1', 1], ['C1', 2], ['C1', 3], ['C1', 4], ['C1', 5]] obs_result = list(self.obj_short(iter_, sf, None)) self.assertEqual(obs_result, exp_result) self.assertEqual(self.obj_short.stats, exp_stats) def test_call_AC_fail(self): iter_ = construct_iterator(**{'iter_x': [1, 2, 'fail A2', 4, 5]}) # success function def sf(x): return x.state[:] ff = sf # failed function exp_stats = {'A1': 5, 'A2': 5, 'C1': 4, 'C2': 4} self.obj_short.options['C2'] = 1 # pass in a failed callback to capture the result, and pause execution gen = self.obj_short(iter_, sf, ff) r1 = next(gen) self.assertEqual(r1, ['C2', 1]) self.assertFalse(self.obj_short.failed) r2 = next(gen) self.assertEqual(r2, ['C2', 2]) self.assertFalse(self.obj_short.failed) r3 = next(gen) self.assertEqual(self.obj_short.state, ['A2', 'fail A2']) self.assertTrue(self.obj_short.failed) self.assertEqual(r3, ['A2', 'fail A2']) r4 = next(gen) self.assertEqual(r4, ['C2', 4]) self.assertFalse(self.obj_short.failed) r5 = next(gen) self.assertEqual(r5, ['C2', 5]) self.assertFalse(self.obj_short.failed) self.assertEqual(self.obj_short.stats, exp_stats) def test_call_AC_fail_noshort(self): iter_ = construct_iterator(**{'iter_x': [1, 2, 'fail A2', 4, 5]}) # success function def sf(x): return x.state[:] ff = sf # failed function exp_stats = {'A1': 5, 'A2': 5, 'C1': 5} # pass in a failed callback to capture the result, and pause execution gen = self.obj_noshort(iter_, sf, ff) r1 = next(gen) self.assertEqual(r1, ['C1', 1]) self.assertFalse(self.obj_noshort.failed) r2 = next(gen) self.assertEqual(r2, ['C1', 2]) self.assertFalse(self.obj_noshort.failed) next(gen) self.assertEqual(self.obj_noshort.state, ['C1', 'fail A2']) self.assertTrue(self.obj_noshort.failed) r4 = next(gen) self.assertEqual(r4, ['C1', 4]) self.assertFalse(self.obj_noshort.failed) r5 = next(gen) self.assertEqual(r5, ['C1', 5]) 
self.assertFalse(self.obj_noshort.failed) self.assertEqual(self.obj_noshort.stats, exp_stats) class MockWorkflowReqTest(Workflow): def _allocate_state(self): self.state = None def initialize_state(self, item): self.state = [None, item] @method(priority=5) @requires(state=lambda x: x[-1] < 3) def wf_needs_data(self): name = 'needs_data' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] @method(priority=10) def wf_always_run(self): name = 'always_run' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] @method(priority=20) @requires(option='cannot_be_none', values=not_none) def wf_run_if_not_none(self): name = 'run_if_not_none' self.stats[name] += 1 if self.state[-1] == 'fail %s' % name: self.failed = True self.state = [name, self.state[-1]] class RequiresTests(TestCase): def test_validdata(self): obj = MockWorkflowReqTest([None, None], stats=defaultdict(int)) single_iter = construct_iterator(**{'iter_x': [1, 2, 3, 4, 5]}) exp_stats = {'needs_data': 2, 'always_run': 5} exp_result = [['needs_data', 1], ['needs_data', 2], ['always_run', 3], ['always_run', 4], ['always_run', 5]] obs_result = list(obj(single_iter)) self.assertEqual(obs_result, exp_result) self.assertEqual(obj.stats, exp_stats) def test_not_none_avoid(self): obj = MockWorkflowReqTest([None, None], {'cannot_be_none': None}, stats=defaultdict(int)) single_iter = construct_iterator(**{'iter_x': [1, 2, 3, 4, 5]}) exp_stats = {'needs_data': 2, 'always_run': 5} exp_result = [['needs_data', 1], ['needs_data', 2], ['always_run', 3], ['always_run', 4], ['always_run', 5]] obs_result = list(obj(single_iter)) self.assertEqual(obs_result, exp_result) self.assertEqual(obj.stats, exp_stats) def test_not_none_execute(self): obj = MockWorkflowReqTest([None, None], options={'cannot_be_none': True}, debug=True, stats=defaultdict(int)) single_iter = construct_iterator(**{'iter_x': [1, 2, 3, 4, 
5]}) exp_stats = {'needs_data': 2, 'always_run': 5, 'run_if_not_none': 5} exp_result = [['needs_data', 1], ['needs_data', 2], ['always_run', 3], ['always_run', 4], ['always_run', 5]] obs_result = list(obj(single_iter)) self.assertEqual(obs_result, exp_result) self.assertEqual(obj.stats, exp_stats) def test_methodb1(self): obj = MockWorkflow([None, None], stats=defaultdict(int)) obj.initialize_state('test') obj.methodB1() self.assertEqual(obj.state, ['B1', 'test']) self.assertFalse(obj.failed) # methodb1 executes regardless of if self.failed obj.failed = True obj.initialize_state('test 2') obj.methodB1() self.assertEqual(obj.state, ['B1', 'test 2']) obj.failed = False obj.state = [None, 'fail B1'] obj.methodB1() self.assertEqual(obj.state, 'failed') self.assertEqual(obj.stats, {'B1': 3}) def test_methodb2_accept(self): # methodb2 is setup to be valid when foo is in [1,2,3], make sure we # can execute obj = MockWorkflow([None, None], options={'foo': 1}, stats=defaultdict(int)) obj.initialize_state('test') obj.methodB2() self.assertEqual(obj.state, ['B2', 'test']) self.assertEqual(obj.stats, {'B2': 1}) def test_methodb2_ignore(self): # methodb2 is setup to be valid when foo is in [1, 2, 3], make sure # we do not execute obj = MockWorkflow([None, None], options={'foo': 'bar'}, stats=defaultdict(int)) obj.methodB2() self.assertEqual(obj.state, [None, None]) self.assertEqual(obj.stats, {}) class PriorityTests(TestCase): def test_dec(self): @method(priority=10) def foo(x, y, z): """doc check""" return x + y + z self.assertEqual(foo.priority, 10) self.assertEqual(foo.__name__, 'foo') self.assertEqual(foo.__doc__, 'doc check') class NotExecutedTests(TestCase): def test_call(self): ne = NotExecuted() obs = ne('foo') self.assertTrue(obs is ne) self.assertEqual(obs.msg, 'foo') class ExistsTests(TestCase): def test_contains(self): e = Exists() self.assertTrue('foo' in e) self.assertTrue(None in e) class NotNoneTests(TestCase): def test_contains(self): nn = NotNone() 
self.assertTrue('foo' in nn) self.assertFalse(None in nn) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/tree/000077500000000000000000000000001464262511300156265ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/tree/__init__.py000066400000000000000000000134021464262511300177370ustar00rootroot00000000000000r"""Trees and Phylogenetics (:mod:`skbio.tree`) =========================================== .. currentmodule:: skbio.tree This module provides functionality for working with trees, including phylogenetic trees and hierarchies. Functionality is provided for constructing trees, for traversing in multiple ways, comparisons, fetching subtrees, and more. This module supports trees that are multifurcating and nodes that have single descendants. Tree structure and operations ----------------------------- .. autosummary:: :toctree: generated/ TreeNode Phylogenetic reconstruction --------------------------- .. autosummary:: :toctree: generated/ nj nni Tree utilities -------------- .. autosummary:: :toctree: generated/ majority_rule Exceptions ^^^^^^^^^^ .. autosummary:: TreeError NoLengthError DuplicateNodeError MissingNodeError NoParentError Tutorial -------- >>> from skbio import TreeNode >>> from io import StringIO A new tree can be constructed from a Newick string. Newick is a common format used to represent tree objects within a file. Newick was part of the original PHYLIP package from Joseph Felsenstein's group (defined `here `_), and is based around representing nesting with parentheses. For instance, the following string describes a 3 taxon tree, with one internal node: ((A, B)C, D)root; Where A, B, and D are tips of the tree, and C is an internal node that covers tips A and B. Now let's construct a simple tree and dump an ASCII representation: >>> tree = TreeNode.read(StringIO("((A, B)C, D)root;")) >>> print(tree.is_root()) # is this the root of the tree? True >>> print(tree.is_tip()) # is this node a tip? 
False >>> print(tree.ascii_art()) /-A /C-------| -root----| \-B | \-D There are a few common ways to traverse a tree, and depending on your use, some methods are more appropriate than others. Wikipedia has a well written page on tree `traversal methods `_, and will go into further depth than what we'll cover here. We're only going to cover two of the commonly used traversals here, preorder and postorder, but we will show examples of two other common helper traversal methods to gather tips or internal nodes. The first traversal we'll cover is a preorder traversal in which you evaluate from root to tips, looking at the left most child first. For instance: >>> for node in tree.preorder(): ... print(node.name) root C A B D The next method we'll look at is a postorder traveral which will evaluate the left subtree tips first before walking back up the tree: >>> for node in tree.postorder(): ... print(node.name) A B C D root `TreeNode` provides two helper methods as well for iterating over just the tips or for iterating over just the internal nodes. >>> for node in tree.tips(): ... print("Node name: %s, Is a tip: %s" % (node.name, node.is_tip())) Node name: A, Is a tip: True Node name: B, Is a tip: True Node name: D, Is a tip: True >>> for node in tree.non_tips(): ... print("Node name: %s, Is a tip: %s" % (node.name, node.is_tip())) Node name: C, Is a tip: False Note, by default, `non_tips` will ignore `self` (which is the root in this case). You can pass the `include_self` flag to `non_tips` if you wish to include `self`. The `TreeNode` provides a few ways to compare trees. First, let's create two similar trees and compare their topologies using `compare_subsets`. 
This distance is the fraction of common clades present in the two trees, where a distance of 0 means the trees contain identical clades, and a distance of 1 indicates the trees do not share any common clades: >>> tree1 = TreeNode.read(StringIO("((A, B)C, (D, E)F, (G, H)I)root;")) >>> tree2 = TreeNode.read(StringIO("((G, H)C, (D, E)F, (B, A)I)root;")) >>> tree3 = TreeNode.read(StringIO("((D, B)C, (A, E)F, (G, H)I)root;")) >>> print(tree1.compare_subsets(tree1)) # identity case 0.0 >>> print(tree1.compare_subsets(tree2)) # same tree but different clade order 0.0 >>> print(tree1.compare_subsets(tree3)) # only 1 of 3 common subsets 0.6666666666666667 We can additionally take into account branch length when computing distances between trees. First, we're going to construct two new trees with described branch length, note the difference in the Newick strings: >>> tree1 = \ ... TreeNode.read(StringIO("((A:0.1, B:0.2)C:0.3, D:0.4, E:0.5)root;")) >>> tree2 = \ ... TreeNode.read(StringIO("((A:0.4, B:0.8)C:0.3, D:0.1, E:0.5)root;")) In these two trees, we've added on a description of length from the node to its parent, so for instance: >>> for node in tree1.postorder(): ... print(node.name, node.length) A 0.1 B 0.2 C 0.3 D 0.4 E 0.5 root None Now let's compare two trees using the distances computed pairwise between tips in the trees. The distance computed, by default, is the correlation of all pairwise tip-to-tip distances between trees: >>> print(tree1.compare_tip_distances(tree1)) # identity case 0.0 >>> print(tree1.compare_tip_distances(tree2)) 0.120492524415 """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._tree import TreeNode from ._nj import nj, nni from ._majority_rule import majority_rule from ._exception import ( TreeError, NoLengthError, DuplicateNodeError, MissingNodeError, NoParentError, ) __all__ = [ "TreeNode", "nj", "majority_rule", "TreeError", "NoLengthError", "DuplicateNodeError", "MissingNodeError", "NoParentError", ] scikit-bio-0.6.2/skbio/tree/_exception.py000066400000000000000000000013661464262511300203430ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- class TreeError(Exception): """General tree error.""" pass class NoLengthError(TreeError): """Missing length when expected.""" pass class DuplicateNodeError(TreeError): """Duplicate nodes with identical names.""" pass class MissingNodeError(TreeError): """Expecting a node.""" pass class NoParentError(MissingNodeError): """Missing a parent.""" pass scikit-bio-0.6.2/skbio/tree/_majority_rule.py000066400000000000000000000244501464262511300212310ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from collections import defaultdict import numpy as np from skbio.tree import TreeNode def _walk_clades(trees, weights): """Walk all the clades of all the trees. 
Parameters ---------- trees : list of TreeNode The trees to walk weights : np.array Tree weights Returns ------- list of tuple The clades and support values sorted by support value such that the most supported clade is index 0. The tuples are of the form: (frozenset, float). defaultdict(float) The edge lengths, keyed by frozenset of the clade, and valued by the weighted average length of the clade by the trees the clade was observed in. """ clade_counts = defaultdict(float) edge_lengths = defaultdict(float) total = weights.sum() # get clade counts def tipnames_f(n): return [n.name] if n.is_tip() else [] for tree, weight in zip(trees, weights): tree.cache_attr(tipnames_f, "tip_names", frozenset) for node in tree.postorder(): tip_names = node.tip_names # if node.length is not None, fetch it and weight it length = node.length * weight if node.length is not None else None clade_counts[tip_names] += weight if length is None: edge_lengths[tip_names] = None else: edge_lengths[tip_names] += length / total # sort clades by number times observed clade_counts = sorted(clade_counts.items(), key=lambda x: len(x[0]), reverse=True) return clade_counts, edge_lengths def _filter_clades(clade_counts, cutoff_threshold): """Filter clades that not well supported or are contradicted. Parameters ---------- clade_counts : list of tuple Where the first element in each tuple is the frozenset of the clade, and the second element is the support value. It is expected that this list is sorted by descending order by support. cutoff_threshold : float The minimum weighted observation count that a clade must have to be considered supported. Returns ------- dict A dict of the accepted clades, keyed by the frozenset of the clade and valued by the support value. """ accepted_clades = {} for clade, count in clade_counts: conflict = False if count <= cutoff_threshold: continue if len(clade) > 1: # check the current clade against all the accepted clades to see if # it conflicts. 
A conflict is defined as: # 1. the clades are not disjoint # 2. neither clade is a subset of the other for accepted_clade in accepted_clades: intersect = clade.intersection(accepted_clade) subset = clade.issubset(accepted_clade) superset = clade.issuperset(accepted_clade) if intersect and not (subset or superset): conflict = True if conflict is False: accepted_clades[clade] = count return accepted_clades def _build_trees(clade_counts, edge_lengths, support_attr, tree_node_class): """Construct the trees with support. Parameters ---------- clade_counts : dict Keyed by the frozenset of the clade and valued by the support edge_lengths : dict Keyed by the frozenset of the clade and valued by the weighted length support_attr : str The name of the attribute to hold the support value tree_node_class : type Specifies type of consensus trees that are returned. Either ``TreeNode`` or a type that implements the same interface (most usefully, a subclass of ``TreeNode``). Returns ------- list of tree_node_class instances A list of the constructed trees """ nodes = {} queue = [(len(clade), clade) for clade in clade_counts] while queue: # The values within the queue are updated on each iteration, so it # doesn't look like an insertion sort will make sense unfortunately queue.sort() (clade_size, clade) = queue.pop(0) new_queue = [] # search for ancestors of clade for _, ancestor in queue: if clade.issubset(ancestor): # update ancestor such that, in the following example: # ancestor == {1, 2, 3, 4} # clade == {2, 3} # new_ancestor == {1, {2, 3}, 4} new_ancestor = (ancestor - clade) | frozenset([clade]) # update references for counts and lengths clade_counts[new_ancestor] = clade_counts.pop(ancestor) edge_lengths[new_ancestor] = edge_lengths.pop(ancestor) ancestor = new_ancestor new_queue.append((len(ancestor), ancestor)) # if the clade is a tip, then we have a name if clade_size == 1: name = list(clade)[0] else: name = None # the clade will not be in nodes if it is a tip children = 
[nodes.pop(c) for c in clade if c in nodes] length = edge_lengths[clade] node = tree_node_class(children=children, length=length, name=name) setattr(node, support_attr, clade_counts[clade]) nodes[clade] = node queue = new_queue return list(nodes.values()) def majority_rule( trees, weights=None, cutoff=0.5, support_attr="support", tree_node_class=TreeNode ): r"""Determine consensus trees from a list of rooted trees. Parameters ---------- trees : list of TreeNode The trees to operate on weights : list or np.array of {int, float}, optional If provided, the list must be in index order with `trees`. Each tree will receive the corresponding weight. If omitted, all trees will be equally weighted. cutoff : float, 0.0 <= cutoff <= 1.0, optional Any clade that has <= cutoff support will be dropped. If cutoff is < 0.5, then it is possible that ties will result. If so, ties are broken arbitrarily depending on list sort order. support_attr : str, optional The attribute to be decorated onto the resulting trees that contain the consensus support. tree_node_class : type, optional Specifies type of consensus trees that are returned. Either ``TreeNode`` (the default) or a type that implements the same interface (most usefully, a subclass of ``TreeNode``). Returns ------- list of tree_node_class instances Each tree will be of type `tree_node_class`. Multiple trees can be returned in the case of two or more disjoint sets of tips represented on input. Notes ----- This code was adapted from PyCogent's majority consensus code originally written by Matthew Wakefield. The method is based off the original description of consensus trees in [1]_. An additional description can be found in the Phylip manual [2]_. This method does not support majority rule extended. Support is computed as a weighted average of the tree weights in which the clade was observed in. For instance, if {A, B, C} was observed in 5 trees all with a weight of 1, its support would then be 5. References ---------- .. 
[1] Margush T, McMorris FR. (1981) "Consensus n-trees." Bulletin for Mathematical Biology 43(2) 239-44. .. [2] http://evolution.genetics.washington.edu/phylip/doc/consense.html Examples -------- Computing the majority consensus, using the example from the Phylip manual with the exception that we are computing majority rule and not majority rule extended. >>> from skbio.tree import TreeNode >>> from io import StringIO >>> trees = [ ... TreeNode.read(StringIO("(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));")), ... TreeNode.read(StringIO("(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));")), ... TreeNode.read(StringIO("(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));")), ... TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));")), ... TreeNode.read(StringIO("(A,(B,(E,(G,((F,I),(((J,H),D),C))))));")), ... TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));")), ... TreeNode.read(StringIO("(A,(B,(E,((F,I),(G,(((J,H),D),C))))));")), ... TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));")), ... TreeNode.read(StringIO("(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));"))] >>> consensus = majority_rule(trees, cutoff=0.5)[0] >>> for node in sorted(consensus.non_tips(), ... key=lambda k: k.count(tips=True)): ... support_value = node.support ... names = ' '.join(sorted(n.name for n in node.tips())) ... print("Tips: %s, support: %s" % (names, support_value)) Tips: F I, support: 9.0 Tips: D H J, support: 6.0 Tips: C D H J, support: 6.0 Tips: C D F G H I J, support: 6.0 Tips: C D E F G H I J, support: 9.0 Tips: B C D E F G H I J, support: 9.0 In the next example, multiple trees will be returned which can happen if clades are not well supported across the trees. In addition, this can arise if not all tips are present across all trees. >>> trees = [ ... TreeNode.read(StringIO("((a,b),(c,d),(e,f));")), ... TreeNode.read(StringIO("(a,(c,d),b,(e,f));")), ... TreeNode.read(StringIO("((c,d),(e,f),b);")), ... 
TreeNode.read(StringIO("(a,(c,d),(e,f));"))] >>> consensus_trees = majority_rule(trees) >>> len(consensus_trees) 4 """ if weights is None: weights = np.ones(len(trees), dtype=float) else: weights = np.asarray(weights) if len(weights) != len(trees): raise ValueError("Number of weights and trees differ.") cutoff_threshold = cutoff * weights.sum() clade_counts, edge_lengths = _walk_clades(trees, weights) clade_counts = _filter_clades(clade_counts, cutoff_threshold) trees = _build_trees(clade_counts, edge_lengths, support_attr, tree_node_class) return trees scikit-bio-0.6.2/skbio/tree/_nj.py000066400000000000000000000601261464262511300167530ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import numpy as np import heapq as hq from skbio.stats.distance import DistanceMatrix from skbio.tree import TreeNode def nj(dm, disallow_negative_branch_length=True, result_constructor=None): r"""Apply neighbor joining for phylogenetic reconstruction. Parameters ---------- dm : skbio.DistanceMatrix Input distance matrix containing distances between taxa. disallow_negative_branch_length : bool, optional Neighbor joining can result in negative branch lengths, which don't make sense in an evolutionary context. If `True`, negative branch lengths will be returned as zero, a common strategy for handling this issue that was proposed by the original developers of the algorithm. result_constructor : function, optional Function to apply to construct the result object. This must take a newick-formatted string as input. The result of applying this function to a newick-formatted string will be returned from this function. 
This defaults to ``lambda x: TreeNode.read(StringIO(x), format='newick')``. Returns ------- TreeNode By default, the result object is a `TreeNode`, though this can be overridden by passing `result_constructor`. See Also -------- TreeNode.root_at_midpoint Notes ----- Neighbor joining was initially described in Saitou and Nei (1987) [1]_. The example presented here is derived from the Wikipedia page on neighbor joining [2]_. Gascuel and Steel (2006) provide a detailed overview of Neighbor joining in terms of its biological relevance and limitations [3]_. Neighbor joining, by definition, creates unrooted trees. One strategy for rooting the resulting trees is midpoint rooting, which is accessible as ``TreeNode.root_at_midpoint``. References ---------- .. [1] Saitou N, and Nei M. (1987) "The neighbor-joining method: a new method for reconstructing phylogenetic trees." Molecular Biology and Evolution. PMID: 3447015. .. [2] http://en.wikipedia.org/wiki/Neighbour_joining .. [3] Gascuel O, and Steel M. (2006) "Neighbor-Joining Revealed" Molecular Biology and Evolution, Volume 23, Issue 11, November 2006, Pages 1997–2000, https://doi.org/10.1093/molbev/msl072 Examples -------- Define a new distance matrix object describing the distances between five taxa: a, b, c, d, and e. >>> from skbio import DistanceMatrix >>> from skbio.tree import nj >>> data = [[0, 5, 9, 9, 8], ... [5, 0, 10, 10, 9], ... [9, 10, 0, 8, 7], ... [9, 10, 8, 0, 3], ... [8, 9, 7, 3, 0]] >>> ids = list('abcde') >>> dm = DistanceMatrix(data, ids) Construct the neighbor joining tree representing the relationship between those taxa. This is returned as a TreeNode object. >>> tree = nj(dm) >>> print(tree.ascii_art()) /-d | | /-c |---------| ---------| | /-b | \--------| | \-a | \-e Again, construct the neighbor joining tree, but instead return the newick string representing the tree, rather than the TreeNode object. (Note that in this example the string output is truncated when printed to facilitate rendering.) 
>>> newick_str = nj(dm, result_constructor=str) >>> print(newick_str[:55], "...") (d:2.000000, (c:4.000000, (b:3.000000, a:2.000000):3.00 ... """ if dm.shape[0] < 3: raise ValueError( "Distance matrix must be at least 3x3 to " "generate a neighbor joining tree." ) if result_constructor is None: def result_constructor(x): return TreeNode.read(io.StringIO(x), format="newick") # initialize variables node_definition = None # while there are still more than three distances in the distance matrix, # join neighboring nodes. while dm.shape[0] > 3: # compute the Q matrix q = _compute_q(dm) # identify the pair of nodes that have the lowest Q value. if multiple # pairs have equally low Q values, the first pair identified (closest # to the top-left of the matrix) will be chosen. these will be joined # in the current node. idx1, idx2 = _lowest_index(q) pair_member_1 = dm.ids[idx1] pair_member_2 = dm.ids[idx2] # determine the distance of each node to the new node connecting them. pair_member_1_len, pair_member_2_len = _pair_members_to_new_node( dm, idx1, idx2, disallow_negative_branch_length ) # define the new node in newick style node_definition = "(%s:%f, %s:%f)" % ( pair_member_1, pair_member_1_len, pair_member_2, pair_member_2_len, ) # compute the new distance matrix, which will contain distances of all # other nodes to this new node dm = _compute_collapsed_dm( dm, pair_member_1, pair_member_2, disallow_negative_branch_length=disallow_negative_branch_length, new_node_id=node_definition, ) # When there are three distances left in the distance matrix, we have a # fully defined tree. The last node is internal, and its distances are # defined by these last three values. # First determine the distance between the last two nodes to be joined in # a pair... 
pair_member_1 = dm.ids[1] pair_member_2 = dm.ids[2] pair_member_1_len, pair_member_2_len = _pair_members_to_new_node( dm, pair_member_1, pair_member_2, disallow_negative_branch_length ) # ...then determine their distance to the other remaining node, but first # handle the trivial case where the input dm was only 3 x 3 node_definition = node_definition or dm.ids[0] internal_len = 0.5 * ( dm[pair_member_1, node_definition] + dm[pair_member_2, node_definition] - dm[pair_member_1, pair_member_2] ) if disallow_negative_branch_length and internal_len < 0: internal_len = 0 # ...and finally create the newick string describing the whole tree. newick = "(%s:%f, %s:%f, %s:%f);" % ( pair_member_1, pair_member_1_len, node_definition, internal_len, pair_member_2, pair_member_2_len, ) # package the result as requested by the user and return it. return result_constructor(newick) def _compute_q(dm): """Compute Q matrix, used to identify the next pair of nodes to join.""" q = np.zeros(dm.shape) n = dm.shape[0] big_sum = np.array([dm.data.sum(1)] * dm.shape[0]) big_sum_diffs = big_sum + big_sum.T q = (n - 2) * dm.data - big_sum_diffs np.fill_diagonal(q, 0) return DistanceMatrix(q, dm.ids) def _compute_collapsed_dm(dm, i, j, disallow_negative_branch_length, new_node_id): """Return the distance matrix resulting from joining ids i and j in a node. If the input distance matrix has shape ``(n, n)``, the result will have shape ``(n-1, n-1)`` as the ids `i` and `j` are collapsed to a single new ids. 
""" in_n = dm.shape[0] out_n = in_n - 1 out_ids = [new_node_id] out_ids.extend([e for e in dm.ids if e not in (i, j)]) result = np.zeros((out_n, out_n)) # pre-populate the result array with known distances ij_indexes = [dm.index(i), dm.index(j)] result[1:, 1:] = np.delete( np.delete(dm.data, ij_indexes, axis=0), ij_indexes, axis=1 ) # calculate the new distances from the current DistanceMatrix k_to_u = 0.5 * (dm[i] + dm[j] - dm[i, j]) # set negative branches to 0 if specified if disallow_negative_branch_length: k_to_u[k_to_u < 0] = 0 # drop nodes being joined k_to_u = np.delete(k_to_u, ij_indexes) # assign the distances to the result array result[0] = result[:, 0] = np.concatenate([[0], k_to_u]) return DistanceMatrix(result, out_ids) def _lowest_index(dm): """Return the index of the lowest value in the input distance matrix. If there are ties for the lowest value, the index of top-left most occurrence of that value will be returned. This should be ultimately be replaced with a new DistanceMatrix object method (#228). """ # get the positions of the lowest value results = np.vstack(np.where(dm.data == np.amin(dm.condensed_form()))).T # select results in the bottom-left of the array results = results[results[:, 0] > results[:, 1]] # calculate the distances of the results to [0, 0] res_distances = np.sqrt(results[:, 0] ** 2 + results[:, 1] ** 2) # detect distance ties & return the point which would have # been produced by the original function if np.count_nonzero(res_distances == np.amin(res_distances)) > 1: eqdistres = results[res_distances == np.amin(res_distances)] res_coords = eqdistres[np.argmin([r[0] for r in eqdistres])] else: res_coords = results[np.argmin(res_distances)] return tuple([res_coords[0], res_coords[1]]) def _pair_members_to_new_node(dm, i, j, disallow_negative_branch_length): """Return the distance between a new node and descendants of that new node. Parameters ---------- dm : skbio.DistanceMatrix The input distance matrix. 
i, j : str Identifiers of entries in the distance matrix to be collapsed (i.e., the descendants of the new node, which is internally represented as `u`). disallow_negative_branch_length : bool Neighbor joining can result in negative branch lengths, which don't make sense in an evolutionary context. If `True`, negative branch lengths will be returned as zero, a common strategy for handling this issue that was proposed by the original developers of the algorithm. """ n = dm.shape[0] i_to_j = dm[i, j] i_to_u = (0.5 * i_to_j) + ((dm[i].sum() - dm[j].sum()) / (2 * (n - 2))) if disallow_negative_branch_length and i_to_u < 0: i_to_u = 0 j_to_u = i_to_j - i_to_u if disallow_negative_branch_length and j_to_u < 0: j_to_u = 0 return i_to_u, j_to_u def nni(tree, dm, inplace=True): r"""Perform nearest neighbor interchange (NNI) on a phylogenetic tree. Parameters ---------- tree : skbio.TreeNode Input phylogenetic tree to be rearranged. dm : skbio.DistanceMatrix Input distance matrix containing distances between taxa. inplace : bool, optional Whether manipulate the tree in place (``True``, default) or return a copy of the tree (``False``). Returns ------- TreeNode Rearranged phylogenetic tree (if ``inplace`` is ``True``). Notes ----- NNI algorithm for minimum evolution problem on phylogenetic trees. It rearranges an initial tree topology by performing subtree exchanges such that the distance is minimized. This implementation is based on the FastNNI algorithm [1]_. The input tree is required to be binary and rooted at a leaf node such that there is a unique descendant from the root. References ---------- .. [1] Desper R, Gascuel O. Fast and accurate phylogeny reconstruction algorithms based on the minimum-evolution principle. J Comput Biol. 2002;9(5):687-705. doi: 10.1089/106652702761034136. PMID: 12487758. Examples -------- Define a new distance matrix object describing the distances between five taxa: human, monkey, pig, rat, and chicken. 
>>> from skbio import DistanceMatrix >>> from skbio.tree import nj >>> dm = DistanceMatrix([[0, 0.02, 0.18, 0.34, 0.55], ... [0.02, 0, 0.19, 0.35, 0.55], ... [0.18, 0.19, 0, 0.34, 0.54], ... [0.34, 0.35, 0.34, 0, 0.62], ... [0.55, 0.55, 0.54, 0.62, 0]], ... ['human','monkey','pig','rat','chicken']) Also, provide a tree topology to be rearranged. The tree provided is required to be a binary tree rooted at a leaf node. Note that the tree provided does not require to have assigned edge lengths. >>> from skbio.tree import TreeNode >>> tree = TreeNode.read(["(((human,chicken),(rat,monkey)))pig;"]) >>> print(tree.ascii_art()) /-human /--------| | \-chicken -pig----- /--------| | /-rat \--------| \-monkey Perform nearest neighbor interchange (NNI). By default, the tree is rearrangede in place. >>> nni(tree, dm) >>> print(tree.ascii_art()) /-rat /--------| | \-chicken -pig----- /--------| | /-monkey \--------| \-human Besides rearranging the tree, estimated edge lengths are assigned to the tree. >>> rat = tree.find('rat') >>> print(rat.length) 0.21 """ # Initialize and populate the average distance matrix if not inplace: tree = tree.copy() if len(tree.root().children) != 1: raise TypeError( "Could not perform NNI. " "Tree needs to be rooted at a leaf node." ) for node in tree.non_tips(): if len(node.children) != 2: raise TypeError("Could not perform NNI. Tree needs to be a binary tree.") adm = _average_distance_matrix(tree, dm) while True: # create heap of possible swaps and then swapping subtrees # until no more swaps are possible. adm = _average_distance_matrix(tree, dm) heap = _swap_heap(tree, adm) if not heap: break swap = hq.heappop(heap) _perform_swap(swap[1][0], swap[1][1]) # edge values are added using an OLS framework. 
    # assign OLS-estimated branch lengths to the rearranged topology
    _edge_estimation(tree, dm)
    if not inplace:
        return tree


def _perform_swap(node1, node2):
    """Swap two subtrees in place by exchanging their parents.

    `append` reattaches each node under the other's parent; no value is
    returned — the tree referenced by the callers' nodes is mutated.
    """
    parent1, parent2 = node1.parent, node2.parent
    parent1.append(node2)
    parent2.append(node1)


def _average_distance(node1, node2, dm):
    """Return the average distance between the leaves of two subtrees.

    Distances between nodes are calculated using a distance matrix.

    The leaf sets of each subtree are resolved by ``_tip_or_root``; the
    result is the mean of all pairwise distances between the two sets.
    """
    nodelist1 = _tip_or_root(node1)
    nodelist2 = _tip_or_root(node2)
    df = dm.between(nodelist1, nodelist2)
    return df["value"].mean()


def _tip_or_root(node):
    """Get name(s) of a node if it's a tip or root, otherwise its descending tips."""
    # a tip (or the root, which NNI treats as a leaf) represents itself;
    # an internal node is represented by all tips descending from it
    if node.is_tip() or node.is_root():
        return [node.name]
    else:
        return [x.name for x in node.tips()]


def _average_distance_upper(node1, node2, dm):
    """Return the average distance between the leaves of two subtrees.

    Used for subtrees which have a set of tips that are the complement
    of the set of tips that are descendants from the node defining
    the subtree.

    Given an internal edge of a binary tree, exactly one adjacent edge
    will connect to a node defining a subtree of this form.
    """
    nodelist1 = _tip_or_root(node1)
    # NOTE(review): when node2 is the root, nodelist2 stays empty and the
    # mean below is taken over an empty selection (NaN) — confirm this
    # branch is unreachable or intended in practice.
    if node2.is_root():
        nodelist2 = []
    # Second subtree serves as the tree with a set of tips
    # complementary to the set of tips that descend from the
    # corresponding second node.
    else:
        root2 = node2.root()
        # the complement set: the root's name plus all tips of the tree
        # that are NOT descendants of node2
        nodelist2 = [root2.name]
        nodelist2.extend(root2.subset() - node2.subset())
    df = dm.between(nodelist1, nodelist2)
    return df["value"].mean()


def _subtree_count(subtree):
    """Return the number of leaves in a subtree.

    Assumes the root as a leaf node.
    """
    # the root counts as one leaf by convention (the tree is rooted at a
    # leaf node for NNI), as does an ordinary tip
    if subtree.is_tip() or subtree.is_root():
        return 1
    else:
        return subtree.count(tips=True)


def _swap_length(a, b, c, d, i, j, k, m, adm):
    """Return the change in overall tree length after a given swap.

    The count of leaves contained in each subtree are denoted 'a, b, c, d'
    while each node defining the subtree has the index 'i, j, k, m',
    respectively.
""" lambda1 = (a * d + b * c) / ((a + b) * (c + d)) lambda2 = (a * d + b * c) / ((a + c) * (b + d)) return 0.5 * ( (lambda1 - 1) * (adm[i][k] + adm[j][m]) - (lambda2 - 1) * (adm[i][j] + adm[k][m]) - (lambda1 - lambda2) * (adm[i][m] + adm[j][k]) ) def _swap_heap(tree, adm): """Return a maxheap ordered by the swap length for all possible swaps.""" heap = [] ordered = list(tree.postorder(include_self=False)) root = tree.root() n_taxa = root.count(tips=True) + 1 # begin by finding nodes which are the child node of an internal edge for node in ordered: # ignore tips of the tree if node.is_tip(): continue # identify the parent and grandparent nodes parent = node a = parent.parent # identify the index of each neighboring node for index, node in enumerate(ordered): if node == a: i1 = index for child in parent.children: if child.is_tip(): continue childnode = child c, d = childnode.children for sibling in childnode.siblings(): b = sibling for index, node in enumerate(ordered): if node == b: i2 = index elif node == c: i3 = index elif node == d: i4 = index # count the tips of the subtrees defined by the neighboring nodes sub_tips = [] for subtree in [b, c, d]: sub_tips.append(1 if subtree.is_tip() else subtree.count(tips=True)) b_, c_, d_ = sub_tips a_ = n_taxa - b_ - c_ - d_ # calculate the swap length for the two possible swaps given the edge swap_1 = _swap_length(a_, b_, c_, d_, i1, i2, i3, i4, adm) swap_2 = _swap_length(a_, b_, d_, c_, i1, i2, i4, i3, adm) # store the best possible swap into a maxheap if swap_1 > swap_2 and swap_1 > 0: swap = -1 * swap_1 hq.heappush(heap, (swap, (b, c))) elif swap_2 > swap_1 and swap_2 > 0: swap = -1 * swap_2 hq.heappush(heap, (swap, (b, d))) return heap def _average_subtree_distance(a, b, a1, a2, dm): """Return the average distance between two subtrees.""" return ( _subtree_count(a1) * _average_distance(a1, b, dm) + _subtree_count(a2) * _average_distance(a2, b, dm) ) / _subtree_count(a) def _average_distance_matrix(tree, dm): """Return 
the matrix of distances between pairs of subtrees.""" ordered = list(tree.postorder(include_self=False)) n = len(ordered) r = tree.root() taxa_size = r.count(tips=True) + 1 adm = np.empty((n, n)) for i, a in enumerate(ordered): # skip over unique descendant if a in tree.children: continue # find the average distance between given node and root if a.is_tip(): adm[n - 1, i] = adm[i, n - 1] = dm[a.name, r.name] else: a1, a2 = a.children adm[n - 1, i] = adm[i, n - 1] = _average_subtree_distance(a, r, a1, a2, dm) # find the average distance between first node and a second node # which is above the first node in the postorder as well as an ancestor for j in range(i + 1, n - 1): # part (a) b = ordered[j] # skipping over ancestors if b in a.ancestors(): continue # both nodes are tips if a.is_tip() and b.is_tip(): adm[i, j] = adm[j, i] = dm[a.name, b.name] # second node is a tip, but not the first node elif b.is_tip(): a1, a2 = a.children adm[i, j] = adm[j, i] = _average_subtree_distance(a, b, a1, a2, dm) # neither node is a tip else: b1, b2 = b.children adm[i, j] = adm[j, i] = _average_subtree_distance(b, a, b1, b2, dm) # calculating for second nodes which are ancestors for j, b in enumerate(ordered): # skipping over unique descendant if b in tree.children: continue s_ = b.siblings() for sibling in s_: s = sibling p = b.parent for i, a in enumerate(ordered): if b in a.ancestors(): adm[i, j] = adm[j, i] = ( _subtree_count(s) * _average_distance(a, s, dm) + (taxa_size - _subtree_count(p)) * _average_distance_upper(a, p, dm) ) / (taxa_size - _subtree_count(b)) # zero the diagonal adm[i, i] = 0 return adm def _edge_estimation(tree, dm): """Assign estimated edge values to a tree based on a given distance matrix. Estimation of edge values is based on an ordinary least squares (OLS) framework. 
""" adm = _average_distance_matrix(tree, dm) ordered = list(tree.postorder(include_self=False)) root = tree.root() taxa_size = root.count(tips=True) + 1 # identify edges by first finding the child node of an edge for edge_node in ordered: parent = edge_node.parent # skip over root node if edge_node.is_root(): continue # calculate edge length for the edge adjacent to the root elif parent.is_root(): for index, node in enumerate(ordered): if node == edge_node: i1 = index a, b = edge_node.children for index, node in enumerate(ordered): if node == a: i2 = index elif node == b: i3 = index edge_node.length = 0.5 * (adm[i2][i1] + adm[i3][i1] - adm[i2][i3]) # calculate edge lengths for external edges elif edge_node.is_tip(): a = parent.parent if a.is_root(): for child in a.children: a = child for siblingnode in edge_node.siblings(): b = siblingnode for index, node in enumerate(ordered): if node == edge_node: i1 = index if node == a: i2 = index if node == b: i3 = index edge_node.length = 0.5 * (adm[i2][i1] + adm[i3][i1] - adm[i2][i3]) # calculate edge lengths for internal edges else: a = parent.parent if a.is_root(): for child in a.children: a = child for index, node in enumerate(ordered): if node == a: i1 = index c, d = edge_node.children for sibling in edge_node.siblings(): b = sibling for index, node in enumerate(ordered): if node == b: i2 = index elif node == c: i3 = index elif node == d: i4 = index # count the tips of subtrees which are adjacent to the internal edge sub_tips = [] for subtree in [b, c, d]: sub_tips.append(1 if subtree.is_tip() else subtree.count(tips=True)) b_, c_, d_ = sub_tips a_ = taxa_size - b_ - c_ - d_ # calculate the edge length lambda1 = (a_ * d_ + b_ * c_) / ((a_ + b_) * (c_ + d_)) edge_node.length = 0.5 * ( (lambda1 * (adm[i1][i3] + adm[i2][i4])) + ((1 - lambda1) * (adm[i1][i4] + adm[i2][i3])) - (adm[i1][i2] + adm[i3][i4]) ) scikit-bio-0.6.2/skbio/tree/_tree.py000066400000000000000000003762571464262511300173220ustar00rootroot00000000000000# 
---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from warnings import warn, simplefilter from operator import or_, itemgetter from copy import copy, deepcopy from itertools import combinations from functools import reduce from collections import defaultdict import numpy as np import pandas as pd from scipy.spatial.distance import correlation from skbio._base import SkbioObject from skbio.stats.distance import DistanceMatrix from skbio.tree._exception import ( NoLengthError, DuplicateNodeError, NoParentError, MissingNodeError, TreeError, ) from skbio.util import RepresentationWarning from skbio.util._decorator import classonlymethod from skbio.util._warning import _warn_deprecated def distance_from_r(m1, m2): r"""Estimate distance as (1-r)/2: neg correl = max distance. Parameters ---------- m1 : DistanceMatrix a distance matrix to compare m2 : DistanceMatrix a distance matrix to compare Returns ------- float The distance between m1 and m2 """ return correlation(m1.data.flat, m2.data.flat) / 2 class TreeNode(SkbioObject): r"""Representation of a node within a tree. A `TreeNode` instance stores links to its parent and optional children nodes. In addition, the `TreeNode` can represent a `length` (e.g., a branch length) between itself and its parent. Within this object, the use of "children" and "descendants" is frequent in the documentation. A child is a direct descendant of a node, while descendants are all nodes that are below a given node (e.g., grand-children, etc). Parameters ---------- name : str or None A node can have a name. It is common for tips in particular to have names, for instance, in a phylogenetic tree where the tips correspond to species. 
length : float, int, or None Length of the branch connecting this node to its parent. Can represent ellapsed time, amount of mutations, or other measures of evolutionary distance. support : float, int, or None Support value of the branch connecting this node to its parent. Can be bootstrap value, posterior probability, or other metrics measuring the confidence or frequency of this branch. parent : TreeNode or None Connect this node to a parent children : list of TreeNode or None Connect this node to existing children """ default_write_format = "newick" _exclude_from_copy = set(["parent", "children", "_tip_cache", "_non_tip_cache"]) def __init__( self, name=None, length=None, support=None, parent=None, children=None ): self.name = name self.length = length self.support = support self.parent = parent self.children = [] self.id = None if children is not None: self.extend(children) def __repr__(self): r"""Return summary of the tree. Returns ------- str A summary of this node and all descendants Notes ----- This method returns the name of the node and a count of tips and the number of internal nodes in the tree Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c, d)root;"]) >>> repr(tree) '' """ nodes = [n for n in self.traverse(include_self=False)] n_tips = sum([n.is_tip() for n in nodes]) n_nontips = len(nodes) - n_tips classname = self.__class__.__name__ name = self.name if self.name is not None else "unnamed" return "<%s, name: %s, internal node count: %d, tips count: %d>" % ( classname, name, n_nontips, n_tips, ) def __str__(self): r"""Return string version of self, with names and distances. 
Returns ------- str Returns a Newick representation of the tree See Also -------- read write Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c);"]) >>> str(tree) '((a,b)c);\n' """ return str("".join(self.write([]))) def __iter__(self): r"""Node iter iterates over the `children`.""" return iter(self.children) def __len__(self): return len(self.children) def __getitem__(self, i): r"""Node delegates slicing to `children`.""" return self.children[i] def _adopt(self, node): r"""Update `parent` references but does NOT update `children`.""" if node.parent is not None: node.parent.remove(node) node.parent = self return node def append(self, node): r"""Add a node to self's children. Parameters ---------- node : TreeNode Node to add as a child. See Also -------- extend Notes ----- ``append`` will invalidate any node lookup caches, remove the node's parent if it exists, set the parent of node to self, and add the node to self's children. Examples -------- >>> from skbio import TreeNode >>> root = TreeNode(name="root") >>> child1 = TreeNode(name="child1") >>> child2 = TreeNode(name="child2") >>> root.append(child1) >>> root.append(child2) >>> print(root) (child1,child2)root; """ self.invalidate_caches() self.children.append(self._adopt(node)) def extend(self, nodes): r"""Add a list of nodes to self's children. Parameters ---------- nodes : list of TreeNode Nodes to add as children. See Also -------- append Notes ----- ``extend`` will invalidate any node lookup caches, remove existing parents of the nodes if they have any, set their parents to self and add the nodes to the children of self. 
Examples -------- >>> from skbio import TreeNode >>> root = TreeNode(name="root") >>> root.extend([TreeNode(name="child1"), TreeNode(name="child2")]) >>> print(root) (child1,child2)root; """ self.invalidate_caches() self.children.extend([self._adopt(n) for n in nodes[:]]) def insert(self, node, distance=None, branch_attrs=[]): r"""Insert a node into the branch connecting self and its parent. .. versionadded:: 0.6.2 Parameters ---------- node : TreeNode Node to insert. distance : float, int or None, optional Distance between self and the insertion point. Must not exceed ``self.length``. If ``None`` whereas ``self.length`` is not ``None``, will insert at the midpoint of the branch. branch_attrs : iterable of str, optional Attributes of self that should be transferred to the inserted node as they are considered as attributes of the branch. ``support`` will be automatically included as it is always a branch attribute. Raises ------ NoParentError If self has no parent. ValueError If distance is specified but branch has no length. ValueError If distance exceeds branch length. See Also -------- append Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:2)c:4,d:5)e;"]) >>> print(tree.ascii_art()) /-a /c-------| -e-------| \-b | \-d >>> tree.find("c").insert(TreeNode("x")) >>> print(tree.ascii_art()) /-a /x------- /c-------| -e-------| \-b | \-d >>> tree.find("c").length 2.0 >>> tree.find("x").length 2.0 """ if (parent := self.parent) is None: raise NoParentError("Self has no parent.") # detach node from original tree if applicable if node.parent is not None: node.parent.remove(node) # See also `_adopt`. The current code replaces the node at the same # position in the parent's list of children, instead of appending to # the end. Additionally, the current code performs cache invalidation # only once. 
self.invalidate_caches() # replace self with node in the parent's list of children node.parent = parent for i, curr_node in enumerate(parent.children): if curr_node is self: parent.children[i] = node # add self to the beginning of the node's list of children self.parent = node node.children.insert(0, self) # transfer branch attributes to new node branch_attrs = set(branch_attrs) branch_attrs.add("support") branch_attrs.discard("length") for attr in branch_attrs: setattr(node, attr, getattr(self, attr, None)) # determine insertion point if distance is None: if self.length is None: node.length = None else: self.length *= 0.5 node.length = self.length else: if self.length is None: raise ValueError("Distance is provided but branch has no length.") elif distance > self.length: raise ValueError("Distance cannot exceed branch length.") node.length = self.length - distance self.length = distance def pop(self, index=-1): r"""Remove and return a child node by its index position from self. Parameters ---------- index : int The index position in ``children`` to pop. Returns ------- TreeNode The popped child node. See Also -------- remove remove_deleted Notes ----- All node lookup caches are invalidated, and the parent reference for the popped node will be set to ``None``. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["(a,b)c;"]) >>> print(tree.pop(0)) a; """ return self._remove_node(index) def _remove_node(self, idx): r"""Perform node removal. The actual (and only) method that performs node removal. """ self.invalidate_caches() node = self.children.pop(idx) node.parent = None return node def remove(self, node): r"""Remove a node from self. Remove a `node` from `self` by identity of the node. 
Parameters ---------- node : TreeNode The node to remove from self's children Returns ------- bool `True` if the node was removed, `False` otherwise See Also -------- pop remove_deleted Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["(a,b)c;"]) >>> tree.remove(tree.children[0]) True """ for i, curr_node in enumerate(self.children): if curr_node is node: self._remove_node(i) return True return False def remove_deleted(self, func): r"""Delete nodes in which `func(node)` evaluates `True`. Remove all descendants from `self` that evaluate `True` from `func`. This has the potential to drop clades. Parameters ---------- func : a function A function that evaluates `True` when a node should be deleted See Also -------- pop remove Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["(a,b)c;"]) >>> tree.remove_deleted(lambda x: x.name == 'b') >>> print(tree) (a)c; """ for node in self.traverse(include_self=False): if func(node): node.parent.remove(node) def prune(self): r"""Reconstruct correct topology after nodes have been removed. Internal nodes with only one child will be removed and new connections will be made to reflect change. This method is useful to call following node removals as it will clean up nodes with singular children. Names and properties of singular children will override the names and properties of their parents following the prune. Node lookup caches are invalidated. 
See Also -------- shear remove pop remove_deleted Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> to_delete = tree.find('b') >>> tree.remove_deleted(lambda x: x == to_delete) >>> print(tree) ((a)c,(d,e)f)root; >>> tree.prune() >>> print(tree) ((d,e)f,a)root; """ # build up the list of nodes to remove so the topology is not altered # while traversing nodes_to_remove = [] for node in self.traverse(include_self=False): if len(node.children) == 1: nodes_to_remove.append(node) # clean up the single children nodes for node in nodes_to_remove: child = node.children[0] if child.length is None or node.length is None: child.length = child.length or node.length else: child.length += node.length if node.parent is None: continue node.parent.append(child) node.parent.remove(node) # if a single descendent from the root, the root adopts the childs # properties. we can't "delete" the root as that would be deleting # self. if len(self.children) == 1: node_to_copy = self.children[0] efc = self._exclude_from_copy for key in node_to_copy.__dict__: if key not in efc: self.__dict__[key] = deepcopy(node_to_copy.__dict__[key]) self.remove(node_to_copy) self.extend(node_to_copy.children) def shear(self, names): """Remove tips until the tree just has the desired tip names. 
Parameters ---------- names : Iterable of str The tip names on the tree to keep Returns ------- TreeNode The resulting tree Raises ------ ValueError If the names do not exist in the tree See Also -------- prune remove pop remove_deleted Examples -------- >>> from skbio import TreeNode >>> t = TreeNode.read(['((H:1,G:1):2,(R:0.5,M:0.7):3);']) >>> sheared = t.shear(['G', 'M']) >>> print(sheared) (G:3.0,M:3.7); """ tcopy = self.copy() all_tips = {n.name for n in tcopy.tips()} ids = set(names) if not ids.issubset(all_tips): raise ValueError("ids are not a subset of the tree.") marked = set() for tip in tcopy.tips(): if tip.name in ids: marked.add(tip) for anc in tip.ancestors(): if anc in marked: break else: marked.add(anc) for node in list(tcopy.traverse()): if node not in marked: node.parent.remove(node) tcopy.prune() return tcopy def _copy(self, deep, memo): """Return a copy of self.""" _copy = deepcopy if deep else copy _args = [memo] if deep else [] def __copy_node(node_to_copy): """Copy a node.""" # this is _possibly_ dangerous, we're assuming the node to copy is # of the same class as self, and has the same exclusion criteria. # however, it is potentially dangerous to mix TreeNode subclasses # within a tree, so... result = self.__class__() efc = self._exclude_from_copy for key in node_to_copy.__dict__: if key not in efc: result.__dict__[key] = _copy(node_to_copy.__dict__[key], *_args) return result root = __copy_node(self) nodes_stack = [[root, self, len(self.children)]] while nodes_stack: # check the top node, any children left unvisited? 
top = nodes_stack[-1] new_top_node, old_top_node, unvisited_children = top if unvisited_children: top[2] -= 1 old_child = old_top_node.children[-unvisited_children] new_child = __copy_node(old_child) new_top_node.append(new_child) nodes_stack.append([new_child, old_child, len(old_child.children)]) else: # no unvisited children nodes_stack.pop() return root def __copy__(self): """Return a shallow copy.""" return self._copy(False, {}) def __deepcopy__(self, memo): """Return a deep copy.""" return self._copy(True, memo) def copy(self, deep=True): r"""Return a copy of self using an iterative approach. Parameters ---------- deep : bool, optional Whether perform a deep (``True``, default) or shallow (``False``) copy of node attributes. .. versionadded:: 0.6.2 .. note:: The default value will be changed to ``False`` in 0.7.0. Returns ------- TreeNode A new copy of self. See Also -------- unrooted_copy Notes ----- This method iteratively copies the current node and its descendants. That is, if the current node is not the root of the tree, only the subtree below the node, instead of the entire tree, will be copied. All nodes and their attributes will be copied. The copies are new objects rather than references to the original objects. The distinction between deep and shallow copies only applies to each node attribute. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> tree_copy = tree.copy() >>> tree_nodes = set([id(n) for n in tree.traverse()]) >>> tree_copy_nodes = set([id(n) for n in tree_copy.traverse()]) >>> print(len(tree_nodes.intersection(tree_copy_nodes))) 0 """ return self._copy(deep, {}) def deepcopy(self): r"""Return a deep copy of self using an iterative approach. Returns ------- TreeNode A new deep copy of self. See Also -------- copy Notes ----- ``deepcopy`` is equivalent to ``copy`` with ``deep=True``, which is currently the default behavior of the latter. 
Warnings -------- ``deepcopy`` is deprecated as of ``0.6.2``. Use ``copy`` instead. """ msg = "Use copy instead." _warn_deprecated(self.__class__.deepcopy, "0.6.2", msg) return self._copy(True, {}) def unrooted_copy( self, parent=None, branch_attrs={"name", "length", "support"}, root_name="root", deep=False, ): r"""Walk the tree unrooted-style and return a copy. Parameters ---------- parent : TreeNode or None Direction of walking (from parent to self). If specified, walking to the parent will be prohibited. branch_attrs : set of str, optional Attributes of ``TreeNode`` objects that should be considered as branch attributes during the operation. .. versionadded:: 0.6.2 .. note:: ``name`` will be removed from the default in 0.7.0, as it is usually considered as an attribute of the node instead of the branch. root_name : str or None, optional Name for the new root node, if it doesn't have one. .. versionadded:: 0.6.2 .. note:: This parameter will be removed in 0.7.0, and the root node will not be renamed. deep : bool, optional Whether perform a shallow (``False``, default) or deep (``True``) copy of node attributes. .. versionadded:: 0.6.2 Returns ------- TreeNode A new copy of the tree rooted at the given node. .. versionchanged:: 0.6.2 Node attributes other than name and length will also be copied. Warnings -------- The default behavior of ``unrooted_copy`` is subject to change in 0.7.0. The new default behavior can be achieved by specifying ``branch_attrs={"length", "support"}, root_name=None``. See Also -------- copy unrooted_move Notes ----- This method recursively walks a tree from a given node in an unrooted style (i.e., directions of branches are not assumed), and copies each node it visits, such that the copy of the given node becomes the root node of a new tree and the copies of all other nodes are re-positioned accordingly, whereas the topology of the new tree will be identical to the existing one. 
Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) >>> new_tree = tree.find('d').unrooted_copy() >>> print(new_tree) (b,c,(a,((f,g)h)e)d)root; """ # future warning if branch_attrs == {"name", "length", "support"} and root_name == "root": func = self.__class__.unrooted_copy if not hasattr(func, "warned"): simplefilter("once", FutureWarning) warn( "The default behavior of `unrooted_copy` is subject to change in " "0.7.0. The new default behavior can be achieved by specifying " '`branch_attrs={"length", "support"}, root_name=None`.', FutureWarning, ) func.warned = True _copy = deepcopy if deep else copy # identify neighbors (adjacent nodes) of self, excluding the incoming node neighbors = self.neighbors(ignore=parent) # recursively copy each neighbor; they will become outgoing nodes (children) children = [ c.unrooted_copy( parent=self, branch_attrs=branch_attrs, root_name=root_name, deep=deep ) for c in neighbors ] # identify node from which branch attributes should be transferred # 1. starting point (becomes root) if parent is None: other = None # 2. walk up (parent becomes child) elif parent.parent is self: other = parent # 3. walk down (retain the same order) else: other = self # create a new node and attach children to it result = self.__class__(children=children) # transfer attributes to the new node efc = self._exclude_from_copy for key in self.__dict__: if key not in efc: source = other if key in branch_attrs else self if source is not None and key in source.__dict__: result.__dict__[key] = _copy(source.__dict__[key]) # name the new root if root_name and parent is None and result.name is None: result.name = root_name return result def unrooted_deepcopy(self, parent=None): r"""Walk the tree unrooted-style and returns a new deepcopy. Parameters ---------- parent : TreeNode or None Direction of walking (from parent to self). If specified, walking to the parent will be prohibited. 
Returns ------- TreeNode A new copy of the tree rooted at the given node. Warnings -------- ``unrooted_deepcopy`` is deprecated as of ``0.6.2``, as it generates a redundant copy of the tree. Use ``unrooted_copy`` instead. See Also -------- copy unrooted_copy root_at Notes ----- Perform a deepcopy of self and return a new copy of the tree as an unrooted copy. This is useful for defining a new root of the tree. This method calls ``unrooted_copy`` which is recursive. """ msg = "Use unrooted_copy instead." _warn_deprecated(self.__class__.unrooted_deepcopy, "0.6.2", msg) root = self.root() root.assign_ids() new_tree = root.copy() new_tree.assign_ids() new_tree_self = new_tree.find_by_id(self.id) return new_tree_self.unrooted_copy(parent, deep=True) def unrooted_move( self, parent=None, branch_attrs={"length", "support"}, ): r"""Walk the tree unrooted-style and rearrange it. .. versionadded:: 0.6.2 Parameters ---------- parent : TreeNode or None Direction of walking (from parent to self). If specified, walking to the parent will be prohibited. branch_attrs : set of str, optional Attributes of ``TreeNode`` objects that should be considered as branch attributes during the operation. See Also -------- root_at unrooted_copy Notes ----- This method recursively walks a tree from a given node in an unrooted style (i.e., directions of branches are not assumed). It rerranges the tree such that the given node becomes the root node and all other nodes are re-positioned accordingly, whereas the topology remains the same. This method manipulates the tree in place. There is no return value. The new tree should be referred to by the node where the operation started, as it has become the new root node. 
Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) >>> new_root = tree.find('d') >>> new_root.unrooted_move() >>> print(new_root) (b,c,(a,((f,g)h)i)e)d; """ # recursively add parent to children children = self.children if (old_parent := self.parent) is not None: children.append(old_parent) old_parent.unrooted_move(parent=self) # 1. starting point (becomes root) if parent is None: self.parent = None for attr in branch_attrs: setattr(self, attr, None) # 2. walk up (parent becomes child) else: for i, child in enumerate(children): if child is parent: children.pop(i) break self.parent = parent for attr in branch_attrs: setattr(self, attr, getattr(parent, attr, None)) def count(self, tips=False): r"""Get the count of nodes in the tree. Parameters ---------- tips : bool If ``True``, only return the count of tips. Returns ------- int The number of nodes. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) >>> print(tree.count()) 9 >>> print(tree.count(tips=True)) 5 """ if tips: return len(list(self.tips())) else: return len(list(self.traverse(include_self=True))) def observed_node_counts(self, tip_counts): """Return counts of node observations from counts of tip observations. Parameters ---------- tip_counts : dict of ints Counts of observations of tips. Keys correspond to tip names in ``self``, and counts are unsigned ints. Returns ------- dict Counts of observations of nodes. Keys correspond to node names (internal nodes or tips), and counts are unsigned ints. Raises ------ ValueError If a count less than one is observed. MissingNodeError If a count is provided for a tip not in the tree, or for an internal node. """ result = defaultdict(int) for tip_name, count in tip_counts.items(): if count < 1: raise ValueError("All tip counts must be greater than zero.") else: t = self.find(tip_name) if not t.is_tip(): raise MissingNodeError( "Counts can only be for tips in the tree. 
%s is an " "internal node." % t.name ) result[t] += count for internal_node in t.ancestors(): result[internal_node] += count return result def subtree(self, tip_list=None): r"""Make a copy of the subtree.""" raise NotImplementedError() def subset(self): r"""Return set of names that descend from specified node. Get the set of `name` on tips that descend from this node. Returns ------- frozenset The set of names at the tips of the clade that descends from self See Also -------- subsets compare_subsets Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) >>> sorted(tree.subset()) ['a', 'b', 'c', 'f', 'g'] """ return frozenset({i.name for i in self.tips()}) def subsets(self): r"""Return all sets of names that come from self and its descendants. Compute all subsets of tip names over `self`, or, represent a tree as a set of nested sets. Returns ------- frozenset A frozenset of frozensets of str See Also -------- subset compare_subsets Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["(((a,b)c,(d,e)f)h)root;"]) >>> subsets = tree.subsets() >>> len(subsets) 3 """ sets = [] for i in self.postorder(include_self=False): if not i.children: i.__leaf_set = frozenset([i.name]) else: leaf_set = reduce(or_, [c.__leaf_set for c in i.children]) if len(leaf_set) > 1: sets.append(leaf_set) i.__leaf_set = leaf_set return frozenset(sets) def unroot(self, side=None): r"""Convert a rooted tree into unrooted. .. versionadded:: 0.6.2 Parameters ---------- side : int, optional Which basal node (i.e., children of root) will be elevated to root. Must be 0 or 1. If not provided, will elevate the first basal node that is not a tip. See Also -------- root root_at Notes ----- In scikit-bio, every tree has a root node. A tree is considered as "rooted" if its root node has exactly two children. In contrast, an "unrooted" tree may have three (the most common case), one, or more than three children attached to its root node. 
This method will not modify the tree if it is already unrooted. This method unroots a tree by trifucating its root. Specifically, it removes one of the two basal nodes of the tree (i.e., children of the root), transfers the name of the removed node to the root, and re-attaches the removed node's children to the root. Additionally, the removed node's branch length, if available, will be added to the other basal node's branch. The outcome appears as if the root is removed and the two basal nodes are directly connected. The choice of the basal node to be elevated affects the positioning of the resulting tree, but does not affect its topology from a phylogenetic perspective, as it is considered as unrooted. This method manipulates the tree in place. There is no return value. .. note:: In the case where the basal node has just one child, the resulting tree will still appear rooted as it has two basal nodes. To avoid this scenario, call ``prune`` to remove all one-child internal nodes. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(['(((a,b)c,(d,e)f)g,(h,i)j)k;']) >>> print(tree.ascii_art()) /-a /c-------| | \-b /g-------| | | /-d | \f-------| -k-------| \-e | | /-h \j-------| \-i >>> tree.unroot() >>> print(tree.ascii_art()) /-a /c-------| | \-b | | /-d -g-------|-f-------| | \-e | | /-h \j-------| \-i """ # return original tree if already unrooted root = self.root() if len(bases := root.children) != 2: return root # choose a basal node to elevate if side is None: side = 1 if (bases[0].is_tip() and not bases[1].is_tip()) else 0 chosen, other = bases[side], bases[1 - side] # remove chosen node and re-attach its children to root root.invalidate_caches() chosen.parent = None for child in chosen.children: child.parent = root if side: root.children = [other] + chosen.children else: root.children = chosen.children + [other] # transfer basal node's name to root root.name = chosen.name # TODO: also transfer other custom node attributes # add branch 
length to the other basal node if (L := chosen.length) is not None: if other.length is not None: other.length += L else: other.length = L def _insert_above(self, above, branch_attrs=[]): """Insert a node into the branch connecting a node to its parent.""" if above is False: return self node = self.__class__() if above is True: self.insert(node, None, branch_attrs) else: self.insert(node, above, branch_attrs) return node def root_at( self, node=None, above=False, reset=False, branch_attrs=["name"], root_name="root", ): r"""Reroot the tree at the provided node. This is useful for positioning a tree with an orientation that reflects knowledge of the true root location. Parameters ---------- node : TreeNode or str, optional The node to root at. Can either be a node object or the name of the node. If not provided, will root at self. If a root node provided, will return the original tree. .. versionchanged:: 0.6.2 Becomes optional. above : bool, float, or int, optional Whether and where to insert a new root node. If ``False`` (default), the target node will serve as the root node. If ``True``, a new root node will be created and inserted at the midpoint of the branch connecting the target node and its parent. If a number, the new root will be inserted at this distance from the target node. The number ranges between 0 and branch length. .. versionadded:: 0.6.2 reset : bool, optional Whether remove the original root of a rooted tree before performing the rerooting operation. Default is ``False``. .. versionadded:: 0.6.2 .. note:: The default value will be set as ``True`` in 0.7.0. branch_attrs : iterable of str, optional Attributes of each node that should be considered as attributes of the branch connecting the node to its parent. This is important for the correct rerooting operation. "length" and "support" will be automatically included as they are always branch attributes. .. versionadded:: 0.6.2 .. 
note:: ``name`` will be removed from the default in 0.7.0, as it is usually considered as an attribute of the node instead of the branch. root_name : str or None, optional Name for the root node, if it doesn't already have one. .. versionadded:: 0.6.2 .. note:: The default value will be set as ``None`` in 0.7.0. Returns ------- TreeNode A new copy of the tree rooted at the give node. Warnings -------- The default behavior of ``root_at`` is subject to change in 0.7.0. The new default behavior can be achieved by specifying ``reset=True, branch_attrs=[], root_name=None``. See Also -------- root_at_midpoint unrooted_copy unroot Notes ----- The specified node will be come the root of the new tree. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["(((a,b)c,(d,e)f)g,h)i;"]) >>> print(tree.ascii_art()) /-a /c-------| | \-b /g-------| | | /-d -i-------| \f-------| | \-e | \-h Use the given node as the root node. This will typically create an unrooted tree (i.e., root node has three children). >>> t1 = tree.root_at("c", branch_attrs=[]) >>> print(t1) (a,b,((d,e)f,(h)i)g)c; >>> print(t1.ascii_art()) /-a | |--b -c-------| | /-d | /f-------| \g-------| \-e | \i------- /-h Insert a new root node into the branch above the given node. This will create a rooted tree (i.e., root node has two children). >>> t2 = tree.root_at("c", above=True, branch_attrs=[]) >>> print(t2) ((a,b)c,((d,e)f,(h)i)g)root; >>> print(t2.ascii_art()) /-a /c-------| | \-b -root----| | /-d | /f-------| \g-------| \-e | \i------- /-h """ # future warning if reset is False and branch_attrs == ["name"] and root_name == "root": func = self.__class__.root_at if not hasattr(func, "warned"): simplefilter("once", FutureWarning) warn( "The default behavior of `root_at` is subject to change in 0.7.0. 
" "The new default behavior can be achieved by specifying " "`reset=True, branch_attrs=[], root_name=None`.", FutureWarning, ) func.warned = True tree = self.root() if node is None: node = self elif isinstance(node, str): node = tree.find(node) if node.is_root(): return node.copy() if reset and len(tree.children) != 2: reset = False # copy the tree if it needs to be manipulated prior to walking if reset or above is not False: tree.assign_ids() new_tree = tree.copy() new_tree.assign_ids() node = new_tree.find_by_id(node.id) tree = new_tree # remove original root; we need to make sure the node itself is not the # basal node that gets removed if reset: side = None for i, base in enumerate(tree.children): if node is base: side = 1 - i break tree.unroot(side) # insert a new root node into the branch above node = node._insert_above(above, branch_attrs) branch_attrs = set(branch_attrs) branch_attrs.update(["length", "support"]) return node.unrooted_copy(branch_attrs=branch_attrs, root_name=root_name) def root_at_midpoint(self, reset=False, branch_attrs=["name"], root_name="root"): r"""Reroot the tree at the midpoint of the two tips farthest apart. Parameters ---------- reset : bool, optional Whether remove the original root of a rooted tree before performing the rerooting operation. Default is ``False``. .. versionadded:: 0.6.2 .. note:: The default value will be set as ``True`` in 0.7.0. branch_attrs : iterable of str, optional Attributes of each node that should be considered as attributes of the branch connecting the node to its parent. This is important for the correct rerooting operation. "length" and "support" will be automatically included as they are always branch attributes. .. versionadded:: 0.6.2 .. note:: ``name`` will be removed from the default in 0.7.0, as it is usually considered as an attribute of the node instead of the branch. root_name : str or None, optional Name for the new root node, if it doesn't have one. .. versionadded:: 0.6.2 .. 
note:: The default value will be set as ``None`` in 0.7.0. Returns ------- TreeNode A tree rooted at its midpoint. Raises ------ TreeError If a tip ends up being the mid point. LengthError Midpoint rooting requires `length` and will raise (indirectly) if evaluated nodes don't have length. Warnings -------- The default behavior of ``root_at_midpoint`` is subject to change in 0.7.0. The new default behavior can be achieved by specifying ``reset=True, branch_attrs=[], root_name=None``. See Also -------- root_at unrooted_copy Notes ----- The midpoint rooting (MPR) method was originally described in [1]_. References ---------- .. [1] Farris, J. S. (1972). Estimating phylogenetic trees from distance matrices. The American Naturalist, 106(951), 645-668. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)f:5,g:1)h;"]) >>> print(tree.ascii_art()) /-a /c-------| | \-b | -h-------| /-d |-f-------| | \-e | \-g >>> t = tree.root_at_midpoint(branch_attrs=[]) >>> print(t) ((d:3.0,e:4.0)f:2.0,((a:1.0,b:1.0)c:2.0,g:1.0)h:3.0)root; >>> print(t.ascii_art()) /-d /f-------| | \-e -root----| | /-a | /c-------| \h-------| \-b | \-g """ # future warning if reset is False and branch_attrs == ["name"] and root_name == "root": func = self.__class__.root_at_midpoint if not hasattr(func, "warned"): simplefilter("once", FutureWarning) warn( "The default behavior of `root_at_midpoint` is subject to change " "in 0.7.0. 
The new default behavior can be achieved by specifying " "`reset=True, branch_attrs=[], root_name=None`.", FutureWarning, ) func.warned = True tree = self.copy() if reset: tree.unroot() max_dist, tips = tree.get_max_distance() half_max_dist = max_dist / 2.0 if max_dist == 0.0: return tree tip1 = tree.find(tips[0]) tip2 = tree.find(tips[1]) lca = tree.lowest_common_ancestor([tip1, tip2]) if tip1.accumulate_to_ancestor(lca) > half_max_dist: climb_node = tip1 else: climb_node = tip2 dist_climbed = 0.0 while dist_climbed + climb_node.length < half_max_dist: dist_climbed += climb_node.length climb_node = climb_node.parent # case 1: midpoint is at the climb node's parent # make the parent node as the new root if dist_climbed + climb_node.length == half_max_dist: new_root = climb_node.parent # case 2: midpoint is on the climb node's branch to its parent # insert a new root node into the branch else: new_root = tree.__class__() climb_node.insert(new_root, half_max_dist - dist_climbed) # TODO: Here, `branch_attrs` should be added to `insert`. However, this # will cause a backward-incompatible behavior. This change will be made # in version 0.7.0, along with the removal of `name` from the default of # `branch_attrs`. branch_attrs = set(branch_attrs) branch_attrs.update(["length", "support"]) return new_root.unrooted_copy(branch_attrs=branch_attrs, root_name=root_name) def root_by_outgroup( self, outgroup, above=True, reset=True, branch_attrs=[], root_name=None ): r"""Reroot the tree with a given set of taxa as outgroup. .. versionadded:: 0.6.2 Parameters ---------- outgroup : iterable of str Taxon set to serve as outgroup. Must be a proper subset of taxa in the tree. The tree will be rooted at the lowest common ancestor (LCA) of the outgroup. above : bool, float, or int, optional Whether and where to insert a new root node. If ``False``, the LCA will serve as the root node. 
If ``True`` (default), a new root node will be created and inserted at the midpoint of the branch connecting the LCA and its parent (i.e., the midpoint between outgroup and ingroup). If a number between 0 and branch length, the new root will be inserted at this distance from the LCA. reset : bool, optional Whether remove the original root of a rooted tree before performing the rerooting operation. Default is ``True``. branch_attrs : iterable of str, optional Attributes of each node that should be considered as attributes of the branch connecting the node to its parent. This is important for the correct rerooting operation. "length" and "support" will be automatically included as they are always branch attributes. root_name : str or None, optional Name for the root node, if it doesn't already have one. Returns ------- TreeNode A tree rooted by the outgroup. Raises ------ TreeError Outgroup is not a proper subset of taxa in the tree. TreeError Outgroup is not monophyletic in the tree. Notes ----- An outgroup is a subset of taxa that are usually distantly related from the remaining taxa (ingroup). The outgroup helps with locating the root of the ingroup, which are of interest in the study. This method reroots the tree at the lowest common ancestor (LCA) of the outgroup. By default, a new root will be placed at the midpoint between the LCA of outgroup and that of ingroup. But this behavior can be customized. This method requires the outgroup to be monophyletic, i.e., it forms a single clade in the tree. If the outgroup spans across the root of the tree, the method will reroot the tree within the ingroup such that the outgroup can form a clade in the rerooted tree, prior to rooting by outgroup. 
Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(['((((a,b),(c,d)),(e,f)),g);']) >>> print(tree.ascii_art()) /-a /--------| | \-b /--------| | | /-c | \--------| /--------| \-d | | | | /-e ---------| \--------| | \-f | \-g >>> rooted = tree.root_by_outgroup(['a', 'b']) >>> print(rooted.ascii_art()) /-a /--------| | \-b | ---------| /-c | /--------| | | \-d \--------| | /-e | /--------| \--------| \-f | \-g >>> rooted = tree.root_by_outgroup(['e', 'f', 'g']) >>> print(rooted.ascii_art()) /-e /--------| /--------| \-f | | | \-g ---------| | /-c | /--------| | | \-d \--------| | /-b \--------| \-a """ outgroup = set(outgroup) if not outgroup < self.subset(): raise TreeError("Outgroup is not a proper subset of taxa in the tree.") # locate the lowest common ancestor (LCA) of outgroup in the tree lca = self.lca(outgroup) # if LCA is root (i.e., outgroup is split across basal clades), root # the tree at a tip within the ingroup and locate LCA again if lca is self: for tip in self.tips(): if tip.name not in outgroup: tree = self.root_at(tip, reset=reset, branch_attrs=branch_attrs) break lca = tree.lca(outgroup) else: tree = self # test if outgroup is monophyletic if lca.count(tips=True) > len(outgroup): raise TreeError("Outgroup is not monophyletic in the tree.") # reroot the tree at LCA return tree.root_at( lca, above=above, reset=reset, branch_attrs=branch_attrs, root_name=root_name, ) def is_tip(self): r"""Return `True` if the current node has no `children`. Returns ------- bool `True` if the node is a tip See Also -------- is_root has_children Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c);"]) >>> print(tree.is_tip()) False >>> print(tree.find('a').is_tip()) True """ return not self.children def is_root(self): r"""Return `True` if the current is a root, i.e. has no `parent`. 
        Returns
        -------
        bool
            `True` if the node is the root

        See Also
        --------
        is_tip
        has_children

        Examples
        --------
        >>> from skbio import TreeNode
        >>> tree = TreeNode.read(["((a,b)c);"])
        >>> print(tree.is_root())
        True
        >>> print(tree.find('a').is_root())
        False

        """
        # The root is the unique node with no parent.
        return self.parent is None

    def has_children(self):
        r"""Return `True` if the node has `children`.

        Returns
        -------
        bool
            `True` if the node has children.

        See Also
        --------
        is_tip
        is_root

        Examples
        --------
        >>> from skbio import TreeNode
        >>> tree = TreeNode.read(["((a,b)c);"])
        >>> print(tree.has_children())
        True
        >>> print(tree.find('a').has_children())
        False

        """
        # A node is internal exactly when it is not a tip.
        return not self.is_tip()

    def traverse(self, self_before=True, self_after=False, include_self=True):
        r"""Return iterator over descendants.

        This is a depth-first traversal. Since the trees are not binary,
        preorder and postorder traversals are possible, but inorder
        traversals would depend on the data in the tree and are not handled
        here.

        Parameters
        ----------
        self_before : bool
            includes each node before its descendants if True
        self_after : bool
            includes each node after its descendants if True
        include_self : bool
            include the initial node if True

        `self_before` and `self_after` are independent. If neither is `True`,
        only terminal nodes will be returned.

        Note that if self is terminal, it will only be included once even if
        `self_before` and `self_after` are both `True`.

        Yields
        ------
        TreeNode
            Traversed node.

        See Also
        --------
        preorder
        postorder
        pre_and_postorder
        levelorder
        tips
        non_tips

        Examples
        --------
        >>> from skbio import TreeNode
        >>> tree = TreeNode.read(["((a,b)c);"])
        >>> for node in tree.traverse():
        ...
print(node.name) None c a b """ if self_before: if self_after: return self.pre_and_postorder(include_self=include_self) else: return self.preorder(include_self=include_self) else: if self_after: return self.postorder(include_self=include_self) else: return self.tips(include_self=include_self) def preorder(self, include_self=True): r"""Perform preorder iteration over tree. Parameters ---------- include_self : bool include the initial node if True Yields ------ TreeNode Traversed node. See Also -------- traverse postorder pre_and_postorder levelorder tips non_tips Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c);"]) >>> for node in tree.preorder(): ... print(node.name) None c a b """ stack = [self] while stack: curr = stack.pop() if include_self or (curr is not self): yield curr if curr.children: stack.extend(curr.children[::-1]) def postorder(self, include_self=True): r"""Perform postorder iteration over tree. This is somewhat inelegant compared to saving the node and its index on the stack, but is 30% faster in the average case and 3x faster in the worst case (for a comb tree). Parameters ---------- include_self : bool include the initial node if True Yields ------ TreeNode Traversed node. See Also -------- traverse preorder pre_and_postorder levelorder tips non_tips Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c);"]) >>> for node in tree.postorder(): ... 
        print(node.name)
        a
        b
        c
        None

        """
        # child_index_stack[i] is the index of the next unvisited child at
        # depth i of the current descent path; curr/curr_children(_len) cache
        # the node at the top of that path to avoid repeated attribute loads.
        child_index_stack = [0]
        curr = self
        curr_children = self.children
        curr_children_len = len(curr_children)
        while 1:
            curr_index = child_index_stack[-1]
            # if there are children left, process them
            if curr_index < curr_children_len:
                curr_child = curr_children[curr_index]
                # if the current child has children, go there
                if curr_child.children:
                    child_index_stack.append(0)
                    curr = curr_child
                    curr_children = curr.children
                    curr_children_len = len(curr_children)
                    curr_index = 0
                # otherwise, yield that child
                else:
                    yield curr_child
                    child_index_stack[-1] += 1
            # if there are no children left, return self, and move to
            # self's parent
            else:
                if include_self or (curr is not self):
                    yield curr
                if curr is self:
                    break
                # Ascend one level: restore the parent's cached state and
                # advance its child index past the subtree just finished.
                curr = curr.parent
                curr_children = curr.children
                curr_children_len = len(curr_children)
                child_index_stack.pop()
                child_index_stack[-1] += 1

    def pre_and_postorder(self, include_self=True):
        r"""Perform iteration over tree, visiting node before and after.

        Parameters
        ----------
        include_self : bool
            include the initial node if True

        Yields
        ------
        TreeNode
            Traversed node.

        See Also
        --------
        traverse
        postorder
        preorder
        levelorder
        tips
        non_tips

        Examples
        --------
        >>> from skbio import TreeNode
        >>> tree = TreeNode.read(["((a,b)c);"])
        >>> for node in tree.pre_and_postorder():
        ...
print(node.name) None c a b c None """ # handle simple case first if not self.children: if include_self: yield self return child_index_stack = [0] curr = self curr_children = self.children while 1: curr_index = child_index_stack[-1] if not curr_index: if include_self or (curr is not self): yield curr # if there are children left, process them if curr_index < len(curr_children): curr_child = curr_children[curr_index] # if the current child has children, go there if curr_child.children: child_index_stack.append(0) curr = curr_child curr_children = curr.children curr_index = 0 # otherwise, yield that child else: yield curr_child child_index_stack[-1] += 1 # if there are no children left, return self, and move to # self's parent else: if include_self or (curr is not self): yield curr if curr is self: break curr = curr.parent curr_children = curr.children child_index_stack.pop() child_index_stack[-1] += 1 def levelorder(self, include_self=True): r"""Perform levelorder iteration over tree. Parameters ---------- include_self : bool include the initial node if True Yields ------ TreeNode Traversed node. See Also -------- traverse postorder preorder pre_and_postorder tips non_tips Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> for node in tree.levelorder(): ... print(node.name) None c f a b d e """ queue = [self] while queue: curr = queue.pop(0) if include_self or (curr is not self): yield curr if curr.children: queue.extend(curr.children) def tips(self, include_self=False): r"""Iterate over tips descended from `self`. Node order is consistent between calls and is ordered by a postorder traversal of the tree. Parameters ---------- include_self : bool include the initial node if True Yields ------ TreeNode Traversed node. 
See Also -------- traverse postorder preorder pre_and_postorder levelorder non_tips Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> for node in tree.tips(): ... print(node.name) a b d e """ for n in self.postorder(include_self=include_self): if n.is_tip(): yield n def non_tips(self, include_self=False): r"""Iterate over nontips descended from self. `include_self`, if `True` (default is False), will return the current node as part of non_tips if it is a non_tip. Node order is consistent between calls and is ordered by a postorder traversal of the tree. Parameters ---------- include_self : bool include the initial node if True Yields ------ TreeNode Traversed node. See Also -------- traverse postorder preorder pre_and_postorder levelorder tips Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> for node in tree.non_tips(): ... print(node.name) c f """ for n in self.postorder(include_self): if not n.is_tip(): yield n def invalidate_caches(self, attr=True): r"""Delete lookup and attribute caches. Parameters ---------- attr : bool, optional If ``True``, invalidate attribute caches created by `TreeNode.cache_attr`. See Also -------- create_caches cache_attr find """ if not self.is_root(): self.root().invalidate_caches() else: if hasattr(self, "_tip_cache"): delattr(self, "_tip_cache") if hasattr(self, "_non_tip_cache"): delattr(self, "_non_tip_cache") if hasattr(self, "_registered_caches") and attr: for node in self.traverse(): for cache in self._registered_caches: if hasattr(node, cache): delattr(node, cache) def create_caches(self): r"""Construct an internal lookup table to facilitate searching by name. Raises ------ DuplicateNodeError The tip cache requires that names are unique (with the exception of names that are ``None``). See Also -------- invalidate_caches cache_attr find Notes ----- This method will not cache nodes whose name is ``None``. 
This method will raise ``DuplicateNodeError`` if a name conflict in the tips is discovered, but will not raise if on internal nodes. This is because, in practice, the tips of a tree are required to be unique while no such requirement holds for internal nodes. """ if not self.is_root(): self.root().create_caches() else: if hasattr(self, "_tip_cache") and hasattr(self, "_non_tip_cache"): return self.invalidate_caches(attr=False) tip_cache = {} non_tip_cache = defaultdict(list) for node in self.postorder(): name = node.name if name is None: continue if node.is_tip(): if name in tip_cache: raise DuplicateNodeError( f"Tip with name '{name}' already exists." ) tip_cache[name] = node else: non_tip_cache[name].append(node) self._tip_cache = tip_cache self._non_tip_cache = non_tip_cache def find_all(self, name): r"""Find all nodes that match `name`. The first call to `find_all` will cache all nodes in the tree on the assumption that additional calls to `find_all` will be made. Parameters ---------- name : TreeNode or str The name or node to find. If `name` is `TreeNode` then all other nodes with the same name will be returned. Raises ------ MissingNodeError Raises if the node to be searched for is not found Returns ------- list of TreeNode The nodes found See Also -------- find find_by_id find_by_func Examples -------- >>> from skbio.tree import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)d,(f,g)c);"]) >>> for node in tree.find_all('c'): ... print(node.name, node.children[0].name, node.children[1].name) c a b c f g >>> for node in tree.find_all('d'): ... 
print(node.name, str(node)) d (d,e)d; d d; """ root = self.root() # if what is being passed in looks like a node, just return it if isinstance(name, root.__class__): return [name] root.create_caches() tip = root._tip_cache.get(name, None) nodes = root._non_tip_cache.get(name, []) nodes.append(tip) if tip is not None else None if not nodes: raise MissingNodeError(f"Node '{name}' is not in self.") else: return nodes def find(self, name): r"""Find a node by name. Parameters ---------- name : TreeNode or str The name of the node to find. If a ``TreeNode`` object is provided, then it is simply returned. Raises ------ MissingNodeError Raises if the node to be searched for is not found. Returns ------- TreeNode The found node. See Also -------- find_all find_by_id find_by_func Notes ----- The first call to ``find`` will cache all nodes in the tree on the assumption that additional calls to ``find`` will be made. ``find`` will first attempt to find the node in the tips. If it cannot find a corresponding tip, then it will search through the internal nodes of the tree. In practice, phylogenetic trees and other common trees in biology do not have unique internal node names. As a result, this find method will only return the first occurrence of an internal node encountered on a postorder traversal of the tree. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> print(tree.find('c').name) c """ root = self.root() # if what is being passed in looks like a node, just return it if isinstance(name, root.__class__): return name root.create_caches() node = root._tip_cache.get(name, None) if node is None: node = root._non_tip_cache.get(name, [None])[0] if node is None: raise MissingNodeError("Node %s is not in self" % name) else: return node def find_by_id(self, node_id): r"""Find a node by `id`. This search method is based from the root. 
Parameters ---------- node_id : int The `id` of a node in the tree Returns ------- TreeNode The tree node with the matching id Notes ----- This method does not cache id associations. A full traversal of the tree is performed to find a node by an id on every call. Raises ------ MissingNodeError This method will raise if the `id` cannot be found See Also -------- find find_all find_by_func Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> print(tree.find_by_id(2).name) d """ # if this method gets used frequently, then we should cache by ID # as well root = self.root() root.assign_ids() node = None for n in self.traverse(include_self=True): if n.id == node_id: node = n break if node is None: raise MissingNodeError("ID %d is not in self" % node_id) else: return node def find_by_func(self, func): r"""Find all nodes given a function. This search method is based on the current subtree, not the root. Parameters ---------- func : a function A function that accepts a TreeNode and returns `True` or `False`, where `True` indicates the node is to be yielded Yields ------ TreeNode Node found by `func`. See Also -------- find find_all find_by_id Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f);"]) >>> func = lambda x: x.parent == tree.find('c') >>> [n.name for n in tree.find_by_func(func)] ['a', 'b'] """ for node in self.traverse(include_self=True): if func(node): yield node def ancestors(self): r"""Return all ancestors back to the root. This call will return all nodes in the path back to root, but does not include the node instance that the call was made from. 
Returns ------- list of TreeNode The path, toward the root, from self Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> [node.name for node in tree.find('a').ancestors()] ['c', 'root'] """ result = [] curr = self while not curr.is_root(): result.append(curr.parent) curr = curr.parent return result def root(self): r"""Return root of the tree which contains `self`. Returns ------- TreeNode The root of the tree Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> tip_a = tree.find('a') >>> root = tip_a.root() >>> root == tree True """ curr = self while not curr.is_root(): curr = curr.parent return curr def siblings(self): r"""Return all nodes that are `children` of `self` `parent`. This call excludes `self` from the list. Returns ------- list of TreeNode The list of sibling nodes relative to self See Also -------- neighbors Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e,f)g)root;"]) >>> tip_e = tree.find('e') >>> [n.name for n in tip_e.siblings()] ['d', 'f'] """ if self.is_root(): return [] result = self.parent.children[:] result.remove(self) return result def neighbors(self, ignore=None): r"""Return all nodes that are connected to self. This call does not include `self` in the result Parameters ---------- ignore : TreeNode A node to ignore Returns ------- list of TreeNode The list of all nodes that are connected to self Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> node_c = tree.find('c') >>> [n.name for n in node_c.neighbors()] ['a', 'b', 'root'] """ nodes = [n for n in self.children + [self.parent] if n is not None] if ignore is None: return nodes else: return [n for n in nodes if n is not ignore] def lowest_common_ancestor(self, tipnames): r"""Find lowest common ancestor for a list of tips. 
Parameters ---------- tipnames : iterable of TreeNode or str The nodes of interest Returns ------- TreeNode The lowest common ancestor of the passed in nodes Raises ------ ValueError If no tips could be found in the tree, or if not all tips were found. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> nodes = [tree.find('a'), tree.find('b')] >>> lca = tree.lowest_common_ancestor(nodes) >>> print(lca.name) c >>> nodes = [tree.find('a'), tree.find('e')] >>> lca = tree.lca(nodes) # lca is an alias for convience >>> print(lca.name) root """ if len(tipnames) == 1: return self.find(next(iter(tipnames))) tips = [self.find(name) for name in tipnames] if len(tips) == 0: raise ValueError("No tips found.") nodes_to_scrub = [] for t in tips: if t.is_root(): # has to be the LCA... return t prev = t curr = t.parent while curr and not hasattr(curr, "black"): setattr(curr, "black", [prev]) nodes_to_scrub.append(curr) prev = curr curr = curr.parent # increase black count, multiple children lead to here if curr: curr.black.append(prev) curr = self while len(curr.black) == 1: curr = curr.black[0] # clean up tree for n in nodes_to_scrub: delattr(n, "black") return curr lca = lowest_common_ancestor # for convenience @classonlymethod def from_taxonomy(cls, lineage_map): r"""Construct a tree from a taxonomy. Parameters ---------- lineage_map : dict, iterable of tuples, or pd.DataFrame Mapping of taxon IDs to lineages (iterables of taxonomic units from high to low in ranking). Returns ------- TreeNode The constructed taxonomy. See Also -------- from_taxdump Examples -------- >>> from skbio.tree import TreeNode >>> lineages = [ ... ('1', ['Bacteria', 'Firmicutes', 'Clostridia']), ... ('2', ['Bacteria', 'Firmicutes', 'Bacilli']), ... ('3', ['Bacteria', 'Bacteroidetes', 'Sphingobacteria']), ... ('4', ['Archaea', 'Euryarchaeota', 'Thermoplasmata']), ... ('5', ['Archaea', 'Euryarchaeota', 'Thermoplasmata']), ... 
('6', ['Archaea', 'Euryarchaeota', 'Halobacteria']), ... ('7', ['Archaea', 'Euryarchaeota', 'Halobacteria']), ... ('8', ['Bacteria', 'Bacteroidetes', 'Sphingobacteria']), ... ('9', ['Bacteria', 'Bacteroidetes', 'Cytophagia'])] >>> tree = TreeNode.from_taxonomy(lineages) >>> print(tree.ascii_art()) /Clostridia-1 /Firmicutes | \Bacilli- /-2 /Bacteria| | | /-3 | | /Sphingobacteria | \Bacteroidetes \-8 | | ---------| \Cytophagia-9 | | /-4 | /Thermoplasmata | | \-5 \Archaea- /Euryarchaeota | /-6 \Halobacteria \-7 """ root = cls(name=None) root._lookup = {} if isinstance(lineage_map, dict): lineage_map = lineage_map.items() elif isinstance(lineage_map, pd.DataFrame): lineage_map = ((idx, row.tolist()) for idx, row in lineage_map.iterrows()) for id_, lineage in lineage_map: cur_node = root # for each name, see if we've seen it, if not, add that puppy on for name in lineage: if name in cur_node._lookup: cur_node = cur_node._lookup[name] else: new_node = cls(name=name) new_node._lookup = {} cur_node._lookup[name] = new_node cur_node.append(new_node) cur_node = new_node cur_node.append(cls(name=id_)) # scrub the lookups for node in root.non_tips(include_self=True): del node._lookup return root def _balanced_distance_to_tip(self): """Return the distance to tip from this node. The distance to every tip from this node must be equal for this to return a correct result. Returns ------- int The distance to tip of a length-balanced tree """ node = self distance = 0 while node.has_children(): distance += node.children[0].length node = node.children[0] return distance @classonlymethod def from_linkage_matrix(cls, linkage_matrix, id_list): """Return tree from SciPy linkage matrix. 
Parameters ---------- linkage_matrix : ndarray A SciPy linkage matrix as returned by `scipy.cluster.hierarchy.linkage` id_list : list The indices of the `id_list` will be used in the linkage_matrix Returns ------- TreeNode An unrooted bifurcated tree See Also -------- scipy.cluster.hierarchy.linkage """ tip_width = len(id_list) cluster_count = len(linkage_matrix) lookup_len = cluster_count + tip_width node_lookup = np.empty(lookup_len, dtype=cls) for i, name in enumerate(id_list): node_lookup[i] = cls(name=name) for i in range(tip_width, lookup_len): node_lookup[i] = cls() newest_cluster_index = cluster_count + 1 for link in linkage_matrix: child_a = node_lookup[int(link[0])] child_b = node_lookup[int(link[1])] path_length = link[2] / 2 child_a.length = path_length - child_a._balanced_distance_to_tip() child_b.length = path_length - child_b._balanced_distance_to_tip() new_cluster = node_lookup[newest_cluster_index] new_cluster.append(child_a) new_cluster.append(child_b) newest_cluster_index += 1 return node_lookup[-1] def to_taxonomy(self, allow_empty=False, filter_f=None): """Return a taxonomy representation of self. Parameters ---------- allow_empty : bool, optional Allow gaps the taxonomy (e.g., internal nodes without names). filter_f : function, optional Specify a filtering function that returns True if the lineage is to be returned. This function must accept a ``TreeNode`` as its first parameter, and a ``list`` that represents the lineage as the second parameter. Yields ------ tuple ``(tip, [lineage])`` where ``tip`` corresponds to a tip in the tree and ``[lineage]`` is the expanded names from root to tip. ``None`` and empty strings are omitted from the lineage. Notes ----- If ``allow_empty`` is ``True`` and the root node does not have a name, then that name will not be included. 
This is because it is common to have multiple domains represented in the taxonomy, which would result in a root node that does not have a name and does not make sense to represent in the output. Examples -------- >>> from skbio.tree import TreeNode >>> lineages = {'1': ['Bacteria', 'Firmicutes', 'Clostridia'], ... '2': ['Bacteria', 'Firmicutes', 'Bacilli'], ... '3': ['Bacteria', 'Bacteroidetes', 'Sphingobacteria'], ... '4': ['Archaea', 'Euryarchaeota', 'Thermoplasmata'], ... '5': ['Archaea', 'Euryarchaeota', 'Thermoplasmata'], ... '6': ['Archaea', 'Euryarchaeota', 'Halobacteria'], ... '7': ['Archaea', 'Euryarchaeota', 'Halobacteria'], ... '8': ['Bacteria', 'Bacteroidetes', 'Sphingobacteria'], ... '9': ['Bacteria', 'Bacteroidetes', 'Cytophagia']} >>> tree = TreeNode.from_taxonomy(lineages.items()) >>> lineages = sorted([(n.name, l) for n, l in tree.to_taxonomy()]) >>> for name, lineage in lineages: ... print(name, '; '.join(lineage)) 1 Bacteria; Firmicutes; Clostridia 2 Bacteria; Firmicutes; Bacilli 3 Bacteria; Bacteroidetes; Sphingobacteria 4 Archaea; Euryarchaeota; Thermoplasmata 5 Archaea; Euryarchaeota; Thermoplasmata 6 Archaea; Euryarchaeota; Halobacteria 7 Archaea; Euryarchaeota; Halobacteria 8 Bacteria; Bacteroidetes; Sphingobacteria 9 Bacteria; Bacteroidetes; Cytophagia """ if filter_f is None: def filter_f(a, b): return True self.assign_ids() seen = set() lineage = [] # visit internal nodes while traversing out to the tips, and on the # way back up for node in self.traverse(self_before=True, self_after=True): if node.is_tip(): if filter_f(node, lineage): yield (node, lineage[:]) else: if allow_empty: if node.is_root() and not node.name: continue else: if not node.name: continue if node.id in seen: lineage.pop(-1) else: lineage.append(node.name) seen.add(node.id) def to_array(self, attrs=None, nan_length_value=None): """Return an array representation of self. Parameters ---------- attrs : list of tuple or None The attributes and types to return. 
The expected form is [(attribute_name, type)]. If `None`, then `name`, `length`, and `id` are returned. nan_length_value : float, optional If provided, replaces any `nan` in the branch length vector (i.e., ``result['length']``) with this value. `nan` branch lengths can arise from an edge not having a length (common for the root node parent edge), which can making summing problematic. Returns ------- dict of array {id_index: {id: TreeNode}, child_index: ((node_id, left_child_id, right_child_id)), attr_1: array(...), ... attr_N: array(...)} Notes ----- Attribute arrays are in index order such that TreeNode.id can be used as a lookup into the array. Examples -------- >>> from skbio import TreeNode >>> t = TreeNode.read(['(((a:1,b:2,c:3)x:4,(d:5)y:6)z:7);']) >>> res = t.to_array() >>> sorted(res.keys()) ['child_index', 'id', 'id_index', 'length', 'name'] >>> res['child_index'] # doctest: +ELLIPSIS array([[4, 0, 2], [5, 3, 3], [6, 4, 5], [7, 6, 6]]... >>> for k, v in res['id_index'].items(): ... print(k, v) ... 0 a:1.0; 1 b:2.0; 2 c:3.0; 3 d:5.0; 4 (a:1.0,b:2.0,c:3.0)x:4.0; 5 (d:5.0)y:6.0; 6 ((a:1.0,b:2.0,c:3.0)x:4.0,(d:5.0)y:6.0)z:7.0; 7 (((a:1.0,b:2.0,c:3.0)x:4.0,(d:5.0)y:6.0)z:7.0); >>> res['id'] array([0, 1, 2, 3, 4, 5, 6, 7]) >>> res['name'] array(['a', 'b', 'c', 'd', 'x', 'y', 'z', None], dtype=object) """ if attrs is None: attrs = [("name", object), ("length", float), ("id", int)] else: for attr, dtype in attrs: if not hasattr(self, attr): raise AttributeError("Invalid attribute '%s'." 
% attr) id_index, child_index = self.index_tree() n = self.id + 1 # assign_ids starts at 0 tmp = [np.zeros(n, dtype=dtype) for attr, dtype in attrs] for node in self.traverse(include_self=True): n_id = node.id for idx, (attr, dtype) in enumerate(attrs): tmp[idx][n_id] = getattr(node, attr) results = {"id_index": id_index, "child_index": child_index} results.update({attr: arr for (attr, dtype), arr in zip(attrs, tmp)}) if nan_length_value is not None: length_v = results["length"] length_v[np.isnan(length_v)] = nan_length_value return results def _ascii_art(self, char1="-", show_internal=True, compact=False): LEN = 10 PAD = " " * LEN PA = " " * (LEN - 1) namestr = self._node_label() if self.children: mids = [] result = [] for c in self.children: if c is self.children[0]: char2 = "/" elif c is self.children[-1]: char2 = "\\" else: char2 = "-" (clines, mid) = c._ascii_art(char2, show_internal, compact) mids.append(mid + len(result)) result.extend(clines) if not compact: result.append("") if not compact: result.pop() (lo, hi, end) = (mids[0], mids[-1], len(result)) prefixes = ( [PAD] * (lo + 1) + [PA + "|"] * (hi - lo - 1) + [PAD] * (end - hi) ) mid = int(np.trunc((lo + hi) / 2)) prefixes[mid] = char1 + "-" * (LEN - 2) + prefixes[mid][-1] result = [p + L for (p, L) in zip(prefixes, result)] if show_internal: stem = result[mid] result[mid] = stem[0] + namestr + stem[len(namestr) + 1 :] return (result, mid) else: return ([char1 + "-" + namestr], 0) def ascii_art(self, show_internal=True, compact=False): r"""Return a string containing an ascii drawing of the tree. Note, this method calls a private recursive function and is not safe for large trees. 
Parameters ---------- show_internal : bool includes internal edge names compact : bool use exactly one line per tip Returns ------- str an ASCII formatted version of the tree Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b)c,(d,e)f)root;"]) >>> print(tree.ascii_art()) /-a /c-------| | \-b -root----| | /-d \f-------| \-e """ (lines, mid) = self._ascii_art(show_internal=show_internal, compact=compact) return "\n".join(lines) def accumulate_to_ancestor(self, ancestor): r"""Return the sum of the distance between self and ancestor. Parameters ---------- ancestor : TreeNode The node of the ancestor to accumulate distance too Returns ------- float The sum of lengths between self and ancestor Raises ------ NoParentError A NoParentError is raised if the ancestor is not an ancestor of self NoLengthError A NoLengthError is raised if one of the nodes between self and ancestor (including self) lacks a `length` attribute See Also -------- distance Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:2)c:3,(d:4,e:5)f:6)root;"]) >>> root = tree >>> tree.find('a').accumulate_to_ancestor(root) 4.0 """ accum = 0.0 curr = self while curr is not ancestor: if curr.is_root(): raise NoParentError("Provided ancestor is not in the path") if curr.length is None: raise NoLengthError( "No length on node %s found." % curr.name or "unnamed" ) accum += curr.length curr = curr.parent return accum def distance(self, other): """Return the distance between self and other. This method can be used to compute the distances between two tips, however, it is not optimized for computing pairwise tip distances. 
Parameters ---------- other : TreeNode The node to compute a distance to Returns ------- float The distance between two nodes Raises ------ NoLengthError A NoLengthError will be raised if a node without `length` is encountered See Also -------- tip_tip_distances accumulate_to_ancestor compare_tip_distances get_max_distance Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:2)c:3,(d:4,e:5)f:6)root;"]) >>> tip_a = tree.find('a') >>> tip_d = tree.find('d') >>> tip_a.distance(tip_d) 14.0 """ if self is other: return 0.0 self_ancestors = [self] + list(self.ancestors()) other_ancestors = [other] + list(other.ancestors()) if self in other_ancestors: return other.accumulate_to_ancestor(self) elif other in self_ancestors: return self.accumulate_to_ancestor(other) else: root = self.root() lca = root.lowest_common_ancestor([self, other]) accum = self.accumulate_to_ancestor(lca) accum += other.accumulate_to_ancestor(lca) return accum def _set_max_distance(self): """Propagate tip distance information up the tree. This method was originally implemented by Julia Goodrich with the intent of being able to determine max tip to tip distances between nodes on large trees efficiently. The code has been modified to track the specific tips the distance is between """ maxkey = itemgetter(0) for n in self.postorder(): if n.is_tip(): n.MaxDistTips = ((0.0, n), (0.0, n)) else: if len(n.children) == 1: raise TreeError("No support for single descedent nodes") else: tip_info = [(max(c.MaxDistTips, key=maxkey), c) for c in n.children] dists = [i[0][0] for i in tip_info] best_idx = np.argsort(dists)[-2:] (tip_a_d, tip_a), child_a = tip_info[best_idx[0]] (tip_b_d, tip_b), child_b = tip_info[best_idx[1]] tip_a_d += child_a.length or 0.0 tip_b_d += child_b.length or 0.0 n.MaxDistTips = ((tip_a_d, tip_a), (tip_b_d, tip_b)) def _get_max_distance_singledesc(self): """Return the max distance between any pair of tips. 
Also returns the tip names that it is between as a tuple """ distmtx = self.tip_tip_distances() idx_max = divmod(distmtx.data.argmax(), distmtx.shape[1]) max_pair = (distmtx.ids[idx_max[0]], distmtx.ids[idx_max[1]]) return distmtx[idx_max], max_pair def get_max_distance(self): """Return the max tip tip distance between any pair of tips. Returns ------- float The distance between the two most distant tips in the tree tuple of TreeNode The two most distant tips in the tree Raises ------ NoLengthError A NoLengthError will be thrown if a node without length is encountered See Also -------- distance tip_tip_distances compare_tip_distances Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:2)c:3,(d:4,e:5)f:6)root;"]) >>> dist, tips = tree.get_max_distance() >>> dist 16.0 >>> [n.name for n in tips] ['b', 'e'] """ # _set_max_distance will throw a TreeError if a node with a single # child is encountered try: self._set_max_distance() except TreeError: # return self._get_max_distance_singledesc() longest = 0.0 tips = [None, None] for n in self.non_tips(include_self=True): tip_a, tip_b = n.MaxDistTips dist = tip_a[0] + tip_b[0] if dist > longest: longest = dist tips = [tip_a[1], tip_b[1]] # The MaxDistTips attribute causes problems during deep copy because it # contains references to other nodes. This patch removes the attribute. for n in self.traverse(): del n.MaxDistTips return longest, tips def tip_tip_distances(self, endpoints=None): """Return distance matrix between pairs of tips, and a tip order. By default, all pairwise distances are calculated in the tree. If `endpoints` are specified, then only the distances between those tips are computed. 
Parameters ---------- endpoints : list of TreeNode or str, or None A list of TreeNode objects or names of TreeNode objects Returns ------- DistanceMatrix The distance matrix Raises ------ ValueError If any of the specified `endpoints` are not tips See Also -------- distance compare_tip_distances Notes ----- If a node does not have an associated length, 0.0 will be used and a ``RepresentationWarning`` will be raised. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a:1,b:2)c:3,(d:4,e:5)f:6)root;"]) >>> mat = tree.tip_tip_distances() >>> print(mat) 4x4 distance matrix IDs: 'a', 'b', 'd', 'e' Data: [[ 0. 3. 14. 15.] [ 3. 0. 15. 16.] [ 14. 15. 0. 9.] [ 15. 16. 9. 0.]] """ all_tips = list(self.tips()) if endpoints is None: tip_order = all_tips else: tip_order = [self.find(n) for n in endpoints] for n in tip_order: if not n.is_tip(): raise ValueError("Node with name '%s' is not a tip." % n.name) # linearize all tips in postorder # .__start, .__stop compose the slice in tip_order. 
for i, node in enumerate(all_tips): node.__start, node.__stop = i, i + 1 # the result map provides index in the result matrix result_map = {n.__start: i for i, n in enumerate(tip_order)} num_all_tips = len(all_tips) # total number of tips num_tips = len(tip_order) # total number of tips in result result = np.zeros((num_tips, num_tips), float) # tip by tip matrix distances = np.zeros((num_all_tips), float) # dist from tip to tip def update_result(): # set tip_tip distance between tips of different child for child1, child2 in combinations(node.children, 2): for tip1 in range(child1.__start, child1.__stop): if tip1 not in result_map: continue t1idx = result_map[tip1] for tip2 in range(child2.__start, child2.__stop): if tip2 not in result_map: continue t2idx = result_map[tip2] result[t1idx, t2idx] = distances[tip1] + distances[tip2] for node in self.postorder(): if not node.children: continue # subtree with solved child wedges # can possibly use np.zeros starts, stops = [], [] # to calc ._start and ._stop for curr node for child in node.children: length = child.length if length is None: warn( "`TreeNode.tip_tip_distances`: Node with name %r does " "not have an associated length, so a length of 0.0 " "will be used." % child.name, RepresentationWarning, ) length = 0.0 distances[child.__start : child.__stop] += length starts.append(child.__start) stops.append(child.__stop) node.__start, node.__stop = min(starts), max(stops) if len(node.children) > 1: update_result() return DistanceMatrix(result + result.T, [n.name for n in tip_order]) def compare_rfd(self, other, proportion=False): """Calculate the Robinson and Foulds symmetric difference. Parameters ---------- other : TreeNode A tree to compare against proportion : bool Return a proportional difference Returns ------- float The distance between the trees Notes ----- Implementation based off of code by Julia Goodrich. The original description of the algorithm can be found in [1]_. 
Raises ------ ValueError If the tip names between `self` and `other` are equal. See Also -------- compare_subsets compare_tip_distances References ---------- .. [1] Comparison of phylogenetic trees. Robinson and Foulds. Mathematical Biosciences. 1981. 53:131-141 Examples -------- >>> from skbio import TreeNode >>> tree1 = TreeNode.read(["((a,b),(c,d));"]) >>> tree2 = TreeNode.read(["(((a,b),c),d);"]) >>> tree1.compare_rfd(tree2) 2.0 """ t1names = {n.name for n in self.tips()} t2names = {n.name for n in other.tips()} if t1names != t2names: if t1names < t2names: tree1 = self tree2 = other.shear(t1names) else: tree1 = self.shear(t2names) tree2 = other else: tree1 = self tree2 = other tree1_sets = tree1.subsets() tree2_sets = tree2.subsets() not_in_both = tree1_sets.symmetric_difference(tree2_sets) dist = float(len(not_in_both)) if proportion: total_subsets = len(tree1_sets) + len(tree2_sets) dist /= total_subsets return dist def compare_subsets(self, other, exclude_absent_taxa=False): """Return fraction of overlapping subsets where self and other differ. Names present in only one of the two trees will count as mismatches, if you don't want this behavior, strip out the non-matching tips first. 
Parameters ---------- other : TreeNode The tree to compare exclude_absent_taxa : bool Strip out names that don't occur in both trees Returns ------- float The fraction of overlapping subsets that differ between the trees See Also -------- compare_rfd compare_tip_distances subsets Examples -------- >>> from skbio import TreeNode >>> tree1 = TreeNode.read(["((a,b),(c,d));"]) >>> tree2 = TreeNode.read(["(((a,b),c),d);"]) >>> tree1.compare_subsets(tree2) 0.5 """ self_sets, other_sets = self.subsets(), other.subsets() if exclude_absent_taxa: in_both = self.subset() & other.subset() self_sets = (i & in_both for i in self_sets) self_sets = frozenset({i for i in self_sets if len(i) > 1}) other_sets = (i & in_both for i in other_sets) other_sets = frozenset({i for i in other_sets if len(i) > 1}) total_subsets = len(self_sets) + len(other_sets) intersection_length = len(self_sets & other_sets) if not total_subsets: # no common subsets after filtering, so max dist return 1 return 1 - (2 * intersection_length / float(total_subsets)) def compare_tip_distances( self, other, sample=None, dist_f=distance_from_r, shuffle_f=np.random.shuffle ): """Compare self to other using tip-to-tip distance matrices. Value returned is `dist_f(m1, m2)` for the two matrices. Default is to use the Pearson correlation coefficient, with +1 giving a distance of 0 and -1 giving a distance of +1 (the maximum possible value). Depending on the application, you might instead want to use distance_from_r_squared, which counts correlations of both +1 and -1 as identical (0 distance). Note: automatically strips out the names that don't match (this is necessary for this method because the distance between non-matching names and matching names is undefined in the tree where they don't match, and because we need to reorder the names in the two trees to match up the distance matrices). 
Parameters ---------- other : TreeNode The tree to compare sample : int or None Randomly subsample the tips in common between the trees to compare. This is useful when comparing very large trees. dist_f : function The distance function used to compare two the tip-tip distance matrices shuffle_f : function The shuffling function used if `sample` is not None Returns ------- float The distance between the trees Raises ------ ValueError A ValueError is raised if there does not exist common tips between the trees See Also -------- compare_subsets compare_rfd Examples -------- >>> from skbio import TreeNode >>> # note, only three common taxa between the trees >>> tree1 = TreeNode.read(["((a:1,b:1):2,(c:0.5,X:0.7):3);"]) >>> tree2 = TreeNode.read(["(((a:1,b:1,Y:1):2,c:3):1,Z:4);"]) >>> dist = tree1.compare_tip_distances(tree2) >>> print("%.9f" % dist) 0.000133446 """ self_names = {i.name: i for i in self.tips()} other_names = {i.name: i for i in other.tips()} common_names = frozenset(self_names) & frozenset(other_names) common_names = list(common_names) if not common_names: raise ValueError("No tip names in common between the two trees.") if len(common_names) <= 2: return 1 # the two trees must match by definition in this case if sample is not None: shuffle_f(common_names) common_names = common_names[:sample] self_nodes = [self_names[k] for k in common_names] other_nodes = [other_names[k] for k in common_names] self_matrix = self.tip_tip_distances(endpoints=self_nodes) other_matrix = other.tip_tip_distances(endpoints=other_nodes) return dist_f(self_matrix, other_matrix) def bifurcate(self, insert_length=None): r"""Reorder the tree into a bifurcating tree. All nodes that have more than two children will have additional intermediate nodes inserted to ensure that every node has only two children. Parameters ---------- insert_length : int, optional The branch length assigned to all inserted nodes. 
See Also -------- prune Notes ----- Any nodes that have a single child can be collapsed using the prune method to create strictly bifurcating trees. Examples -------- >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b,g,h)c,(d,e)f)root;"]) >>> print(tree.ascii_art()) /-a | |--b /c-------| | |--g | | -root----| \-h | | /-d \f-------| \-e >>> tree.bifurcate() >>> print(tree.ascii_art()) /-h /c-------| | | /-g | \--------| | | /-a -root----| \--------| | \-b | | /-d \f-------| \-e """ for n in self.traverse(include_self=True): if len(n.children) > 2: stack = n.children while len(stack) > 2: ind = stack.pop() intermediate = self.__class__() intermediate.length = insert_length intermediate.extend(stack) n.append(intermediate) for k in stack: n.remove(k) n.extend([ind, intermediate]) def index_tree(self): """Index a tree for rapid lookups within a tree array. Indexes nodes in-place as `n._leaf_index`. Returns ------- dict A mapping {node_id: TreeNode} np.array of ints This arrays describes the IDs of every internal node, and the ID range of the immediate descendents. The first column in the array corresponds to node_id. The second column is the left most descendent's ID. The third column is the right most descendent's ID. """ self.assign_ids() id_index = {} child_index = [] for n in self.postorder(): for c in n.children: id_index[c.id] = c if c: # c has children itself, so need to add to result child_index.append((c.id, c.children[0].id, c.children[-1].id)) # handle root, which should be t itself id_index[self.id] = self # only want to add to the child_index if self has children... if self.children: child_index.append((self.id, self.children[0].id, self.children[-1].id)) child_index = np.asarray(child_index, dtype=np.int64) child_index = np.atleast_2d(child_index) return id_index, child_index def assign_ids(self): """Assign topologically stable unique ids to self. Following the call, all nodes in the tree will have their id attribute set. 
""" curr_index = 0 for n in self.postorder(): for c in n.children: c.id = curr_index curr_index += 1 self.id = curr_index def descending_branch_length(self, tip_subset=None): """Find total descending branch length from self or subset of self tips. Parameters ---------- tip_subset : Iterable, or None If None, the total descending branch length for all tips in the tree will be returned. If a list of tips is provided then only the total descending branch length associated with those tips will be returned. Returns ------- float The total descending branch length for the specified set of tips. Raises ------ ValueError A ValueError is raised if the list of tips supplied to tip_subset contains internal nodes or non-tips. Notes ----- This function replicates cogent's totalDescendingBranch Length method and extends that method to allow the calculation of total descending branch length of a subset of the tips if requested. The postorder guarantees that the function will always be able to add the descending branch length if the node is not a tip. Nodes with no length will have their length set to 0. The root length (if it exists) is ignored. Examples -------- >>> from skbio import TreeNode >>> tr = TreeNode.read(["(((A:.1,B:1.2)C:.6,(D:.9,E:.6)F:.9)G:2.4," ... 
"(H:.4,I:.5)J:1.3)K;"]) >>> tdbl = tr.descending_branch_length() >>> sdbl = tr.descending_branch_length(['A','E']) >>> print(round(tdbl, 1), round(sdbl, 1)) 8.9 2.2 """ self.assign_ids() if tip_subset is not None: all_tips = self.subset() if not set(tip_subset).issubset(all_tips): raise ValueError("tip_subset contains ids that aren't tip " "names.") lca = self.lowest_common_ancestor(tip_subset) ancestors = {} for tip in tip_subset: curr = self.find(tip) while curr is not lca: ancestors[curr.id] = curr.length if curr.length is not None else 0.0 curr = curr.parent return sum(ancestors.values()) else: return sum( n.length for n in self.postorder(include_self=False) if n.length is not None ) def cache_attr(self, func, cache_attrname, cache_type=list): """Cache attributes on internal nodes of the tree. Parameters ---------- func : function func will be provided the node currently being evaluated and must return a list of item (or items) to cache from that node or an empty list. cache_attrname : str Name of the attribute to decorate on containing the cached values cache_type : {set, frozenset, list} The type of the cache Notes ----- This method is particularly useful if you need to frequently look up attributes that would normally require a traversal of the tree. WARNING: any cache created by this method will be invalidated if the topology of the tree changes (e.g., if `TreeNode.invalidate_caches` is called). Raises ------ TypeError If an cache_type that is not a `set` or a `list` is specified. Examples -------- Cache the tip names of the tree on its internal nodes >>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b,(c,d)e)f,(g,h)i)root;"]) >>> f = lambda n: [n.name] if n.is_tip() else [] >>> tree.cache_attr(f, 'tip_names') >>> for n in tree.traverse(include_self=True): ... 
print("Node name: %s, cache: %r" % (n.name, n.tip_names)) Node name: root, cache: ['a', 'b', 'c', 'd', 'g', 'h'] Node name: f, cache: ['a', 'b', 'c', 'd'] Node name: a, cache: ['a'] Node name: b, cache: ['b'] Node name: e, cache: ['c', 'd'] Node name: c, cache: ['c'] Node name: d, cache: ['d'] Node name: i, cache: ['g', 'h'] Node name: g, cache: ['g'] Node name: h, cache: ['h'] """ if cache_type in (set, frozenset): def reduce_f(a, b): return a | b elif cache_type is list: def reduce_f(a, b): return a + b else: raise TypeError("Only list, set and frozenset are supported.") for node in self.postorder(include_self=True): if not hasattr(node, "_registered_caches"): node._registered_caches = set() node._registered_caches.add(cache_attrname) cached = [getattr(c, cache_attrname) for c in node.children] cached.append(cache_type(func(node))) setattr(node, cache_attrname, reduce(reduce_f, cached)) def shuffle(self, k=None, names=None, shuffle_f=np.random.shuffle, n=1): """Yield trees with shuffled tip names. Parameters ---------- k : int, optional The number of tips to shuffle. If k is not `None`, k tips are randomly selected, and only those names will be shuffled. names : list, optional The specific tip names to shuffle. k and names cannot be specified at the same time. shuffle_f : func Shuffle method, this function must accept a list and modify inplace. n : int, optional The number of iterations to perform. Value must be > 0 and `np.inf` can be specified for an infinite number of iterations. Notes ----- Tip names are shuffled inplace. If neither `k` nor `names` are provided, all tips are shuffled. Yields ------ TreeNode Tree with shuffled tip names. Raises ------ ValueError If `k` is < 2 If `n` is < 1 ValueError If both `k` and `names` are specified MissingNodeError If `names` is specified but one of the names cannot be found Examples -------- Alternate the names on two of the tips, 'a', and 'b', and do this 5 times. 
>>> from skbio import TreeNode >>> tree = TreeNode.read(["((a,b),(c,d));"]) >>> rev = lambda items: items.reverse() >>> shuffler = tree.shuffle(names=['a', 'b'], shuffle_f=rev, n=5) >>> for shuffled_tree in shuffler: ... print(shuffled_tree) ((b,a),(c,d)); ((a,b),(c,d)); ((b,a),(c,d)); ((a,b),(c,d)); ((b,a),(c,d)); """ if k is not None and k < 2: raise ValueError("k must be None or >= 2") if k is not None and names is not None: raise ValueError("n and names cannot be specified at the sametime") if n < 1: raise ValueError("n must be > 0") self.assign_ids() if names is None: all_tips = list(self.tips()) if n is None: n = len(all_tips) shuffle_f(all_tips) names = [tip.name for tip in all_tips[:k]] nodes = [self.find(name) for name in names] # Since the names are being shuffled, the association between ID and # name is no longer reliable self.invalidate_caches() counter = 0 while counter < n: shuffle_f(names) for node, name in zip(nodes, names): node.name = name yield self counter += 1 def _extract_support(self): """Extract the support value from a node label, if available. Returns ------- tuple of int, float or None The support value extracted from the node label str or None The node label with the support value stripped """ support, label = None, None if self.name: # separate support value from node name by the first colon left, _, right = self.name.partition(":") try: support = int(left) except ValueError: try: support = float(left) except ValueError: pass # strip support value from node name label = right or None if support is not None else self.name return support, label def _node_label(self): """Generate a node label. The label will be in the format of "support:name" if both exist, or "support" or "name" if either exists. 
def assign_supports(self):
    """Extract support values from internal node labels of a tree.

    Notes
    -----
    A "support value" measures the confidence or frequency of the
    incoming branch (the branch from parent to self) of an internal
    node in a tree. Roots and tips do not have support values.

    To extract a support value from a node label, this method reads
    from left and stops at the first ":" (if any), and attempts to
    convert it to a number. If the conversion succeeds, the value is
    stripped from the label and stored on the node's ``support``
    property; otherwise ``support`` is set to None.

    IMPORTANT: mathematically, "support value" is a property of a
    branch, not a node, although they are usually attached to nodes in
    tree file formats [1]_.

    References
    ----------
    .. [1] Czech, Lucas, Jaime Huerta-Cepas, and Alexandros Stamatakis.
       "A Critical Review on the Use of Support Values in Tree Viewers
       and Bioinformatics Toolkits." Molecular biology and evolution
       34.6 (2017): 1535-1542.

    Examples
    --------
    >>> from skbio import TreeNode
    >>> newick = "((a,b)95,(c,d):1.1,(e,f)'80:speciesA':1.0);"
    >>> tree = TreeNode.read([newick])
    >>> tree.assign_supports()
    >>> tree.lca(['a', 'b']).support
    95
    >>> tree.lca(['c', 'd']).support is None
    True
    >>> tree.lca(['e', 'f']).support
    80
    >>> tree.lca(['e', 'f']).name
    'speciesA'

    """
    for node in self.traverse():
        # Only internal, non-root nodes can carry a support value;
        # everything else is explicitly cleared to None.
        is_internal = not (node.is_root() or node.is_tip())
        if is_internal:
            node.support, node.name = node._extract_support()
        else:
            node.support = None
def unpack_by_func(self, func):
    """Unpack internal nodes of a tree that meet certain criteria.

    Parameters
    ----------
    func : function
        a function that accepts a TreeNode and returns `True` or
        `False`, where `True` indicates the node is to be unpacked

    See Also
    --------
    unpack
    prune

    Examples
    --------
    >>> from skbio import TreeNode
    >>> tree = TreeNode.read(['((c:2,d:3)a:1,(e:1,f:2)b:2);'])
    >>> tree.unpack_by_func(lambda x: x.length <= 1)
    >>> print(tree)
    ((e:1.0,f:2.0)b:2.0,c:3.0,d:4.0);
    <BLANKLINE>
    >>> tree = TreeNode.read(['(((a,b)85,(c,d)78)75,(e,(f,g)64)80);'])
    >>> tree.assign_supports()
    >>> tree.unpack_by_func(lambda x: x.support < 75)
    >>> print(tree)
    (((a,b)85,(c,d)78)75,(e,f,g)80);
    <BLANKLINE>
    """
    # Collect the matching internal nodes first, then unpack them, so
    # the topology is not mutated while still being traversed.
    targets = [node for node in self.non_tips(include_self=False) if func(node)]
    for node in targets:
        node.unpack()
Parameters ---------- nodes : pd.DataFrame Taxon hierarchy names : pd.DataFrame or dict, optional Taxon names Returns ------- TreeNode The constructed tree Notes ----- ``nodes`` and ``names`` correspond to "nodes.dmp" and "names.dmp" of the NCBI taxonomy database. The should be read into data frames using ``skbio.io.read`` prior to this operation. Alternatively, ``names`` may be provided as a dictionary. If ``names`` is omitted, taxonomy IDs be used as taxon names. Raises ------ ValueError If there is no top-level node ValueError If there are more than one top-level node See Also -------- from_taxonomy skbio.io.format.taxdump Examples -------- >>> import pandas as pd >>> from skbio.tree import TreeNode >>> nodes = pd.DataFrame([ ... [1, 1, 'no rank'], ... [2, 1, 'domain'], ... [3, 1, 'domain'], ... [4, 2, 'phylum'], ... [5, 2, 'phylum']], columns=[ ... 'tax_id', 'parent_tax_id', 'rank']).set_index('tax_id') >>> names = {1: 'root', 2: 'Bacteria', 3: 'Archaea', ... 4: 'Firmicutes', 5: 'Bacteroidetes'} >>> tree = TreeNode.from_taxdump(nodes, names) >>> print(tree.ascii_art()) /-Firmicutes /Bacteria| -root----| \-Bacteroidetes | \-Archaea """ # identify top level of hierarchy tops = nodes[nodes["parent_tax_id"] == nodes.index] # validate root uniqueness n_top = tops.shape[0] if n_top == 0: raise ValueError("There is no top-level node.") elif n_top > 1: raise ValueError("There are more than one top-level node.") # get root taxid root_id = tops.index[0] # get parent-to-child(ren) map to_children = { p: g.index.tolist() for p, g in nodes[nodes.index != root_id].groupby("parent_tax_id") } # get rank map ranks = nodes["rank"].to_dict() # get taxon-to-name map # if not provided, use tax_id as name if names is None: names = {x: str(x) for x in nodes.index} # use "scientific name" as name elif isinstance(names, pd.DataFrame): names = names[names["name_class"] == "scientific name"][ "name_txt" ].to_dict() # initiate tree tree = cls(names[root_id]) tree.id = root_id tree.rank = 
ranks[root_id] # helper for extending tree def _extend_tree(node): self_id = node.id if self_id not in to_children: return children = [] for id_ in to_children[self_id]: child = TreeNode(names[id_]) child.id = id_ child.rank = ranks[id_] _extend_tree(child) children.append(child) node.extend(children) # extend tree _extend_tree(tree) return tree scikit-bio-0.6.2/skbio/tree/tests/000077500000000000000000000000001464262511300167705ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/tree/tests/__init__.py000066400000000000000000000005411464262511300211010ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/tree/tests/test_majority_rule.py000066400000000000000000000172531464262511300232760ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import io from unittest import TestCase, main import numpy as np from skbio import TreeNode from skbio.tree import majority_rule from skbio.tree._majority_rule import (_walk_clades, _filter_clades, _build_trees) class MajorityRuleTests(TestCase): def test_majority_rule(self): trees = [ TreeNode.read( io.StringIO("(A,(B,(H,(D,(J,(((G,E),(F,I)),C))))));")), TreeNode.read( io.StringIO("(A,(B,(D,((J,H),(((G,E),(F,I)),C)))));")), TreeNode.read( io.StringIO("(A,(B,(D,(H,(J,(((G,E),(F,I)),C))))));")), TreeNode.read( io.StringIO("(A,(B,(E,(G,((F,I),((J,(H,D)),C))))));")), TreeNode.read( io.StringIO("(A,(B,(E,(G,((F,I),(((J,H),D),C))))));")), TreeNode.read( io.StringIO("(A,(B,(E,((F,I),(G,((J,(H,D)),C))))));")), TreeNode.read( io.StringIO("(A,(B,(E,((F,I),(G,(((J,H),D),C))))));")), TreeNode.read( io.StringIO("(A,(B,(E,((G,(F,I)),((J,(H,D)),C)))));")), TreeNode.read( io.StringIO("(A,(B,(E,((G,(F,I)),(((J,H),D),C)))));"))] exp = TreeNode.read(io.StringIO("(((E,(G,(F,I),(C,(D,J,H)))),B),A);")) obs = majority_rule(trees) self.assertEqual(exp.compare_subsets(obs[0]), 0.0) self.assertEqual(len(obs), 1) tree = obs[0] exp_supports = sorted([9.0, 9.0, 9.0, 6.0, 6.0, 6.0]) obs_supports = sorted([n.support for n in tree.non_tips()]) self.assertEqual(obs_supports, exp_supports) obs = majority_rule(trees, weights=np.ones(len(trees)) * 2) self.assertEqual(exp.compare_subsets(obs[0]), 0.0) self.assertEqual(len(obs), 1) tree = obs[0] exp_supports = sorted([18.0, 18.0, 12.0, 18.0, 12.0, 12.0]) obs_supports = sorted([n.support for n in tree.non_tips()]) with self.assertRaises(ValueError): majority_rule(trees, weights=[1, 2]) def test_majority_rule_multiple_trees(self): trees = [ TreeNode.read(io.StringIO("((a,b),(c,d),(e,f));")), TreeNode.read(io.StringIO("(a,(c,d),b,(e,f));")), TreeNode.read(io.StringIO("((c,d),(e,f),b);")), TreeNode.read(io.StringIO("(a,(c,d),(e,f));"))] trees = majority_rule(trees) 
self.assertEqual(len(trees), 4) for tree in trees: self.assertIs(type(tree), TreeNode) exp = set([ frozenset(['a']), frozenset(['b']), frozenset([None, 'c', 'd']), frozenset([None, 'e', 'f'])]) obs = set([frozenset([n.name for n in t.traverse()]) for t in trees]) self.assertEqual(obs, exp) def test_majority_rule_tree_node_class(self): class TreeNodeSubclass(TreeNode): pass trees = [ TreeNode.read(io.StringIO("((a,b),(c,d),(e,f));")), TreeNode.read(io.StringIO("(a,(c,d),b,(e,f));")), TreeNode.read(io.StringIO("((c,d),(e,f),b);")), TreeNode.read(io.StringIO("(a,(c,d),(e,f));"))] trees = majority_rule(trees, tree_node_class=TreeNodeSubclass) self.assertEqual(len(trees), 4) for tree in trees: self.assertIs(type(tree), TreeNodeSubclass) exp = set([ frozenset(['a']), frozenset(['b']), frozenset([None, 'c', 'd']), frozenset([None, 'e', 'f'])]) obs = set([frozenset([n.name for n in t.traverse()]) for t in trees]) self.assertEqual(obs, exp) def test_walk_clades(self): trees = [TreeNode.read(io.StringIO("((A,B),(D,E));")), TreeNode.read(io.StringIO("((A,B),(D,(E,X)));"))] exp_clades = [ (frozenset(['A']), 2.0), (frozenset(['B']), 2.0), (frozenset(['A', 'B']), 2.0), (frozenset(['D', 'E']), 1.0), (frozenset(['D', 'E', 'A', 'B']), 1.0), (frozenset(['D']), 2.0), (frozenset(['E']), 2.0), (frozenset(['X']), 1.0), (frozenset(['E', 'X']), 1.0), (frozenset(['D', 'E', 'X']), 1.0), (frozenset(['A', 'B', 'D', 'E', 'X']), 1.0)] exp_lengths_nolength = { frozenset(['A']): None, frozenset(['B']): None, frozenset(['A', 'B']): None, frozenset(['D', 'E']): None, frozenset(['D', 'E', 'A', 'B']): None, frozenset(['D']): None, frozenset(['E']): None, frozenset(['X']): None, frozenset(['E', 'X']): None, frozenset(['D', 'E', 'X']): None, frozenset(['A', 'B', 'D', 'E', 'X']): None} exp_lengths = { frozenset(['A']): 2.0, frozenset(['B']): 2.0, frozenset(['A', 'B']): 2.0, frozenset(['D', 'E']): 1.0, frozenset(['D', 'E', 'A', 'B']): 1.0, frozenset(['D']): 2.0, frozenset(['E']): 2.0, frozenset(['X']): 
1.0, frozenset(['E', 'X']): 1.0, frozenset(['D', 'E', 'X']): 1.0, frozenset(['A', 'B', 'D', 'E', 'X']): 1.0} obs_clades, obs_lengths = _walk_clades(trees, np.ones(len(trees))) self.assertEqual(set(obs_clades), set(exp_clades)) self.assertEqual(obs_lengths, exp_lengths_nolength) for t in trees: for n in t.traverse(include_self=True): n.length = 2.0 obs_clades, obs_lengths = _walk_clades(trees, np.ones(len(trees))) self.assertEqual(set(obs_clades), set(exp_clades)) self.assertEqual(obs_lengths, exp_lengths) def test_filter_clades(self): clade_counts = [(frozenset(['A', 'B']), 8), (frozenset(['A', 'C']), 7), (frozenset(['A']), 6), (frozenset(['B']), 5)] obs = _filter_clades(clade_counts, 2) exp = {frozenset(['A', 'B']): 8, frozenset(['A']): 6, frozenset(['B']): 5} self.assertEqual(obs, exp) clade_counts = [(frozenset(['A']), 8), (frozenset(['B']), 7), (frozenset(['C']), 7), (frozenset(['A', 'B']), 6), (frozenset(['A', 'B', 'C']), 5), (frozenset(['D']), 2)] obs = _filter_clades(clade_counts, 4) exp = {frozenset(['A']): 8, frozenset(['B']): 7, frozenset(['C']): 7, frozenset(['A', 'B']): 6, frozenset(['A', 'B', 'C']): 5} self.assertEqual(obs, exp) def test_build_trees(self): clade_counts = {frozenset(['A', 'B']): 6, frozenset(['A']): 7, frozenset(['B']): 8} edge_lengths = {frozenset(['A', 'B']): 1, frozenset(['A']): 2, frozenset(['B']): 3} tree = _build_trees(clade_counts, edge_lengths, 'foo', TreeNode)[0] self.assertEqual(tree.foo, 6) tree_foos = set([c.foo for c in tree.children]) tree_lens = set([c.length for c in tree.children]) self.assertEqual(tree_foos, set([7, 8])) self.assertEqual(tree_lens, set([2, 3])) if __name__ == '__main__': main() scikit-bio-0.6.2/skbio/tree/tests/test_nj.py000066400000000000000000000444511464262511300210200ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. 
# # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io from unittest import TestCase, main from skbio import DistanceMatrix, TreeNode, nj from skbio.tree._nj import ( _compute_q, _compute_collapsed_dm, _lowest_index, _pair_members_to_new_node, nni, _perform_swap, _average_distance, _tip_or_root, _average_distance_upper, _subtree_count, _swap_length, _swap_heap, _average_subtree_distance, _average_distance_matrix, _edge_estimation) class NjTests(TestCase): def setUp(self): data1 = [[0, 5, 9, 9, 8], [5, 0, 10, 10, 9], [9, 10, 0, 8, 7], [9, 10, 8, 0, 3], [8, 9, 7, 3, 0]] ids1 = list('abcde') self.dm1 = DistanceMatrix(data1, ids1) # this newick string was confirmed against http://www.trex.uqam.ca/ # which generated the following (isomorphic) newick string: # (d:2.0000,e:1.0000,(c:4.0000,(a:2.0000,b:3.0000):3.0000):2.0000); self.expected1_str = ("(d:2.000000, (c:4.000000, (b:3.000000," " a:2.000000):3.000000):2.000000, e:1.000000);") self.expected1_TreeNode = TreeNode.read( io.StringIO(self.expected1_str)) # For nni testing an arbitrary tree is given alongside the distance # matrix. Tree topologies are equivalent to that of the unrooted tree # of the above newick string. 
self.pre1_nni_str = ("(((b,d),(e,c)))a;") self.pre1_nni_TreeNode = TreeNode.read( io.StringIO(self.pre1_nni_str)) self.post1_nni_str = ("((((e:1.0,d:2.0):2.0,c:4.0):3.0,b:3.0):2.0)a;") self.post1_nni_TreeNode = TreeNode.read( io.StringIO(self.post1_nni_str)) # this example was pulled from the Phylip manual # http://evolution.genetics.washington.edu/phylip/doc/neighbor.html data2 = [[0.0000, 1.6866, 1.7198, 1.6606, 1.5243, 1.6043, 1.5905], [1.6866, 0.0000, 1.5232, 1.4841, 1.4465, 1.4389, 1.4629], [1.7198, 1.5232, 0.0000, 0.7115, 0.5958, 0.6179, 0.5583], [1.6606, 1.4841, 0.7115, 0.0000, 0.4631, 0.5061, 0.4710], [1.5243, 1.4465, 0.5958, 0.4631, 0.0000, 0.3484, 0.3083], [1.6043, 1.4389, 0.6179, 0.5061, 0.3484, 0.0000, 0.2692], [1.5905, 1.4629, 0.5583, 0.4710, 0.3083, 0.2692, 0.0000]] ids2 = ["Bovine", "Mouse", "Gibbon", "Orang", "Gorilla", "Chimp", "Human"] self.dm2 = DistanceMatrix(data2, ids2) self.expected2_str = ("(Mouse:0.76891, (Gibbon:0.35793, (Orang:0.28469" ", (Gorilla:0.15393, (Chimp:0.15167, Human:0.117" "53):0.03982):0.02696):0.04648):0.42027, Bovine:" "0.91769);") self.expected2_TreeNode = TreeNode.read( io.StringIO(self.expected2_str)) self.pre2_nni_str = ("(((Mouse,Gorilla),(Gibbon,(Bovine,(Orang" ",Chimp)))))Human;") self.pre2_nni_TreeNode = TreeNode.read( io.StringIO(self.pre2_nni_str)) self.post2_nni_str = ("((((((Bovine:0.9117125,Mouse:0.7748875):0.42773" "33,Gibbon:0.3504666):0.0408666,Orang:0.2809083)" ":0.0345694,Gorilla:0.1475249):0.0414812,Chimp:0" ".1470600):0.1221399)Human;") self.post2_nni_TreeNode = TreeNode.read( io.StringIO(self.post2_nni_str)) data3 = [[0, 5, 4, 7, 6, 8], [5, 0, 7, 10, 9, 11], [4, 7, 0, 7, 6, 8], [7, 10, 7, 0, 5, 8], [6, 9, 6, 5, 0, 8], [8, 11, 8, 8, 8, 0]] ids3 = map(str, range(6)) self.dm3 = DistanceMatrix(data3, ids3) self.expected3_str = ("((((0:1.000000,1:4.000000):1.000000,2:2.000000" "):1.250000,5:4.750000):0.750000,3:2.750000,4:2." 
"250000);") self.expected3_TreeNode = TreeNode.read( io.StringIO(self.expected3_str)) self.pre3_nni_str = ("((1,(((5,2),4),3)))0;") self.pre3_nni_TreeNode = TreeNode.read( io.StringIO(self.pre3_nni_str)) self.post3_nni_str = ("((1:4.0,((5:4.75,(4:2.0,3:3.0):0.75):1.25" ",2:2.0):1.0):1.0)0;") self.post3_nni_TreeNode = TreeNode.read( io.StringIO(self.post3_nni_str)) # this dm can yield negative branch lengths for both nj and nni data4 = [[0, 5, 9, 9, 800], [5, 0, 10, 10, 9], [9, 10, 0, 8, 7], [9, 10, 8, 0, 3], [800, 9, 7, 3, 0]] ids4 = list('abcde') self.dm4 = DistanceMatrix(data4, ids4) def test_nj_dm1(self): self.assertEqual(nj(self.dm1, result_constructor=str), self.expected1_str) # what is the correct way to compare TreeNode objects for equality? actual_TreeNode = nj(self.dm1) # precision error on ARM: 1.6653345369377348e-16 != 0.0 self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.expected1_TreeNode), 0.0, places=10) def test_nj_dm2(self): actual_TreeNode = nj(self.dm2) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.expected2_TreeNode), 0.0) def test_nj_dm3(self): actual_TreeNode = nj(self.dm3) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.expected3_TreeNode), 0.0) def test_nj_zero_branch_length(self): # no nodes have negative branch length when we disallow negative # branch length. 
self is excluded as branch length is None tree = nj(self.dm4) for n in tree.postorder(include_self=False): self.assertTrue(n.length >= 0) # only tips associated with the large distance in the input # have positive branch lengths when we allow negative branch # length tree = nj(self.dm4, False) self.assertTrue(tree.find('a').length > 0) self.assertTrue(tree.find('b').length < 0) self.assertTrue(tree.find('c').length < 0) self.assertTrue(tree.find('d').length < 0) self.assertTrue(tree.find('e').length > 0) def test_nj_trivial(self): data = [[0, 3, 2], [3, 0, 3], [2, 3, 0]] dm = DistanceMatrix(data, list('abc')) expected_str = "(b:2.000000, a:1.000000, c:1.000000);" self.assertEqual(nj(dm, result_constructor=str), expected_str) def test_nj_error(self): data = [[0, 3], [3, 0]] dm = DistanceMatrix(data, list('ab')) self.assertRaises(ValueError, nj, dm) def test_compute_q(self): expected_data = [[0, -50, -38, -34, -34], [-50, 0, -38, -34, -34], [-38, -38, 0, -40, -40], [-34, -34, -40, 0, -48], [-34, -34, -40, -48, 0]] expected_ids = list('abcde') expected = DistanceMatrix(expected_data, expected_ids) self.assertEqual(_compute_q(self.dm1), expected) data = [[0, 3, 2], [3, 0, 3], [2, 3, 0]] dm = DistanceMatrix(data, list('abc')) # computed this manually expected_data = [[0, -8, -8], [-8, 0, -8], [-8, -8, 0]] expected = DistanceMatrix(expected_data, list('abc')) self.assertEqual(_compute_q(dm), expected) def test_compute_collapsed_dm(self): expected_data = [[0, 7, 7, 6], [7, 0, 8, 7], [7, 8, 0, 3], [6, 7, 3, 0]] expected_ids = ['x', 'c', 'd', 'e'] expected1 = DistanceMatrix(expected_data, expected_ids) self.assertEqual(_compute_collapsed_dm(self.dm1, 'a', 'b', True, 'x'), expected1) # computed manually expected_data = [[0, 4, 3], [4, 0, 3], [3, 3, 0]] expected_ids = ['yy', 'd', 'e'] expected2 = DistanceMatrix(expected_data, expected_ids) self.assertEqual( _compute_collapsed_dm(expected1, 'x', 'c', True, 'yy'), expected2) def test_lowest_index(self): 
self.assertEqual(_lowest_index(self.dm1), (4, 3)) self.assertEqual(_lowest_index(_compute_q(self.dm1)), (1, 0)) def test_pair_members_to_new_node(self): self.assertEqual(_pair_members_to_new_node(self.dm1, 'a', 'b', True), (2, 3)) self.assertEqual(_pair_members_to_new_node(self.dm1, 'a', 'c', True), (4, 5)) self.assertEqual(_pair_members_to_new_node(self.dm1, 'd', 'e', True), (2, 1)) def test_pair_members_to_new_node_zero_branch_length(self): # the values in this example don't really make sense # (I'm not sure how you end up with these distances between # three sequences), but that doesn't really matter for the sake # of this test data = [[0, 4, 2], [4, 0, 38], [2, 38, 0]] ids = ['a', 'b', 'c'] dm = DistanceMatrix(data, ids) self.assertEqual(_pair_members_to_new_node(dm, 'a', 'b', True), (0, 4)) # this makes it clear why negative branch lengths don't make sense... self.assertEqual( _pair_members_to_new_node(dm, 'a', 'b', False), (-16, 20)) def test_nni_dm1(self): self.assertEqual(nj(self.dm1, result_constructor=str), self.expected1_str) actual_TreeNode = nni(self.pre1_nni_TreeNode, self.dm1, inplace=False) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.post1_nni_TreeNode), 0.0, places=10) def test_nni_dm2(self): # Resulting tree topology is equivalent to result from nj, however, # resulting edge lengths are almost equal to 2 places. actual_TreeNode = nni(self.pre2_nni_TreeNode, self.dm2, inplace=False) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.post2_nni_TreeNode), 0.0) def test_nni_dm3(self): actual_TreeNode = nni(self.pre3_nni_TreeNode, self.dm3, inplace=False) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( self.post3_nni_TreeNode), 0.0) def test_nni_trivial(self): # No swaps are performed, but edge lengths are assigned. 
data = [[0, 3, 2], [3, 0, 3], [2, 3, 0]] dm = DistanceMatrix(data, list('abc')) pre_str = "((c,b))a;" pre_TreeNode = TreeNode.read( io.StringIO(pre_str)) expected_str = "((c:1.0,b:2.0):1.0)a;" expected_TreeNode = TreeNode.read( io.StringIO(expected_str)) self.assertEqual(str(nni(pre_TreeNode, dm, inplace=False)), str(expected_TreeNode)) def test_nni_binary_flag(self): data = [[0, 3], [3, 0]] dm = DistanceMatrix(data, list('ab')) pre_str = "((b))a;" pre_TreeNode = TreeNode.read(io.StringIO(pre_str)) msg = "Could not perform NNI. Tree needs to be a binary tree." with self.assertRaises(TypeError) as cm: nni(pre_TreeNode, dm) self.assertEqual(str(cm.exception), msg) def test_nni_leaf_root_flag(self): pre_str = "((b,d),(e,c))a;" pre_TreeNode = TreeNode.read(io.StringIO(pre_str)) msg = "Could not perform NNI. Tree needs to be rooted at a leaf node." with self.assertRaises(TypeError) as cm: nni(pre_TreeNode, self.dm1) self.assertEqual(str(cm.exception), msg) def test_perform_swap(self): # Swapping the leaf nodes a tree without edge lengths. 
pre_str = "(((b,d),(e,c)))a;" actual_TreeNode = TreeNode.read( io.StringIO(pre_str)) node1 = actual_TreeNode.find('b') node2 = actual_TreeNode.find('c') expected_str = "(((d,c),(e,b)))a;" expected_TreeNode = TreeNode.read( io.StringIO(expected_str)) _perform_swap(node1, node2) self.assertEqual(str(actual_TreeNode), str(expected_TreeNode)) def test_average_distance(self): expected_str = ("((((e:1.0,d:2.0):2.0,c:4.0):3.0,b:3.0):2.0)a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) node1 = expected_TreeNode.find('b') node2 = expected_TreeNode.find('d').parent self.assertAlmostEqual(_average_distance(node1, node2, self.dm1), 9.5, places=10) def test_tip_or_root(self): expected_str = ("((((e:1.0,d:2.0):2.0,c:4.0):3.0,b:3.0):2.0)a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) node_internal = expected_TreeNode.find('d').parent node_leaf = expected_TreeNode.find('b') root = expected_TreeNode.root() self.assertEqual(len(_tip_or_root(node_internal)), 2) self.assertEqual(str(_tip_or_root(node_leaf)[0]), str(node_leaf.name)) self.assertEqual(str(_tip_or_root(root)[0]), str(root.name)) def test_average_distance_upper(self): # computed manually data = [[0, 0.02, 0.18, 0.34, 0.55], [0.02, 0, 0.19, 0.35, 0.55], [0.18, 0.19, 0, 0.34, 0.54], [0.34, 0.35, 0.34, 0, 0.62], [0.55, 0.55, 0.54, 0.62, 0]] ids = ['human','monkey','pig','rat','chicken'] dm = DistanceMatrix(data, ids) expected_str = "((rat,(human,(pig,monkey))))chicken;" expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) node1 = expected_TreeNode.find('pig').parent node2 = expected_TreeNode.find('human').parent.parent self.assertAlmostEqual(_average_distance_upper(node1, node2, dm), 0.545, places=10) def test_subtree_count(self): expected_str = ("((((e:1.0,d:2.0):2.0,c:4.0):3.0,b:3.0):2.0)a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) internal_node = expected_TreeNode.find('d').parent.parent leaf = expected_TreeNode.find('d') root = expected_TreeNode.root() 
self.assertEqual(_subtree_count(internal_node), 3) self.assertEqual(_subtree_count(leaf), 1) self.assertEqual(_subtree_count(root), 1) def test_swap_length(self): # results in a positive integer # computed manually expected_str = ("(((b,d),(e,c)))a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) adm = _average_distance_matrix(expected_TreeNode, self.dm1) self.assertAlmostEqual(_swap_length( 2, 1, 1, 1, 6, 3, 0, 1, adm), 2.5, places=10) def test_swap_heap(self): # swap length is stored into the maxheap as a negative integer expected_str = ("(((b,d),(e,c)))a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) adm = _average_distance_matrix(expected_TreeNode, self.dm1) self.assertAlmostEqual(_swap_heap(expected_TreeNode, adm)[0][0], -2.0, places=10) def test_average_subtree_distance(self): # computed manually expected_str = ("(((b,d),(e,c)))a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) a = expected_TreeNode.find('e').parent b = expected_TreeNode.find('b') a1 = expected_TreeNode.find('e') a2 = expected_TreeNode.find('c') self.assertAlmostEqual(_average_subtree_distance(a, b, a1, a2, self.dm1), 9.5, places=10) def test_average_distance_matrix_trivial(self): # In this case, the average distance matrix is equivalent to # the original distance matrix data = [[0, 3, 2], [3, 0, 3], [2, 3, 0]] ids = list('abc') dm = DistanceMatrix(data, ids) expected_str = "((c,b))a;" expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) index = [0, 1, 2] actual_adm = _average_distance_matrix(expected_TreeNode, dm) for i in index: for j in index: if j < i: self.assertEqual(dm[i][j], actual_adm[i][j]) self.assertEqual(dm[j][i], actual_adm[j][i]) def test_average_distance_matrix(self): # computed manually expected_str = ("(((b,d),(e,c)))a;") expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) expected_adm = [[0.0, 10.0, 8.0, 9.0, 10.0, 9.5, 5.0], [10.0, 0.0, 6.666666666666667, 3.0, 8.0, 5.5, 9.0], [8.0, 6.666666666666667, 0.0, 6.0, 
9.0, 7.5, 7.0], [9.0, 3.0, 6.0, 0.0, 7.0, 6.666666666666667, 8.0], [10.0, 8.0, 9.0, 7.0, 0.0, 9.0, 9.0], [9.5, 5.5, 7.5, 6.666666666666667, 9.0, 0.0, 8.5], [5.0, 9.0, 7.0, 8.0, 9.0, 8.5, 0.0]] actual_adm = _average_distance_matrix(expected_TreeNode, self.dm1) index = [0, 1, 2, 3, 4, 5, 6] for i in index: for j in index: if j < 1: self.assertAlmostEqual(expected_adm[i][j], actual_adm[i][j]) self.assertAlmostEqual(expected_adm[j][i], actual_adm[j][i]) def test_edge_estimation(self): data = [[0, 3, 2], [3, 0, 3], [2, 3, 0]] ids = list('abc') dm = DistanceMatrix(data, ids) pre_estimation_str = "((c,b))a;" expected_str = "((c:1.0,b:2.0):1.0)a;" actual_TreeNode = TreeNode.read(io.StringIO(pre_estimation_str)) _edge_estimation(actual_TreeNode, dm) expected_TreeNode = TreeNode.read(io.StringIO(expected_str)) self.assertAlmostEqual(actual_TreeNode.compare_tip_distances( expected_TreeNode), 1, places=10) if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/tree/tests/test_tree.py000066400000000000000000002327211464262511300213470ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from unittest import TestCase, main from collections import defaultdict import numpy as np import numpy.testing as npt import pandas as pd from scipy.stats import pearsonr from skbio import DistanceMatrix, TreeNode from skbio.tree import (DuplicateNodeError, NoLengthError, TreeError, MissingNodeError, NoParentError) from skbio.util import RepresentationWarning class TreeNodeSubclass(TreeNode): pass class TreeTests(TestCase): def setUp(self): """Prep the self""" # a simple tree self.simple_t = TreeNode.read(["((a,b)i1,(c,d)i2)root;"]) # /-a # /i1------| # | \-b # -root----| # | /-c # \i2------| # \-d # another test tree nodes = dict([(x, TreeNode(x)) for x in "abcdefgh"]) nodes["a"].append(nodes["b"]) nodes["b"].append(nodes["c"]) nodes["c"].append(nodes["d"]) nodes["c"].append(nodes["e"]) nodes["c"].append(nodes["f"]) nodes["f"].append(nodes["g"]) nodes["a"].append(nodes["h"]) self.TreeRoot = nodes["a"] # (((d,e,(g)f)c)b,h)a; # /-d # | # /b------- /c-------|--e # | | # -a-------| \f------- /-g # | # \-h def rev_f(items): items.reverse() def rotate_f(items): tmp = items[-1] items[1:] = items[:-1] items[0] = tmp self.rev_f = rev_f self.rotate_f = rotate_f self.complex_tree = TreeNode.read([ "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);"]) def test_bug_issue_1416(self): tree = TreeNode.read(["(((a,b,f,g),c),d);"]) new_tree = tree.shear(["a", "b", "c", "f"]) exp = {"a", "b", "c", "f"} obs = {n.name for n in new_tree.tips()} self.assertEqual(obs, exp) self.assertEqual(id(new_tree), id(new_tree.children[0].parent)) self.assertEqual(id(new_tree), id(new_tree.children[1].parent)) def test_observed_node_counts(self): """returns observed nodes counts given vector of observed taxon counts """ # no taxon observed taxon_counts = {} expected = defaultdict(int) self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) # error on zero count(s) taxon_counts = {"a": 0} 
self.assertRaises(ValueError, self.simple_t.observed_node_counts, taxon_counts) taxon_counts = {"a": 0, "b": 0, "c": 0, "d": 0} self.assertRaises(ValueError, self.simple_t.observed_node_counts, taxon_counts) # all taxa observed once taxon_counts = {"a": 1, "b": 1, "c": 1, "d": 1} expected = defaultdict(int) expected[self.simple_t.find("root")] = 4 expected[self.simple_t.find("i1")] = 2 expected[self.simple_t.find("i2")] = 2 expected[self.simple_t.find("a")] = 1 expected[self.simple_t.find("b")] = 1 expected[self.simple_t.find("c")] = 1 expected[self.simple_t.find("d")] = 1 self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) # some taxa observed twice taxon_counts = {"a": 2, "b": 1, "c": 1, "d": 1} expected = defaultdict(int) expected[self.simple_t.find("root")] = 5 expected[self.simple_t.find("i1")] = 3 expected[self.simple_t.find("i2")] = 2 expected[self.simple_t.find("a")] = 2 expected[self.simple_t.find("b")] = 1 expected[self.simple_t.find("c")] = 1 expected[self.simple_t.find("d")] = 1 self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) taxon_counts = {"a": 2, "b": 1, "c": 1, "d": 2} expected = defaultdict(int) expected[self.simple_t.find("root")] = 6 expected[self.simple_t.find("i1")] = 3 expected[self.simple_t.find("i2")] = 3 expected[self.simple_t.find("a")] = 2 expected[self.simple_t.find("b")] = 1 expected[self.simple_t.find("c")] = 1 expected[self.simple_t.find("d")] = 2 self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) # some taxa observed, others not observed taxon_counts = {"a": 2, "b": 1} expected = defaultdict(int) expected[self.simple_t.find("root")] = 3 expected[self.simple_t.find("i1")] = 3 expected[self.simple_t.find("a")] = 2 expected[self.simple_t.find("b")] = 1 self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) taxon_counts = {"d": 1} expected = defaultdict(int) expected[self.simple_t.find("root")] = 1 expected[self.simple_t.find("i2")] = 1 
expected[self.simple_t.find("d")] = 1 self.assertEqual(self.simple_t.observed_node_counts(taxon_counts), expected) # error on non-tips taxon_counts = {"a": 2, "e": 1} self.assertRaises(MissingNodeError, self.simple_t.observed_node_counts, taxon_counts) taxon_counts = {"a": 2, "i1": 1} self.assertRaises(MissingNodeError, self.simple_t.observed_node_counts, taxon_counts) # test with another tree taxon_counts = {} expected = defaultdict(int) self.assertEqual(self.complex_tree.observed_node_counts(taxon_counts), expected) taxon_counts = {"e": 42, "f": 1} expected[self.complex_tree.root()] = 43 expected[self.complex_tree.find("int5")] = 43 expected[self.complex_tree.find("e")] = 42 expected[self.complex_tree.find("f")] = 1 self.assertEqual(self.complex_tree.observed_node_counts(taxon_counts), expected) def test_count(self): """Get node counts""" exp = 7 obs = self.simple_t.count() self.assertEqual(obs, exp) exp = 4 obs = self.simple_t.count(tips=True) self.assertEqual(obs, exp) def test_copy(self): """copy a tree""" self.simple_t.children[0].length = 1.2 self.simple_t.children[1].children[0].length = 0.5 cp = self.simple_t.copy() gen = zip(cp.traverse(include_self=True), self.simple_t.traverse(include_self=True)) for a, b in gen: self.assertIsNot(a, b) self.assertEqual(a.name, b.name) self.assertEqual(a.length, b.length) # deep vs shallow copy self.simple_t.dummy = [1, [2, 3], 4] cp = self.simple_t.copy() cp.dummy[1].append(0) self.assertListEqual(self.simple_t.dummy[1], [2, 3]) cp = self.simple_t.copy(deep=False) cp.dummy[1].append(0) self.assertListEqual(self.simple_t.dummy[1], [2, 3, 0]) def test_deepcopy(self): self.simple_t.dummy = [1, [2, 3], 4] cp = self.simple_t.copy() cp.dummy[1].append(0) self.assertListEqual(self.simple_t.dummy[1], [2, 3]) def test__copy__(self): self.simple_t.dummy = [1, [2, 3], 4] cp = self.simple_t.__copy__() for obs, exp in zip(cp.traverse(), self.simple_t.traverse()): self.assertIsNot(obs, exp) self.assertEqual(obs.name, exp.name) 
self.assertEqual(obs.length, exp.length) cp.dummy[1].append(0) self.assertListEqual(self.simple_t.dummy[1], [2, 3, 0]) def test__deepcopy__(self): self.simple_t.dummy = [1, [2, 3], 4] cp = self.simple_t.__deepcopy__({}) for obs, exp in zip(cp.traverse(), self.simple_t.traverse()): self.assertIsNot(obs, exp) self.assertEqual(obs.name, exp.name) self.assertEqual(obs.length, exp.length) cp.dummy[1].append(0) self.assertListEqual(self.simple_t.dummy[1], [2, 3]) def test_append(self): """Append a node to a tree""" second_tree = TreeNode.read(["(x,y)z;"]) self.simple_t.append(second_tree) self.assertEqual(self.simple_t.children[0].name, "i1") self.assertEqual(self.simple_t.children[1].name, "i2") self.assertEqual(self.simple_t.children[2].name, "z") self.assertEqual(len(self.simple_t.children), 3) self.assertEqual(self.simple_t.children[2].children[0].name, "x") self.assertEqual(self.simple_t.children[2].children[1].name, "y") self.assertEqual(second_tree.parent, self.simple_t) def test_extend(self): """Extend a few nodes""" second_tree = TreeNode.read(["(x1,y1)z1;"]) third_tree = TreeNode.read(["(x2,y2)z2;"]) first_tree = TreeNode.read(["(x1,y1)z1;"]) fourth_tree = TreeNode.read(["(x2,y2)z2;"]) self.simple_t.extend([second_tree, third_tree]) first_tree.extend(fourth_tree.children) self.assertEqual(0, len(fourth_tree.children)) self.assertEqual(first_tree.children[0].name, "x1") self.assertEqual(first_tree.children[1].name, "y1") self.assertEqual(first_tree.children[2].name, "x2") self.assertEqual(first_tree.children[3].name, "y2") self.assertEqual(self.simple_t.children[0].name, "i1") self.assertEqual(self.simple_t.children[1].name, "i2") self.assertEqual(self.simple_t.children[2].name, "z1") self.assertEqual(self.simple_t.children[3].name, "z2") self.assertEqual(len(self.simple_t.children), 4) self.assertEqual(self.simple_t.children[2].children[0].name, "x1") self.assertEqual(self.simple_t.children[2].children[1].name, "y1") 
self.assertEqual(self.simple_t.children[3].children[0].name, "x2") self.assertEqual(self.simple_t.children[3].children[1].name, "y2") self.assertIs(second_tree.parent, self.simple_t) self.assertIs(third_tree.parent, self.simple_t) def test_extend_empty(self): """Extend on the empty case should work""" self.simple_t.extend([]) self.assertEqual(self.simple_t.children[0].name, "i1") self.assertEqual(self.simple_t.children[1].name, "i2") self.assertEqual(len(self.simple_t.children), 2) def test_insert(self): "Insert a node into the branch connecting self and its parent." # insert a new node into a branch with no length node = self.simple_t.find("i1") node.insert(TreeNode("x")) obs = self.simple_t.find("x") self.assertTrue(obs.parent is self.simple_t) self.assertTrue(node.parent is obs) self.assertIn(obs, self.simple_t.children) self.assertIn(node, obs.children) self.assertIsNone(obs.length) self.assertIsNone(node.length) msg = "Distance is provided but branch has no length." with self.assertRaisesRegex(ValueError, msg): node.insert(TreeNode("x"), distance=1.0) msg = "Self has no parent." with self.assertRaisesRegex(NoParentError, msg): self.simple_t.insert(TreeNode("x")) # insert an existing clade into a branch with length t = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)f:5,g:1)h;"]) donor_t = TreeNode.read(["((x:1,y:1)m:1.5,(z:1,w:1)n:0.5,l:2.5);"]) t.find("c").insert(donor_t.find("m")) obs = t.find("m") self.assertTrue(obs.parent is t) self.assertTrue(t.find("c").parent is obs) self.assertNotIn(obs, donor_t.children) self.assertEqual(obs.length, 1) self.assertEqual(t.find("c").length, 1) t.find("d").insert(donor_t.find("n"), 2) obs = t.find("n") self.assertTrue(obs.parent is t.find("f")) self.assertTrue(t.find("d").parent is obs) self.assertEqual(obs.length, 1) self.assertEqual(t.find("d").length, 2) msg = "Distance cannot exceed branch length." 
with self.assertRaisesRegex(ValueError, msg): t.find("c").insert(TreeNode("x"), 20) # with branch support t = TreeNode.read(["(((a,b)90)d);"]) t.assign_supports() t.lca(["a", "b"]).insert(TreeNode("x")) self.assertEqual(t.find("x").support, 90) # with custom branch attribute t = TreeNode.read(["(((a,b)c)d);"]) n = t.find("c") n.battr = 1 # branch attribute n.nattr = 2 # node attribute n.insert(TreeNode("x"), branch_attrs=["battr"]) self.assertEqual(t.find("x").battr, 1) self.assertFalse(hasattr(t.find("x"), "nattr")) def test_iter(self): """iter wraps children""" exp = ["i1", "i2"] obs = [n.name for n in self.simple_t] self.assertEqual(obs, exp) def test_gops(self): """Basic TreeNode operations should work as expected""" p = TreeNode() self.assertEqual(str(p), ";\n") p.name = "abc" self.assertEqual(str(p), "abc;\n") p.length = 3 self.assertEqual(str(p), "abc:3;\n") # don"t suppress branch from root q = TreeNode() p.append(q) self.assertEqual(str(p), "()abc:3;\n") r = TreeNode() q.append(r) self.assertEqual(str(p), "(())abc:3;\n") r.name = "xyz" self.assertEqual(str(p), "((xyz))abc:3;\n") q.length = 2 self.assertEqual(str(p), "((xyz):2)abc:3;\n") def test_pop(self): """Pop off a node""" second_tree = TreeNode.read(["(x1,y1)z1;"]) third_tree = TreeNode.read(["(x2,y2)z2;"]) self.simple_t.extend([second_tree, third_tree]) i1 = self.simple_t.pop(0) z2 = self.simple_t.pop() self.assertEqual(i1.name, "i1") self.assertEqual(z2.name, "z2") self.assertEqual(i1.children[0].name, "a") self.assertEqual(i1.children[1].name, "b") self.assertEqual(z2.children[0].name, "x2") self.assertEqual(z2.children[1].name, "y2") self.assertEqual(self.simple_t.children[0].name, "i2") self.assertEqual(self.simple_t.children[1].name, "z1") self.assertEqual(len(self.simple_t.children), 2) def test_remove(self): """Remove nodes""" self.assertTrue(self.simple_t.remove(self.simple_t.children[0])) self.assertEqual(len(self.simple_t.children), 1) n = TreeNode() 
self.assertFalse(self.simple_t.remove(n)) def test_remove_deleted(self): """Remove nodes by function""" def f(node): return node.name in ["b", "d"] self.simple_t.remove_deleted(f) exp = "((a)i1,(c)i2)root;\n" obs = str(self.simple_t) self.assertEqual(obs, exp) def test_adopt(self): """Adopt a node!""" n1 = TreeNode(name="n1") n2 = TreeNode(name="n2") n3 = TreeNode(name="n3") self.simple_t._adopt(n1) self.simple_t.children[-1]._adopt(n2) n2._adopt(n3) # adopt doesn"t update .children self.assertEqual(len(self.simple_t.children), 2) self.assertIs(n1.parent, self.simple_t) self.assertIs(n2.parent, self.simple_t.children[-1]) self.assertIs(n3.parent, n2) def test_remove_node(self): """Remove a node by index""" n = self.simple_t._remove_node(-1) self.assertEqual(n.parent, None) self.assertEqual(len(self.simple_t.children), 1) self.assertEqual(len(n.children), 2) self.assertNotIn(n, self.simple_t.children) def test_shear_prune_parent_dropped(self): bugtree = "((a,b),((c,d),(e,f)));" to_keep = ["c", "d"] exp = "(c,d);\n" obs = str(TreeNode.read([bugtree]).shear(to_keep)) self.assertEqual(obs, exp) def test_prune_nested_single_descendent(self): bugtree = "(((a,b)));" exp = "(a,b);\n" t = TreeNode.read([bugtree]) t.prune() obs = str(t) self.assertEqual(obs, exp) def test_prune_root_single_desc(self): t = TreeNode.read(["((a,b)c)extra;"]) exp = "(a,b)c;\n" t.prune() self.assertEqual(str(t), exp) def test_prune(self): """Collapse single descendent nodes""" # check the identity case cp = self.simple_t.copy() self.simple_t.prune() gen = zip(cp.traverse(include_self=True), self.simple_t.traverse(include_self=True)) for a, b in gen: self.assertIsNot(a, b) self.assertEqual(a.name, b.name) self.assertEqual(a.length, b.length) # create a single descendent by removing tip "a" n = self.simple_t.children[0] n.remove(n.children[0]) self.simple_t.prune() self.assertEqual(len(self.simple_t.children), 2) self.assertEqual(self.simple_t.children[0].name, "i2") 
self.assertEqual(self.simple_t.children[1].name, "b") def test_prune_length(self): """Collapse single descendent nodes""" # check the identity case cp = self.simple_t.copy() self.simple_t.prune() gen = zip(cp.traverse(include_self=True), self.simple_t.traverse(include_self=True)) for a, b in gen: self.assertIsNot(a, b) self.assertEqual(a.name, b.name) self.assertEqual(a.length, b.length) for n in self.simple_t.traverse(): n.length = 1.0 # create a single descendent by removing tip "a" n = self.simple_t.children[0] n.remove(n.children[0]) self.simple_t.prune() self.assertEqual(len(self.simple_t.children), 2) self.assertEqual(self.simple_t.children[0].name, "i2") self.assertEqual(self.simple_t.children[1].name, "b") self.assertEqual(self.simple_t.children[1].length, 2.0) def test_subset(self): """subset should return set of leaves that descends from node""" t = self.simple_t self.assertEqual(t.subset(), frozenset("abcd")) c = t.children[0] self.assertEqual(c.subset(), frozenset("ab")) leaf = c.children[1] self.assertEqual(leaf.subset(), frozenset("")) def test_subsets(self): """subsets should return all subsets descending from a set""" t = self.simple_t self.assertEqual(t.subsets(), frozenset( [frozenset("ab"), frozenset("cd")])) def test_is_tip(self): """see if we're a tip or not""" self.assertFalse(self.simple_t.is_tip()) self.assertFalse(self.simple_t.children[0].is_tip()) self.assertTrue(self.simple_t.children[0].children[0].is_tip()) def test_is_root(self): """see if we're at the root or not""" self.assertTrue(self.simple_t.is_root()) self.assertFalse(self.simple_t.children[0].is_root()) self.assertFalse(self.simple_t.children[0].children[0].is_root()) def test_root(self): """Get the root!""" root = self.simple_t self.assertIs(root, self.simple_t.root()) self.assertIs(root, self.simple_t.children[0].root()) self.assertIs(root, self.simple_t.children[1].children[1].root()) def test_invalidate_lookup_caches(self): root = self.simple_t root.create_caches() 
self.assertNotEqual(root._tip_cache, {}) self.assertNotEqual(root._non_tip_cache, {}) root.invalidate_caches() self.assertFalse(hasattr(root, "_tip_cache")) self.assertFalse(hasattr(root, "_non_tip_cache")) def test_invalidate_attr_caches(self): tree = TreeNode.read(["((a,b,(c,d)e)f,(g,h)i)root;"]) def f(n): return [n.name] if n.is_tip() else [] tree.cache_attr(f, "tip_names") tree.invalidate_caches() for n in tree.traverse(include_self=True): self.assertFalse(hasattr(n, "tip_names")) def test_create_caches_duplicate_tip_names(self): with self.assertRaises(DuplicateNodeError): TreeNode.read(["(a,a);"]).create_caches() def test_find_all(self): t = TreeNode.read(["((a,b)c,((d,e)c)c,(f,(g,h)c)a)root;"]) exp = [t.children[0], t.children[1].children[0], t.children[1], t.children[2].children[1]] obs = t.find_all("c") self.assertEqual(obs, exp) identity = t.find_all(t) self.assertEqual(len(identity), 1) self.assertEqual(identity[0], t) identity_name = t.find_all("root") self.assertEqual(len(identity_name), 1) self.assertEqual(identity_name[0], t) exp = [t.children[2], t.children[0].children[0]] obs = t.find_all("a") self.assertEqual(obs, exp) with self.assertRaises(MissingNodeError): t.find_all("missing") def test_find(self): """Find a node in a tree""" t = TreeNode.read(["((a,b)c,(d,e)f);"]) exp = t.children[0] obs = t.find("c") self.assertEqual(obs, exp) exp = t.children[0].children[1] obs = t.find("b") self.assertEqual(obs, exp) with self.assertRaises(MissingNodeError): t.find("does not exist") def test_find_cache_bug(self): """First implementation did not force the cache to be at the root""" t = TreeNode.read(["((a,b)c,(d,e)f,(g,h)f);"]) exp_tip_cache_keys = set(["a", "b", "d", "e", "g", "h"]) exp_non_tip_cache_keys = set(["c", "f"]) tip_a = t.children[0].children[0] tip_a.create_caches() self.assertFalse(hasattr(tip_a, "_tip_cache")) self.assertEqual(set(t._tip_cache), exp_tip_cache_keys) self.assertEqual(set(t._non_tip_cache), exp_non_tip_cache_keys) 
self.assertEqual(t._non_tip_cache["f"], [t.children[1], t.children[2]]) def test_find_by_id(self): """Find a node by id""" t1 = TreeNode.read(["((,),(,,));"]) t2 = TreeNode.read(["((,),(,,));"]) exp = t1.children[1] obs = t1.find_by_id(6) # right inner node with 3 children self.assertEqual(obs, exp) exp = t2.children[1] obs = t2.find_by_id(6) # right inner node with 3 children self.assertEqual(obs, exp) with self.assertRaises(MissingNodeError): t1.find_by_id(100) def test_find_by_func(self): """Find nodes by a function""" t = TreeNode.read(["((a,b)c,(d,e)f);"]) def func(x): return x.parent == t.find("c") exp = ["a", "b"] obs = [n.name for n in t.find_by_func(func)] self.assertEqual(obs, exp) def test_ancestors(self): """Get all the ancestors""" exp = ["i1", "root"] obs = self.simple_t.children[0].children[0].ancestors() self.assertEqual([o.name for o in obs], exp) exp = ["root"] obs = self.simple_t.children[0].ancestors() self.assertEqual([o.name for o in obs], exp) exp = [] obs = self.simple_t.ancestors() self.assertEqual([o.name for o in obs], exp) def test_siblings(self): """Get the siblings""" exp = [] obs = self.simple_t.siblings() self.assertEqual(obs, exp) exp = ["i2"] obs = self.simple_t.children[0].siblings() self.assertEqual([o.name for o in obs], exp) exp = ["c"] obs = self.simple_t.children[1].children[1].siblings() self.assertEqual([o.name for o in obs], exp) self.simple_t.append(TreeNode(name="foo")) self.simple_t.append(TreeNode(name="bar")) exp = ["i1", "foo", "bar"] obs = self.simple_t.children[1].siblings() self.assertEqual([o.name for o in obs], exp) def test_ascii_art(self): """Make some ascii trees""" # unlabeled internal node tr = TreeNode.read(["(B:0.2,(C:0.3,D:0.4):0.6)F;"]) obs = tr.ascii_art(show_internal=True, compact=False) exp = (" /-B\n" "-F-------|\n" " | /-C\n" " \\--------|\n" " \\-D") self.assertEqual(obs, exp) obs = tr.ascii_art(show_internal=True, compact=True) exp = ("-F------- /-B\n" " \\-------- /-C\n" " \\-D") 
self.assertEqual(obs, exp) obs = tr.ascii_art(show_internal=False, compact=False) exp = (" /-B\n" "---------|\n" " | /-C\n" " \\--------|\n" " \\-D") self.assertEqual(obs, exp) def test_ascii_art_with_support(self): """Make some ascii trees with support values""" tr = TreeNode.read(["(B:0.2,(C:0.3,D:0.4)90:0.6)F;"]) exp = " /-B\n-F-------|\n | /-C\n "\ " \\90------|\n \\-D" obs = tr.ascii_art(show_internal=True, compact=False) self.assertEqual(obs, exp) tr.assign_supports() obs = tr.ascii_art(show_internal=True, compact=False) self.assertEqual(obs, exp) tr = TreeNode.read(["((A,B)75,(C,D)'80:spA');"]) exp = " /-A\n /75------|\n | "\ " \\-B\n---------|\n | /-C\n \\"\ "80:spA--|\n \\-D" obs = tr.ascii_art(show_internal=True, compact=False) self.assertEqual(obs, exp) tr.assign_supports() obs = tr.ascii_art(show_internal=True, compact=False) self.assertEqual(obs, exp) def test_ascii_art_three_children(self): obs = TreeNode.read(["(a,(b,c,d));"]).ascii_art() self.assertEqual(obs, exp_ascii_art_three_children) def test_accumulate_to_ancestor(self): """Get the distance from a node to its ancestor""" t = TreeNode.read([ "((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"]) a = t.find("a") b = t.find("b") exp_to_root = 0.1 + 0.3 obs_to_root = a.accumulate_to_ancestor(t) self.assertEqual(obs_to_root, exp_to_root) with self.assertRaises(NoParentError): a.accumulate_to_ancestor(b) def test_distance_nontip(self): # example derived from issue #807, credit @wwood tstr = "((A:1.0,B:2.0)'g__genus1':3.0)root;" tree = TreeNode.read([tstr]) self.assertEqual(tree.find("A").distance(tree.find("g__genus1")), 1.0) def test_distance(self): """Get the distance between two nodes""" t = TreeNode.read(["((a:0.1,b:0.2)c:0.3,(d:0.4,e)f:0.5)root;"]) tips = sorted([n for n in t.tips()], key=lambda x: x.name) npt.assert_almost_equal(tips[0].distance(tips[0]), 0.0) npt.assert_almost_equal(tips[0].distance(tips[1]), 0.3) npt.assert_almost_equal(tips[0].distance(tips[2]), 1.3) with 
self.assertRaises(NoLengthError): tips[0].distance(tips[3]) npt.assert_almost_equal(tips[1].distance(tips[0]), 0.3) npt.assert_almost_equal(tips[1].distance(tips[1]), 0.0) npt.assert_almost_equal(tips[1].distance(tips[2]), 1.4) with self.assertRaises(NoLengthError): tips[1].distance(tips[3]) self.assertEqual(tips[2].distance(tips[0]), 1.3) self.assertEqual(tips[2].distance(tips[1]), 1.4) self.assertEqual(tips[2].distance(tips[2]), 0.0) with self.assertRaises(NoLengthError): tips[2].distance(tips[3]) def test_lowest_common_ancestor(self): """TreeNode lowestCommonAncestor should return LCA for set of tips""" t1 = TreeNode.read(["((a,(b,c)d)e,f,(g,h)i)j;"]) t2 = t1.copy() t3 = t1.copy() t4 = t1.copy() input1 = ["a"] # return self input2 = ["a", "b"] # return e input3 = ["b", "c"] # return d input4 = ["a", "h", "g"] # return j exp1 = t1.find("a") exp2 = t2.find("e") exp3 = t3.find("d") exp4 = t4 obs1 = t1.lowest_common_ancestor(input1) obs2 = t2.lowest_common_ancestor(input2) obs3 = t3.lowest_common_ancestor(input3) obs4 = t4.lowest_common_ancestor(input4) self.assertEqual(obs1, exp1) self.assertEqual(obs2, exp2) self.assertEqual(obs3, exp3) self.assertEqual(obs4, exp4) # verify multiple calls work t_mul = t1.copy() exp_1 = t_mul.find("d") exp_2 = t_mul.find("i") obs_1 = t_mul.lowest_common_ancestor(["b", "c"]) obs_2 = t_mul.lowest_common_ancestor(["g", "h"]) self.assertEqual(obs_1, exp_1) self.assertEqual(obs_2, exp_2) # empty case with self.assertRaises(ValueError): t1.lowest_common_ancestor([]) def test_get_max_distance(self): """get_max_distance should get max tip distance across tree""" tree = TreeNode.read([ "((a:0.1,b:0.2)c:0.3,(d:0.4,e:0.5)f:0.6)root;"]) dist, nodes = tree.get_max_distance() npt.assert_almost_equal(dist, 1.6) self.assertEqual(sorted([n.name for n in nodes]), ["b", "e"]) def test_set_max_distance(self): """set_max_distance sets MaxDistTips across tree""" tree = TreeNode.read([ "((a:0.1,b:0.2)c:0.3,(d:0.4,e:0.5)f:0.6)root;"]) 
tree._set_max_distance() tip_a, tip_b = tree.MaxDistTips self.assertEqual(tip_a[0] + tip_b[0], 1.6) self.assertEqual(sorted([tip_a[1].name, tip_b[1].name]), ["b", "e"]) def test_set_max_distance_tie_bug(self): """Corresponds to #1077""" t = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)f:5)root;"]) exp = ((3.0, t.find("a")), (9.0, t.find("e"))) # the above tree would trigger an exception in max. The central issue # was that the data being passed to max were a tuple of tuple: # ((left_d, left_n), (right_d, right_n)) # the call to max would break in this scenario as it would fall onto # idx 1 of each tuple to assess the "max". t._set_max_distance() self.assertEqual(t.MaxDistTips, exp) def test_set_max_distance_inplace_modification_bug(self): """Corresponds to #1223""" t = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)f:5)root;"]) exp = [((0.0, t.find("a")), (0.0, t.find("a"))), ((0.0, t.find("b")), (0.0, t.find("b"))), ((1.0, t.find("a")), (1.0, t.find("b"))), ((0.0, t.find("d")), (0.0, t.find("d"))), ((0.0, t.find("e")), (0.0, t.find("e"))), ((3.0, t.find("d")), (4.0, t.find("e"))), ((3.0, t.find("a")), (9.0, t.find("e")))] t._set_max_distance() self.assertEqual([n.MaxDistTips for n in t.postorder()], exp) def test_shear(self): """Shear the nodes""" t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) obs = str(t.shear(["G", "M"])) exp = "(G:3.0,M:3.7);\n" self.assertEqual(obs, exp) def test_compare_tip_distances(self): t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) t2 = TreeNode.read(["(((H:1,G:1,O:1):2,R:3):1,X:4);"]) obs = t.compare_tip_distances(t2) # note: common taxa are H, G, R (only) m1 = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]]) m2 = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]]) r = pearsonr(m1.flat, m2.flat)[0] self.assertAlmostEqual(obs, (1 - r) / 2) def test_compare_tip_distances_sample(self): t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) t2 = TreeNode.read(["(((H:1,G:1,O:1):2,R:3):1,X:4);"]) obs = t.compare_tip_distances(t2, sample=3, 
shuffle_f=sorted) # note: common taxa are H, G, R (only) m1 = np.array([[0, 2, 6.5], [2, 0, 6.5], [6.5, 6.5, 0]]) m2 = np.array([[0, 2, 6], [2, 0, 6], [6, 6, 0]]) r = pearsonr(m1.flat, m2.flat)[0] self.assertAlmostEqual(obs, (1 - r) / 2) # 4 common taxa, still picking H, G, R t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7,Q:5):3);"]) t3 = TreeNode.read(["(((H:1,G:1,O:1):2,R:3,Q:10):1,X:4);"]) obs = t.compare_tip_distances(t3, sample=3, shuffle_f=sorted) def test_compare_tip_distances_no_common_tips(self): t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) t2 = TreeNode.read(["(((Z:1,Y:1,X:1):2,W:3):1,V:4);"]) with self.assertRaises(ValueError): t.compare_tip_distances(t2) def test_compare_tip_distances_single_common_tip(self): t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) t2 = TreeNode.read(["(((R:1,Y:1,X:1):2,W:3):1,V:4);"]) self.assertEqual(t.compare_tip_distances(t2), 1) self.assertEqual(t2.compare_tip_distances(t), 1) def test_tip_tip_distances_endpoints(self): """Test getting specifc tip distances with tipToTipDistances""" t = TreeNode.read(["((H:1,G:1):2,(R:0.5,M:0.7):3);"]) nodes = [t.find("H"), t.find("G"), t.find("M")] names = ["H", "G", "M"] exp = DistanceMatrix(np.array([[0, 2.0, 6.7], [2.0, 0, 6.7], [6.7, 6.7, 0.0]]), ["H", "G", "M"]) obs = t.tip_tip_distances(endpoints=names) self.assertEqual(obs, exp) obs = t.tip_tip_distances(endpoints=nodes) self.assertEqual(obs, exp) def test_tip_tip_distances_non_tip_endpoints(self): t = TreeNode.read(["((H:1,G:1)foo:2,(R:0.5,M:0.7):3);"]) with self.assertRaises(ValueError): t.tip_tip_distances(endpoints=["foo"]) def test_tip_tip_distances_no_length(self): t = TreeNode.read(["((a,b)c,(d,e)f);"]) exp_t = TreeNode.read(["((a:0,b:0)c:0,(d:0,e:0)f:0);"]) exp_t_dm = exp_t.tip_tip_distances() t_dm = npt.assert_warns(RepresentationWarning, t.tip_tip_distances) self.assertEqual(t_dm, exp_t_dm) for node in t.preorder(): self.assertIs(node.length, None) def test_tip_tip_distances_missing_length(self): t = 
TreeNode.read(["((a,b:6)c:4,(d,e:0)f);"]) exp_t = TreeNode.read(["((a:0,b:6)c:4,(d:0,e:0)f:0);"]) exp_t_dm = exp_t.tip_tip_distances() t_dm = npt.assert_warns(RepresentationWarning, t.tip_tip_distances) self.assertEqual(t_dm, exp_t_dm) def test_neighbors(self): """Get neighbors of a node""" t = TreeNode.read(["((a,b)c,(d,e)f);"]) exp = t.children obs = t.neighbors() self.assertEqual(obs, exp) exp = t.children[0].children + [t] obs = t.children[0].neighbors() self.assertEqual(obs, exp) exp = [t.children[0].children[0]] + [t] obs = t.children[0].neighbors(ignore=t.children[0].children[1]) self.assertEqual(obs, exp) exp = [t.children[0]] obs = t.children[0].children[0].neighbors() self.assertEqual(obs, exp) def test_has_children(self): """Test if has children""" t = TreeNode.read(["((a,b)c,(d,e)f);"]) self.assertTrue(t.has_children()) self.assertTrue(t.children[0].has_children()) self.assertTrue(t.children[1].has_children()) self.assertFalse(t.children[0].children[0].has_children()) self.assertFalse(t.children[0].children[1].has_children()) self.assertFalse(t.children[1].children[0].has_children()) self.assertFalse(t.children[1].children[1].has_children()) def test_tips(self): """Tip traversal of tree""" exp = ["a", "b", "c", "d"] obs = [n.name for n in self.simple_t.tips()] self.assertEqual(obs, exp) obs2 = [n.name for n in self.simple_t.traverse(False, False)] self.assertEqual(obs2, exp) def test_tips_self(self): """ See issue #1509 """ tree = TreeNode.read(["(c,(b,a)x)y;"]) ts = list(tree.find("c").tips(include_self=True)) self.assertEqual(len(ts), 1) t = ts[0] self.assertEqual(t.name, "c") self.assertTrue(t.is_tip()) def test_pre_and_postorder(self): """Pre and post order traversal of the tree""" exp = ["root", "i1", "a", "b", "i1", "i2", "c", "d", "i2", "root"] obs = [n.name for n in self.simple_t.pre_and_postorder()] self.assertEqual(obs, exp) obs2 = [n.name for n in self.simple_t.traverse(True, True)] self.assertEqual(obs2, exp) def 
test_pre_and_postorder_no_children(self): t = TreeNode("brofist") # include self exp = ["brofist"] obs = [n.name for n in t.pre_and_postorder()] self.assertEqual(obs, exp) # do not include self obs = list(t.pre_and_postorder(include_self=False)) self.assertEqual(obs, []) def test_levelorder(self): """Test level order traversal of the tree""" exp = ["root", "i1", "i2", "a", "b", "c", "d"] obs = [n.name for n in self.simple_t.levelorder()] self.assertEqual(obs, exp) def test_bifurcate(self): t1 = TreeNode.read(["(((a,b),c),(d,e));"]) t2 = TreeNode.read(["((a,b,c));"]) t3 = t2.copy() t1.bifurcate() t2.bifurcate() t3.bifurcate(insert_length=0) self.assertEqual(str(t1), "(((a,b),c),(d,e));\n") self.assertEqual(str(t2), "((c,(a,b)));\n") self.assertEqual(str(t3), "((c,(a,b):0));\n") def test_bifurcate_with_subclass(self): tree = TreeNodeSubclass() tree.append(TreeNodeSubclass()) tree.append(TreeNodeSubclass()) tree.append(TreeNodeSubclass()) tree.append(TreeNodeSubclass()) tree.bifurcate() for node in tree.traverse(): self.assertIs(type(node), TreeNodeSubclass) def test_index_tree_single_node(self): """index_tree handles single node tree""" t1 = TreeNode.read(["root;"]) id_index, child_index = t1.index_tree() self.assertEqual(id_index[0], t1) npt.assert_equal(child_index, np.array([[]])) def test_index_tree(self): """index_tree should produce correct index and node map""" # test for first tree: contains singleton outgroup t1 = TreeNode.read(["(((a,b),c),(d,e));"]) t2 = TreeNode.read(["(((a,b),(c,d)),(e,f));"]) t3 = TreeNode.read(["(((a,b,c),(d)),(e,f));"]) id_1, child_1 = t1.index_tree() nodes_1 = [n.id for n in t1.traverse(self_before=False, self_after=True)] self.assertEqual(nodes_1, [0, 1, 2, 3, 6, 4, 5, 7, 8]) npt.assert_equal(child_1, np.array([[2, 0, 1], [6, 2, 3], [7, 4, 5], [8, 6, 7]])) # test for second tree: strictly bifurcating id_2, child_2 = t2.index_tree() nodes_2 = [n.id for n in t2.traverse(self_before=False, self_after=True)] self.assertEqual(nodes_2, 
[0, 1, 4, 2, 3, 5, 8, 6, 7, 9, 10]) npt.assert_equal(child_2, np.array([[4, 0, 1], [5, 2, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]])) # test for third tree: contains trifurcation and single-child parent id_3, child_3 = t3.index_tree() nodes_3 = [n.id for n in t3.traverse(self_before=False, self_after=True)] self.assertEqual(nodes_3, [0, 1, 2, 4, 3, 5, 8, 6, 7, 9, 10]) npt.assert_equal(child_3, np.array([[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]])) def test_unroot(self): """Convert a rooted tree into unrooted.""" # default behavior t = TreeNode.read(["((a,b)c,(d,e)f)g;"]) t.unroot() exp = "(a,b,(d,e)f)c;\n" self.assertEqual(str(t), exp) # choose the other side t = TreeNode.read(["((a,b)c,(d,e)f)g;"]) t.unroot(side=1) exp = "((a,b)c,d,e)f;\n" self.assertEqual(str(t), exp) # with branch lengths t = TreeNode.read(["((a:2.0,b:1.5)c:0.5,(d:1.0,e:1.2)f:0.3)g;"]) t.unroot() exp = "(a:2.0,b:1.5,(d:1.0,e:1.2)f:0.8)c;\n" self.assertEqual(str(t), exp) # other child has no branch length t = TreeNode.read(["((a,b)c:1.0,(d,e)f)g;"]) t.unroot() exp = "(a,b,(d,e)f:1.0)c;\n" self.assertEqual(str(t), exp) # first child is a tip t = TreeNode.read(["(a,(b,c)d)e;"]) t.unroot() exp = "(a,b,c)d;\n" self.assertEqual(str(t), exp) # both children are tips t = TreeNode.read(["(a,b)c;"]) t.unroot() exp = "(b)a;\n" self.assertEqual(str(t), exp) # tree is already unrooted t = TreeNode.read(["(a,b,(d,e)f)c;"]) t.unroot() exp = "(a,b,(d,e)f)c;\n" self.assertEqual(str(t), exp) def test_root_at(self): """Root tree at a given node.""" t = TreeNode.read(["(((a,b)c,(d,e)f)g,h)i;"]) # original behavior (name as branch label); deprecated obs = str(t.root_at("c")) exp = "(a,b,((d,e)f,(h)g)c)root;\n" self.assertEqual(obs, exp) # root at internal node obs = str(t.root_at("c", branch_attrs=[])) exp = "(a,b,((d,e)f,(h)i)g)c;\n" self.assertEqual(obs, exp) # root at self obs = str(t.find("c").root_at(branch_attrs=[])) self.assertEqual(obs, exp) # root at tip (and input node instead of name) obs = 
str(t.root_at(t.find("h"), branch_attrs=[])) exp = "((((a,b)c,(d,e)f)g)i)h;\n" self.assertEqual(obs, exp) # root at root (no change) obs = str(t.root_at("i", branch_attrs=[])) self.assertEqual(obs, str(t)) def test_root_at_above(self): """Root tree at the branch above a given node.""" # no branch length t = TreeNode.read(["(((a,b)c,(d,e)f)g,h)i;"]) obs = str(t.root_at("c", above=True, branch_attrs=[])) exp = "((a,b)c,((d,e)f,(h)i)g)root;\n" self.assertEqual(obs, exp) # root at midpoint of branch t = TreeNode.read(["(((a,b)c:1.0,(d,e)f)g,h)i;"]) obs = str(t.root_at("c", above=True, branch_attrs=[])) exp = "((a,b)c:0.5,((d,e)f,(h)i)g:0.5)root;\n" self.assertEqual(obs, exp) # root at specific position t = TreeNode.read(["(((a,b)c:1.0,(d,e)f)g,h)i;"]) obs = str(t.root_at("c", above=0.4, branch_attrs=[])) exp = "((a,b)c:0.4,((d,e)f,(h)i)g:0.6)root;\n" self.assertEqual(obs, exp) # with branch support t = TreeNode.read(["(((a,b)'90:c',(d,e)'80:f')g,h)i;"]) t.assign_supports() obs = str(t.root_at("c", above=True, branch_attrs=[])) exp = "((a,b)'90:c',((d,e)'80:f',(h)i)'90:g')root;\n" self.assertEqual(obs, exp) def test_root_at_reset(self): """Root tree while resetting original root.""" t = TreeNode.read(["(((a,b)c,(d,e)f)g,h)i;"]) # unroot tree prior to rerooting obs = str(t.root_at("c", reset=True, branch_attrs=[])) exp = "(a,b,((d,e)f,h)g)c;\n" self.assertEqual(obs, exp) # root at a basal node (which will be avoided during unrooting) obs = str(t.root_at("g", reset=True, branch_attrs=[])) exp = "((a,b)c,(d,e)f,h)g;\n" self.assertEqual(obs, exp) # tree is already unrooted t = TreeNode.read(["((a,b)c,d,e)f;"]) obs = str(t.root_at("c", branch_attrs=[], reset=True)) exp = str(t.root_at("c", branch_attrs=[])) self.assertEqual(obs, exp) def test_root_at_midpoint(self): """Root tree at the midpoint""" t = self.TreeRoot for n in t.traverse(): n.length = 1 # g and h are farthest apart, by 5, therefore root should be # 2.5 away from h, i.e., midpoint between b and c result = 
t.root_at_midpoint() self.assertEqual(result.distance(result.find("e")), 1.5) self.assertEqual(result.distance(result.find("g")), 2.5) exp_dist = t.tip_tip_distances() obs_dist = result.tip_tip_distances() self.assertEqual(obs_dist, exp_dist) def test_root_at_midpoint_no_lengths(self): # should get same tree back (a copy) nwk = "(a,b)c;\n" t = TreeNode.read([nwk]) obs = t.root_at_midpoint() self.assertEqual(str(obs), nwk) def test_root_at_midpoint_tie(self): # original behavior (name as branch label); deprecated t = TreeNode.read(["(((a:1,b:1)c:2,(d:3,e:4)f:5),g:1)root;"]) obs = t.root_at_midpoint() exp = TreeNode.read(["((d:3,e:4)f:2,((a:1,b:1)c:2,(g:1)):3)root;"]) for o, e in zip(obs.traverse(), exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) t = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)f:5,g:1)h;"]) # farthest tip-to-tip distance is 12 (a or b to e) # therefore new root should be 2 above f obs = t.root_at_midpoint(branch_attrs=[]) exp = TreeNode.read(["((d:3,e:4)f:2,((a:1,b:1)c:2,g:1)h:3)root;"]) for o, e in zip(obs.traverse(), exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) # no root name obs = t.root_at_midpoint(branch_attrs=[], root_name=None) self.assertIsNone(obs.name) # with branch support t = TreeNode.read(["((a:1,b:1)c:2,(d:3,e:4)'80:f':5,g:1)h;"]) t.assign_supports() obs = t.root_at_midpoint(branch_attrs=[]) exp = TreeNode.read(["((d:3,e:4)'80:f':2,((a:1,b:1)c:2,g:1)'80:h':3)root;"]) exp.assign_supports() for o, e in zip(obs.traverse(), exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) self.assertEqual(o.support, e.support) def test_root_at_midpoint_node(self): t = TreeNode.read(["(((a:2,b:3)c:1,d:1)e:1,f:3)g;"]) # farthest tip-to-tip distance is 8 (b - c - e - f) # therefore new root should be at e obs = t.root_at_midpoint(branch_attrs=[]) exp = TreeNode.read(["((a:2.0,b:3.0)c:1.0,d:1.0,(f:3.0)g:1.0)e;"]) for o, e in zip(obs.traverse(), 
exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) # remove original root obs = t.root_at_midpoint(branch_attrs=[], reset=True) exp = TreeNode.read(["((a:2.0,b:3.0)c:1.0,d:1.0,f:4.0)e;"]) for o, e in zip(obs.traverse(), exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) def test_root_by_outgroup(self): tree = TreeNode.read(["((((a,b),(c,d)),(e,f)),g);"]) # outgroup is monophyletic obs = str(tree.root_by_outgroup(["a", "b"])) exp = "((a,b),((c,d),((e,f),g)));\n" self.assertEqual(obs, exp) # outgroup is monophyletic after rotating obs = str(tree.root_by_outgroup(["e", "f", "g"])) exp = "(((e,f),g),((c,d),(b,a)));\n" self.assertEqual(obs, exp) # outgroup is a single taxon obs = str(tree.root_by_outgroup(["a"])) exp = "(a,(b,((c,d),((e,f),g))));\n" self.assertEqual(obs, exp) # outgroup is not monophyletic msg = "Outgroup is not monophyletic in the tree." with self.assertRaisesRegex(TreeError, msg): tree.root_by_outgroup(["a", "c"]) # outgroup has extra taxa msg = "Outgroup is not a proper subset of taxa in the tree." 
with self.assertRaisesRegex(TreeError, msg): tree.root_by_outgroup(["a", "b", "x"]) # outgroup is not in tree with self.assertRaisesRegex(TreeError, msg): tree.root_by_outgroup(["x", "y"]) # outgroup is the whole tree with self.assertRaisesRegex(TreeError, msg): tree.root_by_outgroup("abcdefg") # generate unrooted tree obs = str(tree.root_by_outgroup(["a", "b"], above=False)) exp = "(a,b,((c,d),((e,f),g)));\n" self.assertEqual(obs, exp) # keep old root node obs = str(tree.root_by_outgroup(["a", "b"], reset=False)) exp = "((a,b),((c,d),((e,f),(g))));\n" self.assertEqual(obs, exp) # specify root name obs = str(tree.root_by_outgroup(["a", "b"], root_name="root")) exp = "((a,b),((c,d),((e,f),g)))root;\n" self.assertEqual(obs, exp) # transfer branch support tree = TreeNode.read(["((((a,b)80,(c,d)),(e,f)),g);"]) tree.assign_supports() obs = str(tree.root_by_outgroup(["a", "b"])) exp = "((a,b)80,((c,d),((e,f),g))80);\n" self.assertEqual(obs, exp) # transfer custom branch attribute tree = TreeNode.read(["((((a,b),(c,d))x,(e,f)),g);"]) obs = str(tree.root_by_outgroup(["a", "b"], branch_attrs=["name"])) exp = "((a,b),((c,d),((e,f),g)x));\n" self.assertEqual(obs, exp) def test_compare_subsets(self): """compare_subsets should return the fraction of shared subsets""" t = TreeNode.read(["((H,G),(R,M));"]) t2 = TreeNode.read(["(((H,G),R),M);"]) t4 = TreeNode.read(["(((H,G),(O,R)),X);"]) result = t.compare_subsets(t) self.assertEqual(result, 0) result = t2.compare_subsets(t2) self.assertEqual(result, 0) result = t.compare_subsets(t2) self.assertEqual(result, 0.5) result = t.compare_subsets(t4) self.assertEqual(result, 1 - 2. / 5) result = t.compare_subsets(t4, exclude_absent_taxa=True) self.assertEqual(result, 1 - 2. 
/ 3) result = t.compare_subsets(self.TreeRoot, exclude_absent_taxa=True) self.assertEqual(result, 1) result = t.compare_subsets(self.TreeRoot) self.assertEqual(result, 1) def test_compare_rfd(self): """compare_rfd should return the Robinson Foulds distance""" t = TreeNode.read(["((H,G),(R,M));"]) t2 = TreeNode.read(["(((H,G),R),M);"]) t4 = TreeNode.read(["(((H,G),(O,R)),X);"]) obs = t.compare_rfd(t2) exp = 2.0 self.assertEqual(obs, exp) self.assertEqual(t.compare_rfd(t2), t2.compare_rfd(t)) obs = t.compare_rfd(t2, proportion=True) exp = 0.5 self.assertEqual(obs, exp) with self.assertRaises(ValueError): t.compare_rfd(t4) def test_assign_ids(self): """Assign IDs to the tree""" t1 = TreeNode.read(["(((a,b),c),(e,f),(g));"]) t2 = TreeNode.read(["(((a,b),c),(e,f),(g));"]) t3 = TreeNode.read(["((g),(e,f),(c,(a,b)));"]) t1_copy = t1.copy() t1.assign_ids() t2.assign_ids() t3.assign_ids() t1_copy.assign_ids() self.assertEqual([(n.name, n.id) for n in t1.traverse()], [(n.name, n.id) for n in t2.traverse()]) self.assertEqual([(n.name, n.id) for n in t1.traverse()], [(n.name, n.id) for n in t1_copy.traverse()]) self.assertNotEqual([(n.name, n.id) for n in t1.traverse()], [(n.name, n.id) for n in t3.traverse()]) def test_assign_ids_index_tree(self): """assign_ids and index_tree should assign the same IDs""" t1 = TreeNode.read(["(((a,b),c),(d,e));"]) t2 = TreeNode.read(["(((a,b),(c,d)),(e,f));"]) t3 = TreeNode.read(["(((a,b,c),(d)),(e,f));"]) t1_copy = t1.copy() t2_copy = t2.copy() t3_copy = t3.copy() t1.assign_ids() t1_copy.index_tree() t2.assign_ids() t2_copy.index_tree() t3.assign_ids() t3_copy.index_tree() self.assertEqual([n.id for n in t1.traverse()], [n.id for n in t1_copy.traverse()]) self.assertEqual([n.id for n in t2.traverse()], [n.id for n in t2_copy.traverse()]) self.assertEqual([n.id for n in t3.traverse()], [n.id for n in t3_copy.traverse()]) def test_unrooted_copy(self): tree = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) node = tree.find("d") # name as branch label 
(default behavior, but will change in the # future) obs = node.unrooted_copy() exp = "(b,c,(a,((f,g)h)e)d)root;\n" self.assertEqual(str(obs), exp) # name as node label obs = node.unrooted_copy(branch_attrs={"length"}) exp = "(b,c,(a,((f,g)h)i)e)d;\n" self.assertEqual(str(obs), exp) # name the new root node (only when it doesn't have one) obs = node.unrooted_copy(root_name="hello") exp = "(b,c,(a,((f,g)h)e)d)hello;\n" self.assertEqual(str(obs), exp) obs = node.unrooted_copy(branch_attrs={"length"}, root_name="hello") exp = "(b,c,(a,((f,g)h)i)e)d;\n" self.assertEqual(str(obs), exp) # transfer branch support to opposite node tree = TreeNode.read(["((a,b)90,(c,d)90);"]) node = tree.find("a") obs = node.unrooted_copy(branch_attrs={"support", "length"}) exp = "((b,((c,d)90))90)a;\n" self.assertEqual(str(obs), exp) tree.assign_supports() obs = node.unrooted_copy(branch_attrs={"support", "length"}) exp = "((b,((c,d)90)90))a;\n" self.assertEqual(str(obs), exp) # retain custom attributes tree = TreeNode.read(["(((a,b)c,d)e,f)g;"]) tree.find("c").dummy = "this" tree.find("e").dummy = "that" obs = tree.find("c").unrooted_copy(branch_attrs={"length"}) exp = "(a,b,(d,(f)g)e)c;\n" self.assertEqual(str(obs), exp) self.assertEqual(obs.dummy, "this") self.assertEqual(obs.find("e").dummy, "that") self.assertIsNone(getattr(obs.find("d"), "dummy", None)) # deep vs shallow copy tree = TreeNode.read(["(((a,b)c,d)e,f)g;"]) tree.find("c").dummy = [1, [2, 3], 4] tcopy = tree.unrooted_copy(deep=True) tcopy.find("c").dummy[1].append(0) self.assertListEqual(tree.find("c").dummy[1], [2, 3]) tcopy = tree.unrooted_copy() tcopy.find("c").dummy[1].append(0) self.assertListEqual(tree.find("c").dummy[1], [2, 3, 0]) def test_unrooted_deepcopy(self): t = TreeNode.read(["((a,(b,c)d)e,(f,g)h)i;"]) exp = "(b,c,(a,((f,g)h)e)d)root;\n" obs = t.find("d").unrooted_deepcopy() self.assertEqual(str(obs), exp) t_ids = {id(n) for n in t.traverse()} obs_ids = {id(n) for n in obs.traverse()} 
self.assertEqual(t_ids.intersection(obs_ids), set()) def test_unrooted_move(self): t = TreeNode.read(["(((a:1,b:1)c:1,(d:1,e:1)f:2)g:0.5,(h:1,i:1)j:0.5)k;"]) tcopy = t.copy() obs = tcopy.find("c") obs.unrooted_move() exp = TreeNode.read(["(a:1,b:1,((d:1,e:1)f:2,((h:1,i:1)j:0.5)k:0.5)g:1)c;"]) self.assertTrue(obs.is_root()) for o, e in zip(obs.traverse(), exp.traverse()): self.assertEqual(o.name, e.name) self.assertEqual(o.length, e.length) def test_descending_branch_length_bug_1847(self): tr = TreeNode.read([ "(((A:.1,B:1.2)C:.6,(D:.9,E:.6)F:.9)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tr.length = 1 tdbl = tr.descending_branch_length() npt.assert_almost_equal(tdbl, 8.9) def test_descending_branch_length(self): """Calculate descending branch_length""" tr = TreeNode.read([ "(((A:.1,B:1.2)C:.6,(D:.9,E:.6)F:.9)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tdbl = tr.descending_branch_length() sdbl = tr.descending_branch_length(["A", "E"]) npt.assert_almost_equal(tdbl, 8.9) npt.assert_almost_equal(sdbl, 2.2) self.assertRaises(ValueError, tr.descending_branch_length, ["A", "DNE"]) self.assertRaises(ValueError, tr.descending_branch_length, ["A", "C"]) tr = TreeNode.read([ "(((A,B:1.2)C:.6,(D:.9,E:.6)F:.9)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tdbl = tr.descending_branch_length() npt.assert_almost_equal(tdbl, 8.8) tr = TreeNode.read([ "(((A,B:1.2)C:.6,(D:.9,E:.6)F)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tdbl = tr.descending_branch_length() npt.assert_almost_equal(tdbl, 7.9) tr = TreeNode.read([ "(((A,B:1.2)C:.6,(D:.9,E:.6)F)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tdbl = tr.descending_branch_length(["A", "D", "E"]) npt.assert_almost_equal(tdbl, 2.1) tr = TreeNode.read([ "(((A,B:1.2)C:.6,(D:.9,E:.6)F:.9)G:2.4,(H:.4,I:.5)J:1.3)K;"]) tdbl = tr.descending_branch_length(["I", "D", "E"]) npt.assert_almost_equal(tdbl, 6.6) # test with a situation where we have unnamed internal nodes tr = TreeNode.read([ "(((A,B:1.2):.6,(D:.9,E:.6)F):2.4,(H:.4,I:.5)J:1.3);"]) tdbl = tr.descending_branch_length() npt.assert_almost_equal(tdbl, 7.9) def 
test_to_array(self): """Convert a tree to arrays""" t = TreeNode.read([ "(((a:1,b:2,c:3)x:4,(d:5)y:6)z:7,(e:8,f:9)z:10);"]) id_index, child_index = t.index_tree() arrayed = t.to_array() self.assertEqual(id_index, arrayed["id_index"]) npt.assert_equal(child_index, arrayed["child_index"]) exp = np.array([1, 2, 3, 5, 4, 6, 8, 9, 7, 10, np.nan]) obs = arrayed["length"] npt.assert_equal(obs, exp) exp = np.array(["a", "b", "c", "d", "x", "y", "e", "f", "z", "z", None]) obs = arrayed["name"] npt.assert_equal(obs, exp) exp = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) obs = arrayed["id"] npt.assert_equal(obs, exp) def test_to_array_attrs(self): t = TreeNode.read([ "(((a:1,b:2,c:3)x:4,(d:5)y:6)z:7,(e:8,f:9)z:10);"]) id_index, child_index = t.index_tree() arrayed = t.to_array(attrs=[("name", object)]) # should only have id_index, child_index, and name since we specified # attrs self.assertEqual(len(arrayed), 3) self.assertEqual(id_index, arrayed["id_index"]) npt.assert_equal(child_index, arrayed["child_index"]) exp = np.array(["a", "b", "c", "d", "x", "y", "e", "f", "z", "z", None]) obs = arrayed["name"] npt.assert_equal(obs, exp) # invalid attrs with self.assertRaises(AttributeError): t.to_array(attrs=[("name", object), ("brofist", int)]) def test_to_array_nan_length_value(self): t = TreeNode.read(["((a:1, b:2)c:3)root;"]) indexed = t.to_array(nan_length_value=None) npt.assert_equal(indexed["length"], np.array([1, 2, 3, np.nan], dtype=float)) indexed = t.to_array(nan_length_value=0.0) npt.assert_equal(indexed["length"], np.array([1, 2, 3, 0.0], dtype=float)) indexed = t.to_array(nan_length_value=42.0) npt.assert_equal(indexed["length"], np.array([1, 2, 3, 42.0], dtype=float)) t = TreeNode.read(["((a:1, b:2)c:3)root:4;"]) indexed = t.to_array(nan_length_value=42.0) npt.assert_equal(indexed["length"], np.array([1, 2, 3, 4], dtype=float)) t = TreeNode.read(["((a:1, b:2)c)root;"]) indexed = t.to_array(nan_length_value=42.0) npt.assert_equal(indexed["length"], np.array([1, 2, 
42.0, 42.0], dtype=float)) def test_from_taxonomy(self): lineages = [("1", ["a", "b", "c", "d", "e", "f", "g"]), ("2", ["a", "b", "c", None, None, "x", "y"]), ("3", ["h", "i", "j", "k", "l", "m", "n"]), ("4", ["h", "i", "j", "k", "l", "m", "q"]), ("5", ["h", "i", "j", "k", "l", "m", "n"])] exp = TreeNode.read([ "((((((((1)g)f)e)d,((((2)y)x)))c)b)a," "(((((((3,5)n,(4)q)m)l)k)j)i)h);"]) # input as 2-element tuples obs = TreeNode.from_taxonomy(lineages) self.assertIs(type(obs), TreeNode) self.assertEqual(obs.compare_subsets(exp), 0.0) obs = TreeNodeSubclass.from_taxonomy(lineages) self.assertIs(type(obs), TreeNodeSubclass) # input as dictionary dict_ = dict(lineages) obs = TreeNode.from_taxonomy(dict_) self.assertEqual(obs.compare_subsets(exp), 0.0) # input as data frame df_ = pd.DataFrame([x[1] for x in lineages], [x[0] for x in lineages]) obs = TreeNode.from_taxonomy(df_) self.assertEqual(obs.compare_subsets(exp), 0.0) def test_to_taxonomy(self): input_lineages = {"1": ["a", "b", "c", "d", "e", "f", "g"], "2": ["a", "b", "c", None, None, "x", "y"], "3": ["h", "i", "j", "k", "l", "m", "n"], "4": ["h", "i", "j", "k", "l", "m", "q"], "5": ["h", "i", "j", "k", "l", "m", "n"]} tree = TreeNode.from_taxonomy(input_lineages.items()) exp = sorted(input_lineages.items()) obs = [(n.name, lin) for n, lin in tree.to_taxonomy(allow_empty=True)] self.assertEqual(sorted(obs), exp) def test_to_taxonomy_filter(self): input_lineages = {"1": ["a", "b", "c", "d", "e", "f", "g"], "2": ["a", "b", "c", None, None, "x", "y"], "3": ["h", "i", "j", "k", "l"], # test jagged "4": ["h", "i", "j", "k", "l", "m", "q"], "5": ["h", "i", "j", "k", "l", "m", "n"]} tree = TreeNode.from_taxonomy(input_lineages.items()) def f(node, lin): return "k" in lin or "x" in lin exp = [("2", ["a", "b", "c", "x", "y"]), ("3", ["h", "i", "j", "k", "l"]), ("4", ["h", "i", "j", "k", "l", "m", "q"]), ("5", ["h", "i", "j", "k", "l", "m", "n"])] obs = [(n.name, lin) for n, lin in tree.to_taxonomy(filter_f=f)] 
self.assertEqual(sorted(obs), exp) def test_linkage_matrix(self): # Ensure matches: http://www.southampton.ac.uk/~re1u06/teaching/upgma/ id_list = ["A", "B", "C", "D", "E", "F", "G"] linkage = np.asarray([[1.0, 5.0, 1.0, 2.0], [0.0, 3.0, 8.0, 2.0], [6.0, 7.0, 12.5, 3.0], [8.0, 9.0, 16.5, 5.0], [2.0, 10.0, 29.0, 6.0], [4.0, 11.0, 34.0, 7.0]]) tree = TreeNode.from_linkage_matrix(linkage, id_list) self.assertIs(type(tree), TreeNode) self.assertEqual("(E:17.0,(C:14.5,((A:4.0,D:4.0):4.25,(G:6.25,(B:0.5," "F:0.5):5.75):2.0):6.25):2.5);\n", str(tree)) tree = TreeNodeSubclass.from_linkage_matrix(linkage, id_list) self.assertIs(type(tree), TreeNodeSubclass) def test_shuffle_invalid_iter(self): shuffler = self.simple_t.shuffle(n=-1) with self.assertRaises(ValueError): next(shuffler) def test_shuffle_n_2(self): exp = ["((a,b)i1,(d,c)i2)root;\n", "((a,b)i1,(c,d)i2)root;\n", "((a,b)i1,(d,c)i2)root;\n", "((a,b)i1,(c,d)i2)root;\n", "((a,b)i1,(d,c)i2)root;\n"] obs_g = self.simple_t.shuffle(k=2, shuffle_f=self.rev_f, n=np.inf) obs = [str(next(obs_g)) for i in range(5)] self.assertEqual(obs, exp) def test_shuffle_n_none(self): exp = ["((d,c)i1,(b,a)i2)root;\n", "((a,b)i1,(c,d)i2)root;\n", "((d,c)i1,(b,a)i2)root;\n", "((a,b)i1,(c,d)i2)root;\n"] obs_g = self.simple_t.shuffle(shuffle_f=self.rev_f, n=4) obs = [str(next(obs_g)) for i in range(4)] self.assertEqual(obs, exp) def test_shuffle_complex(self): exp = ["(((a,b)int1,(x,y,(w,z)int2,(f,e)int3)int4),(d,c)int5);\n", "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);\n", "(((a,b)int1,(x,y,(w,z)int2,(f,e)int3)int4),(d,c)int5);\n", "(((a,b)int1,(x,y,(w,z)int2,(c,d)int3)int4),(e,f)int5);\n"] obs_g = self.complex_tree.shuffle(shuffle_f=self.rev_f, names=["c", "d", "e", "f"], n=4) obs = [str(next(obs_g)) for i in range(4)] self.assertEqual(obs, exp) def test_shuffle_names(self): exp = ["((c,a)i1,(b,d)i2)root;\n", "((b,c)i1,(a,d)i2)root;\n", "((a,b)i1,(c,d)i2)root;\n", "((c,a)i1,(b,d)i2)root;\n"] obs_g = 
self.simple_t.shuffle(names=["a", "b", "c"], shuffle_f=self.rotate_f, n=np.inf) obs = [str(next(obs_g)) for i in range(4)] self.assertEqual(obs, exp) def test_shuffle_raises(self): with self.assertRaises(ValueError): next(self.simple_t.shuffle(k=1)) with self.assertRaises(ValueError): next(self.simple_t.shuffle(k=5, names=["a", "b"])) with self.assertRaises(MissingNodeError): next(self.simple_t.shuffle(names=["x", "y"])) def test_assign_supports(self): """Extract support values of internal nodes.""" # test nodes with support values alone as labels tree = TreeNode.read(["((a,b)75,(c,d)90);"]) tree.assign_supports() node1, node2 = tree.children # check if internal nodes are assigned correct support values self.assertEqual(node1.support, 75) self.assertEqual(node2.support, 90) # check if original node names are cleared self.assertIsNone(node1.name) self.assertIsNone(node2.name) # check if support values are not assigned to root and tips self.assertIsNone(tree.support) for taxon in ("a", "b", "c", "d"): self.assertIsNone(tree.find(taxon).support) # test nodes with support values and branch lengths tree = TreeNode.read(["((a,b)0.85:1.23,(c,d)0.95:4.56);"]) tree.assign_supports() node1, node2 = tree.children self.assertEqual(node1.support, 0.85) self.assertEqual(node2.support, 0.95) # test whether integer or float support values can be correctly parsed tree = TreeNode.read(["((a,b)75,(c,d)80.0,(e,f)97.5,(g,h)0.95);"]) tree.assign_supports() node1, node2, node3, node4 = tree.children self.assertTrue(isinstance(node1.support, int)) self.assertEqual(node1.support, 75) self.assertTrue(isinstance(node2.support, float)) self.assertEqual(node2.support, 80.0) self.assertTrue(isinstance(node3.support, float)) self.assertEqual(node3.support, 97.5) self.assertTrue(isinstance(node4.support, float)) self.assertEqual(node4.support, 0.95) # test support values that are negative or scientific notation (not a # common scenario but can happen) tree = 
TreeNode.read(["((a,b)-1.23,(c,d)1.23e-4);"]) tree.assign_supports() node1, node2 = tree.children self.assertEqual(node1.support, -1.23) self.assertEqual(node2.support, 0.000123) # test nodes with support and extra label tree = TreeNode.read(["((a,b)'80:X',(c,d)'60:Y');"]) tree.assign_supports() node1, node2 = tree.children self.assertEqual(node1.support, 80) self.assertEqual(node1.name, "X") self.assertEqual(node2.support, 60) self.assertEqual(node2.name, "Y") # test nodes without label, with non-numeric label, and with branch # length only tree = TreeNode.read(["((a,b),(c,d)x,(e,f):1.0);"]) tree.assign_supports() for node in tree.children: self.assertIsNone(node.support) def test_unpack(self): """Unpack an internal node.""" # test unpacking a node without branch length tree = TreeNode.read(["((c,d)a,(e,f)b);"]) tree.find("b").unpack() exp = "((c,d)a,e,f);\n" self.assertEqual(str(tree), exp) # test unpacking a node with branch length tree = TreeNode.read(["((c:2.0,d:3.0)a:1.0,(e:2.0,f:1.0)b:2.0);"]) tree.find("b").unpack() exp = "((c:2.0,d:3.0)a:1.0,e:4.0,f:3.0);" self.assertEqual(str(tree).rstrip(), exp) # test attempting to unpack root tree = TreeNode.read(["((d,e)b,(f,g)c)a;"]) msg = "Cannot unpack root." with self.assertRaisesRegex(TreeError, msg): tree.find("a").unpack() # test attempting to unpack tip msg = "Cannot unpack tip." 
with self.assertRaisesRegex(TreeError, msg): tree.find("d").unpack() def test_unpack_by_func(self): """Unpack internal nodes of a tree by a function.""" # unpack internal nodes with branch length <= 1.0 def func(x): return x.length <= 1.0 # will unpack node "a", but not tip "e" # will add the branch length of "a" to its child nodes "c" and "d" tree = TreeNode.read(["((c:2,d:3)a:1,(e:1,f:2)b:2);"]) tree.unpack_by_func(func) exp = "((e:1.0,f:2.0)b:2.0,c:3.0,d:4.0);" self.assertEqual(str(tree).rstrip(), exp) # unpack internal nodes with branch length < 2.01 # will unpack both "a" and "b" tree = TreeNode.read(["((c:2,d:3)a:1,(e:1,f:2)b:2);"]) tree.unpack_by_func(lambda x: x.length <= 2.0) exp = "(c:3.0,d:4.0,e:3.0,f:4.0);" self.assertEqual(str(tree).rstrip(), exp) # unpack two nested nodes "a" and "c" simultaneously tree = TreeNode.read(["(((e:3,f:2)c:1,d:3)a:1,b:4);"]) tree.unpack_by_func(lambda x: x.length <= 2.0) exp = "(b:4.0,d:4.0,e:5.0,f:4.0);" self.assertEqual(str(tree).rstrip(), exp) # test a complicated scenario (unpacking nodes "g", "h" and "m") def func(x): return x.length < 2.0 tree = TreeNode.read(["(((a:1.04,b:2.32,c:1.44)d:3.20," "(e:3.91,f:2.47)g:1.21)h:1.75," "(i:4.14,(j:2.06,k:1.58)l:3.32)m:0.77);"]) tree.unpack_by_func(func) exp = ("((a:1.04,b:2.32,c:1.44)d:4.95,e:6.87,f:5.43,i:4.91," "(j:2.06,k:1.58)l:4.09);") self.assertEqual(str(tree).rstrip(), exp) # unpack nodes with support < 75 def func(x): return x.support < 75 tree = TreeNode.read(["(((a,b)85,(c,d)78)75,(e,(f,g)64)80);"]) tree.assign_supports() tree.unpack_by_func(func) exp = "(((a,b)85,(c,d)78)75,(e,f,g)80);" self.assertEqual(str(tree).rstrip(), exp) # unpack nodes with support < 85 tree = TreeNode.read(["(((a,b)85,(c,d)78)75,(e,(f,g)64)80);"]) tree.assign_supports() tree.unpack_by_func(lambda x: x.support < 85) exp = "((a,b)85,c,d,e,f,g);" self.assertEqual(str(tree).rstrip(), exp) # unpack nodes with support < 0.95 tree = TreeNode.read(["(((a,b)0.97,(c,d)0.98)1.0,(e,(f,g)0.88)0.96);"]) 
tree.assign_supports() tree.unpack_by_func(lambda x: x.support < 0.95) exp = "(((a,b)0.97,(c,d)0.98)1.0,(e,f,g)0.96);" self.assertEqual(str(tree).rstrip(), exp) # test a case where there are branch lengths, none support values and # node labels tree = TreeNode.read(["(((a:1.02,b:0.33)85:0.12,(c:0.86,d:2.23)" "70:3.02)75:0.95,(e:1.43,(f:1.69,g:1.92)64:0.20)" "node:0.35)root;"]) tree.assign_supports() tree.unpack_by_func(lambda x: x.support is not None and x.support < 75) exp = ("(((a:1.02,b:0.33)85:0.12,c:3.88,d:5.25)75:0.95," "(e:1.43,f:1.89,g:2.12)node:0.35)root;") self.assertEqual(str(tree).rstrip(), exp) def test_from_taxdump(self): # same example as in skbio.io.format.taxdump nodes = pd.DataFrame([ [1, 1, "no rank"], [2, 131567, "superkingdom"], [543, 91347, "family"], [548, 570, "species"], [561, 543, "genus"], [562, 561, "species"], [570, 543, "genus"], [620, 543, "genus"], [622, 620, "species"], [766, 28211, "order"], [1224, 2, "phylum"], [1236, 1224, "class"], [28211, 1224, "class"], [91347, 1236, "order"], [118884, 1236, "no rank"], [126792, 36549, "species"], [131567, 1, "no rank"], [585056, 562, "no rank"], [1038927, 562, "no rank"], [2580236, 488338, "species"]], columns=["tax_id", "parent_tax_id", "rank"]).set_index("tax_id") names = pd.DataFrame([ [1, "root", np.nan, "scientific name"], [2, "Bacteria", "Bacteria ", "scientific name"], [2, "eubacteria", np.nan, "genbank common name"], [543, "Enterobacteriaceae", np.nan, "scientific name"], [548, "Klebsiella aerogenes", np.nan, "scientific name"], [561, "Escherichia", np.nan, "scientific name"], [562, "\"Bacillus coli\" Migula 1895", np.nan, "authority"], [562, "Escherichia coli", np.nan, "scientific name"], [562, "Escherichia/Shigella coli", np.nan, "equivalent name"], [570, "Donovania", np.nan, "synonym"], [570, "Klebsiella", np.nan, "scientific name"], [620, "Shigella", np.nan, "scientific name"], [622, "Shigella dysenteriae", np.nan, "scientific name"], [766, "Rickettsiales", np.nan, "scientific 
name"], [1224, "Proteobacteria", np.nan, "scientific name"], [1236, "Gammaproteobacteria", np.nan, "scientific name"], [28211, "Alphaproteobacteria", np.nan, "scientific name"], [91347, "Enterobacterales", np.nan, "scientific name"], [118884, "unclassified Gammaproteobacteria", np.nan, "scientific name"], [126792, "Plasmid pPY113", np.nan, "scientific name"], [131567, "cellular organisms", np.nan, "scientific name"], [585056, "Escherichia coli UMN026", np.nan, "scientific name"], [1038927, "Escherichia coli O104:H4", np.nan, "scientific name"], [2580236, "synthetic Escherichia coli Syn61", np.nan, "scientific name"]], columns=["tax_id", "name_txt", "unique_name", "name_class"]).set_index("tax_id") # nodes without names (use tax_id as name) obs = TreeNode.from_taxdump(nodes) exp = ("(((((((((585056,1038927)562)561,(548)570,(622)620)543)91347," "118884)1236,(766)28211)1224)2)131567)1;") self.assertEqual(str(obs).rstrip(), exp) self.assertEqual(obs.count(), 18) self.assertEqual(obs.count(tips=True), 6) # default scenario (nodes and names) obs = TreeNode.from_taxdump(nodes, names) # check tree is in same size self.assertEqual(obs.count(), 18) self.assertEqual(obs.count(tips=True), 6) # check id, name and rank are correctly set at root self.assertEqual(obs.id, 1) self.assertEqual(obs.name, "root") self.assertEqual(obs.rank, "no rank") # check an internal node node = obs.find("Enterobacteriaceae") self.assertEqual(node.id, 543) self.assertEqual(node.rank, "family") # check its children (which should preserve input order) self.assertEqual(len(node.children), 3) self.assertListEqual([x.name for x in node.children], [ "Escherichia", "Klebsiella", "Shigella"]) # check that non-scientific name isn"t used with self.assertRaises(MissingNodeError): obs.find("Donovania") # name as a dictionary names = names[names["name_class"] == "scientific name"][ "name_txt"].to_dict() obs = TreeNode.from_taxdump(nodes, names) self.assertEqual(obs.count(), 18) self.assertEqual(obs.name, "root") 
self.assertEqual(obs.find("Enterobacteriaceae").id, 543) # nodes has no top level nodes = pd.DataFrame([ [2, 1, "A"], [3, 2, "B"], [1, 3, "C"]], columns=["tax_id", "parent_tax_id", "rank"]).set_index("tax_id") with self.assertRaises(ValueError) as ctx: TreeNode.from_taxdump(nodes) self.assertEqual(str(ctx.exception), "There is no top-level node.") # nodes has more than one top level nodes = pd.DataFrame([ [1, 1, "A"], [2, 2, "B"], [3, 3, "C"]], columns=["tax_id", "parent_tax_id", "rank"]).set_index("tax_id") with self.assertRaises(ValueError) as ctx: TreeNode.from_taxdump(nodes) self.assertEqual(str( ctx.exception), "There are more than one top-level node.") sample = """ ( ( xyz:0.28124, ( def:0.24498, mno:0.03627) :0.17710) :0.04870, abc:0.05925, ( ghi:0.06914, jkl:0.13776) :0.09853); """ node_data_sample = """ ( ( xyz:0.28124, ( def:0.24498, mno:0.03627) 'A':0.17710) B:0.04870, abc:0.05925, ( ghi:0.06914, jkl:0.13776) C:0.09853); """ minimal = "();" no_names = "((,),(,));" missing_tip_name = "((a,b),(c,));" empty = "();" single = "(abc:3);" double = "(abc:3, def:4);" onenest = "(abc:3, (def:4, ghi:5):6 );" nodedata = "(abc:3, (def:4, ghi:5)jkl:6 );" exp_ascii_art_three_children = r""" /-a | ---------| /-b | | \--------|--c | \-d""" if __name__ == "__main__": main() scikit-bio-0.6.2/skbio/util/000077500000000000000000000000001464262511300156445ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/util/__init__.py000066400000000000000000000036231464262511300177610ustar00rootroot00000000000000r"""Utilities for Developers (:mod:`skbio.util`) ============================================ .. currentmodule:: skbio.util This package provides general exception/warning definitions used throughout scikit-bio, as well as various utility functionality, including I/O and unit-testing convenience functions. Testing functionality ^^^^^^^^^^^^^^^^^^^^^ Common functionality to support testing in skbio. .. 
autosummary:: :toctree: generated/ get_data_path assert_ordination_results_equal assert_data_frame_almost_equal Miscellaneous functionality ^^^^^^^^^^^^^^^^^^^^^^^^^^^ Generally useful functionality that doesn't fit in more specific locations. .. autosummary:: :toctree: generated/ cardinal_to_ordinal find_duplicates safe_md5 classproperty Developer warnings ^^^^^^^^^^^^^^^^^^ .. autosummary:: :toctree: generated/ EfficiencyWarning RepresentationWarning """ # noqa: D412, D416, D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from ._warning import EfficiencyWarning, RepresentationWarning, SkbioWarning from ._misc import cardinal_to_ordinal, find_duplicates, safe_md5, get_rng from ._testing import ( get_data_path, assert_ordination_results_equal, assert_data_frame_almost_equal, pytestrunner, ) from ._decorator import classproperty __all__ = [ "SkbioWarning", "EfficiencyWarning", "RepresentationWarning", "cardinal_to_ordinal", "find_duplicates", "safe_md5", "get_rng", "get_data_path", "assert_ordination_results_equal", "assert_data_frame_almost_equal", "classproperty", "pytestrunner", ] scikit-bio-0.6.2/skbio/util/_decorator.py000066400000000000000000000060121464262511300203360ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from ._exception import OverrideError # Adapted from http://stackoverflow.com/a/8313042/579416 def overrides(interface_class): """Indicate that a member is being overridden from a specific parent class. Decorator for class-level members. Used to indicate that a member is being overridden from a specific parent class. If the member does not have a docstring, it will pull one from the parent class. When chaining decorators, this should be first as it is relatively nondestructive. Parameters ---------- interface_class : class The class which has a member overridden by the decorated member. Returns ------- function The function is not changed or replaced. Raises ------ OverrideError If the `interface_class` does not possess a member of the same name as the decorated member. """ def overrider(method): if method.__name__ not in dir(interface_class): raise OverrideError( f"{method.__name__} is not present in parent " f"class: {interface_class.__name__}." ) backup = classproperty.__get__ classproperty.__get__ = lambda x, y, z: x if method.__doc__ is None: method.__doc__ = getattr(interface_class, method.__name__).__doc__ classproperty.__get__ = backup return method return overrider class classproperty(property): """Decorator for class-level properties. Supports read access only. The property will be read-only within an instance. However, the property can always be redefined on the class, since Python classes are mutable. Parameters ---------- func : function Method to make a class property. Returns ------- property Decorated method. Raises ------ AttributeError If the property is set on an instance. 
""" def __init__(self, func): name = func.__name__ doc = func.__doc__ super(classproperty, self).__init__(classmethod(func)) self.__name__ = name self.__doc__ = doc def __get__(self, cls, owner): return self.fget.__get__(None, owner)() def __set__(self, obj, value): raise AttributeError("can't set attribute") class classonlymethod(classmethod): """Just like `classmethod`, but it can't be called on an instance.""" def __get__(self, obj, cls=None): if obj is not None: raise TypeError( f"Class-only method called on an instance. Use " f"{cls.__name__}.{self.__func__.__name__} " "instead." ) return super().__get__(obj, cls) scikit-bio-0.6.2/skbio/util/_exception.py000066400000000000000000000012011464262511300203450ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- class TestingUtilError(Exception): """Raised when an exception is needed to test exception handling.""" __test__ = False # prevent py-test from collecting it class OverrideError(AssertionError): """Raised when a property does not exist in the parent class.""" pass scikit-bio-0.6.2/skbio/util/_misc.py000066400000000000000000000151241464262511300173130ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import hashlib import inspect from types import FunctionType import numpy as np def resolve_key(obj, key): """Resolve key given an object and key.""" if callable(key): return key(obj) elif hasattr(obj, "metadata"): return obj.metadata[key] raise TypeError( "Could not resolve key %r. Key must be callable or %s must" " have `metadata` attribute." % (key, obj.__class__.__name__) ) def make_sentinel(name): return type( name, (), {"__repr__": lambda s: name, "__str__": lambda s: name, "__class__": None}, )() def find_sentinels(function, sentinel): params = inspect.signature(function).parameters return [name for name, param in params.items() if param.default is sentinel] class MiniRegistry(dict): def __call__(self, name): """Act as a decorator to register functions with self.""" def decorator(func): self[name] = func return func return decorator def copy(self): """Use for inheritance.""" return self.__class__(super(MiniRegistry, self).copy()) def formatted_listing(self): """Produce an RST list with descriptions.""" if len(self) == 0: return "\tNone" else: return "\n".join( [ "\t%r\n\t %s" % (name, self[name].__doc__.split("\n")[0]) for name in sorted(self) ] ) def interpolate(self, obj, name): """Inject the formatted listing in the second blank line of `name`.""" f = getattr(obj, name) f2 = FunctionType( f.__code__, f.__globals__, name=f.__name__, argdefs=f.__defaults__, closure=f.__closure__, ) # Conveniently the original docstring is on f2, not the new ones if # inheritance is happening. I have no idea why. t = f2.__doc__.split("\n\n") t.insert(2, self.formatted_listing()) f2.__doc__ = "\n\n".join(t) setattr(obj, name, f2) def chunk_str(s, n, char): """Insert `char` character every `n` characters in string `s`. Canonically pronounced "chunkster". """ # Modified from http://stackoverflow.com/a/312464/3776794 if n < 1: raise ValueError( "Cannot split string into chunks with n=%d. 
n must be >= 1." % n ) return char.join((s[i : i + n] for i in range(0, len(s), n))) def cardinal_to_ordinal(n): """Return ordinal string version of cardinal int `n`. Parameters ---------- n : int Cardinal to convert to ordinal. Must be >= 0. Returns ------- str Ordinal version of cardinal `n`. Raises ------ ValueError If `n` is less than 0. Notes ----- This function can be useful when writing human-readable error messages. Examples -------- >>> from skbio.util import cardinal_to_ordinal >>> cardinal_to_ordinal(0) '0th' >>> cardinal_to_ordinal(1) '1st' >>> cardinal_to_ordinal(2) '2nd' >>> cardinal_to_ordinal(3) '3rd' """ # Taken and modified from http://stackoverflow.com/a/20007730/3776794 # Originally from http://codegolf.stackexchange.com/a/4712 by Gareth if n < 0: raise ValueError("Cannot convert negative integer %d to ordinal " "string." % n) return "%d%s" % (n, "tsnrhtdd"[(n // 10 % 10 != 1) * (n % 10 < 4) * n % 10 :: 4]) def safe_md5(open_file, block_size=2**20): """Compute an md5 sum without loading the file into memory. Parameters ---------- open_file : file object open file handle to the archive to compute the checksum. It must be open as a binary file block_size : int, optional size of the block taken per iteration Returns ------- md5 : md5 object from the hashlib module object with the loaded file Notes ----- This method is based on the answers given in: http://stackoverflow.com/a/1131255/379593 Examples -------- >>> from io import BytesIO >>> from skbio.util import safe_md5 >>> fd = BytesIO(b"foo bar baz") # open file like object >>> x = safe_md5(fd) >>> x.hexdigest() 'ab07acbb1e496801937adfa772424bf7' >>> fd.close() """ md5 = hashlib.md5() data = True while data: data = open_file.read(block_size) if data: md5.update(data) return md5 def find_duplicates(iterable): """Find duplicate elements in an iterable. Parameters ---------- iterable : iterable Iterable to be searched for duplicates (i.e., elements that are repeated). 
Returns ------- set Repeated elements in `iterable`. """ # modified from qiita.qiita_db.util.find_repeated # https://github.com/biocore/qiita # see licenses/qiita.txt seen, repeated = set(), set() for e in iterable: if e in seen: repeated.add(e) else: seen.add(e) return repeated def get_rng(seed=None): """Get a random generator. Parameters ---------- seed : int or np.random.Generator, optional A user-provided random seed or random generator instance. Returns ------- np.random.Generator Random generator instance. Notes ----- NumPy's new random generator [1]_ was introduced in version 1.17. It is not backward compatible with ``RandomState``, the legacy random generator [2]_. See NEP 19 [3]_ for an introduction to this change. References ---------- .. [1] https://numpy.org/devdocs/reference/random/generator.html .. [2] https://numpy.org/doc/stable/reference/random/legacy.html .. [3] https://numpy.org/neps/nep-0019-rng-policy.html """ try: if seed is None or isinstance(seed, int): return np.random.default_rng(seed) if isinstance(seed, np.random.Generator): return seed raise ValueError( "Invalid seed. It must be an integer or an " "instance of np.random.Generator." ) except AttributeError: raise ValueError( "The installed NumPy version does not support " "random.Generator. Please use NumPy >= 1.17." ) scikit-bio-0.6.2/skbio/util/_plotting.py000066400000000000000000000051621464262511300202210ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import importlib from io import StringIO, BytesIO class PlottableMixin: """A plottable object.""" def _get_mpl_plt(self): """Import Matplotlib and its plotting interface.""" msg = "Plotting requires Matplotlib installed in the system." if hasattr(self, "mpl"): if self.mpl is None: raise ImportError(msg) return try: self.mpl = importlib.import_module("matplotlib") except ImportError: self.mpl = None raise ImportError(msg) else: self.plt = importlib.import_module("matplotlib.pyplot") def _figure_data(self, format="png"): """Get figure data of a plottable object. Parameters ---------- format : str, optional Image format supported by the plotting backend. Examples include 'png' (default), 'svg', 'pdf', and 'eps'. Returns ------- str or bytes or None Figure data, or None if the plotting backend is not available. """ try: self._get_mpl_plt() except ImportError: return # call default plotting method fig = self.plot() fig.tight_layout() # get figure data # formats such as SVG are string try: fig.savefig(f := StringIO(), format=format) # formats such as PNG are bytes except TypeError: fig.savefig(f := BytesIO(), format=format) # close figure to avoid double display in IPython self.plt.close(fig) return f.getvalue() def _repr_png_(self): """Generate a PNG format figure for display in IPython.""" return self._figure_data("png") def _repr_svg_(self): """Generate an SVG format figure for display in IPython.""" return self._figure_data("svg") @property def png(self): """Get figure data in PNG format. Returns ------- bytes Figure data in PNG format. """ return self._repr_png_() @property def svg(self): """Get figure data in SVG format. Returns ------- str Figure data in SVG format. 
""" return self._repr_svg_() scikit-bio-0.6.2/skbio/util/_testing.py000066400000000000000000000332201464262511300200320ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import inspect import os import sys import numpy as np import numpy.testing as npt import pandas.testing as pdt from scipy.spatial.distance import pdist class ReallyEqualMixin: """Use this for testing __eq__/__ne__. Taken and modified from the following public domain code: https://ludios.org/testing-your-eq-ne-cmp/ """ def assertReallyEqual(self, a, b): # assertEqual first, because it will have a good message if the # assertion fails. self.assertEqual(a, b) self.assertEqual(b, a) self.assertTrue(a == b) self.assertTrue(b == a) self.assertFalse(a != b) self.assertFalse(b != a) def assertReallyNotEqual(self, a, b): # assertNotEqual first, because it will have a good message if the # assertion fails. self.assertNotEqual(a, b) self.assertNotEqual(b, a) self.assertFalse(a == b) self.assertFalse(b == a) self.assertTrue(a != b) self.assertTrue(b != a) def get_data_path(fn, subfolder="data"): """Return path to filename ``fn`` in the data folder. During testing it is often necessary to load data files. This function returns the full path to files in the ``data`` subfolder by default. Parameters ---------- fn : str File name. subfolder : str, defaults to ``data`` Name of the subfolder that contains the data. Returns ------- str Inferred absolute path to the test data for the module where ``get_data_path(fn)`` is called. Notes ----- The requested path may not point to an existing file, as its existence is not checked. 
""" # getouterframes returns a list of tuples: the second tuple # contains info about the caller, and the second element is its # filename callers_filename = inspect.getouterframes(inspect.currentframe())[1][1] path = os.path.dirname(os.path.abspath(callers_filename)) data_path = os.path.join(path, subfolder, fn) return data_path def assert_ordination_results_equal( left, right, ignore_method_names=False, ignore_axis_labels=False, ignore_directionality=False, decimal=7, ): """Assert that ordination results objects are equal. This is a helper function intended to be used in unit tests that need to compare ``OrdinationResults`` objects. Parameters ---------- left, right : OrdinationResults Ordination results to be compared for equality. ignore_method_names : bool, optional Ignore differences in `short_method_name` and `long_method_name`. ignore_axis_labels : bool, optional Ignore differences in axis labels (i.e., column labels). ignore_directionality : bool, optional Ignore differences in directionality (i.e., differences in signs) for attributes `samples`, `features` and `biplot_scores`. decimal : int, optional Number of decimal places to compare when checking numerical values. Defaults to 7. Raises ------ AssertionError If the two objects are not equal. 
""" npt.assert_equal(type(left) is type(right), True) if not ignore_method_names: npt.assert_equal(left.short_method_name, right.short_method_name) npt.assert_equal(left.long_method_name, right.long_method_name) _assert_frame_dists_equal( left.samples, right.samples, ignore_columns=ignore_axis_labels, ignore_directionality=ignore_directionality, decimal=decimal, ) _assert_frame_dists_equal( left.features, right.features, ignore_columns=ignore_axis_labels, ignore_directionality=ignore_directionality, decimal=decimal, ) _assert_frame_dists_equal( left.biplot_scores, right.biplot_scores, ignore_columns=ignore_axis_labels, ignore_directionality=ignore_directionality, decimal=decimal, ) _assert_frame_dists_equal( left.sample_constraints, right.sample_constraints, ignore_columns=ignore_axis_labels, ignore_directionality=ignore_directionality, decimal=decimal, ) _assert_series_equal( left.eigvals, right.eigvals, ignore_axis_labels, decimal=decimal ) _assert_series_equal( left.proportion_explained, right.proportion_explained, ignore_axis_labels, decimal=decimal, ) def _assert_series_equal(left_s, right_s, ignore_index=False, decimal=7): # assert_series_equal doesn't like None... 
if left_s is None or right_s is None: assert left_s is None and right_s is None else: npt.assert_almost_equal(left_s.values, right_s.values, decimal=decimal) if not ignore_index: pdt.assert_index_equal(left_s.index, right_s.index) def _assert_frame_dists_equal( left_df, right_df, ignore_index=False, ignore_columns=False, ignore_directionality=False, decimal=7, ): if left_df is None or right_df is None: assert left_df is None and right_df is None else: left_values = left_df.values right_values = right_df.values left_dists = pdist(left_values) right_dists = pdist(right_values) npt.assert_almost_equal(left_dists, right_dists, decimal=decimal) if not ignore_index: pdt.assert_index_equal(left_df.index, right_df.index) if not ignore_columns: pdt.assert_index_equal(left_df.columns, right_df.columns) def _assert_frame_equal( left_df, right_df, ignore_index=False, ignore_columns=False, ignore_directionality=False, decimal=7, ): # assert_frame_equal doesn't like None... if left_df is None or right_df is None: assert left_df is None and right_df is None else: left_values = left_df.values right_values = right_df.values if ignore_directionality: left_values, right_values = _normalize_signs(left_values, right_values) npt.assert_almost_equal(left_values, right_values, decimal=decimal) if not ignore_index: pdt.assert_index_equal(left_df.index, right_df.index) if not ignore_columns: pdt.assert_index_equal(left_df.columns, right_df.columns) def _normalize_signs(arr1, arr2): """Change column signs so that "column" and "-column" compare equal. This is needed because results of eigenproblmes can have signs flipped, but they're still right. Notes ----- This function tries hard to make sure that, if you find "column" and "-column" almost equal, calling a function like np.allclose to compare them after calling `normalize_signs` succeeds. To do so, it distinguishes two cases for every column: - It can be all almost equal to 0 (this includes a column of zeros). 
- Otherwise, it has a value that isn't close to 0. In the first case, no sign needs to be flipped. I.e., for |epsilon| small, np.allclose(-epsilon, 0) is true if and only if np.allclose(epsilon, 0) is. In the second case, the function finds the number in the column whose absolute value is largest. Then, it compares its sign with the number found in the same index, but in the other array, and flips the sign of the column as needed. """ # Let's convert everyting to floating point numbers (it's # reasonable to assume that eigenvectors will already be floating # point numbers). This is necessary because np.array(1) / # np.array(0) != np.array(1.) / np.array(0.) arr1 = np.asarray(arr1, dtype=np.float64) arr2 = np.asarray(arr2, dtype=np.float64) if arr1.shape != arr2.shape: raise ValueError( "Arrays must have the same shape ({0} vs {1}).".format( arr1.shape, arr2.shape ) ) # To avoid issues around zero, we'll compare signs of the values # with highest absolute value max_idx = np.abs(arr1).argmax(axis=0) max_arr1 = arr1[max_idx, range(arr1.shape[1])] max_arr2 = arr2[max_idx, range(arr2.shape[1])] sign_arr1 = np.sign(max_arr1) sign_arr2 = np.sign(max_arr2) # Store current warnings, and ignore division by zero (like 1. / # 0.) and invalid operations (like 0. / 0.) wrn = np.seterr(invalid="ignore", divide="ignore") differences = sign_arr1 / sign_arr2 # The values in `differences` can be: # 1 -> equal signs # -1 -> diff signs # Or nan (0/0), inf (nonzero/0), 0 (0/nonzero) np.seterr(**wrn) # Now let's deal with cases where `differences != \pm 1` special_cases = (~np.isfinite(differences)) | (differences == 0) # In any of these cases, the sign of the column doesn't matter, so # let's just keep it differences[special_cases] = 1 return arr1 * differences, arr2 def assert_data_frame_almost_equal(left, right, rtol=1e-5): """Raise AssertionError if ``pd.DataFrame`` objects are not "almost equal". Wrapper of ``pd.util.testing.assert_frame_equal``. 
Floating point values are considered "almost equal" if they are within a threshold defined by ``assert_frame_equal``. This wrapper uses a number of checks that are turned off by default in ``assert_frame_equal`` in order to perform stricter comparisons (for example, ensuring the index and column types are the same). It also does not consider empty ``pd.DataFrame`` objects equal if they have a different index. Other notes: * Index (row) and column ordering must be the same for objects to be equal. * NaNs (``np.nan``) in the same locations are considered equal. This is a helper function intended to be used in unit tests that need to compare ``pd.DataFrame`` objects. Parameters ---------- left, right : pd.DataFrame ``pd.DataFrame`` objects to compare. rtol : float, optional The relative tolerance parameter used for comparison. Defaults to 1e-5. Raises ------ AssertionError If `left` and `right` are not "almost equal". See Also -------- pandas.util.testing.assert_frame_equal """ # pass all kwargs to ensure this function has consistent behavior even if # `assert_frame_equal`'s defaults change pdt.assert_frame_equal( left, right, check_dtype=True, check_index_type=True, check_column_type=True, check_frame_type=True, check_names=True, by_blocks=False, check_exact=False, rtol=rtol, ) # this check ensures that empty DataFrames with different indices do not # compare equal. exact=True specifies that the type of the indices must be # exactly the same assert_index_equal(left.index, right.index) def _data_frame_to_default_int_type(df): """Convert integer columns in a data frame into the platform-default integer type. Pandas DataFrame defaults to int64 when reading integers, rather than respecting the platform default (Linux and MacOS: int64, Windows: int32). This causes issues in comparing observed and expected data frames in Windows. This function repairs the issue by converting int64 columns of a data frame into int32 in Windows. 
See: https://github.com/unionai-oss/pandera/issues/726 """ for col in df.select_dtypes("int").columns: df[col] = df[col].astype(int) def assert_series_almost_equal(left, right): # pass all kwargs to ensure this function has consistent behavior even if # `assert_series_equal`'s defaults change pdt.assert_series_equal( left, right, check_dtype=True, check_index_type=True, check_series_type=True, check_names=True, check_exact=False, check_datetimelike_compat=False, obj="Series", ) # this check ensures that empty Series with different indices do not # compare equal. assert_index_equal(left.index, right.index) def assert_index_equal(a, b): pdt.assert_index_equal(a, b, exact=True, check_names=True, check_exact=True) def pytestrunner(): try: import numpy try: # NumPy 1.14 changed repr output breaking our doctests, # request the legacy 1.13 style numpy.set_printoptions(legacy="1.13") except TypeError: # Old Numpy, output should be fine as it is :) # TypeError: set_printoptions() got an unexpected # keyword argument 'legacy' pass except ImportError: numpy = None try: import pandas # Max columns is automatically set by pandas based on terminal # width, so set columns to unlimited to prevent the test suite # from passing/failing based on terminal size. 
pandas.options.display.max_columns = None except ImportError: pandas = None try: import matplotlib except ImportError: matplotlib = None else: # Set a non-interactive backend for Matplotlib, such that it can work on # systems without graphics matplotlib.use("agg") # import here, cause outside the eggs aren't loaded import pytest args = [ "--pyargs", "skbio", "--doctest-modules", "--doctest-glob", "*.pyx", "-o", '"doctest_optionflags=NORMALIZE_WHITESPACE' ' IGNORE_EXCEPTION_DETAIL"', ] + sys.argv[1:] errno = pytest.main(args=args) sys.exit(errno) scikit-bio-0.6.2/skbio/util/_warning.py000066400000000000000000000036431464262511300200300ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from warnings import warn, simplefilter class SkbioWarning(Warning): """Filter our warnings from warnings given by 3rd parties.""" pass class EfficiencyWarning(SkbioWarning): """Warn about potentially accidental use of inefficient code. For example, if a user doesn't have an optimized version of a function/algorithm available in their scikit-bio installation, a slower, pure-Python implementation may be used instead. This warning can be used to let the user know they are using a version of the function that could be potentially orders of magnitude slower. """ pass class RepresentationWarning(SkbioWarning): """Warn about assumptions made for the successful completion of a process. Warn about substitutions, assumptions, or particular alterations that were made for the successful completion of a process. For example, if a value that is required for a task is not present, a best guess or least deleterious value could be used, accompanied by this warning. 
""" pass class DeprecationWarning(DeprecationWarning, SkbioWarning): """Used to indicate deprecated functionality in scikit-bio.""" pass def _warn_deprecated(func, ver, msg=None): """Warn of deprecated status.""" if not hasattr(func, "warned"): simplefilter("once", DeprecationWarning) if msg: warn( f"{func.__name__} is deprecated as of {ver}. {msg}", DeprecationWarning ) else: warn(f"{func.__name__} is deprecated as of {ver}.") func.warned = True scikit-bio-0.6.2/skbio/util/tests/000077500000000000000000000000001464262511300170065ustar00rootroot00000000000000scikit-bio-0.6.2/skbio/util/tests/__init__.py000066400000000000000000000005411464262511300211170ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- scikit-bio-0.6.2/skbio/util/tests/test_decorator.py000066400000000000000000000070701464262511300224050ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import unittest import inspect import warnings from skbio.util import classproperty from skbio.util._decorator import overrides, classonlymethod from skbio.util._exception import OverrideError class TestClassOnlyMethod(unittest.TestCase): def test_works_on_class(self): class A: @classonlymethod def example(cls): return cls self.assertEqual(A.example(), A) def test_fails_on_instance(self): class A: @classonlymethod def example(cls): pass with self.assertRaises(TypeError) as e: A().example() self.assertIn('A.example', str(e.exception)) self.assertIn('instance', str(e.exception)) def test_matches_classmethod(self): class A: pass def example(cls, thing): """doc""" A.example1 = classmethod(example) A.example2 = classonlymethod(example) self.assertEqual(A.__dict__['example1'].__func__, example) self.assertEqual(A.__dict__['example2'].__func__, example) self.assertEqual(A.example1.__doc__, example.__doc__) self.assertEqual(A.example2.__doc__, example.__doc__) self.assertEqual(A.example1.__name__, example.__name__) self.assertEqual(A.example2.__name__, example.__name__) def test_passes_args_kwargs(self): self.ran_test = False class A: @classonlymethod def example(cls, arg1, arg2, kwarg1=None, kwarg2=None, default=5): self.assertEqual(arg1, 1) self.assertEqual(arg2, 2) self.assertEqual(kwarg1, '1') self.assertEqual(kwarg2, '2') self.assertEqual(default, 5) self.ran_test = True A.example(1, *[2], kwarg2='2', **{'kwarg1': '1'}) self.assertTrue(self.ran_test) class TestOverrides(unittest.TestCase): def test_raises_when_missing(self): class A: pass with self.assertRaises(OverrideError): class B(A): @overrides(A) def test(self): pass def test_doc_inherited(self): class A: def test(self): """Docstring""" pass class B(A): @overrides(A) def test(self): pass self.assertEqual(B.test.__doc__, "Docstring") def test_doc_not_inherited(self): class A: def test(self): """Docstring""" pass class B(A): 
@overrides(A) def test(self): """Different""" pass self.assertEqual(B.test.__doc__, "Different") class TestClassProperty(unittest.TestCase): def test_getter_only(self): class Foo: _foo = 42 @classproperty def foo(cls): return cls._foo # class-level getter self.assertEqual(Foo.foo, 42) # instance-level getter f = Foo() self.assertEqual(f.foo, 42) with self.assertRaises(AttributeError): f.foo = 4242 if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/util/tests/test_misc.py000066400000000000000000000223721464262511300213600ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import io import unittest import numpy as np from skbio.util import cardinal_to_ordinal, safe_md5, find_duplicates, get_rng from skbio.util._misc import MiniRegistry, chunk_str, resolve_key class TestMiniRegistry(unittest.TestCase): def setUp(self): self.registry = MiniRegistry() def test_decoration(self): self.assertNotIn("name1", self.registry) self.assertNotIn("name2", self.registry) self.n1_called = False self.n2_called = False @self.registry("name1") def some_registration1(): self.n1_called = True @self.registry("name2") def some_registration2(): self.n2_called = True self.assertIn("name1", self.registry) self.assertEqual(some_registration1, self.registry["name1"]) self.assertIn("name2", self.registry) self.assertEqual(some_registration2, self.registry["name2"]) self.registry["name1"]() self.assertTrue(self.n1_called) self.registry["name2"]() self.assertTrue(self.n2_called) def test_copy(self): @self.registry("name") def some_registration(): pass new = self.registry.copy() self.assertIsNot(new, self.registry) @new("other") def other_registration(): pass 
self.assertIn("name", self.registry) self.assertNotIn("other", self.registry) self.assertIn("other", new) self.assertIn("name", new) def test_everything(self): class SomethingToInterpolate: def interpolate_me(): """First line Some description of things, also this: Other things are happening now. """ def dont_interpolate_me(): """First line Some description of things, also this: Other things are happening now. """ class Subclass(SomethingToInterpolate): pass @self.registry("a") def a(): """x""" @self.registry("b") def b(): """y""" @self.registry("c") def c(): """z""" subclass_registry = self.registry.copy() @subclass_registry("o") def o(): """p""" self.registry.interpolate(SomethingToInterpolate, "interpolate_me") subclass_registry.interpolate(Subclass, "interpolate_me") self.assertEqual(SomethingToInterpolate.interpolate_me.__doc__, "First line\n\n Some description of th" "ings, also this:\n\n\t'a'\n\t x\n\t'b'\n\t y\n\t'c" "'\n\t z\n\n Other things are happeni" "ng now.\n ") self.assertEqual(SomethingToInterpolate.dont_interpolate_me.__doc__, "First line\n\n Some description of th" "ings, also this:\n\n Other things are" " happening now.\n ") self.assertEqual(Subclass.interpolate_me.__doc__, "First line\n\n Some description of th" "ings, also this:\n\n\t'a'\n\t x\n\t'b'\n\t y\n\t'c" "'\n\t z\n\t'o'\n\t p\n\n Other thin" "gs are happening now.\n ") self.assertEqual(Subclass.dont_interpolate_me.__doc__, "First line\n\n Some description of th" "ings, also this:\n\n Other things are" " happening now.\n ") class ResolveKeyTests(unittest.TestCase): def test_callable(self): def func(x): return str(x) self.assertEqual(resolve_key(1, func), "1") self.assertEqual(resolve_key(4, func), "4") def test_index(self): class MetadataHaver(dict): @property def metadata(self): return self obj = MetadataHaver({'foo': 123}) self.assertEqual(resolve_key(obj, 'foo'), 123) obj = MetadataHaver({'foo': 123, 'bar': 'baz'}) self.assertEqual(resolve_key(obj, 'bar'), 'baz') def 
test_wrong_type(self): with self.assertRaises(TypeError): resolve_key({'foo': 1}, 'foo') class ChunkStrTests(unittest.TestCase): def test_even_split(self): self.assertEqual(chunk_str('abcdef', 6, ' '), 'abcdef') self.assertEqual(chunk_str('abcdef', 3, ' '), 'abc def') self.assertEqual(chunk_str('abcdef', 2, ' '), 'ab cd ef') self.assertEqual(chunk_str('abcdef', 1, ' '), 'a b c d e f') self.assertEqual(chunk_str('a', 1, ' '), 'a') self.assertEqual(chunk_str('abcdef', 2, ''), 'abcdef') def test_no_split(self): self.assertEqual(chunk_str('', 2, '\n'), '') self.assertEqual(chunk_str('a', 100, '\n'), 'a') self.assertEqual(chunk_str('abcdef', 42, '|'), 'abcdef') def test_uneven_split(self): self.assertEqual(chunk_str('abcdef', 5, '|'), 'abcde|f') self.assertEqual(chunk_str('abcdef', 4, '|'), 'abcd|ef') self.assertEqual(chunk_str('abcdefg', 3, ' - '), 'abc - def - g') def test_invalid_n(self): with self.assertRaisesRegex(ValueError, r'n=0'): chunk_str('abcdef', 0, ' ') with self.assertRaisesRegex(ValueError, r'n=-42'): chunk_str('abcdef', -42, ' ') class SafeMD5Tests(unittest.TestCase): def test_safe_md5(self): exp = 'ab07acbb1e496801937adfa772424bf7' fd = io.BytesIO(b'foo bar baz') obs = safe_md5(fd) self.assertEqual(obs.hexdigest(), exp) fd.close() class CardinalToOrdinalTests(unittest.TestCase): def test_valid_range(self): # taken and modified from http://stackoverflow.com/a/20007730/3776794 exp = ['0th', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th', '8th', '9th', '10th', '11th', '12th', '13th', '14th', '15th', '16th', '17th', '18th', '19th', '20th', '21st', '22nd', '23rd', '24th', '25th', '26th', '27th', '28th', '29th', '30th', '31st', '32nd', '100th', '101st', '42042nd'] obs = [cardinal_to_ordinal(n) for n in list(range(0, 33)) + [100, 101, 42042]] self.assertEqual(obs, exp) def test_invalid_n(self): with self.assertRaisesRegex(ValueError, r'-1'): cardinal_to_ordinal(-1) class TestFindDuplicates(unittest.TestCase): def test_empty_input(self): def empty_gen(): 
yield from () for empty in [], (), '', set(), {}, empty_gen(): self.assertEqual(find_duplicates(empty), set()) def test_no_duplicates(self): self.assertEqual(find_duplicates(['a', 'bc', 'def', 'A']), set()) def test_one_duplicate(self): self.assertEqual(find_duplicates(['a', 'bc', 'def', 'a']), set(['a'])) def test_many_duplicates(self): self.assertEqual(find_duplicates(['a', 'bc', 'bc', 'def', 'a']), set(['a', 'bc'])) def test_all_duplicates(self): self.assertEqual( find_duplicates(('a', 'bc', 'bc', 'def', 'a', 'def', 'def')), set(['a', 'bc', 'def'])) def test_mixed_types(self): def gen(): yield from ('a', 1, 'bc', 2, 'a', 2, 2, 3.0) self.assertEqual(find_duplicates(gen()), set(['a', 2])) class TestGetRng(unittest.TestCase): def test_get_rng(self): # no seed obs0 = get_rng() self.assertTrue(isinstance(obs0, np.random.Generator)) # integer seed obs1 = get_rng(42) self.assertTrue(isinstance(obs1, np.random.Generator)) # generator instance obs2 = get_rng(obs1) self.assertTrue(isinstance(obs2, np.random.Generator)) # invalide seed msg = ('Invalid seed. It must be an integer or an instance of ' 'np.random.Generator.') with self.assertRaises(ValueError) as cm: get_rng('hello') self.assertEqual(str(cm.exception), msg) # test if seeds are disjoint and results are reproducible obs = [get_rng(i).integers(1e6) for i in range(10)] exp = [850624, 473188, 837575, 811504, 726442, 670790, 445045, 944904, 719549, 421547] self.assertListEqual(obs, exp) # mimic legacy numpy delattr(np.random, 'default_rng') delattr(np.random, 'Generator') msg = ('The installed NumPy version does not support ' 'random.Generator. 
Please use NumPy >= 1.17.') with self.assertRaises(ValueError) as cm: get_rng() self.assertEqual(str(cm.exception), msg) with self.assertRaises(ValueError) as cm: get_rng('hello') self.assertEqual(str(cm.exception), msg) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/util/tests/test_plotting.py000066400000000000000000000057501464262511300222660ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- import sys import unittest try: import matplotlib except ImportError: has_matplotlib = False else: has_matplotlib = True from skbio.util._plotting import PlottableMixin @unittest.skipUnless(has_matplotlib, "Matplotlib not available.") class TestPlottableMixin(unittest.TestCase): def setUp(self): def _plot(self): fig, ax = self.plt.subplots() ax.plot(1, 1, color='k', marker='o') return fig PlottableMixin.plot = _plot def test_get_mpl_plt(self): obj = PlottableMixin() # hasn't imported yet self.assertFalse(hasattr(obj, 'mpl')) # import Matplotlib if available obj._get_mpl_plt() self.assertEqual(obj.mpl.__name__, 'matplotlib') self.assertEqual(obj.plt.__name__, 'matplotlib.pyplot') # make Matplotlib unimportable delattr(obj, 'mpl') backup = sys.modules['matplotlib'] sys.modules['matplotlib'] = None with self.assertRaises(ImportError): obj._get_mpl_plt() # won't try again if already failed sys.modules['matplotlib'] = backup self.assertIsNone(obj.mpl) with self.assertRaises(ImportError): obj._get_mpl_plt() def test_figure_data(self): obj = PlottableMixin() obj._get_mpl_plt() # PNG data are bytes obs = obj._figure_data('png') self.assertIsInstance(obs, bytes) self.assertTrue(len(obs) > 0) # SVG data are string obs = 
obj._figure_data('svg') self.assertIsInstance(obs, str) self.assertTrue(len(obs) > 0) # plotting backend not available delattr(obj, 'mpl') backup = sys.modules['matplotlib'] sys.modules['matplotlib'] = None self.assertIsNone(obj._figure_data()) sys.modules['matplotlib'] = backup def test_repr_png(self): obj = PlottableMixin() obj._get_mpl_plt() obs = obj._repr_png_() self.assertIsInstance(obs, bytes) self.assertTrue(len(obs) > 0) def test_repr_svg(self): obj = PlottableMixin() obj._get_mpl_plt() obs = obj._repr_svg_() self.assertIsInstance(obs, str) self.assertTrue(len(obs) > 0) def test_png(self): obj = PlottableMixin() obj._get_mpl_plt() obs = obj.png self.assertIsInstance(obs, bytes) self.assertTrue(len(obs) > 0) def test_svg(self): obj = PlottableMixin() obj._get_mpl_plt() obs = obj.svg self.assertIsInstance(obs, str) self.assertTrue(len(obs) > 0) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/util/tests/test_testing.py000066400000000000000000000241701464262511300221000ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import os import itertools import unittest import pandas as pd import numpy as np import numpy.testing as npt from skbio import OrdinationResults from skbio.util import (get_data_path, assert_ordination_results_equal, assert_data_frame_almost_equal) from skbio.util._testing import _normalize_signs, assert_series_almost_equal class TestGetDataPath(unittest.TestCase): def test_get_data_path(self): fn = 'parrot' path = os.path.dirname(os.path.abspath(__file__)) data_path = os.path.join(path, 'data', fn) data_path_2 = get_data_path(fn) self.assertEqual(data_path_2, data_path) class TestAssertOrdinationResultsEqual(unittest.TestCase): def test_assert_ordination_results_equal(self): minimal1 = OrdinationResults('foo', 'bar', pd.Series([1.0, 2.0]), pd.DataFrame([[1, 2, 3], [4, 5, 6]])) # a minimal set of results should be equal to itself assert_ordination_results_equal(minimal1, minimal1) # type mismatch with npt.assert_raises(AssertionError): assert_ordination_results_equal(minimal1, 'foo') # numeric values should be checked that they're almost equal almost_minimal1 = OrdinationResults( 'foo', 'bar', pd.Series([1.0000001, 1.9999999]), pd.DataFrame([[1, 2, 3], [4, 5, 6]])) assert_ordination_results_equal(minimal1, almost_minimal1) # test each of the optional numeric attributes for attr in ('features', 'samples', 'biplot_scores', 'sample_constraints'): # missing optional numeric attribute in one, present in the other setattr(almost_minimal1, attr, pd.DataFrame([[1, 2], [3, 4]])) with npt.assert_raises(AssertionError): assert_ordination_results_equal(minimal1, almost_minimal1) setattr(almost_minimal1, attr, None) # optional numeric attributes present in both, but not almost equal setattr(minimal1, attr, pd.DataFrame([[1, 2], [3, 4]])) setattr(almost_minimal1, attr, pd.DataFrame([[1, 2], [3.00002, 4]])) with npt.assert_raises(AssertionError): assert_ordination_results_equal(minimal1, 
almost_minimal1) setattr(minimal1, attr, None) setattr(almost_minimal1, attr, None) # optional numeric attributes present in both, and almost equal setattr(minimal1, attr, pd.DataFrame([[1.0, 2.0], [3.0, 4.0]])) setattr(almost_minimal1, attr, pd.DataFrame([[1.0, 2.0], [3.00000002, 4]])) assert_ordination_results_equal(minimal1, almost_minimal1) setattr(minimal1, attr, None) setattr(almost_minimal1, attr, None) # missing optional numeric attribute in one, present in the other almost_minimal1.proportion_explained = pd.Series([1, 2, 3]) with npt.assert_raises(AssertionError): assert_ordination_results_equal(minimal1, almost_minimal1) almost_minimal1.proportion_explained = None # optional numeric attributes present in both, but not almost equal minimal1.proportion_explained = pd.Series([1, 2, 3]) almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00002]) with npt.assert_raises(AssertionError): assert_ordination_results_equal(minimal1, almost_minimal1) almost_minimal1.proportion_explained = None almost_minimal1.proportion_explained = None # optional numeric attributes present in both, and almost equal minimal1.proportion_explained = pd.Series([1, 2, 3]) almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00000002]) assert_ordination_results_equal(minimal1, almost_minimal1) almost_minimal1.proportion_explained = None almost_minimal1.proportion_explained = None class TestNormalizeSigns(unittest.TestCase): def test_shapes_and_nonarray_input(self): with self.assertRaises(ValueError): _normalize_signs([[1, 2], [3, 5]], [[1, 2]]) def test_works_when_different(self): """Taking abs value of everything would lead to false positives.""" a = np.array([[1, -1], [2, 2]]) b = np.array([[-1, -1], [2, 2]]) with self.assertRaises(AssertionError): npt.assert_equal(*_normalize_signs(a, b)) def test_easy_different(self): a = np.array([[1, 2], [3, -1]]) b = np.array([[-1, 2], [-3, -1]]) npt.assert_equal(*_normalize_signs(a, b)) def test_easy_already_equal(self): a = 
np.array([[1, -2], [3, 1]]) b = a.copy() npt.assert_equal(*_normalize_signs(a, b)) def test_zeros(self): a = np.array([[0, 3], [0, -1]]) b = np.array([[0, -3], [0, 1]]) npt.assert_equal(*_normalize_signs(a, b)) def test_hard(self): a = np.array([[0, 1], [1, 2]]) b = np.array([[0, 1], [-1, 2]]) npt.assert_equal(*_normalize_signs(a, b)) def test_harder(self): """We don't want a value that might be negative due to floating point inaccuracies to make a call to allclose in the result to be off.""" a = np.array([[-1e-15, 1], [5, 2]]) b = np.array([[1e-15, 1], [5, 2]]) # Clearly a and b would refer to the same "column # eigenvectors" but a slopppy implementation of # _normalize_signs could change the sign of column 0 and make a # comparison fail npt.assert_almost_equal(*_normalize_signs(a, b)) def test_column_zeros(self): a = np.array([[0, 1], [0, 2]]) b = np.array([[0, -1], [0, -2]]) npt.assert_equal(*_normalize_signs(a, b)) def test_column_almost_zero(self): a = np.array([[1e-15, 3], [-2e-14, -6]]) b = np.array([[0, 3], [-1e-15, -6]]) npt.assert_almost_equal(*_normalize_signs(a, b)) class TestAssertDataFrameAlmostEqual(unittest.TestCase): def setUp(self): self.df = pd.DataFrame({'bar': ['a', 'b', 'cd', 'e'], 'foo': [42, 42.0, np.nan, 0]}) def test_not_equal(self): unequal_dfs = [ self.df, # floating point error too large to be "almost equal" pd.DataFrame({'bar': ['a', 'b', 'cd', 'e'], 'foo': [42, 42.001, np.nan, 0]}), # extra NaN pd.DataFrame({'bar': ['a', 'b', 'cd', 'e'], 'foo': [42, np.nan, np.nan, 0]}), # different column order pd.DataFrame(self.df, columns=['foo', 'bar']), # different index order pd.DataFrame(self.df, index=np.arange(4)[::-1]), # different index type pd.DataFrame(self.df, index=np.arange(4).astype(float)), # various forms of "empty" DataFrames that are not equivalent pd.DataFrame(), pd.DataFrame(index=np.arange(10)), pd.DataFrame(columns=np.arange(10)), pd.DataFrame(index=np.arange(10), columns=np.arange(10)), pd.DataFrame(index=np.arange(9)), 
pd.DataFrame(columns=np.arange(9)), pd.DataFrame(index=np.arange(9), columns=np.arange(9)) ] # each df should compare equal to itself and a copy of itself for df in unequal_dfs: assert_data_frame_almost_equal(df, df) assert_data_frame_almost_equal(df, pd.DataFrame(df, copy=True)) # every pair of dfs should not compare equal. use permutations instead # of combinations to test that comparing df1 to df2 and df2 to df1 are # both not equal for df1, df2 in itertools.permutations(unequal_dfs, 2): with self.assertRaises(AssertionError): assert_data_frame_almost_equal(df1, df2) def test_equal(self): equal_dfs = [ self.df, # floating point error small enough to be "almost equal" pd.DataFrame({'bar': ['a', 'b', 'cd', 'e'], 'foo': [42, 42.00001, np.nan, 0]}) ] for df in equal_dfs: assert_data_frame_almost_equal(df, df) for df1, df2 in itertools.permutations(equal_dfs, 2): assert_data_frame_almost_equal(df1, df2) class TestAssertSeriesAlmostEqual(unittest.TestCase): def setUp(self): self.series = [ pd.Series(dtype='float64'), pd.Series(dtype=object), pd.Series(dtype='int64'), pd.Series([1, 2, 3]), pd.Series([3, 2, 1]), pd.Series([1, 2, 3, 4]), pd.Series([1., 2., 3.]), pd.Series([1, 2, 3], [1.0, 2.0, 3.0]), pd.Series([1, 2, 3], [1, 2, 3]), pd.Series([1, 2, 3], ['c', 'b', 'a']), pd.Series([3, 2, 1], ['c', 'b', 'a']), ] def test_not_equal(self): # no pair of series should compare equal for s1, s2 in itertools.permutations(self.series, 2): with self.assertRaises(AssertionError): assert_series_almost_equal(s1, s2) def test_equal(self): s1 = pd.Series([1., 2., 3.]) s2 = pd.Series([1.000001, 2., 3.]) assert_series_almost_equal(s1, s2) # all series should be equal to themselves and copies of themselves for s in self.series: assert_series_almost_equal(s, s) assert_series_almost_equal(s, pd.Series(s, copy=True)) if __name__ == '__main__': unittest.main() scikit-bio-0.6.2/skbio/workflow.py000066400000000000000000000451031464262511300171160ustar00rootroot00000000000000r"""Workflow 
construction (:mod:`skbio.workflow`) ============================================= .. currentmodule:: skbio.workflow Construct arbitrarily complex workflows in which the specific methods run are determined at runtime. This module supports short circuiting a workflow if an item fails, supports ordering methods, callbacks for processed items, and deciding what methods are executed based on state or runtime options. Classes ------- .. autosummary:: :toctree: generated/ Workflow Decorators ---------- .. autosummary:: :toctree: generated/ requires method Tutorial -------- >>> from skbio.workflow import Workflow As an example of the :class:`Workflow` object, let's construct a sequence processor that will filter sequences that are < 10 nucleotides, reverse the sequence if the runtime options indicate to, and truncate if a specific nucleotide pattern is observed. The :class:`Workflow` object will only short circuit, and evaluate requirements on methods decorated by ``method``. Developers are free to define as many methods as they'd like within the object definition, and which can be called from workflow methods, but they will not be subjected directly to workflow checks. >>> nuc_pattern = 'AATTG' >>> has_nuc_pattern = lambda s: s[:len(nuc_pattern)] == nuc_pattern >>> class SequenceProcessor(Workflow): ... def initialize_state(self, item): ... # Setup the state for a new item (e.g., a new sequence) ... self.state = item ... @method(priority=100) ... def check_length(self): ... # Always make sure the sequence is at least 10 nucleotides ... if len(self.state) < 10: ... self.failed = True ... @method(priority=90) ... @requires(state=has_nuc_pattern) ... def truncate(self): ... # Truncate if a specific starting nucleotide pattern is observed ... self.state = self.state[len(nuc_pattern):] ... @method(priority=80) ... @requires(option='reverse', values=True) ... def reverse(self): ... # Reverse the sequence if indicatd at runtime ... 
self.state = self.state[::-1] An instance of a ``Workflow`` must be passed a ``state`` object and any runtime options. There are a few other useful parameters that can be specfied but are out of scope for the purposes of this example. We also do not need to provide a state object as our ``initialize_state`` method overrides ``self.state``. Now, let's create the instance. >>> wf = SequenceProcessor(state=None, options={'reverse=': False}) To run items through the ``SequenceProcessor``, we need to pass in an iterable. So, lets create a ``list`` of sequences. >>> seqs = ['AAAAAAATTTTTTT', 'ATAGACC', 'AATTGCCGGAC', 'ATATGAACAAA'] Before we run these sequences through, we're going to also define callbacks that are applied to the result of an single pass through the ``Workflow``. Callbacks are optional -- by default, a success will simply yield the state member variable while failures are ignored -- but, depending on your workflow, it can be useful to handle failures or potentially do something fun and exciting on success. >>> def success_f(obj): ... return "SUCCESS: %s" % obj.state >>> def fail_f(obj): ... return "FAIL: %s" % obj.state Now, lets process some data! >>> for result in wf(seqs, success_callback=success_f, fail_callback=fail_f): ... print(result) SUCCESS: AAAAAAATTTTTTT FAIL: ATAGACC SUCCESS: CCGGAC SUCCESS: ATATGAACAAA A few things of note just happened. First off, none of the sequences were reversed as the ``SequenceProcessor`` did not have option "reverse" set to ``True``. Second, you'll notice that the 3rd sequence was truncated, which is expected as it matched our nucleotide pattern of interest. Finally, of the sequences we processed, only a single sequence failed. To assist in constructing workflows, debug information is available but it must be turned on at instantiation. Let's do that, and while we're at it, let's go ahead and enable the reversal method. 
This time through though, were going to walk through an item at a time so we can examine the debug information. >>> wf = SequenceProcessor(state=None, options={'reverse':True}, debug=True) >>> gen = wf(seqs, fail_callback=lambda x: x.state) >>> next(gen) 'TTTTTTTAAAAAAA' >>> wf.failed False >>> sorted(wf.debug_trace) [('check_length', 0), ('reverse', 2)] The ``debug_trace`` specifies the methods executed, and the order of their execution where closer to zero indicates earlier in the execution order. Gaps indicate there was a method evaluated but not executed. Each of the items in the ``debug_trace`` is a key into a few other ``dict`` of debug information which we'll discuss in a moment. Did you see that the sequence was reversed this time through the workflow? Now, let's take a look at the next item, which on our prior run through the workflow was a failed item. >>> next(gen) 'ATAGACC' >>> wf.failed True >>> sorted(wf.debug_trace) [('check_length', 0)] What we can see is that the failed sequence only executed the check_length method. Since the sequence didn't pass our length filter of 10 nucleotides, it was marked as failed within the ``check_length`` method. As a result, none of the other methods were evaluated (note: this short circuiting behavior can be disabled if desired). This third item previously matched our nucleotide pattern of interest for truncation. Let's see what that looks like in the debug output. >>> next(gen) 'CAGGCC' >>> wf.failed False >>> sorted(wf.debug_trace) [('check_length', 0), ('reverse', 2), ('truncate', 1)] In this last example, we can see that the ``truncate`` method was executed prior to the ``reverse`` method and following the ``check_length`` method. This is as anticipated given the priorities we specified for these methods. Since the ``truncate`` method is doing something interesting, let's take a closer look at how the ``state`` is changing. 
First, we're going to dump out the state of the workflow prior to the call to ``truncate`` and then we're going to dump out the ``state`` following the call to ``truncate``, which will allow us to rapidly what is going on. >>> wf.debug_pre_state[('truncate', 1)] 'AATTGCCGGAC' >>> wf.debug_post_state[('truncate', 1)] 'CCGGAC' As we expect, we have our original sequence going into ``truncate``, and following the application of ``truncate``, our sequence is missing our nucleotide pattern of interest. Awesome, right? There is one final piece of debug output, ``wf.debug_runtime``, which can be useful when diagnosing the amount of time required for individual methods on a particular piece of state (as opposed to the aggregate as provided by cProfile). Three final components of the workflow that are quite handy are objects that allow you to indicate ``anything`` as an option value, anything that is ``not_none``, and a mechanism to define a range of valid values. >>> from skbio.workflow import not_none, anything >>> class Ex(Workflow): ... @method() ... @requires(option='foo', values=not_none) ... def do_something(self): ... pass ... @method() ... @requires(option='bar', values=anything) ... def do_something_else(self): ... pass ... @method() ... @requires(option='foobar', values=[1,2,3]) ... def do_something_awesome(self): ... pass ... """ # noqa: D205, D415 # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import sys from copy import deepcopy from time import time from functools import update_wrapper from collections.abc import Iterable from types import MethodType class NotExecuted: """Helper object to track if a method was executed.""" def __init__(self): """Construct all the necessary attributes for the NotExecuted object.""" self.msg = None def __call__(self, msg): """Update message and return self.""" self.msg = msg return self _not_executed = NotExecuted() class Exists: """Stub object to assist with ``requires`` when a value exists.""" def __contains__(self, item): """Check if a value exists.""" return True anything = Exists() # external, for when a value can be anything class NotNone: """Check for non-None values.""" def __contains__(self, item): """Check if item is not None.""" if item is None: return False else: return True not_none = NotNone() class Workflow: """Arbitrary workflow support structure. Methods that are considered to be directly part of the workflow must be decorated with ``method``. The workflow methods offer a mechanism to logically group functionality together, and are free to make subsequent calls to other methods. All methods of a subclass of Workflow (those with and without the ``method`` decoration) can take advantage of the ``requires`` decorator to specify any option or state requirements for the decorated function. Parameters ---------- state : object State can be anything or nothing. This is dependent on the workflow as in some cases, it is useful to preallocate state while in other workflows state may be ignored. short_circuit : bool if True, enables ignoring function methods when a given item has failed debug : bool Enable debug mode options : dict runtime options, {'option':values}, that the ``requires`` decorator can interrogate. kwargs : dict Additional arguments will be added as member variables to self. 
This is handy if additional contextual information is needed by a workflow method (e.g., a lookup table). """ def __init__(self, state, short_circuit=True, debug=False, options=None, **kwargs): r"""Build thy workflow of self.""" if options is None: self.options = {} else: self.options = options self.short_circuit = short_circuit self.failed = False self.debug = debug self.state = state self.iter_ = None for k, v in kwargs.items(): if hasattr(self, k): raise AttributeError("'%s' already exists in self." % k) setattr(self, k, v) if self.debug: self._setup_debug() def initialize_state(self, item): """Initialize state. This method is called first prior to any other defined workflow method with the exception of _setup_debug_trace if self.debug is True Parameters ---------- item : anything Workflow dependent """ raise NotImplementedError("Must implement this method") def _setup_debug(self): """Wrap all methods with debug trace support.""" # ignore all members of the baseclass ignore = set(dir(Workflow)) for attrname in dir(self): if attrname in ignore: continue attr = getattr(self, attrname) if isinstance(attr, MethodType): setattr(self, attrname, self._debug_trace_wrapper(attr)) def _all_wf_methods(self): """Get all workflow methods. Methods are sorted by priority """ methods = [] for item in dir(self): obj = getattr(self, item) if hasattr(obj, "priority"): methods.append(obj) def key(x): return getattr(x, "priority") methods_sorted = sorted(methods, key=key, reverse=True) if self.debug: methods_sorted.insert(0, self._setup_debug_trace) return methods_sorted def _setup_debug_trace(self): """Set up a trace. The trace is per item iterated over by the workflow. Information about each method executed is tracked and keyed by:: (function name, order of execution) Order of execution starts from zero. Multiple calls to the same function are independent in the trace. 
The following information is tracked:: debug_trace : set([key]) debug_runtime : {key: runtime} debug_pre_state : {key: deepcopy(Workflow.state)}, state prior to method execution debug_post_state : {key: deepcopy(Workflow.state)}, state following method execution """ self.debug_counter = 0 self.debug_trace = set() self.debug_runtime = {} self.debug_pre_state = {} self.debug_post_state = {} def __call__(self, iter_, success_callback=None, fail_callback=None): """Operate on all the data. This is the processing engine of the workflow. Callbacks are executed following applying all workflow methods to an item from ``iter_`` (unless ``short_cicruit=True`` in which case method execution for an item is stopped if ``failed=True``). Callbacks are provided ``self`` which allows them to examine any aspect of the workflow. Parameters ---------- iter_ : iterator The iterator containing the data to be processed. success_callback : method, optional Method to call on a successful item prior to yielding. By default, ``self.state`` is yielded. fail_callback : method, optional Method to call on a failed item prior to yielding. By default, failures are ignored. """ if success_callback is None: def success_callback(x): return x.state self.iter_ = iter_ workflow = self._all_wf_methods() for item in self.iter_: self.failed = False self.initialize_state(item) for func in workflow: if self.short_circuit and self.failed: break else: func() if self.failed: if fail_callback is not None: yield fail_callback(self) else: yield success_callback(self) self.iter_ = None def _debug_trace_wrapper(self, func): """Trace a function call.""" def wrapped(): """Track debug information about a method execution.""" if not hasattr(self, "debug_trace"): raise AttributeError("%s doesn't have debug_trace." 
% self.__class__) exec_order = self.debug_counter name = func.__name__ key = (name, exec_order) pre_state = deepcopy(self.state) self.debug_trace.add(key) self.debug_counter += 1 start_time = time() if func() is _not_executed: self.debug_trace.remove(key) else: self.debug_runtime[key] = time() - start_time self.debug_pre_state[key] = pre_state self.debug_post_state[key] = deepcopy(self.state) return update_wrapper(wrapped, func) class method: """Decorate a function to indicate it is a workflow method. Parameters ---------- priority : int Specify a priority for the method, the higher the value the higher the priority. Priorities are relative to a given workflow """ highest_priority = sys.maxsize def __init__(self, priority=0): """Construct all the necessary attributes for the method object.""" self.priority = priority def __call__(self, func): """Decorate function with specified priority.""" func.priority = self.priority return func class requires: """Decorator that executes a function if requirements are met. Parameters ---------- option : any Hashable object An option that is required for the decorated method to execute. This option will be looked up within the containing ``Workflow``s' ``options``. values : object A required value. This defaults to ``anything`` indicating that the only requirement is that the ``option`` exists. It can be useful to specify ``not_none`` which indicates that the requirement is satisfied if the ``option`` exists and it holds a value that is not ``None``. Values also supports iterables or singular values. state : Function A requirement on workflow state. This must be a function that accepts a single argument, and returns ``True`` to indicate the requirement is satisfied, or ``False`` to indicate the requirement is not satisfied. This method will be passed the containing ``Workflow``s' ``state`` member variable. 
""" def __init__(self, option=None, values=anything, state=None): """Construct all the necessary attributes for the requires object.""" # self here is the requires object self.option = option self.required_state = state if values is anything: self.values = anything elif values is not_none: self.values = not_none elif isinstance(values, set): self.values = values else: if isinstance(values, str): self.values = values elif isinstance(values, Iterable): self.values = set(values) else: self.values = set([values]) def __call__(self, func): """Wrap a function. func : the function to wrap """ def decorated(dec_self): """Execute a decorated function that has requirements. dec_self : this is "self" for the decorated function """ if self.required_state is not None: if not self.required_state(dec_self.state): return _not_executed s_opt = self.option ds_opts = dec_self.options # if this is a function that doesn't have an option to validate if s_opt is None: func(dec_self) # if the option exists in the Workflow elif s_opt in ds_opts: val = ds_opts[s_opt] # if the value just needs to be not None if self.values is not_none and val is not None: func(dec_self) # otherwise make sure the value is acceptable elif val in self.values: func(dec_self) else: return _not_executed else: return _not_executed return update_wrapper(decorated, func) scikit-bio-0.6.2/web/000077500000000000000000000000001464262511300143355ustar00rootroot00000000000000scikit-bio-0.6.2/web/.exclude000066400000000000000000000000541464262511300157660ustar00rootroot00000000000000.git .nojekyll docs CNAME LICENSE README.md scikit-bio-0.6.2/web/Makefile000066400000000000000000000011721464262511300157760ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". 
help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) scikit-bio-0.6.2/web/README.md000066400000000000000000000004561464262511300156210ustar00rootroot00000000000000scikit-bio website ================== The scikit-bio website can be visited at: https://scikit.bio Previous domain names [scikit-bio.org](scikit-bio.org) and [skbio.org](skbio.org) redirect to the current website. Guidelines for contributing to the website: https://scikit.bio/devdoc/web_guide.html scikit-bio-0.6.2/web/_static/000077500000000000000000000000001464262511300157635ustar00rootroot00000000000000scikit-bio-0.6.2/web/_static/css/000077500000000000000000000000001464262511300165535ustar00rootroot00000000000000scikit-bio-0.6.2/web/_static/css/style.css000066400000000000000000000027651464262511300204370ustar00rootroot00000000000000/* ---------------------------------------------------------------------------- Copyright (c) 2013--, scikit-bio development team. Distributed under the terms of the Modified BSD License. The full license is in the file LICENSE.txt, distributed with this software. ---------------------------------------------------------------------------- */ /* Custom stylesheet for the Sphinx website with the PyData theme. Instructions: https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/styling.html */ /* Refer to doc/source/_static/style.css for comments. 
*/ html { --pst-icon-external-link: unset; } code { border: none !important; background-color: transparent !important; } html[data-theme="light"] { --pst-color-primary: #239552; --pst-color-target: #F5FDC6; --pst-color-secondary: var(--pst-color-primary); --pst-color-inline-code: var(--pst-color-accent); --pst-color-inline-code-links: var(--pst-color-primary); --pst-color-table-row-hover-bg: var(--pst-color-target); } html[data-theme="dark"] { --pst-color-primary: #72C093; --pst-color-target: #2C1E7F; --pst-color-secondary: var(--pst-color-primary); --pst-color-inline-code: var(--pst-color-accent); --pst-color-inline-code-links: var(--pst-color-primary); --pst-color-table-row-hover-bg: var(--pst-color-target); } html[data-theme="light"] div.bd-header-announcement { background-color: var(--pst-color-target); } html[data-theme="dark"] div.bd-header-announcement { background-color: var(--pst-color-target); } scikit-bio-0.6.2/web/_static/favicon.ico000066400000000000000000000013761464262511300201130ustar00rootroot00000000000000 ( @ (/?Rf}ۓۜ!!PN! !n0"q !!0\QV!޾1an&R1!!!! ! !`!> !!!!!!!!!! !!!scikit-bio-0.6.2/web/_static/img/000077500000000000000000000000001464262511300165375ustar00rootroot00000000000000scikit-bio-0.6.2/web/_static/img/doe_logo_green.png000066400000000000000000000373521464262511300222260ustar00rootroot00000000000000PNG  IHDR^<.`>IDATx]TG~bC{Q{.vEJQD"  KIiz_l^h4ys=f~{^c9Ĵ4MaW4s{E֭rӬ[Lo\7ِ9.zukEYVʓ3v~"(g_\ZzUdTjbÛen3,c%"q q6h`#l%F^f %M2웟QeQF`a9o6~_abOo3%eNQ6i vPicQKkXA~9:OFdKGq1`{e+6Vΰ(K7/|/)9ݳI{ʾLs .NÌi PdAFl F6N6Ƕ,GlNwÎ͞5-b'4VgӌQgzTn۳/ˠr֕EY-3 ڟ۪,C)ka]-Ѡ%V=lCy4ţ#xpVؘ`L+zcsxD>(/iE=A#L%' ;Ed((?SVݬuŁmYj!NoFzs،qER >\ Br{M n#h^D=Q? [aۮ(Ox v#"آ4׊45T eQ7}5f[UF %c|/hZ+ hn ̌.7ceť>(ͰR`7q$k,G&jaB3(JPߕFNr_dFI52h0riQb2Fx|0Z!E.M,3d%ZHՍx\;W(qzsLtܣaT Iķy4F)&v&k.HM`C+btyF{eI}(Q1AF2 {=qbtQ+ mlcJ̺YA9J9-|vGq[焈e!x|v؞mI ! 
tKj  xde!%ΖŹnXkI.uAy`<ޞ#8}}C}-E'2|3Www^-s">l^ 5c=2ҝ'ls7og]^2O\Kio聀_ 9s|$;: B ;UlF!D!6pg5ǥM> Lpk56؜>ӭT-0i;NwĵÞW%!ط+;p<,=8?=zHHt4o`_>\\{n* qaK;)1j09~.A}M`m74ݔ*=p -}L蚆ݔgTG@I|Hp )㋢Nrg$m7%>MFfӃơA<{(cx>Ku΂6x&:4?ΡO0x12O#)Ē8R?TwwTJ4_uU$`tZK xU"ȳ1(,q,)J7i⓽0n33YaRGGn/<:㆝yؙi!=%:Ъ{Fi +oi9p"'+1,3mxCu;[yF R Zw™R7j 3R=\o~w}L=/P|.ϧI$<@KvG<47**8; udRjë+.PLc%4Tׄ'7[B/wnR 'K[Oӱ4N+z9uy<=K7:i9d0fH8&bN&=_9.ׂgݳKRUޅ^W,WѾl,c3[+ jcA-]1~;S3Xc' \akQX/"+Evb hlAd2"/z6DK n.; X;b@}|g2P\ ְˆUx~LՎ ùzָU 3)+KIb:+jrt\}xܛkM~T O+[͓<<>bK<~]r J7S.Ž櫪8$"U^B$ՂA,x鏯Ք@< o=Em8B&!j@)|b@+xzK5ޮ xyF+31fa vh֏,Mn%ZrÚajc17S{ ہi] Ok&x$s<{\SpmvXmgֶ%` :~4'ӣ.2WZ! ĸdw+Ħ V|OmI3:'!}=gEy@Ћ}EE-<(2+bƩDbשx9<"@zC5 <i=fAcI Uxc}{3 [g:ܞӍ[RAw`/tgRen((cw,vAvI&vZ89h#cloh;x%߃N6eb^K(IPȤOz@oXnLk,c*[d61GI.o+xt? x]vu5,ϠCSA??ׄt?+z0P/EvkOY?CUn@*ѡJD%}NzJRƵ2ܗXE1Y2tU #k7 cMB;N"XVT/6xIzi?)0,ɋ(XآyX# y0^xewXVݳ^-[`K>*7bsʁǷhª%<u\nM(0a怩ï]L](B7-znBӁփѤs&OgOkd*vLA^PG#k2O/0 7=0t¾L}lKքL놳ya3 79\ÜUMDʢ,RMmӍήB;g׫wP Z|?"> +R4xEJ~B{jmóEFWn%N4Idp1~|vda^)Lbm+:}ȅ-pRE0靋i×\ w9ԺC_62c  ^IP@l+6}$XVc|R~nUbX,J\)Ю07Q/'? 'Rqzty:V26r3EX5j{k,7ݽ&xp ]6h](S/D1ot2BSs`5(C gv \. mbp ܼO S[t}}Vò|u"\0{)~={4Az7;fk5Rʢ*VF`<h3j6e+-Rx*PBS_l=QhN:X.#7qSpm?x`~$QN&a@׆ &Q;1 !_!g/;$>5 }+ ) toEHG\a ũڸC4gѠ6pZGao+,J6*1Ip6wϣ7&36ڴpfV`#):c3W!=T`|˖# a󛢌]m V'N z*DT i 2ݬ!SX3u }b ¤%%%ۑcYF9]iKr]bzLuxII7_H1j: .DyFslcAstvc5VB5Z jMF Q ?a0?YR|$ iWT" Pf%ϟws1")P/bS!=^dV= M3w-+]o! 
xp0x %xQq|c7p([|cx-\}1&>Yh+ʁolz,60c8 bN6N7/<ܭl[VDdGjN[eEổLGߎO.rsyqU:J(%x(<&tk)ϐ яdX'BpXZN2SoϑHi&-h)zЉ%|sGWCWҽP %^$ )= xxGs"MvAolƯҀZ D-p-< {D˩^ׄIeZ f'|ܹV'_WpV9>_~omǃ%.T~s 8g_0;,9!piQh}|Kļ*4P1Nw3Dr|#` e[:a hboZ#0ļzp"k'5ZZGʄZHZ[qeҒFk<,}k 5֍%yH2.dy`IOuusb"t Q`EZR/XsVGI*l(ჹJRR0($=2?0]$GL1ɏ[]>k"?8/J68e6;t3]c}.kP"CЖ v_z1DC=7 _ny "L\^Pϙn>z"bz.d䁞 ]c3ZGc"[<8/IYT\TzzaْNP~?GF(µn%iz CҒ?H4u^9- At+v~kr)%]G$|IJzKL9}8eE_=(h , Xޕu" ܴ#_[Ǩ=s`Z93tc'`}!D-YsU[#xtH~Q,:uJwm2e[C1 V5\ܢKt`>s=pgݙjt9M{$>ʢ˲#,P7)1N73iͰ6wx"gtFZ3:oW|35Bw:ߴEvh7+?%>*E7Ʈsìz ܿBa53 ׮y"|O'fZdd,P7ubg~ 5G-gQ.-1&wh۾^;uUa3xQ\*nimd˥6EN9Qw{2]1p оW'.Pқ:` |~b a9p e;QAXLJ:6s^n`|sgnD_N6DߨA/$BlY(ڍ7e1l;(_^+ź_$V.%!a[m2k> ҒV?&jŔF9r{ez"hPik<q(׫AԇISpi|ق Tz^G޽%9`Aa>]]NpȤxr~K"ЁPO&N xKdROXaR pEs|T |pXo5 P&_QHTx%^rRdJ2 a!EI yɱeQ ?Ky)%OzⲂJ8Zex~X r$+q. f1z[WlEcؒW TA^.VOųk, =g5dDNw@Z-„TJ`Wg$>Wp-K>Qzym<~a{1)v+I74Ua}%a4<jC04g~i({6^jwz^#^ʎr'x+7#X6xUȢ,irnPT m?ĿDިڸJ}ہ7Y3tZs|8A+NBT0 {: xwD{&ȤTfH-} @7h0&Ɠa=JZyoamY[7M>Ġ/DUX]BMHBv傶؝1l *Mk &mjoRqMT抗@QNynnkC޷|:3aEs&5Zag#.FNA(AZT _۟ ^z510|aheKhևo7̠w  c^ ØnH(-"Te, Qwt U+ZzRyMa+5Q5ܭH⼵ xkmW},AT&{6m.XvXuaY=W4B p9=1#YIHK}f @GLo냣,*Kd\Kz|j~YLݐ{Q? DN>Sݰ*#h3 FVX -WG%3ej&fj.sFuy[W)ƵpBFC=&Gq HU4_ `Wkf\{Lm0)PWVY5ڨs`7RxGN<^oc>Fp 2Ie@[O>vե|unx߱\N`.s0)! <۳@d9]@6F3z/'&9Q})`\ \tF[X鄅iNX =8? :3SC51:۵ >ftd8#g-¤w|R}gOћXf5j/j!o|6 s9H1yI aMT<%4 x,H~+--ũ:?Ƥ1}YWbSt,GZr{| [Z=qkq -r3?D~Ǭw}CGVh8>̧wu+31FW?.<C}6\W6+QX 7zzwSN /oAIb7]G5xZ|O$їp=kpr@D~V%Ȧxk!-Ci׾KSu5sɿ۪j*74)՘IV"6 Eؼ;#q,zh?z0i}1 p 4d?nT P"lQK=uG4y x=;xkK^}+yM/#HZ#^F!u ޯ۽aFݐač 3K1sڣpD'K|< M?n۟;@Yr4u!>ޥ p5%8~3:͐1̭lX;fpk Y(whNemYx ڝ&1PnIqK胣;Q56ąSfq| t,}P $AЛ;oxܻ`9 tg/@wCymj\Bsr|'ʞSk;>g<7oZ|w>:S]q?fvLm. 
|2ɷ[6mƞ x]6]d+-a.dw5 'ݪ@,a#a|І 7y=8 ne˾?h*f='iSzDRFwEoއU73y Z k ^C9 9#c|k=y2VL_S1Q Yv}hZv]*"!o@P wmDr+m=g>XE\:ʝzh\"W١ 6 W4рP:w#0)۞g{l3[b[Jn+qܘS32BL<}H.\dV܉nw*}IU/+'_YEXV@mr"nxrRT$; |h맂mwx7){BbM>%-/dD8QQd I~a1Gus-a'l`;/}) LB ̈́U99P!SFL3ejqa0,VĒ{X\42v^mqma,a Mp6vXaC{lɳF >\$io@5v}dqGqݕGl8U86/A(.%znåٕIMIKs)Յ0=gQIx;_';ܒ\$_=dY:f# S 62 f%u=6[\-u*ŗ8<A&vd~K=~2q.I1ehx8p;G-$2CZ:aG[1ߖ&h8 %wLr^3[T;Z.M E4D^j50M8Nך/ Wj>\j? ɆƑKcNKdQxeStO+zL~kxo'Oov__믎׺R̔p]mN$*ShzbnJ NV"3 ֻ"! gdI_̟"Yy8dk9s/sNņLK4Ehp+t\{ynh?%ri\,Zw%MZAGh[ã1ZO0ljx_ cV3 7?(BzmE][p\C՗On5I I"yBH%7pߐSOo}y_B57NJ$/aIuR"[HTεVT!"q“ܯ;wt,`f'Nos4O1I oe;^syO {b8l ILJ>M j0,G0֮U36ຬ"LCY!aaf1Po ,@x75D *>dxkn[ j x Jͽ~oAo^2`{H қ ╔A5מ7Gّrɓ.l(lj-&\E.c4;nAͷ xjxVa37 jxતȔ~k݋L06#xH~绌 DӲU'X eU^urZ.]Ksvˉ!«ZFe ڡ{gG@^{T@BFw|  d;~#>eqhnpR}aL:Y6L:vdV(i#c}{L#zP,_{ coA{mN` Dαn[uNJͰ69|Z#}F(삱˜ Kp6'I Ulj`}W75wRc^5AdSR wx)$y'6IK@:h=y\o0Y j%_y:-ZKj ߁ܱ|])T/mVy[a:#hpkO (˷;Ĭ4\]5)i[E)9W4(̋ĚT[熱KseF81]Xjx_Asy] ZxK0i]5/ rY)xOf(x0}jsz \5P`GoW-?\򈜑>,~Kْ{EO)P$i I$(AWoJ *=;߇A#d@r[E 8W.<Ép ؛i"* Ɗ&؟+R# >=ac%f,a&m9tEn|c.xHrnaax'x cImv%ya]Ș |@˖ڢ{\޹k|O?;ZO.w&Yk{|ܒ?w 2ܕrmU2}TՅ$r=RJY犓rTL %gO\ׅ)Ez\aˮ#/C[RiHq ڸ`}_9{w*1y̧`2[G,tAdF,qg_Ooi.N]( _7Bul_j87 3)&^K\RjbF[ #bn]lI#ۈ$_Z$sJ!p[$fs}4bR`ښF $1Q_RuO࡜NcIAjz_H߬+干{m^xiS$pp,69}pޱ:rwYDH&4w!kIښq~V"+ag:"|Z3VZbzCɁd]O5F^(a:HZY(7{Zcq>0υE.رI Vc4Yp 76{SoHYEY*[PuZTf4;qDGvZ wxL kxvP;ؖX~_L4#9^WsT.ؒ K'31-;pTƙc*(x˪&4{$ 8Qfh?kVXh,d%TISƘaF.OhP9yad:jN<>޼8Y#L9ʢ,JU*ԇj&ݢ pֹ`@Gx/ @4[OFd xtv+'=qf4?ceAVeQH)q\5yX/*ߥn#a";d QE5.v!>ۣOg^hضci+gTYF(g?R ҍS6Ū寬;|GjLZեe˶՟LOy[yӎ4})gNYր׏Q$#l=(G%UY1IENDB`scikit-bio-0.6.2/web/_static/img/doe_logo_white.png000066400000000000000000000230661464262511300222430ustar00rootroot00000000000000PNG  IHDR^<.`%IDATx W59KRQ&PL)CxI!H!S7\ɐ"㭋KRWR|a9k}m| 58q8ʏ#x5"?n~l[ҐlCxvrc?^~ ~ߏx9~F~DZ~v?ji˦! 
{ph;_);Ǐg2سx'xaAяgPd?͏-ҖNCMC.mSqi~~?NJ~{Ga?~̏=, ?<Ґxw`<&q *qҽ~h lM[-F/xoulyf2n$ͱq=FAz[`xIsU׋2¥3l~\+[1|DIygyV p2D֦NgVO e5?M?#U4U̞*E\Xj6i{/&I7Zl{T"ʸQzW~)Azp_Ҿ}XxʆI5 QhəzεԱU<AkcA!\WÔ<}lޓY,B2:CT;سypxPg`\eB/b閪Y=@&xePOy"^~M ЅrfXgXZ 04` 7 ڣ\;UBhOfޯFR9J_IƖV?ҷ8;M*ݺCwwb^f(t{]5Id f}p7(OcOx .ϿBIװZ/d{@7&uFIdl|nLCHMw!*~z@K\EI4` Li(h x7AQ-CKt<D P4*8+:cFs3RQD|\6-/8gz7A[,@TH'`( ,k"x~7e=Ú>0F2sM4S'(cϋ;2yc ؙveЭl[ŴMcH5{*C\g6n"-]g򞃠jY0~&׀1QAqV*YƸj>\ _3'1WJ*# @+sB 0*Ѩgf|PLSҒVx,UQT+@Njw}łp;~e; Dm)ѥ}1vsGQmi m$xid1N]qa|ޢQ :H|r#U|?fŊ?{`Pe++rsQHЯcrf[RIU񭭢uKoBk#ܗuY/PpG҇%*]ݾoe7$A}l*:rzfm823DږoZF_xGݮTslj颮_Q¿Bpɱ p BmxaxϞFj"~2%Xgs@, iLX;9"VNH܇ٛ'HW>3M 0&nAjRG{e/K]~( xK,/u,e]IEk9\qwFT~hIcajh-mit40p=2 !V aWJ}JOQ9cw!㶙%8aHżW)e2"LnBo=(CUF) Jeh $&_pTђAYY 1YQd吆4,!Cୠa 5WR_,M6Sws1?<#%GUIG* CN41 UVo>uwUy7rB3%웄[UZSƢCىPLY5"W`_>_?atBʟ .:xc&؎ev0v%l 6iH7QK?!!ʨY&;*+՛fe]LծϾ1mm]ÁC~Zx㛒b,o#.XK8Ј|}8zWVR4LC hҷSg~ ex(=_af<b;s+ Q( MIk+^Lَ^VNe9uXIt:7 aN1jsE=1UWFig[>_e(]"y2!F\6୪> }[fڡ-M A5(ֱ RO;d0t |gR*| '³pEZiXwC~?fO n}e0 WSP,ي0&j!wu$mZHZf}ʩg8iYsO1VD몖2G6x\aŸ'mkYUh9!X)5l\K*Lf3ܦ/(#kq0W{ #Uj ^5eqxU61 ~/xxRx,/ Js F $;'G (ٸpGC|`R}`kuZ]4?bpq m)5p[>Gֳzdegfo9A/⮎epē.ұԙ -B*)s}ށeӮeԶdOTm'dgZz\rIyFx+> 9neST$ *U1,L QWDI?=ᑟW'm~y +sAaB̻X+}"g;̷=f2 r Ɓ%DbL_/{g=f!՝a[-5|Q hٰ` :T !矁'4h ڡS{ !7=vyZVӡBz?4$YG} ?xݛS~spKtڭx}< m^X:R=኿p;8Pem\)<_\wP=x3| |hQd5>.f S {{' ^N)"( o^9<|_jlTx\X8pW]#bMU9BuBҝڪ?~ebvfMYVjJgBؿ扱e||]"Nމ6)Q:r*T{ZM/t_x78pc]4^`/tfH>˸xQ^  lՒ{16]Y)Rb$J\Oݓ|@BW0}^&7# & # PSESjܑ!x<ҴkF{~837̕ w`˙HY怣De\ܑCRkF=!̡Mw^1rUQ0/t,%5x8›@,zu<42y6"MZ]*h3xGd˼, US7Wnew8-`)~"*L G=eL +eqH{Q# @ ;9>ŸƳ`@NVF˕ۏxk x, KgŔ񱕾C>R4ܳ =5ups[Ņ0B 7v:~3  pGͼ\ggiG`a= win#^xx7ʈxJAcόX|砡"T+bXF6b[HbW ~xqV|uDk4`{9;VseK_ ^mו SD6ς?,w~q}s8k}|L/oR 'j,;:m~.0=;B ĝLԨ\ҳi W hwrwUG2I|2L?~l1Sp&e]Hgù1 @…g $a>ݖ\eC-ޢ bT۝V9ZߒkrX[|Pɞ$ݑ^믑P18qo _xb^$5}pz^cw!b+|Ejk#FZ{];B p&R3J51ɶ]jZKݝ44.! 
~w/d9W,KP;QaCb{Z<)u8,f,AG^/ͧ2?bvb=hW^9$GN0ܼ&vC9m#{*$#ѩ v^TAnr'&͵CfdNzC=nj{װ% Mrv~~QY.tU-:8$t({M?MPbfNG-ghT<}c?;8QB&D-Ҟ9"ߚ -?zA#7 #brhz7\oI8Usn7 JoG ^ΑƛnƕFrf:\s؛:F24j%G];LBA^J3]p7װ/BFپ^]@\Q<3޾ʠ[."X>"ҕgΗû.TI UpqU<k`,E&4W>(KP<+'qX *@GC<*tĖ/x/%ϐ&x?1b6x@z]$}+G?Ǻr>9^upJ7[<]:`J7y5yc1d.OFTZe8dy yӡ@YPR!@Y"8J61yIb(\Qna_T,`x%\+V*;t=v' x?WN=/)_N:Y`Oܾ!w4+ċ0s\!֊;epV xod¶*"5s|U6p]l=9V[k 1F>=SK5w /P'`JŔCvӍW&z)oÔgɊ6R i+ H86>y/*m=siV݋1ճTcb-MAW21Ƙ3,ÿ~Kupxr} _ ;M;N5ѢUyƨSB ;I3*) nxHIn1<$Ei3jXWьd~ 4WpҢ&܃;Ƒj)tK;v:5LkRRpM':xol/kk x%\HU>OJo$!&$6.|H>@KũF-QjbMuᅪ udhlxF1Í0r>_% ˒s{{+E$%4G,OyeW͏aTAڀOuUaBα}t;{jV$,y_0!m/4 hg6-$=6FB^ow W+#S7H5Q.f+/ir\E RYy֡NSy \:/Z1>5^J!S%xW0d~y`^Wnvϐ8>a`Fm $!e`fn͜d8Bj\,o^]y1nI;ʏׄK YB#= js=txKŹ~5L@• ޖhqzl!W?A;;X{q"Ow&c z'K_T /gQGXXGӸ3+ǤTR@{G^|C 1ִ,I~^wfŀ5p٥V ᜗_VQpa_6+X[k#c]%`G;^zq Db8Wf, +d;2 R)$0%G!y$ 8۲=944g|݅o/~CYRƧa#2WoEevo ||=DRy*R׿c'6Zc{u85XT=\5Ȑ2\Oe b@tC^j c\"&A,ZETz@b#X+*TquG#nkr «afGp ĵV2/[G:TQMB$c/87N7X~Qi%Vp\I&ȿmS(;/ hy<{-RLЍHBσնE>n( AJ % bWzOEՀ״+ziEoGV7k97fo p(LonI %yhG@L;:sıx?~ c',yFx>xR п|OqHQ/o2rs|Yb^SצJ 'Xy{.c\.etbnV0ke8x㾨qbaj@̪F&wYs%<$a\G4J|I J9x=TV&ZQom)p6h Ϋ}q, tš@}KtriD=Srk^h:ߔM.GmV2gjU@zI;ȡ#VFX~nN_=B[ɂ\<^E^C^7 M WPG,d^n,tRlĮE!P!MPPƱ7<IjV#gf~'%a׺ɤ_O^6Wr|I#΃wfUob/Z9G3E{ħ!(> tcz'K$ D Ș\pKp"\ C^g F ];ʅ-}[Xu,m+$4ҷҫ_)?'td;B1PR~\W[noe! 
)wrcf1]TDGe`{@¦IW-m4!9yO%P;\<kT$ak@+eaڂiHCVOOm"+K IENDB`scikit-bio-0.6.2/web/_static/img/hmp1_pcoa.png000066400000000000000000002177131464262511300211270ustar00rootroot00000000000000PNG  IHDR*$9tEXtSoftwareMatplotlib version3.8.2, https://matplotlib.org/ pHYsaa?iIDATxw#]?JnuarޝMb1^0gp69 x,=c~} rq\]tN3sK꠬|PWV%zgUj^;ޒB`aaaaaaq#XXXXXXX,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {K-,,,,,,ѷGD} {^_Ž$IdYFQ$IB, K-nB4M#LFMWݎ((,[( @B!dM-IBt]Ga-,,,,vkoL&Ma70Du!BTUexxKss36 f-,,,,v%!પD|D"iHD2$H?aŮ`u]$ق,gD&IMfZXXXl%e%]u]GD8\ 0""%e(鱗 v5,@k`aaqbEY0h3bB5"^}{/0;-/uD"A<[Lz=` v_B8Vk׮QWWGmmkDߢ$}bBv n m"  QUUx %]ᠱF8``00x*sPUUp8MMMj<&ZXXXK-L4McffEUUUbǙ˗/78Ninn YTU󙋀};z"L&\baaqg`EF$---f]Ǚ`vvJΟ?wn7nVD"s0==TWW׻/wّ|A"¢,ѿuUU3' z{{ 8p,wJǃ!"`||I2<BloEE>,ѿGI7bKuu5.]bzzSN9x^^/N(" ~:PSScZn'{V FZXXXl% zb[ҷ:?~n8ɲLUUUUUtvv:kkk"ƍܼy3(r vȵ0qF$ {`tXXXܻX]}b!H裏RUUU1q;Ie& Ԅ& 233.+c s0d.H@(vz3,,,,ѿG(444n2˹SȲLmm^099'#n|\7n`eYTh-,,n,ѿ(>_x_uFFFԩSmy;a6z[n`7nKv ܋ zOv=g$#laqc]$H$BOOsE^o( -HQbX[WFAu>1"#oT꬗+???O??~|[!ۯ|۝XN'MMM455:=ڵk$ »-[3}f:w#߅J7=iCCCqc bq\҂"-͛hfBz)Fk# nEl{_ B!zzzeK.QQQQ1~1HDEEeřanFà KҭtaBe5^x:::8zh/~>"(Ȩ|[Bh***zʕRjX,fXXc-, ]ұ RNC8jkkwn}dYB4-pnnaNg"t}!UUe,M h-,,vKPV)Ǚfxxm;!088Hkk+$133õkרn 1HU fܳmc}ȌX|],ѿe(Jd~VVV8>N?NITWW~rP+B]]uuu@[Ph[XDtMEihe9A/fP ===x<._lw# fq%\.9rܸqP~{lD"a._N$dd|fn's mt@Ū2!l{oLA !`ttGՕ8;!]gggNG,ܹsu]ϨrVݢp8hllH-{6<m!aTWWsUn555OM.ݥx8p@[=$)#xJZ܎EPWW˶ "{ x7byG85McxxY9bKTȮrO&IxI;n s0sbR(;p82Zɤ $ZXܓX_ZZΟ?_~$K.*7n(帎\7c^ `ppmb1meOSe*̙Bq8̕c =:fwv:XXol.¢,'/*LMMq)ڊ:N'N@eIxFn{||ifff̰vmmmRR(Afffre:Rwi"QgH$f-{K~b$^.^-u]gxxMcw ٥ZWNsSUUE `yyׯg֖.vG9Ω( t@UUBk݀%{~v ;#Gu<gϞ-9O]N?Ӄ"BD)|Ḭv} `vva\.(vPN&((ap}znYL:Eqt]'Hlh-,,#x<$'N(8CCCaQav@__?~MkkkQǀ4O'ư ^__' kkkD"3P]p9vQJ*[`I2<*Jsuߤ߭E~]sB0==0]]]9r$ʵu_~UUKj+DCDe|>>g>܌@uuJa/v9wZI=ka}}`0cccm555χiF ]dO]{cX-O+++?޴E9Dee!SNTU]qTU4';y_tttׯ_'PYYi.vZRw;(gxBu(hnnapnnn.[`)%°"@u8X Y>ƥ[CX_fJJWVVpm{K͇kll Μ9|Sv]t`c ^v^tfODGB3KQP߃, Û-,J{_,$qM9~8%~<x<΅ 
2vGwebb⩳ΌnFVw*dsn7|^L3[:ŃQFAᲘ9Nߢ,9O}nSUñ#bb1TUG*$顮ozBX(}}}$M UOv;|0d|8glp^n3WH{vv{ލjʅ%ӣM tnfZXdc~Q:ucaoY9z\~qN8d' ͹.]*iW -sHݮ(Y?p8aff/ͦW Nv{799 `޻h4+7 XoQ,/p7|%ԩS̔qz{{FFE)$`rrQdY; #s ;P(D (t3`/zaJm9^n]7TAs^P{K-ʃ%XQ,,z]MsxD~UU' Cqʕ~?5w |FrS Y"cccfq ]0n]P944iTTTyh@>3ø]Dݪ=‰'/q9Bnܸ7.,vop\f;NuiSH=f,BvBَw 7of7鑀{߫VTTpAҧmܪ(js8\| rBww7UUU:t@ PqɵB0::$Ouc˿?5p7;nt]gmm`0<###mƠv󼵵|D3<'?Ioo},@{{Z{.BTVȱbݘwQ:;;ֿٕ|[Q>?ϦTq"(2TWWs¶NԝVСC/9,Y頜<|2Ο?oJd2I__kkk`R0k( =t;ˁ;[-;ubtl_ J(䳰(o{_ Ӄ1{K=V!J.]-T'$i㌍qر  T̷ q5fggKP {/٩X,fNPU 0Wߍ;}SZܛSoޫZt8?YQU_~Gյcm'Gybcbbx<I({q颶FKK ,ߖy{߉\.ZZZhii1\) {oN2ڭ){p)-mH ~"```~iZ*ۉ~zࡇ|ﯯf ّb'322B04*A]?ICCmp'!IÁL@ cccl6L& *-b [{B%mˇeo )nj|bJww7˗/ᝆ _Agg*xvzTJ. &Mb4 IvMED?xd2}}}g_r㬭ݍ."r U.CUUe``@ QOE1۸Ο?@ `Vqkfp@N"`/V5vUp8L`H-Eh2u5 Yel+lgbt]gtt)N:E[[ۦהSUUejja<Ç~HOogΜ%v#IX_|^RSәQ3ngZpYU=1ncY^^Ѽbue #pݴ֖e\SSڭ}k*(w{_N?Ӄ\xq/\)\!H$dcP"(K$[r% ;lh;ry^^/&''2#wSU0<_ٮ!HL&-ѷ(wN"(YOQFpndYҥKǔ7F.//PQQ͛7>N_O#<8*W^qE"l;u3¶nu]l%x#.x||ϵkH&]pMM͎'p [;NpQ_!8Ðs)9ѻ.]rr.F%[Nb pa|D"aFL3Vp~WN Eee%Dϴ 6ŤB`ED񌇁S;88j/bw#Gדk^ j}BD)'+&PUۍ$011,_Rw;SsWQ-킃`F$3`+"XjqgrG~b0 kkk p8|r݅i ;wn pD8 @x4')8۽h4իW3L}ُ;p8hooHA :|wO~nٙ񹚛cxx˕Hn|gߋvK$^< ) Btwwcٶ\`ND"R.LOO͜Μ9 c[ 7np N<Ɂ>Fױ_Hs !LG7xB~(+'/uuuhvvu.Fl6&`# ˿,,,t:y'yӛą `_~z8MqŰBOO]vBvbky&gΜ\-{F9 8eO$***7Cn;#ݯ|ƈ@Ja/wbי gg?// B<3/c wN?ׅ|妲.!9r籸wWJWQ3_ Dܙ>#TWW5L&?77GIŊ裏 wrH).9Rpkhϝf,LO-\n/xa%n7/p8lZeKٳg~{}!zKin˗/g (iCCCq}T#=~ 80/εkט)N]ָz*~)\.W(vum&nE9s߮ׯ3>>l9 E!K/keSu%Iz{{ B[cXb%DeeiYv!IizIn7+++u7JKR4MۓB [,͞g[<K,[hlB~?v;Uz\tiˑ; 紐pB| <###TTTlꁿ|{!z;iSjjjJ^TN( nCղgQVD{﷛oii>9|^9|ɓ'wtTU%]6.VSܲG:ΌT@ﴝ~>,//399rIJevp$P Ov(SSS:umϽSяD"twwpQK>Vuei v+J 﫪@zh4jJb1^ `H$@x*###ܸqc_L+'ܺ2\Ҽ/UUU[^8.zu'!W=@MMMT*¢Yx/V9H$BOOqł;}'NX@p8իWE>$>Mcc#N{=3339rEQ̢ |w/y f*svgc ߽ɼR٭ӷ(7{^ȷDz~~~ZZZ8qDQ+lYf0333>}4 *g}@E71)LNN222‰'hoo/8鶥DoRs޷r*8NZZZhiis/--166n77~h]ӷr}Mfvv3gܼB4]Mrħp򛙙ٳ%m'(BFGGٳc9t]7>Chc;rpLsss|qq큹FWp7noKd/wΙ>QRTղgq'g_Liӝ,;j+F룹9gD=F!nߴ("Dbؖi5׼fD!|ב7roNx p7EQ:_~G<g``MӨΈzkvn'Ŋ(f$i;Z [}ӷlq)$$^ xiZ[[s\;ex<;jU^ŋ[`vfk,;S 9l'?8ڥGmƜ1qnn⸥g333f= $Ix^^/f˛8fK4*N;rz$+Hh 
+oQnmx}}\#IRzU455qmtFԠD"Aoo/pG}D F⮵iF?@~jfffh]YCL&(k׈㬯_wg쥷bI$ɌTPvN2Kkkk,,,ϗ4/`+LZX>mtuu3JM?V9$IZd#0 ![\; ;m~3mmS uY7UQ!$> 5o|v.[- 1 BYE~zÇ9xQ垌L&ebb'Ore NC8Mv$C#{RU~VVVgNoG o rۉΝߎ4I$! 2??<쳷cNsnU?1SŝϞ~"u,8<.\qvb=77G?=ztˇNw릷}CCCɂ[ ak,^u~7B###񌢷о;T@EECCC:ujSDŽ8*GJ:{%~裏J]ŽǞ~,瞣˗/@?\I|vDggg Y B%'Z S()NfTAv/|l^ Gazz`SnvX]Gߢ+&oF"-C垎722Bmm-/^,v;6Z`Q' C,鋖'O!kkk~3]YYTTTv=/C wvQ0&~? rϊelG|Xoύ7enn.#]&B墵֌|,TTTlr|wN?mF*`ee$w2?3;uPMv3!geee/#rqǎ#ߩﺍB>P n+e r biL/r ᠥe|>~:@ߋBϙNo}Kw3EqBo1Ru9sLjׂWŁ^nk!>bk CEZZZ;Bm6x< P(!.N'.]bhhr errg\qXTAʱ7/ :u$N pQ;tu oĸ[$G(5CJ+B[& -Tq'@%V{ηT5V-A$)F{1O i6466n~!l3iu;9v9rIV.։DbXC ݡ !cbb"b$ijAїMaYr2*o{1Cs YYY!099YXMbnM&.#QK]# N#oJq𾑖 "Lo/lN?z(E=`}]onnsΙPA9zeen|>/^,ivbL&!lIx_u_œX {^^/fngXn+v#H:׮]3' Rm^L}xH,q$H:Wa{XHzEbw'IX[[׿ tw/nZݖ 7ѣcw=Nfx{<ŎW^%Hl+#[__r|3v{F^2w:455122ŋF~3"#~ogN?,#(oAbAedti:m!±>fyyׯحTƝP_,sssAb,z9p_{'qGm6rہ/(T;^h?MMM%+ח؊Rkkk\rjN:s=W纆i~7Zo'{,7cX# ov*n"JrIX1i27h:8.n]ٙ`|>>/ux<!adddƓO>SO=Eww^_ʝӗeh4+‘#GJl'wD6Nuazz?q :ġC˝v%o ~jj !n (uAY{a ّ^ݭp!x ]¾ﯮqrh//XH N/G'7\ˮ^JeeeE> rDQe:t(c0N__B텵p20kܭ nKH:km*$!$͗u^fz<xᇉ&*H7~4g|>G?e}Yo׿Nyag_'o6??i{jj}C<Ȳ[V>O(U[˝v722Çihh^(5nbP(&u288hKiw-VyKe/c \@<|v&6r ތ"ۑӗ6$}*% zAk;bxi.㡲Nz .lJW+466O?ɟ o~OOs{1<x<*?s?OOol_.'I<IvxXUUռ[QN?r>@]dN9D?e[h/r弗٭F[ 0$E!HP]]]>\" t$}IDV]lyi\fS1@i||܌\nU/Fu|ӿ ,Mtk>?׿]3'> s?s/2/"'x7Aoo/㴷>}W^y~lݸn._lČ/iei)2Da]r7@;}>|8Cxoz衒 *4gΜkX.ѿ]Y(coGaAj~ӽJJlɧS 9`AMY42J\E5h.X[2PZ]]ͨ0p8 p׉т^ |;yw]^x}{_߹v??o/_OO͟p)v%B0==pp!嚞( h_||lgX*W^5wwVVV^KT=ϣusxg!ݺ:3{rp8hhhL322}/$+kHXD@"u2ƮD%T?l| M~ ;urazqeY6gv>|8#z444ć>!\.$q5Ο?LB*TRl\.?8?8Gyl)y碯*-ی/U9Lp4MczzX,ƃ>Xr N?ݍi9})Ugfftuu}m’ޏD"\zVpDn ([cL¿#k"0JwM4^7wHL#T>I$T 8 I*:dsv꼁~ώ?x߈o~3ιsvtfiiZ5VE݅ 8N:SO=:sI}YsI6wO' y3fXxMp:;|ZlА8|VJ'߁?z,.\$Μ9Ï/| ^y-q/y衇OT>d6>2.{~>*++yW?; UOseN'555گ??yӛ_o|o=y'b7~bO?E#GҲCivFYkk+---\rc廮NǏ^B:ݍ;kĉB0[:===HD}}}NۋPnnt̫JKK ~i$I2^WWWVsoYAxGh;j.\T_BKG P@$ *!I,q7>Rub۾v˗/s岟c/yQp{^}QׯL&ioo?g__hkkcbb'x??_<ȧ?i7SO=Ň>!^eG(Z9t]gllI, J>]w0,i.@okjj8}NN("LѐNX\\vN'`0Cҋnwv!r9PN9́bs/o# ?BBQ$Ytҭ\w z2zx堟]T{ֱ;ֱH88R\,x]Ho"RvqLI/k*ȣ\zCw\]us:k}ӿn'|'|-xQ);xNn\m^j 
K9fifFAy|>_Ơt;|0x!9腯'[2 f{@E/n_,sׇQ4rAŸk WI*BnAѮ!4u {6E\YˆO#jrxBc=b#葉2:Be (PG6 Sc⾻ք=;=BLLL066eN='f̍J{gq^\ti襄ÑlXW"I/^,( t:ppMQJ3 p7EI~XEES"CϚ#cHr;Bڟ>b(p#;qd!#f>,b,R%HloA` <8Q_E BtMCudIۘcGPy{*?7Br!vo gEP d2I?ysh'lr~'7JV($uFFF̙3E Ep;\4$$TT!~ <:frleN\ @%}o >!#sy{O̝CHM!Hb"Є4nP7e$!eW_EST$[%2 $=.RN-)j F=ؔ5@FP?BфVw ޭ{#6-o!WQQm/$I;Bww7>/o~,b===P }cb<@u8zhs TW(@Ѯo ]>",FY †I-*!7#is 6R?B Cs;@ $"j$V@N o\Mqy$t4{-"$RW]gaQn\lm )0P͛7(+t<%ֺ3889 ٥srNI1nWUU?K:N!cWb E$ܵo1d}!#&myw|z^hnnH$s=g.jjjBnfS!UЗ߱NP/S$E(7e4c}HjIZ!dGQ i!lRa\~4ѴI$b\@ K86$ Q$=L$ HBo2BHnCP޺ Rs/EQ2 ʾv BWPk KKK 5(Geb//^4PZ 'v3??O___9UU5:J١gG Hb zZG'>V|H1F~O{ ş0lܼyCe ũi$Vt4>b_FO!u+֏, EhDYDCb!5lSvIKHBES.+7lr+H[^$*:z'6тY Q$oU^_5 N4AO 0]3bɗkH6p_haQ$wse ( /_.'I.]tqhiiԩS( {/uc-~G¾D_w}f=  lE$ʕ+8N.\/|Q)dY6koZ( vηDr L&59_>:C%(39B, /9!ܶ!y IF Pȭ  d} !5!BB'@i@;7 |R;Hu/ hB"DuOCbΆ,A{M$>ĉq00.>E1VӴ)D"&6ZX%FNaa>8~xI9Bzii޼6;樬̙3H͑Yg>ōn d|oڜדpRm) y@a~ իScl/wEVQIBNYiSv9R|i#*4 0*w;_sΗ}/].Wƽ0dmqE}*uvIH DAͧQt ۿ)$1]Gў1׵HH8"!#R#žjԮ 6Tj>*q:vɅFFF6MQܭղg[`7 ٳ3e;O#{)ڶ|Nq3?SMvsc/ʬ#)r . .ڙKH"uH5(}zaof-p7mx$ BSS~*4PQQSM#t@"LLKliNM38Oww>w:pMgxߜ$L`\Iic]䮖6H7 ڪN( ^#Gl|^Fp~Dbކ{Zc+|׹:^ZxYǧ;,&qvtx0(e/( mkQ>C EGBE#Q-GHȒRb?e܊Sr|1TQz-DtYUeUT-1o3=%IJskDH&48ؼÆ1ףT74H˨7]5b#)Е3_Qo#k/o l9AC J*s桢NGGfHx^sP]]]p_E髣?}ŕo^~8+q/^رc?d``___2/bK"ؕڍ`E?(ݫ,,,R6|vvUk+ҧ qib~|w,@͝k!ß/GQ\Xf@褪+P(ĕ+Wz.~>r~&''T*6IA6 H μALCG+sD-fo #vI!)4SIAd"IU^l sH$?ZH TTTT=ƒ${rZND}$CHU[e#yd}!"Սϵ}#o/֮gFF`%R5܎f{3B.4O?;]H$LATU5kkk|E髣J__?"?G_̢̎x>ootuu1>O|<7oޤ=|l\"L:<ssse;~N?=L}93|W(i~?Ͽ+_+B rZgVv*߼87[룮iX>֓MDOO9g ߃k9hp6u(«Y$06F E&%8p5^3; 0_l?Y—-.ʼn[leN ӕ/Iz 8!Yvw_53sB1Qۅ. nN|y+JEhrEVイDdIQĵql# $2$oQ8d+;\9^GSet6I]/y+Ǽguer6{ɰַl&Xefz~}k2NW΢^[ád}H ʍݾJooO +ѥ&T s圸`x$I:;;D"w#ձ~6X:6Cv6$Q?ͼ]V[bJ{& 2 г:<^ ]xAEFvT$ZޞJ[5.ܶd}I_@f @I,"†AR]Rņ I"tRjO9m{&|_ _?S?>9~~GO|-,;z_aB_W(GA$y'6|hh5;/w-oy >~QYsoLl9Sw* 555ttt|,\!]WYgi&R#did :+] =\uZp?SUT75zDx98</0#7D,4uwk7jI<ʭ$I.r|;Gk /ӳ:ŗ滱I2 P{,V8T j^H&pq+.! 
kjt&_l8"mjfVوACXd+4 sYRA" %@ގ.u!tHZҁ I,.m;7X!Fcٸ<쳬o~Çlx6, j5xyHq5QWW?Χ>)>g;nx~̞~<^;v,o[CW^tr!T,\n/x~<ޣffO-1;жM~k%CL4s+^+߬὿]!Wpz8p;l%چC}qјdI2PRL[5p+Pf// ="#gKE ˋF\KVf&4Bƈ$7%ZdliYTXV4HY٨Fq75eKMKIJR B"ݓd=u"DȝA@/#_(2Qd_7['W үҥK-oѲmGajj~<|K_|ٯqsw\<裷layiooѣ͕00}ndx$KS~V|D"d)d:/ERKE5w~(6]:-K!}yZk90.hkmCwc(f7?H$uUUsFB2UkvԆ qԤʫ_3=(v#'hj`gUQ)D!Ixisls%){[8id8dsa|q]ŭ8p*vzA\#I߸f? Pem<=Bg܀.@ѿ $V]y +ڿ! YN !  EKT`CVIveYvKNjRe+",fЕtέ0^BM?k^ѣGyWy' ~?<Nމx;?yG>|]B{])EַŹs6CuFGG7Y3>>ŋKnI>}֭C3 .2vex4B—|sPI& U0D=1ܗoK g)TӎIUM%*w'_Ǒ]~mcO.t]gaaA>l:gD}r=l$y:pY4wʼ]^;N"]# 3=4K,CGGMh(6ZOqQ?ʟM}D ay`0XAX)#S:f `(=їp)v| NF)^Y`6G/JV(=oǥR~1}ѴG^٘O+)I,cO16DUғW* ;ڐ0\cؔUG$?2|CD"K@D$bIn#+2Tp3x?~{xzixvw-oj8M𶯸!Co῎~!uHd}#U`sdnYNY:Hp6~2Wgs *v;^|oMH?0htTqfRu1>GXf<ZN_H):H ܉lߏFBK5qbޅF @4@F6[]w%7[Z=BfK\6lne5].9  DZx\DV#`…y^ '*K. SQSݕшˎ W$ 5as)TUxv4MGMWRӓ0H,D"@Vdd%5TkO'$w/i¯%4hՆm6a f#gw!xziGjH2+kDףhʝOLu,cw؈4X Zu!Lswe%kh_]% #UW\GOqgh=Tg#l''Y4lx'qI6͒$ً70kZM6IE4?56zckjq6dIR^a6'aK@HإGwBjFhDц8H Ub AWҼ%rvӱҿǒ> aܔ t:iii%ásffk,|>_gnn/DPE_ԩSn[/,,G{{;ǎX5*oP3+haD"n_8~ N1re**=|S \#G6R..kfިj bZlMc|%8Ħ?V\6&qC"gY"]^')@C߰앩dњFdCSx ˏo[:v."NdqY-0D_ֺ_N it~ː||>:D24hFMM0v/=¢P87rd^Bf/˪iZނ!cccLLLdgk胇XZ&0dabd"k9T͝ȸ7s>b|{goZo,IEwpӎ㢢ZEqWX[8V:+Kk DT(u37RcUUӰI .*7u`O-4ƗDR#I_L* ګC'Ir!a7cTUe::͢Rs=p>^ 1csU.+x OWh^|y77EM`rLu'/3";AaQomh ShHbR偑o^c( TJ !(s; :n ,,,0224*7pCODf#g޽tRIU p8̅ t 4D 0by6  p0=8Gd-Bec8a^Ï0?͍ |ct\XP#~<\pZxu5NcG=DףX] 3cuTu'J7'[A dEI" !-ŇCq .BFqph|},-- i~Ι6:,BuAd=Aаh'В*Bμ䦿&tf6Ǯ4 >2DET:irƴ.?"6$m7_Ayޏ8mH"QHF`HDee%tuu*`"mF_o[XB EQB/;k B\rŋ.,Tv-S8"0jԩSo|a"?Ǩzgr1Z4 @C{*B /PSSÙ3g.Öʩ*k8PCW_ ԵpCVpy\$=G8 :Y"tIN)vgb.J踃 E7z$;~.=rxm=l466Hc%**{mD,Xag^s78x#8+6W`V\/zBt"uy%W7p)/,w ނiz*Z0>GvT~*' KH$ ;*q^Swo-_#7\ehsp8\,Mt&XS\mWqbqq|#|_faa~|ɂ,_y;r_v_X,Fww7X ["\LNNr}TXvNn޼ɵk8z(CFu|MÚ~+oYw :EBFCUo-ikh"'K>% ˪} B"\r#e. 
$#-܂&9V 0B*++M3tAI džO#  MyMy4¨וIJڵԆ!]Ŝ3_vF[[[@B{b~~7b|{w]WVVxg_z:;;yG}c9_W9wܦ$I__җWJ[[Gɾ}!qq 2)}}+WPYYɅ J* ܮmL&x+RgJ__kkk?W^yet$Ix K reeMhw5W4чbŝ঴J]',Tq+^5 u232Z(LB]B4orx{y^ 1 H)~U$9'Ҳ3 Nn8ÕvBB\5#y~8 8ci)1#EKR vS귊4@ 2nY?4>GEN$ViO#:4oveZjkk9rX ߟ' Pwp>\@g?K0~x'x[ʥKq'b!Jw`׺h>?׿]3_*}{yWy8p }{;~O~˼o-۵}!tno_ D;w( hB2  4dfv4!9~Acp*1Y4q?6w\G,I5^7>On{r 1́>}Z|{]z??oow򶷽~ |+_8>>t:ymNPYYbٯ\ 7v=KlxE"D"Agg6Q855_9ǎü'}ADkkkΔkSaEƊc# ~6՟%۹:AXMPisCl^0jIӑN[V ::ES֩W). ź#aDuéBr#l({6Ge!⺄GN(iO!Rֻap CH$zYXB+??G?Q;;??_Wy{޳񲟍w%Bu]gxxiΜ9 GzltSbsafffrR?xB4MFGG1 ⌖ZZ8 O`8qy|dwy叿オo*[^nw 6WnCbb  $fa$$ Y -uKjKTz[.Zmy0-TR=T\UGDJwrԷa ɇa*6wߟH(i~8r'h 7U䆽{ØF/炿dC2K()n`{ngi*B7)X%3!N5]G4L3ɱ6bjQ$$%OT,:$R>u.>ne?98r땸J(w9+ z]l$덿07v;6w1/0ɴ$M % rr_cn"x_;|o3DQ\4$IW~W6<6 E!,J~+ ޞ+_xq4Y ݾB3 N81̨Q_IO?_m6I&~]]]vJKK)..fd`9[l.Y芏ҟc(u 1ʲJō8Ϯpہ-yɑ ŕ^(Ybѹ{}tuuwY#TGGi 2ٰ&RA3x^=B]}=O-J}LL{ZD'};xj< =a]ExkyYY")kf='17432%z?5ɆQopsfӻ` ªg"k|iU)8P4EB_Üa6())x_D"bYZ$So\y ~n^{5/S[4s=|d2KKkz.WV+]wݢ$e|EYŨ$ qСܬZ EQ$ >g(rg>ҏ477p82X (AvU__?ott4[@٨!J>?tS;eEOlgI:w>[Zst m>2<<̑#GrO|Άf⌤CxL6<&;&Qe1 X+n6[|d;Q fiֽgYumǣ!:{^|~h7&N)ũn1?DnQFA$=UӲ?D&T+&> DjQMF掷~x $b50%HSE6'rb@ g;/Zc=X ӹ~qs>̚ӯ{Mw˜ _===ȲL]]Gԧ>5Ki{5?MAK RNsnaZt]'N ٿ?K7 QeygϞ]b9o( fd2aXXHf`G,@hՊnmD J,H*Cï$(z鈎Zx-ӫ LV. i:;Z6TUDQY㐏&+trTz{I_lǟSR!G*v"34>JX/vE̘q V :fM^ڇM*+L49<*#HEHd-'5 b5U)}utHi2+HT}DUAD]EMRF fKf[fIDAQ5LII+J6j1cfKTVV9Vd֣o7Wm{6̑j~~>f{{}}ʕY?͌kayp8LKK ^wQ髪J[[dU>&k]룫kYK-{Fedd .sN( $ $PI{bLfb4:s&\qC9# \wu*@L3^Lvz !c`UɎMZZIut9zͭRNCFTD %wsWY/M$6#hKi zv'vfHʣv-&Q`ξS!5 ЅϋϹHY} ](BiZpXx6 $4*.0wD/_ZTz6JY ⚎8ް?44ą ؾ};[nCH& Ieeek2KOqǧ,yC>{lOONǃi#9dƦhooGQ9'M M4Ctlf D{sVT9{%F!-g„37F0g.<5ƿ@HNfGa^N#Mݥ@B5#"fQCݟrMHj.͘_]Ylr!:U2! "q%Wgmsˈz蘔#@vI&RDV3Tp"*ZQ̽oOco7"0i2p0LpWR0>[3XpiH;fn>jvPYYݻpšiddhiiAUUne+x#9~4IprIWTq4552=GZl8v^7:ESKHi2"Ą¹s稩Y0gvb,4VɌ$eЙ̎Yׂ|wĎ}h*?=K 5/N6(2Ptl j ي[ws!^D&aICS?W2|ߛhBD5yGnn8㭼&)mΊ ^b7)!jhb}Mkz?& .\hMӱYLdDZ봳}[*v<ٟv3p)Ke(￑ 4T,OӜ={EQ5u:::hjjn5 /e+7+j-u \sV3f;莍W;gyNgdߴ0.ME0iq8p"ц~|>߬LNLL *fE,,Kֺp,{'-8$:f*C *艍Ӏ'ΒIщ>ZՎ6 U&R<4Ĩ=*AF@ܑQ|? 
XEs-QIbdHjrvAlw)?G8]~D(d=mvtu] QE&ϫkp@Ê-cf1I EX&&9I9.7MOk#P(DKK >oIQ =|/;v, +̟X,F0dǎ ̙ QgUNf #h4ٳg)..f 6̾y:磃415 蔹=*_BGGt:*++_f 9IK*`$:Acgȑb^;*D7!%8\0EE)e3L@A_"@O|"oCuK\Ib +ЙO;DA(Q@]dA[ 0 &(@x&2U8϶.D9k_e)UVNwۜ9{e}k:]Ȉr *`D:EFIQ_ea53:#NFV2BGgh"»7zc5T A`~5'NuŦ!~x"깢jW\!N҂iBG(<3cD@˲PIPu!D'W~NRl:}xM"%_>b/㺙?t!"5x@ڎet0 h !1 a hLQo+O;瓺iZ>s-"4xB)PhjHVdEKfH 32E)퇎heaVci[ A\UElUL&Ӻ\l_*{qEFGG獪]Kݝa)))W0Q1 رc###+z|+裏={뎸\%$ ֢׸9hh n؏ $["2O5]!nw + 6DTI!XߥmvA1 Sd҉*2%.r d.>9MҼ^z!gjڒ!zI{h/st]l֋%8$bT,:٘[TDЂ&$Y4숪ΨCA"8DD]H*aB*DHKw0,x G -p Aщݬ"cNB8Iu4MGzdFa<cgM $dEzvjkkUH$@__߬*jP(޸HR)Ξ=ip vKRz)S~*I8y$px;ƍx!=z4tڬXX_.S̖J,RZ#NUl0:0DYYٲϚXZxa,#a8 >ҌgbEI2IQO{>{]ϻhKFADDQ`R>=u+U$dcuѭbtFutD$DD`Ef! i9I&xy M6JbUw DtdY )Y!Y1/7"lgf`dd*j|8UUUk|:6 /$H$x饗y/#jMdhhh)㹖Kֆ={nkA ;XQ W뮻NxsGqXٻ=[ʧ, r&+A9oʬ')4ow(]&KLb=vʅRbۑv8QZZJQQ$ vniacOG/Lv*Tȸ'Zh"uVxb)tb)[*Ql"YSjQ_=V`r2NW响^Ӧܯ$&Һ.dn 0/kWiÜ2(4V8PA˞ LUxep%I@9 zG$2׎OZQ3^PpڦIFž$IXV8,-bH$~MCKb1MMMӒVgdt:=L,woTfVC>6tN^q la;|ˣ<%b }!‰oݻuTc+ƒ&~T 4d l$ 0 5onwCEabbߟK+))yAey[i^Iԩ&q9`9[\J_JAbBC0Dn" ᩶ MR"n)*# X_Mq~/RbkS͹!epvP+87fA^?x8e&00U>Wo6IxtNPĬva.9j(i4Tx9svՎ]`*aS[O|O|k BӂVkUUŋLNNzٲeee|h4Jss3n'N,LTuN voa^vSVVFYYYkbx_RCfkU- /tRtDJf[F(g{Fػ%3I2\v%*Vg)R{ {ppjlXDh*L,1LTTTPQQa?כp8@ `$J\I/[rKUwL}GCNJ&Գ}6^%lB&m!pNJm<9y}= {t&D Q&n&K&n);qvpAقD6H'{17/?+mn| 6ˣ\ P[Z͒\xN9<-%QSSn ]gb,GQP^ؿ7&8~8إsLlvv:>M7Ν;s=?OimmexxL& 96s &^%tq+Tj~s#i"o} [o*(=)4Aйh3&KDUIDX8XT.Ф4Iy܎0yjWdɎM5bfm8w\_ fܷx"ldbHUQT$f%`5emEaKLM.`+cs/7N!Ur7{nL&@cc#<㴵(Zz=ߦ>6o'y{(//k_ڴ' pi>jkkq8߿o}[F?餪/| tMJ[n_bkAW=܃`ǎ<裹<@CCv]v77k~6N&ѱNvE]] iA|"뉉 Ξ=KMM ;w\e-4@Q=O1Pn Q%\WI(//]׉F~p#FB}G*It@v3ΖbJ>e=.0`TN'9'ʈxN9 .p3q$QџpU,ʩ[% = ~ Ee:OHg5 y{d`FfIA"@1n{‡ae,ܛmMc֭Cy(d]PUYU %T Iʳ?ӢN =<֊!=WW{*[[r-tttp |>>5:]W;9+ieskMK&}C|k_s}{d2~7o}[?x< VȚ]"k*L 2]oLVum0̍C&I)++cWU)[|˽W8ٿsnFM9wyhdZFutW/%#d$I(r,c_akx߽Ͽ)q@SS7p/7LOO?$|;ahht:M:εb/_,\wu+**b׮];Nn7?_W#Ld2\7y@u\Bww7{졷wp e\r5,50`||0`bmsF|D"~?cÃ\RVVƶr-c:8mb4xٲ.llvvؑ -%76-G#gjOIi2Fd}*lB(B/L$Y*a0CYY9r2b ёWtӎ? 
՜UG[@9e/N\J:1H2V-LbR Jiړ&$(9u]H$91hgggdw_oAAL>U}3EhHb֔'Q`C%;kJ4Z˱ĥBUe'_.EQHRo|DRL<?|Al;N>/|/~߿'>`|/E{1s~9y$n}sX˝MCdius%.m`ID"Ξ=ѣvZk˗ٻw/:^6~ |"ńɌ[8{sڿ)]Ӫ~N\Mc%DA@5bJWacTX4fA v=Huxޏ z^(95|II4MxŠҖ4:tf8F[ٜՠ0 3 6a,v"BcmEuujpA}}=pJJJ\+j(4$ s(Rޮ$-}7v̲VYs_j>jX,W[eUs-qo~|_# s)~WrFf3J"]]]o_

    O? y衇x)..<9wگ'|GGK.۷<444o|W_}Y4`-D(ʒ`slk0v`dYܹs(_eH8twwֆ˙L&H&u)@GP`  EQm;AWI A ̩Èikhmu.&dllZ_!HeG -g(aKf)QCD+(dB4.͎'rUOt;DQ56N4qd{538 Y ǏyW DpJ*fAĄF", TmZ)x۾*gaTa;2b=vxͶᭊv+xv9}uO~U_\T򗂁.]Ν;]1m-7o.Wf J|={ɡCVs...;wdz:1.](X,ٳnA<]Lha4M )IfҪ (S[T&GT^#K :`aKn$OǎsYxVt5ѵWFd옩&d{T^Z4BV%_>% LaetF!ӹ,ORYa]\p&+*?z=TzXL$`?>U)DiZ{Gi+g" ][!۟vGمu,%;ʩxݼWd{^L 4k]Xi}ۭ`}׆9ɓ6>oQ[Ϳ美|3h{W\;smdCPV||pc>=[`5HFy{dddg ~(0?tIZ[[ToN'NRX,Z[[s}2|>߲oM9tiMf<J6.$q},6G91TM%ˀYy_y hEQMND=J iYc`[OehWPC@ >k x=T+g>*W跐pZp)v:'4boWƂ/-b=*.u^5%;.d iYfd2,QvX H Y7p&`qZ͌}Vk8kttts;0_JQ5[m`=& 3A5[Op%:0'S-AXrY}c]QEcwWJFB^ii){]ӄ7}p.kȉ$ " {5ȊJ{=#2vוIt=G3:2EԉcO…>iلi`w]V)$ L#GхS]GgTTUѣF.zP5 ɢ2+:>">L)B*K8~RFtt@3LF:9Ѵ(٘YFW5TQ&I) !zd $XPUcR14Qs]FbQT YEJɴ,!x.ǁ*.# #qzǂPcÃL "ݘMR6WV!;^h5K%B,᥋4w=L& b$0H\o,Km*_*~,ɉ'V]oXWWWk׮E_ÞjZFFFpNH$?St(y Ç/zdg duz4oldj6躀 hA@f6x }.?䛩NHHM_{N"kȚ)KfIG$*53]"h&H$`BL 35ng3R=M`a~+sQ%R5Y0"Ԗse9O3kCʮ]rZ/Ss3HddF&=cnJm8}r&QٳȲѣGsms0 8Y2E\f4k1ǯ$Ixޜw(t]'H0>>]$}ItwwS__H%,w\K+W)//⌏ጝ|:F8ٳTTTL# _ } OF,vqÞ-زI(}2L duN)$]3:op&kdU)8~z+x]v*|ȪF?_)Jpbǔ ޑLdDt] 4DI@FFs\݃_fd2:U om\#|1?B"Pq"Gp80"}E=ٵs(t"0ͦigZiW\6 [ʽDt]:_jG zDtGoo/G&9sv#RO`ZhLW^77Y BܺljD"8pUs.ϟ?zK/aH3|8%LBPTA?$Rʊطپ}{*/_;?m%-+8ZC\ow1)#@|{U6$ BDC%d=Q qDo-dpEJ8>Zli:ub5(vtdFjx;͡m孾Wx2 X%' Ռ(1&VflBF88;l%I~h0QHȨ{+ RZZʯ0=t ISR!n:zۦCxq1T5wYmTdumHׅ477#^ 9_-[mmY|,k7H$6b*ˑ# eUx7/EQ8&L&++ߨ \.\.ל:FYY: }5Yʖ_w4x{?۷oNdZVxN4MgKadd2k^dt1@Bw߁.N>~QyA!E&ԩIdB YH:pfo&ݟaTVV6khTńi\ 2Mk:D2o~_ !fLBvMDl01P_"`a(8ln_]Ƕ022¥Kp\TRTFww7v ssiEEղUe _̒l"0\:c9{,4D4IەQ|.;E좢 .Pyu7 OD&sA)Nlhs lB@DP-wfTP-wC9yR6aE9Ѐ[Ni[Х8zzdYν---Z$Rpipq-hS > wU!ql %'m"up6/HG3t{#竇 @cU17ʥD]e=nIRw/ HQ, >D8{Ϸ1&I@FQU E0K/OWo5(6+l-g"?%t:{)F`|_IhbU5G\{T?ߊUUUD__TjMisq۷o_F`r]mm\8:j?c+'uXQTԂD"1-4=4A(%5%X$NK붾$(4bzQmGjo ,>i?zH! 
itEU ksk/f3TVV]]]pR6V7k8$2*LMuS_N]lq_LFS1 "> &" ]Cվ$?9V3Hjߟ=c hqDɌ(x%sRZRx0a?$Q<t4mS@.ޱGk4jirbR.Wॗ^bǎkr gBudݹ2Z9wΝc߾}jW_.1n-0ctvwq\nUi bX$?SSyP<$Uqˑ4T7$TJaJ NC\'X*)'ȪJAw.6߆'Bǂ*&Xli醢(|ӴaF!,fv֖(Kfxb?dCUM[Kwx ""0$$L-<—h(JP,dMzt49%˼2mc)v6neh$ƳW $֪olV =?3X NEE׾|s=ʯ (شi;Fqy/ݍ?~׻EL&-5402By=`;ސL(eH ܸ|ӢeE W34Vrpe,/:x & 5$QVW:T[gf1zʚ Z?.5 ?%a|R= "'9b.VDebN1:^N2癳\~'QqpuMԕqh[ 4=a$QB[5t|y S/ ![M(X4ah"J}0AmpZGW,9!wHS}v@ '"lzB6ÑB!mF"EQrՠ5K;n_j?^cSAT 8;}F-V[.v惢(JY69d4`r.uc-ޞ#@ " njAbt2fwy IT"q`jk$Fvil-/F4-=ÔoE(ҭ\grrÇxi4/2 " ;Y= I*ff!Q 26Y]xSgP#+̃9v ͍`,j22&H'Գ_-g|ÆjDK^O3M:X@ pNi,.~Ç///`CH^O+))a޽s~ Y 0͔3<&Y$r{b\5KmU>J(*>I9fp:9|:[S9:mNuDIt- t )jv+DA4WV4Ǐ_Ku?`r2$!g-Zt&Ij1ᴚIgd)ŸaMF/^j׺IW_Z $b5Hd2K&b @ZIOUR?_V͆$QQ쾺n 0!3F[ZZap)߉ .v7e (`Tx<Ϋ:+n&V7Byo֭[s_ _N0)++iC{3qXٷ]׹|2:t(cǎ{c&hVUl63b׌ ZE */FwD6KϚLe p,3gA94 ۍd_rfNoo/W\YQzAeZ[[eyɥu|v7=#LF40Xϒr}v֖=(XٿoI[*<򫔖Ӵ{Ϫ_VT^lWN+lw$,'3Ֆ1Ib.FY Z1o >%jKؿtՌDZvQ@W+QWWA'O?fo|k&,`ӑ J-Fbpĉ9W֫i ǎCQ^oo/V5x<\x1?µc9vزz%%C] Bieee<^FIҲBAUێj C@}w BJ☜u o@S]Lh®RCpOUU6w%ѻ7hYyvl;'2"kXoG2VV<P'|-[ (`&iWe^.]b׮]s|h2/IR.err2|2.]rJKKƶmOq^ߟ--Κ9aAeV9"{bc7$JKmT œzԗ{#+[ʽMęNX4'$#!&;PL;].7Q,kdqV*v #yiH&zcem# ?ND"|ɜw{]cP\t JK+ԭ1wX O__Bq6700@WW{bFi~Z'#QJ=.3IpR_Q58TH̀?ɉ]CWKzWPU5O翉h"Ky4Q=#Ēi~އH@(B5i"'"KEic4Ēi;kEs$d.㰢 /^Glf3DZf2s9PE$W.gKf_T]= IVQ̒Ȗ]bӉd֭RA-JǛ #|EQ/pE}YJKXfbӑR -s9ٱcDzIqF|0,u,uKnvΝVd2={A8~f]6yP}6Zz D)!ΆDiDQ@ pL$R d$Αf,t@:Y2fIf1VTLHJ1Ƃ1:&KsU_HFUk:$;jͩX f$8x"`mpp !|UUymPFCЯfo(K/Ė-[Htwww'ҝ?:g!3&[P$IΞ=bk*ˏG% 8---x88#QmM`+c$22Dz*MRt$$Q QT}dOR$G$1b`,f JD2~ul){n;a(Jll UU|TWW넌i|'? 
>, r X-IO44Dgg=|6rrO{{{6롤$XUu>g}]veP }]ʕ+Eܹsd2\%Mr~.]Bi)H"篠k:;jJRo4kC28Tx*CJVЧvqS_B6U.7'<BaDV3 (ҍq6>=nhh% ̲2t]'… x (`H?dff3'OVr˟_ 4M:?<>/G|smmm׳m۶ ͇h4JKK wlll  3۝{ G@M2ɻdYt ht6޶/1"I"a1H A gU]$X-fdEAglo&lw_{T'LQ]s}ӢɆ|_ ;JMM 5559k`׾5>S[[9v\Al6-AZZZ`ݳHl5q0]g9qdqFFFtEEE9+\N?]]]ٳ^!JCC[n]Bd.Gq._fF⺊8qʽ.fMӱ[HSb " QQPlXUj$Qg!/ M 3Z$.aR yz,Whp &?O!2_җصk}{9x 5?v6 /^\0!d2c `֭lݺt:d2iw零r׷0Fݻf5#8Ýq) z PNF^]i8m4Pd`,$#L" QY?u~כraY>>t&CUˁ$ή3?q\9mٰ~>t]Gx n&?8px]]@kM7y8t%%%>ntt^N<7a2,˜;wD"" bZl#p+!n6䄀]W8yxJfkU L%ӌLHѴB( ɵ ¼ 3W YXc $Q嵣*PS͊dYιMLL ⪦Ca.]ao?~u]'/$tttַuYϽ~2Ɓrcs*.N$8u?L;q{h R5TiFJƓD*j }F?_ 91GyoZn)k:#amz>n>ka^~4'NO[ $|z.wqǺ%'''imm妛nZsRٳTVVs9ܸħiZnPRR.#4СC YO$ Zݗ{H*ENN'@ »wtUӦfU/룼*Tt Q?܅ӹ| ]cD"KdV2{ n)hGy7ɩSxt=b=}CKݑ.\Ν;q"&0~N4K Y*;}6ͨi'qPa \M3:S|,ukdj1/ =a7$nX3\. ECCC X,R+df>Ba``8匍p\ F塺zE He8Wja"d}'B.DR Iy~o(ګQ[IM.Jf-_㩪Kr'>k~|?3wߺ6 ,R){9n9w 3-uI}p8Çq\+0F ùЏYsٱcǂFyť~"TFa"Q'ڜcx.Q$33~R('63E.;x9nit3a 8N.^aӟ{wCЦY`PZ.|,Ns_v;[la˖-Fq:9/}N]W5g.q)..fϞ=Ȫ$Mܳw_|\s.f \6\v $7k{nZQLC>Lwkll$J儀===L&dYf2 // Mӗe9We-cw!dwYٽ{ɍqqV5ptvv2>>Çx<v^h58kLcIVsNg2IuEՑDYԉ(\ 9~# A`PB54]gϖrn=9k#&tDQ4MO>V ސS иfI^ߟ)[u]ٳ)$IC4ywuuqҥu\ b߾}/_䵳ÍUbh$dxLfAIF}E E]w1;mSH7 >njȑ#x=$wfp"BXYѰ%JlV3>7HP ͂I'3ԗ{ SS2r#H{$NHZlf+$*͇Mh }NRst`΁6PjXh/?+Be&&& 6B v.~wO~/ MWeI3?<;w.eV2 ZuIp-111Akk+[l`(QX+dz|hŋ\:yx*$hNMS'w7+ĒttdEf1OɄcI6 U2lk/Ui_~ǎdRx]vjKD n+n3T$nrVLr&̙38MB4-R)|>_nZ\to|#i xS%}]yWe\|H$={7`i}3G=ONQŠ( i><톹Z\cO4Þl Nq-% Iy|7gI+U]ҷ[L-E4,:vqق Ӝp K.xػw"|c,+ǘ 3_.9vo~/bSؔ*so sH/ r9عsYe,prrrQ ҂d:ƵgZdO(x:oo1#{HS]9 >%dDZa6]6bGGG@$)//n [/ŸL&ÓO>E^~ey;ޱ*^H!=s|| ˗ijjvCc!hF{{;Ph̀\)HLQQѦ{d~~DZf<g,ڧ.hQq{S2̙391oѫQ+[o}{=>(w'?ɺ6_ÞAz/_&H`Z}w,sY4MСCҋ7? !<Ҳ~!{|u]!~~]x*sv C UMM_5! 
YVUjx/D3gPVVƮ]r8O"Cf 70<<̭7??kRj\Kؔi,6vbD*ϟ'r\c6s----8N߿RF*QQUu($mmm466e˖ ;Ő?=pYKS-tL PT5?n"')EEӵ:E0Yɲ"m>qΜ9Cee tI[ccc?~v~\>N2Dujkkپ}ŋ 9rd6HPH0MbV }R򼿻d ( eaIM8]!FHkW&I2(Z(/r߼8eE%k,̙38 gXraQk9}ìǹ;8x 766R ߘs|;F0̑vk0nt۷o~M{50vTTTFb~XF,ȑ#KZLFif<`cG_c0@dhD$a6up$"2w [a0EN6߽-]D$ (=DΡQV3>7*qؖ F9s l۶mכUUUV0vŸ;dǎ|4 Zؔ*L&7}[.kg:P(PR***V퀗R'aT8|0nXQUÇ*)G_b<o,h0h_@f1Qsq˕ F>?iUGӳ8_E$i9)ʰMX&}7S\~y89g-X` ? ZyV\ؔ~n9 'ݻU;=666lY;]`llCm*?UUimm%Lrȑyok=   ?O|}ݜ>}ǏO[[N,zMEEeee IÛj8䢄:g(D׻Q L܌f߾}kfuLDHW$tUE1D\v+ }b77kbr}**ky4w K$u]4h $QuSsQurݮ:֗# Ҳhz!_011lε2 FXӧOp8я~>{p)I@"'䡇??p:u]>}F~~{ِ|n4NT7S Už}Vܺ0FRbKA,2U:/3fIPcyq3=c~Mr4&  S[^LC]5eee8K߭;w;wn /|P\\Lmm-*}C{MPY@JOz!~F)++_"vۆ̾wQQ8N.\@qF,IDATqbF$i3۫i4(`YYKZX uuul۶mC&t]<?̮2vו$gϞ]h~&'']Yk Duu=Z`||Voߎ,L&{IR'?sP xx;N<(=tM.Iӌ344D4jRWW7j "ՙT\wřA\mƮ]Y+2I`skmme޽":㴵iD"ӈ:XF$Ns}199ɓO>Y0)ypM8;wyM?9yGby睜:uw]&ŋرIfU1oX.7g+pfKjR`XĮe7Co߾UWĻ(++[ւ(p344O?  x"}P滑K/kLNNrmq)nu˗"Agzl6***(//;`cwֹ] lXV4w^.z=144DGGǺcH$())ɑBFF,K%hͷ e__gyfC (Z5GKik|?r-:uo=7c\p`0hhF >|xS7u]A\.HdCF>zzz8tІg3\tiU녕L̵ K( Ghkkgt ،xÒ~>4Mܹs@oo/z׻;WD,O3Yk1v$o-lN8mt]ύYFEdZ.._|!Ç7] {.:O(ػw/lӵ (`MAuҥKtM>}=|EI&L҂ngAϷ:T.d傐[/Q@Xm!Շ=iF !+WD"\h=_`= &R_W(555s=:umj<6#t]䡇ܹsx㍜>}f*pgϮ؛q>3}KR477p86?8O<===<<|k_[nYs(kojχ'OԩS}TWWo~@ ӧ_w+**6aC455m](R_}8z観k#GLe{'i}I2&4MxGxgپ}}_@ҟ300??K/Duu5|3 r7x8r9l2-n3h8N߿&7FcbX%R)=Z!G.*<5D"rU\\ ?coϲk׮u?fQ E( ۿͷmvA[[ԩS:ujYkƈbNUU;vj1saluduFAUU>z p _u|+<3ݻwݏY@oTH+?f۶m~3ϰ{nN:ӧ׼ǿx<BY6kkk7tadr=|sQ@YiiiA5K[+:1=jo EQMF"51p5җru?fQ Eӟu`0C~az)mƩS{wPO-KRo0\Ҏw=`X^m[] s$łE8p`Sq@ .I-xꩧ8z(!~'x\СCk4v0G2LmQXq4úvꪪ(h6-u%k c`rrGB0t"F` ػwe'x믿~ݏY@oHC=ďcJKK9}4Ǐ_1ȲLkk+pС%g\`-5LII v"(RaxG6A0o_*yx[ߺ,7 H$<}[%P@ @ O/?l6GGGyӧf]sh*z[ ܰ Rj(BKK lŒᯰ{U[UEz)>w}kqP(&C4{ _MMMkٳgs +WwSNqwlH|ݻI$쀍nyyU)itxÏP*㡭-:L`&dYfbbbZU_f׮]l6~WTFEAs nfN>͝wމ[bd2D"x`i &f& , TTTl"0~@4ĉ߹6GQ kȜ ƍ7ӧ뮻(++[`4z^+ Yod2Μ9bO4]Q`?/2O#&Gy>/rfE@0v̙3}SNm۶Yִm( +6d3gP\\̞={Ln\?;O| _@W AuyGxygٽ{wn}v~w~#Gp}mX1fO#EQ8{,mJcD"3g(--i䶜Qp)ljjzEϱ\\x;~ɟI (*@obN0?!=O=T.+_ wqdžG.dv9{,&Cmx<Ι3g`ΝkFn~EQr˱mƇ>!>^ x@Y:u===ڵgr)N>7olZVvIYY٦"X,ƙ3gf붛5F a[QQAii鼭A/_o}{ 
_@/]׹EGyC4?1=?O(--{رczO&k8\.~L&햖^Uwh4ʙ3gXoQ@ `qmqwww/Ms;6 >HCcx뮻8}4'NX#uX,3J&W8L9 vq݌{ #!nVw/M,R)z)~a~bZ뮻{x[޲;h4Jss%x޽DuoFAj)*@0J1%jJEěVKzS{HMk,51ڴAmgC)A;RK+D$m9I$gwAÆy~zgzנ<pe;~"$$NAmmmPT.; xMڵ 7nDNN>Cfattf(d2A$$%%Ac֭ ܾ>!88xF+h}nۋ~ l(cA\rwO!yP L> 8ވ$''dBD'$ _W㦧#))`Z-ﶷ^hZ[[+Va|g1ވ;v qqq矱tRv@"zp }rIO>塾[lNFA@@'6 ===hmmEoo/-ZAfќS'OHMMEJJ WH?ydggxwPSSܹ_5°{n|g~:\5G]`FFFpmDGGmpEףV%LՁK,KłGqqquI JTQQF* &%M+}ri_ KQa4Q]]'|Z:N[Yׯ߄v hl֥?::{7n@YYل,^cWŨBLL RSS^Ч9A$\~FF.]+CBB8͈=krG;`w^x… wɸsJbb1iΑ$ n݂dhDyy9֬Y#Ofsmkk+/gFΝ;gw* sY#==׮]lVfF%a蓒1iN$ ===χ(8<+###{]KK , bbblٲYv_z5gUZ8p~W+?0I4oؿ0(--E``|%pTTԔI܌uhlxWPYY q:>)C歁BEcŊ@Vclu6lpy 0 l6+A%c蓒1# (>>>h4?P__+WL؍ ]od2.w`蓒1 ܹs8|0# %%شi[f}Ll69rgϞlFDDKƝܹ&@tt4N l6`0h4Ǝ;둘e˖=7d$a``@'ӵ$ pa3>l'MMMNÞ={|&2U;`A oL$ gΜ믿z\"R>>H]\v [nNFGd 7}cO`$F#yft:hZxyy!<<]]]QZZpdff"r/\D$ttt8ul6j|%ppp?}̙3HMM`xxxB"'rJNdgg}`6z 'N Gϝ;#;;{BCpOBc= $IF~~>DQą N^Y0ػw/Է Dl }"_%IB__ "JKK$_ nݺޭij>SNa |"'R۷o( {N._d?~`ChDqq1DQDQQ|}}j;+Wj{W_e>ќ444RF Zz?0Z-z-:tOD2>7::h4"//8|0233D4ChCvv6^z%>݅ODD!؜C0􉈈<CC0􉈈<CC0􉈈<CBHHVQYYHA7|F]]~i$&&b4""R6'bccO?T~ ^>Rp? '$$য়~rSUDD4 yj  7o*""R<2Ix 󀿿?.\xת?y.`y>`y>`y>`y>`y>`y>`yZs IENDB`scikit-bio-0.6.2/web/_static/img/logo.svg000066400000000000000000000144751464262511300202330ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/web/_static/img/logo_inv.svg000066400000000000000000000144751464262511300211070ustar00rootroot00000000000000 image/svg+xmlscikit-bio-0.6.2/web/about.rst000066400000000000000000000117531464262511300162100ustar00rootroot00000000000000:html_theme.sidebar_secondary.remove: About scikit-bio ================ scikit-bio (canonically pronounced *sigh-kit-buy-oh*) is a library for working with biological data in Python. scikit-bio is open source BSD-licensed software that is currently under active development. 
Adoption -------- Some of the projects that we know of that are using scikit-bio are: - `QIIME 2 `_, `Qiita `_, `Emperor `_, `tax2tree `_, `ghost-tree `_, `Platypus-Conquistador `_, `An Introduction to Applied Bioinformatics `_. License ------- scikit-bio is available under the new BSD license. See :repo:`LICENSE.txt ` for scikit-bio's license, and the :repo:`licenses directory ` for the licenses of third-party software that is (either partially or entirely) distributed with scikit-bio. Team ---- Our core development team consists of three lead developers: **Dr. Qiyun Zhu** at Arizona State University (ASU) (@qiyunzhu), **Dr. James Morton** at Gutz Analytics (@mortonjt), and **Dr. Daniel McDonald** at the University of California San Diego (UCSD) (@wasade), one software engineer: **Matthew Aton** (@mataton) and one bioinformatician: **Dr. Lars Hunger** (@LarsHunger). **Dr. Rob Knight** at UCSD (@rob-knight) provides guidance on the development and research. **Dr. Greg Caporaso** (@gregcaporaso) at Northern Arizona University (NAU), the former leader of the scikit-bio project, serves as an advisor on the current project. Credits ------- We thank the many contributors to scikit-bio. A complete :repo:`list of contributors ` to the scikit-bio codebase is available at GitHub. This however may miss the larger community who contributed by testing the software and providing valuable comments, who we hold equal appreciation to. Wanna contribute? We enthusiastically welcome community contributors! Whether it's adding new features, improving code, or enhancing documentation, your contributions drive scikit-bio and open-source bioinformatics forward. Start your journey by reading the :doc:`Contributor's guidelines `. Funding ------- The development of scikit-bio is currently supported by the U.S. Department of Energy, Office of Science under award number `DE-SC0024320 `_, awarded to Dr. Qiyun Zhu at ASU (lead PI), Dr. James Morton at Gutz Analytics, and Dr. Rob Knight at UCSD. 
.. image:: _static/img/doe_logo_green.png :class: only-light :alt: DOE SC logo .. image:: _static/img/doe_logo_white.png :class: only-dark :alt: DOE SC logo .. The DOE SC logo was downloaded from: https://science.osti.gov/About/Resources/Logos .. The webpage stats the following usage policy: "The DOE SC co-Branded logos should only be obtained directly from the download links on this page. Only unaltered usage is permitted. Usage does not require explicit permission. The logos shall not be used in any manner that falsely implies employment by, or affiliation with, the U.S. Department of Energy Office of Science. Additionally, the DOE SC co-Branded logo may not be used for commercial purposes, including but not limited to endorsement of products or services." Citation -------- If you use scikit-bio for any published research, please see our `Zenodo page `_ for how to cite. Collaboration ------------- For collaboration inquiries and other formal communications, please reach out to **Dr. Qiyun Zhu** at qiyun.zhu@asu.edu. We welcome academic and industrial partnerships to advance our mission. Branding -------- The logo of scikit-bio was created by `Alina Prassas `_. Vector and bitmap image files are available at the :repo:`logos ` directory. Pre-history ----------- scikit-bio began from code derived from `PyCogent `_ and `QIIME `_, and the contributors and/or copyright holders have agreed to make the code they wrote for PyCogent and/or QIIME available under the BSD license. The contributors to PyCogent and/or QIIME modules that have been ported to scikit-bio are listed below: .. 
dropdown:: Rob Knight (@rob-knight), Gavin Huttley (@gavinhuttley), Daniel McDonald (@wasade), Micah Hamady, Antonio Gonzalez (@antgonza), Sandra Smit, Greg Caporaso (@gregcaporaso), Jai Ram Rideout (@jairideout), Cathy Lozupone (@clozupone), Mike Robeson (@mikerobeson), Marcin Cieslik, Peter Maxwell, Jeremy Widmann, Zongzhi Liu, Michael Dwan, Logan Knecht (@loganknecht), Andrew Cochran, Jose Carlos Clemente (@cleme), Damien Coy, Levi McCracken, Andrew Butterfield, Will Van Treuren (@wdwvt1), Justin Kuczynski (@justin212k), Jose Antonio Navas Molina (@josenavas), Matthew Wakefield (@genomematt) and Jens Reeder (@jensreeder). scikit-bio-0.6.2/web/conf.py000066400000000000000000000060421464262511300156360ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # Configuration file for the Sphinx documentation builder. 
from datetime import datetime # -- Project information ----------------------------------------------------- project = 'scikit-bio' author = f'{project} development team' copyright = f'2014-{datetime.now().year}, {author}' # -- General configuration --------------------------------------------------- extensions = [ 'sphinx.ext.extlinks', 'sphinx.ext.autosectionlabel', 'sphinx_design', 'sphinx_copybutton', 'sphinxcontrib.youtube', ] root_doc = 'index' templates_path = ['_templates'] exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # -- Options for HTML output ------------------------------------------------- html_title = project html_short_title = project html_baseurl = 'https://scikit.bio' html_logo = '_static/img/logo.svg' html_favicon = '_static/favicon.ico' # static files html_static_path = ['_static'] html_css_files = ['css/style.css'] # do not show side bar with section navigation html_sidebars = {"**": []} # do not show source links html_show_sourcelink = False # link to document:section autosectionlabel_prefix_document = True # -- External links ---------------------------------------------------------- github_url = f'https://github.com/{project}/{project}' twitter_url = 'https://twitter.com/scikitbio' extlinks = { 'home': (f'{html_baseurl}/%s', None), 'repo': (f'{github_url}/%s', None), 'docs': (f'{html_baseurl}/docs/dev/generated/skbio.%s.html', None), } # -- PyData Theme configuration ---------------------------------------------- # References: # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/layout.html# # references html_theme = 'pydata_sphinx_theme' html_theme_options = { # logo image for light/dark modes # image files must be placed under _static/ 'logo': { 'alt_text': html_title, 'image_light': '_static/img/logo.svg', 'image_dark': '_static/img/logo_inv.svg', }, # announcement banner on top of the screen # 'announcement': ( # f"{project} is back in active development! Check out our announcement of revitalization." 
# ), # social media links displayed as icons 'github_url': github_url, 'twitter_url': twitter_url, # disable prev & next buttons 'show_prev_next': False, # disable search button 'navbar_persistent': [], # display all header links 'header_links_before_dropdown': 7, # footer layout 'footer_start': ['copyright'], 'footer_center': ['sphinx-version'], 'footer_end': ['theme-version'], # google analytics 'analytics': { 'google_analytics_id': 'UA-6636235-9', } } scikit-bio-0.6.2/web/contribute.rst000066400000000000000000001043211464262511300172460ustar00rootroot00000000000000Contribute to scikit-bio ======================== **Scikit-bio** is a community-driven open-source software project, and we warmly welcome your contributions! We are interested in many types of contributions, including feature additions, bug fixes, continuous integration improvements, and documentation/website updates, additions, and fixes. Whether you are a researcher, educator, or developer; whether your interest lies in biology, mathematics, statistics, or computer science; your input is invaluable. You can help making scikit-bio a better software package for our entire community. This document covers the information you may need to get started with contributing to scikit-bio. In addition, for a broader perspective, we recommend the inspiring guide: `How to Contribute to Open Source `_. Visit our GitHub repository: :repo:`scikit-bio/scikit-bio <>` for the source code of scikit-bio. You will need a GitHub account to interact with the scikit-bio codebase and community. Valuable contributions can be made without or with minimum amount of coding. We detail various ways you can contribute below: - `Ask a question`_ | `Report an error`_ | `Suggest a new feature`_ | `Fix a typo`_ Contributing code to scikit-bio is a rigorous and rewarding process. 
We have prepared the following step-by-step guidelines: - `Before coding`_ | `Set up a workspace`_ | `Write code`_ | `Test code`_ | `Document code`_ | `Style code`_ | `Submit code`_ | `Review code`_ .. .. contents:: .. :depth: 1 .. :local: .. :backlinks: none In addition, there are separate documents covering advanced topics: .. toctree:: :maxdepth: 1 devdoc/code_guide devdoc/doc_guide devdoc/new_module devdoc/review devdoc/release Ask a question -------------- Your inquiry matters! By asking questions in the scikit-bio :repo:`issue tracker ` or :repo:`discussion board `, you are not only giving us (and the community) the chance to help, but also let us assess the needs of users like you. Before asking a question, take a moment to search existing threads to see if there are any relevant ones. We also keep an eye on broader community forums such as Stack Overflow and BioStars for questions related to our scope. Report an error --------------- The scikit-bio team is proud of our high-quality, well-tested codebase. That being said, no software is immune to errors, which may arise from bugs, overlooked edge cases, or confusions in documentation. In any situation, we would appreciate it if you can report the error you encountered to us. You may :repo:`open an issue ` to report the error. Please provide a detailed description of the error such that the developers can reproduce it. Specifically, you may include the following information in the report: 1. The exact **command(s)** necessary to reproduce the error. 2. The input **file(s)** necessary for reproducing the error. You may either attach the file in the issue (by dragging & dropping) if it is small, or provide a link to it otherwise. The file should only be as large as necessary to reproduce the error. .. 
note:: For example, if you have a FASTA file with 10,000 sequences but the error only arises due to one of the sequences, create a new FASTA file with only that sequence, run the command that was giving you problems, and verify that you still get an error. Then post that command and link to the trimmed FASTA file. This is *extremely* useful to the developers, and it is likely that if you don't provide this information you'll get a response asking for it. Often this process helps you to better understand the error as well. We take error reports very seriously. Once confirmed that they should be fixed, we will update the code to fix them as soon as we can, and ship the update in the next scheduled release of scikit-bio. If the error could result in incorrect results or inability to access certain functionality, we may release a bug-fix version of scikit-bio ahead of the schedule. Suggest a new feature --------------------- We are always looking for new ideas to enhance scikit-bio's capabilities, especially from users with unique research interests. If you believe there is an analysis or feature that could extend scikit-bio's current offerings, we warmly invite you to share your suggestions with us. Please describe why the functionality that you are suggesting is relevant. For it to be relevant, it should be demonstrably useful to scikit-bio users and it should also fit within the biology/bioinformatics domain. This typically means that a new analytic method is implemented (you should describe why it's useful, ideally including a link to a paper that uses this method), or an existing method is enhanced (e.g., improved performance). 
If the scope of the suggested method overlaps with any pre-existing methods in scikit-bio, we may request benchmark results comparing your method to the pre-existing ones (which would also be required for publication of your method) so pointing to a paper or other document containing benchmark results, or including benchmark results in your issue, will help. Before suggesting a new feature, it is also a good idea to check whether the functionality exists in other Python packages, or if the feature would fit better in another Python package. For example, low-level statistical methods/tests may fit better in a project that is focused on statistics (e.g., `SciPy `_ or `statsmodels `_). If your proposal represents a significant research direction or requires a substantial suite of methods, we encourage you to consider establishing a formal academic or industrial collaboration with the scikit-bio team. For more details on this process, please refer to the :ref:`about:Collaboration` section. Fix a typo ---------- If you spot small errors such as typos, redundant spaces, broken links, missing citations etc. in the scikit-bio code or documentation, and want to give it a quick fix, you may follow the procedures detailed below. All procedures will take place in the web browser, and don't involve creating anything in your local computer. .. warning:: This approach should not be applied to anything larger than small errors. For the latter, please read `Before coding`_. 1. Locate the specific code file that needs to be fixed in the GitHub repository. If you are reading the documentation, you can click the `[source] `__ link next to the header to locate the corresponding code file. 2. In the top-right corner of the code viewer there is an Edit (:octicon:`pencil`) button, with a prompt "Fork this repository and edit the file". Click it. Then click the button :bdg-success:`Fork this repository`. This will open GitHub's `online file editor `_. 3. **Edit the code**. 4. 
When done, click :bdg-success:`Commit changes...`. Then enter a commit message to describe what you did, like "Fixed a typo in the documentation of skbio.module.function". Then click :bdg-success:`Propose changes`. 5. You will be able to review the changes you made and compare with the original code :octicon:`git-compare`. If everything looks good to you, click :bdg-success:`Create pull request`. Then enter a title and description that you think are informative to the scikit-bio maintainers. The **title** may or may not be the same as the commit message. In the **description**, you will need to answer a few questions by typing an ``x`` in the relevant checkboxes. You may also explain why the original code should be replaced by yours. Finally, click :bdg-success:`Create pull request`. 6. This will create a `pull request `__ :octicon:`git-pull-request`, i.e., the changes you made to the scikit-bio repository. A scikit-bio maintainer will review your pull request, and run necessary tests to make sure it is sound. You may be asked to clarify or to make modifications to your code. Please work with the maintainer by replying in the pull request. 7. If the maintainer believes that your code is good to go, they will `merge `_ it into the scikit-bio codebase. Once merged, the pull request webpage will have a purple notice :octicon:`git-merge`, saying: "Pull request successfully merged and closed". 8. At this point, your contribution is completed 🎉. You may optionally click :bdg-light:`Delete branch` to clean up your workspace. Then, you can move on, and enjoy the improved scikit-bio! Before coding ------------- We sincerely value your willingness to contribute code to scikit-bio (beyond reporting issues or correcting typos). This process can be intensive, particularly for those new to software engineering. The following sections detail the steps for contributing code to scikit-bio. Please review them carefully. 
Discuss your plan ^^^^^^^^^^^^^^^^^ When considering contributing code to scikit-bio, you should begin by posting an issue to the :repo:`scikit-bio issue tracker `. The information that you include in that post will differ based on the type of contribution. The two types of issues discussed in `Report an error`_ and `Suggest a new feature`_ can be a good start of the discussion. The scikit-bio developers will respond to let you know if we agree with the addition or change. It's very important that you go through this step to avoid spending time working on a feature that we are not interested in including in scikit-bio. Take existing tasks ^^^^^^^^^^^^^^^^^^^ Alternatively, if you're looking to contribute where help is needed, you can explore the following types of issues: - **Quick fix**: Some of our issues are labeled as ``quick fix``. Working on :repo:`these issues ` is a good way to get started with contributing to scikit-bio. These are usually small bugs or documentation errors that will only require one or a few lines of code to fix. Getting started by working on one of these issues will allow you to familiarize yourself with our development process before committing to a large amount of work (e.g., adding a new feature to scikit-bio). Please post a comment on the issue if you're interested in working on one of these "quick fixes". - **On deck**: Once you are more comfortable with our development process, you can check out the ``on deck`` :repo:`label ` on our issue tracker. These issues represent what our current focus is in the project. As such, they are probably the best place to start if you are looking to join the conversation and contribute code. Set up a workspace ------------------ To start contributing code to scikit-bio, you'll need to prepare a local development environment. This section guides you through the process step-by-step. 1. `Fork `_ the scikit-bio repository on the GitHub website. 
This will create a copy of the repository under your account, and you can access it using the URL: ``https://github.com/urname/scikit-bio/`` (``urname`` is your GitHub account). 2. `Clone `_ the forked repository to your local computer. Specifically, in your forked repository, you may click the :bdg-success:`<> Code` button, make sure that you are under the "Local" - "SSH" tab, and copy the URL to the clipboard. It should typically be: ``git@github.com:urname/scikit-bio.git``. .. note:: If the "SSH" tab is not available, it could mean that you have not set up an SSH key for your GitHub account. You may follow the instructions on `Connecting to GitHub with SSH `_ to set it up. Then, open "Terminal" :octicon:`terminal` (or anything similar) in your local computer and navigate to a directory where you want to place the workspace, and execute:: git clone git@github.com:urname/scikit-bio.git .. note:: If this is the first time you use ``git``, you may follow the `Set up Git `_ guidelines to install Git and set your user name and email address. This tutorial assumes that you will use the classical ``git`` to create a local development environment. If you prefer other methods such as `GitHub CLI `_ or `Codespace `_, please follow corresponding instructions. This will create a directory ``scikit-bio`` containing all files in the repository. Enter the directory:: cd scikit-bio Add the official scikit-bio repo as the **upstream** of your fork:: git remote add upstream https://github.com/scikit-bio/scikit-bio.git 3. Create a development environment with necessary dependencies. This is typically done using `Conda `_ (or `Mamba `_, in which case the command ``conda`` in the following code should be replaced with ``mamba``). .. note:: If you do not have Conda (or Mamba) in your computer, you may install one of the distributions such as `Miniconda `_, `Miniforge `_ or `Anaconda `_. We recommend Conda over other approaches such as ``pip``, ``pyenv``, and ``virtualenv``. 
However, you are not blocked from using them in necessary situations. Execute the following command (``skbio-dev`` can be any name you like):: conda create -n skbio-dev -c conda-forge --file ci/conda_requirements.txt --file ci/requirements.test.txt --file ci/requirements.lint.txt --file ci/requirements.doc.txt When done, activate the environment:: conda activate skbio-dev .. note:: This may be slightly different depending on the operating system. Refer to the `Conda documentation `_ to find instructions for your OS. 4. Install scikit-bio from source code:: pip install --no-deps -e . This will install scikit-bio to the current conda environment. After this, you can use scikit-bio like a normal user (e.g., you can do ``import skbio`` in Python code). When you edit the code in the this directory, the changes will be immediately reflected as you use the software. 5. Test the installation:: make test This will run all unit tests implemented in the scikit-bio codebase to check if the corresponding functionality works correctly. The output should only indicate passes and warnings, but no failures. 6. Activate pre-commit hooks:: pre-commit install This will enable a set of tools that will automatically execute every time you commit changes to ensure code quality. Write code ---------- Before you start writing code, you may discuss with the scikit-bio team to make sure that your intended contribution is relevant (see `Before coding`_ above). Next, you may work through the following steps to start coding. 1. Update your main branch such that it has the latest version of all files. This is especially important if you cloned a long time ago:: git checkout main git pull upstream main Optionally, you may do the following to keep your forked repository's main branch up-to-date as well:: git push origin main 2. Create a new `branch `__ that you will make changes in:: git checkout -b mywork ``mywork`` is the name of the branch. 
What you name the branch is up to you, though we recommend including the issue number, if there is a relevant one (see above). For example, if you were addressing issue #42, you might name your branch ``issue-42``. .. warning:: It is not recommended that you directly code in your ``main`` branch. 3. Run ``make test`` to confirm that the tests pass before you make any changes. 4. **Edit the code** using any code editor of your favor. Now it is the time to bring your creativity to scikit-bio! Scikit-bio's :doc:`coding guidelines ` provide more details on how to write high-quality code. It is recommended that you read this document carefully and apply the guidelines in your code. Test code --------- Testing your code is an essential step to ensure its quality and functionality. For scikit-bio, we emphasize the importance of comprehensive testing to guarantee that your contribution works as expected. You might find testing tedious (it often costs more time than implementing the algorithm!), but it is a valuable step that will significantly enhance the robustness of your code, and will help fostering a culture of reliability and trust within our community. This section is to guide you through the testing process, providing you with the tools and knowledge to perform effective tests for scikit-bio. Functional test ^^^^^^^^^^^^^^^ You will want to test your code like a *user* would do: import the function, execute it on some input data, and examine whether the results are correct. You may install additional software such as `Jupyter `_ in the same conda environment to make the testing process convenient and pleasant. There is no need to reinstall the modified scikit-bio package in a separate environment in order to test. As soon as you edit the code, the changes will immediately reflect when you use the code. 
For example, you wrote a function ``gc_content``, which calculates the fraction of ``G`` and ``C`` in a nucleotide sequence:: def gc_content(seq): return sum(1 for x in seq if x in "GC") / len(seq) You added this function to the code file ``skbio/sequence/nucl.py``. It will be available for use in any Python code launched in the same conda environment:: >>> from skbio.sequence.nucl import gc_content Now test the function on some data. For example, you would expect that ``gc_content("ACGT")`` and ``gc_content("GGATCCGC")`` return 0.5 and 0.75, respectively. Is that the case? We highly recommend that you use real-world biological data in addition to small, dummy data for testing. This will let you evaluate the robustness and scalability of your code in real-world applications. For example, DNA sequences retrieved from public databases may contain lowercase letters, and you will find that the ``gc_content`` function cannot handle them properly. For another instance, a FASTQ file with ten million sequences (which is common) may cost a function forever to process, in which case you should consider optimization. Unit test ^^^^^^^^^ `Unit testing `_ involves testing the smallest units of your code, such as classes, functions, and methods, to ensure they function correctly in isolation. It is a fundamental best practice in software engineering, but is often overlooked by beginners. Unit testing is made easier by writing test code alongside the algorithm code. Both types of code are integrated into the scikit-bio codebase. This test code is then regularly executed whenever changes are made to ensure that the intended behavior remains consistent over time. For example, the test code for the ``gc_content`` may live in ``skbio/sequence/tests/test_nucl.py``, under class ``TestNucl``, as a method ``test_gc_content``. It may read like:: def test_gc_content(self): self.assertEqual(gc_content("ACGT"), 0.5) self.assertEqual(gc_content("GGATCCGC"), 0.75) ... 
You can run this test with:: python skbio/sequence/tests/test_nucl.py TestNucl.test_gc_content The screen output will tell you whether the test passed, and if not, what went wrong. This information will help you debug your code. Ideally, every line of the code should be covered by unit test(s). For example, if your function has an ``if`` statement, both ``True`` and ``False`` situations should be tested. It is a good practice to test all types of cases you can think of, including normal cases and `edge cases `_. For example, an empty sequence (``""``) will cause the ``gc_content`` function to crash, because zero cannot serve as a denominator in an equation. Having edge cases like this will help you to identify limitations of your code and think whether you should implement special handling to avoid problems. You should also test whether the changed code fits into scikit-bio without causing problems in the other parts of the codebase. There is a convenient command to run all unit tests implemented in scikit-bio:: make test Alternatively, you may run all unit tests in a Python session (including Jupyter):: >>> from skbio.test import pytestrunner >>> pytestrunner() Code coverage ^^^^^^^^^^^^^ `Code coverage `_ refers to the percentage of source code lines covered by unit tests. It is an assessment of the quality of a software project. In scikit-bio, code coverage can be calculated using the following command:: coverage run -m skbio.test && coverage report This will report the coverage of each code file and the entire codebase. If the coverage decreased for the file you edited, you may have missed some anticipated unit tests. You can create a detailed HTML report with:: coverage html Then open ``htmlcov/index.html`` in a web browser, navigate to the page for the relevant code file, and check which lines of your code are not covered by unit tests. Work on them to bring back coverage. 
Please read the :ref:`devdoc/code_guide:How should I test my code?` section of the coding guidelines to learn more about unit testing. Document code ------------- `Documentation `_ is a vital part of software engineering, especially for projects like scikit-bio, which involve many contributors and are designed to endure over time. Documentation helps everyone -- users and developers -- get on the same page. It also helps you, as even seasoned developers can lose track of their own coding logic over time. Also remember that scikit-bio brings together people from various fields, and nobody is expected to have the same level of understanding across all disciplines. Therefore, documenting your code with the broader audience in mind is important. This section will cover the basics of documenting your code in a manner that benefits the scikit-bio community at large. Scikit-bio's :doc:`documentation guidelines ` provide more details on how to write effective documentation. Comments ^^^^^^^^ `Comments `_ in the source code explain the rationale to fellow developers. Please make comments frequently in your code, especially where the code itself is not that intuitive. For example:: # Perform eigendecomposition on the covariance matrix of data. # w and v are eigenvalues and eigenvectors, respectively. # eigh is used in favor of eig to avoid returning complex numbers due to # matrix asymmetry caused by floating point errors. # Discussed in: https://stackoverflow.com/questions/12345678/ w, v = np.linalg.eig(np.cov(X)) Please read the :ref:`devdoc/code_guide:How should I write comments?` section of the coding guidelines to learn more about writing comments. Docstrings ^^^^^^^^^^ `Docstrings `_ are structured text blocks associated with each unit of the code that detail the usage of the code. Docstrings will be rendered to the software documentation. That is, *users* (not just developers) will read them. 
Therefore, docstrings are critical if you want your code to be used, and in the correct way. Below is a very simple example for the ``gc_content``. The lines between the triple double quotes (``"""``) are the docstring:: def gc_content(seq): """Calculate the GC content of a nucleotide sequence. Parameters ---------- seq : str Input sequence. Returns ------- float Fraction of G and C. """ return sum(1 for x in seq if x in "GC") / len(seq) As shown, the docstring explains the purpose, the parameter(s), and the return value(s) of the function. In more complicated cases, the docstring should also include example usage, potential errors, related functions, mathematics behind the algorithm, references to webpages or literature, etc. Every public-facing component should have a docstring. Please read the :ref:`devdoc/doc_guide:Docstring style` section of the documentation guidelines to learn more about writing docstrings. Doctests ^^^^^^^^ You may consider adding **example usages** of your code to its docstring. For example:: def gc_content(seq): """Calculate the GC content of a nucleotide sequence. ... Examples -------- >>> from skbio.sequence.nucl import gc_content >>> gc_content("ACGT") 0.5 """ ... The example code and its output must match. This is ensured by `doctest `_. When you run ``make test`` (see above), doctests are automatically executed as part of the test suite. You may fix any issues according to the screen output. HTML rendering ^^^^^^^^^^^^^^ After completing docstrings, you will want to check how they look like when rendered to the documentation webpages. You may build the entire HTML documentation package locally with:: make doc The built documentation will be at ``doc/build/html/index.html``, and can be examined using your web browser. If errors arise during the building process, or the rendered webpages don't look as anticipated, you should address the issues accordingly. Changelog ^^^^^^^^^ Please mention your changes in :repo:`CHANGELOG.md `. 
This file informs scikit-bio *users* of changes made in each release, so be sure to describe your changes with this audience in mind. It is especially important to note API additions and changes, particularly if they are backward-incompatible, as well as bug fixes. Be sure to make your updates under the section designated for the latest development version of scikit-bio (this will be at the top of the file). Describe your changes in detail under the most appropriate section heading(s). For example, if your pull request fixes a bug, describe the bug fix under the "Bug fixes" section of the changelog. Please also include a link to the issue(s) addressed by your changes. Style code ---------- `Code style `_ is a set of rules for formatting and structuring code in a particular software project. Although violating these rules won't cause errors in executing the code, adhering to them ensures that the codebase remains uniform and professional, and facilitates team collaboration. Scikit-bio utilizes the `Ruff `_ program for autoformatting and linting to ensure code consistency and quality. The rules are specified in :repo:`pyproject.toml `. Basically, we largely adopted the `Black `_ code style. When you `set up the development environment <#set-up-a-workspace>`_, Ruff was already installed and integrated into a `pre-commit hook `__. This means that Ruff will automatically format and lint your code every time you commit changes (see `Submit code`_ below). Therefore *you are not required to take any explicit action*. However, you can still manually run Ruff to check and fix issues in specific code files you have worked on:: ruff check --fix mycode.py If Ruff identifies any errors that cannot be automatically fixed, you will need to manually fix them based on Ruff's feedback. When done, let Ruff reformat your code:: ruff format mycode.py You will notice the improvement in your code's appearance before and after using Ruff. 
While it is always beneficial to strive for professional-looking code from the start, the necessity for perfection has lessened with the advent of tools like Ruff. Submit code ----------- Having completed, tested, and documented your code, you may now believe it deserves a place in scikit-bio to benefit the community. This section outlines the steps to submit your code to the official scikit-bio repository. 1. Add any new code file(s) you created to the git repository:: git add path/to/mycode.py Alternatively, if you have multiple new files, you can add them all at once:: git add . 2. `Commit `__ the changes (this is like "saving" your code in the current branch):: git commit -am "describe what I did" Here, "describe what I did" is a placeholder for a *commit message*. You should write a meaningful commit message to describe what you did. We recommend following `NumPy's commit message guidelines `_, including the usage of commit tags (i.e., starting commit messages with acronyms such as ``ENH``, ``BUG``, etc.). The `commit` command will trigger the `pre-commit hook `__, which automatically runs Ruff to check and fix any code style problems (see `Style code`_ above). If there are any errors flagged by Ruff, you will need to resolve them and commit again. 3. Merge the latest code from the official scikit-bio repository to your local branch:: git fetch upstream git merge upstream/main This step is important as it ensures that your code doesn't conflict with any recent updates in the official repository. This could happen as there are other developers simultaneously working on the project. If there are conflicts, you will need to `resolve the conflicts `__ by editing the affected files. When done, run ``git add`` on those files, then run ``git commit`` with a relevant commit message (such as "resolved merge conflicts"). 4. Run ``make test`` one last time to ensure that your changes don't cause anything to break. 5.
Once the tests pass, you should push your changes to your forked repository on GitHub using:: git push origin mywork 6. Navigate to the GitHub website, and create a `pull request `__ from your ``mywork`` branch to the ``main`` branch of the official scikit-bio repository. Usually, GitHub will prompt you to do so, and you may click the :bdg-success:`Compare & pull request` button to initiate this process. If not, you can invoke a :bdg-success:`New pull request` under the ":octicon:`git-pull-request` pull request" tab. 7. Enter a meaningful title and a description of your code in the pull request. You may mention the issue you attempt to address in the description, such as "Resolves #42". This will `link your pull request to the issue `__. You will also need to answer a few questions by either typing an ``x`` in the checkboxes that apply to your code or leaving them empty otherwise. These questions can be found in :repo:`PULL_REQUEST_TEMPLATE.md `. When done, click :bdg-success:`Create pull request`. Review code ----------- Your `pull request will be reviewed `__ by one or more maintainers of scikit-bio. These reviews are intended to confirm a few points: - Your code provides relevant changes or additions to scikit-bio. - Your code adheres to our coding guidelines. - Your code is sufficiently well-tested. - Your code is sufficiently well-documented. This process is designed to ensure the quality of scikit-bio and can be a very useful experience for new developers. Typically, the reviewer will launch some automatic checks on your code. These checks are defined in :repo:`ci.yml `. They involve: - Full unit test suite and doctests execute without errors in all supported software and hardware environments. - C code can be correctly compiled. - Cython code is correctly generated. - Documentation can be built. - Code coverage is maintained or improved. - Code passes linting. The checks may take several to a few dozen minutes. 
If some check(s) fail, you may click "Details" in these checks to view the error messages, and fix the issues accordingly. Meanwhile, the reviewer will comment on your code inline and/or below. They may request changes (which is very common). Please work with the reviewer to improve your code. You should revise your code in your local branch. When completed, commit and push your code again (steps 1-5 of `Submit code`_). This will automatically update your pull request and restart the checks. *Don't issue a new pull request*. .. note:: Particularly for big changes, if you'd like feedback on your code in the form of a code review as you work, you should request help in the issue that you created and one of the scikit-bio maintainers will work with you to perform regular code reviews. This can greatly reduce development time. We highly recommend that new developers take advantage of this rather than submitting a pull request with a massive amount of code. That can lead to frustration when the developer thinks they are done but the reviewer requests large amounts of changes, and it also makes it harder to review. Please read :doc:`devdoc/review` for more details on how pull requests should be reviewed in the scikit-bio project. After your code has been improved and the reviewer has approved it, they will `merge your pull request `__ into the ``main`` branch of the official scikit-bio repository. This will be indicated by a note: :octicon:`git-merge` "Pull request successfully merged and closed". Congratulations! Your code is now an integral part of scikit-bio, and will benefit the broader community. You have successfully completed your contribution, and we extend our appreciation to you! 
🎉🎉🎉" scikit-bio-0.6.2/web/devdoc/000077500000000000000000000000001464262511300156015ustar00rootroot00000000000000scikit-bio-0.6.2/web/devdoc/assets/000077500000000000000000000000001464262511300171035ustar00rootroot00000000000000scikit-bio-0.6.2/web/devdoc/assets/api-lifecycle.png000066400000000000000000001165061464262511300223300ustar00rootroot00000000000000PNG  IHDRm) AiCCPICC ProfileH wTSϽ7" %z ;HQIP&vDF)VdTG"cE b PQDE݌k 5ޚYg}׺PtX4X\XffGD=HƳ.d,P&s"7C$ E6<~&S2)212 "įl+ɘ&Y4Pޚ%ᣌ\%g|eTI(L0_&l2E9r9hxgIbטifSb1+MxL 0oE%YmhYh~S=zU&ϞAYl/$ZUm@O ޜl^ ' lsk.+7oʿ9V;?#I3eE妧KD d9i,UQ h A1vjpԁzN6p\W p G@ K0ށiABZyCAP8C@&*CP=#t] 4}a ٰ;GDxJ>,_“@FXDBX$!k"EHqaYbVabJ0՘cVL6f3bձX'?v 6-V``[a;p~\2n5׌ &x*sb|! ߏƿ' Zk! $l$T4QOt"y\b)AI&NI$R$)TIj"]&=&!:dGrY@^O$ _%?P(&OJEBN9J@y@yCR nXZOD}J}/G3ɭk{%Oחw_.'_!JQ@SVF=IEbbbb5Q%O@%!BӥyҸM:e0G7ӓ e%e[(R0`3R46i^)*n*|"fLUo՝mO0j&jajj.ϧwϝ_4갺zj=U45nɚ4ǴhZ ZZ^0Tf%9->ݫ=cXgN].[7A\SwBOK/X/_Q>QG[ `Aaac#*Z;8cq>[&IIMST`ϴ kh&45ǢYYF֠9<|y+ =X_,,S-,Y)YXmĚk]c}džjcΦ浭-v};]N"&1=xtv(}'{'IߝY) Σ -rqr.d._xpUەZM׍vm=+KGǔ ^WWbj>:>>>v}/avO8 FV> 2 u/_$\BCv< 5 ]s.,4&yUx~xw-bEDCĻHGKwFGEGME{EEKX,YFZ ={$vrK .3\rϮ_Yq*©L_wד+]eD]cIIIOAu_䩔)3ѩiB%a+]3='/40CiU@ёL(sYfLH$%Y jgGeQn~5f5wugv5k֮\۹Nw]m mHFˍenQQ`hBBQ-[lllfjۗ"^bO%ܒY}WwvwXbY^Ю]WVa[q`id2JjGէ{׿m>PkAma꺿g_DHGGu;776ƱqoC{P38!9 ҝˁ^r۽Ug9];}}_~imp㭎}]/}.{^=}^?z8hc' O*?f`ϳgC/Oϩ+FFGGόzˌㅿ)ѫ~wgbk?Jި9mdwi獵ޫ?cǑOO?w| x&mf2:Y~ pHYs  iTXtXML:com.adobe.xmp 5 2 1 2@IDATx TBe"K$Ko$ٕ%})""%K*"KD>w}{s^Ν>Y|gΜ{sV"B$@$@$@$@'j b7 @,P2r$@$@$@$@Tl    *XFNHHHH-$@$@$@$@ @6I P{HHHH b9     *| ĂX,#'A$@Xl޽ikaVYe>":b=)$wTlœ A`Μ9Yf>|Yb1X7_ӦMMz ֕Bq&|P8Os# ȍ,{PƏo:tŬZ&L0ݺu33u 890 *UkIH pۺI&f1]yOkUvȑ -"vi91 ȏ,z;vb^jm۶5s 98 ZlBג @ `|3UٕoSfMrǖ.-'F$@@nȏ]Fa8| !_rHHHH T؆j98    | P͗#   *ZHHHH _Tl%HHHHBEm!   ȗ|:    Pb`HHHH%@6_r.-[fwnvi'LLjG`ݰ~XG'HH {}9< ̙34kLÇG -,֯iӦ^zJ! /ԍrfyeJMl Ln̼yLݺu6P)WC8R6"TlCJ8u&M̀1 "0^{*#G 86D'ߜQ!~kU,z;vb+<ڶmkΝơqL$Ϊ>+@b[n+f$g]vXGEjȣ1 .jժ{_D'|bZkmƌXlD`8Ms! 
_Ɣb})%py6L;̇~hƍgz)3tPsgv!iƍ>}[o5$0H {}9; (:c9 .Yzs5{キq AAPd$@$@i ljEȼ+2.nfS …  Žk&Mdtb4h`ڵkgLPדּ2]v 'l<@|63~gj׮m6xcӵkW<ɓ'3OWz,LNeV   (*&I (.ga$p}B,5#NGv:+%!,Pzx"WJ̈́6}YӵkWUNZ YP1R}fŊTYbO?=&_ @ -H?ߋ!;G"l0:dz[(nݺHMN_e ʾkͨ,KpAx/kG=zkGҧj=& ,dco֔3-N!VUQd%5zpIYN:$ ntӫ.,BO.2I%j}.\( }I(O(   (n+YKB*):,>C"MեX|uСCͬY4ܹsSfWfm*4?1cƨbW_~ɧJ1HHAbPf$? V,n:[F֛kFYgU:Q2A Z}`a}+^FZSɲeT駟L㠃2جF!  bŶ A$ڴic&NX(|A|˗7޸B5K+.j]#?cl7p(͈#L:u@VZB_ oܑ6Di+ Ĕ>Laڴi>Y!RL-Ɂ 9Tl#dp9@?ӴlyffΜ9c4pS@,['nBo[oFi^{53|s뭷SN9EaJp}MϞ=/T技gW",eyF"&ݻ6tk$@$K/T0HMw{dK # Px,ma\&lb%mX%rm޼֓طzNZ0aFYq/AdcC}ڎ;j/_m_Gr/|wmfͼ>pK6 /06JzdOF}}=X+wX[o.ףF e;R6e6lhhW\M>2eÂ5zQ=xU>l+ʟ袋$.bl/1+εJ+{GiI!#B+v&aTgIc%wB-{a_=H7\ΉW]uQV-{aYI 1?p;Cbv䙟FL bU%gJtdѢEV$x xe8W+ ex/_¹ Vx-qĶ q}Ɗ¶"'+_~-($I>?%1Ma$7(/%`(3Tq;WVi}xrܴiS+w*7Xp v[\J#OrMT@Y{J-@2gMVl\s̈́?;Vc( C"QKSlť;_ʪRHHHI@L/=86KnԩSuf6{l+ʛ7Qo!]Ϋ/`Dd2  qD`9̙3u.6T؇D>JlU@OgW`Hw'87شdeP~VEBbbPf$@$@$@! a,a {vӈ.j䖻+CX)/ 7,>Cf:!% PF}F Qp%砄f#v =B//7",P-eA$@$@$Pz" qUvFiU}/oAD'r;^AKX(]ow'ս 9dSALrD}w '3TTXF$@$@$Zo*+L֭ugkdwS_'"! d#L5Q>n8MW^yE |ВkHd (Q4#bV11=$ <{?oGbc   ]+0~Ȕ ";6sC@jCӧ&qv/\B` Օzr>> !-zNH0zhZk{(fL+zBL~CUqh\(3(׈bRzH0M.KjzJEtU9kef\(Ǟ]2y    Hbe IH 3+ Ĕ]bVx.vQ2당a748 ӣG#);s^83F /pQ9%[HwBuEāgy9蠃* ڞqfyaXJyLŶ7 @ qTGjQdde'k:tyꩧ駟nA2Zzi fvi\5p{tI~LAEHOI =LWvmedXN$@#-non.9J *?yd&BɥO`ƌ[n}i\n; A(5sL;찃5kVAF {챇>}A \X!|cc$M2Յ AbV[^ziۮ1ҥntpLgM41;sfk]xFZ/_}wLNlP,}nE92͙3GX[ھ{̴jlOL6vi?aGV3 x=&XX|7:77s9GۼMvLL}g{L6[RG$@!'k<,X`.""|:<(XP:jժei)խX(DN;_kC]|G1'sU2O>Z2;83|W%vt]wX\|́oa9CmݺYl*dԭP?'KHP`DK.D?ͱkk/G ncLX_xs饗~տJ;}bpe9Ct,ӦM%Tn]U|`huEa'PjO>d}b/^iݜ\=<چnhڶmJW_}e{=} 5XP8]￯z駽1cYnQH|q#ͼag$V!+_VFv„ VVobmܸ]d/|{mY\dEd:ƊUΎ3JR+ӬwXh[obb᳢\k?bt N~m`s-'V=M7l?^lb=:t]vg+V+'{mKh+ʳ U2F*+u\oßGy֮][c.@~͛+Q|Ы&e+>kٳ?ŢΝ;{UDyCM6Dd; 'o/kO?T-pp;~hppYqw*cڠAE6/lHHp{B/o&G#pCE譪 B9+*CbtE!ɒ.kF] wSO=UwwԨQ>@XgpK]M@Ӏn~va/>_YsGA:\JIHJFUkf!<š$,mRlRb֣jժ סlq eV$F +7Sa*3߹c Z~|Յظ(q5qM5>x"۬Y3E?'g)5+ r)UaIOq{05r/+}҉N]: >I&Mw?ҵs$@$@$ 6H7@noQ X* ԇ,EA ( J_|-z!/{/zƆk6Ȥڐz !0v #g}VCo$KBs"1k;TIwe8(ATNu@C1\;L(Pޡb89qm>\6CeLL]\>N' 0c9OyH,zVuA&nvPMedY 8o{yn۴icEh*g~(V%KNX 
kVȜ0cذ痋.H7ya-XsQ ŦC/0>ʊOږ${еP`Vv[j9_MBb bΧ֫$6aIaŚ^&:_A8H1u1ЈUXaAD[tua[oB]A n/+JB,M[klޓ>&2o?6XF 1S,Moy־֙3]ttx, C,d\ AFEF1n#;bF~TlÿFa CScwBm@FEF5ؼ|3̐c/n pd$@$@$y HZJ!Bb[(!j'VjO#' RH.#Y4+RHjDEը,J=,8#7ŽPHHʏ' K! P f®` uGcHmٳgO/a)/ljNO OSʡĢoĖDPDV0XL(KQKoOxv0fʱV' pXl"J#qSI!z뭪s<Y$G3K2IC{cv?<wDF gta7 #؞i 1C߾}5l"#m/BG!CؖRi}yX \޲ntȇJޠOn%<{od"142Knl{kigih%V=餓48 }C >V|ܩgɄb{BY!_̙3GCPo]! }GyJnd|ŗ; KPz90g2N$geK,Bꫯ5sa[_$9losVdž=tE8 n,]T16n8sWQJJ5|9s4m\pgo0aJvk>X/tV^zyQ Z϶XɐυZ0G70=â k(!P6 k,'N4F5Ǵmly/XVe(H0%Ӑ9SdѢEk ܩ|֥FUzw w%$CU(0C!?೦yfԩa2f9lZ%E ?Zc=fEqsH)Q+>^]tHD2h]Q^-,{O%<[o.gXH^C`\jQDkm86y5-bEy֮][CW~5jXQF(nqv&[޳a݇%>q{Њ"k;vJw^¸a⃱YSm>FwJw.ǒ;30OqٲvX'BK'_6ω `/Rf5k|~B< N _$ $$+?'p [ϲ_|+\njw0 y̙ZbCiG~p܂ė^*0 nIb]I(2eI( 鋰o~+P7Q=`޺uk+u] s;5nݺ?ε={;SVTlSbɻPRZd۴i yOJ`5ĐZ]+Njժew_#'4 nߧ?\j_b'ܤI}D9qkiӦF4sKnr)> VSRb(zBX;nq;'Ԅq\tEF܋~ۼ |SAl{G9 bSP9&l^3 ؤ 17)QfXy )ZPK`6Rk <`:uTAmd.6B aji2SZo:Յ/,(XX ~"tG?G*ئC?qIM >VY35yQǛ,l"k8AI&LUh ~A /P o8c%QŠ%(tPF`MT|5lS+4"%?` *.4>)VV{.#qCrA-ZhTW:(xB" #*Pv2}Na,:3#X KwsPϪr+bUZ:CTu6a_\lf+lry *-0^r"dWr\&!]ʫBWǮm;Sn+gcETh Yȗ=yL6KnC|coLk cq}F SM2mvt[΂ݻ7Pۺf͚b$ݫfZ"}gDS : ñ?jV 3o[K`="r Dݝ<N /^ye->=P/|`GS.\Rlqw/<c9Y$^~8e9|V*wr{Rʵ ~#\2 B"(AXX¾nG~B*чtWU#lGe}+&nX,"wLeױ<0UQJzk-,wq. w6) Pp =YPWq{Ȑ!ɧzՎ[b̫ Ex#\̋IFEF\.$PQ;a$~(fO5"U\GqDMiq Hf^P2(5ⴾ0uQ]wwqp v2榪( bWJmX8q$@$@ F9b#dMb8bՕ!-b+-5 ">弎HH f:p_ fذat'2"BuKJqU do %qMW(FQny(3(׈JwӥK#iC=TxQ~uuec63fhJĚ,R3l $AVE$cʔ)e5N*|DH<],g  jf$ַG5ADȅ^~OtEF~V믛ګsm a\̋BFEF96]v5Ȧ`Pt%@Jb5$3i׮](6E&'@eF1l3LW21gsOElvXo?}r *\89m۶F2y @^$uO5ƍ&lb$㡶b shvC5kVB7xZkΝ;HJ]#ZuQV#ߦն7Չ6 tMB4w('0ydAHI'o$ExJm8XU p ffUmד ĀV[me~a͂xk[nŜz^DcOd>lݺA'6W.(lRNwhs54?x 9yM>}qgΝkN9ӿh-CEn4D}dz7]|.^zwyGCXr9E<`>:Dr*L*z-7ʅᄏ>v ,nHF3uTۃ2XIwyg^r%(2DUYfM%VZz_|*Λ74o\7iz Jl2-C ꪫ5ҚP B'[H4XGX=#FTλ|ZlfO>7)D? 
@*{~|BmذMׯogK3}t /fԕV]Xskժeve|2q!xqP;Yfq t^JBk>Xmuăjg5p?s̄ 7j@?,JXY8@:|p9,vpo… ͋/]㳩o߾cQy6H-u-p_U~x@fϞmy3tPLJ b5 O⿭ɬ 0&!a})"6`䃦M沁ŋ`ȑ#5R|`|h{a9omUW5Zj={ oFN8AAE{^{ vqGuJm($A?"J@X[>sL2X֍m,"-0ۥt',45J%¬D"ںuZbi>]5V,+?K+~ZglSG$ `^?[ˤm$'|w3d?X`Ǐ zy@1#\s{H(",y]p^-Sɂ(Kĺ:ڭ h# ?tq~5HU%eQ]ЃCAm(!ރۘ1c4 NflA:t9,gcǎĪ[Oמ-Ƕ?baYŒ[l5u(MWSRhHHHؖ~eN3w_F" 9 ,7_u,#  b%+̀[.ho4`x'L͚5 iPHHH cU,:O۳g".Ae˖fƌf3˗/7ժU+F#Xc b '"Vivʕ^$ FbJ?~*_F6/BPM@b¦ďY4tf~G}+P?=~py衇 (EG5p MKYf_N['I`?\,A[XArvEXhiժڵZf(rƎk:wl+~w94m|իW7|iРWƃ%l2=#C "f30aرYt[n$gG$V ^r%f}]/PY`UV&D ;>B(}5 rE|.ss1͛77W^y*CW_]]^Zj2Pq$b/ N:'Xi/SF ])oΘHH b9 ȏ@6m )$@$@$u9EE >_HG)B!  rRlA/.Β3bc 0թ؂ArRl>lfE)/Tlk9[ +t*a]a@`V dE] @ 6@׃@Q b[TH T^]bz;9,%*)H pQRP:HNmgS!$pTR! P-$]M$@$-t*g=!*1\TNHʘ@N-cȕ;S%nrR$KAbO*'ŖہUAŐ դ8hM'IBD:HC JI-eAŐτmט3$L=tCai\C..[Ɛ jVqxAŐ0xy P-"lvE$P%A/؆ X%AŐ xqQ P-*nvF$@$P t*f1 *ʜ# [Ɛ+7gZy<֙$8U,rRl⸽pB*W^II_~z/Hy !W:gZlr7  28($'6޽{+WӈT N,MŶ,& JbۤI!Cn9 *aXHH (9)Q p!\}fY1{'LNLÆ ́hト."v^w߭׌;+?Ç{e< bxH*#5yrRlCoW~zF7G}Ybҥ>Qygܹs~a3n83fl̙^0u~T ϓXH hFb.ٍQF ĉ##曦A Plf͊Blb[6K͉@ J >5kf9oG_U&kִ*ڜ\CoܻK:cƏov\96tM" :h46uG9r0~"! 
(M;7}X'?Sciz$a{yt@F^F/W6Y7on}]Mû`>'W۷Ot*AŐqܴؖs$@p#p wlsOuab[O`A 9"67MbP|N26H 'GmNXHlR.\0oqr)K/7plܸeF_~E3R}> ,###8¸𣨋$R;wHLk lƋ0]ve /оhrRlC.+!@ ꪑPir (2w\C{xqWww6Hބr$dz'J$sz'>,B>"3)2$9yMV ⣎:L>]wyGao\$^z%w>c vp@+FF}"ʺ.+FGoq T^ݮX ($B ɢʦp!Q[lae[oz`+Ϭ bA$멶=|pײvJ-o;N_i宱mذ޵C"h+X H  [_ <&p($@Y,A @ \tEW&)wt39|_0{l|2d-]Ԝx≮>c_.I͚51=>Hq~[Wc$zG lnC¨W^yE:51gEQv[uCwGQBŐ ,HmWs"("RpLz!Ak-[U^v%T-b1^`Cepq8CHظOb;zhV631Y2\9)!F@4bȕ8̕mVs (gEu qWP""8;_ F YS2^f~g͠Q/ rRpVvә]vED_[7H /-@&N qBA6hlH NB,Yp-߿?j~4;X:,3i$-Kfdk,Bl=sk5Λ7Ofa(Dl\ >pC)2^a`@ۅAI,`O;s8UWPK.$a!n|嶁Mbiӯ8rV5v:ʊM]=ص+$A$@&PkJ*]s5u~V˩8p3L ~ H~3fLw;'VX+qcK+S_}<רQs=^ko_َ^4}]Ȩg*N`e%"P]4`!-׍ĨSę+0R -~m!rN*Af(JsZ|P٤Ic;!9=Ȑ9L 2$RhLd 69 ꫯ4sRK(y7xlƌfuUmޝg0(HT l!FmNkQc=`w һqoKڵAyp{@k.]x׋Yo'xAu;uTU@a8ƴ[ lKsooYn ;)Y18C<@w鍭 M6PHq\UΉHs Xe b \me;^PkZJ-RP`"@w"E4Ym֬Y^1R!V :EǏo .ۆpHs!Y:vouI6Pw42D]СkN|kJ<`> SO=e#;+ׯU'c(,P~L>$Lǒ5 @d ߑ\eذaꗁoNd`C:Nj'` 7I^}Z$o!uj]ÉQRE^;٤CeQuNr>N;MŒmY]_ǏV"4>}򱕰'ڶXoH>k-v,@i.DM3&|tHMŶ1` %oV= ӧOW%B[n {8-9sh,8`gbǎ5kv,K1't?A8WWн&1rT \d+B׀# L:HQFPArRl|.zr (.ǧ~/m/buGWv/W/byT!h22̜9H q9 Qi [E|s<C.X'<؆g-8 (WA Rl r+~ [ߓK9M:C!M8+,& -6A_`Z[HbIH|rRl/BߥU;'J@KPP3 _IKlՅ_^ٱlɒ.|eL^߯@^sgzRq:7WVvymyZoΖJ H$ 8rRlq;sYԀ\\upGvuW͒H ~9r}El$uٴ D+H$ϦJ ,[Dp+(͉dW4Uy*Fhbr*$cA 1FE )vql XA̅ʥ(ɒm -,ظK-ACl[Ȧn#Fи'uK+=D{4hW(nE7@@NhlDR N4Pz*\^b[2HH_A %Qlc!CVZilwRH T)d"K_H/MꝄ ݺu3_|Ɓņ4! ;*硇j+ 8-@FDp@bl-ʘL3۷7?7S _Bb+1' +R:n믿VڠB<@J-T>H~0\.qD 2}Gr $@=/,gq!<f֫WOhVE0GT^[GaJ3J$@ "b[ɲ Ѳe+fqFVmd@޽n)$@$@$P A b{9K/E*ڪK[`T(L$@"Rx-EifȽJ3 2r$@$@B 'Ŗ1A a5j_~xƑD-LeI:HY.{ΓIe FꂠbEjb7 @P*?Ơt+@ pkQX9*  Jb[b/$Z<ڥHHr$b9euZl#H" &;"D '6 'T⏜=V۪$@"@XKOP:HNmi^E r/ `8  P x% ĂX,#'A$@$ rRlCnߚrF$GA⸪)'Ŗǂ_0T 0͉cĽXH~ 9tBNm@P1J? Tlsź$@$@ BŶ6I BFh8T  rRly -K$HbeIP)%k9)!\T LUTlF  QXbpyeC. 
s# @iP-W ZlC @ 2\ir!*!\H  [:n bIYLmY,3'I'@$KvA 9)iGē'T ȃ( P-tIH A؆pq9$(&n+&mE$@$PH9) Pȥ(mVZiKBے`g$@9#URIe KrW_}eڵkUTl㰊 ğuq:*V$83 L>sӦMg D@N[79sC> D-_@ʌu2[b{G̜99v&#=u]/1n+{ P W*ZK}͇~hN:$ӨQhg6hѢϪjWnZnm,Yˑ-3 DuhXAcQ7`ӳgϢ O_~1m3-[f;<3w\3o<O8õYff3Æ 3u֍9Dȟ@^Q%]v5?sTq @m (,2cB͊+ԅF\CzMq&@mW7s1Ǐ7G6n&wD`~xR %:t 0a֭ZiɢrPXB[(l7 U(1lfƌf7;찃5kV@رcCm۶3LklHH lh ۊxz)siK.Ĝ{k>ժU x8ߺyqʕ5ÖhыaPݒFoB .4G}` C=dԩSIC 973b2(5Q^=="d"'曛^{M}n7onNZ> D?c~W@(Au]׼UkW,EydVH ХKsW|SHC .0p_k^L&~'o 5cn6cƌ ri7=n^Ʀ6mڨbom٦\0~~``ٳ5,ӾkFe6xH́%r'&mbҀH(ƍ3:t94nze10Nrܹ~駦Fk{G]BDH/_<Fk ;<SN,gE1$p1h2Ԗ,Yb=P${w$P00,X@G3HNaVnB:gE'@W#gXuUͥ^N>d[l` p Z yW^yepwaĦRt{fҤIԠAӮ];3e5sYg.xw>~Po?SvmcԵk ?'o,O<5lPw}9w^{esOsX)c>,vsH\@AtMoM4(?c='w}׫On~q +\܎;֫qY,"^6$~qu棏>2njO[E_wvm7uXqMZKҿSgdb<<`sA?AM=:<Ȏ~Lkr7M6$FX+:d)$ r`Vd :^-hlH׊NǪˣ9#(v5J?۹sgf͚zVʾ [V-+(Z.S' Jn6f͚Yq|w߮mJZf{u0g]br{ebq2@{e׊@c'Xz?+ ͵0>cŊ܊vu0 c(Zl? k 9r)j9J G5᯿2/6V[*f-M:O4|]כFiYL>]S.#6|J2h°:q|;4 -7W_R{̓`Y> d|ΰ?3/Ht848r!)DM@2=l2}Γ"O~M418K￯/|mF3\,nJP!oƂSɵ-.)|FR:bH.jLN &GuV]̟$?7d@0v1Sm?[,5׵m?PN/PrK: 7t@邫Arpq0F+C-J|Y[ a}9眲fQΓŶW?s +D!{ q|id2Yn J?!nw UyE&T#4$Tr#GjJ#`Lw\K/frG@>~AJWXBi2^$M|. 
/}TjݫlHc$Pر7p/ y_ Oye"Ȇ.(;CX2l~bL!Ctl_jŷ-V T+yidU`ye˖kwm?zmf%l]+W+Vd+ 6m]-΀;JV׭V|eĉ-$ہ=Qn5 /`Bkle^v%\+㬸@X _l~B$ Tl=<([/ۥ/+% (YO_)=KbPXJq7L('*[q;vka$!CB)^SV[7d_G,}JZ "4h?a3bv6$*ޅr{,IUUrqLZS I2' /$ ɗfyG w"KoNܥWselJbi*vYג\D,^_̘~9o\떂\ %}×uKP[ cVTbkLN Șh _7(\\& BƎ3'.Y$@}"C*A1dBC͛'ۦMY)7qe[bY7HZcWy ܦصG}n& PHW.\X!%RP\)˰{Jw!z7nzk1AdFeʦN@9E!`Ņ.Gݕ)|\oma#n|r'@6wf""_%AN\S+HUҳgOMX6}mؙ# ||F\;]we$\>M2 e|x`6r_lNݺu3'f&MDLXwg2[H/E~"ɓ5B8A723g:EzNndrJlƂ9b,˄NX J5/&1cVW~*kl믿jl~%vW/۱8Vkh#syB h(%!_8ϊ+NՏ_QX_|!¥;6N>N7"(E+@#rpsl\l|xOE&18(jn6+v};AJQQ5G13Hi{WUp$Fp 50>'p[5W p@/^X]_9<\S #^ VWnݺRw nqTL LG#eQ048\ P͕GvKu#A5+$rLGD KJl̜CL!7|6mTdW3fM'UJP |Px:|m͉XqaUeF ]H.cDµ.R:_Ygj-zp@D!|N3PUIrF%@Ŷxx@IY0ů-l3tPDbsJ!ȜuVT֩!(Jwcqg0 ,xH xdcL]j\>cq k#E(sUdXK!FVA=z)qu`u)%aT6lXJ 1[`#izqH󱅿-RRHE1=\%c4vfц۰ z4A wE(HwIRpW2܂\ 0TҴBb,H6qת?^(p0g)K1)$@!*~ǻKpa& y 6oK.B}AElh }$ `pGi/!Xn)_Ro&~(_rѸ8qU@ME@)>m|Xߐ#0<2$VX_;6!$SN&<y,蠃4[#L2ŝW_}iܸ=Í?9 [ MO6?s=رc59]\wOHu(5H\9 ޻woƜe˖fĉin]!2}tsaA "H; ++ DԈ]Q₈b%*&j ԈHK R&hʐ,*ZF-7*1jQI~]=7߼7?Uoz}wߛ>}$XbKF@ޣb(%տ>v4zǣVD> M[G^iXN;H v=UHOQN'Fw}wC"FC k֬|_~̙3#G=" Wv?3f̈ޑW #LZy?Tu1dvJ+FԮ]gϞѴiӢ>}v.]"25jwvj%ʶ]ʔ)?27Ynh{|vɈ6!СC'x"y-eݺuqp … 9*'xU<?<3{SD5>| n^-l  j11#?) Olb֣GUSDQĂa3^RfqG>L+lb$((5v?~Zv쒑Zz=?1[hxI_ҐpNbb'̱k{w(_b j4_DD@D@D2 U8 6̑212N -uD0{E6bi. 
$ B(իWxIO?y.GhFB1-'ޢkt8NB$-pq?kCG &Œ n.8sObJ)4%RY0)IwSV_o]X>:8+,K*x*m̚6>`KUn&1 !9)m7J`qfW>vS n%/h;uD@D@ C 2Yɗ@SxmHzj63Ket[Jb= 53ɓ]sJ⢾+ۡCRE#믿;]j-8\ +\ʕ+m"{&T|~c 'NbU@%E.?ZC2T5hLrf,]z50*v߭e_%{뭷r3~Z@k_iXNX2cN!d#Q9jV{VEE@D@j\|g7OݶnGD@,@Y3 ئ7C(Q(o-YĒz33Tf[Ԩ&" mT5" " Q&5n8S]n}tt"6?s]QD@DFبG}tQnl"Br >m.'o"=1ir 3{~KSۖ]_D@DAH>qa;|f|LN9+W(KVbe⨣r/R|+L`L7ވhylرs& IzQQM},IEUOnb7I-'4S)Zh=묳,JQ,h_∿J}22}嗡Htwv[p#~i^ܛoingϞ)F-**򗿴k/SAxT-iׯh@& 0ӟ͝;=oCo[veQQt ?~M)& u]nr!-ٯt~ /Ю[[Sa_x ,y{Req3fYL黳>ohG8e]sOGBfsC,)w7.EpF~'X4;o{D2-_ϷLlRPRyw衇ڨ1f1!I}Qͬ0n1{1W-(=P\5'-\miL"ŶTTD@De w`GYedB ('tk߾B8qbmmΜ9tرkD:2P٩&%s9K~G Cd",rH"S,/&XQN~kjx0`;G׽{w&\{nĈ3rȲεB҉pM?E[,XSlAKd,g„ APj9S$'MV^mpOK^waĔ1QXc*.B8oƌƘ}rJ!Oz̶mZ%.bhk)" "y;Yx3?<ȸXOn5ڮ];Qw+(Kݺu!S±b1<8V2X_8T+FeFrM7]>xw+ieg[n1^vCKkz(]t<Ŋs`: Ib(D(Gwpظ_a%B6撗(#H2YJ2Hfn&%03 `mCB\zĊ۵kW׹sgS>rIX||XraCX}XPtMD8X`1EO.~Yvxw+ieQh9L4eE>֧CӋkc兗w/un9aB{ t.Xe+d=i'Yɶ1 4t)ŶT^D@DE 0,fP^O!QP8Dl% i(9 (XrjY{\?bJEc?.Cq|`r-b%fSO>iۥEAn ;o|L:^v}K~?a |<.@$isKRttLD@D p'Jmayn b 'eIDATUjajTjǵ^}XQd%?SNq/U>D PL_[8w>]=.-CCD!C6.(ݠ,(iuL{^x9#S|uQNQQXuIPGh q`}XKIR:&ŶhQLbȟH(0WB)U_+0>%~,쳏#,>LbP\TI[-lcci"2҄ (X6!/nzSŸ1v¶02( leb申{'*ʈ@k![~ m(HmK$cD#b(%ڿCł6KIXaxA\bRŵI!pC`BW,eʤ',R²fB.e# &>opF|Wͨ[ޠ١$#293|JZ`GkKf|l3j"ĉ- pEtժUbK&xȒWf&)W]u[|yrW>bGj҅ :.')'t̀I6YF"GLS~O aFNFJ"" rEF?"P/RIdw=; SLqz2T(l2 vN \dIJ۶m-8%"]J"B!|qsm ]k1GY'|~'&MMܰR7A%"  6VbLXBM q 9,gϞmn Xy䑸̙3-8>[9&0O?mKD%p >'t1F/OQ^QxPHCfym}7 |-+n-E@2N@m;HSXO'Ol&ov@^,K-}&xbK.u֭3_†0`$B2@JE2!E FP(j Sr$;\Ru<\k.d}qtSN5%IߟDD ?槯VJy>lb H y饗€.bEAK,dCրu5dK.]؄Sp0i >Ar]$0ʯ"Z@[Y `P_L÷6*d4((Xr2yRܿV1i. oR=Nh/4'^yfY"\]ceUB" ! mfB x6 H$Z`cvsco=,F-0A̙cbgiGDe(ACpU3L@39M4o:ļ0"6l~C%J`u% )QEsV )5"KO?KmQ3tFy.si䊐FHE@D@D@D@rA@m.IH# 6䂀\t)" " " "F@m!)&5RD@D@D@D 4B:." 
" " " RlsMj@9is)2" E@mmFD@jU>w|Mw-=_{/_n~^zKpmYo]`Ad#zkGZN;NӧRҞ/>~]w~v.yZ/.b[}ƺ@tz!W_C%K;ӝ{%a}޼yrGq;ꨣܻ_u͚5nܸqEC13>Wm۶W^qok߾"SNu\p;3܊+ܐ!Cȑ#wae؏2{M7aÆ9O2-Z(C+" 'g+&lmF9kF`n6s_uZV}<)WM<Ѿ С{g`G?2 5\{L;v-Çڔvڹ+W}׎瞮_~[om(O0]^nر}n̘1?Y߬Q;O@ 0PR{֭[X+?Rmv'x)(;vtܦnjvɍ1½{n_?܎WXsj+wA^zɎBD߾}򬰍r߯]vԊ@ tLsu=zY4f2?˴r: c|]N6ZX{>~m7sLe}+pz);[p}DD dF?""Bl!@5ʆkZǝ X,$#t&]wunzڴiR駟>gu[.̬~XnHv&M2_^*}l v۶B#"P](( lٲѣGG~Gfo7mv*I'w?+2~x-Ҧܹs abyEFy+"+mƑF7xcq#?,Ŭ̑Gm6l `u"]r%Vڴis9wg2>Zϻ?D>ZCG=ޚ=q=YXRfP[4y%"PBPY|Mbڗa^1K{6íN:hݺuf%W<7J߾%6'w-07\?]34#k Hm~溢dLwOm4BUapi" " " " b[>+0)5MD@D@D@D|Rlg" " " " & 6ÝO@mTRD@D@D@D fs4 H-Jd w&" " " "P>)RI bQD@D@D@D@' Ŷ|V*)" o֯_*$JJDV H՞}@w^x iY&tR׭[,7QmFb(|:YD@jܹsktGn֬Y*E^jt_" "  'vZM>s1 @gdٳ]>}ܚ5kvm6Q"X6#3gSƌ/^,ۜ1>߸q駟-[&6}fG@8:XnV\VXV 7D1\p/0aڼwڟJ@m*""䡗FTRlS@HC/" " " " ئ"R<b^RE@D@D@D@R HME" " " " y 66bHD@D@D@D@@@mzImH%~IENDB`scikit-bio-0.6.2/web/devdoc/code_guide.rst000066400000000000000000000567031464262511300204350ustar00rootroot00000000000000Coding guidelines ================= As project size increases, consistency of the code base and documentation becomes more important. We therefore provide guidelines for code and documentation that is contributed to scikit-bio. Our goal is to create a consistent code base where: * It is easy to find relevant functionality (and to determine when functionality that you're looking for doesn't exist), * You can trust that the code that you're working with is sufficiently tested, and * Names and interfaces are intuitive. .. note:: As scikit-bio is in beta, our coding guidelines are presented here as a working draft. These guidelines are requirements for all code submitted to scikit-bio, but at this stage the guidelines themselves are malleable. If you disagree with something, or have a suggestion for something new to include, you should :repo:`create an issue ` to initiate a discussion. What are the naming conventions? and How should I format my code? 
----------------------------------------------------------------- We adhere to the `PEP 8 `_ python coding guidelines for code and documentation standards. Before submitting any code to scikit-bio, you should read these carefully and apply the guidelines in your code. What should I call my variables? -------------------------------- - *Choose the name that people will most likely guess.* Make it descriptive, but not too long: ``curr_record`` is better than ``c``, or ``curr``, or ``current_genbank_record_from_database``. - *Good names are hard to find.* Don't be afraid to change names except when they are part of interfaces that other people are also using. It may take some time working with the code to come up with reasonable names for everything: if you have unit tests, it's easy to change them, especially with global search and replace. - *Use singular names for individual things, plural names for collections.* For example, you'd expect ``self.name`` to hold something like a single string, but ``self.names`` to hold something that you could loop through like a list or dictionary. Sometimes the decision can be tricky: is ``self.index`` an integer holding a position, or a dictionary holding records keyed by name for easy lookup? If you find yourself wondering these things, the name should probably be changed to avoid the problem: try ``self.position`` or ``self.look_up``. - *Don't make the type part of the name.* You might want to change the implementation later. Use ``Records`` rather than ``RecordDict`` or ``RecordList``, etc. Don't use Hungarian Notation either (i.e. where you prefix the name with the type). - *Make the name as precise as possible.* If the variable is the path of the input file, call it ``input_fp``, not ``input`` or ``file`` (which you shouldn't use anyway, since they shadow built-in names), and not ``infile`` (because that looks like it should be a file object, not just its name).
- *Use* ``result`` *to store the value that will be returned from a method or function.* Use ``data`` for input in cases where the function or method acts on arbitrary data (e.g. sequence data, or a list of numbers, etc.) unless a more descriptive name is appropriate. - *One-letter variable names should only occur in math functions or as loop iterators with limited scope.* Limited scope covers things like ``for k in keys: print(k)``, where ``k`` survives only a line or two. Loop iterators should refer to the variable that they're looping through: ``for k in keys, i in items``, or ``for key in keys, item in items``. If the loop is long or there are several 1-letter variables active in the same scope, rename them. - *Limit your use of abbreviations.* A few well-known abbreviations are OK, but you don't want to come back to your code in 6 months and have to figure out what ``sptxck2`` is. It's worth it to spend the extra time typing ``species_taxon_check_2``, but that's still a horrible name: what's check number 1? Far better to go with something like ``taxon_is_species_rank`` that needs no explanation, especially if the variable is only used once or twice. Acceptable abbreviations ^^^^^^^^^^^^^^^^^^^^^^^^ The following list of abbreviations can be considered well-known and used with impunity within mixed name variables, but some should not be used by themselves as they would conflict with common functions, Python built-ins, or raise an exception. Do not use the following by themselves as variable names: ``dir``, ``exp`` (a common ``math`` module function), ``in``, ``max``, and ``min``. They can, however, be used as part of a name, e.g. ``matrix_exp``. ..
dropdown:: :open: +--------------------+--------------+ | Full | Abbreviated | +====================+==============+ | alignment | aln | +--------------------+--------------+ | archaeal | arch | +--------------------+--------------+ | auxiliary | aux | +--------------------+--------------+ | bacterial | bact | +--------------------+--------------+ | citation | cite | +--------------------+--------------+ | current | curr | +--------------------+--------------+ | database | db | +--------------------+--------------+ | dictionary | dict | +--------------------+--------------+ | directory | dir | +--------------------+--------------+ | distance matrix | dm | +--------------------+--------------+ | end of file | eof | +--------------------+--------------+ | eukaryotic | euk | +--------------------+--------------+ | filepath | fp | +--------------------+--------------+ | frequency | freq | +--------------------+--------------+ | expected | exp | +--------------------+--------------+ | index | idx | +--------------------+--------------+ | input | in | +--------------------+--------------+ | maximum | max | +--------------------+--------------+ | minimum | min | +--------------------+--------------+ | mitochondrial | mt | +--------------------+--------------+ | number | num | +--------------------+--------------+ | observation | obs | +--------------------+--------------+ | observed | obs | +--------------------+--------------+ | original | orig | +--------------------+--------------+ | output | out | +--------------------+--------------+ | parameter | param | +--------------------+--------------+ | phylogeny | phylo | +--------------------+--------------+ | previous | prev | +--------------------+--------------+ | probability | prob | +--------------------+--------------+ | protein | prot | +--------------------+--------------+ | record | rec | +--------------------+--------------+ | reference | ref | +--------------------+--------------+ | sequence | seq | 
+--------------------+--------------+ | standard deviation | stdev | +--------------------+--------------+ | statistics | stats | +--------------------+--------------+ | string | str | +--------------------+--------------+ | structure | struct | +--------------------+--------------+ | temporary | temp | +--------------------+--------------+ | taxa | tax | +--------------------+--------------+ | taxon | tax | +--------------------+--------------+ | taxonomic | tax | +--------------------+--------------+ | taxonomy | tax | +--------------------+--------------+ | variance | var | +--------------------+--------------+ How do I organize my modules (source files)? -------------------------------------------- - *Have a docstring with a description of the module's functions*. If the description is long, the first line should be a short summary that makes sense on its own, separated from the rest by a newline. - *All code, including import statements, should follow the docstring.* Otherwise, the docstring will not be recognized by the interpreter, and you will not have access to it in interactive sessions (i.e. through ``obj.__doc__``) or when generating documentation with automated tools. - *Import built-in modules first, followed by third-party modules, followed by any changes to the path and your own modules.* Especially, additions to the path and names of your modules are likely to change rapidly: keeping them in one place makes them easier to find. - *Don't use* ``from module import *``, *instead use* ``from module import Name, Name2, Name3...`` *or possibly* ``import module``. This makes it *much* easier to see name collisions and to replace implementations. 
- If you are importing `NumPy `_, `Matplotlib `_, or another package that encourages a standard style for their import statements use them as needed for example: :: import numpy as np import numpy.testing as npt import pandas as pd from matplotlib import pyplot as plt Example of module structure ^^^^^^^^^^^^^^^^^^^^^^^^^^^ The structure of your module should be similar to the example below. scikit-bio follows the `numpydoc style guide `_ for documentation. Our :doc:`doc_guide` explains how to write your docstrings using the numpydoc standards for scikit-bio: .. code-block:: python r""" Numbers (:mod:`skbio.numbers`) ============================== .. currentmodule:: skbio.numbers Numbers holds a sequence of numbers, and defines several statistical operations (mean, stdev, etc.) FrequencyDistribution holds a mapping from items (not necessarily numbers) to counts, and defines operations such as Shannon entropy and frequency normalization. Classes ------- .. autosummary:: :toctree: generated/ Numbers """ # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- from random import choice, random import numpy as np from utils import indices class Numbers(list): pass class FrequencyDistribution(dict): pass How should I write comments? ---------------------------- - *Always update the comments when the code changes.* Incorrect comments are far worse than no comments, since they are actively misleading. - *Comments should say more than the code itself.* Examine your comments carefully: they may indicate that you'd be better off rewriting your code (especially if *renaming your variables* would allow you to get rid of the comment.) 
In particular, don't scatter magic numbers and other constants that have to be explained through your code. It's far better to use variables whose names are self-documenting, especially if you use the same constant more than once. Also, think about making constants into class or instance data, since it's all too common for 'constants' to need to change or to be needed in several methods. .. tab-set:: .. tab-item:: Wrong .. code-block:: python win_size -= 20 # decrement win_size by 20 .. tab-item:: OK .. code-block:: python win_size -= 20 # leave space for the scroll bar .. tab-item:: Right .. code-block:: python self._scroll_bar_size = 20 win_size -= self._scroll_bar_size - *Use comments starting with #, not strings, inside blocks of code.* - *Start each method, class and function with a docstring using triple double quotes (""").* Make sure the docstring follows the `numpydoc style guide `_. - *Always update the docstring when the code changes.* Like outdated comments, outdated docstrings can waste a lot of time. "Correct examples are priceless, but incorrect examples are worse than worthless." `Jim Fulton `_. How should I test my code? -------------------------- There are several different approaches for testing code in python: ``unittest`` and ``numpy.testing``. Their purpose is the same, to check that execution of code given some input produces a specified output. The cases to which the approaches lend themselves are different. Whatever approach is employed, the general principle is every line of code should be tested. It is critical that your code be fully tested before you draw conclusions from results it produces. For scientific work, bugs don't just mean unhappy users who you'll never actually meet: **they may mean retracted publications**. Tests are an opportunity to invent the interface(s) you want. Write the test for a method before you write the method: often, this helps you figure out what you would want to call it and what parameters it should take.
It's OK to write the tests a few methods at a time, and to change them as your ideas about the interface change. However, you shouldn't change them once you've told other people what the interface is. In the spirit of this, your tests should also import the functionality that they test from the shortest alias possible. This way any change to the API will cause your tests to break, and rightly so! Never treat prototypes as production code. It's fine to write prototype code without tests to try things out, but when you've figured out the algorithm and interfaces you must rewrite it *with tests* to consider it finished. Often, this helps you decide what interfaces and functionality you actually need and what you can get rid of. "Code a little test a little". For production code, write a couple of tests, then a couple of methods, then a couple more tests, then a couple more methods, then maybe change some of the names or generalize some of the functionality. If you have a huge amount of code where all you have to do is write the tests', you're probably closer to 30% done than 90%. Testing vastly reduces the time spent debugging, since whatever went wrong has to be in the code you wrote since the last test suite. And remember to use python's interactive interpreter for quick checks of syntax and ideas. Run the test suite when you change `anything`. Even if a change seems trivial, it will only take a couple of seconds to run the tests and then you'll be sure. This can eliminate long and frustrating debugging sessions where the change turned out to have been made long ago, but didn't seem significant at the time. **Note that tests are executed using GitHub Actions**, see :doc:`../contribute` for further discussion. Some pointers ^^^^^^^^^^^^^ - *Use the* ``unittest`` *framework with tests in a separate file for each module.* Name the test file ``test_module_name.py`` and include it inside the tests folder of the module. 
Keeping the tests separate from the code reduces the temptation to change the tests when the code doesn't work, and makes it easy to verify that a completely new implementation presents the same interface (behaves the same) as the old. - *Always include an* ``__init__.py`` *file in your tests directory*. This is required for the module to be included when the package is built and installed via ``setup.py``. - *Always import from a minimally deep API target*. That means you would use ``from skbio import DistanceMatrix`` instead of ``from skbio.stats.distance import DistanceMatrix``. This allows us prevent most cases of accidental regression in our API. - *Use* ``numpy.testing`` *if you are doing anything with floating point numbers, arrays or permutations* (use ``numpy.testing.assert_almost_equal``). Do *not* try to compare floating point numbers using ``assertEqual`` if you value your sanity. - *Test the interface of each class in your code by defining at least one* ``TestCase`` *with the name* ``ClassNameTests``. This should contain tests for everything in the public interface. - *If the class is complicated, you may want to define additional tests with names* ``ClassNameTests_test_type``. These might subclass ``ClassNameTests`` in order to share ``setUp`` methods, etc. - *Tests of private methods should be in a separate* ``TestCase`` *called* ``ClassNameTests_private``. Private methods may change if you change the implementation. It is not required that test cases for private methods pass when you change things (that's why they're private, after all), though it is often useful to have these tests for debugging. - *Test `all` the methods in your class.* You should assume that any method you haven't tested has bugs. The convention for naming tests is ``test_method_name``. 
Any leading and trailing underscores on the method name can be ignored for the purposes of the test; however, *all tests must start with the literal substring* ``test`` *for* ``unittest`` *to find them.* If the method is particularly complex, or has several discretely different cases you need to check, use ``test_method_name_suffix``, e.g. ``test_init_empty``, ``test_init_single``, ``test_init_wrong_type``, etc. for testing ``__init__``. - *Docstrings for testing methods should be considered optional*, instead the description of what the method does should be included in the name itself, therefore the name should be descriptive enough such that when running the tests in verbose mode you can immediately see the file and test method that's failing. .. code-block:: none $ python -c "import skbio; skbio.test(verbose=True)" skbio.maths.diversity.alpha.tests.test_ace.test_ace ... ok test_berger_parker_d (skbio.maths.diversity.alpha.tests.test_base.BaseTests) ... ok ---------------------------------------------------------------------- Ran 2 tests in 0.1234s OK - *Module-level functions should be tested in their own* ``TestCase``\ *, called* ``modulenameTests``. Even if these functions are simple, it's important to check that they work as advertised. - *It is much more important to test several small cases that you can check by hand than a single large case that requires a calculator.* Don't trust spreadsheets for numerical calculations -- use R instead! - *Make sure you test all the edge cases: what happens when the input is None, or '', or 0, or negative?* What happens at values that cause a conditional to go one way or the other? Does incorrect input raise the right exceptions? Can your code accept subclasses or superclasses of the types it expects? What happens with very large input? 
- *To test permutations, check that the original and shuffled version are different, but that the sorted original and sorted shuffled version are the same.* Make sure that you get *different* permutations on repeated runs and when starting from different points. - *To test random choices, figure out how many of each choice you expect in a large sample (say, 1000 or a million) using the binomial distribution or its normal approximation.* Run the test several times and check that you're within, say, 3 standard deviations of the mean. - All tests that depend on a random value should be seeded, for example if using NumPy, `numpy.random.seed(0)` should be used, in any other case the appropriate API should be used to create consistent outputs between runs. It is preferable that you do this for each test case instead of doing it in the `setUp` function/method (if any exists). - Stochastic failures should occur less than 1/10,000 times, otherwise you risk adding a significant amount of time to the total running time of the test suite. Example test module ^^^^^^^^^^^^^^^^^^^ Here is an example of a unit-test module structure: .. code-block:: python # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- import numpy as np import unittest from skbio.math.diversity.alpha.ace import ace class AceTests(unittest.TestCase): def test_ace(self): self.assertAlmostEqual(ace(np.array([2, 0])), 1.0) self.assertAlmostEqual(ace(np.array([12, 0, 9])), 2.0) self.assertAlmostEqual(ace(np.array([12, 2, 8])), 3.0) self.assertAlmostEqual(ace(np.array([12, 2, 1])), 4.0) self.assertAlmostEqual(ace(np.array([12, 1, 2, 1])), 7.0) self.assertAlmostEqual(ace(np.array([12, 3, 2, 1])), 4.6) self.assertAlmostEqual(ace(np.array([12, 3, 6, 1, 10])), 5.62749672) # Just returns the number of taxa when all are abundant. self.assertAlmostEqual(ace(np.array([12, 12, 13, 14])), 4.0) # Border case: only singletons and 10-tons, no abundant taxa. self.assertAlmostEqual(ace([0, 1, 1, 0, 0, 10, 10, 1, 0, 0]), 9.35681818182) def test_ace_only_rare_singletons(self): with self.assertRaises(ValueError): ace([0, 0, 43, 0, 1, 0, 1, 42, 1, 43]) if __name__ == '__main__': unittest.main() Git pointers ------------ Commit messages are a useful way to document the changes being made to a project; they additionally document who is making these changes and when these changes are being made, all of which is relevant when tracing back problems. Authoring a commit message ^^^^^^^^^^^^^^^^^^^^^^^^^^ The most important metadata in a commit message is (arguably) the author's name and the author's e-mail. GitHub uses this information to attribute your contributions to a project, see for example the :repo:`list of scikit-bio contributors `. Follow `this guide `_ to set up your system and **make sure the e-mail you use in this step is the same e-mail associated with your GitHub account**. 
After doing this you should see your name and e-mail when you run the following commands:: $ git config --global user.name Yoshiki Vázquez Baeza $ git config --global user.email yoshiki89@gmail.com Writing a commit message ^^^^^^^^^^^^^^^^^^^^^^^^ In general the writing of a commit message should adhere to `NumPy's guidelines `_ which if followed correctly will help you structure your changes better i. e. bug fixes will be in a commit followed by a commit updating the test suite and with one last commit that update the documentation as needed. GitHub provides a set of handy features that will link together a commit message to a ticket in the issue tracker, this is specially helpful because you can `close an issue automatically `_ when the change is merged into the main repository, this reduces the amount of work that has to be done making sure outdated issues are not open. scikit-bio-0.6.2/web/devdoc/doc_guide.rst000066400000000000000000000203721464262511300202610ustar00rootroot00000000000000Documentation guidelines ======================== This guide contains instructions for building the scikit-bio documentation, as well as guidelines for contributing to the documentation. If you would like to contribute to the documentation, whether by adding something entirely new or by modifying existing documentation, please first review :doc:`../contribute`. .. note:: If you're only interested in viewing the scikit-bio documentation, please visit the `documentation site `_. Build the documentation ----------------------- To build the documentation, you'll need a scikit-bio development environment set up. See :doc:`../contribute` for instructions. In addition, you will also need to install `Sphinx `_ and relevant extensions within the same environment for the documentation, you can do that with:: conda install -c conda-forge --file ci/requirements.doc.txt .. 
warning:: The documentation will be built for whatever version of scikit-bio is *currently installed* on your system (i.e., the version imported by ``import skbio``). This may not match the code located in this repository. You will need to either install this version of scikit-bio somewhere (e.g., in a virtualenv) or point your ``PYTHONPATH`` environment variable to this code, *before* building the documentation. To build the documentation, assuming you are at the top-level scikit-bio directory:: make -C doc clean html The built HTML documentation will be at ``doc/build/html/index.html``. .. warning:: Before submitting your changes, ensure that the documentation builds without errors or warnings. Docstring style --------------- Most of scikit-bio's API documentation is automatically generated from `docstrings `_. The advantage to this approach is that users can access the documentation in an interactive Python session or from our website as HTML. Other output formats are also possible, such as PDF. scikit-bio docstrings follow the `numpydoc style guide `_. This ensures that the docstrings are easily readable both from the interpreter and HTML, PDF, etc. Please read the numpydoc guidelines before continuing. Documenting a module -------------------- In addition to following the numpydoc conventions for docstrings, we have a few more conventions that will ensure your documentation is correctly built and linked within our website, and that it maintains consistency with the rest of the scikit-bio docs. The easiest way to get started with documenting your code is to look at the docstrings in existing scikit-bio modules. A couple of modules to start with are ``skbio.sequence`` and ``skbio.stats.distance``. Go ahead and look through those now. We've structured our docs in a similar way to `SciPy's documentation `_, so that may be another good place to look for examples. 
We'll take a top-down approach by discussing how to document a new module that you'd like to add to scikit-bio (let's call it ``skbio/example.py``). Module docstring ^^^^^^^^^^^^^^^^ The first thing you'll need to add is a docstring for the module. The docstring must start at the first line of the file. It should start with a title for the module:: """ Documentation examples (:mod:`skbio.example`) ============================================= It is important to include the ``:mod:`` Sphinx directive in the title, as this title will be included in the table of contents. Also make sure that the title underline is the same length as the title. We also need to include another Sphinx directive below this:: .. currentmodule:: skbio.example This directive tells Sphinx that other classes, functions, etc. that we will reference are located in the ``skbio.example`` module. Next, include a more detailed description of the module. For example:: This module consists of several example classes and functions to illustrate the scikit-bio documentation system. Following that, list any classes, functions, and exceptions that you'd like documentation generated for. Note that you do *not* need to include every single class, function, or exception that is defined in the module. Also, you do not need to list class methods, as those will be automatically included in the generated class documentation. Only include objects that should be exposed as part of the public API. For example:: Classes ------- .. autosummary:: :toctree: generated/ ExampleClass1 ExampleClass2 Functions --------- .. autosummary:: :toctree: generated/ example_function1 example_function2 Exceptions ---------- .. autosummary:: :toctree: generated/ ExampleError The ``autosummary`` directives are important as they generate RST files in the ``generated/`` directory for each object. A single-line summary and link to each object is inserted into the page for you. 
After listing public module members, we encourage a usage example section showing how to use some of the module's functionality. Examples should be written in `doctest `_ format so that they can be automatically tested (e.g., using ``make test`` or ``python -m skbio.test``):: Examples -------- Run the ``example_function1`` function: >>> from skbio.example import example_function1 >>> example_function1("hello", "world") hello world! You can also embed the plots that an example generates into the built documentation with the ``.. plot::`` directive. For example:: .. plot:: >>> import pandas as pd >>> df = pd.DataFrame({'col1': [1, 2, 3, 4], 'col2': [10, 11, 12, 13]}) >>> fig = df.boxplot() This will include the plot, a link to the source code used to generate the plot, and links to different image formats (e.g., PNG and PDF) so that users can easily download the plot. You're now ready to document the members of your module. Documenting module members ^^^^^^^^^^^^^^^^^^^^^^^^^^ When documenting the members of a module (e.g., classes, methods, attributes, functions, and exceptions), follow the numpydoc conventions. In addition to these conventions, there are a few things to keep in mind: - When documenting a class, only public methods and attributes are included in the built documentation. If a method or attribute starts with an underscore, it is assumed to be private. - When documenting a class, include the ``Parameters`` section in the class docstring, instead of in the ``__init__`` docstring. While numpydoc technically supports either form, ``__init__`` is not included in the list of methods by default and thus should have its documentation included in the class docstring. Including the module in the docs ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Until now, we've only been editing docstrings, which are attached to Python code. The final step is to hook up this new module's docstrings to the documentation build system: 1. 
Make sure you're within the ``scikit-bio/doc`` directory. 2. Create a new file with the same name as your module under the ``source`` directory. Do not include ``skbio`` as part of the name, and use ``.rst`` as the suffix. For example, ``source/example.rst``. 3. Add the following line to ``source/example.rst`` to have your module's docstring pulled into the document:: `` .. automodule:: skbio.example `` 4. Add the following line to ``source/index.rst`` to add the new page to the top-level table of contents:: `` example `` That's it! You can now try building the documentation, which should include the documentation for your new module! Documenting a subpackage ------------------------ The process of documenting a subpackage is very similar to documenting a module in scikit-bio. The only difference is that the module docstring goes in the subpackage's ``__init__.py``. Troubleshooting --------------- If things aren't working correctly, try running ``make clean`` and then rebuild the docs. If things still aren't working, try building the docs *without* your changes, and see if there are any Sphinx errors or warnings. Make note of these, and then see what new errors or warnings are generated when you add your changes again. scikit-bio-0.6.2/web/devdoc/new_module.rst000066400000000000000000000033701464262511300204740ustar00rootroot00000000000000Adding a new module =================== Each module needs an `__init__.py` file and a `tests` folder that also contains an `__init__.py` file. For a module, a simple one may look like this:: r""" A module (:mod:`skbio.module`) ============================== .. currentmodule:: skbio.module Documentation for this module. """ # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. 
# ---------------------------------------------------------------------------- from skbio.util import TestRunner test = TestRunner(__file__).test Usually, some functionality from the module will be made accessible by importing it in `__init__.py`. It's convenient to use explicit relative imports (`from .implementation import compute`), so that functionality can be neatly separated in different files but the user doesn't face a deeply nested package: `from skbio.module import compute` instead of `from skbio.module.implementation import compute`. Inside the tests folder, a simpler `__init__.py` works fine (it is necessary so that all tests can be run after installation):: # ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- Finally, remember to also follow the :doc:`doc_guide`. scikit-bio-0.6.2/web/devdoc/release.rst000066400000000000000000000201511464262511300177520ustar00rootroot00000000000000Releasing a new version ======================= Introduction ------------ This guide explains how to release a new version of scikit-bio. To illustrate examples of commands you might run, let's assume that the current version is **x.y.y-dev** and we want to release version **x.y.z**. .. note:: The following commands assume you are in the top-level directory of the scikit-bio repository unless otherwise noted. They also assume that you have [Conda](https://conda.io) installed. Prep the release ---------------- 1. Ensure the GitHub Actions CI build is passing against main. 2. Update the version strings (x.y.y-dev) to the new version (x.y.z). This will include ``__version__`` defined in ``skbio/__init__.py``. 
``grep`` for the current version string to find all occurrences:: grep -r 'x\.y\.y-dev' . 3. Remove any deprecated functionality that was scheduled for removal on or before this release. To find all deprecated functionality, search for `@deprecated` decorators:: grep -r '@deprecated' . .. note:: Record any deprecated functionality that was removed in the ``### Miscellaneous`` section of ``CHANGELOG.md``. 4. Update ``CHANGELOG.md`` to include descriptions of all **user-facing** changes that made it into this release. Be sure to update the heading to include the new version (x.y.z) and the date of the release. Use the existing structure in the file as a template/guide. 5. Submit a pull request and merge when CI tests are passing. Build website docs ------------------ You will need to **fully install** the latest main branch of scikit-bio (including built extensions) and build the docs from this version. **Make sure the version of scikit-bio that is imported by ``import skbio`` is the correct one before building the docs.** 1. Build the documentation locally:: make -C doc clean html 2. Switch to the ``gh-pages`` branch of the repository. 3. Remove ``docs/latest``:: git rm -rf docs/latest 4. Copy over the built documentation to ``docs/x.y.z`` and ``docs/latest``:: cp -r doc/build/html docs/latest cp -r doc/build/html docs/x.y.z 5. Add a new list item to ``index.html`` to link to ``docs/x.y.z/index.html``. 6. Port content from ``README.md`` to ``index.html`` if there are any changes that need to be included on the front page. 7. Test out your changes by opening the site locally in a browser. Be sure to check the error console for any errors. 8. Submit a pull request with the website updates and merge. .. note:: Once merged, the live website is updated, so be sure to poke through the live site to make sure things are rendered correctly with the right version strings. 
Tag the release --------------- From :repo:`scikit-bio GitHub repo`, click on the releases tab and draft a new release. Use the version number for the tag name (x.y.z) and create the tag against main. Fill in a release title that is consistent with previous release titles and add a summary of the release (linking to ``CHANGELOG.md`` is a good idea). This release summary will be the primary information that we point users to when we announce the release. Once the release is created on GitHub, it's a good idea to test out the release tarball before publishing to PyPI: 1. Create a new ``conda`` environment for testing (fill in a name for ````):: conda create -n python=3.8 numpy conda activate 2. Install the release tarball from GitHub and run the tests:: pip install https://github.com/scikit-bio/scikit-bio/archive/x.y.z.tar.gz python -m skbio.test Publish the release ------------------- Assuming the GitHub release tarball correctly installs and passes its tests, you're ready to create the source distribution (``sdist``) that will be published to PyPI. It is important to test the source distribution because it is created in an entirely different way than the release tarball on GitHub. Thus, there is the danger of having two different release tarballs: the one created on GitHub and the one uploaded to PyPI. 1. Download the release tarball from GitHub, extract it, and ``cd`` into the top-level directory. 2. Build a source distribution:: python setup.py sdist 3. Create and activate a new `conda` environment, and test the `sdist`:: pip install dist/scikit-bio-x.y.z.tar.gz cd # cd somewhere outside the extracted scikit-bio directory python -m skbio.test 4. If everything goes well, it is finally time to push the release to PyPI:: python setup.py sdist upload .. warning:: You must have the proper login credentials to add a release to PyPI. Currently `@gregcaporaso `_ has these, but they can be shared with other release managers. 5. 
Once the release is available on PyPI, do a final round of testing. Create a new `conda` environment and run:: pip install scikit-bio cd # cd somewhere outside the extracted scikit-bio directory python -m skbio.test If this succeeds, the PyPI release appears to be a success. Make sure the installed version is the correct one. 6. Next, we'll prepare and post the release to `anaconda.org `_. You'll need to have ``conda-build`` and ``anaconda-client`` installed to perform these steps. Both can be conda-installed. First, log into anaconda with your anaconda username using the following command. You should have access to push to the ``biocore`` anaconda account through your account (if you don't, get in touch with `@gregcaporaso <https://github.com/gregcaporaso>`_ who is the owner of that account):: anaconda login Due to its C extensions, releasing scikit-bio packages for different platforms will require you to perform the following steps on each of those platforms. For example, an ``osx-64`` package will need to be built on OS X, and a ``linux-64`` package will need to be built on 64-bit Linux. These steps will be the same on all platforms, so you should repeat them for every platform you want to release for:: conda skeleton pypi scikit-bio conda build scikit-bio --python 3.8 When building 64-bit Linux packages, it is recommended that you use conda-forge's ``linux-anvil`` Docker image. This ensures a consistent Linux build environment that has an old enough version of `libc` to be compatible on most Linux systems. To start up a ``linux-anvil`` Docker container:: docker run -i -t condaforge/linux-anvil # Now you should be in the linux-anvil environment sed -i '/conda-forge/d' ~/.condarc # Run the build commands from above At this stage you have built Python 3.8 packages. The absolute path to the packages will be provided as output from each of the ``conda build`` commands. You should now create conda environments for each, and run the tests as described above. 
You can install these local package as follows:: conda install --use-local scikit-bio If the tests pass, you're ready to upload:: anaconda upload -u biocore ```` should be replaced with the path to the package that was was created above. Repeat this for each package you created (here, the Python 3.8 package). After uploading, you should create new environments for every package you uploaded, install scikit-bio from each package, and re-run the tests. You can install the packages you uploaded as follows:: conda install -c https://conda.anaconda.org/biocore scikit-bio Post-release cleanup -------------------- 1. Submit and merge a pull request to update the version strings from x.y.z to x.y.z-dev (``skbio.__version__`` should be the only thing needing an update). Update ``CHANGELOG.md`` to include a new section for x.y.z-dev (there won't be any changes to note here yet). 2. Close the release milestone on the GitHub issue tracker if there was one. 3. Send an email to the skbio developers list and anyone else who might be interested (e.g., lab mailing lists). You might include links to the GitHub release page. 4. Tweet about the release from ``@scikit-bio``, including a link to the GitHub release page (for example, https://github.com/scikit-bio/scikit-bio/releases/tag/x.y.z). Post a similar message to `scikit-bio's Gitter `_. 5. Beers! :fa:`beer-mug-empty;fa-2x sd-text-success` scikit-bio-0.6.2/web/devdoc/review.rst000066400000000000000000000147701464262511300176450ustar00rootroot00000000000000Reviewing pull requests ======================= This document provides a high-level, general, and **incomplete** checklist of things that pull request reviewers should be aware of when performing code review. These are guidelines which generally make sense to follow, but they are not intended to be rigid. The checklist mainly consists of things that are specific to the scikit-bio project and that generally apply to incoming pull requests. 
The checklist is incomplete because it is not possible to describe all things to verify during code review (that depends on what is being reviewed). This document also doesn't attempt to describe *how* to perform code review (there are many online resources for that). Reviewers are encouraged to keep this document up-to-date as the project evolves and to add anything that's missing. The checklist is not in any particular order. Licensing and attribution ------------------------- Verify that code being included from external sources has a compatible license and is properly attributed: - Include the code's license in the top-level `licenses` directory. - Include a comment with the external code giving attribution and noting the license in the `licenses` directory. - Any other requirements set by the code's license and/or author. Changelog --------- This is one of the most important points to remember as users will review the changelog to identify changes relevant to them. This is one of the easiest parts to forget in a pull request. - Note all public (i.e. user-facing) changes in ``CHANGELOG.md`` under the latest development version section at the top of the file. This includes things like bug fixes, API additions/changes/removal, performance enhancements, etc. The changelog has several subsections for organizing these changes. - If a corresponding issue exists, it should be linked to from the changelog. - Use public imports (``skbio.sequence.Sequence`` instead of ``skbio.sequence._sequence.Sequence``) when documenting import paths in the changelog. - Internal changes not visible/applicable to users (e.g. refactoring, private methods, etc.) are better suited for commit messages than the changelog. Public vs. private API ---------------------- Be aware of what type of API changes are being made. scikit-bio uses the following conventions to distinguish public vs. 
private API: **Public API:** API with an import path that doesn't contain leading underscores in any submodule/subpackage/object names. Users and scikit-bio devs can use public API. Examples: ``skbio.sequence.Sequence``, ``skbio.stats.subsample``. **Package-private API:** API with an import path containing a leading underscore in at least one submodule/subpackage. Users should not import package-private API. Package-private API can be imported anywhere *within* the scikit-bio package. Examples: ``skbio.util._misc.chunk_str``, ``skbio.util._testing.ReallyEqualMixin``. **Private API:** API with object name starting with an underscore. Users should not import private API. Private API should only be imported and used in the module where it is defined. It should not be imported in other parts of the scikit-bio package. Examples: ``skbio.util._testing._normalize_signs``, ``skbio.stats.composition._gram_schmidt_basis``. - Prefer defining private APIs and only promote things to the public API that users need access to. - Within scikit-bio, prefer *using* public APIs defined in the package even if private APIs offer the same functionality. Integration/consistency with existing API ----------------------------------------- When reviewing API changes/additions, look at them in the context of the rest of the codebase and its existing APIs. For example, are there existing parameter names that could be reused for consistency/predictability? Does the new API (or changed API) compose with other relevant APIs? Example: ``skbio.stats.distance.anosim`` uses a ``permutations`` parameter, so a new nonparametric function would want to use this name over something like ``n`` or ``num_permutations``. Commit messages and merging pull requests ----------------------------------------- When merging pull requests, use GitHub's "Squash and merge" option to merge a single commit. See `this commit message `_ for an example. - Rewrite commit message to describe all changes being merged. 
This usually involves deleting individual commit messages that GitHub includes in the text box. - Include "fixes #n" text if there's an associated issue to be closed. - Use `NumPy-style commit tags `_ (ENH, BUG, PERF, etc.). - Include contributors' and reviewers' GitHub usernames in commit message (attribution will be lost on squash). Test changes locally -------------------- .. warning:: **This step is extremely important.** Pull down the PR changes locally and try out the API as a user would. Try to break it, make sure the docs are complete, etc. Build the docs locally and verify that they render correctly (this is a common pitfall). Documentation ------------- - Verify the docs follow the instructions in the scikit-bio :doc:`doc_guide`. - Verify the docs follow :doc:`new_module` when adding a new module or subpackage to scikit-bio. - Public API should have docstrings conforming to `numpydoc style guide `_. Manual and careful verification of the numpydoc docstrings is currently necessary; they are easy to get wrong and building the docs won't always flag issues. Building the docs and inspecting the rendered output can help with this process. - Package-private and private APIs do not need to be extensively documented; numpydoc docstrings are not required. Document these APIs as appropriate to help other devs understand the code (code comments are usually better for this anyways). Continuous integration (CI) --------------------------- - Make sure GitHub Actions CI is passing before merging a pull request. - Make sure coverage doesn't drop. Strive to have 100% coverage for new code being merged. Unit testing ------------ - Make sure the tests are as complete as possible. - Check that border cases are tested (e.g. zeros, '', [], None, etc.). - Check that the base case is tested (``n``), along with the inductive step (``n + 1``). - Verify that tests cover more than one input data set. 
- Make each test case simple, ideally only testing a single thing (follow `Arrange Act Assert `_). scikit-bio-0.6.2/web/index.rst000066400000000000000000000231331464262511300162000ustar00rootroot00000000000000.. remove right sidebar :html_theme.sidebar_secondary.remove: .. page style and classes .. raw:: html .. hidden page title .. title:: scikit-bio .. light/dark logo image .. image:: _static/img/logo.svg :class: only-light :width: 600 px :align: center .. image:: _static/img/logo_inv.svg :class: only-dark :width: 600 px :align: center .. brief description of the project .. rst-class:: subtitle A community-driven Python library for bioinformatics, providing versatile data structures, algorithms and educational resources. .. toctree hidden from document but provides header links .. toctree:: :hidden: :maxdepth: 1 Install Learn Documentation Contribute Community Releases About .. grid:: 1 1 1 3 .. grid-item-card:: :class-header: centered **For Researchers** :octicon:`beaker;1.2em;sd-text-danger` ^^^ Robust, performant and scalable algorithms tailored for the vast landscape of biological data analysis spanning genomics, microbiomics, ecology, evolutionary biology and more. Built to unveil the insights hidden in complex, multi-omic data. +++ .. dropdown example code, implemented using raw html details instead of sphinx-design's dropdown, because the latter has extra edges .. raw:: html

    Example .. code-block:: python from skbio.tree import TreeNode from skbio.diversity import beta_diversity from skbio.stats.ordination import pcoa data = pd.read_table('data.tsv', index_col=0) metadata = pd.read_table('metadata.tsv', index_col=0) tree = TreeNode.read('tree.nwk') bdiv = beta_diversity( 'weighted_unifrac', data, ids=data.index, otu_ids=data.columns, tree=tree ) ordi = pcoa(bdiv, number_of_dimensions=3) ordi.plot(metadata, column='bodysite') .. image:: _static/img/hmp1_pcoa.png :alt: PCoA plot .. raw:: html
    .. grid-item-card:: :class-header: centered **For Educators** :octicon:`mortar-board;1.2em;sd-text-info` ^^^ Fundamental bioinformatics algorithms enriched by comprehensive documentation, examples and references, offering a rich resource for classroom and laboratory education (with proven `success `_). Designed to spark curiosity and foster innovation. +++ .. raw:: html
    Example .. code-block:: python from skbio.alignment import global_pairwise_align_protein from skbio.sequence.distance import hamming from skbio.stats.distance import DistanceMatrix from skbio.tree import nj def align_dist(seq1, seq2): aln = global_pairwise_align_protein(seq1, seq2)[0] return hamming(aln[0], aln[1]) dm = DistanceMatrix.from_iterable( seqs, align_dist, keys=ids, validate=False ) tree = nj(dm).root_at_midpoint() print(tree.ascii_art()) :: /-chicken | -root----| /-rat | /--------| | | \-mouse \--------| | /-pig | | \--------| /-chimp | /--------| \--------| \-human | \-monkey .. raw:: html
    .. grid-item-card:: :class-header: centered **For Developers** :octicon:`git-merge;1.2em;sd-text-warning` ^^^ Industry-standard, production-ready Python codebase featuring a stable, unit-tested API that streamlines development and integration. Licensed under the :repo:`3-Clause BSD `, it provides an expansive platform for both academic research and commercial ventures. +++ .. raw:: html
    Example .. code-block:: python def centralize(mat): r"""Center data around its geometric average. Parameters ---------- mat : array_like of shape (n_compositions, n_components) A matrix of proportions. Returns ------- ndarray of shape (n_compositions, n_components) centered composition matrix. Examples -------- >>> import numpy as np >>> from skbio.stats.composition import centralize >>> X = np.array([[.1, .3, .4, .2], [.2, .2, .2, .4]]) >>> centralize(X) array([[ 0.17445763, 0.30216948, 0.34891526, 0.17445763], [ 0.32495488, 0.18761279, 0.16247744, 0.32495488]]) """ mat = closure(mat) cen = gmean(mat, axis=0) return perturb_inv(mat, cen) .. raw:: html
    ---- .. grid:: 1 1 1 2 .. grid-item:: :columns: 12 12 5 5 :padding: 0 0 4 4 .. rst-class:: heading Install .. tab-set:: :class: no-top-pardding .. tab-item:: Conda .. code-block:: bash conda install -c conda-forge scikit-bio .. tab-item:: PyPI .. code-block:: bash pip install scikit-bio .. tab-item:: Dev .. code-block:: bash pip install git+https://github.com/scikit-bio/scikit-bio.git .. tab-item:: More See detailed :doc:`instructions ` on installing scikit-bio on various platforms. .. grid-item:: :columns: 12 12 7 7 :padding: 0 0 4 4 .. rst-class:: heading News .. card-carousel:: 3 .. card:: Latest release (2024-07-07): .. button-link:: https://github.com/scikit-bio/scikit-bio/releases/tag/0.6.2 :color: success :shadow: scikit-bio 0.6.2 .. card:: New `DOE award `_ for scikit-bio development in multi-omics and complex modeling. .. card:: Upcoming scikit-bio `workshop at ISMB 2024 `_, July 12, Montreal, Canada. Welcome to join! .. card:: New website: `scikit.bio `_ and organization: https://github.com/scikit-bio are online. ---- .. rst-class:: heading Feature Highlights .. grid:: 1 2 3 3 :padding: 0 0 4 4 :gutter: 3 .. grid-item-card:: :fa:`dna;fa-2x sd-text-success` **Biological sequences**: Efficient data structure with a :docs:`flexible grammar ` for easy manipulation, :docs:`annotation `, :docs:`alignment `, and conversion into :docs:`motifs ` or :docs:`k-mers ` for in-depth analysis. .. grid-item-card:: :fa:`tree;fa-2x sd-text-success` **Phylogenetic trees**: Scalable :docs:`tree structure ` tailored for evolutionary biology, supporting diverse operations in :docs:`navigation `, :docs:`manipulation `, :docs:`comparison `, and :docs:`construction `. .. grid-item-card:: :fa:`chart-column;fa-2x sd-text-success` **Community diversity** analysis for ecological studies, with an extensive suite of metrics such as :docs:`UniFrac ` and :docs:`PD `, optimized to handle large-scale community datasets. .. 
grid-item-card:: :fa:`compass-drafting;fa-2x sd-text-success` **Ordination methods**, such as :docs:`PCoA `, :docs:`CA `, and :docs:`RDA `, to uncover patterns underlying high-dimensional data, facilitating insightful visualization. .. grid-item-card:: :fa:`arrow-up-right-dots;fa-2x sd-text-success` **Multivariate statistical tests**, such as :docs:`PERMANOVA `, :docs:`BIOENV `, and :docs:`Mantel `, to decode complex relationships across data matrices and sample properties. .. grid-item-card:: :fa:`chart-pie;fa-2x sd-text-success` **Compositional data** processing and analysis, such as :docs:`CLR ` transform and :docs:`ANCOM `, built for various omic data types from high-throughput experiments. scikit-bio-0.6.2/web/install.rst000066400000000000000000000042201464262511300165330ustar00rootroot00000000000000Install scikit-bio ================== Python ------ scikit-bio requires `Python `_ 3.8 or later installed in your system. Conda ----- The recommended way to install scikit-bio is via the `Conda `_ package manager. The latest release of scikit-bio is distributed via the `conda-forge `_ channel. You can install it via the following command:: conda install -c conda-forge scikit-bio Other channels such as anaconda and bioconda also host scikit-bio, which however may or may not be the up-to-date version. PyPI ---- Alternatively, the latest release of scikit-bio can be installed from `PyPI `_:: pip install scikit-bio Third-party ----------- scikit-bio is available as third-party packages from software repositories for multiple Linux/BSD distributions. However, these packages may or may not be the latest version. The scikit-bio development team is not involved in the maintenance of these packages. 
For example, users of Debian-based Linux distributions (such as Ubuntu and Linux Mint) may install scikit-bio using:: sudo apt install python3-skbio python-skbio-doc Users of Arch Linux or variants (such as Manjaro) may install scikit-bio from AUR:: yay -S python-scikit-bio Nightly build ------------- scikit-bio is undergoing expansion, with many new features being introduced. You are welcome to try these features by installing the current development version from :repo:`GitHub repo`:: pip install git+https://github.com/scikit-bio/scikit-bio.git Alternatively, you may download the repository, extract, and execute:: python setup.py install However, be cautious that the new functionality may not be stable and could be changed in the next formal release. It is not recommended to deploy the development version in a production environment. Test ---- You can verify your installation by running the scikit-bio unit tests (this requires `pytest` installed):: python -m skbio.test If the installation was successful and all features of scikit-bio work as intended, the test will report only passes (and warnings), but no failures. scikit-bio-0.6.2/web/learn.rst000066400000000000000000000036541464262511300162000ustar00rootroot00000000000000Learn scikit-bio ================ Workshops --------- :bdg-info:`New` Upcoming scikit-bio workshop at `ISMB 2024 `_! The tutorial will introduce the revitalized scikit-bio, with a focus on multi-omic data integration for microbiome research. Tutorial materials will be shared via a public GitHub repository. Welcome to join us! - **When**: 9:00 - 18:00 EDT, July 12, 2024 | **Where**: Montreal, Canada .. button-link:: https://www.iscb.org/ismb2024/programme-schedule/tutorials#ip3 :color: primary :shadow: ISMB 2024 tutorials Cookbooks --------- `scikit-bio-cookbook `_: A series of Jupyter Notebooks containing executable "recipes" for bioinformatics workflows using scikit-bio. 
You can find the table of contents, installation instructions, and other information at: .. button-link:: https://nbviewer.org/github/biocore/scikit-bio-cookbook/blob/main/Index.ipynb :color: primary :shadow: Cookbook frontpage Books ----- `An Introduction to Applied Bioinformatics `_, or **IAB**: An interactive bioinformatics textbook developed by By `Dr. Greg Caporaso `_ and colleagues at NAU. Available in Jupyter Book, Markdown, and PDF formats, IAB guides readers through the core concepts of bioinformatics with their practical application using Python libraries like scikit-learn and scikit-bio. This approach equips readers with the skills to develop their bioinformatics tools and pipelines, emphasizing rapid project initiation over theoretical depth. Focused on actionable knowledge, IAB prepares readers for effective roles in the bioinformatics field. .. button-link:: https://readiab.org/ :color: primary :shadow: IAB frontpage Videos ------ Check out our previous presentations at `SciPy conferences `_: .. youtube:: ZpgkRQooGqo .. youtube:: hgBx_DBiPxA scikit-bio-0.6.2/web/make.bat000066400000000000000000000014401464262511300157410ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. 
echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd scikit-bio-0.6.2/web/suburl.py000066400000000000000000000031351464262511300162250ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, scikit-bio development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE.txt, distributed with this software. # ---------------------------------------------------------------------------- # Script to substitute domain names with relative paths in hyperlinks in the # built html files, such as to avoid unnecessary domain resolutions when the # visitor navigates the website. It may be executed only when "doc" and "web" # are hosted at the same web server. 
import os import re import glob from functools import partial # -- Configuration ----------------------------------------------------------- rootdir = "_build/html" source = "https://scikit.bio/docs/latest" target = "docs/latest" # -- Workflow ---------------------------------------------------------------- pattern = re.compile(f'href="{re.escape(source)}/([^"]+)"') def substitute(match, prefix): return f'href="{prefix}{target}/{match.group(1)}"' cwd = os.getcwd() os.chdir(os.path.join(os.path.dirname(__file__), rootdir)) for file in glob.glob("**/*.html", recursive=True): depth = len(os.path.normpath(file).split(os.sep)) prefix = "../" * (depth - 1) with open(file, "r") as fh: content = fh.read() content = content.replace( f'href="{source}"', f'href="{prefix}{target}/index.html"' ) repl = partial(substitute, prefix=prefix) content = pattern.sub(repl, content) with open(file, "w") as fh: fh.write(content) os.chdir(cwd) scikit-bio-0.6.2/web/versions.json000066400000000000000000000041631464262511300171040ustar00rootroot00000000000000[ { "name": "dev", "version": "dev", "url": "https://scikit.bio/docs/dev/index.html" }, { "name": "latest", "version": "0.6.2", "url": "https://scikit.bio/docs/latest/index.html", "preferred": true }, { "version": "0.6.2", "url": "https://scikit.bio/docs/0.6.2/index.html" }, { "version": "0.6.1", "url": "https://scikit.bio/docs/0.6.1/index.html" }, { "version": "0.6.0", "url": "https://scikit.bio/docs/0.6.0/index.html" }, { "version": "0.5.9", "url": "https://scikit.bio/docs/0.5.9/index.html" }, { "version": "0.5.8", "url": "https://scikit.bio/docs/0.5.8/index.html" }, { "version": "0.5.7", "url": "https://scikit.bio/docs/0.5.7/index.html" }, { "version": "0.5.6", "url": "https://scikit.bio/docs/0.5.6/index.html" }, { "version": "0.5.5", "url": "https://scikit.bio/docs/0.5.5/index.html" }, { "version": "0.5.4", "url": "https://scikit.bio/docs/0.5.4/index.html" }, { "version": "0.5.3", "url": "https://scikit.bio/docs/0.5.3/index.html" }, { 
"version": "0.5.2", "url": "https://scikit.bio/docs/0.5.2/index.html" }, { "version": "0.5.1", "url": "https://scikit.bio/docs/0.5.1/index.html" }, { "version": "0.5.0", "url": "https://scikit.bio/docs/0.5.0/index.html" }, { "version": "0.4.2", "url": "https://scikit.bio/docs/0.4.2/index.html" }, { "version": "0.4.1", "url": "https://scikit.bio/docs/0.4.1/index.html" }, { "version": "0.4.0", "url": "https://scikit.bio/docs/0.4.0/index.html" }, { "version": "0.4.0", "url": "https://scikit.bio/docs/0.4.0/index.html" }, { "version": "0.4.0", "url": "https://scikit.bio/docs/0.4.0/index.html" }, { "version": "0.2.3", "url": "https://scikit.bio/docs/0.2.3/index.html" }, { "version": "0.2.2", "url": "https://scikit.bio/docs/0.2.2/index.html" }, { "version": "0.2.1", "url": "https://scikit.bio/docs/0.2.1/index.html" }, { "version": "0.2.0", "url": "https://scikit.bio/docs/0.2.0/index.html" }, { "version": "0.1.4", "url": "https://scikit.bio/docs/0.1.4/index.html" }, { "version": "0.1.3", "url": "https://scikit.bio/docs/0.1.3/index.html" }, { "version": "0.1.1", "url": "https://scikit.bio/docs/0.1.1/index.html" } ]

    u+M(Ⱦ9o=ȴ=yF>,?;;)>ʽ UZ@>,>>ڼ"=<<ٵL9G<=d>Mgy<$*sIؽBI3=c>?>FL>n>忱j>\D3L{\+?ީ2=<}$>@b>{ѻŠ=e>`x>|m<>b<S>>pFyr>1t>?bCT{=Vť; ;PJa=:g(?rP>oE)!tھ=&>W>s>MlJ<-E=CO>>;K>/p >>a>j=!q=}!=[G=m>?> X۽g=Ou=A-1>i~=r1]>o:W7= v><S>4y9=|m8a=>NaV< r=m <5>5j<=#>=>>pf#i`> EI>1%>݁=F>>B7>G> }>h7_ ?c<>>;}=c=IaΏ>T=C>"W;ï>T 4>A ݻEUz9> }=;4.:!=p87jQ= ۾i۽|hj=몯>[ >]b>-{(;ze~Zȼ$H>߽Zǩ@l>@>[Ӎ3/>gj= 8z?=oE<:=yϽ_a=B?*Po&t=4,ڕZgg;WQ׽K4+>d<><>={J>mP12V$=F=.>*)E>BnjBF=YH$>8TS>فD=\Jw: >>ՙ >qq>jJ8>@f罛=e >_>XZ>u:>b?J >žs>__= >9伩=ʻɾ:kN~Z>6(9 ]>m=\w=>ߔW?>H4":|3f >ZĽtm=p;]*y-lz5V#zfRo=!yU>=_s:[>Ό{Kڱ& >Z3>Y_$# >TE>˹=A2_grX>8ʽηy>Ԝv,ۼgM ^|8>νɾHA#>}Ͼ*냾M6e>dz=BB}"P=?g'eЍ/Eq r\<> =m=gu9;O=upML’>!lϾR>OH7Rb>c>G0>߾J>BM>mP)?Df0?Q=¨)>/>mҮ.Ȕ!|>{/.> ==h?2>> q>=Լ>>q=0=7[^֣zfg>gOn;=Ȿj&> >s!v'=_>D=. >?C>>嫹+bU=?=M#COx໽ų>?=R\>thԽ< =f1= /P>/«>4L>{}>c==Qy }=r^>o>2&>?3ٽ=%]=hKٻ=Sӽ;F>2 >>= f> Ӫ+>ξ;r=2>y׼ s9 >T>#<μ3=,.T[DsŇ?(wN>s*=YZɺB>F,>&[?;;՗>z+H2N?لa>BZP&žþzf>S)7=Ͼ_3=%_>#=Nb?Hk&n=_8Z>:}М!i>ܧk:]{:m>xֽ[*= ёt;x1=Dּ^*EW:ބ6;/3>%&es:>Φ>!>e=M> ?RP>ҧh O=gӾ=)k>+)>[Ľ"Y=񧂾wc$t=a>r>>־i2rHa*X>|A->EU7 ?Є>Š=~:6(>C-ă>hg=u%>ݍ=%&>e4>M=<`uJwR>ß= uޮw=-I>>ʹ>(&=a>y>Fvܾoh>QZ>iX=B/;>Z ȾN=/>k)>ݽkQ<יB>6fٽýy>M4Wֽ#~m>hy8=Kbxu=Df>ƴ9#ҽDi% >Os=,/>>u=db3=[>L߽Fg>f)>/>֠Ye>OmΉ>=5=[]=H>I>f=W=s?>>3N4>o侏^>%>Nv}7=o=5P>ZVN=[F'>!=ֽte;=[;u)K̘b=A;>_۸=jm=P❾I =:ᾶӽzνR6= :_=ؾCG> >"5>i >j '4J= y#xǤ&ܲ> ;bN>G>=kEt;V E< >hm(E2>$9=Rv="~R%e>L1$>9WA>.MG㝼Kнl< >0D :e7k y 5=.Bs>`z{8*I>*@l= Ѽu<rc+>./#dJa}bM>%= *SE'> % K=ɾ90G±8ڳh>g=䈾[Ϛ>>\> >>g%#Kmp>Xʷ=>罈\>#{=EB>Y Y6˼"^9<"ilP>pL > >K=S>P= T`Fe>ug!qE> H;Š"=><7=Y- SCp:>c⽡=lH_Fv>澄=i&>t6v[x>aBi;=}6=潕<=>y=$)DK^ =` ٖK{۾ۘ^w>6ij9(> X>_y>44(j>q>s(>oE-̾,!>m;Kl/>V=Kl澗Q>ڤ>OXk$>}&>aaL>vz7z r=<<J=I<=>hzIq4#z>ȣ$쁾i>fe{>hV=۽3ƾ=P߃͟(Bn>ޤNO<-=Bn7֌M/:=s>l>>]M!`%&g>ɣH=þ`۽0_>V>F<%̼޸F<=#ǾthJ=>׽#Bm>M՘=3>PK`W7<|g !V~fׁ=lS!f)=h=Yŕ3=:=笾_^)c>Y=[' +>x=>ھ+ x*>]>i.0>U]:n=׳}"t>k=::T>BkƲ]=v&>z7§>Ǵ=Uu>t>owd K>g>e>n>ElL=>7Q`=I=ᠾ5Ш>u@iE>s"kwT?=UҼ@6= 2d= X!=#9> >s<½ +H=Xd>n|=C=_b=>>[>i>=+8>sK>о2e)Do=>}ҍ>Dz=G=)>4 /=v;==<~佩ϱ2iK 1=ɧ>`&<ݏ) ~?N>> W@,= >D> 
J+>1+>lF𼼅l>=>G8U=#ս 鑼}^(e=5===N?齆%IRd>. -׼jf>ݼʶw`>&n?z#= <`=q`ݽV];M>20>X=زh TJ]<(ZټRо'=Pj6M><;X7i (>]ٽ-;s>A~:>j_)iEv{1>i$$^W=nbC>>zy>fO=^սo52ɽ>)>nm>QNIڽ.>ѭ*_)8'>չ>־;Tč=s>C>ײ>=+>t>H7 z ?xܿH>Ah>+s>=1>>S<D|9 ]zy<,X:>KU>u=0]Xg>% >[>hj==3amg>=I>=9b+@=u";XBp0R>nW}>G/;UW ><>>?_cK2??vh>9>=ν"}>FƩ>N;' 罯Ee >mS=HрE>TA<>A>M>7<-1?g:5-YV>=Ӽ>I`,g=Lf< R>>Ɋ>*-=,hVwC4J>Qm>=P;@>$:>?-<߯>1>]?=F&e]>K}[Ͼm>yM=Ii> EǬRc>~;dv3=Ec3$f<̙s=sɼI=ZG`(#<>p>Q`;37?eu_--> =(g?a:*oG=GV >4>H__[==þL|>6 {=GN=7>o3> 㢽`?>|>}>{%H=L=ݮ&U>n>1R}9>[Osu={JE.G={3ؽ<ν%ip >o ^ >ZIa>aĖ ?st=Mj @};=E>WOe>O1/=/T>Yp>0=]8t=~Qн;\>)>wҺ3'=r>(>񆘼qX>þy6Hs!u]#=_V㽉0uz[þ1/ھ|Eߓ>>>%MĖn f?֍*9ֽBNv#=]U>a =>>/I=T>\iMxŢ=Ε :=2E>P1=&W=mG>Zt8=/U> >=U0>NO1"9>H =%v[=u>=@wLy>@>}]Nʮat>п:= <;p>_>?L.>‚;>^_>l>M|M>>8IV=/K>K> >>8޼>oCY>* ''b].=N?ö Jp`-Tsui>s_td>RXyu >I0>T>C5&ɾ{>!JX=6,)>v(? <K> ΢&>pW=E>yI=xcC?anA=_=Ew==ッ}=>?3U½*DN>߽5ȕ (8N K'>!><r> (0D=f=ˢ?bJVʽS"}>*>h>52aW>rb>7>\=̖>о>͝UA*B ?LVy>H>1;>@>>>Ԗ};n>A>;q>;>>,\>=C=mL> UBnoӽi H?==F1cWpkR"ǽuGX>\>A=9 o=bVV>hž#>׀Xxln+>`>ش=v=UC>>|IR >`Q'>K>0W_>셾evT!= '˄=p >10>^9팽hr>b]>j = J :#>h5A-;c9>.D=X\Hɲpa=N僾tT^~>=>?= >ϒ> >3U>QZ=+=]>A`*=l=">C1>½c>.\TR>;+@%u>J>\n8f=vw>T>세>2>97(˽C+>G>(c<=G>),>rٿQAͽ>%/=m=?rQ 2A>c*>>о ?>;>o>9=iؽEw>|>i<>'̵=)>" >ۯhâN>1g] _>6Y=Os_(>n>?B>KR󽖶> >/<2= -:0= U=>? >W>V=A.9}Q=>y59=NZlJ=f,>>߽] >qś\= _>["~T@`㼓==g.==, bP> Ĵ>ʽ5+r15(>%=)r?p>% ˽m\>Bm<o3>뿾~=>U=N+s)3^fձzEUL>c S>>:'$>=w>/8y?bEj\;b2ӽKE>%ϛν U׽F>J=n>>>7= >Ń>ڡx>0> <ކ=f>O9>P=?*D> ߧp>Z)=4><:q O2>H@e<]>>b >B}<׃hW-Ǜ>^"KF`0>D6>G 73>󁾒4>I1nW{6y*pOIy>=K=J Ͷ Q_=Y> >< ]b3K+<<({=>n<}> x>" >F>l,>9&>=üń= /4AMpv/D+dm#qr> #>*<>=ȼ)CE=$|g=D;Op{R=Ҧ%½5=i=F]?섾XOd:>r>>Ǚ>>HK;6>|cM>)4Upt-=9zվչ">q_T>q>ь=z-<>B̾'+>|0y>eD={3ڊ=w])=>X<[I>r*=F>=L.96iڭ=c4D? 0>'驼tP=1o.>%=nN>9߾6$Ͼ{=?DR= 2-=+_g>&%>0:>Y *J:ޯ<*;9>uv=lI>׌>L+: .=([]><7>VM0=*QȒ=<'??=ˮ7>nt1>3Mݾ7>ABl= )ǼE&= ƽM|>֣ =Q>ˆ9]>!|-pT$>Q(>J?ā3| ?H1[K>p9=R>='̖>о> ?t}UB> Ò*9w>x>=? 
q`> ˽SS=⡔q=y 5=veOq*^@1d-z=+r=o޼(>>9hM>Y4>BQ>cYO>ˇ1>&ֽit>%>?fS[><>4=*Ⱦe a@=RPN>Hxe>K>l'1ϾY >Ip>֗ >]A=`>n>=?>"K>3Cʻ> ا=W9xq[h>6Y ܾ_V>nÊ]ۘN> @ю>}o>&V6 FTU6,=Խd>^<*>~/9e}>#>$8=6~)? x=Ѿ,>{=u?9>?Y>tN>dh$?-)>4\Oh=_<=Ua(=4@8>tm%>`A:E>Jn=o ?aa=X>Ӿ">.o(S>#=CAY˽qǠ> >>/=bᒾ2{+ھn=U=N \>bf>\}p?ny> >i>y>xU]]v=&Ms>T=mH{.<s2?>̾ic >9z=>xsù>鞩>Ӓ边M>=;u =| >=]@# {zP8=\ܩ=>BX̽+E=Kt<^=/D=҄Dz,-<s޾$jG=ÂӼ8< uɽ#>>2"wuoiA>˓ <↽J!s>=t|?p@&=Ph=?j=K >VdX̄nCs>΋¥!x#I<=d)=8C>O>?=PT>w|>S=`>*>$LC "= J>L>0i߽4@jDO>3>gAԽʍ3l$> >LA>ʾܖԾnj>=\U> 0'>۽!J>T>9^D2=ݾ;>q'<`z>r9vd5K뷾 >x,sC]Q1K>6?=T;gFy=/>ͥ㾋)>R>u,'>j8>,?F3?y΃=-.=m'y>׾Le>'=|ϸ;=><>T]]>>32=Ym7>$'Veb罡>jqzU>>@;v <6oU9>8O>:>Tb=u'Ek;̛͔̾J=H>f?*7j>? $o=+NԱW>=8=4Q>C̰=!=\k>%/pzUӽi>L?n>hT>?#tw7dX='<1>f>ٰ=b6|޽L`zۻo`>2 S>'"W>FoBX=;hJ=c>gWQZׂ<<q++w̫Y>7$=E ޛ=~}>'?,zS{=H!S]>c\<39> ?>}M>L Ϫ=ۀh"? O=ٺ͊Ql>O&=._=$H>>XXy==?r̔-; @%>{=p>ԾZپ3kX>E2J{>Ҿ:>%=y+R=Y= >"=G>#<ͽsV> B>1s>C>x>|`P>%==>&Dnt=vS?WA;>Ⱦy˽ [n}.^C=v]>Ţ>N=(YY>Cn>$==?>fbSȾ莽TC ͪ;׍>>&>&W>>{N>N=P%מa߄>0>2x=Hھ>l">B4>޽rH>ba=Em;>Y;J 7= ,?ċ=b >B>uC6w񽘟ν0>8?@H=i;ὲ=ԾɎ>7>k>[b>$>P䴾>*g>+Z>.>xO!#3>ć>_S>'<=nm?-CTTQ?w>T?hT>G>8=3-S->RBjo>>b޾> > >q>(#6j>&GmS=i?vm>=>;2>rAо=dK?_T6p>4>iߏc=H<czwȾ | =yO1?b5>>|PjX;=ĉ>Hlv=k˯=js> >D[ ?򒾮>)>h>.쬾}l=7Ə>*6b쳬36&=7 <};ܼ=Y ֺaRR>P0?^mg<[>UA@=â>c=7>v>=~=zKz>ؼ>+%vW>˅,/>Z>7yǺ>bϢm>kp>NA=5>?[2,.8(;-sD>>nLe>!x;>E˽W>2Iޕ;F>C>|;Gݷ>2">>ɾv9I>kc<_=\=ݤM>F="g> U?d=>ö>H˭>IQ )>v!>ڒ>սP>=+"?=^<=)A~>L4=ٚ8zwњ=!<>x>݉I%:>o >r>=P==%>'>zFD-<&]m~E˾4j x>S0=-ݾVE>hZ=K:}m4^Au_Q7?;>o=/;.r>{F,u1=k@i>2vϱ6=='>@ D3/=GL>hbhbU޼s=$;Go|>[)>ž^A4 0u=+4a>ʀ =6S>xͽ0$pl>ߏMO>yj=Q =Bg<>=*|>>!oOȘt[?BҍB?f/E =U>w_>߅x=gG>AB>W5{=K`>ZL>H>9yؓ} =&!=(>2g-<_>i>>9>>oS=_.>Q/h ɽtB˼'>]> *]>>x:zkžcH=a^>>|==c->yz u>=#d6J>R>?6>A²>m=s$=v>D>Y=`qo$E\7>E>>:gG>X>D]Խ9:(>o>[>y1)2Ӿ~>E">u<ò?^l=>bh&?R<9>~k+ =BS=D ?n >T>vy=Z>r>k>1>S6>νý`5?w> ܦSu5¤G>>4>+'{Hz>]:*>yz<̓4ܻP>s^>j2퟾㜾>T3>l>: p<> /f>ŹM=>2=B>ݻ; | VϾF-u#>@N6>(FYH>a >y>vĴJad>=Фd:P'$`=>= E-=# >+H>C=J|=[/>b>T&><<;A$?As=?>!#>$t=Mqsl>\ռ;c:zV=B=[E>$>K7=j5>b댥=>E7>n$,d )| ի#,>!F<=F>>[p;U@hxH>'(?Ɉ<[(/>N>a<æ> 
=hvx>f=u>E>liZƾqC>P>M">g:,V='̼'`>p>AyLȾSi>FM>dK׾ V=rj=O/>;'%}yZоrmBžo>zM>jFZ\] ؄^4<=ѹ >-gF>JV>\e=>?K>Õ0-Y=pG>H>&d>[>;3N >X܂=ާ6.alO> 耾b><>VK` 7>F>&k=.O>x"%=/~>Mw_>9I:>?*ὀӐ._l;,>p?>>> F= >q ǾJ,>,==6;w ݽ~7=+eɾwT<fT{>Al=P>/=[ʾA=Z9ֽlԼN>OM>w>>Ȥo>>}?%ɼ}=tJ>QC?r8ܼ/>b> Y}}<e>Z7= \쾂53G˾S0>֬,>}="/>4 >W&?͙>jҶ=,7E'$[dч;L>n^>R=:>S!$r[;wb>GX=< }Ͻw.<=F`TA% &=l=>k%>&<<`ʹW^}=JýZ5=!4^>ϋ= ?7>>>=n=>!^T0(a5G/= \>Z>ʶ->G9Z>=&<=oO='W>- >\1>Z=]q>H>e}iY/>b >g>H_>u#O=8cƧ=Ⱦ}U>ѼJ4>)]bE6<=]eR=F>O%/ ݩ>l=݅Z=Q\޾WdOË>,d籂K}>C[dƺf8|ٺ= x{7bd>v .xPžO=Z= >e$c>XX@>7C=y>[=`=2>sSZXp0Jx>?7=Kį>F {lgS4IPٽ̏Xh>>7>ZŠ4>N8+<оS|Zt>>ae]y=0F> 4%tŽfWd0j > :8t>4> rS>v>h= X>G婥̼J4$Ǩ=R4>[2?hp/>|8:,4IV7˽Vp{Tϟ=zT=1=R>V̼ց>=Vc><t>=ꢥ=\N)>=Nܾlǭ 8K޾E>f͡>(1ٙ>s=f%n>uu>=!FS> d=`8=*h=Bx@X>i佫>_vT 2<[;=[߱ u6:O*>q>潷@#>T_>$`df;ݿrK>=[ [hEm?zu/+>O=G>45>)>v`(?V|Y==VgC>̽ ;-=I2 ,> >|i>`d鱽8>-Hzfa><=]D<ɢ"jkP^f=!>j</=%>"-O=-gaJ>ʆ~>=I>fYս&̑/E=&ZȾ?@fVȾEټ[6b4;PYL:7&XE>į>C?T>쾔 C>gG>1= iK(Mmt>rͫ=IdK@<3>6>DؽnϽC>TEY4=s^f%@>3 >3=] 40(={1<>N=:{-R˜ؾ0 (ʅ><9=|0^O=փ8"'>R =#=>+[|}>O+>kS"H<>H=0(>5?tR.*>@X>̽@>P> DB> |<XP{!>,߽8@?0>MݕhT9zSBo> C/o>E>>{!<*EQ=C?B⾩}j7NG>+d=2@>ML>?=h<>ԡ'D==q=L #}=,Ou:=o= 8>ϼr2=)μt;z,Yo=J>EAei>2>+=( |^hY=R=Og=/A0Y>n>Eo>Ƥ>4`ݽLf> m>H>qG>rl>s<>z/=>8g=->kE>l Z>>>Ńӽܯ>`5?w>"=,۽$v=<&yۻk!uR2wڽJ+%>\>TlG>T=iT==5>U=E R< *?zӃM C>tԏ>򡣽=&{T>R0 D<>Y5bㄾN>3j̼^݉=>Kf>_>[=XKϾ<>=彉Ƚ >X;WP M mj=;ƔQ>[?C\PB4{<[pok>L >lz;?R==(=}k >>'o>ּ!==7>6[6_ǽ?=f> c>ؿ>]ABj0{> fݢ꽴\R_5>=grcl2;Q>_>y=, ?#Y===>A`/W=Ͼr =Lk>BeZ =lK9=2>y=J ==RŊ -> ?6Fz$R.E>r=V->ZZ ȽNֈ:[>rs<\>%=Ž<=>)H^= O=̚ݾR>AU*>pFI>ǃ9#c*E >~@>cOξ0=]4bN?\\D?G6aܾiEl=Ke>">X#Q>F)>yx=^[d4g!c>>B? 
>XL>(>ZP=~&AX<>Ot#_=껁 =Wa$?%¼ R>s+EyV>>2=CN9-i= >*>5>6>> 3>o=6=뭾/.<(569u=˚,HJvW&NIvP>=C8%j̽;'q0>3xk>@9o;'>&?/5PdY>H IW>>6L>*a>MV=yR ľ^=, /U==d.>uV(=I,%>YE>2u>}<>G4=_>{Q>1>O}c>;Z>HpbQ?m  -= >'<&> ?@w<ƽ]=$>V>c&!<>ߔӷdCcz4ь >Ig>=ꭧ\j\W>#Wa=ux཭M;ZC2<»x>Dn>TT|q>h>HZ=;c>PXnk<#ƾ4mBOcݽ:>e*P=-q~=fit>^sX>*&ٽgx==g=}>=]".>722Bv%?"gen>^>^ԟ$ʽm1ؼ<`==>=Ǿ`R==T՟0>zqv>& ?y>>>?<>r>=Ju=?=d]>>F5%'ɾ\L>>au5L>vĽ)RDbT>,>%ǾnZL> >>PET1=c 2>ҍ>Í>\c?><^><gtj;t8G>@dý*T]GX>ܽW=;y>xoQ,?UD_ؽ`>ƽ΅>ݞ >cAR>ICܽ; =R>v9v>Dg+ >>F=>.|=dq#=$;M߅k=>x>: D>`k=1?I(=g)=ܽc=ֽd=l>>'d=< >>`>"Yнc]HK>?ƒ=>g=e>o=/?X4>Qk`lSLWɾg;-=ⲷ=p|Wa$lP3>{)l=~>:R#>?'~]=k=}I<>K=O=`<,c>=QV=G׾xs>@2>M3>l>ݼ);^= ޼&;?N=]6>[==ezj=>A^Y\IڼjupJ18<=4?[1ľ2(M=keEj>Ѕ=>dms#9ȾB<o>̎[ԁy>< '=2m_m\P`Z=f<>ROH=k!<,>}Q>_d>/>׽B{H==`>Jvm#>hؾ.Y>[>zi;>nK>2/zC>O;I=IB!ᠾzj= 3()󽄾c,ɿ=h4>cʾ#PNkѭzE=iyqs>6.=*csF=a: =>*aZ 5=){'˗d4Ά:= g>>E>"<\=s Pr>S>G"?؉=|~[!ZS>e˾u)>F4>x4}A>ԏ>:3Є?OZ*M>[8#U>PT*>0>rH?>y=6a=$>p _>嬾SH#i}6==};4>F+ӽd=2yٓ>Ŕ4=6TKa=-FY>=u5ѽ萗>J">O'>8#>]=x>a/- ")Bq[5`>2wR5ߊ<>e#>ͮN=>>\Ƚ:e,^>>L=zR><ڔ[><*]y=kP;>/3"?Ⱦs(*z>߶>-ψ> Ϸ>xQ>:?ӾA_5>`߼Z۾ >>-:i I,=sg>šH>_k=|7 .?+4>Խ7=xi=/GĽVP3>b̆=J:.4A}&Ϳ=~%U!>0$o=nU r4=znJ[EB{4p ޽⥽Z>!=QgpiO`/>VQ>~=h==V>*=U ?^{сW>2>=1=2:>(> >1N>>@2>q 򙽵V>e=M.>Z#>>B8=kG >lPN};>q@;=*5>(0el4D=0è=&TPǢ5x>^v+>B@#=#޾ 徶 =im߭ɽ >;߾<3+4w>u3b+=G!6>12Ql> Ѿ$Z>&i *˾]D-t>1C(R+>Ά>>9w>dꩾݺ&F? =Gq>xPW㼔W> - H<2 % eՃ>,-ɾ=j=qo>a=~=b>qp ;?u=>2;~ DL>:ѵ;f >(]"-2>)ս }>jffI=>Z?4=x#>=>̓YM&μm?myk'*j=#Y_t:T|Q]=]*VT=oO3K>t۽Nt=! ۾!>Fh%;}>$>=,˽ uma?`-?]=]4ξ䤒=æ>%#:>+=/> vUk]y}>.fwK+9;=.>?>3>>>< Nu>$$>Z<½=:ǽG>uN4%V(b'?> >F>4|>޼jv=H,]=4D>GQO>X>?=p>N$MH>=p(= yV7@ =˃Ҿԟ<=㟾J99?b>A <;f>= =#x-f޾!>~=Fc+>D|>=5P>Z= ɾ\@=s;sg.? 
=y=>C{A{ɜ9=HR>\5f=>5=U>_B/:?:=K־C>Q> La>%)>&֜a”=)=>tqo=:L>\>v>r$=ќ>y.:=G27>>tޟT>>ZF|==^>qF>DV>;k>5JX>t >>>ᇋ>Klq->J˾|ݾu>j!>{=Ž}=M9>oL>όg=>_ka|@u>~8G?D;>2ݾJ8>1N>\9S{<l1>^=>?M?n\*$ǽ6x> =G>>z>\־5u=Yۗ>\= ?&f >Cռǚ*>uB>=A1>=qV{nؼ4[ =o>`> 龚>;μ`J@w@>9I>ήQ>$=_s< a=nn*AFGn>>_u=r=18A?( ?X=2z=t=Լ=UP7RpJ>=|q)|%T>8===#M==Y=Իn>h3bER>e{ > D:4>=ړY>)>Ǽ]I=D]>{ϙΘ>>jo>xJWHk>,>37N>w\˽F=zþS;>F־YR˽1k k=X>>e>(=gt>X_>uZ^keG=k[>db> V޽7Q̽ $> ?W=־?p=ۂթ]>p>R>\(>エ,>6!>T= >?XH>_<<">>H>>$=>;8S֮K>&>ПgC&cܾ=Ն>٤>ɽjmm\+¦>|Q齭=`i&)*+h>g>\~8 辙=ƽʾ{C!>q A>e7>cY y=h#9/ݸu=Jo:a>oW?o>>BOlv=g޾М?)pSaX'E=i?jZ+4Y=4B>aC>\< \S-Ӛ ,=%>n>=zM5>Z=o(r>P6=l 87H>==c?;ٽr ?G~I3ӭ)؍>&=;ٽ>ٗ>-Z[vżx4་>־%J^>wR? ;mZH~=AǓ>/}>#Ƥ=q+䄾Sg%%C=Z=p=x=A@ng3> ?䶑=G/= >.>?==&iJ2#Ľ+;Z:>c1X>ϙ>qv>qͯC>8?;=]=X<}.3>ye~ ^e0=(Q-A==2=ы>Aﯾ|=>Լ־?<3ž.><? c"I>ur;K5 @b=u>LAo{H콸9߾ j*>6,f[>>9-='>t'H>IѾ=(9>=rվ۽4q>W>.w'4>=$yZ>i;=±=B=Z=A >kF?LV/>4.?^<$>+=5>~G>7">{i=+ >|[A>/;>>ѽO4U>5M>94O'yȽc;-3hdzյ>P趾--qW>?U>>Ǽ=\=w>rK?׆=9=d>'x 0>!H@?z=L=D>,i̢>ヒ > ><=.U<\J>">xce>=w0ԼIC=9<>A;QH>ؾ83B>ŋ'k=B?C0<K>L7>ǒ >_rg)-xm>B>'Ⱦ?L,?<=,sENl8妃<<ӻ>ܽS>G=>F㺼3>IG+\.S#J3/@#;>);f=ei=-?n=&8=>Ɗ>c=߷>ӽ>>><2=O~>o=H5%;4>_=t"? >D Rw).M=% B=!{=F/ =^k>g> =|>3n> ;_t8?G;r=

a=T<>Ѽ>K{;>G=;EQ !>&>6_C=`o;U°?8<>==#C8>ݚ=g\Dsv>WQ^<=u'=[ ?f=>P>6>=Y_.)<0=D^a?>=jK=(>N>->@>laFJ=V>==#qXu35>4_7># =;h͍&>%;#>a;o!Mm0>Oeꋽ?>7x5>M5~WOվ qU6O=O=t~&U>?=x>þD<>8߽=Ň㛍 [)ڼK"==zx!ͽ>>W=VNHy#h)ۻNS>}HG>P>>ሾ^۽>=O=,>9D:{ѽ2=Q\;.=`>i@V2þK=J!>R><1>(>rf>.1r >=T<8ʛ>Dy>=|aj>WA@vQ(r:g݇~|=2G_7">*>k = >A˽г=+]\9hg=\r=oYo> 6"!qK< =/>!½Dؠ#M= ?{ͽaS,>MI((nm='*;. e =XX!%0=!=[0=A=eg>JIͦ ={mE &]==X>>N>ȯV=?֘=HTƅ=E-<>!M1?r Tđ*np>g >Er<3=LyS >=}6z>M2z=_q>=k>Q~>=Gy=,_=j>2+N=k8T>)>Fo>+>I?=4#Zv>>e>(&0=4>^Qsx(ͱ >}p}=RY>U=7Ž=2b>DzI>~?>ۈs>=#U!hnd>@>*>W{W=@@><쏼'F>O>nN>&z>iX=$%G=W>yX >I<ٽn>M>f=J6 zd>>:>HҾN=CCg>p(>F{U)=w2>{.=ˤ>5Wv>Do=tR g;3> =pto>s!b=E<`X> vȣʽr=+>߾:>?>%=pӌy=}ؾ 3;(>NݼE=>2=j5=0ǎ<,eϽܙY>W=,kM>yOὭ`@([=t>z-b?#Q>%:O=Y>;\>^>퍔$UQ>wg`a 4y>傾= =ǽ=7Sܾ^ܠ>r> AuИ>=>*~HD>y^t >ud=!HϾbR=H1j͑>6ʽ[NL~ܽ>5=5 >'-s=T>{=5?>۴0uZ>+r>=3=">Zհ ǽp:) 0=SL$>φ;uw>VֱI]>> >ۙ7>PyC@%M/<´DÄ>)cS=*xN:)$<< I\<7<{v=/d@>I[D>"y4ݼ9<.1̟=*<'f=&իZVX>/eث !=J>><>ư=5><0=ںʝ̾ٴ(7aоa>l\>F=_Y/es>>ڤz>bپ؍=I{=1_>k<]=/<;=jͽI:~ n>ex=ko==|оذG\r,P>*38=c&>iGi>,Ǽ=>Aj{s=MF>,=`>pG>E>OBIOPyX=ME>ↄ>K>0 Y;Og>>Fa=|=2uQi>מ=:;>D>{>}"V OV>.>*;>T< l>p=彽>iY>=7>E=b>WSe.=K>/>;<=>;y>d3==>[vB>ޙub>wz=)>>>ݐM.&D>!zU>.Ww=k<>; b3<\ػ:>EOK<=2+=+aB=l<>:Yv>g>>=ݽ19lx=썾m>P==`[UI퐩R=Rϲ߾- ҽIb=A᫽=6l> 6>9?>c<&>1YyԾX>O{)A'֯USldǗ,>. 
͉fm>}-1,CR"=tPͼi>|=nĽB> ?ٮ`>FV."|۽]G=:>Y]qT =*>;)2 .%6Y>옺+f<3q~><.Q4c%=<==N9)V/>螆>nJ7=V2>yК=o>C>wbG=YHMc0&3>t>c >^'̽=>Ђ2X=(b%>*浽k/>C>>JP+s:)=ؽ;;5j><>^ =8[>E־8>=]>׭/==ں-;1">gjl&7;>B=N!W)>u*>yHF|=>\>:}ؽz6>R]W>$=l=Z=׽,’8u>z=-줾( F=[fP=0?)>n->2yĆW >LOG2DS޾oa?>_>N>ŽuR 5>$C=`>/y=aa>jS =v= >?=k=ܽk$ >YJW=þ!Q> y>(#⡮yݾiĄ>>2 >kzQ=bf=]=rޓ>Y>= G>\=G=+ܽ /9.>;ͼ'>)}ҵZ±]>L:}qNؾV.=,q>k`= q4=z=1> ?=4=-u<+Y>pC>)f sXS<[YC>0@ò=PI>"> ==i= Aּ]=B=>4[?~>Jku=|D><w>>B:= nh˽G8.>y>7=>;>|7 qPb.$>=!;.>2: 1јT>O@s%=_>Y)_>pU>Z%>)\=#Κ+HP>ہJV娾 >n=4{᭰rd=Ⱦ*?zu>|=l= ;>u>W===>=;=WUm6><0Gv=yLP_Ѫ&+)>JݽS>' +3?Zf>LYȾw \]UJ<=O>+2{=*=,tI==N=K>L>7Yrt=;[K=bHּ= =e6cJy==Sv=T=r=>>7V^Q̼=(H/>>\8~۽>hL>=p3:>c(:4?>RIkE>C$ \zAr<ن=7 >b`!g>=X=:>Rt.>= ?[҄m,>ϽG>Hӭ=Vu|P]=S;=?f><ġ#2^=Uv*'>ZQrU>J_L7>;T< d ??@>l_=YAs=+^.>&s̶7D\ns=j>b>Rq=08?8Ȯ=ސb>ךk`>==Jɪ>~6(V>Ϧ=+f%g>ab_j6M>>>s>=Nxڼ1\>1>>Kyi>콰9=>̍=*c>;%pk> }=@FS>BX><`>#d4>R<3->.>s>%*J#>U= q<&uQ>X[>{>H>jdĥ>XJ ソ&{EZP:>` }w/e>T$>}>S C s>9H?>C=U>Ղ#>$B!:->~>SaIH41j?~=V%4:s9>b[e>Ku)?E]g>H^=>d=/>68>/%}=d ==hO>\~t>G1< *$=^ >|;T=_P>jl\S>.-oYc=;Ա.>[p>X'̽>z+\>%>>`?S?t=BT=Ha>;YZ.>F ȾA>=>IG>s=,>W I;p>wW>d=bp>?5>oW =r;/=[:-=r=pA>XFzۏ=<>c7 >ə=I 0>ˆ>_X";F?#U4lD᭾]`>DԽ"=>!r><+> o==&>v;罹O4 a0k>ɟ=Kb`=&/i>kɼchnN,>#I>f-oP>X=pց>.`}>^>]-R+b=-FM=N-> =Ӵ>{i>n=ƾ:,\x?`/ ;=ӽXJGzl |=$ٛ=Z2>2+Ľ;$>Ƥ>X/f>Nx+8;)[=:=2T= R` U=7Π?Y>x=АX>Ihg^d=>0_=>1 +>|P.cR,>=賾:0=xC >vرy>>2Ӿ1½ȼ=M Ͻ{>6f=n,'>L^uJ" >,9=p=%pb+ٽ7=+Mc')nƾ6= t;KA>5>[=+S2==>]O>ܥ5s=܈=߾3=(>o=>;m}A>\/>F>l<*L=Ɍ@w<<>W>>h;,b>zI= >'ض[=JZ=jNl< -;*G>y=H;=q> kJ\sľlA8o{>hz?)v=j>Pj)<>ջ55>->z=d پTExW<-y*T>=`[>Q=ͯ=4>XX{,?j>ǹ>o=_ɋ>y&ٮeUD=|>Z;? 
B>ɓð9(d>v`¾~{<"Y@Ww>>༁w=s=v] =e=<]J<OW>~5V>:=_(9 ߣ>P&U X>?=v~>>waGU.?t;HP>b>2x=ʕqsB>:eA=$<=FI<[><Ҿ^={e=ʘ"(H>f,>+=(KKg>B;>'[žy<_X>=J;6s> Pֲ>xT>=4>t8?>RsdÉUD==0>4Ѿ*=*>=<>tY>LM>_m=i1KV[O=?+` )`d>ۭ=社F$>=F><#p3>+=Y_>=k>Yb>!w|>>>UHjչʝ>R_7>PE(j=M>ͦ=,^&>@qn >M2=4ȇ-(.> &>4s<>ެ^ >Y>s=F9 >=:־z>y=&V>3<`>*>l⽢㽎>R5ͽ"%<1 q>J>|"ϽI>0MLG =& >-9=sf>K ɓ> uX>*Ս=NUձo>UւQL +>(>i>M3>r*z>* Gtc=<?-+>r,:>m=~>l9$>WrüAD>&r&>)<޽m=Ks;>&>I>isv#`_>pL>{n>=m>T>m;\:)=ѣ>?1f=8=}']c =-=0>񐥼=j:4^=-=3V1ڀf(é=y>Th?Urk>2P>Mtx=kA꾿>v߹=v<M#e>Bi~O>,+< TC⾽X<="5E u>2e<;=>LF1L^>;<> Hݽ<ܸ=>X=r>:=QfR@=~2:CT'<#="ag=4Ͻjм{|$ǽn*%>^ޜ<XG-`Ig=G5=0A1>V;b=1^<H@a¢ت8 {U{R>Dj=D\>D=%W`=~D>W>H=g=ﻖ=>[=0d@>%=ᇑB=9<<=]<_>@&!=u9l=q>J=Y,V>ގ>˾h:> >_> =x=Oٽ˱<ȁ>0,q6[>̏|߽6^z==_f囖=ҽ_b8o>PD)U;7>Of>S > W=%=}>[Y r8[Ƚ[Ȁ+>"G>&2ի@>s=:<=<&>L <<>7\=$.>1]=T>q9| >h%>.> #>a?>)=6%T>:^=hνwX>- >赽T>=|Gbl>s>/lP>j=a={,=O$>*)R>uϼ=i?s䔡=bl<-8C|d׾|I=^R<0v=h!v>.h=8]W >J򕾦x>ۭO;x 4Q=@#= >S@=p}=\Z0>)v=]b>j1c)<=f8=n=ht>88>>4&;>x vǽV>I ]#O).)=!HǼ,V>Vg=>Ͼ=L=j ]s=ť=l~=~=%[>n=H ,=膬=Qn'>E н[>"=c**\io^O6=9¾PS0+9lnL>dŸӲ<-=KO%S>ޫ =Jc q\Ul"=ȼ`3,}qD%m>愡;>N8ă>+<4$]>'>ϗhie>"ɕY>Lf8?0+<`>ZP=!tkc> rľ>8=S2>:=$=O=xdAY[;>J{T?+>!&[;u㰽a V}=e&`5==P/+==^엾qf'=ŋ>3 W?I/dh,>Ug>'=-S>E g> >U= =WG#F> >$>%>Eww=l$>/'|>ޝ䨽D>m9>?̷<64*l#>/n>sW8)x(==5Q>vkM0>C@zJKt!Mv|$==:!X.kVm>B>=o>R# rB*<].>>'ֽ$/c>)/>V>[n>7<چOEB>#.T9%R !L?< "=#u>[K=Z)'2=!u=">x2P>I =@<9ED 1"#>x+*(5ڢ!> =Nb,cn9z]>٬=G>-b<=7E OC@"Gqӯ<ڌ> 3^>m<>"O=Y۸V3o r=C𥽱l>I(=q>= ]R!\"Ͼ->'$>23۽Ɨ>\*=9;]<a>L[>T'ü1k=qN<1&J!=73:>7@f/>b'L>8Y_: ǚZ>)Ii=+"oA<#E f!> (K<\8]~>e3;>l.Kv7X(}>LӽK="oE>pi>3Tw@\\>35>>D @o_Ծu<> B&> 4ݽ5 > R׾z=i,==K===jb;V]=Y&XLr>A{#Y>x3u߽F;D+>=D/=>/>=>>1=;;=(xI1q Kj߽g>`=>o'g=SȤdǵ~h-d:޽ >_=6C>i7> } ?|==:}>c[l>fhS=+:6ؾī1W<>9k>;=bxJ6)b <-:>U$>v=T;H)=>.@4=ھL>΅' 1ٽA {J:J-;gpF/>=2>>=j vk=Ge'=ǿO>lĻ+> AUc;>#ذ<]>I2N ZDq =l̽:Ti>_0>xX=ƪ">սhn >_lN^>{ŵ<=ҽu3%3= =.Y3>>\8Ə8Ծ* ?+B=et=sj23,;i?==(l>^GO>-8>8ɐ<i4>{p>X=@=&H=͂=>ZR=ý0}즎>5p#{̽-tk my>/>I\ڑ6>`IS>$c< HB7P=H=L=o2=<[ >jrn>g>q>-Q>_k5 (2gD&͉= ~}쾏!Ľ 5>~Z>E=ih >%M=|t/qv>=`ν`BX=CMҽ>p;4yAF>)I?=3d>H<h<,'=9+!=<=g 
>dJ=+֐\0 r==)/=oA,y>%V1>SVWmi>F>0i=a+=j@=U=FcBFʯ3>.>_yP4'C >6r/OCiY<"G=̙We&l=4MJQ0Q%צ֔=ffRûT=eR݁=N>Ӌ=R >N>םXD<`>fwwc{Ѹy\?=aըJ>*g>Қ$==˭)N= UlK=,i>VX>;o\>SQ7=l<\Jo'<m=yJ<w>o>3 0Ta>p̄>u=i=>W5>->^u>~t=q4=>}f'>m=a>/ >J}>=R->>!#U_wfo`N=oj=YG=BI>wn==J 0=#m7i187=UԽ>LM=b=><ij<>}'.¾qF>T>,)j֊A=YJp@>E=):}꾠ᅾ X44=W>B6>EZ=Ćp>A>ѱ<Ĭ>n0>{#jW 1Ty~= >ݳXD=s't=|z>)>즾<6 )0>=>@=w"=-໾P6=4E\X>*>| >NB<.]>^⯽MľE'gJܾ q>n=P>e}8JμԘ4>98>jFms\=UD5>,!==i=@Bm>֝D>$;7O/?Yh=t>R>Q>Uվ'>|H;r='|L:&ED0>G>$ͼXe6W)惾?>=-I=> #WFT>#<T>K=н I5!Bh/82>/y8=z~"6=:[@=KktF *\=>.Y=.dRP=aI} <cپ )>I>MO=vfcD>=(S=c=J]Я倾E=&=K>= ;Ƚ̵`>/=?>D/ɽ> >w]>yj>&v<=c2*=nC">"lz0>o=i>K;$7>Ǹt> K>0>=FD&>8ZYì>%mS>Zm/хI5# >mUC8?`;k>`<>Hp$}>-<><ýf=w=>>=o=a>HɻPB=q>x= ٳ4>(=;j@pS0=vr>/M;b ?=Яľ6?=SL=f=b=p>ꓽ~>H=O= >e>URp>>V.ybR>r=Ι>Aݽ?3!>{=Sn.b.>:=O<;X<L8>K>tDнυ> R ~˽t> yW=>Oh;pO4W5r4>>|.>*>0Ž==TTM>پ=Q>>q=5='>wl>%B3=>sP_=%>>T>*w޼ꎇm`4>YQ=-Xx==ڜ>meν6 ,>>eI@$=.* I6=?<->rVw>}[;>e>p=7===AʼB54=j=4=$#>===68+>Z߾ֽi)IE\=ý3N I>T=?gȾ?>EQ=RѾɍ=(=< >=pS>n=j6=I̲mie7H%> i>FL>cռ ,jϽ Wu*' Q>= O=>4=n>C?,}*{>>o#>QI>>=D.>4H=WX= ȼ|۬=͉6= Wཿ R Guý'^->\'V>,a>3b=X>@=fv<&=8M[=>H)=>?>[o-O?|̼O<sٽC_6>V9Z= h%>^G;j!=>>2>Q.=P}1=>0zPp1<=yL>G|8=6j>\d==?$ C=l>Z%="#zH>i=A I+m=[~A>ԟE=1m>ɃF=#ϽM&=:3o+ >2=qb >I@>">dǡQ>DKq_>?0 >^G;l=:֧W>b=RE8<=Pv,>5n*MY>_BͺX>֠=`>B>׋>= >L0=ے <;ƾMD＀> 彯l> Ǿ @1'{>҈Nj;N\ K&kB>9F)t> r%>ڽ< >P>f>N E,Eڻ-ǡW|=?q0 V_>=3&}=hZM$g_>?a=ypS=k=gb2=u =[E<fyѾO-y")2${>k '>Q>*4/N+4qt>B>D@ݞ s{ڣ+*>l&^PZ̽=7ľ [XOjf=i/k=;\=k꽰wk->T:<_< =m=$v>Nۻ>윅g>|XU<İ>~?> 9þ>=ULɾt=sH>!=;C >~+ >=p؍#+I$\ gQ>m=^O>P>>R 5H֗>ѻp> CD==Y(>Qbg.>O+=Hͽ !>,.i!(oQEr&rw>8q8b==n>0ν>Y5ⰾ r">&A>J 5I!C/=>hm>RE===v>nFղ`]O="6s=*>Ǚcdt}`$sL>Y/>='ֽ YgL>5q=J=xӄ>l3nfW->h>x%.">*=콶 =;Lrgn=۽rz=3(=.>-;D=%B=r8fս=$P6=w=>@1qd=vC=Tp2fDھ>t>`TC>R=f_> gW >L> wN9 ?2z=aЙ%|=\㾾k=<>v=*mվePdL=\u>*f2>zUlt⾻6>ؕ= ,;Tgƾ?h!|þu=>L=u>e9l>>=ڕ6o=Mq>vc >q=='҄Yo\ hz>rt>Y6]=gT$(}?=P;?Zi>h=E>l;?Iq˼r2>T=D>u3 >"rýN6=<0X'J%hƉۥm+>>ie>>ɍ=Z(==K[<-09#N1ą>&ڽv+>UGA;>Y/|=-/<>WaA>ר;<== >xFP<=gFx>o]= =w=0Gw>> W>J>x :A@rk=; Oڗ=&= 򠽕[=.=V$=0{)>gJ)ڽӭG+=ͽѾ 
6>N>(=tl>뇪a>mV>^8v>=ԽB$(>ᆬ>0qnj,,>=Iw=;}1,>Xڽ*>qP<^' Yi 4 2sU>Sd>u;=ԹH= }6B½  e=Pv>< z-ͽ@=q"B*>E?z6?^{>S[nb=}]lk>;iF>u!B'!*2=4%>~ZK׺J>ѩc=>>A P>w <חn>Y L>$<30>>k2#6>ڇ>={秼qY?T5>W*t=CdȾ|'޾yap$c%<+׽TBHX>-3>U><=&>`W,> 3<>>rgT R=>o{=Jž?Мe$;Z?>B>`{'+>K{p>޾X=!>P#?:Ӿ*-=^*">[<=e>b>^?p.M`>>>SR>(<{zW>wn>W ,>:=(=⪚==A͇ />o9>׀.>\&>UzM.Ͻv >l!!3={Ҟ>J,>n 7<]L=Bp=lJ~>ӽzrU>>[&<=%N=rL>M){<>4>0)>Jc&ټ+?>5C =n/=o=.>vgXz= L>_l=­>1%=2= z _>5~;! e>E>;ܦ]/^3뽲C>>=>}t >^=B;>`>OWX5MdO2NO380dX?>S>{>e*>?ӽ>=3>|H<&=Q_f>{Fklz >a=̔I>6i4w%=``>] 9=J>X63>QܻV>:>k+;:8f>=Kweer=>5wE=Ҕ=;i7j>iĽɋ4>ZT>SMa=տ=lWxn>>Ί̊d'Y>f>>(>=2?h`P=G=<= [>i:;v>=B` 3JQ᥼XI=*>z=>n^P>6-=tͶi>>2>=3Ȏ>2&@D!=?y>==/>> F@ӱ<N>5=>{> =C:=1ϽY>m> )7z=%ټiv>j;= Z= #֤>|Й>W>Z>>L@?2%>ra<,?2Ӿ=oW>_MϠ[t|>G>g'Kr(nHIֽS=fVq- >:F`PX=(=aW:8=~#>{>|"B⢾$-> <\=IE=i>2*[8=M=;>f= !>V'" }'p'l=ҿg\=F! ] >gU5}޽=6߾s"=Q?A$> #=X6=J>}w 9:>79&=}%z>@н6>,>ؙdR>^=dUGok>C\&=> 7h>;>9:ϣa>14=u=6>ü!<=\>,)>ѽ.Ĭ7#>^eҞ(>P>漡}e;>,=x=o+<>-(<վczR>ξ 1j>%[?u.$n=l',KU9>>Z>H>=a!x>0>Z8C=OE>)U=}U>uJ=Fn/"<ⰽa,><=+c>1ϰ4>lY$>I=>,?;AO4D@>LI?BH>QV>Ib޾6 ?x7>LP=tD>*~Ss8뾂f> 7wr}>B<>ɰ;i> @> POͼo!>lr,|>.cL>^%Z>ע=|=2ν~>}>B!X=%>0砾vZT E> <^'=:Sۥ<4>]/#>c<>@O߼U/ƶ~7=O>Ƚ>K>,l';饽j=kf->C>'>?:U>=͚T=%"D$==5.H> ᏾^=Q<$ ;e=6>^>>0C$>=qs=&HJ>.Px˾Kkq 5<<@Ona >m.pI=!>|>Ht1=)#>^=&u>Zo>-=m,,H>aȞ>~x /!>>ߋçNΤ=$ T={>1ܾӕ>>B9B> s <-=#>!"=Ո>JA3= ѽt=ߨKǽ#C=Z#>=&=T5J z!Pڇ~1DÆ;Ըn>bνxiw3>H89=jH ?2>w=S>6k=e{[|du֊!?3>3ֽ1%r<4x)K=>@> >=OLoDAiߺo>!ʇ ,>[Ľc> 'm>' >w`L>p>(c1>"J>*eټ ս=)!=\Oh>FYܚ;|>\>XBhF=%B@ >JŽ<>  =iq={ >4\DY=d>kL>x>>J=,wվ^ez="=9c6<=zpJ>XB#Nbt5SX1@&l>'->>O=p=62> KQ=XY>,=}|] vnsd =Ae D\C>N{= >Y< f>ν2 ͽ;ׁ(ÕC+7<`k>eVA=>Wid T=Ks)<;>(P>g>F(b:P#'={ =x9vV;ͼ넾G >k>8z$=H羷 z$]=0wC=>ߎ=E?&?>2d>uD>ԩp=f*=(<=hӓ> t=05">42<-Q`ߡ==J64=>?F_=G)_>=a>o=2bL>bm@y\f:VQW8<O]>zT2=1>(>-|8"`^5tT䡾I_=f\[==2> -w>V̺͚=t>>>a>iWF+ñf=q۽X<>Δ-I>eサ35ݍ>/Z=L">ᴾ`.>lwKŲ&>H\=&EbRV> X>R=<<ꎘ>Q|==8=q>,.==t >xn)>Q=lF<>+>>d[=j(?c>s>+>=PG>ԡ+=V>ﴼw=q=U -=u=j>۽%>~Rs>ʝ>Ƒ>Qyˁ=~dߙR><ͣXKAr ./`[> f=I4>[݉=XzO<ѽ =7ccV'> ==7<:>wև>Ⱦ,)>ɽ(a>z6>S>VZ> 9>.;>=wp=r>}L2>[vԽqW>YqH~>:=| 
i=XƽpE=`>J`;{m>^}|> !C=G޽Wnzbn>AW==*=?T- >{= &h=>0F>mþ=ȘU$オ> *P}pϾ >Qͼ >.:>:5>!==8}<)>B7&S>?L> ZM>ž̋{>LJ>QC"޾ܻI=wh)= E=>豼*T>s >J=cǽk=]%b?` @<$:%Ś>Xϡ>1> ='8D=mDs/=.>Ȃ >(w {>0ǽ7T ߽`>&>os<][=4=><6 >Үd=ۼj>o6Xƽy͝>eԾ;:D?'> =ɉxpq+@>8Xe#K>rx>-b>(>ff>u߻\v<Ǚ==+,:7)=w=GW,>ュ=kn>G ?5o=[ҽ>>>s #[K\P>6>3i=v>=> =q=s;^}4 &,Z='n=0>Λ=j=x+CG=5e>>#=>4QA>=Z" g=sĭH>IˈP> yY` =^pOXe=$>7>Qvͽ *"}9>+=\I>55e>=#>>L(>}|S>=,=%V{ۇS>>X޻z>><,=>s1UoT>g=3=4=]:=թ>O4.lY&u<>{<\<2G}D>ҧk>:=$K~̟0]>B=Lbr2r8J">NBe˻u}ν-?=ח>ꁬ?>1-_M=񠽚H2>KJ>#5>K@>>Y> +=x;< @U >WO&D>->pgAK> q%|K|8 A=s\= ~>j1A>cM>R,> *mz~5;k<}W>. LuH<8<ۤ ^R->.>}`>d=`oPh>>3AQؾo-kAHH=^w>Z3=Wf;Ɏ=1>Nj位N=ľW|( >9y@ll>>4? Hˎ>{%>f>na>AC^/}TN >vq)>˩=Ͻ6N7>JM C<>-pXL= >֋=v<+< U>ĉITd =|Ș>%="z{>[>M2=` $閾 _<>=4~뽂#=*A#= 4ؽ%)N[*>sB^>5)ŵ@`3>^YO>J =İ2={?/m=HV>ceT>Rk;Hܼ^>塽=Jc>mP,>z]=U(<҂=})Y< F<}B>)x=M=ULӽ} >>>ɹ=QY[>;e-젾l>ּ.4=ʼR=$#S4b-=)=3y<ʡ`>ľ =t>=gqܻ/>揲m/>{B ӜjI( >c>'ѤY>є=Sڇ=xs>[=>_k=Y >`>b>_e;=hQQ>3z>V>>>B>Lly>u>/<]=IFV>>o>Dg4;Ď>P>%>J>˴O>`z9>ݯS>%>=`>:b=._>1>%+׽6>f >EK-؎>JFQ7UE#RkJ; > >[ҽӅ>bB齎Ⱦ4/>9=R[E>l=KEfJ<)ZEve<@*։=5t>Y>'>0[NT®}>E1>)߼<ۺ>"!Ɏ=kH>S/>pa&9=>uT>ho!1D=;d>ؽAs=Ն >6HVK6>ڽNw.>kȾkU$=|mQ=kE=p]'>Dvhi!f=>\X>$>;E4W==%= tQɽ n>$;˗>\>:vH/Ь>9飾7L0)>lp˽~Ћi/d>L>; =(뺉=?GI~==݀ >|PS=>m=.;N>)>r)N(j=mH>59<F(PZg<0Է><3Wvp>I> _ɖmވU>+<镽e>%<-0-<&>%>F>ѱ>ُTH9~ɃK>©$ԾKY<>= >} Z>_Bc=SAu>Ẕ=\f+½>X>2@=j|.îZc>&> 1ͼ=_cZכY=^[B>Mr>VbV>|>>e>0vv>*reN߽= >3>,>ġ=-=WݖεڽUat<]Z{M˚n>c>|>A>:gJv;.DD>ܽ<v|YAT= Q b<>RHLƞ㼴f> ٠$==aV#MV$a>:>/>x>lp= ;yRs>D>OȾI˽V>tG1=X/">=7!\?HG=E9:Mp.=\<&HҾ0iR{ ]z+>N>O=u>/=/7'=c!==Tp=r>@ܽJO= L=:o-Z>U~нgU%χ]N=><>Ӡܧ?' >^{=<@ =C??`w>c>ݵ=nW' ;>Q>,>>4ߢ=ze>e=f>U>~>; =g>@B _-="UQ=̴qWJ߾2Z>#=">26=Aύ 7>4=J>h]xG>wG=抽CU> u==d>X9*z>WAhqc #* >&H;=۞=d>>8>*p<7ӽz =Mb>߀}+jXC>FM>B=%=0a=1g1>6 v==WyC8Q fe>1>gV>lD=f >!>==Rƾ>|;>!=8_>=_<>[<|Vj >=8qC>ѽ^5sni=(=\>me=:l=E#3PVL>>S&>s>=0<cs=QB>3>:.ݽk>=~>hl>=@^qjͽ=z<˓>CQSXbg>%Q=:O;=+ѳ~>ʔa>OĽ|a/>48><==A;9\_?=4 =zk! >9=ɻ'8O,D <>j>;=T3>n񽯮7Yf*A>1,uvKc>l8\>2?پd>Ʃ==L =̇a~$iP]>5t\>W8X3O>fcν7==@޽p vs9,==ah>!NV0Md;>m=*娾~Ȼ7>)>co>Sm=qU=c>ϝ>:&>ěS>U:>jv-G/=eZ>T? 
>!(<>z>PB=Y=9==EfM<6=l,L =sq=;E>Y7_=VM=>Bkb[p>>93>yy>FySٽ w ZlF> "_ݙ=p=04׋6]>0=þM(cml8;>rjy'¼\>}:xK$8YiH8>&6w0f<ͨnFo1> U<ٞ=Pnf:޾G=-7:>c> > O=Ds>f>Q=E_vL= >9zDJ>ᠽo=>h[,*>\.=Xر=IнT=?>bGm$>;>(=İ>cգ>i=>y=즋>=_羧Fk>b=tE>zxر-Ҝ&=v=0>hĎ1܍렽>j=,;!= ƈ=u/$v>%Yʼ8.>V=8Ǖ=^Mҽg=iAgB>;ilW#=&GDU>G='02<&=feT=->L7G>~z= M+>=;抽Aɺ6=>>>B=jH>[4>å=/ TsB|D%X>&%ɋμ^>uD) 5ͼ\A{>s^w<>aD>OԒ>i8'9bPKN =Di>>r\>v>>ML=G86x-.5>pupʼܖ>H,)gn>`̎>ޜo3<{>dM=ڃ\>FF9>"= =[8>B=>K>>a~=v=oU1Z>q>ǧ>c݂:=%>0|>6 0-ؾB!_>Zw>E>tŽO=A/>wsBHWs>Z>_u`$RI=xcR">nJ>쾾α>{6=T MPԼ]=n>Ů[0 O ::>AW B)=p,>aQq(s>`ǾA=.>#FX2T#<7*e=6^=Z՞>>< >>pA/; 9=^>F=0 >>ol<$g>|ֽuó:E>HA>G=9>e=vDs{Y+>Ըm#J>,=R|)>z@ٶ>YLBȾ]>N>~> =MFS >=CE^AGc={ҷ-< =Q>4>>Z4KA<>Ճ>N@=Z޽~>G >R[/ /C+ :6>>a> =ս7>y>퇾,% eUë72>r 忩\K^=& >=̦=X/׾@< =4?"> jn>҈F=>x<g贽** .>_>=&="!>O=Ii>¾FI><'B=Yj>+<)|O>7¼<"[l >?˽o>G=r}.;k:9+BC = 0>{ت>(`,>Pa< )н}=o)<@΃>皅%>Kߎ=u_>ܽM>9Pq>2\a+p+>#'ԏ hHL0T,f}=L@>r"l=w^/jv=#>>Êh^=JWh>-׽= > }7f'?9`0 >V0Rn3R>ٕXUGEi%,>MϽ2 HM>N>7=wrռ$т>>|ݾjIuܖM<nW)=R1`>e=t=$ >]d=c6W>W|x>#?GQV=Y h|>=): ;ڏ=w3%/>H9C=:iQȽ{<;eL=ü/BG=X{Pɞپ?tv=C>WN>17Y">z=yQ#x+ > < >9=\C=9><<15 J>蜾Bw73s??D>*=P=m>>>TJ>V=`v͊N@>>N>>5=?JCc=/7m>=ɾiP T=>Lm< $>-r>[ou\G< >_ 65=C$>;x=͸>?>Oɺ1 >kN$y̮>5ѽ", =O j}=W:.Dhչ1<=,\=l?.>B>q>тvfJi_ C>ۿ0?=<=ҷ ?>@>/>(5>U"WZ=qվF3[.<nx. 
x޼~= M/i>Iws>Ie[߻ yO^=F>@e'y{=5y ӊ<`=]K]W6>oaռzk >[MEy>;p̼ ?!iN>(>{,j>D=mW<2=t>>˽M=uXٽ>[*> =s>ɠ=.eދ>]2>yS½╀F|>rNh+=>t1n퍾y#> 征i~>V)2>yw>_>>ZD>ߏ<;=~'>)|> >rC>Q>\I>5>M>5>XgJF>D==7=҉g,s=>w=f3 >Nq]v_=T#Ľ@!>N>>=X<:kإ>-v>I̽d?>@>ѽ/>*+n>D>4K8[>`F% > >;=/C4>V&= =H>6?>0M>(; >-=ZF~x>@;h>J=ʼBe?Uj>>dVyX$ؽN>x𽷝80 >6$7キ =õ >R_n;M%Īν+>2ZB=4>|:liE-PM=Ȉ}=o=\t[o==D&<~ȁ!>(N 3˾-=>}'> (C=~w@ļ`=Rx!gSs >B=A+=W,>[f==_>(U>B>Sy>%WOn>]罴<%P>w>$>yS>Jj6]>]xG>~Ķ Ǣv#=.>$<P><2A>㶯{|hY`)d%>F|=>f< =C>]=Jt>۞M >cO9=SGw: <=[ ( =[>XΥ<=-@%U®C =0S¾>|=˽?_>*Ƚw>"c=fVh>K2sH>76<1%<0=._=' <"E`5=2}=A=v=n>:mӼˌg;Q=>֊3ؾӾU>S/ʼXujNhiz0Ұ2=KaY8c>x\>t dc >G %b=x =6:9 >`>20:Wf?/&>8>`ٽ@n5{:vH=~=Ti=[>Ѧ=q k=~:֬\p̾k=.6߽yS>4=7.;6Bi='^=̼>u>R_hNWx缰]=q>o_ֽ0=>AT>ɘǻyb=I T9=;>=-nn[>g;|>4q=,097=ARdo<2=!<> > nE>rh *=*h>>u==uu(==/s>s>GG=?>܉M>t=?;=-p>t]<_x>>@Hr=T<=:).~q>'ɽ?B>5%ʖ>X5=ʽ0?b>E>1?7]%=?sY>=>Y6>挾;/>S@>(=F=ee>t>Na4= w?!rE<Һq<>n=~5=>=z ?>a>\=0>?3D8.н>` >0h 3>p{=n=]J w>Ta>1> @S==>GI=>g >Q'4<ӾrY,>h>s[SZ;p>K j>]?y5=S\:>_>r=I ~~=6>Yɜ;)>Kc>l>> ߽\د>2XWLKz\=t{f }<=[d97%/>ڟ=fy?4>!;5ƹ>hO{==f&>s>`>[>8> g=>r~=ƔxѐLn>4>i߽"}=ĻWg< >ɼӽU_*>Ӽ'=1۽EB>.>l^胾z<== ?>[h>x2v<57<` d;ڽ->>"톻~>g@U>5>/\=Wǐb%`i>g{tS>_ٽvYR>q&>ȧ=>o=I 'Qʼ/X=}>Gk >>>ʽylR<&xL_>ս%&> 4=J,> =t=1b>)3>֞=Uof>dv9Q *== Fs>Q>>Xۉ>+ b>jW9ݽ~ow==:>=k?g>jy?7;>w.ɴ"iJ>U5>5>*(Vͨϼdgr %xvZ}#̋+=;DRv =Z f>|߾&=0&jh=Ǡ>u>hT;=¾w>nV><Yc>=ƐսU=,>>9 ?_A>2߽ٛu>Ⴞ^=b=;ű|p>Ȟ>)=~!!F[=sM>ջ8\o>`>!Ὕ&= [>k"݌=ҾWŽaԩ<đ὏> >?=AFP彧>x'>3؁L}<&&Ů<ak=e>)>FM>;vfOR/\G>ח?`=-N ?/`E#IIy=8',R~D޽ ;h;>>A0j;-buW=)~>N>;q=ht_=K>fjp<=> >e=?9>覽N>-==v:Js>=G>c4><|86 < >l\If_Gq;GXD<Æ> mp>gݽqӜ==[>9j=6&>{h'+rUi>!>pƋB>*f7> ?V>/>QGJ>p>~Laz*N򽽢>?UH?|F::2=%=lczr>U*9>OW>L%2=ֽv>D?<1_=̔|./ )Q0 V==cטa=o>!=؊^>tq=H>Z7|)&6C(+n=z>w>Ȓ=QX>s7 =|.> Z=D>j_c=:T.?;=U=5=*:BY=&^>3>?> _<_|=>d=H=I>=,>>|={^{q=>IG >[>R>n)u \wc>/=,> >>|- d^!1i=ɇ== ^aU>>G0= =XAY=C{/1ڹ>Bk> e> 6K*fx|IOP7/5=T2>=r ~>FY,>=8>+cyCX>)R ?Ľ!n==h="<>ٹ꽧G>O>>Pt :>ݾkwF=2@=ٞ >$>G<֜u$4>>>v-E b>q^>K\1>F1uU>. 
<Hr=ֱ> FĽ˾I>W(m=ުNBԾ\X<?p=N>qY=ؾL = >8ƾU=/>?=O=6bo N>`QH>e==.kث,e>xCႽRO>2=2܃>{7SuŽ%j^?8嬻i Tc㾴E>YX>Y=W=ƞ>:~< >3Xy>旼Mue5>;9#O@N7>_>`=;(HC> ?B?e?a=VV;Ro˾L.<2מE>}ch(>Oͽ>8e!? >6r)>c>=Cz<0# R->+>F΄=[Sn>M->p->vu>4m=3P==iXT>j3>Ja>>Iʍ/Qn4>W^=Os>F7==>z>>=>+>=?>&Y>Q=W'0-xIɽ>f J0>GY}>;>%= P>>>%)x]r|7ĽMC>y}=u= 'j>[="ݾk= >SF>X5>Lڸ>y鼎>>k ?-J>]0>I.=Mg>ZӻB>f>&?T}>(=>5~3>ŏt7M=_=>d۷սWlD?b>@5u7>Q;[PTHlU>>z=>> >*=Wn!8R=]y)?Խ b)w>>Ns3D[=uv0>6 [>>VE9,(Ǿd=>fι]>%>>#EѾCm-w\ͽqm>a(a#?6=ih:TYo>FB >#+ >ɾ >pwb;9X>D?|Շ>~cCξHݾgQ>>! c>$R9MУ=^*c3cKb>h>t"@w?=N۽4} ,s=88=Z" lJ==7 =־m6H $ҾHf+m}='`=Q> .> `e=Z3o>R5>F>a>i]=?򪟽HǽyXM=Cu w>!?#>l=+齀I!mc>݊->F;s+.>̰%>E=,>"=ԅ>DKJ jLɕoѪ>:?JS;]>Խ.X?e] >mS<[> 1=>:dA >$M>&un~>9R]h +gtR=1v=ԉ#<!>n>Ṡ>{ _+ R>1>蟽& ?W(T<`ϔ> <Ψ>ZfM]wU#;q>f(>=.>&>T=Y<=eR; SWt>sU'>=i5)>B >$>s33YܽjSվ@?\>=~u>{ܕ/>> =iz[>',{f]]=>z >V!>m-I*@> =G>ړq>f>p$.>͹ ?O>n-=]⳾h>RpH==>%R-/=>*^F>쎾Wf>@[=>-&>R Š#+> }>>bBwg> %B>FjDV#ykUѻD\OC i)i2>~ =qνDhu+8=.j>`=ZF=>%Fu$>9v>9>jh=(q>>o+$r9>V=x?">ix=ط=/;,ȽO> sR=@p*>g>(w0Ùo>d=dq='za=Ao1]J,2>'nkU3">N^> >C->P==5>KP>G|>-a> y5F=ןHsi.?c& Dt=)Dͤ>Ǒ=  >by-=J}>Դ/H>B<\>vnK=?:Xf>qU>>$ci>8jA6 >!VR(v#}W>L ?D>wPs=IH6>@풾 >8,>;k=;>v:ՙ> >m OO=>ޥ$;e(l5`6>Ưg "ϝ>_ <E?> f>>XQ!k>xƾUSۿ>֗>\2<> > ?BB<2?e>>>t8>>R1^=X*!i=3S½$=` 8>xN|gUL>AT$EiD}z2^=l.=>ͽ">ľ >]ֽͪ=<7>n>N=]>ki=A7<4.>>q=YK)E>`o<=w恾E $=[8=l> SY&ľKN,!>>HbJH=3=SP>5=TV9>Ӏ~=g=t=ESS<,6'X>S >[=ܯ=Iz6I>3l{A)=Iqn>8A4z>>F}=URy>=e=43l=S%E<;D< >W2-ށI>޹V(2U<kj2A> ưm==?^t. 
p=>X+=`>> h~=DMs">=;}=>Y<>dE?=|7\>IƾK>=쌾XҽJ>Ѣ<=8= Z6%u=O=ڼDT Bнb r>?"NJؽН`>^ ;̽K> ]" D:>>1\Kn>i>>>!q=>>-P >==.8=d| =>:=l}g4k/=wbAB >>~S>l`>'NLQ-=V>~(>G-?rX=Q=jm~>3>}+>P>=Y{(>V>Gxd>@BǢ< =>͌erg<~ >s >>% I2Y= 1?5`>q)Ӽ:?E=AĽA =v>u=E^j=z-Ӑ0#+ >{t?ٽǮ>SfzAZT>=XJB=<=y@e]`p(o.N-> ɜ,>2ۆ=]> ],>^{/7f> {?n֤hL>ɒ|+>=<'=t)rŽs>;`gkqU4>D> ~,})>\=P=>͏>=b́>lmI>^r`&<*ƭ$>A?>0f>="=Z+=tC=sX>-5V'R>C/QT8ǫ>5份JG=N> \=t"<΀>F>=(=H>S+<&aļML>W($x]b.*v>c>i= =I< *>tuB=<(e<щ`t>p*>"~ =Cf>;> u">}Ƚ `:>>t4>Qx>IXH=W|Y T>}}>zܻeNWIޢ==X:1>B=TYW ֽi+mj =G; 5>R!Is>>%~Z>@]L\Z=uY=0=/b潨k>E;E=w7S=0̣=DU=C>/=ƽLɭuk8v* J>KCeJ>j`<5;Kӷq>>]M=ilL?姂8E-Ž|>v>==lY>I={e><1>:v\=}_> ֨<="l騽ȾT!3v>׾'=Ojae%h->)y=]06=ӓDTvx侇>:%>s'>]=f?h=rSV=5ܽ_!W>㽵VXy>b l>>B:>tȾ{0AP0qm ӫV=Hhߓ>|_>8U\>&>Nb;=>|[q=  <=>c8=:a=+x=In5k$K[ >&{=I=0> {ON>=Ak>RH>a@QA$=p"ʼh<{>1~`N=-=V=/>t9}(=(Wj=>>'>I$%=,oI=m 0=l߾c3 >*f >@AX$>>ÃKR*=T=1<=)^Z!O>y,(>,;[k>Ƚ\ >=}>=60= 0a/i[>f)iE=g\>=D=9a>2_=3SN=H>>#&=}{HB5=Y>E?#>I>%ڽ>:BN[>EY=dyY>Pm~2l>=95;é<ج.>4+r& C;@VЙ8= +>¾4u>=>o>R[>yx> B>>X n;潮>=-܂=`)Ua>eYj>)^2aO><%>sl>A>=*L> Ŀl=þ_>Jd =o?|h.о@‘=5=q>h̽v593S=(f>C>^ =Ï=>[>,BR=̩\> _E>]8=C==B=x>L >{k W>q>"=y=R5Bݼ(N4>{Q'>EP>\ʰ=G< !-&{>\ŽdT/#>uTޞ=:?>*V=j>ݶ>,l=Խ>E>W=jr8>>>3=!e d=Uj>Q8tw=? !cE>=c4<> >[>9 =Րة4>>)=#C<Ý>>E==n=> _=?qsH>m>q`)>ng5ކiRV̽x>= rrJνrqM= =G>>>:i=a!>W><#> Y>tBOD"=8+<==oR6M-䵾\ -h?=>&>=𽵷?=wQ|' =' ڋ=^>Ov=:+!=D`f>Yhdnk= ʽK->Պ>.r%,ua<Nc=>)Tv>nGK=a|=<>c/>g>O=5@;@>*=;?ؾ >#׽Y=Fc?I=/~v0a=+3pj+6>>E>G >$Ѿ=Z">}==_=uE G 411uH>V>j,f >It>=K FBu/=k=i>1=gh<1mY>>c>Y= #b=0?g@ڇƽlݺL>S}ۋȾ=Q>P4>j7>ށ>[NuNbKu>޽Q!6F_s>=fzB=zG]p=[m }e=n꽱%I)y>?C#=~:оI >Τ<f>ó-=W腾=Jς>>=) =%es>kz=Y>_ϽmLR>P=%N>NBf<'m>϶X䚾1p,u/?rp5L=7@U>Խf8<߈4ѽ 8Fe=d:> ;Az=>[ǽE½E(*u>P?>! 
=%e+|.?>Ya=^=x[ґF=h|t>p]pb,2d>6Q}>>(6׾[<'v=kΟ=Pp>l]Z_=7¾Ǿo\=gtỼW=w=a=Wf>s\ʽ :y +w$74c_rg۵I>#v>+btr=<'{=f{11Kd>s_>>=֨gt"P>7mV>LI>CI}:Nʾ@콽<_'>jPgmDU<> > >_G<=">9=1?PD=m=ů K>(y>v(>S=h\'b*;}c>R <-> =-A6=+M\><潓=j>o>> μ&>86ZQ>Ԇh9=G4>=k1H>]̤ƽlx/ƾ5>A<)-;!@,˽"4@ܳ񾶵+>kB>c|>_2>`y7,>6=;pw= :m8;o7B:>ᕽb>R>\=vBy?лK)=A)[%W#1O=Z&>dI>lz6-vpfw=d={O > vӠx>Š>= -< -=>%ڽ!9>4>=>Z>o>O1=7>l;!ћw 8=} >ds] +=,<~UZ 7=櫒/н>HDe=辒>꘼ǽi=)v>$[>}&l[?N<)>Z>/A*= qS> $>:/=֏Kq=uV= .E> I>;>~>F=x9l3-h >^8>=\>ɯɔ=w4< @_>iړgP^> >q~=!V Q>:>Q>f'=j{bC>D#-@໎mQ>US > <'>򚾥L>Y<} a $ /~+6=6[ݳ> ey}=>uh>d==#=F>sem#_4=!J>( K֯?;cRj Ǿ>W:R= >O}>楖=E=臽}>1!L;v>F9s!Cq=_MO9d= =+޽D=5D->m l=<>pQ s>)Mň=5t(lýu=akXD=˚>.4x"`ɽ1ӾFƾ>1Ksܛ>us>.ˋl=->>Ahg >k}2hԼx'!+M=|>\H=F>ϜVU=}\(5 -<5 G>/> =L=/>Qqip;ž>^l>M>oI >훒B;L=L;q=^Xk= >FP >(C=ա%y[>ξ/=Q8;Ɵ>(2>u>S<=j>Cգ=lr⾤6v>ex O>##=;=$;x3T%6`dߦ=ZkuEm=c[M=l*2_>[<ˡv>Z+'>l h/X>콚<_jofimcy0f]im>rnB=l=P>e>QW<&>38>GS>=B>H¾8p㵾1= .ѾƔ=_\Ͻ,,>=]>u%PG?R>hժ>">e z%(w==He=";>/n>H>(P>ļv>oʽ>D=\i>X=BDƦP6><w=e}>9l=^.>86;bJ̵h>^vZ>PuǾLPO > =nI=Yv># D4>/~d>jK :<=@=Uҽ6=:3T=SUC=5 >-V>;=C=zEN_.>|>;A=kCw>x>Îi|+Ǥ+>e@{>;þ`>$=w>QٽE>++h>lo=1Ȇ==9@U>>7=C>zʝ=2s=$8U >;]MIM=~A{><(]>i=r.>L8pWiV>g_=e>ߺ>=};+[{=;=f>$Ǿ Ց=J^Q->g=򊁾Zn=\=լ=>f|]>v>ϻ>ؽɽO(>K>== ?1={1>6-gĢ=S c6bO߫=i>^fY4^w>M&0>&ݽ Z>εB>f˻BE*J>ܒ(@>;c8=->=浽@\=@>i_0MvҼg&H>x>&>=j>W{=ܲӼG5u%Zn`>ݧ> Ĵ=| >'^0>;;Ҋ>>kagy[>7`= .5>3>XLoh;wj1ze=O80><>(>ШSV>#;_=yYz>2gV>HѷI>w;$=o;'پq-=7o<7:J># M>>̬d䢾p?>7>& E$a(>ud=jٖET =ux[< 7ݜp%HJ-a<:U f>F>jc5фc><>٧32g<=4~ A>D>:=ԅV ?J=Y=t<ƽ<=ϼI̽BMN>ٳi>Gx<Ҿ(j<꨾㲓>?Oa?d>B9 >-7>AS(-5+>=|>¯:!WQt=^G\9p**җ B8=*=>$W5F=R1>۫ǻ>>#QV'gDM>:#>%%5-=.!=tδLTr=֬TK6{ h(]4=/>lc>"ký=ͽa׽x|W>M ?> l;M>P7<45cCyҼ>Ys<ci==hlm<>A]x>=ݽ 烾'=ړ K%(G@ӕ,>y =>3rGȽ i(pG>aq>.<뾫$==*58ν4>q>/_ <>U=v>$ >+4 ׅY!S =v>3%%wȾ .>v}7tW_>poe;LT>WZ>]?n*>3y=_a‚!{>O߼sM}Vǟ)>F.GSQ=Hibv8>#>O0нx6LGi)==d/t8ֽ >۽UD˽k.<,>h}>Jگ>X&M>= 4===xl>gL>{=<>E߾Z>Ez 鄾8> >0\@>a=" >A_=5n=ˊ=Nx)|qD> =ӰR<:Ѿ?>c >C<>]ֽ(>l' >q#D`>uO<.;EZw l>0K凾3S>=ת=w/=2<=ޱ*νǼ3bü1f=y';w>̓r;?;=T.K<6P@>ܱ=fA>>Tui^C!>o= c>ymN9,>Qen<cI6=6!C>щn= Hx>Wpc>y>>8>#w={I3& 
1=a;7X>H.>hӼ;k>1C=7 >W>\{ B>g/=b>T/F7>7=?=8uw <=_}=S=}<=NxQ 6sJ>Ԏ>3a>h=I>F=H|Žܽ E4O; %nU`5$%=$j>,>u>cc<=hV>*b=>~׻XT>{=Lz;>y5>,{>i۽ڌ=!Fƽ*HE>Q> =žg,pF!>W@>T+ ¾*0>~,>e΂?o >hy;yj>EHy"=*\=E>cxIc<&>u=wVf>\B=:=$d=J7㼥p$=a!*>>5_b>:֓;ӾN ;8> ۪>!D>(=T(|>a> v>'=MN!6v(>~q":=t:,+>s1=>: W>ߒ"c> Aq=ry>Z=(>\b==սq('pK=>+Tܽ>>`>4)>z=Zc>Bo{>t=Va'Oc>d=G߽/pA=<7>Ƽ+v>A(={Ra=3q=>M =>=>(RwGǽ">a>Atljl4=&Vs \)>Pc):[_9^=ʕ>X`GȼĶ>N<+kp>E=;z=(k|>pAN>PTZڍ>1vQaRjJLlHS>K">iJý V<>߽dYaiX:7˾Ͼ>=c ->/`-\>v`M2IJ=:>w>~e 7=\޽褽J>>o0>a0ߧ==ai+뻤3=`{"=vbM:2ݽF=b.ŵyB#>lvjPP]=y⾡M=<} zrGH=5Ͻ ,>v=>=c.>֨qhZӜDADV=>UKɱr=7松% <Twĕ`=i=5-=Ub=p͑; W=̥Y=8"|j>H=d=f<۝=4]۽Ag>Պ=r3>=B=G_?>0=R>`r7bž+% ?d<,u׽n Ğ>rw>9=Խy='BԽ?>^ýʰ>me.C>=x8f7?:0>I>:=6UByW.4\>i>(=>D~﬏=SLec=> 1>F=hT]uRRa>5Q'־0>ſu>H@a<;=?$> v>>p,.< d=4>/=>7d=#9P: )G>ܹOm>.<(={W)>/6>_='ᔢ>p>Xm= ͽ=x&;>6?k>P?*+>(=N5)H<==@)5;ܼk=D>:Z> b=')=ɽ h>ai=S=>]̽>BeX;`A=[<^>0ؽ 4˻퓅>o"> $=!){:!>XXjQ>#w&>=$eB>~=;?YGY->( мM3VAK%M=\N<ʼԾ`;G=՟߽!Ct=>>u>=H<|;>- >(p=ľOǂͬ=>Rx= U=bFIAj>>Y9>> >3=_<4>ʔI4>e"u=Jm0Tnc>͹y>&=Q\s u$=R=b>Wo>I=?ս5$>-= =8>v!pv1=&D=둾r=,;+9/Y==m׬=Lb>= ~u mἌ>î>Z>Ԯ>V2Fc"̽rcּ<; %^Q=`tv= #>EٓW;aIVzi{>b?a=*ο=x2üvc/=">A{>,<$=v(սξz`s)L='>;m:=˖> < ҽ{Իdg>j >ϱzki,W0>so=}=qk=x>Fؤ zJ> =L2<=%нrί=\=h='&P=gow>y9>>'-٣ :=t/1>Ïdo>=i>]½b[6IZr&Dʾ#>T== >FBoaK>?>e=OESmE:q=8оU3(=R >ì|;8s=n4=7G~g>3X=MAE@ZMO->K}\p9\Ὅ#>4.-WTP13>Xf뜘=>tXT>q{ B=֣½ݗ6.'=Ζ=+Yl<'>>]%I=R>~+R%i=F<3*M0i>=>lw# =;i >q=l=D¼r@@>TRƫ7O=0>}zE>pϾ2<<ۑ/>:Z>x1>=n<̾us[>Y{g=#AX=>[4>D>h<,e5҄>ѥ>4+>m0>,==8>Z\`\$S=+>%WP>h=;.PQ=OV yƢ˞*>CѽmHm2>??E<;=N='ӟ=Q>+RI,>d]-+N>.Kv=wmY5>Ɖ==碙>7QቾD=<} r˾{