pax_global_header00006660000000000000000000000064146371535300014521gustar00rootroot0000000000000052 comment=6df7c5c8e1e9bb319e26e3801c41607fec4958da improved-octo-waddle-1.0.7/000077500000000000000000000000001463715353000155535ustar00rootroot00000000000000improved-octo-waddle-1.0.7/.gitattributes000066400000000000000000000000341463715353000204430ustar00rootroot00000000000000bp/_version.py export-subst improved-octo-waddle-1.0.7/.github/000077500000000000000000000000001463715353000171135ustar00rootroot00000000000000improved-octo-waddle-1.0.7/.github/workflows/000077500000000000000000000000001463715353000211505ustar00rootroot00000000000000improved-octo-waddle-1.0.7/.github/workflows/python-package-conda.yml000066400000000000000000000041651463715353000256750ustar00rootroot00000000000000name: Python Package using Conda on: push: branches: [ master ] pull_request: branches: [ master ] env: latest_python: "3.12" supported_pythons: '["3.8", "3.9", "3.10", "3.11", "3.12"]' miniforge_version: "22.9.0-2" miniforge_variant: "Mambaforge" jobs: conf: # This job is needed to route the global environment variables into # a context that's available for matrix (and name, but that's unimportant) name: Prepare Test Plan runs-on: "ubuntu-latest" outputs: latest_python: ${{ steps.set-vars.outputs.latest_python }} supported_pythons: ${{ steps.set-vars.outputs.supported_pythons }} steps: - name: Report Plan id: set-vars run: | echo "latest_python=$latest_python" >> $GITHUB_OUTPUT echo "supported_pythons=$supported_pythons" >> $GITHUB_OUTPUT build-lint-test: needs: conf strategy: max-parallel: 5 fail-fast: true matrix: python_version: ${{ fromJSON(needs.conf.outputs.supported_pythons) }} os: [ubuntu-latest, macos-13, macos-14] runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v4 with: submodules: true - uses: conda-incubator/setup-miniconda@v2 with: auto-update-conda: true python-version: ${{ matrix.python_version }} miniforge-version: ${{ env.miniforge_version }} miniforge-variant: ${{ env.miniforge_variant }} environment-file: ci/conda_host_env.yml - name: Install dependencies shell: bash -l {0} run: | conda install --yes -c conda-forge --file ci/conda_requirements.txt - name: Lint with flake8 shell: bash -l {0} run: | # stop the build if there are Python syntax errors or undefined names flake8 bp --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 bp --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest shell: bash -l {0} run: | pip install --no-deps -e . 
pytest improved-octo-waddle-1.0.7/.github/workflows/release.yml000066400000000000000000000014441463715353000233160ustar00rootroot00000000000000name: Release on: push: tags: - '*' jobs: release: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 with: submodules: true - name: Set up Python 3.8 uses: actions/setup-python@v2 with: python-version: 3.8 - name: Build distribution run: | # set version from '${{ github.ref_name }}' export RELEASE_VERSION=${{ github.ref_name }} pip install numpy cython versioneer python setup.py sdist - name: Publish a Python distribution to PyPI if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} improved-octo-waddle-1.0.7/.gitignore000066400000000000000000000014661463715353000175520ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # cython files bp/GPL/_*.c bp/_*.c bp/tests/test_bp_cy.c # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ #Ipython Notebook .ipynb_checkpoints improved-octo-waddle-1.0.7/.gitmodules000066400000000000000000000001341463715353000177260ustar00rootroot00000000000000[submodule "bp/BitArray"] path = bp/BitArray url = https://github.com/noporpoise/BitArray improved-octo-waddle-1.0.7/.travis.yml000066400000000000000000000012371463715353000176670ustar00rootroot00000000000000# Check on http://lint.travis-ci.org/ after modifying it! Originally # modified from https://gist.github.com/dan-blanchard/7045057 sudo: false language: python env: - PYTHON_VERSION=3.6 before_install: - wget http://repo.continuum.io/miniconda/Miniconda3-3.7.3-Linux-x86_64.sh -O miniconda.sh - chmod +x miniconda.sh - ./miniconda.sh -b - export PATH=/home/travis/miniconda3/bin:$PATH # Update conda itself - conda update --yes conda install: - conda create --yes -n env_name python=$PYTHON_VERSION --file ci/conda_requirements.txt - source activate env_name - pip install . --no-deps script: - flake8 bp - make test after_success: - coveralls improved-octo-waddle-1.0.7/ChangeLog.md000066400000000000000000000007201463715353000177230ustar00rootroot00000000000000BP ChangeLog ============ BP 0.1.1-dev ------------ BP 0.1.1 (released on August 26, 2016) -------------------------------------- * mucking around to make it actually pip installable... BP 0.1 (released on August 26, 2016) ------------------------------------ * initial release. Not complete, but reasonably fast and memory efficient. This release was made as the object is minimally sufficient for its original intended use: optimization of UniFrac. improved-octo-waddle-1.0.7/LICENSE000066400000000000000000000027251463715353000165660ustar00rootroot00000000000000Copyright (c) 2016, Daniel McDonald All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of improved-octo-waddle nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. improved-octo-waddle-1.0.7/MANIFEST.in000066400000000000000000000004541463715353000173140ustar00rootroot00000000000000include README.md include LICENSE include ChangeLog.md include Makefile include bp/*.pyx include bp/*.pxd graft bp graft bp/BitArray global-exclude *.pyc global-exclude *.pyo global-exclude .git global-exclude *.so global-exclude *.a global-exclude *.o include versioneer.py include bp/_version.py improved-octo-waddle-1.0.7/Makefile000066400000000000000000000004511463715353000172130ustar00rootroot00000000000000libbitarr.a: cd BitArray && $(MAKE) make -C BitArray -f Makefile build: libbitarr.a python setup.py build_ext --inplace test: build python -c "from bp._binary_tree import test_binary_tree; test_binary_tree()" python cdef_bp_tests.py nosetests clean: rm -fr bp/*.so bp/*.c rm -fr build improved-octo-waddle-1.0.7/README.md000066400000000000000000000043461463715353000170410ustar00rootroot00000000000000Improved Octo Waddle -------------------- An implementation of the balanced parentheses tree structure as described by [Cordova and Navarro](http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf). Install notes ------------- Installation is a two step procedure right now due to the chicken and egg problem of requiring numpy and cython for setup.py to execute. The package is named iow in pypi as "bp" was taken at time of registration. ``` $ conda create --name bp python=3.8 $ conda activate bp $ conda install numpy cython $ pip install iow ``` Developer notes --------------- If pulling the source, please note that we use a submodule and Github does not by default bring it down. After a clone, please run: ``` $ git submodule update --init --recursive ``` Fragment insertion ------------------ BP supports the [jplace format](https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009). Fragments can be inserted using either fully-resolved or multifurcation mode to resolve multiple placements to the same edge. In fully resolved, the edge placed against is broken N times where N is the number of fragments on the edge. 
In multifurcation, a new node is constructed as the average of the distal length for the N fragments, and a separate multifurcation node is added which encompasses the placed fragments. Important: the multifurcation mode support is GPL licensed code. Support for that mode is in a fork of this repository, see [improved-octo-waddle-gpl](https://github.com/biocore/improved-octo-waddle-gpl). Insertions can be handled by the command line following install: ``` $ bp placement --help Usage: bp placement [OPTIONS] Options: --placements PATH jplace formatted data [required] --output PATH Where to write the resulting newick [required] --method [fully-resolved|multifurcating] Whether to fully resolve or multifurcate [required] --help Show this message and exit. ``` Note that the multifurcating support relies on GPL code derived from the Genesis project. To use this method, please install [iow-gpl](https://github.com/biocore/improved-octo-waddle-gpl). improved-octo-waddle-1.0.7/bp/000077500000000000000000000000001463715353000161545ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/BitArray/000077500000000000000000000000001463715353000176715ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/__init__.py000066400000000000000000000014311463715353000202640ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, BP development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- from ._bp import BP from ._io import parse_newick, write_newick, parse_jplace from ._conv import to_skbio_treenode, from_skbio_treenode, to_skbio_treearray from ._insert import insert_fully_resolved __all__ = ['BP', 'parse_newick', 'to_skbio_treenode', 'from_skbio_treenode', 'to_skbio_treearray', 'write_newick', 'parse_jplace', 'insert_fully_resolved'] from . 
import _version __version__ = _version.get_versions()['version'] improved-octo-waddle-1.0.7/bp/_ba.pxd000066400000000000000000000025261463715353000174170ustar00rootroot00000000000000cdef extern from "": ctypedef unsigned int uint64_t cdef extern from "bit_array.h": struct BIT_ARRAY: pass ctypedef uint64_t bit_index_t # allocations BIT_ARRAY* bit_array_create(bit_index_t nbits) void bit_array_free(BIT_ARRAY* bitarray) BIT_ARRAY* bit_array_clone(const BIT_ARRAY* bitarr) # utility char* bit_array_to_str(const BIT_ARRAY* bitarr, char* str) bit_index_t bit_array_length(const BIT_ARRAY* bit_arr) bit_index_t bit_array_num_bits_set(const BIT_ARRAY* bitarr) # bit juggling void bit_array_set_bit(BIT_ARRAY* bitarr, bit_index_t b) nogil void bit_array_toggle_bit(BIT_ARRAY* bitarr, bit_index_t b) nogil char bit_array_get_bit(const BIT_ARRAY* bitarr, bit_index_t b) nogil void bit_array_clear_bit(BIT_ARRAY* bitarr, bit_index_t b) # logical operations void bit_array_and(BIT_ARRAY* dest, const BIT_ARRAY* src1, const BIT_ARRAY* src2) void bit_array_or(BIT_ARRAY* dest, const BIT_ARRAY* src1, const BIT_ARRAY* src2) void bit_array_xor(BIT_ARRAY* dest, const BIT_ARRAY* src1, const BIT_ARRAY* src2) void bit_array_not(BIT_ARRAY* dest, const BIT_ARRAY* src1, const BIT_ARRAY* src2) # cyclic shifting void bit_array_cycle_right(BIT_ARRAY* bitarr, bit_index_t dist) void bit_array_cycle_left (BIT_ARRAY* bitarr, bit_index_t dist) improved-octo-waddle-1.0.7/bp/_ba.pyx000066400000000000000000000032351463715353000174420ustar00rootroot00000000000000cimport libc.stdlib from cpython cimport Py_INCREF, Py_DECREF, bool cimport numpy as np np.import_array() cdef class bitarray: cdef: BIT_ARRAY* bitarr bit_index_t nbits ### REDUNDANT as the BIT_ARRAY struct has this def __cinit__(self, bit_index_t n): self.nbits = n self.bitarr = bit_array_create(n) def __dealloc__(self): bit_array_free(self.bitarr) def __str__(self): cdef char* str_ cdef object result str_ = libc.stdlib.malloc(self.nbits + 1) result = tounicode(bit_array_to_str(self.bitarr, str_)) libc.stdlib.free(str_) return result def __getitem__(self, bit_index_t i): # could inline the macro version as well, less safe but should be # thinner code return bit_array_get_bit(self.bitarr, i) def __setitem__(self, bit_index_t i, bool v): if v: bit_array_set_bit(self.bitarr, i) else: bit_array_clear_bit(self.bitarr, i) cdef unicode tounicode(char* s): # from http://docs.cython.org/en/latest/src/tutorial/strings.html return s.decode('UTF-8', 'strict') cpdef bitarray bitarray_factory(object vec): """Construct a bitarray from a vector A value at a position in the vector is considered set if the value evaluates True. Parameters ---------- vec : object An iterable Returns ------- bitarray The bitarray based off of topo """ cdef int i cdef int n = len(vec) cdef bitarray result result = bitarray(n) for i in range(n): if vec[i]: result[i] = True return result improved-octo-waddle-1.0.7/bp/_binary_tree.pxd000066400000000000000000000062311463715353000213350ustar00rootroot00000000000000# cython: cdivision=True # cython: boundscheck=False # cython: wraparound=False # An implementation of a complete binary tree in breadth first order adapted # from https://github.com/jfuentess/sea2015/blob/master/binary_trees.h from libc.math cimport pow, log2, floor from bp._bp cimport SIZE_t # it might be useful to use a pow2 lookup. 
static const c arrays are not # allowed, so it might be useful to do it as a memoryview but should only be # done following benching cdef inline SIZE_t bt_is_root(SIZE_t v) nogil: """Is v the root""" return v == 0 cdef inline SIZE_t bt_is_left_child(SIZE_t v) nogil: """Is v a left child of some node""" return 0 if bt_is_root(v) else v % 2 cdef inline SIZE_t bt_is_right_child(SIZE_t v) nogil: """Is v a right child of some node""" return 0 if bt_is_root(v) else 1 - (v % 2) cdef inline SIZE_t bt_parent(SIZE_t v) nogil: """Get the index of the parent of v""" return 0 if bt_is_root(v) else (v - 1) // 2 cdef inline SIZE_t bt_left_child(SIZE_t v) nogil: """Get the index of the left child of v""" return 2 * v + 1 cdef inline SIZE_t bt_right_child(SIZE_t v) nogil: """Get the index of the right child of v""" return 2 * v + 2 cdef inline SIZE_t bt_left_sibling(SIZE_t v) nogil: """Get the index of the left sibling of v""" return v - 1 cdef inline SIZE_t bt_right_sibling(SIZE_t v) nogil: """Get the index of the right sibling of v""" return v + 1 cdef inline SIZE_t bt_is_leaf(SIZE_t v, SIZE_t height) nogil: """Determine if v is a leaf""" return (v >= pow(2, height) - 1) cdef inline SIZE_t bt_node_from_left(SIZE_t pos, SIZE_t height) nogil: """Get the index from the left of a node at a given height""" return pow(2, height) - 1 + pos cdef inline SIZE_t bt_offset_from_left(SIZE_t v) nogil: """Get the position from left of a node at its level This is the inverse of bt_node_from_left """ cdef double leftmost_check if bt_is_root(v): return 0 leftmost_check = log2(v + 1) if leftmost_check == floor(leftmost_check): return 0 return v - pow(2, floor(log2(v))) + 1 cdef inline SIZE_t bt_offset_from_right(SIZE_t v) nogil: """Get the position from right of a node at its level""" cdef SIZE_t lvl = floor(log2(v + 1)) cdef SIZE_t n_nodes_at_lvl = pow(2, lvl) return n_nodes_at_lvl - bt_offset_from_left(v) - 1 cdef inline SIZE_t bt_left_leaf(SIZE_t v, SIZE_t height) nogil: """Determine the index of a nodes left most leaf""" cdef SIZE_t left_tip = pow(2, height) - 1 cdef SIZE_t block_size if bt_is_root(v): return left_tip block_size = pow(2, height - floor(log2(v + 1))) return left_tip + (block_size * bt_offset_from_left(v)) cdef inline SIZE_t bt_right_leaf(SIZE_t v, SIZE_t height) nogil: """Determine the index of a nodes right most leaf""" cdef SIZE_t right_tip = pow(2, height + 1) - 2 cdef SIZE_t block_size if bt_is_root(v): return right_tip block_size = pow(2, height - floor(log2(v + 1))) return right_tip - (block_size * bt_offset_from_right(v)) improved-octo-waddle-1.0.7/bp/_binary_tree.pyx000066400000000000000000000056211463715353000213640ustar00rootroot00000000000000def test_binary_tree(): # wikipedia example, https://en.wikipedia.org/wiki/Binary_tree#Arrays # root test assert bt_is_root(0) assert bt_left_child(0) == 1 assert bt_right_child(0) == 2 assert bt_node_from_left(0, 0) == 0 assert bt_offset_from_left(0) == 0 assert bt_offset_from_right(0) == 0 assert bt_left_leaf(0, 3) == 7 assert bt_right_leaf(0, 3) == 14 # lvl 1 assert bt_is_left_child(1) assert bt_is_right_child(2) assert bt_left_child(1) == 3 assert bt_right_child(1) == 4 assert bt_left_child(2) == 5 assert bt_right_child(2) == 6 assert bt_parent(1) == 0 assert bt_parent(2) == 0 assert bt_right_sibling(1) == 2 assert bt_left_sibling(2) == 1 assert bt_node_from_left(0, 1) == 1 assert bt_node_from_left(1, 1) == 2 assert bt_offset_from_left(1) == 0 assert bt_offset_from_left(2) == 1 assert bt_offset_from_right(1) == 1 assert bt_offset_from_right(2) == 0 
assert bt_left_leaf(1, 3) == 7 assert bt_left_leaf(2, 3) == 11 assert bt_right_leaf(1, 3) == 10 assert bt_right_leaf(2, 3) == 14 # lvl 2 assert bt_is_left_child(3) assert bt_is_right_child(4) assert bt_is_left_child(5) assert bt_is_right_child(6) assert bt_parent(3) == 1 assert bt_parent(4) == 1 assert bt_parent(5) == 2 assert bt_parent(6) == 2 assert bt_right_sibling(3) == 4 assert bt_left_sibling(4) == 3 assert bt_right_sibling(5) == 6 assert bt_left_sibling(6) == 5 assert bt_node_from_left(0, 2) == 3 assert bt_node_from_left(1, 2) == 4 assert bt_node_from_left(2, 2) == 5 assert bt_node_from_left(3, 2) == 6 assert bt_offset_from_left(3) == 0 assert bt_offset_from_left(4) == 1 assert bt_offset_from_left(5) == 2 assert bt_offset_from_left(6) == 3 assert bt_offset_from_right(3) == 3 assert bt_offset_from_right(4) == 2 assert bt_offset_from_right(5) == 1 assert bt_offset_from_right(6) == 0 assert bt_left_leaf(3, 3) == 7 assert bt_left_leaf(4, 3) == 9 assert bt_left_leaf(5, 3) == 11 assert bt_left_leaf(6, 3) == 13 assert bt_right_leaf(3, 3) == 8 assert bt_right_leaf(4, 3) == 10 assert bt_right_leaf(5, 3) == 12 assert bt_right_leaf(6, 3) == 14 # lvl 3 assert bt_offset_from_left(7) == 0 assert bt_offset_from_left(8) == 1 assert bt_offset_from_left(9) == 2 assert bt_offset_from_left(10) == 3 assert bt_offset_from_left(11) == 4 assert bt_offset_from_left(12) == 5 assert bt_offset_from_left(13) == 6 assert bt_offset_from_left(14) == 7 assert bt_offset_from_right(7) == 7 assert bt_offset_from_right(8) == 6 assert bt_offset_from_right(9) == 5 assert bt_offset_from_right(10) == 4 assert bt_offset_from_right(11) == 3 assert bt_offset_from_right(12) == 2 assert bt_offset_from_right(13) == 1 assert bt_offset_from_right(14) == 0 improved-octo-waddle-1.0.7/bp/_bp.pxd000066400000000000000000000065341463715353000174410ustar00rootroot00000000000000cimport numpy as np cimport cython from bp._ba cimport BIT_ARRAY ctypedef np.npy_intp SIZE_t ctypedef np.npy_uint32 UINT32_t ctypedef np.npy_int32 INT32_t ctypedef np.npy_float64 DOUBLE_t ctypedef np.npy_uint8 BOOL_t cdef class mM: cdef int b # block size cdef int n_tip # number of tips in the binary tree cdef int n_internal # number of internal nodes in the binary tree cdef int n_total # total number of nodes in the binary tree cdef int height # the height of the binary tree cdef int m_idx # m is minimum excess cdef int M_idx # M is maximum excess cdef int r_idx # rank cdef SIZE_t[:, ::1] mM cdef SIZE_t[:] r cdef void rmm(self, BOOL_t[:] B, int B_size) nogil @cython.final cdef class BP: cdef: public np.ndarray B BOOL_t* _b_ptr SIZE_t[:] _e_index SIZE_t[:] _k_index_0 SIZE_t[:] _k_index_1 np.ndarray _names np.ndarray _lengths np.ndarray _edges np.ndarray _edge_lookup mM _rmm SIZE_t size cdef inline SIZE_t rank(self, SIZE_t t, SIZE_t i) nogil cdef inline SIZE_t select(self, SIZE_t t, SIZE_t k) nogil cdef SIZE_t _excess(self, SIZE_t i) nogil cdef SIZE_t excess(self, SIZE_t i) nogil cdef SIZE_t fwdsearch(self, SIZE_t i, int d) nogil cdef SIZE_t bwdsearch(self, SIZE_t i, int d) nogil cpdef inline SIZE_t close(self, SIZE_t i) nogil cdef inline SIZE_t open(self, SIZE_t i) nogil cpdef inline BOOL_t isleaf(self, SIZE_t i) nogil cdef inline SIZE_t enclose(self, SIZE_t i) nogil cdef BP _mask_from_self(self, BIT_ARRAY* mask, np.ndarray[DOUBLE_t, ndim=1] lengths) cpdef SIZE_t nsibling(self, SIZE_t i) nogil cpdef SIZE_t psibling(self, SIZE_t i) nogil cpdef SIZE_t lchild(self, SIZE_t i) nogil cpdef SIZE_t fchild(self, SIZE_t i) nogil cpdef SIZE_t parent(self, SIZE_t i) nogil 
cpdef SIZE_t depth(self, SIZE_t i) nogil cpdef SIZE_t root(self) nogil cdef int scan_block_forward(self, int i, int k, int b, int d) nogil cdef int scan_block_backward(self, int i, int k, int b, int d) nogil cdef void _set_edges(self, np.ndarray[INT32_t, ndim=1] edges) # TODO: evalute down the road what methods should be cdef. There is a # performance cost for cpdef, so for high use functions, it may make sense # to punt down to cdef. # http://notes-on-cython.readthedocs.io/en/latest/fibo_speed.html cpdef inline unicode name(self, SIZE_t i) cpdef inline DOUBLE_t length(self, SIZE_t i) cpdef inline INT32_t edge(self, SIZE_t i) cpdef SIZE_t edge_from_number(self, INT32_t n) cpdef SIZE_t rmq(self, SIZE_t i, SIZE_t j) nogil cpdef SIZE_t rMq(self, SIZE_t i, SIZE_t j) nogil cpdef SIZE_t postorderselect(self, SIZE_t k) nogil cpdef SIZE_t postorder(self, SIZE_t i) nogil cpdef SIZE_t preorderselect(self, SIZE_t k) nogil cpdef SIZE_t preorder(self, SIZE_t i) nogil cpdef BOOL_t isancestor(self, SIZE_t i, SIZE_t j) nogil cpdef SIZE_t levelancestor(self, SIZE_t i, SIZE_t d) nogil cpdef SIZE_t subtree(self, SIZE_t i) nogil cpdef BP shear(self, set tips) cpdef BP collapse(self) cpdef SIZE_t ntips(self) nogil cpdef SIZE_t levelnext(self, SIZE_t i) nogil cpdef SIZE_t height(self, SIZE_t i) nogil cpdef SIZE_t deepestnode(self, SIZE_t i) nogil cpdef SIZE_t lca(self, SIZE_t i, SIZE_t j) nogil improved-octo-waddle-1.0.7/bp/_bp.pyx000066400000000000000000001066251463715353000174700ustar00rootroot00000000000000# cython: boundscheck=False, wraparound=False, cdivision=True, linetrace=False # ---------------------------------------------------------------------------- # Copyright (c) 2013--, BP development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- ### NOTE: some doctext strings are copied and pasted from manuscript ### http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf from libc.math cimport ceil, log as ln, pow, log2 import time #import numpy.testing as npt import numpy as np cimport numpy as np cimport cython from bp._binary_tree cimport * #bt_node_from_left, bt_left_child, bt_right_child from bp._ba cimport * np.import_array() cdef extern from "limits.h": int INT_MAX DOUBLE = np.float64 SIZE = np.intp BOOL = np.uint8 INT32 = np.int32 cdef inline int min(int a, int b) nogil: if a > b: return b else: return a cdef inline int max(int a, int b) nogil: if a > b: return a else: return b cdef class mM: def __cinit__(self, BOOL_t[:] B, int B_size): self.m_idx = 0 self.M_idx = 1 self.rmm(B, B_size) cdef void rmm(self, BOOL_t[:] B, int B_size) nogil: """Construct the rmM tree based off of Navarro and Sadakane http://www.dcc.uchile.cl/~gnavarro/ps/talg12.pdf """ cdef int i, j, lvl, pos # for loop support cdef int offset # tip offset in binary tree for a given parenthesis cdef int lower_limit # the lower limit of the bucket a parenthesis is in cdef int upper_limit # the upper limit of the bucket a parenthesis is in cdef int min_ = 0 # m, absolute minimum for a blokc cdef int max_ = 0 # M, absolute maximum for a block cdef int excess = 0 # e, absolute excess cdef int vbar cdef int r = 0 # build tip info self.b = ceil(ln( B_size) * ln(ln( B_size))) # determine the number of nodes and height of the binary tree self.n_tip = ceil(B_size / self.b) self.height = ceil(log2(self.n_tip)) self.n_internal = (pow(2, self.height)) - 1 self.n_total = self.n_tip + self.n_internal with gil: # creation of a memoryview directly or via numpy requires the GIL: # http://stackoverflow.com/a/22238012 self.mM = np.zeros((self.n_total, 2), dtype=SIZE) self.r = np.zeros(self.n_total, dtype=SIZE) # annoying, cannot do step in range if step is not known at runtime # see https://github.com/cython/cython/pull/520 # for i in range(0, B_size, b): # as a result, doing a custom range using a while loop # compute for tips of rmM tree i = 0 while i < B_size: offset = i // self.b lower_limit = i upper_limit = min(i + self.b, B_size) min_ = INT_MAX max_ = 0 self.r[offset + self.n_internal] = r for j in range(lower_limit, upper_limit): # G function, a +-1 method where if B[j] == 1 we +1, and if # B[j] == 0 we -1 excess += -1 + (2 * B[j]) r += B[j] if excess < min_: min_ = excess if excess > max_: max_ = excess # at the left bound of the bucket self.mM[offset + self.n_internal, self.m_idx] = min_ self.mM[offset + self.n_internal, self.M_idx] = max_ i += self.b # compute for internal nodes of rmM tree in reverse level order starting # at the level above the tips for lvl in range(self.height - 1, -1, -1): num_curr_nodes = pow(2, lvl) # for each node in the level for pos in range(num_curr_nodes): # obtain the node, and the index to its children node = bt_node_from_left(pos, lvl) lchild = bt_left_child(node) rchild = bt_right_child(node) if lchild >= self.n_total: continue elif rchild >= self.n_total: self.mM[node, self.m_idx] = self.mM[lchild, self.m_idx] self.mM[node, self.M_idx] = self.mM[lchild, self.M_idx] else: self.mM[node, self.m_idx] = min(self.mM[lchild, self.m_idx], self.mM[rchild, self.m_idx]) self.mM[node, self.M_idx] = max(self.mM[lchild, self.M_idx], self.mM[rchild, self.M_idx]) self.r[node] = self.r[lchild] @cython.final cdef class BP: """A balanced parentheses succinct data structure tree 
representation The basis for this implementation is the data structure described by Cordova and Navarro [1]. In some instances, some docstring text was copied verbatim from the manuscript. This does not implement the bucket-based trees, although that would be a very interesting next step. A node in this data structure is represented by 2 bits, an open parenthesis and a close parenthesis. The implementation uses a numpy uint8 type where an open parenthesis is a 1 and a close is a 0. In general, operations on this tree are best suited for passing in the opening parenthesis index, so for instance, if you'd like to use BP.isleaf to determine if a node is a leaf, the operation is defined only for using the opening parenthesis. At this time, there is some ambiguity over what methods can handle a closing parenthesis. Node attributes, such as names, are stored external to this data structure. The motivator for this data structure is pure performance both in space and time. As such, there is minimal sanity checking. It is advised to use this structure with care, and ideally within a framework which can assure sanity. References ---------- [1] http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf """ def __cinit__(self, np.ndarray[BOOL_t, ndim=1] B, np.ndarray[DOUBLE_t, ndim=1] lengths=None, np.ndarray[object, ndim=1] names=None, np.ndarray[INT32_t, ndim=1] edges=None): cdef SIZE_t i cdef SIZE_t size cdef SIZE_t[:] _e_index cdef SIZE_t[:] _k_index_0 cdef SIZE_t[:] _k_index_1 cdef SIZE_t[:] _r_index_0 cdef SIZE_t[:] _r_index_1 cdef np.ndarray[object, ndim=1] _names cdef np.ndarray[DOUBLE_t, ndim=1] _lengths cdef np.ndarray[INT32_t, ndim=1] _edges cdef np.ndarray[SIZE_t, ndim=1] _edge_lookup # the tree is only valid if it is balanaced assert B.sum() == (float(B.size) / 2) self.B = B self._b_ptr = &B[0] self.size = B.size self._rmm = mM(B, B.size) if names is not None: self._names = names else: self._names = np.full(self.B.size, None, dtype=object) if lengths is not None: self._lengths = lengths else: self._lengths = np.zeros(self.B.size, dtype=DOUBLE) if edges is not None: self._set_edges(edges) else: self._edges = np.full(self.B.size, 0, dtype=INT32) self._edge_lookup = None # precursor for select index cache _r_index_0 = np.cumsum((1 - B), dtype=SIZE) _r_index_1 = np.cumsum(B, dtype=SIZE) # construct a select index. These operations are performed frequently, # and easy to cache at a relatively minor memory expense. It cannot be # assumed that open and close will be same length so can't stack #TODO: leverage rmmtree, and calculate select on the fly _k_index_0 = np.unique(_r_index_0, return_index=True)[1].astype(SIZE) self._k_index_0 = _k_index_0 _k_index_1 = np.unique(_r_index_1, return_index=True)[1].astype(SIZE) self._k_index_1 = _k_index_1 # construct an excess index. These operations are performed a lot, and # similarly can to rank and select, can be cached at a minimal expense. 
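# As an illustrative aside (hypothetical toy input, not derived from the code
# in this module): for B = [1, 1, 0, 1, 0, 0], i.e. "(()())", the excess values
# computed below are [1, 2, 1, 2, 1, 0] -- the number of unmatched opening
# parentheses in B[0..i].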
#TODO: leverage rmm tree, and calculate excess on the fly _e_index = np.empty(B.size, dtype=SIZE) for i in range(B.size): _e_index[i] = self._excess(i) self._e_index = _e_index def write(self, object fname): np.savez_compressed(fname, names=self._names, lengths=self._lengths, B=self.B) @staticmethod def read(object fname): data = np.load(fname) bp = BP(data['B'], names=data['names'], lengths=data['lengths']) return bp def set_names(self, np.ndarray[object, ndim=1] names): self._names = names def set_lengths(self, np.ndarray[DOUBLE_t, ndim=1] lengths): self._lengths = lengths cdef void _set_edges(self, np.ndarray[INT32_t, ndim=1] edges): cdef: int i, n INT32_t edge np.ndarray[SIZE_t, ndim=1] _edge_lookup np.ndarray[BOOL_t, ndim=1] b b = self.B n = b.size _edge_lookup = np.full(n, 0, dtype=SIZE) for i in range(n): if b[i] == 1: edge = edges[i] _edge_lookup[edge] = i self._edge_lookup = _edge_lookup self._edges = edges def set_edges(self, np.ndarray[INT32_t, ndim=1] edges): self._set_edges(edges) cpdef inline unicode name(self, SIZE_t i): return self._names[i] cpdef inline DOUBLE_t length(self, SIZE_t i): return self._lengths[i] cpdef inline INT32_t edge(self, SIZE_t i): return self._edges[i] cpdef SIZE_t edge_from_number(self, INT32_t n): return self._edge_lookup[n] cdef inline SIZE_t rank(self, SIZE_t t, SIZE_t i) nogil: """Determine the rank order of the ith bit t Rank is the order of the ith bit observed, from left to right. For t=1, this is a preorder traversal of the tree. Parameters ---------- t : SIZE_t The bit value, either 0 or 1 where 0 is a closing parenthesis and 1 is an opening. i : SIZE_T The position to evaluate Returns ------- SIZE_t The rank order of the position. """ cdef int k cdef int r = 0 cdef int lower_bound cdef int upper_bound cdef int j cdef int node #TODO: add method to mM for determining block from i k = i // self._rmm.b lower_bound = k * self._rmm.b # upper_bound is block boundary or end of tree upper_bound = min((k + 1) * self._rmm.b, self.size) upper_bound = min(upper_bound, i + 1) # collect rank from within the block for j in range(lower_bound, upper_bound): r += self._b_ptr[j] # collect the rank at the left end of the block node = bt_node_from_left(k, self._rmm.height) r += self._rmm.r[node] # TODO: can this if statement be removed? if t: return r else: return (i - r) + 1 cdef inline SIZE_t select(self, SIZE_t t, SIZE_t k) nogil: """The position in B of the kth occurrence of the bit t.""" if t: return self._k_index_1[k] else: return self._k_index_0[k] cdef SIZE_t _excess(self, SIZE_t i) nogil: """Actually compute excess""" if i < 0: return 0 # wasn't stated as needed but appears so given testing return (2 * self.rank(1, i) - i) - 1 cdef SIZE_t excess(self, SIZE_t i) nogil: """the number of opening minus closing parentheses in B[1, i]""" # same as: self.rank(1, i) - self.rank(0, i) return self._e_index[i] cpdef inline SIZE_t close(self, SIZE_t i) nogil: """The position of the closing parenthesis that matches B[i]""" if not self._b_ptr[i]: # identity: the close of a closed parenthesis is itself return i return self.fwdsearch(i, -1) cdef inline SIZE_t open(self, SIZE_t i) nogil: """The position of the opening parenthesis that matches B[i]""" if self._b_ptr[i] or i <= 0: # identity: the open of an open parenthesis is itself # the open of 0 is open. 
A negative index cannot be open, so just return return i return self.bwdsearch(i, 0) + 1 cdef inline SIZE_t enclose(self, SIZE_t i) nogil: """The opening parenthesis of the smallest matching pair that contains position i""" if self._b_ptr[i]: return self.bwdsearch(i, -2) + 1 else: return self.bwdsearch(i - 1, -2) + 1 cpdef SIZE_t rmq(self, SIZE_t i, SIZE_t j) nogil: """The leftmost minimum excess in i -> j""" cdef: SIZE_t k, min_k SIZE_t min_v, obs_v min_k = i min_v = self.excess(i) # a value larger than what will be tested for k in range(i, j + 1): obs_v = self.excess(k) if obs_v < min_v: min_k = k min_v = obs_v return min_k cpdef SIZE_t rMq(self, SIZE_t i, SIZE_t j) nogil: """The leftmost maximmum excess in i -> j""" cdef: SIZE_t k, max_k SIZE_t max_v, obs_v max_k = i max_v = self.excess(i) # a value larger than what will be tested for k in range(i, j + 1): obs_v = self.excess(k) if obs_v > max_v: max_k = k max_v = obs_v return max_k def __len__(self): """The number of nodes in the tree""" return self.size / 2 def __repr__(self): """Returns summary of the tree Returns ------- str A summary of this node and all descendants Notes ----- This method returns the name of the node and a count of tips and the number of internal nodes in the tree. This docstring and repr was adapted from skbio.TreeNode """ cdef total_nodes = len(self) cdef tip_count = self.ntips() return "" % \ (self.name(0), total_nodes - tip_count, tip_count) def __reduce__(self): return (BP, (self.B, self._lengths, self._names)) cpdef SIZE_t depth(self, SIZE_t i) nogil: """The depth of node i""" return self._e_index[i] cpdef SIZE_t root(self) nogil: """The root of the tree""" return 0 cpdef SIZE_t parent(self, SIZE_t i) nogil: """The parent of node i""" # TODO: only make operations like this defined on the open parentheses. # this monkeying with checking open/close sucks. if i == self.root() or i == (self.size - 1): return -1 else: return self.enclose(i) cpdef BOOL_t isleaf(self, SIZE_t i) nogil: """Whether the node is a leaf""" return self._b_ptr[i] and (not self._b_ptr[i + 1]) cpdef SIZE_t fchild(self, SIZE_t i) nogil: """The first child of i (i.e., the left child) fchild(i) = i + 1 (if i is not a leaf) Returns 0 if the node is a leaf as the root cannot be a child by definition. """ if self._b_ptr[i]: if self.isleaf(i): return 0 else: return i + 1 else: return self.fchild(self.open(i)) cpdef SIZE_t lchild(self, SIZE_t i) nogil: """The last child of i (i.e., the right child) lchild(i) = open(close(i) − 1) (if i is not a leaf) Returns 0 if the node is a leaf as the root cannot be a child by definition. """ if self._b_ptr[i]: if self.isleaf(i): return 0 else: return self.open(self.close(i) - 1) else: return self.lchild(self.open(i)) def mincount(self, SIZE_t i, SIZE_t j): """number of occurrences of the minimum in excess(i), excess(i + 1), . . . , excess(j).""" excess, counts = np.unique([self.excess(k) for k in range(i, j + 1)], return_counts=True) return counts[excess.argmin()] def minselect(self, SIZE_t i, SIZE_t j, SIZE_t q): """position of the qth minimum in excess(i), excess(i + 1), . . . , excess(j).""" counts = np.array([self.excess(k) for k in range(i, j + 1)]) index = counts == counts.min() if index.sum() < q: return None else: return i + index.nonzero()[0][q - 1] cpdef SIZE_t nsibling(self, SIZE_t i) nogil: """The next sibling of i (i.e., the sibling to the right) nsibling(i) = close(i) + 1 (if the result j holds B[j] = 0 then i has no next sibling) Will return 0 if there is no sibling. 
This makes sense as the root cannot have a sibling by definition """ cdef SIZE_t pos if self._b_ptr[i]: pos = self.close(i) + 1 else: pos = self.nsibling(self.open(i)) if pos >= self.size: return 0 elif self._b_ptr[pos]: return pos else: return 0 cpdef SIZE_t psibling(self, SIZE_t i) nogil: """The previous sibling of i (i.e., the sibling to the left) psibling(i) = open(i − 1) (if B[i − 1] = 1 then i has no previous sibling) Will return 0 if there is no sibling. This makes sense as the root cannot have a sibling by definition """ cdef SIZE_t pos if self._b_ptr[i]: if self._b_ptr[max(0, i - 1)]: return 0 pos = self.open(i - 1) else: pos = self.psibling(self.open(i)) if pos < 0: return 0 elif self._b_ptr[pos]: return pos else: return 0 cpdef SIZE_t preorder(self, SIZE_t i) nogil: """Preorder rank of node i Parameters ---------- i : int The node index to assess the preorder order of. Returns ------- int The nodes order of evaluation in a preorder traversal of the tree. """ if self._b_ptr[i]: return self.rank(1, i) else: return self.preorder(self.open(i)) cpdef SIZE_t preorderselect(self, SIZE_t k) nogil: """The index of the node with preorder k Parameters ---------- k : int The preorder evaluation order to search for. Returns ------- int The index position of the node in the tree. """ return self.select(1, k) cpdef SIZE_t postorder(self, SIZE_t i) nogil: """Postorder rank of node i Parameters ---------- i : int The node index to assess the postorder order of. Returns ------- int The nodes order of evaluation in a postorder traversal of the tree. """ if self._b_ptr[i]: return self.rank(0, self.close(i)) else: return self.rank(0, i) cpdef SIZE_t postorderselect(self, SIZE_t k) nogil: """The index of the node with postorder k Parameters ---------- k : int The postorder evaluation order to search for. Returns ------- int The index position of the node in the tree. """ return self.open(self.select(0, k)) cpdef BOOL_t isancestor(self, SIZE_t i, SIZE_t j) nogil: """Whether i is an ancestor of j Parameters ---------- i : int A node index j : int A node index Note ---- False is returned if i == j. A node cannot be an ancestor of itself. Returns ------- bool True if i is an ancestor of j, False otherwise. 
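Examples -------- As an illustration (hypothetical tree, not taken from the test suite; indices are positions in B): for ``((a,b)c,d)r;``, node ``c`` opens at index 1 and closes at index 6, so ``isancestor(1, 2)`` (``c`` over leaf ``a``) is True, while ``isancestor(1, 7)`` (``c`` vs leaf ``d``) is False.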
""" if i == j: return False if not self._b_ptr[i]: i = self.open(i) return i <= j < self.close(i) cpdef SIZE_t subtree(self, SIZE_t i) nogil: """The number of nodes in the subtree of i Parameters ---------- i : int The node to evaluate Returns ------- int The number of nodes in the subtree of i """ if not self._b_ptr[i]: i = self.open(i) return (self.close(i) - i + 1) / 2 cpdef SIZE_t levelancestor(self, SIZE_t i, SIZE_t d) nogil: """The ancestor j of i such that depth(j) = depth(i) − d Parameters ---------- i : int The node to evaluate d : int How many ancestors back to evaluate Returns ------- int The node index of the ancestor to search for """ if d <= 0: return -1 if not self._b_ptr[i]: i = self.open(i) return self.bwdsearch(i, -d - 1) + 1 cpdef SIZE_t levelnext(self, SIZE_t i) nogil: """The next node with the same depth Parameters ---------- i : int The node to evaluate Returns ------- int The node index of the next node or -1 if there isn't one """ return self.fwdsearch(self.close(i), 1) cpdef SIZE_t lca(self, SIZE_t i, SIZE_t j) nogil: """The lowest common ancestor of i and j Parameters ---------- i : int A node index to evaluate j : int A node index to evalute Returns ------- int The index of the lowest common ancestor """ if self.isancestor(i, j): return i elif self.isancestor(j, i): return j else: return self.parent(self.rmq(i, j) + 1) cpdef SIZE_t deepestnode(self, SIZE_t i) nogil: """The index of the deepestnode which descends from i Parameters ---------- i : int The node to evaluate Returns ------- int The index of the deepest node which descends from i """ return self.rMq(self.open(i), self.close(i)) cpdef SIZE_t height(self, SIZE_t i) nogil: """The height of node i with respect to its deepest descendent Parameters ---------- i : int The node to evaluate Notes ----- Height is in terms of number of edges, not in terms of branch length Returns ------- int The number of edges between node i and its deepest node """ return self.excess(self.deepestnode(i)) - self.excess(self.open(i)) cpdef BP shear(self, set tips): """Remove all nodes from the tree except tips and ancestors of tips Parameters ---------- tips : set of str The set of tip names to retain Returns ------- BP A new BP tree corresponding to only the described tips and their ancestors. 
""" cdef: SIZE_t i, n = len(tips) SIZE_t p, t, count = 0 BIT_ARRAY* mask BP new_bp mask = bit_array_create(self.B.size) bit_array_set_bit(mask, self.root()) bit_array_set_bit(mask, self.close(self.root())) for i in range(self.B.size): # isleaf is only defined on the open parenthesis if self.isleaf(i): if self.name(i) in tips: # gil is required for set operation with nogil: count += 1 bit_array_set_bit(mask, i) bit_array_set_bit(mask, i + 1) p = self.parent(i) while p != 0 and bit_array_get_bit(mask, p) == 0: bit_array_set_bit(mask, p) bit_array_set_bit(mask, self.close(p)) p = self.parent(p) if count == 0: bit_array_free(mask) raise ValueError("No requested tips found") new_bp = self._mask_from_self(mask, self._lengths) bit_array_free(mask) return new_bp cdef BP _mask_from_self(self, BIT_ARRAY* mask, np.ndarray[DOUBLE_t, ndim=1] lengths): cdef: SIZE_t i, k, n, mask_sum np.ndarray[BOOL_t, ndim=1] new_b np.ndarray[object, ndim=1] new_names np.ndarray[object, ndim=1] names = self._names np.ndarray[DOUBLE_t, ndim=1] new_lengths BOOL_t* new_b_ptr DOUBLE_t* lengths_ptr DOUBLE_t* new_lengths_ptr n = bit_array_length(mask) mask_sum = bit_array_num_bits_set(mask) k = 0 lengths_ptr = &lengths[0] new_b = np.empty(mask_sum, dtype=BOOL) new_names = np.empty(mask_sum, dtype=object) new_lengths = np.empty(mask_sum, dtype=DOUBLE) new_b_ptr = &new_b[0] new_lengths_ptr = &new_lengths[0] for i in range(n): if bit_array_get_bit(mask, i): new_b_ptr[k] = self._b_ptr[i] # since names is dtype=object, gil is required new_names[k] = names[i] new_lengths_ptr[k] = lengths_ptr[i] k += 1 return BP(np.asarray(new_b), names=new_names, lengths=new_lengths) cpdef BP collapse(self): cdef: SIZE_t i, n = self.B.sum() SIZE_t current, first, last np.ndarray[DOUBLE_t, ndim=1] new_lengths BIT_ARRAY* mask DOUBLE_t* new_lengths_ptr BP new_bp mask = bit_array_create(self.B.size) bit_array_set_bit(mask, self.root()) bit_array_set_bit(mask, self.close(self.root())) new_lengths = self._lengths.copy() new_lengths_ptr = new_lengths.data with nogil: for i in range(n): current = self.preorderselect(i) if self.isleaf(current): bit_array_set_bit(mask, current) bit_array_set_bit(mask, self.close(current)) else: first = self.fchild(current) last = self.lchild(current) if first == last: new_lengths_ptr[first] = new_lengths_ptr[first] + \ new_lengths_ptr[current] else: bit_array_set_bit(mask, current) bit_array_set_bit(mask, self.close(current)) new_bp = self._mask_from_self(mask, new_lengths) bit_array_free(mask) return new_bp cpdef inline SIZE_t ntips(self) nogil: cdef: SIZE_t i = 0 SIZE_t count = 0 SIZE_t n = self.size while i < (n - 1): if self._b_ptr[i] and not self._b_ptr[i+1]: count += 1 i += 1 i += 1 return count cdef int scan_block_forward(self, int i, int k, int b, int d) nogil: """Scan a block forward from i Parameters ---------- bp : BP The tree i : int The index position to start from in the tree k : int The block to explore b : int The block size d : int The depth to search for Returns ------- int The index position of the result. -1 is returned if a result is not found. 
""" cdef int lower_bound cdef int upper_bound cdef int j # lower_bound is block boundary or right of i lower_bound = max(k, 0) * b lower_bound = max(i + 1, lower_bound) # upper_bound is block boundary or end of tree upper_bound = min((k + 1) * b, self.size) for j in range(lower_bound, upper_bound): if self._e_index[j] == d: return j return -1 cdef int scan_block_backward(self, int i, int k, int b, int d) nogil: """Scan a block backward from i Parameters ---------- i : int The index position to start from in the tree k : int The block to explore b : int The block size d : int The depth to search for Returns ------- int The index position of the result. -1 is returned if a result is not found. """ cdef int lower_bound cdef int upper_bound cdef int j # i and k are currently needed to handle the situation where # k_start < i < k_end. It should be possible to resolve using partial # excess. # range stop is exclusive, so need to set "stop" at -1 of boundary lower_bound = max(k, 0) * b - 1 # is it possible for k to be < 0? # include the right most position of the k-1 block so we can identify # closures spanning blocks. Not positive if this is correct, however if the # block is "()((", and we're searching for the opening paired with ")", # we need to go to evaluate the excess prior to the first "(", at least as # "open" is defined in Cordova and Navarro if lower_bound >= 0: lower_bound -= 1 # upper bound is block boundary or left of i, whichever is less # note that this is an inclusive boundary since this is a backward search upper_bound = min((k + 1) * b, self.size) - 1 upper_bound = min(i - 1, upper_bound) if upper_bound <= 0: return -1 for j in range(upper_bound, lower_bound, -1): if self.excess(j) == d: return j return -1 cdef SIZE_t fwdsearch(self, SIZE_t i, int d) nogil: """Search forward from i for desired excess Parameters ---------- i : int The index to search forward from d : int The excess difference to search for (relative to E[i]) Returns ------- int The index of the result, or -1 if no result was found """ cdef int k # the block being interrogated cdef int result = -1 # the result of a scan within a block cdef int node # the node within the binary tree being examined # get the block of parentheses to check k = i // self._rmm.b # desired excess d += self._e_index[i] # determine which node our block corresponds too node = bt_node_from_left(k, self._rmm.height) # see if our result is in our current block if self._rmm.mM[node, self._rmm.m_idx] <= d <= self._rmm.mM[node, self._rmm.M_idx]: result = self.scan_block_forward(i, k, self._rmm.b, d) # if we do not have a result, we need to begin traversal of the tree if result == -1: # walk up the tree while not bt_is_root(node): if bt_is_left_child(node): node = bt_right_sibling(node) if self._rmm.mM[node, self._rmm.m_idx] <= d <= self._rmm.mM[node, self._rmm.M_idx]: break node = bt_parent(node) if bt_is_root(node): return -1 # descend until we hit a leaf node while not bt_is_leaf(node, self._rmm.height): node = bt_left_child(node) # evaluate right, if not found, pick left if not (self._rmm.mM[node, self._rmm.m_idx] <= d <= self._rmm.mM[node, self._rmm.M_idx]): node = bt_right_sibling(node) # we have found a block with contains our solution. 
convert from the # node index back into the block index k = node - (pow(2, self._rmm.height) - 1) # scan for a result using the original d result = self.scan_block_forward(i, k, self._rmm.b, d) return result cdef SIZE_t bwdsearch(self, SIZE_t i, int d) nogil: """Search backward from i for desired excess Parameters ---------- i : int The index to search forward from d : int The excess difference to search for (relative to E[i]) Returns ------- int The index of the result, or -1 if no result was found """ cdef int k # the block being interrogated cdef int result = -1 # the result of a scan within a block cdef int node # the node within the binary tree being examined # get the block of parentheses to check k = i // self._rmm.b # desired excess d += self.excess(i) # see if our result is in our current block result = self.scan_block_backward(i, k, self._rmm.b, d) # determine which node our block corresponds too node = bt_node_from_left(k, self._rmm.height) # special case: check sibling if result == -1 and bt_is_right_child(node): node = bt_left_sibling(node) k = node - (pow(2, self._rmm.height) - 1) result = self.scan_block_backward(i, k, self._rmm.b, d) # reset node and k in the event that result == -1 k = i // self._rmm.b node = bt_right_sibling(node) # if we do not have a result, we need to begin traversal of the tree if result == -1: while not bt_is_root(node): # right nodes cannot contain the solution as we are searching left # As such, if we are the right node already, evaluate its sibling. if bt_is_right_child(node): node = bt_left_sibling(node) if self._rmm.mM[node, self._rmm.m_idx] <= d <= self._rmm.mM[node, self._rmm.M_idx]: break # if we did not find a valid node, adjust for the relative # excess of the current node, and ascend to the parent node = bt_parent(node) if bt_is_root(node): return -1 # descend until we hit a leaf node while not bt_is_leaf(node, self._rmm.height): node = bt_right_child(node) # evaluate right, if not found, pick left if not (self._rmm.mM[node, self._rmm.m_idx] <= d <= self._rmm.mM[node, self._rmm.M_idx]): node = bt_left_sibling(node) # we have found a block with contains our solution. 
convert from the # node index back into the block index k = node - (pow(2, self._rmm.height) - 1) # scan for a result result = self.scan_block_backward(i, k, self._rmm.b, d) return result # add in .r and .n into rmm calculation # - necessary for mincount/minselect ### ### improved-octo-waddle-1.0.7/bp/_cli.py000066400000000000000000000022111463715353000174300ustar00rootroot00000000000000import click from bp import parse_jplace, insert_fully_resolved try: from bp.GPL import insert_multifurcating except: insert_multifurcating = None @click.group() def cli(): pass @cli.command() @click.option('--placements', type=click.Path(exists=True), required=True, help='jplace formatted data') @click.option('--output', type=click.Path(exists=False), required=True, help='Where to write the resulting newick') @click.option('--method', type=click.Choice(['fully-resolved', 'multifurcating']), required=True, help='Whether to fully resolve or multifurcate') def placement(placements, output, method): if method == 'fully-resolved': f = insert_fully_resolved elif method == 'multifurcating': if insert_multifurcating is None: raise ValueError("Please install with 'pip install iow-gpl'") f = insert_multifurcating else: raise ValueError("Unknown method: %s" % method) placement_df, tree = parse_jplace(open(placements).read()) sktree = f(placement_df, tree) sktree.write(output) if __name__ == '__main__': cli() improved-octo-waddle-1.0.7/bp/_conv.pyx000066400000000000000000000113501463715353000200220ustar00rootroot00000000000000import skbio import numpy as np cimport numpy as np from ._bp cimport BP # our noop used when monkey patching invalidate_caches def noop(*arg, **kwargs): pass def to_skbio_treenode(BP bp): """Convert BP to TreeNode Parameters ---------- bp : BP A BP tree Returns ------- skbio.TreeNode The tree represented as an skbio.TreeNode """ cdef int i nodes = [skbio.TreeNode() for i in range(bp.B.sum())] # skbio.TreeNode.append makes a very expensive call to # invalidate_caches. 
Let's remove that from consideration # temporarily while constructing the tree for node in nodes: # monkey patching triggers a weird edge case with python's copy, so the # "easy" thing is to disregard what we're doing in copy as these are # immutable anyway node._exclude_from_copy.add('_old_invalidate_caches') node._exclude_from_copy.add('invalidate_caches') node._old_invalidate_caches = node.invalidate_caches node.invalidate_caches = noop root = nodes[0] for i in range(bp.B.sum()): node_idx = bp.preorderselect(i) nodes[i].name = bp.name(node_idx) nodes[i].length = bp.length(node_idx) nodes[i].edge_num = bp.edge(node_idx) if node_idx != bp.root(): # preorder starts at 1 annoyingly parent = bp.preorder(bp.parent(node_idx)) - 1 nodes[parent].append(nodes[i]) root.length = None # ...and then let's restore cache invalidation for node in nodes: node.invalidate_caches = node._old_invalidate_caches return root def from_skbio_treenode(tree): """Convert a skbio TreeNode into BP Parameters ---------- tree : skbio.TreeNode The tree to convert Returns ------- tuple (BP, np.array of str, np.array of double) """ n_nodes = len(list(tree.traverse(include_self=True))) topo = np.zeros(n_nodes * 2, dtype=np.uint8) names = np.full(n_nodes * 2, None, dtype=object) lengths = np.zeros(n_nodes * 2, dtype=np.double) edges = np.zeros(n_nodes * 2, dtype=np.int32) ptr = 0 seen = set() for n in tree.pre_and_postorder(include_self=True): if n not in seen: topo[ptr] = 1 names[ptr] = n.name lengths[ptr] = n.length or 0.0 edges[ptr] = getattr(n, 'edge_num', None) or 0 if n.is_tip(): ptr += 1 seen.add(n) ptr += 1 return BP(topo, names=names, lengths=lengths, edges=edges) def to_skbio_treearray(BP bp): """Convert to a tree array comparable to TreeNode.to_array Parameters ---------- bp : BP A BP tree Returns ------- ### TODO: revise tuple ### needs to be a dict keyed by ['length'] and ['child_index'] np.array of child index positions np.array of branch lengths in index order with respect to child index positions """ cdef int i class mock_node: def __init__(self, id, is_tip): self.is_tip_ = is_tip self.id = id def is_tip(self): return self.is_tip_ child_index = np.zeros((int(bp.B.sum()) - bp.ntips(), 3), dtype=np.int64) length = np.zeros(bp.B.sum(), dtype=np.double) node_ids = np.zeros(bp.B.size, dtype=np.uint32) name = np.full(bp.B.sum(), None, dtype=object) # TreeNode.assign_ids, decompose target chi_ptr = 0 cur_index = 0 # the index into node_ids, equivalent to TreeNode.assign_ids id_index = dict.fromkeys(set(range(bp.B.sum()))) # map a node's "id" to an object which indicates if it is a leaf or not for i in range(bp.B.sum()): node_idx = bp.postorderselect(i + 1) # the index within the BP of the node if not bp.isleaf(node_idx): fchild = bp.fchild(node_idx) lchild = bp.lchild(node_idx) sib_idx = fchild # the sibling index wtihin the BP of the node while sib_idx != 0 and sib_idx <= lchild: node_ids[sib_idx] = cur_index id_index[cur_index] = mock_node(cur_index, bp.isleaf(sib_idx)) length[cur_index] = bp.length(sib_idx) name[cur_index] = bp.name(sib_idx) cur_index += 1 sib_idx = bp.nsibling(sib_idx) child_index[chi_ptr] = [node_idx, node_ids[fchild], node_ids[lchild]] chi_ptr += 1 # make sure to capture root id_index[bp.B.sum() - 1] = mock_node(cur_index, False) node_ids[0] = cur_index child_index[:, 0] = node_ids[child_index[:, 0]] child_index = child_index[np.argsort(child_index[:, 0])] return {'child_index': child_index, 'length': length, 'id_index': id_index, 'name': name} 
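# A minimal usage sketch for the converters above (illustrative only; the
# newick string is hypothetical and not part of the package's test data):
#
#   from bp import parse_newick, to_skbio_treenode, to_skbio_treearray
#
#   tree = parse_newick('((a:1,b:2)c:3,d:4)r;')
#   sk = to_skbio_treenode(tree)    # skbio.TreeNode rooted at 'r'
#   arr = to_skbio_treearray(tree)  # dict with 'child_index', 'length',
#                                   # 'id_index' and 'name' entries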
improved-octo-waddle-1.0.7/bp/_insert.pyx000066400000000000000000000107331463715353000203650ustar00rootroot00000000000000# encoding: utf-8 # cython: profile=False, boundscheck=False, wraparound=False from ._bp cimport BP from . import to_skbio_treenode import pandas as pd import json import skbio cimport cython # see the comment in _insert_setup. Avoid the use of invalidate_caches as it # is very expensive for tree mutation operations class TreeNodeNoValidate(skbio.TreeNode): def invalidate_caches(self): pass # our noop used when monkey patching invalidate_caches def noop(*arg, **kwargs): pass # pandas apply functions for preallocation of objects in bulk def _preallocate_fragment(r): return TreeNodeNoValidate(name=r['fragment'], length=r['pendant_length']) def _preallocate_empty(r): return TreeNodeNoValidate() def _insert_setup(placements, bptree, insert_type): # insertion setup addresses: # * treenode caching # * placement ordering # * preallocation of objects where "easy" sktree = to_skbio_treenode(bptree) node_lookup = {n.edge_num: n for n in sktree.traverse(include_self=True)} # mutation operations against TreeNode is expensive as every append or # remove triggers a call to invalidate caches, which requires a traversal # to find the root (and do other stuff). so let's monkey patch the method # to force a noop for node in sktree.traverse(include_self=True): node.invalidate_caches = noop # we are only setup to handle a single placement per fragment, so pull # deduplicated following guidance from Prof. Siavash Mirarab. We sort so # "better" has a smaller index value # fragment -> group the rows by the fragment, fragment order doesn't matter # like_weight_ratio -> our first selection criteria, higher is better # pendant_length -> our second selection criteria, lower is better placements = placements.sort_values(['fragment', 'like_weight_ratio', 'pendant_length'], ascending=[True, False, True]) # take the first non-duplicated row per fragment. because of the sort, this # is assured to be the highest weight ratio, and the smallest pendant # length. Ties are handled arbitrarily. 
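# Illustration of the rule above (hypothetical values, not real jplace data):
# after the sort, a fragment whose rows carry (like_weight_ratio,
# pendant_length) of (0.9, 0.01), (0.9, 0.05) and (0.4, 0.01) keeps only the
# (0.9, 0.01) placement, its first occurrence.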
placements = placements[~placements['fragment'].duplicated()] if insert_type == 'multifurcating': placements = placements.sort_values(['edge_num', 'pendant_length']) elif insert_type == 'fully_resolved': placements = placements.sort_values(['edge_num', 'distal_length'], ascending=[True, False]) else: raise ValueError() placements['node'] = placements.apply(_preallocate_fragment, axis=1) if insert_type == 'fully_resolved': placements['parent'] = placements.apply(_preallocate_empty, axis=1) return placements, sktree, node_lookup # pd.DataFrame is not a resolved type so we cannot use it here for cython def insert_fully_resolved(object placements, BP bptree): """Update the backbone, fully resolving edges with multiple queries Parameters ---------- placements : pd.DataFrame jplace data represented as a DataFrame bptree : bp.BP An instance of a BP tree, this is expected to contain edge numbers and correspond to the backbone for the jplace data Returns ------- skbio.TreeNode A tree with the fragments placed """ # TODO: profile, type and re-profile placements, sktree, node_lookup = \ _insert_setup(placements, bptree, 'fully_resolved') for edge, edge_grp in placements.groupby('edge_num'): existing_node = node_lookup[edge] current_parent = existing_node.parent # break the edge current_parent.remove(existing_node) existing_node.parent = None existing_length = existing_node.length for _, fragment in edge_grp.iterrows(): distal_length = fragment['distal_length'] fragment_node = fragment['node'] fragment_parent = fragment['parent'] # update branch lengths fragment_parent.length = existing_length - distal_length existing_length = distal_length # attach the nodes fragment_parent.append(fragment_node) current_parent.append(fragment_parent) # update current_parent = fragment_parent existing_node.length = existing_length current_parent.append(existing_node) existing_node.length = distal_length return sktree improved-octo-waddle-1.0.7/bp/_io.pyx000066400000000000000000000272541463715353000174760ustar00rootroot00000000000000# encoding: utf-8 # cython: profile=False, boundscheck=False, wraparound=False from ._bp cimport BP import time import numpy as np import pandas as pd import json cimport numpy as np cimport cython np.import_array() cdef inline np.double_t length_from_edge(unicode token): cdef: Py_ssize_t split_idx # 0.12345{0123} -> 0.12345 # OR 0.12345[0123] -> 0.12345 split_idx_curly = token.find('{') split_idx_square = token.find('[') split_idx = max(split_idx_curly, split_idx_square) if split_idx == -1: return np.double(token) else: return np.double(token[:split_idx]) cdef inline np.int32_t number_from_edge(unicode token): cdef: Py_ssize_t split_idx Py_ssize_t end # 0.12345{0123} -> 0123 # OR 0.12345[0123] -> 0123 split_idx_curly = token.find('{') split_idx_square = token.find('[') split_idx = max(split_idx_curly, split_idx_square) if split_idx == -1: return 0 else: end = len(token) return np.int32(token[split_idx + 1:end - 1]) cdef void _set_node_metadata(np.uint32_t ptr, unicode token, np.ndarray[object, ndim=1] names, np.ndarray[np.double_t, ndim=1] lengths, np.ndarray[np.int32_t, ndim=1] edges): """Inplace update of names and lengths given token details""" cdef: np.double_t length np.int32_t edge Py_ssize_t split_idx, i, end unicode name, token_parsed name = None length = 0.0 edge = 0 # NOTE: there is likely some fat to trim in this method. we do a lot # of work per token, we could probably do that work smarter. 
as is, # the changes to support edge numbers increase parsing ~20%, which # is annoying but probably not a critical if token[0] == u':': token_parsed = token[1:] length = length_from_edge(token_parsed) edge = number_from_edge(token_parsed) elif u':' in token: split_idx = token.rfind(':') name = token[:split_idx] token_parsed = token[split_idx + 1:] length = length_from_edge(token_parsed) edge = number_from_edge(token_parsed) name = name.strip("'").strip() elif u'{' in token or u'[' in token: # strip as " {123}" is valid? token = token.strip() end = len(token) edge = np.int32(token.strip()[1:end - 1]) else: name = token.replace("'", "").replace('"', "").strip() names[ptr] = name lengths[ptr] = length edges[ptr] = edge def write_newick(BP tree, object output, bint include_edge): cdef: list name_stack list edge_stack list length_stack list open_paren_stack object name np.npy_float64 length Py_ssize_t idx np.npy_uint8 v Py_ssize_t root_close length_stack = [] name_stack = [] edge_stack = [] open_paren_stack = [] root_close = tree.close(0) for idx, v in enumerate(tree.B): if v: if not tree.isleaf(idx): output.write('(') name_stack.append(tree.name(idx)) length_stack.append(tree.length(idx)) edge_stack.append(tree.edge(idx)) open_paren_stack.append(idx) else: name = name_stack.pop() length = length_stack.pop() edge = edge_stack.pop() if name is not None: # if we have magical characters, make sure we quote if set(name) & {';', ',', '(', ')', ':', '_'}: output.write("'%s'" % name) else: output.write(name) if include_edge: output.write(':%f{%d}' % (length, edge)) else: output.write(':%f' % length) if tree.nsibling(open_paren_stack.pop()) == 0: if idx != root_close: output.write(')') else: output.write(',') output.write(';') cpdef parse_newick(unicode data): cdef: np.uint32_t ptr, open_ptr Py_ssize_t token_ptr, tmp, lag, datalen BP topology unicode token, last_token np.ndarray[object, ndim=1] names np.ndarray[np.double_t, ndim=1] lengths np.ndarray[np.int32_t, ndim=1] edges if data.count(',') == 0: raise ValueError("Only trees with more than 1 node supported") data = data.strip() if not data.endswith(';'): raise ValueError("Newick does not appear terminated with a semicolon") datalen = len(data) topology = _newick_to_bp(data) names = np.full(len(topology.B), None, dtype=object) lengths = np.zeros(len(topology.B), dtype=np.double) edges = np.full(len(topology.B), 0, dtype=np.int32) ptr = 0 token_ptr = _ctoken(data, datalen, 0) token = data[0:token_ptr] last_token = None # lag reflects the scenario where ((x))y, where the label y gets may end # up being associated with an earlier unnamed vertex. lag represents the # offset between the topology pointer and the token pointer effectively. lag = 0 while token != ';': if token == '(': # an open parenthesis never has metadata associated with it ptr += 1 if (token == ')' or token == ',') and last_token == ')': # determine if there are unnamed/unlengthed nodes lag += 1 elif token not in '(),:;': ptr += lag lag = 0 open_ptr = topology.open(ptr) _set_node_metadata(open_ptr, token, names, lengths, edges) if topology.isleaf(ptr): ptr += 2 else: ptr += 1 last_token = token tmp = _ctoken(data, datalen, token_ptr) token = data[token_ptr:tmp] token_ptr = tmp topology.set_names(names) topology.set_lengths(lengths) topology.set_edges(edges) return topology cdef object _newick_to_bp(unicode data): """Convert newick to balanced parentheses Newick is _similar_ to BP, but differs notably at the tips of the tree. 
The complexity of the parse below comes from handling tips, and single descendents. Examples of situations that introduce this complexity are: ((a,b)) -> 11101000 (a) -> 1100 () -> 1100 ((a,b),c) -> 1110100100 (a,(b,c)) -> 1101101000 Newick is not required to have node labels on tips, and the interpretation of a comma is dependent on prior state. The strategy undertaken is to reduce the newick string to only structural components. From there, the string is interpreted into tokens of: {"1", "0", "10", "100"}, which directly translate into the resulting balanced parentheses topology. It is very likely the case that this parser can be done better with improved efficiency. """ cdef: Py_ssize_t i, topology_ptr, single_descendent Py_UCS4 c, last_c np.ndarray[np.uint8_t, ndim=1] topology potential_single_descendant = False topology = np.empty(len(data), dtype=np.uint8) topology_ptr = 0 last_c = u'x' in_quote = False for i in range(len(data)): c = data[i] if c == u"'": in_quote = not in_quote else: if in_quote: continue elif c == u'(': # opening of a node topology[topology_ptr] = 1 topology_ptr += 1 last_c = c potential_single_descendant = True elif c == u')': # closing of a node if potential_single_descendant or last_c == u',': # we have a single descendant or a last child (i.e., ",)") topology[topology_ptr] = 1 topology[topology_ptr + 1] = 0 topology[topology_ptr + 2] = 0 topology_ptr += 3 potential_single_descendant = False else: # it is possible to still have a single descendant in the case # of a multiple single descendant: (...()...) topology[topology_ptr] = 0 topology_ptr += 1 last_c = c elif c == u',': if last_c != u')': # we have a new tip topology[topology_ptr] = 1 topology[topology_ptr + 1] = 0 topology_ptr += 2 potential_single_descendant = False last_c = c else: # ignore non-structure pass return BP(topology[:topology_ptr]) cdef inline int _ccheck(Py_UCS4 c): """structure check""" cdef: Py_ssize_t i if c == u'(': return 1 elif c == u')': return 1 elif c == u',': return 1 elif c == u';': return 1 else: return 0 cdef inline int _is_quote(Py_UCS4 c): if c == u'"': return 1 elif c == u"'": return 1 else: return 0 cdef inline Py_ssize_t _ctoken(unicode data, Py_ssize_t datalen, Py_ssize_t start): cdef: Py_ssize_t idx, in_quote = 0 Py_UCS4 c if start == (datalen - 1): return start + 1 for idx in range(start, datalen): c = data[idx] if in_quote: if _is_quote(c): in_quote = 0 continue else: if _is_quote(c): in_quote = 1 continue if _ccheck(c): if idx == start: return idx + 1 else: return idx return idx + 1 def parse_jplace(object data): """Takes a jplace string, returns a DataFrame of placements and the tree Implementation specific caveats: 1) we do not support multiplicities. placements are required to have an "n" entry, and we ignore "nm" 2) Matsen et al (https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009) define [] for denoting edge labels and {} for denoting edge numbers. We currently support either [] OR {}, we do not support edges with both. In addition, we REQUIRE the edge labels if specified to be integer. If either of these caveats are problems, then we need to modify the code. 
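    A minimal usage sketch (the file name is hypothetical):

        with open('placements.jplace') as fp:
            df, tree = parse_jplace(fp.read())
        # df columns follow the jplace 'fields' entry with 'fragment'
        # prepended, restricted to placements whose edge_num exists in the
        # tree; tree is a BP tree annotated with the edge numbers.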
""" cdef: dict as_json list fields, placements, fragments, p, placement_data, list placement_inner_data, pquery, entry unicode frag, newick Py_ssize_t placement_idx, placement_inner_idx, fragment_idx, Py_ssize_t n_fragments BP tree object df set edges as_json = json.loads(data) newick = as_json['tree'] placement_data = as_json['placements'] fields = as_json['fields'] fields = ['fragment', ] + fields placements = [] for placement_idx in range(len(placement_data)): placement = placement_data[placement_idx] placement_inner_data = placement['p'] if 'n' not in placement: raise KeyError("jplace parsing limited to entries with 'n' keys") fragments = placement['n'] n_fragments = len(fragments) for placement_inner_idx in range(len(placement_inner_data)): pquery = placement_inner_data[placement_inner_idx] for fragment_idx in range(n_fragments): frag = fragments[fragment_idx] entry = [frag, ] + pquery placements.append(entry) tree = parse_newick(newick) edges = {tree.edge(i) for i, v in enumerate(tree.B) if v} df = pd.DataFrame(placements, columns=fields) df = df[df['edge_num'].isin(edges)] return df, tree improved-octo-waddle-1.0.7/bp/_version.py000066400000000000000000000553001463715353000203550ustar00rootroot00000000000000 # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. Generated by # versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) """Git implementation of _version.py.""" import errno import os import re import subprocess import sys from typing import Callable, Dict def get_keywords(): """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = " (HEAD -> master, tag: 1.0.7)" git_full = "6df7c5c8e1e9bb319e26e3801c41607fec4958da" git_date = "2024-06-26 20:07:04 -0700" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" def get_config(): """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "bp-" cfg.versionfile_source = "bp/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs, method): # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f): """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, env=None): """Call the given command(s).""" assert isinstance(commands, list) process = None for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None)) break except OSError: e = sys.exc_info()[1] if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir(parentdir_prefix, root, verbose): """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs): """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. 
keywords = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords(keywords, tag_prefix, verbose): """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. "2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. 
""" GITS = ["git"] TAG_PREFIX_REGEX = "*" if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] TAG_PREFIX_REGEX = r"\*" _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=True) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", "--always", "--long", "--match", "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) pieces["distance"] = int(count_out) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. 
date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces): """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces): """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces): """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver): """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). """ vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces): """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces): """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces): """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 
0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces): """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces): """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces): """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces, style): """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions(): """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. 
cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} improved-octo-waddle-1.0.7/bp/tests/000077500000000000000000000000001463715353000173165ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/tests/__init__.py000066400000000000000000000000001463715353000214150ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/tests/data/000077500000000000000000000000001463715353000202275ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/tests/data/200/000077500000000000000000000000001463715353000205305ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/tests/data/200/placement.jplace000066400000000000000000000232161463715353000236640ustar00rootroot00000000000000{ "fields": [ "edge_num", "likelihood", "like_weight_ratio", "distal_length", "pendant_length" ], "metadata": { "invocation": "/home/y5jiang/miniconda3/envs/std/bin/run_apples.py -q model.200.10000000.0.000001/01/1/query.fa -s model.200.10000000.0.000001/01/1/backbone.fa -t model.200.10000000.0.000001/01/1/jc_result/run.raxml.bestTree -o model.200.10000000.0.000001/01/1/jc_result/placement.jplace -f 0 -b 5 -D" }, "placements": [ { "n": [ "82" ], "p": [ [ 361, 0.01013206496780672, 1, 0.02652932626620403, 0.039354548684623215 ] ] }, { "n": [ "99" ], "p": [ [ 308, 0.04520741687623886, 1, 0.11020044356641526, 0.06550337922097477 ] ] }, { "n": [ "43" ], "p": [ [ 309, 0.04054866161921744, 1, 0.010712923050783987, 0.020946988900520196 ] ] }, { "n": [ "195" ], "p": [ [ 277, 0.01918907908397749, 1, 0.03065741838803451, 0.04513513498399864 ] ] }, { "n": [ "162" ], "p": [ [ 55, 0.01758935282545493, 1, 0.0033199487685078776, 0.05388735804976052 ] ] }, { "n": [ "56" ], "p": [ [ 81, 0.2366882303770561, 1, 0.04172580852519453, 0.0007060238727097983 ] ] }, { "n": [ "91" ], "p": [ [ 105, 0.0001863393767883581, 1, 0.04578898721138839, 0.08655004339151215 ] ] }, { "n": [ "174" ], "p": [ [ 89, 0.01216463967379211, 1, 0.04707020642820376, 0.045206727542450205 ] ] }, { "n": [ "5" ], "p": [ [ 143, 0.012162345471765756, 1, 0.023797389484252734, 0.10447375403452556 ] ] }, { "n": [ "55" ], "p": [ [ 139, 0.09563944060686769, 1, 0.014593217782258146, 0.04537214236560885 ] ] } ], "tree": 
"(((128:0.091649{0},(((((63:0.046046{1},34:0.026065{2}):0.147471{3},(36:0.056854{4},113:0.119931{5}):0.057087{6}):0.048654{7},(((65:0.013097{8},39:0.023368{9}):0.048249{10},(68:0.038382{11},(166:0.028569{12},(60:0.015049{13},86:0.070973{14}):0.018681{15}):0.065314{16}):0.013338{17}):0.037003{18},(172:0.11128{19},178:0.075705{20}):0.033605{21}):0.095847{22}):0.063586{23},((((127:0.095186{24},(169:0.004654{25},((176:0.080311{26},199:0.025038{27}):0.010857{28},171:0.036119{29}):0.008868{30}):0.039983{31}):0.071999{32},(((((((160:0.030215{33},192:0.023451{34}):0.048363{35},((197:0.1835{36},(141:0.072552{37},200:0.065412{38}):0.038114{39}):0.018307{40},138:0.111608{41}):0.011379{42}):0.064896{43},(20:0.132222{44},94:0.05062{45}):0.029734{46}):0.010164{47},((93:0.034447{48},17:0.16696{49}):0.036792{50},(52:0.073619{51},(108:0.108945{52},54:0.071498{53}):0.020253{54}):0.007886{55}):0.012139{56}):0.020069{57},(((117:0.087632{58},175:0.04907{59}):0.023117{60},105:0.063734{61}):0.034021{62},142:0.159509{63}):0.034053{64}):0.008633{65},180:0.038679{66}):0.011734{67},(32:0.177401{68},(191:0.012004{69},18:0.006781{70}):0.070602{71}):0.073237{72}):0.027244{73}):0.065858{74},(48:0.020422{75},145:0.041412{76}):0.078334{77}):0.158131{78},((((4:0.077285{79},140:0.059016{80}):0.10287{81},(112:0.054516{82},73:0.098056{83}):0.07508{84}):0.040879{85},(((64:0.034688{86},(((103:0.024899{87},164:0.027051{88}):0.108886{89},(90:0.037625{90},170:0.013174{91}):0.061949{92}):0.04288{93},125:0.03853{94}):0.000689{95}):0.043095{96},((((155:0.007038{97},116:0.010439{98}):0.021865{99},(115:0.04627{100},194:0.050225{101}):0.001882{102}):0.007504{103},35:0.02298{104}):0.057944{105},(((129:0.001131{106},144:0.004662{107}):0.035561{108},(177:0.068837{109},27:0.109227{110}):0.005532{111}):0.025179{112},(97:0.048918{113},133:0.07577{114}):0.049757{115}):0.027304{116}):0.007374{117}):0.072662{118},(147:0.088135{119},11:0.064036{120}):0.146731{121}):0.045209{122}):0.033272{123},(((57:0.038761{124},(25:0.012687{125},139:0.051202{126}):0.011167{127}):0.070301{128},((114:0.04211{129},167:0.018572{130}):0.026461{131},58:0.098008{132}):0.003546{133}):0.046481{134},(((((189:0.004832{135},84:0.00443{136}):0.021258{137},101:0.048949{138}):0.045037{139},(163:0.040324{140},80:0.095645{141}):0.025838{142}):0.028898{143},((21:0.071425{144},190:0.028078{145}):0.038979{146},(1:0.020267{147},33:0.063047{148}):0.117384{149}):0.019667{150}):0.020694{151},2:0.084342{152}):0.023439{153}):0.086726{154}):0.074316{155}):0.103828{156}):0.22174{157},(((((96:0.047644{158},71:0.017724{159}):0.079408{160},(29:0.058493{161},158:0.110122{162}):0.057941{163}):0.077188{164},((28:0.032344{165},179:0.019221{166}):0.129582{167},(((126:0.021211{168},122:0.003436{169}):0.122348{170},((120:0.048273{171},23:0.083327{172}):0.024246{173},59:0.078525{174}):0.022224{175}):0.060385{176},(100:0.092243{177},87:0.079571{178}):0.064832{179}):0.066272{180}):0.091976{181}):1e-06{182},69:0.084749{183}):0.086387{184},((((((46:0.003773{185},85:0.017394{186}):0.030542{187},70:0.090134{188}):0.018446{189},152:0.072279{190}):0.027947{191},(77:0.051355{192},30:0.021462{193}):0.075809{194}):0.005858{195},181:0.099444{196}):0.083413{197},(((182:0.064543{198},89:0.048253{199}):0.106031{200},((135:0.015705{201},7:0.00821{202}):0.030258{203},161:0.021676{204}):0.047744{205}):0.002861{206},(150:0.031904{207},((37:0.11002{208},61:0.068051{209}):0.018612{210},187:0.205805{211}):0.021969{212}):0.001188{213}):0.062534{214}):0.101311{215}):0.053824{216}):0.044002{217}):0.011366{218},(((((38:0.0187
12{219},92:0.035841{220}):0.028224{221},((154:0.021713{222},75:0.065821{223}):0.031695{224},132:0.049308{225}):0.049428{226}):0.019141{227},(24:0.084227{228},88:0.025948{229}):0.04393{230}):0.027486{231},((10:0.041678{232},50:0.099926{233}):0.089001{234},8:0.137018{235}):0.01696{236}):0.106719{237},(((((16:0.276403{238},((((95:0.11632{239},121:0.073923{240}):0.047448{241},(9:0.067187{242},136:0.037463{243}):0.124333{244}):0.019898{245},(((67:0.152317{246},41:0.038205{247}):0.00857{248},6:0.039272{249}):0.092741{250},(((130:0.023863{251},45:0.067713{252}):0.057962{253},(198:0.067436{254},(47:0.042684{255},12:0.016951{256}):0.079918{257}):0.019656{258}):0.043012{259},81:0.094409{260}):0.007624{261}):0.006874{262}):0.008771{263},(((76:0.035191{264},((51:0.030081{265},79:0.01744{266}):0.020574{267},124:0.020613{268}):0.034443{269}):0.079633{270},(98:0.074293{271},((123:0.060385{272},((168:0.019534{273},104:0.037426{274}):0.002911{275},131:0.013521{276}):0.03589{277}):0.012241{278},107:0.068656{279}):0.082793{280}):0.031941{281}):0.001462{282},186:0.144723{283}):0.00835{284}):0.029303{285}):0.010882{286},(66:0.173463{287},(3:0.142032{288},(102:0.056606{289},(165:0.041571{290},151:0.055941{291}):0.026282{292}):0.103159{293}):0.056229{294}):0.056911{295}):0.03039{296},((62:0.104589{297},(193:0.11889{298},15:0.057758{299}):1e-06{300}):0.056656{301},(((78:0.040435{302},137:0.055276{303}):0.028649{304},106:0.036861{305}):0.067081{306},26:0.070208{307}):0.123168{308}):0.042645{309}):0.010026{310},(((53:0.112361{311},153:0.068983{312}):0.029664{313},(159:0.092072{314},44:0.043428{315}):0.05657{316}):0.059581{317},(((196:0.024776{318},22:0.028294{319}):0.132592{320},((((185:0.032274{321},13:0.134446{322}):0.024481{323},(111:0.048261{324},173:0.012298{325}):0.174056{326}):0.006241{327},156:0.077862{328}):0.016951{329},(((110:0.015929{330},14:0.011513{331}):0.033346{332},40:0.066945{333}):0.079179{334},19:0.130813{335}):0.010367{336}):0.030079{337}):1e-06{338},(42:0.021955{339},183:0.040206{340}):0.056799{341}):0.032675{342}):0.104819{343}):0.003319{344},(((118:0.065071{345},109:0.109846{346}):0.002421{347},146:0.205691{348}):0.003295{349},((74:0.030031{350},148:0.023025{351}):0.048024{352},83:0.232573{353}):0.003829{354}):0.100581{355}):0.059103{356}):0.037669{357}):0.017766{358},((157:0.070487{359},134:0.121248{360}):0.046303{361},(((119:0.159111{362},(184:0.022568{363},143:0.016722{364}):0.068286{365}):0.076846{366},(149:0.088897{367},188:0.101306{368}):0.045138{369}):0.011258{370},(72:0.153764{371},49:0.087393{372}):0.038648{373}):0.010676{374}):0.009971{375},31:0.124516{376});", "version": 3 } 
improved-octo-waddle-1.0.7/bp/tests/data/200/placement.newick000066400000000000000000000110271463715353000237030ustar00rootroot00000000000000(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,113:0.119931):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,(((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.064896,(20:0.132222,94:0.05062):0.029734):0.010164,((93:0.034447,17:0.16696):0.036792,((52:0.073619,(108:0.108945,54:0.071498):0.020253):0.00332,162:0.053887):0.004566):0.012139):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,(112:0.054516,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,170:0.013174):0.061949):0.04288,125:0.03853):0.000689):0.043095,(((((155:0.007038,116:0.010439):0.021865,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047644,71:0.017724):0.079408,(29:0.058493,158:0.110122):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,59:0.078525):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,((((((46:0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.005858,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.00523
3):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); improved-octo-waddle-1.0.7/bp/tests/data/300/000077500000000000000000000000001463715353000205315ustar00rootroot00000000000000improved-octo-waddle-1.0.7/bp/tests/data/300/placement.full_resolve.newick000066400000000000000000000155541463715353000264150ustar00rootroot00000000000000(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,((((((((((113:0.004063,290:0.005988):0.000434,284:0.072903):0.003443,287:0.056094):0.001762,282:0.098423):0.003904,286:0.060559):0.002115,288:0.006143):0.002067,289:0.088623):0.000588,283:0.085678):0.001465,281:0.003223):0.002072,285:0.091293):0.098017):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,(((((((((((((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.001504,223:0.090953):0.000226,230:0.047803):0.000344,229:0.068799):0.000164,226:0.00335):0.00045,222:0.031651):0.000887,228:0.038382):0.002752,221:0.091118):0.000559,227:0.084958):0.001811,225:0.074026):0.000567,224:0.003594):0.055633,(20:0.132222,94:0.05062):0.029734):0.010164,((((((((((((93:0.034447,17:0.16696):0.036792,((((((((((((52:0.073619,(108:0.108945,54:0.071498):0.020253):0.000176,294:0.099238):0.002077,296:0.035863):0.000773,300:0.071974):0.000295,162:0.053887):0.002136,297:0.079837):0.003267,291:0.094715):0.002088,292:0.028878):0.001585,299:0.024189):0.000163,293:0.070704):0.000281,298:0.058305):0.001655,295:0.06148):-0.006608):0.000093,252:0.027034):0.000133,255:0.002332):0.004035,258:0.038656):0.003066,260:0.037665):0.003569,256:0.020097):0.009588,257:0.040595):0.017057,251:0.080285):0.000343,259:0.078047):0.000655,253:0.032226):0.003105,254:0.010247):-0.029506):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,(((((((((((112:0.000831,208:0.008299):0.0014,204:0.066574):0.001608,201:0.097385)
:0.000593,207:0.042287):0.000956,202:0.032033):0.000414,203:0.075777):0.001457,205:0.079511):0.00288,209:0.089969):0.007424,206:0.006239):0.002295,210:0.097627):0.034659,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,((((((((((170:0.003516,268:0.044987):0.00756,264:0.022814):0.002436,269:0.063646):0.00001,263:0.037064):0.001021,270:0.020117):0.006095,267:0.004791):0.0058,265:0.012625):0.000153,266:0.018241):0.005366,262:0.091953):0.012666,261:0.075964):-0.031448):0.061949):0.04288,125:0.03853):0.000689):0.043095,(((((((((((((((155:0.007038,116:0.010439):0.016547,214:0.016392):0.013741,213:0.053041):0.002082,219:0.006335):0.013601,218:0.086967):0.001359,220:0.002828):0.001732,217:0.088396):0.009109,211:0.062074):0.005639,212:0.032374):0.01498,215:0.070369):0.02325,216:0.058501):-0.080176,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047644,71:0.017724):0.079408,(29:0.058493,((((((((((158:0.000007,246:0.010132):0.000122,243:0.084106):0.00078,244:0.019057):0.000628,245:0.029957):0.000097,247:0.096535):0.000033,249:0.056484):0.000418,242:0.058161):0.000264,250:0.098013):0.000066,241:0.067649):0.000025,248:0.057714):0.107683):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,((((((((((59:0.006102,278:0.073546):0.000279,272:0.086692):0.002661,275:0.084918):0.009397,277:0.038376):0.002339,271:0.000845):0.010633,273:0.068913):0.001691,274:0.088526):0.008005,276:0.064526):0.000855,279:0.034788):0.001741,280:0.056721):0.034822):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,((((((((((((((((46:0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.000203,233:0.045389):0.00465,235:0.05425):0.003291,234:0.014328):0.000973,232:0.077092):0.004218,236:0.013624):0.005206,231:0.057416):0.0003,237:0.018566):0.003878,240:0.032864):0.004376,239:0.014682):0.001328,238:0.034672):-0.022566,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76
:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.005233):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); improved-octo-waddle-1.0.7/bp/tests/data/300/placement_mul.jplace000066400000000000000000000360731463715353000245470ustar00rootroot00000000000000{"fields": ["edge_num", "likelihood", "like_weight_ratio", "distal_length", "pendant_length"], "metadata": {"invocation": "/home/y5jiang/miniconda3/envs/std/bin/run_apples.py -q model.200.10000000.0.000001/01/1/query.fa -s model.200.10000000.0.000001/01/1/backbone.fa -t model.200.10000000.0.000001/01/1/jc_result/run.raxml.bestTree -o model.200.10000000.0.000001/01/1/jc_result/placement.jplace -f 0 -b 5 -D"}, "placements": [{"n": ["82"], "p": [[361, 0.01013206496780672, 1, 0.02652932626620403, 0.039354548684623215]]}, {"n": ["99"], "p": [[308, 0.04520741687623886, 1, 0.11020044356641526, 0.06550337922097477]]}, {"n": ["43"], "p": [[309, 0.04054866161921744, 1, 0.010712923050783987, 0.020946988900520196]]}, {"n": ["195"], "p": [[277, 0.01918907908397749, 1, 0.03065741838803451, 0.04513513498399864]]}, {"n": ["162"], "p": [[55, 0.01758935282545493, 1, 0.0033199487685078776, 0.05388735804976052]]}, {"n": ["56"], "p": [[81, 0.2366882303770561, 1, 0.04172580852519453, 0.0007060238727097983]]}, {"n": ["91"], "p": [[105, 0.0001863393767883581, 1, 0.04578898721138839, 0.08655004339151215]]}, {"n": ["174"], "p": [[89, 0.01216463967379211, 1, 0.04707020642820376, 0.045206727542450205]]}, {"n": ["5"], "p": [[143, 0.012162345471765756, 1, 0.023797389484252734, 0.10447375403452556]]}, {"n": ["55"], "p": [[139, 0.09563944060686769, 1, 0.014593217782258146, 0.04537214236560885]]}, {"n": ["201"], "p": [[82, 0, 1, 0.0038392824534644932, 0.09738497526912704]]}, {"n": ["202"], "p": [[82, 0, 1, 0.00538773823683071, 0.032032960914394386]]}, {"n": ["203"], "p": [[82, 0, 1, 0.005801486748959656, 0.07577745216073528]]}, {"n": ["204"], "p": [[82, 0, 1, 0.0022313670167670714, 0.06657407149107182]]}, {"n": ["205"], "p": [[82, 0, 1, 0.007258285742496784, 0.0795108053167541]]}, {"n": ["206"], "p": [[82, 0, 1, 0.01756211733321826, 0.006238859758160742]]}, {"n": ["207"], "p": [[82, 0, 1, 0.004431883939348495, 0.04228685810244977]]}, {"n": ["208"], "p": [[82, 0, 1, 0.000831186124187672, 
0.008298893486876858]]}, {"n": ["209"], "p": [[82, 0, 1, 0.010138002398385186, 0.08996899063567976]]}, {"n": ["210"], "p": [[82, 0, 1, 0.019857347084761147, 0.09762673781554322]]}, {"n": ["211"], "p": [[99, 0, 1, 0.05817121463528927, 0.062073731536758714]]}, {"n": ["212"], "p": [[99, 0, 1, 0.06381062694080353, 0.032373568085737825]]}, {"n": ["213"], "p": [[99, 0, 1, 0.03028823953685711, 0.05304055142721412]]}, {"n": ["214"], "p": [[99, 0, 1, 0.016547219749226325, 0.016391605372241335]]}, {"n": ["215"], "p": [[99, 0, 1, 0.07879101455738889, 0.07036932424282365]]}, {"n": ["216"], "p": [[99, 0, 1, 0.10204066488361377, 0.058500618162122354]]}, {"n": ["217"], "p": [[99, 0, 1, 0.04906249141328772, 0.08839613241770768]]}, {"n": ["218"], "p": [[99, 0, 1, 0.045971427260312794, 0.08696673554654553]]}, {"n": ["219"], "p": [[99, 0, 1, 0.03237070165118943, 0.006334874014950454]]}, {"n": ["220"], "p": [[99, 0, 1, 0.04733071059127464, 0.0028279960074323544]]}, {"n": ["221"], "p": [[43, 0, 1, 0.006325862907639057, 0.09111791720504678]]}, {"n": ["222"], "p": [[43, 0, 1, 0.002686703042868586, 0.0316512148732311]]}, {"n": ["223"], "p": [[43, 0, 1, 0.0015035609528728996, 0.09095341462064187]]}, {"n": ["224"], "p": [[43, 0, 1, 0.009262887050070149, 0.0035942670895906196]]}, {"n": ["225"], "p": [[43, 0, 1, 0.008695754915554806, 0.07402648602287575]]}, {"n": ["226"], "p": [[43, 0, 1, 0.0022365238321496814, 0.0033497094894365587]]}, {"n": ["227"], "p": [[43, 0, 1, 0.006885235457101727, 0.08495752397682416]]}, {"n": ["228"], "p": [[43, 0, 1, 0.0035739191459535813, 0.03838151801335741]]}, {"n": ["229"], "p": [[43, 0, 1, 0.0020728349983266912, 0.06879871242504297]]}, {"n": ["230"], "p": [[43, 0, 1, 0.0017292248833447473, 0.0478032903627909]]}, {"n": ["231"], "p": [[195, 0, 1, 0.018542079279493546, 0.05741622518262618]]}, {"n": ["232"], "p": [[195, 0, 1, 0.00911759221261531, 0.07709240371928998]]}, {"n": ["233"], "p": [[195, 0, 1, 0.00020319242574295114, 0.04538866538714619]]}, {"n": ["234"], "p": [[195, 0, 1, 0.008144144843706217, 0.014327999762752498]]}, {"n": ["235"], "p": [[195, 0, 1, 0.004853394537979706, 0.05424997642712658]]}, {"n": ["236"], "p": [[195, 0, 1, 0.013335581527542776, 0.01362360117744147]]}, {"n": ["237"], "p": [[195, 0, 1, 0.01884185541465883, 0.01856597288915145]]}, {"n": ["238"], "p": [[195, 0, 1, 0.028424432342402117, 0.0346715341119091]]}, {"n": ["239"], "p": [[195, 0, 1, 0.02709635449716477, 0.014682050864100994]]}, {"n": ["240"], "p": [[195, 0, 1, 0.022719999261047792, 0.03286365871050772]]}, {"n": ["241"], "p": [[162, 0, 1, 0.002413623158154767, 0.06764881473116402]]}, {"n": ["242"], "p": [[162, 0, 1, 0.002084003310834421, 0.05816110057007791]]}, {"n": ["243"], "p": [[162, 0, 1, 0.00012841791875353866, 0.08410564388060832]]}, {"n": ["244"], "p": [[162, 0, 1, 0.0009081699513484975, 0.019057370501793805]]}, {"n": ["245"], "p": [[162, 0, 1, 0.0015362448061163048, 0.0299567478307208]]}, {"n": ["246"], "p": [[162, 0, 1, 6.502620439317826e-06, 0.010131616101684948]]}, {"n": ["247"], "p": [[162, 0, 1, 0.0016329995755277153, 0.09653453597841082]]}, {"n": ["248"], "p": [[162, 0, 1, 0.002438739063810635, 0.05771424955563628]]}, {"n": ["249"], "p": [[162, 0, 1, 0.00166571868259112, 0.05648419244303224]]}, {"n": ["250"], "p": [[162, 0, 1, 0.002347510056324297, 0.0980128375802925]]}, {"n": ["251"], "p": [[56, 0, 1, 0.03754183067497629, 0.08028486400656981]]}, {"n": ["252"], "p": [[56, 0, 1, 9.308132946008312e-05, 0.027033850979417996]]}, {"n": ["253"], "p": [[56, 0, 1, 0.03853972935513422, 
0.03222639346515282]]}, {"n": ["254"], "p": [[56, 0, 1, 0.0416447839933881, 0.010247295724155604]]}, {"n": ["255"], "p": [[56, 0, 1, 0.00022650593572792356, 0.0023322118147899975]]}, {"n": ["256"], "p": [[56, 0, 1, 0.010896419479392783, 0.02009719174718031]]}, {"n": ["257"], "p": [[56, 0, 1, 0.020484866308019878, 0.040594671641619784]]}, {"n": ["258"], "p": [[56, 0, 1, 0.004261847471710495, 0.038655734404798414]]}, {"n": ["259"], "p": [[56, 0, 1, 0.037885033227844846, 0.0780474705476729]]}, {"n": ["260"], "p": [[56, 0, 1, 0.007327447653239102, 0.03766518019461845]]}, {"n": ["261"], "p": [[91, 0, 1, 0.044622174119930795, 0.07596413427814225]]}, {"n": ["262"], "p": [[91, 0, 1, 0.03195580867179439, 0.09195266235774964]]}, {"n": ["263"], "p": [[91, 0, 1, 0.013521585106735944, 0.03706402606132619]]}, {"n": ["264"], "p": [[91, 0, 1, 0.011075687820536058, 0.022814405720244624]]}, {"n": ["265"], "p": [[91, 0, 1, 0.026436963025463164, 0.012625199583600345]]}, {"n": ["266"], "p": [[91, 0, 1, 0.026589636186294962, 0.01824087755866659]]}, {"n": ["267"], "p": [[91, 0, 1, 0.02063734341795329, 0.004791104073642561]]}, {"n": ["268"], "p": [[91, 0, 1, 0.0035160423741338264, 0.04498656379340781]]}, {"n": ["269"], "p": [[91, 0, 1, 0.013511901464408727, 0.06364639184383179]]}, {"n": ["270"], "p": [[91, 0, 1, 0.014542448105622207, 0.02011739269298679]]}, {"n": ["271"], "p": [[174, 0, 1, 0.020778314066793896, 0.0008447184600889446]]}, {"n": ["272"], "p": [[174, 0, 1, 0.006381238233165224, 0.08669217824012865]]}, {"n": ["273"], "p": [[174, 0, 1, 0.03141180769387497, 0.0689128265938673]]}, {"n": ["274"], "p": [[174, 0, 1, 0.03310323147018247, 0.08852618741739243]]}, {"n": ["275"], "p": [[174, 0, 1, 0.009042278367296576, 0.08491831866457393]]}, {"n": ["276"], "p": [[174, 0, 1, 0.041107791928253444, 0.06452589289586465]]}, {"n": ["277"], "p": [[174, 0, 1, 0.018439103882379904, 0.038376083655843496]]}, {"n": ["278"], "p": [[174, 0, 1, 0.006102071391935765, 0.07354615008868613]]}, {"n": ["279"], "p": [[174, 0, 1, 0.04196240401900247, 0.034788431851590494]]}, {"n": ["280"], "p": [[174, 0, 1, 0.04370311067003953, 0.05672056605900292]]}, {"n": ["281"], "p": [[5, 0, 1, 0.019841704630175813, 0.003223061374573555]]}, {"n": ["282"], "p": [[5, 0, 1, 0.009702313799660963, 0.09842319880826393]]}, {"n": ["283"], "p": [[5, 0, 1, 0.018376981392323598, 0.0856782610322325]]}, {"n": ["284"], "p": [[5, 0, 1, 0.00449752634898426, 0.07290332316282015]]}, {"n": ["285"], "p": [[5, 0, 1, 0.02191391517652802, 0.09129293782283024]]}, {"n": ["286"], "p": [[5, 0, 1, 0.013606608435547207, 0.06055919854081219]]}, {"n": ["287"], "p": [[5, 0, 1, 0.007940484155814099, 0.05609399449434607]]}, {"n": ["288"], "p": [[5, 0, 1, 0.01572196902031946, 0.006143145957555097]]}, {"n": ["289"], "p": [[5, 0, 1, 0.017789442953763043, 0.08862323433217094]]}, {"n": ["290"], "p": [[5, 0, 1, 0.004063376214766736, 0.005988317791778242]]}, {"n": ["291"], "p": [[55, 0, 1, 0.008722795973856707, 0.09471491815328043]]}, {"n": ["292"], "p": [[55, 0, 1, 0.010810845046100902, 0.02887811138505705]]}, {"n": ["293"], "p": [[55, 0, 1, 0.012558260925821522, 0.07070398547982587]]}, {"n": ["294"], "p": [[55, 0, 1, 0.00017579504469218633, 0.09923788030546805]]}, {"n": ["295"], "p": [[55, 0, 1, 0.01449369616781433, 0.06148017400252222]]}, {"n": ["296"], "p": [[55, 0, 1, 0.0022525211215865875, 0.03586335857639652]]}, {"n": ["297"], "p": [[55, 0, 1, 0.005455581874504406, 0.07983681101178416]]}, {"n": ["298"], "p": [[55, 0, 1, 0.012838861087384591, 0.05830458097797218]]}, {"n": ["299"], 
"p": [[55, 0, 1, 0.012395505224489706, 0.0241892221887115]]}, {"n": ["300"], "p": [[55, 0, 1, 0.0030252844954950517, 0.0719738912287745]]}], "tree": "(((128:0.091649{0},(((((63:0.046046{1},34:0.026065{2}):0.147471{3},(36:0.056854{4},113:0.119931{5}):0.057087{6}):0.048654{7},(((65:0.013097{8},39:0.023368{9}):0.048249{10},(68:0.038382{11},(166:0.028569{12},(60:0.015049{13},86:0.070973{14}):0.018681{15}):0.065314{16}):0.013338{17}):0.037003{18},(172:0.11128{19},178:0.075705{20}):0.033605{21}):0.095847{22}):0.063586{23},((((127:0.095186{24},(169:0.004654{25},((176:0.080311{26},199:0.025038{27}):0.010857{28},171:0.036119{29}):0.008868{30}):0.039983{31}):0.071999{32},(((((((160:0.030215{33},192:0.023451{34}):0.048363{35},((197:0.1835{36},(141:0.072552{37},200:0.065412{38}):0.038114{39}):0.018307{40},138:0.111608{41}):0.011379{42}):0.064896{43},(20:0.132222{44},94:0.05062{45}):0.029734{46}):0.010164{47},((93:0.034447{48},17:0.16696{49}):0.036792{50},(52:0.073619{51},(108:0.108945{52},54:0.071498{53}):0.020253{54}):0.007886{55}):0.012139{56}):0.020069{57},(((117:0.087632{58},175:0.04907{59}):0.023117{60},105:0.063734{61}):0.034021{62},142:0.159509{63}):0.034053{64}):0.008633{65},180:0.038679{66}):0.011734{67},(32:0.177401{68},(191:0.012004{69},18:0.006781{70}):0.070602{71}):0.073237{72}):0.027244{73}):0.065858{74},(48:0.020422{75},145:0.041412{76}):0.078334{77}):0.158131{78},((((4:0.077285{79},140:0.059016{80}):0.10287{81},(112:0.054516{82},73:0.098056{83}):0.07508{84}):0.040879{85},(((64:0.034688{86},(((103:0.024899{87},164:0.027051{88}):0.108886{89},(90:0.037625{90},170:0.013174{91}):0.061949{92}):0.04288{93},125:0.03853{94}):0.000689{95}):0.043095{96},((((155:0.007038{97},116:0.010439{98}):0.021865{99},(115:0.04627{100},194:0.050225{101}):0.001882{102}):0.007504{103},35:0.02298{104}):0.057944{105},(((129:0.001131{106},144:0.004662{107}):0.035561{108},(177:0.068837{109},27:0.109227{110}):0.005532{111}):0.025179{112},(97:0.048918{113},133:0.07577{114}):0.049757{115}):0.027304{116}):0.007374{117}):0.072662{118},(147:0.088135{119},11:0.064036{120}):0.146731{121}):0.045209{122}):0.033272{123},(((57:0.038761{124},(25:0.012687{125},139:0.051202{126}):0.011167{127}):0.070301{128},((114:0.04211{129},167:0.018572{130}):0.026461{131},58:0.098008{132}):0.003546{133}):0.046481{134},(((((189:0.004832{135},84:0.00443{136}):0.021258{137},101:0.048949{138}):0.045037{139},(163:0.040324{140},80:0.095645{141}):0.025838{142}):0.028898{143},((21:0.071425{144},190:0.028078{145}):0.038979{146},(1:0.020267{147},33:0.063047{148}):0.117384{149}):0.019667{150}):0.020694{151},2:0.084342{152}):0.023439{153}):0.086726{154}):0.074316{155}):0.103828{156}):0.22174{157},(((((96:0.047644{158},71:0.017724{159}):0.079408{160},(29:0.058493{161},158:0.110122{162}):0.057941{163}):0.077188{164},((28:0.032344{165},179:0.019221{166}):0.129582{167},(((126:0.021211{168},122:0.003436{169}):0.122348{170},((120:0.048273{171},23:0.083327{172}):0.024246{173},59:0.078525{174}):0.022224{175}):0.060385{176},(100:0.092243{177},87:0.079571{178}):0.064832{179}):0.066272{180}):0.091976{181}):1e-06{182},69:0.084749{183}):0.086387{184},((((((46:0.003773{185},85:0.017394{186}):0.030542{187},70:0.090134{188}):0.018446{189},152:0.072279{190}):0.027947{191},(77:0.051355{192},30:0.021462{193}):0.075809{194}):0.005858{195},181:0.099444{196}):0.083413{197},(((182:0.064543{198},89:0.048253{199}):0.106031{200},((135:0.015705{201},7:0.00821{202}):0.030258{203},161:0.021676{204}):0.047744{205}):0.002861{206},(150:0.031904{207},((37:0.11002{208},61:0.068051{209}):0.
018612{210},187:0.205805{211}):0.021969{212}):0.001188{213}):0.062534{214}):0.101311{215}):0.053824{216}):0.044002{217}):0.011366{218},(((((38:0.018712{219},92:0.035841{220}):0.028224{221},((154:0.021713{222},75:0.065821{223}):0.031695{224},132:0.049308{225}):0.049428{226}):0.019141{227},(24:0.084227{228},88:0.025948{229}):0.04393{230}):0.027486{231},((10:0.041678{232},50:0.099926{233}):0.089001{234},8:0.137018{235}):0.01696{236}):0.106719{237},(((((16:0.276403{238},((((95:0.11632{239},121:0.073923{240}):0.047448{241},(9:0.067187{242},136:0.037463{243}):0.124333{244}):0.019898{245},(((67:0.152317{246},41:0.038205{247}):0.00857{248},6:0.039272{249}):0.092741{250},(((130:0.023863{251},45:0.067713{252}):0.057962{253},(198:0.067436{254},(47:0.042684{255},12:0.016951{256}):0.079918{257}):0.019656{258}):0.043012{259},81:0.094409{260}):0.007624{261}):0.006874{262}):0.008771{263},(((76:0.035191{264},((51:0.030081{265},79:0.01744{266}):0.020574{267},124:0.020613{268}):0.034443{269}):0.079633{270},(98:0.074293{271},((123:0.060385{272},((168:0.019534{273},104:0.037426{274}):0.002911{275},131:0.013521{276}):0.03589{277}):0.012241{278},107:0.068656{279}):0.082793{280}):0.031941{281}):0.001462{282},186:0.144723{283}):0.00835{284}):0.029303{285}):0.010882{286},(66:0.173463{287},(3:0.142032{288},(102:0.056606{289},(165:0.041571{290},151:0.055941{291}):0.026282{292}):0.103159{293}):0.056229{294}):0.056911{295}):0.03039{296},((62:0.104589{297},(193:0.11889{298},15:0.057758{299}):1e-06{300}):0.056656{301},(((78:0.040435{302},137:0.055276{303}):0.028649{304},106:0.036861{305}):0.067081{306},26:0.070208{307}):0.123168{308}):0.042645{309}):0.010026{310},(((53:0.112361{311},153:0.068983{312}):0.029664{313},(159:0.092072{314},44:0.043428{315}):0.05657{316}):0.059581{317},(((196:0.024776{318},22:0.028294{319}):0.132592{320},((((185:0.032274{321},13:0.134446{322}):0.024481{323},(111:0.048261{324},173:0.012298{325}):0.174056{326}):0.006241{327},156:0.077862{328}):0.016951{329},(((110:0.015929{330},14:0.011513{331}):0.033346{332},40:0.066945{333}):0.079179{334},19:0.130813{335}):0.010367{336}):0.030079{337}):1e-06{338},(42:0.021955{339},183:0.040206{340}):0.056799{341}):0.032675{342}):0.104819{343}):0.003319{344},(((118:0.065071{345},109:0.109846{346}):0.002421{347},146:0.205691{348}):0.003295{349},((74:0.030031{350},148:0.023025{351}):0.048024{352},83:0.232573{353}):0.003829{354}):0.100581{355}):0.059103{356}):0.037669{357}):0.017766{358},((157:0.070487{359},134:0.121248{360}):0.046303{361},(((119:0.159111{362},(184:0.022568{363},143:0.016722{364}):0.068286{365}):0.076846{366},(149:0.088897{367},188:0.101306{368}):0.045138{369}):0.011258{370},(72:0.153764{371},49:0.087393{372}):0.038648{373}):0.010676{374}):0.009971{375},31:0.124516{376});", "version": 
3}improved-octo-waddle-1.0.7/bp/tests/data/300/placement_mul.newick000066400000000000000000000136161463715353000245670ustar00rootroot00000000000000(((128:0.091649,(((((63:0.046046,34:0.026065):0.147471,(36:0.056854,(113:0.013345,(281:0,290:0.002765,288:0.00292,287:0.052871,286:0.057336,284:0.06968,283:0.082455,289:0.0854,285:0.08807,282:0.0952):0.003223):0.106586):0.057087):0.048654,(((65:0.013097,39:0.023368):0.048249,(68:0.038382,(166:0.028569,(60:0.015049,86:0.070973):0.018681):0.065314):0.013338):0.037003,(172:0.11128,178:0.075705):0.033605):0.095847):0.063586,((((127:0.095186,(169:0.004654,((176:0.080311,199:0.025038):0.010857,171:0.036119):0.008868):0.039983):0.071999,((((((((160:0.030215,192:0.023451):0.048363,((197:0.1835,(141:0.072552,200:0.065412):0.038114):0.018307,138:0.111608):0.011379):0.004497,(226:0,224:0.000245,222:0.028302,228:0.035032,230:0.044454,229:0.065449,225:0.070677,227:0.081608,223:0.087604,221:0.087768):0.00335):0.060399,(20:0.132222,94:0.05062):0.029734):0.010164,(((93:0.034447,17:0.16696):0.036792,((52:0.073619,(108:0.108945,54:0.071498):0.020253):0.007823,(299:0,292:0.004689,296:0.011674,162:0.029698,298:0.034115,295:0.037291,293:0.046515,300:0.047785,297:0.055648,291:0.070526,294:0.075049):0.024189):0.000063):0.01989,(255:0,254:0.007915,256:0.017765,252:0.024702,253:0.029894,260:0.035333,258:0.036324,257:0.038262,259:0.075715,251:0.077953):0.002332):0):0.020069,(((117:0.087632,175:0.04907):0.023117,105:0.063734):0.034021,142:0.159509):0.034053):0.008633,180:0.038679):0.011734,(32:0.177401,(191:0.012004,18:0.006781):0.070602):0.073237):0.027244):0.065858,(48:0.020422,145:0.041412):0.078334):0.158131,(((((4:0.077285,140:0.059016):0.041726,56:0.000706):0.061144,((112:0.007734,(206:0,208:0.00206,202:0.025794,207:0.036048,204:0.060335,203:0.069539,205:0.073272,209:0.08373,201:0.091146,210:0.091388):0.006239):0.046782,73:0.098056):0.07508):0.040879,(((64:0.034688,((((103:0.024899,164:0.027051):0.04707,174:0.045207):0.061816,(90:0.037625,(170:0.020641,(267:0,265:0.007834,266:0.01345,270:0.015326,264:0.018023,263:0.032273,268:0.040195,269:0.058855,261:0.071173,262:0.087162):0.004791):0):0.061949):0.04288,125:0.03853):0.000689):0.043095,((((((155:0.007038,116:0.010439):0.052438,(220:0,219:0.003507,214:0.013564,212:0.029546,213:0.050213,216:0.055673,211:0.059246,215:0.067541,218:0.084139,217:0.085568):0.002828):0,(115:0.04627,194:0.050225):0.001882):0.007504,35:0.02298):0.045789,91:0.08655):0.012155,(((129:0.001131,144:0.004662):0.035561,(177:0.068837,27:0.109227):0.005532):0.025179,(97:0.048918,133:0.07577):0.049757):0.027304):0.007374):0.072662,(147:0.088135,11:0.064036):0.146731):0.045209):0.033272,(((57:0.038761,(25:0.012687,139:0.051202):0.011167):0.070301,((114:0.04211,167:0.018572):0.026461,58:0.098008):0.003546):0.046481,(((((((189:0.004832,84:0.00443):0.021258,101:0.048949):0.014593,55:0.045372):0.030444,(163:0.040324,80:0.095645):0.025838):0.023797,5:0.104474):0.005101,((21:0.071425,190:0.028078):0.038979,(1:0.020267,33:0.063047):0.117384):0.019667):0.020694,2:0.084342):0.023439):0.086726):0.074316):0.103828):0.22174,(((((96:0.047644,71:0.017724):0.079408,(29:0.058493,(158:0.001516,(246:0,244:0.008926,245:0.019825,249:0.046353,248:0.047583,242:0.048029,241:0.057517,243:0.073974,247:0.086403,250:0.087881):0.010132):0.108606):0.057941):0.077188,((28:0.032344,179:0.019221):0.129582,(((126:0.021211,122:0.003436):0.122348,((120:0.048273,23:0.083327):0.024246,(59:0.025203,(271:0,279:0.033944,277:0.037531,280:0.055876,276:0.063681,273:0.068068,278:0.072701,275:0.084
074,272:0.085847,274:0.087681):0.000845):0.053322):0.022224):0.060385,(100:0.092243,87:0.079571):0.064832):0.066272):0.091976):0.000001,69:0.084749):0.086387,(((((((46:0.003773,85:0.017394):0.030542,70:0.090134):0.018446,152:0.072279):0.027947,(77:0.051355,30:0.021462):0.075809):0.015128,(236:0,234:0.000704,239:0.001058,237:0.004942,240:0.01924,238:0.021048,233:0.031765,235:0.040626,231:0.043793,232:0.063469):0.013624):0,181:0.099444):0.083413,(((182:0.064543,89:0.048253):0.106031,((135:0.015705,7:0.00821):0.030258,161:0.021676):0.047744):0.002861,(150:0.031904,((37:0.11002,61:0.068051):0.018612,187:0.205805):0.021969):0.001188):0.062534):0.101311):0.053824):0.044002):0.011366,(((((38:0.018712,92:0.035841):0.028224,((154:0.021713,75:0.065821):0.031695,132:0.049308):0.049428):0.019141,(24:0.084227,88:0.025948):0.04393):0.027486,((10:0.041678,50:0.099926):0.089001,8:0.137018):0.01696):0.106719,(((((16:0.276403,((((95:0.11632,121:0.073923):0.047448,(9:0.067187,136:0.037463):0.124333):0.019898,(((67:0.152317,41:0.038205):0.00857,6:0.039272):0.092741,(((130:0.023863,45:0.067713):0.057962,(198:0.067436,(47:0.042684,12:0.016951):0.079918):0.019656):0.043012,81:0.094409):0.007624):0.006874):0.008771,(((76:0.035191,((51:0.030081,79:0.01744):0.020574,124:0.020613):0.034443):0.079633,(98:0.074293,((123:0.060385,(((168:0.019534,104:0.037426):0.002911,131:0.013521):0.030657,195:0.045135):0.005233):0.012241,107:0.068656):0.082793):0.031941):0.001462,186:0.144723):0.00835):0.029303):0.010882,(66:0.173463,(3:0.142032,(102:0.056606,(165:0.041571,151:0.055941):0.026282):0.103159):0.056229):0.056911):0.03039,(((62:0.104589,(193:0.11889,15:0.057758):0.000001):0.056656,((((78:0.040435,137:0.055276):0.028649,106:0.036861):0.067081,26:0.070208):0.1102,99:0.065503):0.012968):0.010713,43:0.020947):0.031932):0.010026,(((53:0.112361,153:0.068983):0.029664,(159:0.092072,44:0.043428):0.05657):0.059581,(((196:0.024776,22:0.028294):0.132592,((((185:0.032274,13:0.134446):0.024481,(111:0.048261,173:0.012298):0.174056):0.006241,156:0.077862):0.016951,(((110:0.015929,14:0.011513):0.033346,40:0.066945):0.079179,19:0.130813):0.010367):0.030079):0.000001,(42:0.021955,183:0.040206):0.056799):0.032675):0.104819):0.003319,(((118:0.065071,109:0.109846):0.002421,146:0.205691):0.003295,((74:0.030031,148:0.023025):0.048024,83:0.232573):0.003829):0.100581):0.059103):0.037669):0.017766,(((157:0.070487,134:0.121248):0.026529,82:0.039355):0.019774,(((119:0.159111,(184:0.022568,143:0.016722):0.068286):0.076846,(149:0.088897,188:0.101306):0.045138):0.011258,(72:0.153764,49:0.087393):0.038648):0.010676):0.009971,31:0.124516); improved-octo-waddle-1.0.7/bp/tests/test_bp.py000066400000000000000000000412071463715353000213340ustar00rootroot00000000000000# ---------------------------------------------------------------------------- # Copyright (c) 2013--, BP development team. # # Distributed under the terms of the Modified BSD License. # # The full license is in the file LICENSE, distributed with this software. 
# ---------------------------------------------------------------------------- # line length is useful here, so disabling check # flake8: noqa: E501 from unittest import TestCase, main import numpy as np import numpy.testing as npt from bp import BP, parse_newick import bp.tests.test_bp_cy as tbc for name in dir(tbc): if name.startswith('test_'): getattr(tbc, name)() class BPTests(TestCase): def setUp(self): # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 self.fig1_B = np.array([1, 1, 1, 0, 1, 0, 1, 1 ,0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], dtype=np.uint8) self.BP = BP(self.fig1_B) def test_rmq(self): # ( ( ( ) ( ) ( ( ) ) ) ( ) ( ( ( ) ( ) ) ) ) #excess 1 2 3 2 3 2 3 4 3 2 1 2 1 2 3 4 3 4 3 2 1 0 #i 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 exp = [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 21], [1, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [2, 3, 3, 3, 3, 3, 3, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [3, 3, 3, 3, 3, 3, 3, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [4, 5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [5, 5, 5, 5, 5, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [6, 6, 6, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [7, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 21], [11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 21], [12, 12, 12, 12, 12, 12, 12, 12, 12, 21], [13, 13, 13, 13, 13, 13, 13, 20, 21], [14, 14, 14, 14, 14, 19, 20, 21], [15, 16, 16, 16, 19, 20, 21], [16, 16, 16, 19, 20, 21], [17, 18, 19, 20, 21], [18, 19, 20, 21], [19, 20, 21], [20, 21], [21]] for i in range(len(self.fig1_B)): for j in range(i+1, len(self.fig1_B)): self.assertEqual(self.BP.rmq(i, j), exp[i][j - i]) def test_rMq(self): # ( ( ( ) ( ) ( ( ) ) ) ( ) ( ( ( ) ( ) ) ) ) #excess 1 2 3 2 3 2 3 4 3 2 1 2 1 2 3 4 3 4 3 2 1 0 #i 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 exp = [[0, 1, 2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [1, 2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [2, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [3, 4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [4, 4, 4, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7], [8, 8, 8, 8, 8, 8, 8, 15, 15, 15, 15, 15, 15, 15], [9, 9, 9, 9, 9, 14, 15, 15, 15, 15, 15, 15, 15], [10, 11, 11, 11, 14, 15, 15, 15, 15, 15, 15, 15], [11, 11, 11, 14, 15, 15, 15, 15, 15, 15, 15], [12, 13, 14, 15, 15, 15, 15, 15, 15, 15], [13, 14, 15, 15, 15, 15, 15, 15, 15], [14, 15, 15, 15, 15, 15, 15, 15], [15, 15, 15, 15, 15, 15, 15], [16, 17, 17, 17, 17, 17], [17, 17, 17, 17, 17], [18, 18, 18, 18], [19, 19, 19], [20, 20], [21]] for i in range(len(self.fig1_B)): for j in range(i+1, len(self.fig1_B)): self.assertEqual(self.BP.rMq(i, j), exp[i][j - i]) def test_mincount(self): # ( ( ( ) ( ) ( ( ) ) ) ( ) ( ( ( ) ( ) ) ) ) #i 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 #excess 1 2 3 2 3 2 3 4 3 2 1 2 1 2 3 4 3 4 3 2 1 0 exp = [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 1], [1, 1, 2, 2, 3, 3, 3, 3, 4, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 2, 2, 2, 2, 3, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 2, 2, 2, 2, 3, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 
1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 1, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 1, 1, 1, 1, 2, 1], [1, 1, 1, 1, 1, 1, 2, 1, 1], [1, 1, 2, 2, 3, 1, 1, 1], [1, 1, 1, 2, 1, 1, 1], [1, 1, 2, 1, 1, 1], [1, 1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1], [1, 1], [1]] for i in range(len(self.fig1_B)): for j in range(i+1, len(self.fig1_B)): self.assertEqual(self.BP.mincount(i, j), exp[i][j - i]) def test_minselect(self): """position of the qth minimum in excess(i), excess(i + 1), . . . , excess(j).""" exp = {(0, 20, 1): 0, (0, 21, 1): 21, (0, 20, 2): 10, (0, 21, 2): None, (0, 20, 3): 12, (0, 20, 4): 20, (8, 15, 1): 10, (8, 15, 2): 12, (6, 9, 1): 9} for (i, j, q), e in exp.items(): self.assertEqual(self.BP.minselect(i, j, q), e) def test_preorder(self): exp = [1, 2, 3, 3, 4, 4, 5, 6, 6, 5, 2, 7, 7, 8, 9, 10, 10, 11, 11, 9, 8, 1] for i, e in enumerate(exp): self.assertEqual(self.BP.preorder(i), e) def test_preorderselect(self): exp = [0, 1, 2, 4, 6, 7, 11, 13, 14, 15, 17] for k, e in enumerate(exp): self.assertEqual(self.BP.preorderselect(k), e) def test_postorder(self): exp = [11, 5, 1, 1, 2, 2, 4, 3, 3, 4, 5, 6, 6, 10, 9, 7, 7, 8, 8, 9, 10, 11] for i, e in enumerate(exp): self.assertEqual(self.BP.postorder(i), e) def test_postorderselect(self): exp = [2, 4, 7, 6, 1, 11, 15, 17, 14, 13, 0] for k, e in enumerate(exp): self.assertEqual(self.BP.postorderselect(k + 1), e) def test_isancestor(self): exp = {(0, 0): False, # identity test (2, 1): False, # tip test (1, 2): True, # open test (1, 3): True, # close test (0, 7): True, # nested test (1, 7): True} # nested test for (i, j), e in exp.items(): self.assertEqual(self.BP.isancestor(i, j), e) def test_subtree(self): exp = [11, 5, 1, 1, 1, 1, 2, 1, 1, 2, 5, 1, 1, 4, 3, 1, 1, 1, 1, 3, 4, 11] for i, e in enumerate(exp): self.assertEqual(self.BP.subtree(i), e) def test_levelancestor(self): exp = {(2, 1): 1, # first tip to its parent (2, 2): 0, # first tip to root (4, 1): 1, # second tip to its parent (5, 1): 1, # second tip, closing, to its parent (7, 1): 6, # deep tip to its parent (7, 2): 1, # deep tip to its grandparent (7, 3): 0, # deep tip to its great grand parent (7, 9999): 0, # max out at the root (10, 0): -1} # can't be an ancestor of yourself for (i, d), e in exp.items(): self.assertEqual(self.BP.levelancestor(i, d), e) def _testinator(self, exp, f, verbose=False): self.assertEqual(len(exp), len(self.fig1_B)) for i, e in enumerate(exp): if verbose: print(i, e) self.assertEqual(f(i), e) def test_levelnext(self): # ( ( ( ) ( ) ( ( ) ) ) ( ) ( ( ( ) ( ) ) ) ) exp = [-1, 11, 4, 4, 6, 6, 14, 15, 15, 14, 11, 13, 13, -1, -1, 17, 17, -1, -1, -1, -1, -1] self.assertEqual(len(exp), len(self.fig1_B)) for i, e in enumerate(exp): self.assertEqual(self.BP.levelnext(i), e) def test_close(self): exp = [21, 10, 3, 5, 9, 8, 12, 20, 19, 16, 18] for i, e in zip(np.argwhere(self.BP.B == 1).squeeze(), exp): npt.assert_equal(self.BP.close(i), e) def test_lca(self): # lca(i, j) = parent(rmq(i, j) + 1) # unless isancestor(i, j) # (so lca(i, j) = i) or isancestor(j, i) (so lca(i, j) = j), nodes = [self.BP.preorderselect(k) for k in range(self.fig1_B.sum())] exp = {(nodes[2], nodes[3]): nodes[1], (nodes[2], nodes[5]): nodes[1], (nodes[2], nodes[9]): nodes[0], (nodes[9], nodes[10]): nodes[8], 
(nodes[1], nodes[8]): nodes[0]} for (i, j), e in exp.items(): self.assertEqual(self.BP.lca(i, j), e) def test_deepestnode(self): # deepestnode(i) = rMq(i, close(i)), exp = [7, 7, 2, 2, 4, 4, 7, 7, 7, 7, 7, 11, 11, 15, 15, 15, 15, 17, 17, 15, 15, 7] self._testinator(exp, self.BP.deepestnode) def test_height(self): # height(i) = excess(deepestnode(i)) − excess(i). exp = [3, 2, 0, 0, 0, 0, 1, 0, 0, 1, 2, 0, 0, 2, 1, 0, 0, 0, 0, 1, 2, 3] self._testinator(exp, self.BP.height) def test_ntips(self): exp = 6 obs = self.BP.ntips() self.assertEqual(obs, exp) def test_shear(self): # r 2 3 4 5 6 7 8 9 10 11 # ( ( ( ) ( ) ( ( ) ) ) ( ) ( ( ( ) ( ) ) ) ) #i 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 names = np.array(['r', '2', '3', None, '4', None, '5', '6', None, None, None, '7', None, '8', '9', '10', None, '11', None, None, None, None]) lengths = np.array([0, 1, 2, 0, 3, 0, 4, 5, 0, 0, 0, 6, 0, 7, 8, 9, 0, 10, 0, 0, 0, 0], dtype=np.double) self.BP.set_names(names) self.BP.set_lengths(lengths) in_ = {'4', '6', '7', '10', '11'} exp = np.array([1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], dtype=np.uint32) exp_n = np.array(['r', '2', '4', None, '5', '6', None, None, None, '7', None, '8', '9', '10', None, '11', None, None, None, None]) exp_l = np.array([0, 1, 3, 0, 4, 5, 0, 0, 0, 6, 0, 7, 8, 9, 0, 10, 0, 0, 0, 0], dtype=np.double) obs = self.BP.shear(in_) npt.assert_equal(exp, obs.B) for i in range(len(obs.B)): self.assertEqual(obs.name(i), exp_n[i]) self.assertEqual(obs.length(i), exp_l[i]) in_ = {'10', '11'} exp = np.array([1, 1, 1, 1, 0, 1, 0, 0, 0, 0], dtype=np.uint32) obs = self.BP.shear(in_).B npt.assert_equal(obs, exp) def test_shear_raise_tree_is_empty(self): names = np.array(['r', '2', '3', None, '4', None, '5', '6', None, None, None, '7', None, '8', '9', '10', None, '11', None, None, None, None]) lengths = np.array([0, 1, 2, 0, 3, 0, 4, 5, 0, 0, 0, 6, 0, 7, 8, 9, 0, 10, 0, 0, 0, 0], dtype=np.double) self.BP.set_names(names) with self.assertRaises(ValueError): self.BP.shear({'not', 'in', 'tree'}) def test_collapse(self): names = np.array(['r', '2', '3', None, '4', None, '5', '6', None, None, None, '7', None, '8', '9', '10', None, '11', None, None, None, None]) lengths = np.array([0, 1, 2, 0, 3, 0, 4, 5, 0, 0, 0, 6, 0, 7, 8, 9, 0, 10, 0, 0, 0, 0], dtype=np.double) self.BP.set_names(names) self.BP.set_lengths(lengths) exp = np.array([1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0], dtype=np.uint8) exp_n = ['r', '2', '3', None, '4', None, '6', None, None, '7', None, '9', '10', None, '11', None, None, None] exp_l = [0, 1, 2, 0, 3, 0, 9, 0, 0, 6, 0, 15, 9, 0, 10, 0, 0, 0] obs = self.BP.collapse() npt.assert_equal(obs.B, exp) for i in range(len(obs.B)): self.assertEqual(obs.name(i), exp_n[i]) self.assertEqual(obs.length(i), exp_l[i]) bp = BP(np.array([1, 1, 1, 0, 0, 1, 0, 0], dtype=np.uint8)) exp = np.array([1, 1, 0, 1, 0, 0]) obs = bp.collapse().B npt.assert_equal(obs, exp) def test_name_unset(self): for i in range(self.BP.B.size): self.assertEqual(self.BP.name(i), None) def test_length_unset(self): for i in range(self.BP.B.size): self.assertEqual(self.BP.length(i), 0.0) def test_name_length_set(self): names = np.full(self.BP.B.size, None, dtype=object) lengths = np.zeros(self.BP.B.size, dtype=np.double) names[0] = 'root' names[self.BP.preorderselect(7)] = 'other' lengths[1] = 1.23 lengths[self.BP.preorderselect(5)] = 5.43 self.BP.set_names(names) self.BP.set_lengths(lengths) self.assertEqual(self.BP.name(0), 'root') self.assertEqual(self.BP.name(1), None) 
self.assertEqual(self.BP.name(13), 'other') self.assertEqual(self.BP.length(1), 1.23) self.assertEqual(self.BP.length(5), 0.0) self.assertEqual(self.BP.length(7), 5.43) if __name__ == '__main__': main() improved-octo-waddle-1.0.7/bp/tests/test_bp_cy.pyx000066400000000000000000000263041463715353000222200ustar00rootroot00000000000000import numpy.testing as npt import numpy as np cimport numpy as np from bp._bp cimport BP, mM fig1_B = np.array([1, 1, 1, 0, 1, 0, 1, 1 ,0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0], dtype=np.uint8) def get_test_obj(): return BP(fig1_B) def test_rank(): cdef BP obj = get_test_obj() counts_1 = fig1_B.cumsum() counts_0 = (1 - fig1_B).cumsum() for exp, t in zip((counts_1, counts_0), (1, 0)): for idx, e in enumerate(exp): npt.assert_equal(obj.rank(t, idx), e) def test_select(): cdef BP obj = get_test_obj() pos_1 = np.unique(fig1_B.cumsum(), return_index=True)[1] #- 1 pos_0 = np.unique((1 - fig1_B).cumsum(), return_index=True)[1] for exp, t in zip((pos_1, pos_0), (1, 0)): for k in range(1, len(exp)): npt.assert_equal(obj.select(t, k), exp[k]) def test_rank_property(): cdef BP obj = get_test_obj() for i in range(len(fig1_B)): npt.assert_equal(obj.rank(1, i) + obj.rank(0, i), i+1) def test_rank_select_property(): cdef BP obj = get_test_obj() pos_1 = np.unique(fig1_B.cumsum(), return_index=True)[1] #- 1 pos_0 = np.unique((1 - fig1_B).cumsum(), return_index=True)[1] for t, pos in zip((0, 1), (pos_0, pos_1)): for k in range(len(pos)): # needed +t on expectation, unclear at this time why. npt.assert_equal(obj.rank(t, obj.select(t, k)), k + t) def test_excess(): cdef BP obj = get_test_obj() # from fig 2 exp = [1, 2, 3, 2, 3, 2, 3, 4, 3, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 2, 1, 0] for idx, e in enumerate(exp): npt.assert_equal(obj.excess(idx), e) def test_depth(): cdef BP obj = get_test_obj() # from fig 2 exp = [1, 2, 3, 2, 3, 2, 3, 4, 3, 2, 1, 2, 1, 2, 3, 4, 3, 4, 3, 2, 1, 0] for idx, e in enumerate(exp): npt.assert_equal(obj.depth(idx), e) def test_close(): cdef BP obj = get_test_obj() exp = [21, 10, 3, 5, 9, 8, 12, 20, 19, 16, 18] for i, e in zip(np.argwhere(fig1_B == 1).squeeze(), exp): npt.assert_equal(obj.close(i), e) npt.assert_equal(obj.excess(obj.close(i)), obj.excess(i) - 1) def test_open(): cdef BP obj = get_test_obj() exp = [2, 4, 7, 6, 1, 11, 15, 17, 14, 13, 0] for i, e in zip(np.argwhere(fig1_B == 0).squeeze(), exp): npt.assert_equal(obj.open(i), e) npt.assert_equal(obj.excess(obj.open(i)) - 1, obj.excess(i)) def test_enclose(): cdef BP obj = get_test_obj() # i > 0 and i < (len(B) - 1) exp = [0, 1, 1, 1, 1, 1, 6, 6, 1, 0, 0, 0, 0, 13, 14, 14, 14, 14, 13, 0] for i, e in zip(range(1, len(fig1_B) - 1), exp): npt.assert_equal(obj.enclose(i), e) def test_parent(): cdef BP obj = get_test_obj() exp = [-1, 0, 1, 1, 1, 1, 1, 6, 6, 1, 0, 0, 0, 0, 13, 14, 14, 14, 14, 13, 0, -1] for i, e in zip(range(len(fig1_B)), exp): npt.assert_equal(obj.parent(i), e) def test_root(): cdef BP obj = get_test_obj() npt.assert_equal(obj.root(), 0) def test_isleaf(): cdef BP obj = get_test_obj() exp = [0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0] for i, e in enumerate(exp): npt.assert_equal(obj.isleaf(i), e) def test_fchild(): cdef BP obj = get_test_obj() exp = [1, 2, 0, 0, 0, 0, 7, 0, 0, 7, 2, 0, 0, 14, 15, 0, 0, 0, 0, 15, 14, 1] for i, e in enumerate(exp): npt.assert_equal(obj.fchild(i), e) def test_lchild(): cdef BP obj = get_test_obj() exp = [obj.preorderselect(7), obj.preorderselect(4), 0, 0, 0, 0, obj.preorderselect(5), 0, 0, obj.preorderselect(5), obj.preorderselect(4), 
0, 0, obj.preorderselect(8), obj.preorderselect(10), 0, 0, 0, 0, obj.preorderselect(10), obj.preorderselect(8), obj.preorderselect(7)] for i, e in enumerate(exp): npt.assert_equal(obj.lchild(i), e) def test_nsibling(): cdef BP obj = get_test_obj() exp = [0, 11, 4, 4, 6, 6, 0, 0, 0, 0, 11, 13, 13, 0, 0, 17, 17, 0, 0, 0, 0, 0] for i, e in enumerate(exp): npt.assert_equal(obj.nsibling(i), e) def test_psibling(): cdef BP obj = get_test_obj() exp = [0, 0, 0, 0, 2, 2, 4, 0, 0, 4, 0, 1, 1, 11, 0, 0, 0, 15, 15, 0, 11, 0] for i, e in enumerate(exp): npt.assert_equal(obj.psibling(i), e) def test_fwdsearch(): cdef BP obj = get_test_obj() exp = {(0, 0): 10, # close of first child (3, -2): 21, # close of root (11, 2): 15} # from one tip to the next for (i, d), e in exp.items(): npt.assert_equal(obj.fwdsearch(i, d), e) def test_bwdsearch(): cdef BP obj = get_test_obj() exp = {(3, 0): 1, # open of parent (21, 4): 17, # nested tip (9, 2): 7} # open of the node for (i, d), e in exp.items(): npt.assert_equal(obj.bwdsearch(i, d), e) def test_fwdsearch_more(): cdef BP bp from bp import parse_newick bp = parse_newick('((a,b,(c)),d,((e,f)));') # simulating close so only testing open parentheses. A "close" on a closed # parenthesis does not make sense, so the result is not useful. # In practice, an "close" method should ensure it is operating on a closed # parenthesis. # [(open_idx, close_idx), ...] exp = [(1, 10), (0, 21), (2, 3), (4, 5), (6, 9), (7, 8), (11, 12), (13, 20), (14, 19), (15, 16), (17, 18)] for open_, exp_close in exp: obs_close = bp.fwdsearch(open_, -1) assert obs_close == exp_close # slightly modified version of fig2 with an extra child forcing a test # of the direct sibling check with negative partial excess # this translates into: # 012345678901234567890123 # ((()()(()))()((()()()))) bp = parse_newick('((a,b,(c)),d,((e,f,g)));') #enmM = rmm(bp.B, bp.B.size) # simulating close so only testing open parentheses. A "close" on a closed # parenthesis does not make sense, so the result is not useful. # In practice, an "close" method should ensure it is operating on a closed # parenthesis. # [(open_idx, close_idx), ...] exp = [(0, 23), (1, 10), (2, 3), (4, 5), (6, 9), (7, 8), (11, 12), (13, 22), (14, 21), (15, 16), (17, 18), (19, 20)] for open_, exp_close in exp: obs_close = bp.fwdsearch(open_, -1) assert obs_close == exp_close def test_bwdsearch_more(): cdef BP bp from bp import parse_newick bp = parse_newick('((a,b,(c)),d,((e,f)));') # simulating open so only testing closed parentheses. # [(close_idx, open_idx), ...] exp = [(21, 0), (8, 7), (9, 6), (10, 1), (3, 2), (5, 4), (12, 11), (16, 15), (20, 13), (19, 14), (18, 17)] for close_, exp_open in exp: obs_open = bp.bwdsearch(close_, 0) + 1 assert obs_open == exp_open # slightly modified version of fig2 with an extra child forcing a test # of the direct sibling check with negative partial excess # this translates into: # 012345678901234567890123 # ((()()(()))()((()()()))) bp = parse_newick('((a,b,(c)),d,((e,f,g)));') # simulating open so only testing closed parentheses. # [(close_idx, open_idx), ...] exp = [(23, 0), (10, 1), (3, 2), (5, 4), (9, 6), (8, 7), (12, 11), (22, 13), (21, 14), (16, 15), (18, 17), (20, 19)] for close_, exp_open in exp: obs_open = bp.bwdsearch(close_, 0) + 1 assert obs_open == exp_open def test_scan_block_forward(): cdef BP bp from bp import parse_newick bp = parse_newick('((a,b,(c)),d,((e,f)));') # [(open, close), ...] 
b = 4 d = -1 exp_b_4 = [(0, ((0, -1), (1, -1), (2, 3), (3, -1))), (1, ((4, 5), (5, -1), (6, -1), (7, -1))), # 8 and 9 are nonsensical from finding a "close" perspective (2, ((8, 9), (9, 10), (10, -1), (11, -1))), (3, ((12, -1), (13, -1), (14, -1), (15, -1))), # 16 and 18 are nonsensical from a "close" perspective (4, ((16, 19), (17, 18), (18, 19), (19, -1))), # 20 is nonsensical from finding a "close" perspective (5, ((20, 21), (21, -1)))] for k, exp_results in exp_b_4: for idx, exp_result in exp_results: obs_result = bp.scan_block_forward(idx, k, b, bp.excess(idx) + d) assert obs_result == exp_result b = 8 exp_b_8 = [(0, ((0, -1), (1, -1), (2, 3), (3, -1), (4, 5), (5, -1), (6, -1), (7, -1))), (1, ((8, 9), (9, 10), (10, -1), (11, 12), (12, -1), (13, -1), (14, -1), (15, -1))), (2, ((16, 19), (17, 18), (18, 19), (19, 20), (20, 21), (21, -1)))] for k, exp_results in exp_b_8: for idx, exp_result in exp_results: obs_result = bp.scan_block_forward(idx, k, b, bp.excess(idx) + d) assert obs_result == exp_result def test_scan_block_backward(): cdef BP bp from bp import parse_newick bp = parse_newick('((a,b,(c)),d,((e,f)));') # adding +1 to simluate "open" so calls on open parentheses are weird # [(open, close), ...] b = 4 d = 0 exp_b_4 = [(0, ((0, 0), (1, 0), (2, 0), (3, 2))), (1, ((4, 0), (5, 4), (6, 5), (7, 0))), (2, ((8, 0), (9, 0), (10, 0), (11, 10))), (3, ((12, 0), (13, 12), (14, 0), (15, 0))), (4, ((16, 0), (17, 16), (18, 17), (19, 0))), (5, ((20, 0), (21, 0)))] for k, exp_results in exp_b_4: for idx, exp_result in exp_results: obs_result = bp.scan_block_backward(idx, k, b, bp.excess(idx) + d) obs_result += 1 # simulating open assert obs_result == exp_result b = 8 exp_b_8 = [(0, ((0, 0), (1, 0), (2, 0), (3, 2), (4, 3), (5, 4), (6, 5), (7, 0))), (1, ((8, 0), (9, 0), (10, 0), (11, 10), (12, 11), (13, 12), (14, 9), (15, 8))), (2, ((16, 0), (17, 16), (18, 17), (19, 0), (20, 0), (21, 0)))] for k, exp_results in exp_b_8: for idx, exp_result in exp_results: obs_result = bp.scan_block_backward(idx, k, b, bp.excess(idx) + d) obs_result += 1 # simulating open assert obs_result == exp_result def test_rmm(): cdef BP bp from bp import parse_newick # test tree is ((a,b,(c)),d,((e,f))); # this is from fig 2 of Cordova and Navarro: # http://www.dcc.uchile.cl/~gnavarro/ps/tcs16.2.pdf bp = parse_newick('((a,b,(c)),d,((e,f)));') exp = np.array([[0, 1, 0, 1, 1, 0, 0, 1, 2, 1, 1, 2, 0], # m [4, 4, 4, 4, 4, 4, 0, 3, 4, 3, 4, 4, 1]], # M dtype=np.intp).T obs = mM(bp.B, bp.B.size) # original r / k0 values, preserving for posterity # [0, 0, 10, 0, 6, 10, 0, 0, 3, 6, 7, 10, 11], # r # [11, 6, 11, 2, 6, 11, 0, 1, 2, 5, 6, 9, 11]], # k0 assert exp.shape[0] == obs.mM.shape[0] assert exp.shape[1] == obs.mM.shape[1] for i in range(exp.shape[0]): for j in range(exp.shape[1]): assert obs.mM[i, j] == exp[i, j] improved-octo-waddle-1.0.7/bp/tests/test_conv.py000066400000000000000000000057521463715353000217050ustar00rootroot00000000000000from unittest import TestCase, main from io import StringIO import skbio import numpy.testing as npt import numpy as np from bp import (to_skbio_treenode, from_skbio_treenode, parse_newick, to_skbio_treearray) class ConversionTests(TestCase): def setUp(self): self.tstr = "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;" self.bp = parse_newick(self.tstr) self.sktn = skbio.TreeNode.read(StringIO(self.tstr)) def test_to_skbio_treenode_with_edge_numbers(self): # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009 # but without edge labels # 0 1 2 3 4 5 6 7 8 9 # 
1 1 1 0 1 0 0 1 0 0 in_ = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};' obs = parse_newick(in_) obs_sk = to_skbio_treenode(obs) self.assertEqual(obs_sk.find('A').edge_num, 0) self.assertEqual(obs_sk.find('B').edge_num, 1) self.assertEqual(obs_sk.find('D').edge_num, 3) self.assertEqual(obs_sk.find('C').edge_num, 4) self.assertEqual(obs_sk.edge_num, 5) def test_to_skbio_treenode(self): obs = to_skbio_treenode(self.bp) for o, e in zip(obs.traverse(), self.sktn.traverse()): if e.length is None: self.assertEqual(o.length, None if e.is_root() else 0.0) else: self.assertEqual(o.length, e.length) self.assertEqual(o.name, e.name) self.assertEqual(obs.ascii_art(), self.sktn.ascii_art()) def test_from_skbio_treenode(self): obs_bp = from_skbio_treenode(self.sktn) exp_bp = self.bp npt.assert_equal(obs_bp.B, exp_bp.B) for i in range(len(self.bp.B)): self.assertEqual(exp_bp.name(i), obs_bp.name(i)) self.assertEqual(exp_bp.length(i), obs_bp.length(i)) def test_to_array(self): t = parse_newick('(((a:1,b:2,c:3)x:4,(d:5)y:6)z:7,(e:8,f:9)z:10);') exp_child_index = np.array([[4, 0, 2], [5, 3, 3], [8, 4, 5], [9, 6, 7], [10, 8, 9]], dtype=np.uint32) exp_length = np.array([1, 2, 3, 5, 4, 6, 8, 9, 7, 10, 0.0], dtype=np.double) exp_id_index = {0: True, 1: True, 2: True, 3: True, 4: False, 5: False, 6: True, 7: True, 8: False, 9: False, 10: False} exp_name = np.array(['a', 'b', 'c', 'd', 'x', 'y', 'e', 'f', 'z', 'z', None]) obs = to_skbio_treearray(t) obs_child_index = obs['child_index'] obs_length = obs['length'] obs_id_index = obs['id_index'] obs_name = obs['name'] npt.assert_equal(obs_child_index, exp_child_index) npt.assert_equal(obs_length, exp_length) self.assertEqual(obs_id_index.keys(), exp_id_index.keys()) npt.assert_equal(obs_name, exp_name) for k in obs_id_index: self.assertEqual(obs_id_index[k].is_tip(), exp_id_index[k]) if __name__ == '__main__': main() improved-octo-waddle-1.0.7/bp/tests/test_insert.py000066400000000000000000000041711463715353000222360ustar00rootroot00000000000000import unittest import pkg_resources from bp import parse_jplace, insert_fully_resolved import skbio import pandas as pd class InsertTests(unittest.TestCase): package = 'bp.tests' def setUp(self): self.jplacedata_multiple = \ open(self.get_data_path('300/placement_mul.jplace')).read() self.final_multiple_fully_resolved = \ skbio.TreeNode.read(self.get_data_path('300/placement.full_resolve.newick')) def get_data_path(self, filename): # adapted from qiime2.plugin.testing.TestPluginBase return pkg_resources.resource_filename(self.package, 'data/%s' % filename) def test_insert_fully_resolved(self): exp = self.final_multiple_fully_resolved placements, backbone = parse_jplace(self.jplacedata_multiple) obs = insert_fully_resolved(placements, backbone) self.assertEqual({n.name for n in obs.tips()}, {n.name for n in exp.tips()}) self.assertEqual(obs.compare_rfd(exp), 0) self.assertAlmostEqual(obs.compare_tip_distances(exp), 0) def test_insert_fully_resolved_multiple_placements(self): exp = self.final_multiple_fully_resolved placements, backbone = parse_jplace(self.jplacedata_multiple) # add another placement elsewhere that we would not keep # as it's ratio is lower dup1 = placements.iloc[0].copy() dup1['like_weight_ratio'] -= 0.5 dup1['edge_num'] += 1 # add another placement elsewhere that we would not keep # as, though its ratio is the same, its pendant is larger dup2 = placements.iloc[1].copy() dup2['pendant_length'] += 0.5 dup2['edge_num'] += 1 placements = pd.concat([placements, pd.DataFrame([dup1, dup2])]) obs = 
insert_fully_resolved(placements, backbone) self.assertEqual({n.name for n in obs.tips()}, {n.name for n in exp.tips()}) self.assertEqual(obs.compare_rfd(exp), 0) self.assertAlmostEqual(obs.compare_tip_distances(exp), 0) if __name__ == '__main__': unittest.main() improved-octo-waddle-1.0.7/bp/tests/test_io.py000066400000000000000000000305351463715353000213440ustar00rootroot00000000000000from unittest import TestCase, main from bp import parse_newick, to_skbio_treenode, write_newick, parse_jplace import re import json import pandas as pd import pandas.testing as pdt import pkg_resources import skbio import io import numpy as np import numpy.testing as npt class NewickTests(TestCase): def test_parse_newick_simple_edge_numbers(self): # from https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0031009 # but without edge labels # 0 1 2 3 4 5 6 7 8 9 # 1 1 1 0 1 0 0 1 0 0 in_ = '((A:.01{0}, B:.01{1})D:.01{3}, C:.01{4}) {5};' exp_sk = '((A:.01, B:.01)D:.01, C:.01);' # skbio doesn't know about edge numbers obs = parse_newick(in_) obs_sk = to_skbio_treenode(obs) exp_sk = skbio.TreeNode.read([exp_sk]) self.assertEqual(obs_sk.compare_rfd(exp_sk), 0) self.assertEqual(obs.edge(2), 0) self.assertEqual(obs.edge(4), 1) self.assertEqual(obs.edge(1), 3) self.assertEqual(obs.edge(7), 4) self.assertEqual(obs.edge(0), 5) self.assertEqual(obs.edge_from_number(0), 2) self.assertEqual(obs.edge_from_number(1), 4) self.assertEqual(obs.edge_from_number(3), 1) self.assertEqual(obs.edge_from_number(4), 7) self.assertEqual(obs.edge_from_number(5), 0) def _compare_newick(self, obs, exp): a = skbio.TreeNode.read([obs]) b = skbio.TreeNode.read([exp]) self.assertEqual(a.compare_rfd(b), 0) npt.assert_equal(a.tip_tip_distances().data, b.tip_tip_distances().data) def test_write_newick_cases(self): tests = ['((foo"bar":1,baz:2)x:3)r;', "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;", "(((a)b)c,((d)e)f)r;", "((a,(b,c):5)'d','e; foo':10,((f))g)r;"] for test in tests: buf = io.StringIO() obs = write_newick(parse_newick(test), buf, False) buf.seek(0) obs = buf.read() self._compare_newick(obs, test) def test_write_newick_edges(self): test_a = '((foo"bar":1{0},baz:2{1})x:3{2})r;' test_b = "(((a)b)c,((d)e)f)r;" buf = io.StringIO() obs = write_newick(parse_newick(test_a), buf, True) buf.seek(0) obs = to_skbio_treenode(parse_newick(buf.read())) self.assertEqual(obs.find('foo"bar"').edge_num, 0) self.assertEqual(obs.find('baz').edge_num, 1) self.assertEqual(obs.find('x').edge_num, 2) buf = io.StringIO() obs = write_newick(parse_newick(test_b), buf, True) buf.seek(0) obs = to_skbio_treenode(parse_newick(buf.read())) for o in obs.traverse(): self.assertEqual(o.edge_num, 0) def test_parse_newick_singlenode_bug(self): # https://github.com/wasade/improved-octo-waddle/issues/29 test = 'i:1;' # let's not allow this edge case with self.assertRaises(ValueError): parse_newick(test) def test_parse_newick_no_semicolon_bug(self): # https://github.com/wasade/improved-octo-waddle/issues/26 test = "((h:1, i:1, j:1, k:1, l: 1),(e:1,f:1),(n:1,o:1,p:1))a:1" with self.assertRaises(ValueError): parse_newick(test) # make sure we work with a newline test = "((h:1, i:1, j:1, k:1, l: 1),(e:1,f:1),(n:1,o:1,p:1))a:1;\n" parse_newick(test) def test_write_newick_underscore_bug(self): test = "(((a)b)'c_foo',((d)e)f)r;" buf = io.StringIO() obs = write_newick(parse_newick(test), buf, False) buf.seek(0) self.assertIn("'c_foo'", test) def test_parse_newick_nested_quotes(self): # bug: quotes are removed in_ = '((foo"bar":1,baz:2)x:3)r;' exp = 
skbio.TreeNode.read([in_]) obs = to_skbio_treenode(parse_newick(in_)) self.assertEqual(obs.compare_subsets(exp), 0.0) def test_parse_newick_with_commas(self): # bug: comma is getting interpreted even if in quotes in_ = "(('foo,bar':1,baz:2)x:3)r;" exp = skbio.TreeNode.read([in_]) obs = to_skbio_treenode(parse_newick(in_)) self.assertEqual(obs.compare_subsets(exp), 0.0) def test_parse_newick_with_parens(self): # bug: parens are getting interpreted even if in quotes in_ = "(('foo(b)ar':1,baz:2)x:3)r;" exp = skbio.TreeNode.read([in_]) obs = to_skbio_treenode(parse_newick(in_)) self.assertEqual(obs.compare_subsets(exp), 0.0) def test_parse_newick(self): in_ = "((a:2,b):1,(c:4,d)y:20,e)r;" exp_bp = [1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0] exp_n = ['r', None, 'a', None, 'b', None, None, 'y', 'c', None, 'd', None, None, 'e', None, None] exp_l = [0, 1, 2, 0, 0, 0, 0, 20, 4, 0, 0, 0, 0, 0, 0, 0] obs_bp = parse_newick(in_) npt.assert_equal(obs_bp.B, np.asarray(exp_bp, dtype=bool)) for i, (e_n, e_l) in enumerate(zip(exp_n, exp_l)): self.assertEqual(obs_bp.name(i), e_n) self.assertEqual(obs_bp.length(i), e_l) def test_parse_newick_complex(self): in_ = "(((a:1,b:2.5)c:6,d:8,(e),(f,g,(h:1,i:2)j:1)k:1.2)l,m:2)r;" # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 exp_bp = [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0] exp_n = ['r', 'l', 'c', 'a', None, 'b', None, None, 'd', None, None, 'e', None, None, 'k', 'f', None, 'g', None, 'j', 'h', None, 'i', None, None, None, None, 'm', None, None] exp_l = [0, 0, 6, 1, 0, 2.5, 0, 0, 8, 0, 0, 0, 0, 0, 1.2, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 2, 0, 0] obs_bp = parse_newick(in_) npt.assert_equal(obs_bp.B, np.asarray(exp_bp, dtype=bool)) for i, (e_n, e_l) in enumerate(zip(exp_n, exp_l)): self.assertEqual(obs_bp.name(i), e_n) self.assertEqual(obs_bp.length(i), e_l) def test_parse_newick_singledesc(self): in_ = "(((a)b)c,((d)e)f)r;" # 0 1 2 3 4 5 6 7 8 9 10 11 12 13 exp_bp = [1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0] exp_n = ['r', 'c', 'b', 'a', None, None, None, 'f', 'e', 'd', None, None, None, None] obs_bp = parse_newick(in_) npt.assert_equal(obs_bp.B, np.asarray(exp_bp, dtype=bool)) for i, e_n in enumerate(exp_n): self.assertEqual(obs_bp.name(i), e_n) def test_parse_newick_unnamed_singledesc(self): in_ = "((a,b)c,d,(e))r;" exp_bp = [1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0] exp_n = ['r', 'c', 'a', None, 'b', None, None, 'd', None, None, 'e', None, None, None] obs_bp = parse_newick(in_) npt.assert_equal(obs_bp.B, np.asarray(exp_bp, dtype=bool)) for i, e_n in enumerate(exp_n): self.assertEqual(obs_bp.name(i), e_n) def test_parse_newick_name_with_semicolon(self): in_ = "((a,(b,c):5)'d','e; foo':10,((f))g)r;" exp_bp = [1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0] exp_n = ['r', 'd', 'a', None, None, 'b', None, 'c', None, None, None, 'e; foo', None, 'g', None, 'f', None, None, None, None] exp_l = [0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 10, 0, 0, 0, 0, 0, 0, 0, 0] obs_bp = parse_newick(in_) npt.assert_equal(obs_bp.B, np.asarray(exp_bp, dtype=bool)) for i, (e_n, e_l) in enumerate(zip(exp_n, exp_l)): self.assertEqual(obs_bp.name(i), e_n) self.assertEqual(obs_bp.length(i), e_l) class JPlaceParseTests(TestCase): package = 'bp.tests' def setUp(self): self.jplacedata = open(self.get_data_path('200/placement.jplace')) self.jplacedata = self.jplacedata.read() no_edge_numbers = re.sub(r"{\d+}", '', json.loads(self.jplacedata)['tree']) self.tree = skbio.TreeNode.read([no_edge_numbers]) def get_data_path(self, filename): # 
adapted from qiime2.plugin.testing.TestPluginBase return pkg_resources.resource_filename(self.package, 'data/%s' % filename) def test_place_jplace_square_braces(self): self.jplacedata = json.loads(self.jplacedata) treestr = self.jplacedata['tree'] treestr = re.sub(r"{(\d+)}", r"[\1]", treestr) self.jplacedata['tree'] = treestr self.jplacedata = json.dumps(self.jplacedata) exp_tree = self.tree obs_df, obs_tree = parse_jplace(self.jplacedata) obs_tree = to_skbio_treenode(obs_tree) self.assertEqual(obs_tree.compare_rfd(exp_tree), 0) for n in obs_tree.traverse(include_self=False): self.assertTrue(n.edge_num >= 0) def test_parse_jplace_simple(self): columns = ['fragment', 'edge_num', 'likelihood', 'like_weight_ratio', 'distal_length', 'pendant_length'] exp_df = [["82", 361, 0.01013206496780672, 1, 0.02652932626620403, 0.039354548684623215], ["99", 308, 0.04520741687623886, 1, 0.11020044356641526, 0.06550337922097477], ["43", 309, 0.04054866161921744, 1, 0.010712923050783987, 0.020946988900520196], ["195", 277, 0.01918907908397749, 1, 0.03065741838803451, 0.04513513498399864], ["162", 55, 0.01758935282545493, 1, 0.0033199487685078776, 0.05388735804976052], ["56", 81, 0.2366882303770561, 1, 0.04172580852519453, 0.0007060238727097983], ["91", 105, 0.0001863393767883581, 1, 0.04578898721138839, 0.08655004339151215], ["174", 89, 0.01216463967379211, 1, 0.04707020642820376, 0.045206727542450205], ["5", 143, 0.012162345471765756, 1, 0.023797389484252734, 0.10447375403452556], ["55", 139, 0.09563944060686769, 1, 0.014593217782258146, 0.04537214236560885]] exp_df = pd.DataFrame(exp_df, columns=columns) exp_tree = self.tree obs_df, obs_tree = parse_jplace(self.jplacedata) obs_tree = to_skbio_treenode(obs_tree) pdt.assert_frame_equal(obs_df, exp_df) self.assertEqual(obs_tree.compare_rfd(exp_tree), 0) def test_parse_jplace_multiple_per_fragment(self): columns = ['fragment', 'edge_num', 'likelihood', 'like_weight_ratio', 'distal_length', 'pendant_length'] exp_df = [["82", 361, 0.01013206496780672, 1, 0.02652932626620403, 0.039354548684623215], ["99", 308, 0.04520741687623886, 1, 0.11020044356641526, 0.06550337922097477], # tied on like_weight_ratio but lower pendant ["99", 309, 0.04520741687623886, 1, 0.11020044356641526, 0.00550337922097477], ["55", 139, 0.09563944060686769, 1, 0.014593217782258146, 0.04537214236560885], # tied higher like_weight_ratio ["55", 138, 0.09563944060686769, 10, 0.014593217782258146, 0.04537214236560885]] exp_df = pd.DataFrame(exp_df, columns=columns) # ...adjust jplace data data = json.loads(self.jplacedata) keep = [] for placement in data['placements']: if placement['n'][0] == '82': keep.append(placement) elif placement['n'][0] == '99': placement['p'].append([309, 0.04520741687623886, 1, 0.11020044356641526, 0.00550337922097477]) keep.append(placement) elif placement['n'][0] == '55': placement['p'].append([138, 0.09563944060686769, 10, 0.014593217782258146, 0.04537214236560885]) keep.append(placement) data['placements'] = keep data = json.dumps(data) exp_tree = self.tree obs_df, obs_tree = parse_jplace(data) obs_tree = to_skbio_treenode(obs_tree) pdt.assert_frame_equal(obs_df, exp_df) self.assertEqual(obs_tree.compare_rfd(exp_tree), 0) if __name__ == '__main__': main() improved-octo-waddle-1.0.7/cdef_bp_tests.py000066400000000000000000000002071463715353000207300ustar00rootroot00000000000000import bp.tests.test_bp_cy for n in dir(bp.tests.test_bp_cy): if n.startswith('test_'): getattr(bp.tests.test_bp_cy, n)() 
improved-octo-waddle-1.0.7/ci/000077500000000000000000000000001463715353000161465ustar00rootroot00000000000000improved-octo-waddle-1.0.7/ci/conda_host_env.yml000066400000000000000000000003051463715353000216600ustar00rootroot00000000000000# directly adapted from # https://github.com/biocore/scikit-bio/blob/31123c6471dc62f45a55bfdff59c61a4850be367/ci/conda_host_env.yml#LL1C1-L9C35 name: testing channels: - conda-forge - defaults improved-octo-waddle-1.0.7/ci/conda_requirements.txt000066400000000000000000000000561463715353000225770ustar00rootroot00000000000000pytest numpy flake8 cython < 1.0.0 scikit-bio improved-octo-waddle-1.0.7/ipynb/000077500000000000000000000000001463715353000166745ustar00rootroot00000000000000improved-octo-waddle-1.0.7/ipynb/find inconsistencies.ipynb000066400000000000000000000217471463715353000240530ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Test the BP implementation against the `skbio.TreeNode` using real world trees provided by Greengenes 13_8. The following conditions are checked:\n", "\n", "* the topology, names, and lengths on a direct parse from newick are identical in preorder traversal\n", "* a serialization back to newick is identical by string comparison\n", "* that `TreeNode.to_array` is reproduced minimally for unifrac\n", "* that shear/collapse operations result in equivalent trees" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2\n", "from bp import parse_newick, to_skbio_treenode, to_skbio_treearray\n", "from skbio import TreeNode\n", "import numpy as np\n", "import glob\n", "from random import shuffle" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def _correct_gg_reroot_length_issue(t):\n", " # the greengenes trees on reroot had a node with a length set to None\n", " # find and correct if it exists\n", " try:\n", " gg_reroot_none_node = t.find('k__Bacteria')\n", " gg_reroot_none_node.length = 0.0\n", " except:\n", " pass\n", "\n", " return t\n", "\n", "def preorder_names(fp):\n", " \"\"\"Find any preorder node name inconsistencies as a proxy for topology testing\"\"\"\n", " skt = _correct_gg_reroot_length_issue(TreeNode.read(fp))\n", " bpt = parse_newick(open(fp).read())\n", " \n", " for sk_node, k in zip(skt.preorder(include_self=True), range(bpt.B.sum())):\n", " bp_idx = bpt.preorderselect(k)\n", " if (sk_node.name != bpt.name(bp_idx)) or (sk_node.length != bpt.length(bp_idx)):\n", " # bpt right now uses 0.0 for root length\n", " if sk_node.is_root() and bp_idx == bpt.root():\n", " continue\n", " else:\n", " return sk_node, bp_idx\n", " \n", " return None, None\n", "\n", "def newick_comparison(fp):\n", " \"\"\"Verify newick output is consistent\n", " \n", " Note: the BP tree is converted to TreeNode\n", " \"\"\"\n", " tn = str(_correct_gg_reroot_length_issue(TreeNode.read(fp)))\n", " bp = str(to_skbio_treenode(parse_newick(open(fp).read())))\n", " \n", " for i in range(len(tn)):\n", " if tn[i] != bp[i]:\n", " return (tn[i-25:i+25], bp[i-25:i+25])\n", " return None, None\n", " \n", "def check_to_array(fp):\n", " skt = _correct_gg_reroot_length_issue(TreeNode.read(fp)).to_array(nan_length_value=0.0)\n", " bpt = to_skbio_treearray(parse_newick(open(fp).read()))\n", " \n", " if list(skt['id_index'].keys()) != list(bpt['id_index'].keys()):\n", " return 'id_index keys are not equal'\n", " \n", " for k in skt['id_index']:\n", " if 
skt['id_index'][k].is_tip() != bpt['id_index'][k].is_tip():\n", " return \"id index tip identification is not equal\"\n", " \n", " if not np.allclose(skt['child_index'], bpt['child_index']):\n", " return 'child_index is not equal'\n", " \n", " if not np.allclose(skt['length'], bpt['length']):\n", " return 'length is not equal'\n", " \n", " return None\n", " \n", "def check_shear(fp):\n", " \"\"\"Verify a random shear/collapse is comparable\n", " \n", " Note: skbio.TreeNode can alter the order of children in the tree. This does not\n", " represent a change in topology. Because of this, we are testing node subsets\n", " which are invariant to child order. \n", " \"\"\"\n", " skt = _correct_gg_reroot_length_issue(TreeNode.read(fp))\n", " bpt = parse_newick(open(fp).read())\n", " \n", " # determine which tips to keep\n", " names = [n.name for n in skt.tips()]\n", " shuffle(names)\n", " to_keep = int(np.ceil(len(names) * 0.1))\n", " names_to_keep = set(names[:to_keep])\n", " \n", " # shear the treenode\n", " skt_shear = skt.shear(names_to_keep) \n", " bpt_shear = bpt.shear(names_to_keep).collapse()\n", " \n", " res = skt_shear.subsets() == to_skbio_treenode(bpt_shear).subsets()\n", " if res:\n", " return None\n", " else:\n", " return \"shear/collapse is not equivalent\"" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "61_otus.tree\n", "64_otus.tree\n", "67_otus.tree\n", "70_otus.tree\n", "73_otus.tree\n", "76_otus.tree\n", "79_otus.tree\n", "82_otus.tree\n", "85_otus.tree\n", "88_otus.tree\n", "91_otus.tree\n", "94_otus.tree\n", "97_otus.tree\n", "99_otus.tree\n" ] }, { "data": { "text/plain": [ "{'61_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '64_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '67_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '70_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '73_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '76_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '79_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '82_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '85_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '88_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '91_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '94_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 
'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '97_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None},\n", " '99_otus.tree': {'check_to_array': None,\n", " 'newick_comparison': (None, None),\n", " 'preorder_names': (None, None),\n", " 'shear/collapse': None}}" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "problems = {}\n", "for f in glob.glob('../../../greengenes_release/gg_13_8_otus/trees/*_otus.tree'):\n", " obs = {}\n", " key = f.rsplit('/')[-1]\n", " print(key)\n", " \n", " obs['preorder_names'] = preorder_names(f)\n", " obs['newick_comparison'] = newick_comparison(f)\n", " obs['check_to_array'] = check_to_array(f)\n", " obs['shear/collapse'] = check_shear(f)\n", " problems[key] = obs\n", "problems" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.1" } }, "nbformat": 4, "nbformat_minor": 0 } improved-octo-waddle-1.0.7/ipynb/performance comparison.ipynb000066400000000000000000004524001463715353000244000ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "%load_ext autoreload\n", "%load_ext memory_profiler\n", "%autoreload 2\n", "from bp import parse_newick, to_skbio_treenode, to_skbio_treearray\n", "from skbio import TreeNode\n", "import numpy as np\n", "import glob\n", "from functools import partial\n", "import time\n", "from random import shuffle\n", "%matplotlib notebook\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def _correct_gg_reroot_length_issue(t):\n", " # the greengenes trees on reroot had a node with a length set to None\n", " # find and correct if it exists\n", " try:\n", " gg_reroot_none_node = t.find('k__Bacteria')\n", " gg_reroot_none_node.length = 0.0\n", " except:\n", " pass\n", "\n", " return t\n", "\n", "def shear_names(skt):\n", " # determine which tips to keep\n", " names = [n.name for n in skt.tips()]\n", " shuffle(names)\n", " to_keep = int(np.ceil(len(names) * 0.1))\n", " names_to_keep = set(names[:to_keep])\n", " return names_to_keep\n", "\n", "def read_before_parse_newick(f):\n", " \"\"\"we need to have newick in memory right now, so not a fair comparison if we cache\"\"\"\n", " return parse_newick(open(f).read())" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "61_otus.tree\n", "64_otus.tree\n", "67_otus.tree\n", "70_otus.tree\n", "73_otus.tree\n", "76_otus.tree\n", "79_otus.tree\n", "82_otus.tree\n", "85_otus.tree\n", "88_otus.tree\n", "91_otus.tree\n", "94_otus.tree\n", "97_otus.tree\n", "99_otus.tree\n" ] }, { "ename": "TypeError", "evalue": "unsupported operand type(s) for -: 'str' and 'float'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 52\u001b[0m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'memories'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobs_m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 53\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 54\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Bench walltime: %0.2fs\"\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for -: 'str' and 'float'" ] } ], "source": [ "results = {'timings': {}, 'memories': {}}\n", "\n", "start = time.time()\n", "for f in glob.glob('../../../greengenes_release/gg_13_8_otus/trees/*_otus.tree'):\n", " obs_t = {}\n", " obs_m = {}\n", " key = f.rsplit('/')[-1]\n", " #if key.startswith('9'):# or key.startswith('8') or key.startswith('7'):\n", " # continue\n", " print(key)\n", " \n", " # load trees for non-parse tests\n", " sktree = TreeNode.read(f)\n", " bptree = parse_newick(open(f).read())\n", " \n", " # parse timings and memory\n", " sk_parse_t = %timeit -o -q TreeNode.read(f)\n", " sk_parse_m = %memit -o -q TreeNode.read(f)\n", " bp_parse_t = %timeit -o -q read_before_parse_newick(f)\n", " bp_parse_m = %memit -o -q read_before_parse_newick(f)\n", " \n", " shear_names_to_keep = shear_names(sktree)\n", " \n", " # shear times and memory\n", " sk_shear_t = %timeit -o -q sktree.shear(shear_names_to_keep)\n", " sk_shear_m = %memit -o -q sktree.shear(shear_names_to_keep)\n", " bp_shear_t = %timeit -o -q bptree.shear(shear_names_to_keep)\n", " bp_shear_m = %memit -o -q bptree.shear(shear_names_to_keep)\n", "\n", " # to_array times and memory\n", " sk_toarray_t = %timeit -o -q sktree.to_array()\n", " sk_toarray_m = %memit -o -q sktree.to_array()\n", " bp_toarray_t = %timeit -o -q to_skbio_treearray(bptree)\n", " bp_toarray_m = %memit -o -q to_skbio_treearray(bptree)\n", " \n", " obs_t['n_tips'] = bptree.ntips()\n", " obs_t['sk_parse'] = sk_parse_t\n", " obs_t['bp_parse'] = bp_parse_t\n", " obs_t['sk_shear'] = sk_shear_t\n", " obs_t['bp_shear'] = bp_shear_t\n", " obs_t['sk_toarray'] = sk_toarray_t\n", " obs_t['bp_toarray'] = bp_toarray_t\n", " results['timings'][key] = obs_t\n", " \n", " obs_m['n_tips'] = bptree.ntips()\n", " obs_m['sk_parse'] = sk_parse_m\n", " obs_m['bp_parse'] = bp_parse_m\n", " obs_m['sk_shear'] = sk_shear_m\n", " obs_m['bp_shear'] = bp_shear_m\n", " obs_m['sk_toarray'] = sk_toarray_m\n", " obs_m['bp_toarray'] = bp_toarray_m \n", " results['memories'][key] = obs_m\n", " \n", "print(\"Bench walltime: %0.2fs\" % (time.time() - start))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "application/javascript": [ "/* Put everything inside the global mpl namespace */\n", "window.mpl = {};\n", "\n", "mpl.get_websocket_type = function() {\n", " if (typeof(WebSocket) !== 'undefined') {\n", " return WebSocket;\n", " } else if (typeof(MozWebSocket) !== 'undefined') {\n", " return MozWebSocket;\n", " } else {\n", " alert('Your browser does not have WebSocket support.' +\n", " 'Please try Chrome, Safari or Firefox ≥ 6. 
[… the remainder of this output cell is the stock matplotlib nbagg (interactive figure) JavaScript emitted by %matplotlib notebook; this auto-generated widget boilerplate, further mangled by stripped HTML tags, has been elided …]
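Editorial note (not part of the archived notebook): the benchmark cell above drives its measurements through IPython's %timeit -o -q and %memit -o -q magics, so it only runs inside a notebook. Below is a minimal standalone sketch of the same measurement pattern, assuming bp, scikit-bio and memory_profiler are installed; the tree path and the helper name `measure` are illustrative only, not part of the repository.

import timeit
from memory_profiler import memory_usage
from bp import parse_newick
from skbio import TreeNode

def measure(func, repeat=3, number=1):
    # Best wall-clock time (seconds) and peak memory (MiB) observed while calling func().
    best = min(timeit.repeat(func, repeat=repeat, number=number)) / number
    peak = max(memory_usage((func, (), {}), interval=0.1))
    return best, peak

tree_fp = 'some_otus.tree'      # hypothetical path; the notebook globs the Greengenes trees
newick = open(tree_fp).read()   # bp parses an in-memory newick string

sk_time, sk_mem = measure(lambda: TreeNode.read(tree_fp))
bp_time, bp_mem = measure(lambda: parse_newick(newick))
print("skbio parse: %0.2fs, %0.1f MiB peak" % (sk_time, sk_mem))
print("bp parse:    %0.2fs, %0.1f MiB peak" % (bp_time, bp_mem))

The shear and to_array comparisons in the loop above follow the same shape, swapping in the sheared-tree and array-conversion calls for the parse calls.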