pax_global_header00006660000000000000000000000064141736214500014515gustar00rootroot0000000000000052 comment=7b1ab62a7fe708b75cdc8740bbda8886ea96a2c5 treetime-0.8.6/000077500000000000000000000000001417362145000133465ustar00rootroot00000000000000treetime-0.8.6/.gitignore000066400000000000000000000027131417362145000153410ustar00rootroot00000000000000*~ # KDE directory preferences .directory # # Linux trash folder which might appear on any partition or disk .Trash-* #vscode .vscode/ #VIM backup and swap [a-w][a-z] [._]s[a-w][a-z] *.un~ Session.vim .netrwhist *~ .ropeproject/ #python compiled files # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] # C extensions *.so # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover # Translations *.mo *.pot # Django stuff: *.log # Sphinx documentation docs/_build/ # PyBuilder target/ #file for sublime text *.tmlanguage.cache *.tmPreferences.cache *.stTheme.cache # workspace files are user-specific *.sublime-workspace # project files should be checked into the repository, unless a significant # proportion of contributors will probably not be using SublimeText *.sublime-project # sftp configuration file sftp-config.json #data files other resources *.svg !data/ # OS generated files # ###################### .DS_Store .DS_Store? ._* .Spotlight-V100 .Trashes Icon? ehthumbs.db Thumbs.db test/treetime_examplestreetime-0.8.6/.pylintrc000066400000000000000000000023131417362145000152120ustar00rootroot00000000000000[MASTER] ignore=.git .idea .input .output .temp .venv .vscode [MESSAGES CONTROL] disable= bad-continuation, bad-whitespace, bare-except, chained-comparison, consider-using-in, fixme, invalid-name, len-as-condition, line-too-long, missing-docstring, multiple-imports, no-else-continue, no-else-raise, no-else-return, no-self-use, protected-access, too-many-arguments, too-many-branches, too-many-branches, too-many-instance-attributes, too-many-lines, too-many-locals, too-many-nested-blocks, too-many-public-methods, too-many-statements, trailing-newlines, unidiomatic-typecheck, unnecessary-comprehension, unnecessary-pass, useless-object-inheritance, duplicate-code, unused-argument, unused-import, unused-variable, simplifiable-if-expression, simplifiable-if-statement, singleton-comparison, attribute-defined-outside-init, multiple-statements, redefined-outer-name, cyclic-import, import-error, import-outside-toplevel, reimported, wrong-import-order, wrong-import-position, [BASIC] bad-names=foo,baz,toto,tutu,tata,let,const,nil,null,define good-names=a,b,c,e,f,g,i,j,k,x,y,z,ex,Run,_,__,___ extension-pkg-whitelist=numpy treetime-0.8.6/.readthedocs.yml000066400000000000000000000001511417362145000164310ustar00rootroot00000000000000--- version: 2 conda: environment: docs/environment.yml sphinx: configuration: docs/source/conf.py treetime-0.8.6/.travis.yml000066400000000000000000000011601417362145000154550ustar00rootroot00000000000000language: python python: - "3.8" - "3.7" - "3.6" install: - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; - bash miniconda.sh -b -p 
$HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - hash -r - conda config --set always_yes yes --set changeps1 no - conda update -q conda # Useful for debugging any issues with conda - conda info -a - conda create -q -n test-environment python=$TRAVIS_PYTHON_VERSION numpy scipy biopython pandas matplotlib pytest - source activate test-environment - python setup.py install script: - bash .travis_test.sh treetime-0.8.6/.travis_test.sh000066400000000000000000000002421417362145000163250ustar00rootroot00000000000000cd test git clone https://github.com/neherlab/treetime_examples.git bash command_line_tests.sh OUT=$? if [ "$OUT" != 0 ]; then exit 1 fi pytest test_treetime.py treetime-0.8.6/LICENSE000066400000000000000000000021141417362145000143510ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Pavel Sagulenko and Richard Neher Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. treetime-0.8.6/Makefile000066400000000000000000000012771417362145000150150ustar00rootroot00000000000000-include .env.example -include .env export UID=$(shell id -u) export GID=$(shell id -g) export DOCS_CONTAINER_NAME=treetime-docs SHELL := bash .ONESHELL: .PHONY: docs docker-docs docs: @$(MAKE) --no-print-directory -C docs/ html docs-clean: rm -rf docs/build docker-docs: set -euox docker build -t $${DOCS_CONTAINER_NAME} \ --network=host \ --build-arg UID=$(shell id -u) \ --build-arg GID=$(shell id -g) \ docs/ docker run -it --rm \ --name=$${DOCS_CONTAINER_NAME}-$(shell date +%s) \ --init \ --user=$(shell id -u):$(shell id -g) \ --volume=$(shell pwd):/home/user/src \ --publish=8000:8000 \ --workdir=/home/user/src \ --env 'TERM=xterm-256colors' \ $${DOCS_CONTAINER_NAME} treetime-0.8.6/README.md000066400000000000000000000300141417362145000146230ustar00rootroot00000000000000[![Build Status](https://travis-ci.org/neherlab/treetime.svg?branch=master)](https://travis-ci.org/neherlab/treetime) [![anaconda](https://anaconda.org/bioconda/treetime/badges/installer/conda.svg)](https://anaconda.org/bioconda/treetime) [![readthedocs](https://readthedocs.org/projects/treetime/badge/)](https://treetime.readthedocs.io/en/latest/) ## TreeTime: maximum likelihood dating and ancestral sequence inference ### Overview TreeTime provides routines for ancestral sequence reconstruction and inference of molecular-clock phylogenies, i.e., a tree where all branches are scaled such that the positions of terminal nodes correspond to their sampling times and internal nodes are placed at the most likely time of divergence. 
To optimize the likelihood of time-scaled phylogenies, TreeTime uses an iterative approach that first infers ancestral sequences given the branch length of the tree, then optimizes the positions of unconstrained nodes on the time axis, and then repeats this cycle. The only topology optimizations are the (optional) resolution of polytomies in a way that is most (approximately) consistent with the sampling time constraints on the tree. The package is designed to be used as a stand-alone tool on the command-line or as a library used in larger phylogenetic analysis work-flows. [The documentation of TreeTime is hosted on readthedocs.org](https://treetime.readthedocs.io/en/latest/). In addition to scripting TreeTime or using it via the command-line, there is also a small web server at [treetime.ch](https://treetime.biozentrum.unibas.ch/). ![Molecular clock phylogeny of 200 NA sequences of influenza A H3N2](https://raw.githubusercontent.com/neherlab/treetime_examples/master/figures/tree_and_clock.png) Have a look at our repository with [example data](https://github.com/neherlab/treetime_examples) and the [tutorials](https://treetime.readthedocs.io/en/latest/tutorials.html). #### Features * ancestral sequence reconstruction (marginal and joint maximum likelihood) * molecular clock tree inference (marginal and joint maximum likelihood) * inference of GTR models * rerooting to maximize temporal signal and optimize the root-to-tip distance vs time relationship * simple phylodynamic analysis such as coalescent model fits * sequence evolution along trees using flexible site specific models. ## Table of contents * [Installation and prerequisites](#installation-and-prerequisites) * [Command-line usage](#command-line-usage) + [Timetrees](#timetrees) + [Rerooting and substitution rate estimation](#rerooting-and-substitution-rate-estimation) + [Ancestral sequence reconstruction](#ancestral-sequence-reconstruction) + [Homoplasy analysis](#homoplasy-analysis) + [Mugration analysis](#mugration-analysis) + [Metadata and date format](#metadata-and-date-format) * [Example scripts](#example-scripts) * [Related tools](#related-tools) * [Projects using TreeTime](#projects-using-treetime) * [Building the documentation](#building-the-documentation) * [Developer info](#developer-info) ### Installation and prerequisites TreeTime is compatible with Python 3.6 upwards and is tested on 3.6, 3.7, and 3.8. It depends on several Python libraries: * numpy, scipy, pandas: for all kinds of mathematical operations such as matrix operations, numerical integration, interpolation, minimization, etc. * BioPython: for parsing multiple sequence alignments and all phylogenetic functionality * matplotlib: optional dependency for plotting You may install TreeTime and its dependencies by running ```bash pip install . ``` within this repository. You can also install TreeTime from PyPi via ```bash pip install phylo-treetime ``` You might need root privileges for system wide installation. Alternatively, you can simply use TreeTime locally without installation. In this case, just download and unpack it, and then add the TreeTime folder to your $PYTHONPATH. ### Command-line usage TreeTime can be used as part of python programs that create and interact with tree time objects. How TreeTime can be used to address typical questions like ancestral sequence reconstruction, rerooting, timetree inference, etc. is illustrated by a collection of example scripts described below.
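As a minimal sketch of the scripting route (it mirrors the benchmarking scripts shipped under `benchmarking/` in this repository; the input file names below are hypothetical placeholders):

```python
from treetime import TreeTime
from treetime.utils import parse_dates

# parse sampling dates from a metadata table (see "Metadata and date format" below)
dates = parse_dates("data/my_sequences.metadata.csv")

# set up the analysis from a newick tree and a fasta alignment
tt = TreeTime(gtr='Jukes-Cantor', tree="data/my_tree.nwk",
              aln="data/my_sequences.fasta", dates=dates, verbose=1)

# infer a timetree; root="best" reroots the tree to optimize the temporal signal
tt.run(root="best", max_iter=2)
```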
In addition, TreeTime can be used from the command line with arguments specifying input data and parameters. Trees can be read as newick, nexus and phylip files; fasta and phylip are supported alignment formats; metadata and dates can be provided as csv or tsv files, see [below](#metadata-and-date-format) for details. #### Timetrees To infer a timetree, i.e. a phylogenetic tree in which branch lengths reflect time rather than divergence, TreeTime implements the command: ```bash treetime --aln <input.fasta> --tree <input.nwk> --dates <dates.csv> ``` This command will infer a time tree, ancestral sequences, a GTR model, and optionally confidence intervals and coalescent models. A detailed explanation of this command, its various options, and examples is available in [the documentation on readthedocs.org](https://treetime.readthedocs.io/en/latest/tutorials/timetree.html). #### Rerooting and substitution rate estimation To explore the temporal signal in the data and estimate the substitution rate (instead of full-blown timetree estimation), TreeTime implements a subcommand `clock` that is called as follows ```bash treetime clock --tree <input.nwk> --aln <input.fasta> --dates <dates.csv> --reroot least-squares ``` The full list of options is available by typing `treetime clock -h`. Instead of an input alignment, `--sequence-length <L>` can be provided. Documentation of additional options and examples is available in [the documentation on readthedocs.org](https://treetime.readthedocs.io/en/latest/tutorials/clock.html). #### Ancestral sequence reconstruction The subcommand ```bash treetime ancestral --aln input.fasta --tree input.nwk ``` will reconstruct ancestral sequences at internal nodes of the input tree. The full list of options is available by typing `treetime ancestral -h`. A detailed explanation of `treetime ancestral` with examples is available in [the documentation on readthedocs.org](https://treetime.readthedocs.io/en/latest/tutorials/ancestral.html). #### Homoplasy analysis Detecting and quantifying homoplasies or recurrent mutations is useful to check for recombination, putative adaptive sites, or contamination. TreeTime provides a simple command to summarize homoplasies in data ```bash treetime homoplasy --aln <input.fasta> --tree <input.nwk> ``` The full list of options is available by typing `treetime homoplasy -h`. Please see [the documentation on readthedocs.org](https://treetime.readthedocs.io/en/latest/tutorials/homoplasy.html) for examples and more documentation. #### Mugration analysis Migration between discrete geographic regions, host switching, or other transitions between discrete states are often parameterized by time-reversible models analogous to models describing evolution of genome sequences. Such models are hence often called "mugration" models. TreeTime GTR model machinery can be used to infer mugration models: ```bash treetime mugration --tree <input.nwk> --states <states.csv> --attribute <field> ``` where `<field>` is the relevant column in the csv file specifying the metadata `states.csv`, e.g. `<field>=country`. The full list of options is available by typing `treetime mugration -h`. Please see [the documentation on readthedocs.org](https://treetime.readthedocs.io/en/latest/tutorials/mugration.html) for examples and more documentation. #### Metadata and date format Several TreeTime commands require the user to specify a file with dates and/or other metadata. TreeTime assumes these files to be either comma-separated (csv) or tab-separated (tsv) files. The first line of these files is interpreted as a header line specifying the content of the columns.
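For concreteness, a minimal metadata file that satisfies these conventions (the sequence names and dates below are hypothetical) could look like this; the required column names and accepted date formats are spelled out next:

```
name,date
A/Boston/2016,2016.34
A/Lima/2017,2017-06-25
```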
Each file needs to have at least one column that is named `name`, `accession`, or `strain`. This column needs to contain the names of each sequence and match the names of taxa in the tree if one is provided. If more than one of `name`, `accession`, or `strain` is found, TreeTime will use the first. If the analysis requires dates, at least one column name needs to contain `date` (e.g. `sampling date` is fine). Again, if multiple hits are found, TreeTime will use the first. TreeTime will attempt to parse dates in the following order | order | type/format | example | description | | --- |-------------|---------|------------| | 1| float | 2017.56 | decimal date | | 2| [float:float] | [2013.45:2015.56] | decimal date range | | 3| %Y-%m-%d | 2017-08-25 | calendar date in ISO format | | 4| %Y-XX-XX | 2017-XX-XX | calendar date missing month and/or day | ### Example scripts The following scripts illustrate how treetime can be used to solve common problems with short python scripts. They are meant to be used in an interactive ipython environment and run as `run examples/ancestral_inference.py`. * [`ancestral_inference.py`](https://github.com/neherlab/treetime_examples/tree/master/scripts/ancestral_sequence_inference.py) illustrates how ancestral sequences are inferred and likely mutations are assigned to branches in the tree, * [`relaxed_clock.py`](https://github.com/neherlab/treetime_examples/tree/master/scripts/relaxed_clock.py) walks the user through the usage of relaxed molecular clock models. * [`examples/rerooting_and_timetrees.py`](https://github.com/neherlab/treetime_examples/tree/master/scripts/rerooting_and_timetrees.py) illustrates the rerooting and root-to-tip regression scatter plots. * [`ebola.py`](https://github.com/neherlab/treetime_examples/tree/master/scripts/ebola.py) uses about 300 sequences from the 2014-2015 Ebola virus outbreak to infer a timetree. This example takes a few minutes to run. HTML documentation of the different classes and functions is available [here](https://treetime.biozentrum.unibas.ch/doc). ### Related tools There are several other tools which estimate molecular clock phylogenies. * [Beast](http://beast.bio.ed.ac.uk/) relies on MCMC-type sampling of trees. It is hence rather slow for large data sets. But BEAST allows the flexible inclusion of prior distributions, complex evolutionary models, and estimation of parameters. * [Least-Square-Dating](http://www.atgc-montpellier.fr/LSD/) (LSD) emphasizes speed (it scales as O(N), like **TreeTime**), but provides limited scope for customization. * [treedater](https://github.com/emvolz/treedater) by Eric Volz and Simon Frost is an R package that implements time tree estimation and supports relaxed clocks. ### Projects using TreeTime * TreeTime is an integral part of the [nextstrain.org](http://nextstrain.org) project to track and analyze viral sequence data in real time. * [panX](http://pangenome.de) uses TreeTime for ancestral reconstructions and inference of gene gain-loss patterns. ### Building the documentation The API documentation for the TreeTime package is generated with Sphinx. The source code for the documentation is located in the docs folder. - sphinx-build to generate static html pages from source, installed as ```bash pip install Sphinx ``` After the required packages are installed, navigate to the docs directory, and build the docs by typing: ```bash make html ``` Instead of html, another target such as `latex` or `epub` can be specified to build the docs in the desired format.
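For example, to produce LaTeX sources instead (the catch-all rule in `docs/Makefile` forwards any unknown target to sphinx-build):

```bash
make latex
```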
#### Requirements To build the documentation, the sphinx-build tool must be installed. The doc pages use the basicstrap html theme to have the same design as the TreeTime web server. Therefore, the basicstrap theme should also be available on the system. ### Developer info - Copyright and License: Pavel Sagulenko, Emma Hodcroft, and Richard Neher, MIT License - References * [TreeTime: Maximum-likelihood phylodynamic analysis](https://academic.oup.com/ve/article/4/1/vex042/4794731) by Pavel Sagulenko, Vadim Puller and Richard A Neher. Virus Evolution. * [NextStrain: real-time tracking of pathogen evolution](https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty407/5001388) by James Hadfield et al. Bioinformatics. treetime-0.8.6/benchmarking/000077500000000000000000000000001417362145000157765ustar00rootroot00000000000000treetime-0.8.6/benchmarking/sequence_algorithms.py000066400000000000000000000014251417362145000224130ustar00rootroot00000000000000from __future__ import print_function, division import numpy as np from Bio import Phylo if __name__ == '__main__': from treetime.seq_utils import normalize_profile, prof2seq, seq2prof from treetime.gtr import GTR gtr = GTR.standard('JC69') dummy_prof = np.random.random(size=(10000,5)) # used a lot (300us) norm_prof = normalize_profile(dummy_prof)[0] # used less but still a lot (50us) gtr.evolve(norm_prof, 0.1) # used less but still a lot (50us) gtr.propagate_profile(norm_prof, 0.1) # used only in final, sample_from_prof=False speeds it up (600us or 300us) seq, p, seq_ii = prof2seq(norm_prof, gtr, sample_from_prof=True, normalize=False) # used only initially (slow, 5ms) tmp_prof = seq2prof(seq, gtr.profile_map) treetime-0.8.6/benchmarking/timetree_algorithms.py000066400000000000000000000053331417362145000224230ustar00rootroot00000000000000from __future__ import print_function, division import numpy as np from Bio import Phylo from treetime import TreeTime from treetime.utils import parse_dates from treetime.node_interpolator import Distribution, NodeInterpolator if __name__ == '__main__': base_name = 'test/treetime_examples/data/h3n2_na/h3n2_na_20' dates = parse_dates(base_name+'.metadata.csv') tt = TreeTime(gtr='Jukes-Cantor', tree = base_name+'.nwk', aln = base_name+'.fasta', verbose = 3, dates = dates, debug=True) # rerooting can be done along with the tree time inference tt.run(root="best", branch_length_mode='input', max_iter=2, time_marginal=True) # initialize date constraints and branch length interpolators # this is called in each iteration. 44ms tt.init_date_constraints() ########################################################### # joint inference of node times. done in every generation. 0.7s tt._ml_t_joint() # individual steps in joint inference - post-order msgs_to_multiply = [child.joint_pos_Lx for child in tt.tree.root.clades if child.joint_pos_Lx is not None] # 330us subtree_distribution = Distribution.multiply(msgs_to_multiply) # 30ms (there are 19 nodes here, so about 20 internal branches -> 1s) res, res_t = NodeInterpolator.convolve(subtree_distribution, tt.tree.root.clades[1].branch_length_interpolator, max_or_integral='max', inverse_time=True, n_grid_points = tt.node_grid_points, n_integral=tt.n_integral, rel_tol=tt.rel_tol_refine) ########################################################### # marginal inference: done only for confidence estimation. 2.7s
tt._ml_t_marginal() # individual steps in marginal inference - post-order msgs_to_multiply = [child.marginal_pos_Lx for child in tt.tree.root.clades if child.marginal_pos_Lx is not None] # 330us subtree_distribution = Distribution.multiply(msgs_to_multiply) # 60ms (there are 19 nodes here, so about 20 internal branches -> 1s) res, res_t = NodeInterpolator.convolve(subtree_distribution, tt.tree.root.clades[1].branch_length_interpolator, max_or_integral='integral', inverse_time=True, n_grid_points = tt.node_grid_points, n_integral=tt.n_integral, rel_tol=tt.rel_tol_refine) # 80ms (there are 19 nodes here, so about 20 internal branches -> 1s) res, res_t = NodeInterpolator.convolve(subtree_distribution, tt.tree.root.clades[1].branch_length_interpolator, max_or_integral='integral', inverse_time=False, n_grid_points = tt.node_grid_points, n_integral=tt.n_integral, rel_tol=tt.rel_tol_refine) # This points towards the convolution being the biggest computational expense. treetime-0.8.6/bin/000077500000000000000000000000001417362145000141165ustar00rootroot00000000000000treetime-0.8.6/bin/treetime000077500000000000000000000005161417362145000156640ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import print_function, division, absolute_import import sys from treetime import version, make_parser import matplotlib matplotlib.use('AGG') if __name__ == '__main__': parser = make_parser() params = parser.parse_args() return_code = params.func(params) sys.exit(return_code) treetime-0.8.6/changelog.md000066400000000000000000000126461417362145000156270ustar00rootroot00000000000000# 0.8.6 * optionally allow incomplete alignment [PR #178](https://github.com/neherlab/treetime/pull/178) * reduce memory footprint through better clean up and optimizing types. [PR #179](https://github.com/neherlab/treetime/pull/179) # 0.8.5 * bug fixes related to edge cases where sequences consist only of missing data * bug fix when the CLI command `treetime` is run without an alignment * more robust behavior when parsing biopython alignments (id vs name of sequence records) * drop python 3.5 support # 0.8.4 -- re-release of 0.8.3.1 # 0.8.3.1 -- bug fix related to Bio.Seq.Seq now being a bytearray * Biopython changed the representation of sequences from strings to bytearrays. This caused crashes of mugration inference with more than 62 states, as states then exceeded the ascii range. This fix now bypasses Bio.Seq in the mugration analysis. # 0.8.3 -- unpin biopython version * Biopython 1.77 and 1.78 had a bug in their nexus export. This is fixed in 1.79. We now explicitly exclude the buggy versions but allow others. # 0.8.2 -- bug fixes and small feature additions This release fixes a few bugs and adds a few features * output statistics of different iterations of the treetime optimization loop (trace-log, thanks to @ktmeaton) * speed ups by @akislyuk * fix errors with dates in the distant future * better precision of tabular skyline output * adds clock-deviation to the root-to-tip output of the `clock` command # 0.8.1 -- bug fix in amino acid profile map. # 0.8.0 -- drop python 2.7 support, bug fixes. # 0.7.6 -- catch cases where distributions are too short for calculating confidence intervals.
# 0.7.5 -- fix desync of peak from grid of distributions after pruning # 0.7.4 -- bug fix in reconstruct discrete trait routine The `reconstruct_discrete_traits` wrapper function didn't handle missing data correctly (after the changes released in 0.7.2), which resulted in alphabets and weights of different lengths. # 0.7.3 -- bug fix in average rate calculation This release fixes a problem that surfaced when inferring GTR models from trees of very similar sequences but quite a few gaps. This resulted in mutation counts like so:

```
A: [[ 0.  1.  8.  3.  0.]
C:  [ 1.  0.  2.  7.  0.]
G:  [ 9.  0.  0.  2.  0.]
T:  [ 1. 23.  6.  0.  0.]
-:  [46. 22. 28. 38.  0.]]
```

As a result, the rate "to gap" is inferred quite high, while the equilibrium gap fraction is low. Since we cap the equilibrium gap fraction from below to avoid reconstruction problems when branches are very short, this resulted in an average rate with a substantial contribution from an assumed 1% equilibrium gap frequency at which gaps mutate at 20 times the rate of other states. Since gaps are ignored in distance calculations anyway, it is more sensible to exclude these transitions from the calculation of the average rate. This is now happening in line 7 of treetime/gtr.py. The average rate is restricted to substitutions from non-gap states to any state. # 0.7.2 -- weights in discrete trait reconstruction This release implements a more consistent handling of weights (fixed equilibrium frequencies) in discrete state reconstruction. It also fixes a number of problems in how the arguments were processed. TreeTime now * allows unobserved discrete states * uses expected time-in-tree instead of observed time-in-tree in GTR estimation when weights are fixed. The former resulted in very unstable rate estimates. # 0.7.0 -- restructuring ## Major changes This release largely includes changes under the hood, some of which also affect how treetime behaves. The biggest changes are * sequence data handling is now done by a separate class `SequenceData`. There is now a clear distinction between input data that is never changed and inferred sequences. This class also provides a consolidated set of functions to convert sparse, compressed, and full sequence representations into each other. * sequences are now unicode when running from python3. This does not seem to come with a measurable performance hit compared to byte sequences as long as all characters are ASCII. Moving away from bytes to unicode proved much less hassle than converting sequences back and forth from unicode to bytes during IO. * Ancestral state reconstruction no longer reconstructs the state of terminal nodes by default, and sequence accessors and output will return the input data by default. Reconstruction is optional. * The command-line mugration model inference now optimizes the overall rate numerically and is hence no longer making a short-branch-length assumption. * TreeTime now raises a number of custom errors rather than returning success or error codes. This should result in fewer "silent errors" that cause problems downstream. ## Minor new features In addition, we implemented a number of other changes to the interface * `treetime`, `treetime clock` now accept the arguments `--name-column` and `--date-column` to explicitly specify the metadata columns to be used as name or date * `treetime mugration` accepts a `--name-column` argument. ## Bug fixes * scaling of skyline confidence intervals was wrong; it now reflects the inverse second derivative in log-space
* catch problems after rerooting associated with missing attributes in the newly generated root node. * make conversion from calendar dates to numeric dates and vice versa compatible and remove approximate handling of leap-years. * avoid overwriting content of the output directory with default names * don't export inferred dates of tips labeled as `bad_branch`.treetime-0.8.6/contributing.md000066400000000000000000000010511417362145000163740ustar00rootroot00000000000000# Contributing to TreeTime Thank you for your interest in contributing to TreeTime. We welcome pull-requests that fix bugs or implement new features. ## Bugs If you come across a bug or unexpected behavior, please file an issue. ## Testing Upon pushing a commit, Travis will run a few simple tests. These use data available in the [neherlab/treetime_examples](https://github.com/neherlab/treetime_examples) repository. ## Coding conventions (loosely adhered to) * indentation: 4 spaces * docstrings: numpy style * variable names: snake_case treetime-0.8.6/docs/000077500000000000000000000000001417362145000142765ustar00rootroot00000000000000treetime-0.8.6/docs/.dockerignore000066400000000000000000000000231417362145000167450ustar00rootroot00000000000000* !environment.yml treetime-0.8.6/docs/Dockerfile000066400000000000000000000020521417362145000162700ustar00rootroot00000000000000FROM continuumio/miniconda3:4.10.3 ARG DEBIAN_FRONTEND=noninteractive ARG USER=user ARG GROUP=user ARG UID ARG GID ENV TERM="xterm-256color" ENV HOME="/home/user" RUN set -x \ && mkdir -p ${HOME}/src \ && \ if [ -z "$(getent group ${GID})" ]; then \ addgroup --system --gid ${GID} ${GROUP}; \ else \ groupmod -n ${GROUP} $(getent group ${GID} | cut -d: -f1); \ fi \ && \ if [ -z "$(getent passwd ${UID})" ]; then \ useradd \ --system \ --create-home --home-dir ${HOME} \ --shell /bin/bash \ --gid ${GROUP} \ --groups sudo \ --uid ${UID} \ ${USER}; \ fi \ && touch ${HOME}/.hushlogin RUN set -x \ && chown -R ${USER}:${GROUP} ${HOME} COPY environment.yml ${HOME}/src/ WORKDIR ${HOME}/src RUN set -x \ && conda env create docs USER ${USER} RUN set -x \ && conda init bash \ && echo "conda activate docs" >> ${HOME}/.bashrc CMD bash -c "set -x \ && source ${HOME}/.bashrc \ && cd ${HOME}/src/docs \ && rm -rf build \ && make autobuild \ " treetime-0.8.6/docs/Makefile000066400000000000000000000013311417362145000157360ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = source BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) autobuild: sphinx-autobuild --host=0.0.0.0 --port=8000 "$(SOURCEDIR)" "$(BUILDDIR)/html" treetime-0.8.6/docs/environment.yml000066400000000000000000000005121417362145000173630ustar00rootroot00000000000000name: docs channels: - defaults dependencies: - make - sphinx - pip - pip: - biopython>=1.67,!=1.77,!=1.78 - numpy>=1.10.4 - pandas>=0.17.1 - scipy>=0.16.1 - recommonmark>=0.5.0 - sphinx-argparse>=0.2.5 - sphinx-autobuild - sphinx-markdown-tables - sphinx-rtd-theme - sphinx-tabs treetime-0.8.6/docs/flu_200.png000066400000000000000000000442031417362145000161560ustar00rootroot00000000000000[binary PNG image data omitted: flu_200.png]treetime-0.8.6/docs/source/000077500000000000000000000000001417362145000155765ustar00rootroot00000000000000treetime-0.8.6/docs/source/APIdoc.rst000066400000000000000000000012201417362145000174320ustar00rootroot00000000000000API documentation ================= .. toctree:: :maxdepth: 1 :hidden: treetime clock_tree treeanc seqgen gtr vcf_utils seq_utils Core classes ------------ .. automodule:: treetime :doc:`TreeTime class` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :doc:`ClockTree class` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :doc:`TreeAnc class` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ :doc:`SeqGen class` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ :doc:`GTR class` ~~~~~~~~~~~~~~~~~~~~~ Utility code ------------ :doc:`VCF tools` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ :doc:`Seq tools` ~~~~~~~~~~~~~~~~~~~~~~~~~~~ treetime-0.8.6/docs/source/clock_tree.rst000066400000000000000000000017321417362145000204450ustar00rootroot00000000000000***************************** ClockTree class documentation ***************************** ClockTree is a class that implements the core algorithms for maximum likelihood time tree inference. It operates on a tree with fixed topology. All operations that reroot or change tree topology are part of the TreeTime class. .. .. autoclass:: treetime.ClockTree .. :members: ClockTree docstring and constructor =================================== .. autoclass:: treetime.ClockTree :members: __init__ Running TreeTime analysis ========================= .. automethod:: treetime.ClockTree.init_date_constraints .. automethod:: treetime.ClockTree.make_time_tree Post-processing =============== .. 
automethod:: treetime.ClockTree.branch_length_to_years .. automethod:: treetime.ClockTree.convert_dates .. automethod:: treetime.ClockTree.get_confidence_interval .. automethod:: treetime.ClockTree.get_max_posterior_region .. automethod:: treetime.ClockTree.timetree_likelihood treetime-0.8.6/docs/source/commands.rst000066400000000000000000000002251417362145000201300ustar00rootroot00000000000000Detailed command line documentation =================================== .. argparse:: :module: treetime :func: make_parser :prog: treetime treetime-0.8.6/docs/source/conf.py000066400000000000000000000235651417362145000171100ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # TreeTime documentation build configuration file, created by # sphinx-quickstart on Mon Jul 31 11:44:07 2017. # # This file is execfile()d with the current directory set to its # containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import sys import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath('../..')) from treetime import version # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. #needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.doctest', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', 'recommonmark', 'sphinxarg.ext' ] # Napoleon settings napoleon_google_docstring = False napoleon_numpy_docstring = True napoleon_include_init_with_doc = True napoleon_include_private_with_doc = True napoleon_include_special_with_doc = True napoleon_use_admonition_for_examples = False napoleon_use_admonition_for_notes = False napoleon_use_admonition_for_references = False napoleon_use_ivar = False napoleon_use_param = True napoleon_use_rtype = True # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] source_suffix = ['.rst', '.md'] # The encoding of source files. #source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = u'TreeTime' copyright = u'2017-2021, Pavel Sagulenko and Richard Neher' author = u'Pavel Sagulenko and Richard Neher' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. 
language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = [] # The reST default role (used for this markup: `text`) to use for all # documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. #keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = 'alabaster' html_theme = 'sphinx_rtd_theme' html_theme_options = {} # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. #html_theme_path = [] # The name for this set of Sphinx documents. # " v documentation" by default. #html_title = u'TreeTime v1.0' # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. #html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. # The empty string is equivalent to '%b %d, %Y'. #html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. #html_use_smartypants = True # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_domain_indices = True # If false, no index is generated. 
#html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, links to the reST sources are added to the pages. #html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. #html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. #html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. #html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' #html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. #html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. #html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. htmlhelp_basename = 'TreeTimedoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', # Latex figure (float) alignment #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'TreeTime.tex', u'TreeTime Documentation', u'Pavel Sagulenko and Richard Neher', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # If true, show page references after internal links. #latex_show_pagerefs = False # If true, show URL addresses after external links. #latex_show_urls = False # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'treetime', u'TreeTime Documentation', [author], 1) ] # If true, show URL addresses after external links. #man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'TreeTime', u'TreeTime Documentation', author, 'TreeTime', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. #texinfo_appendices = [] # If false, no module index is generated. 
#texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. #texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. #texinfo_no_detailmenu = False treetime-0.8.6/docs/source/gtr.rst000066400000000000000000000016441417362145000171310ustar00rootroot00000000000000*********************** GTR class documentation *********************** .. autoclass:: treetime.GTR :members: __init__ .. automethod:: treetime.GTR.standard .. automethod:: treetime.GTR.custom .. automethod:: treetime.GTR.random .. automethod:: treetime.GTR.infer .. automethod:: treetime.GTR.assign_rates .. Note:: The GTR object can be modified in-place by calling :py:func:`treetime.GTR.assign_rates` Sequence manipulation --------------------- .. automethod:: treetime.GTR.state_pair Distance and probability computations ------------------------------------- .. automethod:: treetime.GTR.optimal_t .. automethod:: treetime.GTR.optimal_t_compressed .. automethod:: treetime.GTR.prob_t .. automethod:: treetime.GTR.prob_t_compressed .. automethod:: treetime.GTR.prob_t_profiles .. automethod:: treetime.GTR.propagate_profile .. automethod:: treetime.GTR.sequence_logLH .. automethod:: treetime.GTR.expQt treetime-0.8.6/docs/source/index.rst000066400000000000000000000052361417362145000174450ustar00rootroot00000000000000.. TreeTime documentation master file, created by sphinx-quickstart on Mon Jul 31 11:44:07 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. TreeTime: time-tree and ancestral sequence inference ==================================================== .. image:: https://travis-ci.org/neherlab/treetime.svg?branch=master :target: https://travis-ci.org/neherlab/treetime .. image:: https://anaconda.org/bioconda/treetime/badges/installer/conda.svg :target: https://anaconda.org/bioconda/treetime TreeTime provides routines for ancestral sequence reconstruction and inference of molecular-clock phylogenies, i.e., a tree where all branches are scaled such that the positions of terminal nodes correspond to their sampling times and internal nodes are placed at the most likely time of divergence. To optimize the likelihood of time-scaled phylogenies, TreeTime uses an iterative approach that first optimizes branch lengths of the tree given the sequence data and date constraints, and then optimizes coalescent tree priors, relaxed clock parameters, or resolves polytomies. This cycle is repeated a few times. The only topology optimizations are the (optional) resolution of polytomies in a way that is most (approximately) consistent with the sampling time constraints on the tree. The code is hosted on `github.com/neherlab/treetime <https://github.com/neherlab/treetime>`_. .. toctree:: :maxdepth: 2 :hidden: installation tutorials commands APIdoc
.. image:: https://raw.githubusercontent.com/neherlab/treetime_examples/master/figures/tree_and_clock.png Features -------- * ancestral sequence reconstruction (marginal and joint maximum likelihood) * molecular clock tree inference (marginal and joint maximum likelihood) * inference of GTR models * rerooting to maximize temporal signal and optimize the root-to-tip distance vs time relationship * simple phylodynamic analysis such as coalescent model fits Developer info -------------- - Source code on github at https://github.com/neherlab/treetime - Copyright and License: Pavel Sagulenko, Emma Hodcroft, and Richard Neher, MIT License - References * `TreeTime: Maximum-likelihood phylodynamic analysis <https://academic.oup.com/ve/article/4/1/vex042/4794731>`_ by Pavel Sagulenko, Vadim Puller and Richard A Neher. Virus Evolution. * `NextStrain: real-time tracking of pathogen evolution <https://academic.oup.com/bioinformatics/advance-article/doi/10.1093/bioinformatics/bty407/5001388>`_ by James Hadfield et al. Bioinformatics. Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` treetime-0.8.6/docs/source/installation.rst000066400000000000000000000034201417362145000210300ustar00rootroot00000000000000Installation ============ TreeTime is compatible with Python 3.6 upwards and is tested on 3.6, 3.7, and 3.8. It depends on several Python libraries: * numpy, scipy, pandas: for all kinds of mathematical operations such as matrix operations, numerical integration, interpolation, minimization, etc. * BioPython: for parsing multiple sequence alignments and phylogenetic trees * matplotlib: optional dependency for plotting Installing from PyPi or Conda ----------------------------- You can install TreeTime from PyPi via .. code:: bash pip install phylo-treetime You might need root privileges for system wide installation. Similarly, you can install from conda using .. code:: bash conda install -c bioconda treetime Installing from source ---------------------- Clone or download the source code. .. code:: bash git clone https://github.com/neherlab/treetime.git cd treetime pip install . You might need root privileges for system wide installation. Alternatively, you can simply use TreeTime locally without installation. In this case, just download and unpack it, and then add the TreeTime folder to your $PYTHONPATH. Building the documentation -------------------------- The API documentation for the TreeTime package is generated with Sphinx. The source code for the documentation is located in the docs folder. - sphinx-build to generate static html pages from source, installed as .. code:: bash pip install Sphinx - additional sphinx extensions: .. code:: bash pip install recommonmark sphinx-argparse After the required packages are installed, navigate to the docs directory, and build the docs by typing: .. code:: bash make html Instead of html, another target such as `latex` or `epub` can be specified to build the docs in the desired format. treetime-0.8.6/docs/source/seq_utils.rst000066400000000000000000000003101417362145000203320ustar00rootroot00000000000000******************* Sequence Utilities ******************* .. autofunction:: treetime.seq_utils.seq2array .. autofunction:: treetime.seq_utils.seq2prof .. autofunction:: treetime.seq_utils.prof2seqtreetime-0.8.6/docs/source/seqgen.rst000066400000000000000000000002041417362145000176060ustar00rootroot00000000000000Sequence evolution and generation =================================
.. autoclass:: treetime.seqgen.SeqGen :members: __init__ treetime-0.8.6/docs/source/treeanc.rst000066400000000000000000000041201417362145000177460ustar00rootroot00000000000000*************************** TreeAnc class documentation *************************** This is the core class of the TreeTime module. It stores the phylogenetic tree and implements the basic algorithms for sequence manipulation, sequence reconstruction, and branch length optimization. The tree is stored as a Bio.Phylo object. In order to facilitate the tree operations, each node of the tree is decorated with additional attributes which are set during the tree preparation. These attributes need to be updated after tree modifications. The sequences are also attached to the tree nodes. In order to save memory, the sequences are stored in compressed form. The TreeAnc class implements methods to compress and decompress sequences. The main purpose of the TreeAnc class is to implement standard algorithms for ancestral sequence reconstruction. Both marginal and joint maximum likelihood reconstructions are possible. The marginal reconstruction computes the entire distribution of the states at a given node after tracing out states at all other nodes. The `example scripts <https://github.com/neherlab/treetime_examples>`_ illustrate how to instantiate TreeAnc objects. TreeAnc Constructor =================== .. autoclass:: treetime.TreeAnc :members: __init__ TreeAnc methods =============== Basic functions, utilities, properties -------------------------------------- .. automethod:: treetime.TreeAnc.prepare_tree .. automethod:: treetime.TreeAnc.prune_short_branches .. automethod:: treetime.TreeAnc.set_gtr .. automethod:: treetime.TreeAnc.logger .. automethod:: treetime.TreeAnc.aln() .. automethod:: treetime.TreeAnc.gtr() .. automethod:: treetime.TreeAnc.tree() .. automethod:: treetime.TreeAnc.leaves_lookup() Ancestral reconstruction and tree optimization ---------------------------------------------- .. automethod:: treetime.TreeAnc.infer_ancestral_sequences .. automethod:: treetime.TreeAnc.sequence_LH .. automethod:: treetime.TreeAnc.optimize_tree .. automethod:: treetime.TreeAnc.infer_gtr .. automethod:: treetime.TreeAnc.get_tree_dict treetime-0.8.6/docs/source/treetime.rst000066400000000000000000000017321417362145000201510ustar00rootroot00000000000000**************************** TreeTime class documentation **************************** TreeTime is the top-level wrapper class of the time tree inference package. In addition to inferring time trees, TreeTime can reroot your tree, resolve polytomies, mark tips that violate the molecular clock, or infer coalescent models. The core time tree inference is implemented in the class ClockTree. TreeTime docstring and constructor ================================== .. autoclass:: treetime.TreeTime :members: __init__ Main pipeline method ==================== .. automethod:: treetime.TreeTime.run Additional functionality ======================== .. automethod:: treetime.TreeTime.resolve_polytomies .. automethod:: treetime.TreeTime.relaxed_clock .. automethod:: treetime.TreeTime.clock_filter .. automethod:: treetime.TreeTime.reroot .. automethod:: treetime.TreeTime.plot_root_to_tip .. automethod:: treetime.TreeTime.print_lh
treetime-0.8.6/docs/source/tutorials.rst000066400000000000000000000020261417362145000203560ustar00rootroot00000000000000TreeTime command line usage
===========================

TreeTime implements a command line interface (for details, see `Command-line API `_) that allows estimation of time-scaled phylogenies, ancestral reconstruction, and analysis of temporal signal in alignments. The command interface is organized around the main command performing time-tree estimation

.. code:: bash

    treetime --tree tree_file --aln alignment --dates dates.tsv

with other functionalities available as subcommands

.. code:: bash

    treetime {ancestral, clock, homoplasy, mugration, version}

TreeTime can use full alignments in `fasta` or `phylip` format or work off VCF files. For each of the different subcommands, we prepared tutorials listed below. These tutorials use example data provided in the GitHub repository `github.com/neherlab/treetime_examples `_.

.. toctree::
   :maxdepth: 1

   tutorials/timetree
   tutorials/ancestral
   tutorials/clock
   tutorials/mugration
   tutorials/homoplasy
treetime-0.8.6/docs/source/tutorials/000077500000000000000000000000001417362145000176245ustar00rootroot00000000000000treetime-0.8.6/docs/source/tutorials/ancestral.rst000066400000000000000000000057161417362145000223410ustar00rootroot00000000000000
Ancestral sequence reconstruction using TreeTime
------------------------------------------------

At the core of TreeTime is a class that models how sequences change along the tree. This class allows the likely sequences of internal nodes of the tree to be reconstructed. On the command-line, ancestral reconstruction can be done via the command

.. code-block:: bash

   treetime ancestral --aln data/h3n2_na/h3n2_na_20.fasta --tree data/h3n2_na/h3n2_na_20.nwk --outdir ancestral_results

This command will save a number of files into the directory `ancestral_results` and generate the output

.. code-block:: bash

   Inferred GTR model:
   Substitution rate (mu): 1.0

   Equilibrium frequencies (pi_i):
     A: 0.2983
     C: 0.1986
     G: 0.2353
     T: 0.2579
     -: 0.01

   Symmetrized rates from j->i (W_ij):
        A       C       G       T       -
     A  0       0.8273  2.8038  0.4525  1.031
     C  0.8273  0       0.5688  2.8435  1.0561
     G  2.8038  0.5688  0       0.6088  1.0462
     T  0.4525  2.8435  0.6088  0       1.0418
     -  1.031   1.0561  1.0462  1.0418  0

   Actual rates from j->i (Q_ij):
        A       C       G       T       -
     A  0       0.2468  0.8363  0.135   0.3075
     C  0.1643  0       0.1129  0.5646  0.2097
     G  0.6597  0.1338  0       0.1432  0.2462
     T  0.1167  0.7332  0.157   0       0.2686
     -  0.0103  0.0106  0.0105  0.0104  0

   --- alignment including ancestral nodes saved as
        ancestral_results/ancestral_sequences.fasta

   --- tree saved in nexus format as
        ancestral_results/annotated_tree.nexus

TreeTime has inferred a GTR model and used it to reconstruct the most likely ancestral sequences. As listed in the output above, the reconstructed sequences are written to ``ancestral_sequences.fasta`` and a tree with mutations mapped to branches is saved in nexus format as ``annotated_tree.nexus``, both inside the output directory. Mutations are added as comments to the nexus file like ``[&mutations="G27A,A58G,A745G,G787A,C1155T,G1247A,G1272A"]``.
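As a consistency check on the model output above: the "actual" rates are the symmetrized rates scaled by the equilibrium frequency of the target state, i.e. the rate from state ``j`` to state ``i`` is ``Q_ij = pi_i * W_ij`` (here with overall rate ``mu = 1.0``). This can be verified directly with numpy using the printed numbers:

.. code-block:: python

   import numpy as np

   # equilibrium frequencies and symmetrized rates copied from the output above
   pi = np.array([0.2983, 0.1986, 0.2353, 0.2579, 0.01])  # A, C, G, T, -
   W = np.array([
       [0.0,    0.8273, 2.8038, 0.4525, 1.031],
       [0.8273, 0.0,    0.5688, 2.8435, 1.0561],
       [2.8038, 0.5688, 0.0,    0.6088, 1.0462],
       [0.4525, 2.8435, 0.6088, 0.0,    1.0418],
       [1.031,  1.0561, 1.0462, 1.0418, 0.0],
   ])

   # rate from column state j into row state i scales with pi of the target state
   Q = pi[:, None] * W
   print(np.round(Q, 4))  # matches the "Actual rates from j->i" table up to rounding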
Amino acid sequences
^^^^^^^^^^^^^^^^^^^^

Ancestral reconstruction of amino acid sequences works analogously to nucleotide sequences. However, the user has to either explicitly choose an amino acid substitution model (JTT92)

.. code-block:: bash

   treetime ancestral --tree data/h3n2_na/h3n2_na_20.nwk --aln data/h3n2_na/h3n2_na_20_aa.fasta --gtr JTT92

or specify that this is a protein sequence alignment using the flag ``--aa``\ :

.. code-block:: bash

   treetime ancestral --tree data/h3n2_na/h3n2_na_20.nwk --aln data/h3n2_na/h3n2_na_20_aa.fasta --aa

VCF files as input
^^^^^^^^^^^^^^^^^^

In addition to standard fasta files, TreeTime can ingest sequence data in the form of VCF files, which is common for bacterial data sets where short reads are mapped against a reference and only variable sites are reported. In this case, an additional argument specifying the mapping reference is required.

.. code-block:: bash

   treetime ancestral --aln data/tb/lee_2015.vcf.gz --vcf-reference data/tb/tb_ref.fasta --tree data/tb/lee_2015.nwk

The ancestral reconstruction is saved as a VCF file with the name ``ancestral_sequences.vcf``.
treetime-0.8.6/docs/source/tutorials/clock.rst000066400000000000000000000063351417362145000214570ustar00rootroot00000000000000
Estimation of evolutionary rates and tree rerooting
---------------------------------------------------

TreeTime can estimate substitution rates and determine which rooting of the tree is most consistent with the sampling dates of the sequences. This functionality is implemented as the subcommand ``clock``\ :

.. code-block:: bash

   treetime clock --tree data/h3n2_na/h3n2_na_20.nwk --dates data/h3n2_na/h3n2_na_20.metadata.csv --sequence-len 1400 --outdir clock_results

This command will print the following output:

.. code-block:: bash

    Root-Tip-Regression:
     --rate:    2.826e-03
     --r^2:     0.98

    The R^2 value indicates the fraction of variation in
    root-to-tip distance explained by the sampling times.
    Higher values corresponds more clock-like behavior (max 1.0).

    The rate is the slope of the best fit of the date to
    the root-to-tip distance and provides an estimate of
    the substitution rate. The rate needs to be positive!
    Negative rates suggest an inappropriate root.

    The estimated rate and tree correspond to a root date:

    --- root-date:  1996.75

    --- re-rooted tree written to
         clock_results/rerooted.newick

    --- wrote dates and root-to-tip distances to
         clock_results/rtt.csv

    --- root-to-tip plot saved to
         clock_results/root_to_tip_regression.pdf

In addition, a number of files are saved in the directory specified with `--outdir`:

* a rerooted tree in newick format
* a table with the root-to-tip distances and the dates of all terminal nodes
* a graph showing the regression of root-to-tip distances vs time
* a text-file with the rate estimate

.. image:: figures/clock_plot.png
   :target: figures/clock_plot.png
   :alt: rtt
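The regression behind these numbers is an ordinary least-squares fit of root-to-tip distance against sampling date: the slope is the rate estimate, and the root date is where the fitted line crosses zero divergence. The following self-contained sketch shows the arithmetic using made-up toy numbers, not the tutorial data.

.. code-block:: python

   import numpy as np

   # toy data: sampling dates and root-to-tip distances (divergence per site)
   dates = np.array([2000.1, 2002.5, 2005.0, 2007.7, 2010.2])
   rtt = np.array([0.010, 0.017, 0.024, 0.031, 0.038])

   slope, intercept = np.polyfit(dates, rtt, 1)  # least-squares fit
   r2 = np.corrcoef(dates, rtt)[0, 1] ** 2       # fraction of variance explained
   root_date = -intercept / slope                # date at which divergence is zero

   print(f"rate: {slope:.3e} per site per year")
   print(f"r^2:  {r2:.2f}")
   print(f"root date: {root_date:.2f}")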
Confidence intervals of the clock rate
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In its default setting, ``treetime clock`` estimates the evolutionary rate and the Tmrca by simple least-squares regression. However, because root-to-tip distances are correlated due to shared ancestry, no valid confidence intervals can be computed from this regression. This covariation can be accounted for efficiently if the sequence data set is consistent with a simple strict molecular clock model, but doing so can give misleading results when the molecular clock model is violated. This feature is hence off by default and can be switched on using the flag

.. code-block:: bash

   --covariation

Filtering of tips
^^^^^^^^^^^^^^^^^

More often than not, a subset of sequences in an alignment are outliers and don't follow the molecular clock model. Such outliers can badly skew rerooting and the estimation of substitution rates. To guard against such problems, ``treetime clock`` marks sequences as suspect if they deviate more than a certain amount from the clock model. TreeTime first performs a least-squares root-to-tip vs date regression and then marks tips whose residuals are greater than ``n`` inter-quartile distances of the residual distribution (see the sketch at the end of this section). The parameter ``n`` is set via

.. code-block:: bash
X; aNxh?jZKHHډ 쌓Si@l1yINNf֨Fu1z2III2!s=),,^{rzzzhZܵk2kL&gϞ-4(,, JEbbXVhժgϞ޲e 89ЧO [~QwyyyʹsoUϩ_/!2DU FYW+t9UX݊!Ez#+R~<rsOCҹuҨ54 E=zL ={JTTT7TUL&&LCصkv[rʕrovvmu,##{3gr}V0`[wyg###5kٖ[l￿O(BHHH*c2ضmK,رegѣ=znQMSyƯWCjj?)Bz d6ΈIѽMɴo#,<J6z15"nP腣CݍVgԔZIHH`ĉz:u믿 /P'ܱc3gd̙V޽!C%iL&7nSXXHll,&MVW}r}]?hh۶- gϞî]>}:cǎ͍qǪU,y:vԩS2Jebcc4hM{qsscѢEYwwwؽ{edfT9 ќLfRs4h _AnUbɬtVť5ptP16l󫌌Ps|7YvݰCvӦM싆S/)f4?uBv!v'r.d qWOEԪ 2(Μ9]BЕB=:YWtkSe ]4##`<"GB1Yr<͗;˒=d]+&twR]6Vj-]!D &.h8K 6\4\M1KsyN n={ !L\ f;IR4JlL*2YS2z#*+pwi<j]P!h$E!@OS+i;vNn8eaR}\ Ж\ w'[ZI堐j^65P!hx)/fq1Qc*TP4Z#x & &3_1\دczӝUHf#dbbbtR1rHK~aou͙3ÇJbϞ=;r{/j&L@ZZZ <K=ǏWߎ;R/jлwoAXX^^^3fN8Q333yۛ{uRQ_(Hbv:#2_) .E3⫾X$Pvr>ӿ8ʚTf>A,~<;5$Xrr2fͺf͚U/ҥKt >6gΜa(ºuXd dذatX~=#G{lڴ/#Fh*M8pGTT6+M9r 60vX6mիWׯ[L&FEBB˗/g9rs ~O\-s!ixqVo`ul0Y3p9tKZ|Z8ݸūy)`a3zWW׆]IMM8֯__ny֭[qwwK.ӇO?Ry2e |>FU=rwX}n2cȑjժ/_NLLLó_W}=Ù3gpr㯴aÆŋ諯ѣ/ æMxG'$$石`?רB\x1Y>|GtpdPO?M.]ӧ?8p`i| ]vՕmꫯח,|޳g2X8틛z+W.ٳL8 ܹ3oY}|M}KfE_۷wdݴC?d ,,VZYĭnדk;v8::[jz~gZjÇҥ NNNtܙ+WָoV˫Sٲe <@~up vZi'Oۦ[n:uҥK䭷޺@g 1mx#1@ٕ{TkH_T49FW_b2+lLĴ5 IA~X0־yS%<6o̠A,̱c, >?W_}dbDEEtRF3Ջnݺн{w+1ر???zEϞ=ܹ]DDedQQn<==4i-r[… ,X_{8::2c ^yZ,I䄣mrIuܙCa6-#\v2gn6V^ͤI[1bDjٴicǎ-Sl6[+hjw^c͚5պUQTL:vV|}}6dee1c >Zha>'@GV}xQLڿƩ[ ÿ?^?UȢ]@@/EW~V+"#ּ?}ѥK]?q...nغuZ>}p!am6&M /3^vÇ?ye ӧ1q}ѽ{j%<-JӉl^2mT|e6s?ګxa?&l.$65p@K?NϞ=tl޼%IZn?~uYX64T'(ׯ .իt$O 5M\ٹ}^T* 2 fy=Noۍ=w}~j|׮]}n:Fv=zK.Xyzzh7;v,cǎQHV\ɉ'ѣǎ̙3(I({n;/ŋ뮻}?<(JIܢ&U>Rع(Eզa3N-pğ1'R{{57 zg?>G^^^|GXFdywx:u*= 3gdرs8::l2\]]qvv0 (xĈt:fΜYfvĈ|tڕvڱb RRR\k׮Zn[o%::gyǏ3`El޼X(Uhh(AAAUܻw/ٜ;w+Whݺ5 ))e˖?|/2sZmww裏ob6ҥ V"!!onʕ<ܹҗRVgϞn-[p]wU8^ӧOЮ];NJ|Ia,RRRԩeGwތ7sFTTP2_Q$VfRs.>1LZXߙax#JɦުDsn&fs#)OOO+´iӇ~J^Ņ駟Һuk^x[xb͛ǚ5k0Vw-/裏ҡCf͚ʕ+F.\dW_ٙcDz`FmuŋK/1j(Z-wfȐ!|tܙ?<`Ӷ}]7טl=YhnVkj`ٲe̘1{\zaYWl6c2<gDfC8/1g( &j;vॗ^d6-k:sMh:QzE]hxBל, * jjUH?%t(bK SпSnZ_Y\O@!&Rs4/1^)/?U[3|tFT<U['*BBnQ1XJYvSabfOJӲdO"\* jق9E$B!jɦxB;YWFMG/š̸8:0o{no+K3s#D"?MKSNR[>^݉hSt"۽}'`=qvvFRQTTd)u%h4ܮO$SSORrԦ]ʸOGܜtn]n)Q7$'x{{^ '''fRFCVV>>>S5/U*ApU;EW 0K;owIXqww'++Ժ‰hx)K~aTVA`iavg_ӳL ۍinS/]eIXT*>>>x{{c20/!2gggcb)9Esʷ<H1q1VB jcT09Ϊ* k;$? IRprrIB4F9EŤ5/5P<3qfUX#J! w%r.L%!-n B;Y!-x1 S &Ƨ˘P82i@GFtAֽ7 !hbYx\Ml;ԏChviI(+ARNO'l6~l yG8r}ܷ~(͛@DDgΜaƌӧOg?nތBLhKj1+ ;Nd@2b*=wRKZھ3Q(eGYT%)aDfS۶mcСVrcƌÃ[Vx^rr2N:>qDN8AJJ[w^}۾!u"OSLbVՔr7=ĭm/((B!jh&5GXRT/X;|[Ahޚ+EApR NEXN>G}?cyΝˬYgB!wMgbY5OI:]2R”A!WmߚspT/v.}}}+s<77-[VzPґs_~e~zA^^yyyFF#yyy *{7Ϸ|\xVO!D2 t$_HWEQu:Wa,T=XxX]Yg28{l;y?JrƵN"%%7/6l`رe^suuյq!c4Pgͦ&5eɞX2Բ/ W疮ƯA/5~ ###5kٖ [l￿󂃃ҥ ֭cܸqk׮{__ ͣ{uBTES\R`W2N:.̦EMf\з=Qɱ}P)mOmڴkri4 @:X&NH۶mktͼz*]\(w* WYr֥ syk#˗/箻bɒ%7|c[ !hB4\ʭ5Izd_ӳL*cԈPz++W,LLLdڴiiժW^-B4jaqܯIY鬊KAk0blX;'&W&%%܊B!Q Bv!v'r.L%{&;oN[srrرc -Bر{; ScT>v-\yz@GFtA6ydqӧs%y.]d牊"??ӧB!TCW(MzlP[7E \I`ђNXxzH'lf#> :|AAA%Cmڴի(̙3:tn)iUjIs5|/eCܪ$&V;w.CeѢE#FvB!H]VFTNQ?ɲ)qPy_.7_6/w}q}B!ؕB=uTգ6SKN䷴BZyaH(Y͋]BaLf˹ZuTգ/w= :Rok &3_1\دczdB 7lݺ~r_OKKfBFLg0q>QUJ^7mZQħb4+ eaDN?Q/l6xb ,@Xx1FGm![T强QSxiPp)JG_'e uFķ'3i!'{EY/Tڦ_~lݺVBȘ iZrѨu (;wO_ OSlFtpXN_ 4xkﺜڴlW\-B4"z9J \F9OBj.}ݙJ@oC4 hv'G*gu֜9s6gΜe˖BF"_kRs-g2+l>v89x<g; 5'G<\hv56  W_}ůJ^ʼ/?l[ !h`QʵJSX; WŴP늆䀟 -.vfS(Y믿ҷo_\\\xWh۶-/_㣏>`0prĦoooj!D2̤hvl][\:8 -/w'Z{¥qn`s&)3\R36tWD+$)6᠂;29K^{R3=]ϖqF mB4Jb#9 F3HZ݉ʸ@[wQwgL& !  ZQX~*?VxjwZV*J[@D(,$ L|HI!1!̜q].grH^|>x1nPR!Q ~kJ_?8;}r8PsF ŷ2f٤_$ Y?~<_}U3gyf{=f̘AQQ7ndرB y5N7Yִixm[Q I@us$"^/!FjE.ZBڟոQUYfiFcc#wygn)C/%:=UW@T|:߅^| N,FY1Ih3ɟIR;5Q__OBB&S{}]v[XhQ(nB٤Y!SQ߆?}Kqn%!V;/d{pt)ytx`eY/KYcl{bTT.ci6-b Y7P挐i{̝;ٳgw{̎;((({ mbH.S1^;vsM'!Wع/5Z3f 5xxrG  afŜ<&g'j"t::jIүӟxg8vn'BDۛ}9. 
treetime-0.8.6/docs/source/tutorials/figures/ [directory]
treetime-0.8.6/docs/source/tutorials/figures/clock_plot.png [binary PNG image data omitted]
treetime-0.8.6/docs/source/tutorials/figures/ebola_skyline.png [binary PNG image data omitted]
treetime-0.8.6/docs/source/tutorials/figures/timetree.png [binary PNG image data omitted, truncated]
CRQZ f'`kmwۻic8AĶ1XuH"""""?zNV{q2>><׿u_cXl6K&"k~J?g,..}}]R̟㭷?1'N-d޴immme{zzE:;;/+""S(̗\m6x /Ӓ50v"NLLӟy?6nHKK e]t~KK > _ͷ-瞛]8ws2>(r1~!lݺ9_7ٟow?< Wlڴ 08{,/jgy 64Ü>}|>O___s\.3>>Α#G8txo}[@DDV0 J\ z`tə"L.wA$;כa5AG:c;t3 -x" 2MY{@dLQu"}vz){=&''9}4j>%q76 e +y7yW!JaYX =re333 cyƻr^^{5&''/?|Kݻ~/3;;yLNN2>>ξ}H[[E+$SSSs|!b6ړ dͰ+}CPLXȵLIJV `G pvI\^yHxWDDDD.eY1_ׯ˳\.r!տW7yfoo8C&annsα@:l^S*ennL&sK#qs]۶m۶mc:;;dxxrr9o~]{- C0B*GS7E5~@1qzщ</PvI6'ґdx $=T[ ,BLg||LӤVL$J5g~Z`۶m 333sCkmme*e$6mԩS;WDDDDd1MyƘ㥗^V]24<> Pl7RVohmH$T~|f֭1 J$###{;v>}E*""ZajG]˕k!ω"Ǧ)1THF,vt'=rAT4&E̱&ԸP|Ⱥo/cYX3h4xL$!ne'ԩSWСC;w~H$T*&&&gbb͛7/~'x;|"""7\?kt}Z\.i^ZeQ(TQTDDD20B\ϧT)=*n{PvW95[gKԼC_.ζ96uHD,L]^,6  XD6+"sEDDDDֱH$B,<&&&YbDI&rɱHT*E:ZH亣wEDDDֻЀbAQ@ѫYi֙85W%Ηp\)veЖ;#br׃msLmM+"sEDDDDֱX,F<o~o0>>N,ؾ>~aynQDDDdY)5Sj j^kuBϗ86U<s,wѓf'ýY ۺXcؖA,bp,2," 犈cFONNh4.*0 RD4 \muH$rݳHEDDD֫0rAڠ\Qs赫g /2[Вp֙ɡ.6whMFNT!qT;@"""""w/}K8pK>f333T*(}}}DEDDDV/7r+O z Xk02[d3%Ɨ*>ٸ4CvIlL͖\ c9&7XcLl\HKK lݺ7 };k;/C*0]uLrq6%՟eGWl 0T0[ <34X#P)*""""b1dvvz;>]e||Çc~i,̰<0 4LF"""AH`!b赩{*/g-Db%wp` m:C<Ӏx"IEmls+TDd5LQu,Hi&z!^x?gOP`tt}o6h;au/PA* -q|K l dѝ/%`]L΅P$b8cUD"""""X$!^|E>CΟ?OKK X}ߧR>i2>>ίk$db8J%<\._TM&dY"J&""/?ZçX(>mZ+7g2TÉǦ -uLSrqv0Ƕt3Zd`˸kwLLEdLQuβ,8aWU~~ݝzg/忤RDO?m7/xA6%.eaY}zƄa$/ Ge|| Xlڵ}kl߾"""K(,\~HkrX/?\N 8%D]^ 2!HGRQ4PADV;犈eZZZظq#|V>H|>yDQhkkRPׁ|d2IKK \>Y#|ߧhraΟ?OR!NV,..o3::ʱcx'xqGw0 B*G> 5>srę2:˱+ņ$m(Tm1M9 "Dl 2Q5`j\ay8}v>#$>`qqw}._W>S5(s)~s!:twwN"R0??cccKj5l–-[hmmg"""Pp=R  X6X2rĻ/U>--qvx`\Lܹۖd`M±:˅mBEd-Q|:{-lۦw_gqq3g+}݇eYjHӷd㭷u]}Qo>ۛ0d~~zǼtvv*mqjB녨 5Ɨ842ϱu--<)h5` ڤc6ɨWDDDDdsT*uMwqI"cpp0~+({ajj"f>gСC?S6oLkk+\liilK9¡Cx'zUoT\bAU,P586gLjtӓa'æ]h 朵2 IYJ17ofS40 /G{5+vvvnDD.\-=5jW{g/97_%̕XRu}ڒ6'՗eS{\؅~/\ 1"Xmb[&EdLQ"˲ >|_LOObHl۶GyNoYV\.0z "Dq.{|ѠZ211AVcxx\.9ZDDփ^RV]_qWgrz-Q{X$[:R dϒ;DlNo[im8A6IDlb\uzNQLeL&_d2Iww7*7 ~266lW*xwEDd=>jBbU^R'st"b dbCkt'Cw&50ӱܸ\h\aR)6l0|ǎci&6euVΜ9?￟;vC"Z299ɉ'8|0 lݺ^裏.YL&y^6J7Pw蕄aH1X<M[TillO2ԓaGw8xKUo%&7;Q"rST1"rQ|-YV:FM??۷q{ntf=z|>:gϞe֭L&?dƍrQ=zٵo144yrA!O(4P0W3Xt& -V01hKEЖ`'˽Zؖ憮veX&Q$YQL+"""""u---122rKH$fR[yǎ} Ð{Gbsa5 C0l322ӧy篸-[TAHX> 2X,zd7,0XDmt/m`ߦ6zs1lD4 ꘤cM̶+"sEDDDD^xӧO_ p]FAX8D"[a/w[y===|߼Y""7aH)=u T '|0gl\% Cvٝf'@kTRjf\ ;q\ɵMl䊈4)>WDDDDdmD"Akk+RVY\\$7Y\\N,9ѯ/^__wne]Y; g.D^p*>jBc]jٸÖf7QTAmL4+16qg96W+""""E"سgo8wcx뭷T*>D:a`6mD"A&5dug-ky[j  ~١ad_fbJП?ˁ{HEmTS[LI:nD+"TY"|䣏>׿5E===K^X,F6S]r 4U[* C zjWA ~BtSN͖WL{{3N=Eo6F̱0U][ "I>ɍ]ɍ)&WDh]{6<;vp255uk8dYqװ,d2I*¶j#"""˝KZ+Wv=>)dz?X\ޞ _νY Ke@1IQœ;+GWDDDDD2Ht:M.l6 J֫?~#GkVOR<<'{0 ;vOf9q?;yaw.*拈ݧqF iPq}>.plKUj t CvƚQu.mw,E1X&\%"r+"""""Nl6{]ϛٳA@,4\nv}ϕcbb'?΁H&LLLӟ nh]0x'T a~@S{ +7 fKu7GEf 5RQ ٝ=Uw*cp!&2X1Mܱ-EDnsEDDDD䦔eDZ, ^16 fY]JsaxC+Cj rjve!/3 ~EQ={ʭ*!P{ ij*/12[blJ ݟeKG阃U4 r!&7ڊ]+""""r}bHT^gbbbi8C$u]"zy\Rw孷bbbur~;e;#YKV~H} AR|K.ʜ." e lMm vtTcmZ$1RZD6R|]"ݴŰ tTl6{];! Q5^ fdw-rvDl\/`?@kBQi#brED>O """"r;w;AFy{-k-..d%^x}l&˅ع986mb߾}l߾T*ii-;OY=>#~򓟰sNvɎ; ĉ|G?~za""r'x~@)<*O )UgP<ǧ/3Wrql8C=vtЖ-6UgBcm&\ϋ"""""w!u) 8q>_⽱%Wh-qvvg՗ec[cȶ :ABA4*&WDLQ\WW7oDr:uQ*l7288,_J%z{{okS(ٹs'Bẞ7>>αcq""r4bm9.١r;y<q4#ܑR *c]ėcrSȝ"""""w9˲ms,[8w{% C"b0 7 C2 ;;wur\B%@Ћ NNNϕO!t"ӓkC] &#:(&WDdmP|]b%~0'oF,#fq]h4JVcffiL&sٯBOOmmmro.C^{_3yz'OyG켭{'B~@Zu}D? FΗ8>UL F@K"4;3lL'z)&WDdQ]4M2 eňuR%*drrr|q=wyᇛ{EoddzqZ% Cj"m&ɰ{n|Izzz>-!AR*GQkʡ#(̢i8C.۶?{g}yJ,Y2>b ؀14)@&ML3ЙtڿomIMiHЄ@!8$! 
Y> YZiW{RplcHm] }߷m ^K/6!aeףlە,?qSc3o 3UKSYݖd]k 1t!.B; !B!(b1mFkk+Eja1w8۷7|>mKrba]ǖe֭[&B܁ l6?hrƔ+9 NW8|a&D E{:4[S΁#1BqgB!bq,l0p]gϲgRq###:V5%j›Mbr'3EB!"c=}v~_P(p~mvA{{;Hd w/nBѣGy뭷'BqÐ 8ŪGp<!0]v0U {M1xIg}5-I盛9!1bAILB,/+B!XJ@UUV\?LOO1ccc{nQ.I&xG&!wR{RoG!uDkBqCZ!eIN=UQhJZlm`kWlCאХ!1BH|B!bѨa(B4%N8h4ʣ>J1<<o'fӦMlڴi @ A>A,VBS)EۣDlgpt%F 6uQ6Xך!F644$f\!X$>W!Bd8;v`ݺuSNqQV^M*Bq qQ~{#2UvW(`EG:jcU64$,Uo7 !B!B,X,FWWmmmlܸ\.yDQz{B!(B*OPq}\WC/ B݀#yqGQ"=u< m"$&W!H|B!bN2DQ8i^vL\P(P,<0 4 UU]B!-Blקh{Z\HA`|% (18]4%#l=EwcQSCU+fɍ+˖ !B!aE-u]JRJ9p###bǎرcv/n4ihh֥ގB°6#Ÿ\eeghsfȱOSq|L]'`usul#a^$&W!ć\!B!JB>X,oTU˲Z+ Cؽ{7.\\.sQFGGI&DѹضK/Ja .vB\R-T\ U#͡5U/\ě'8>,n 0>c˓~|u[ M) CSR͡ C#\!'B!B,ݴ299,MӤ/}K 066Ʊc4M8477ܺJ0gΜD"iWqsy/ٳgX,>P+' eǎY:BE!P=JG ] ?(72ñbpp .KccB!_B(T]Ue>:tDЅi8>XԮMG}{:ϙ3gxxw8~8?яxYzB\?vke6;tDmgpbWhx|hKG-$+4%-"&E(`jcrKcrUBH|B!⦉blݺl6WK) RV;vP(zj{9>Z[[opC.ڵ{GBq0$*OxAr  竜/reUTԠ.ʪ[H.O g6&67T%fD ]7 !X+B!R0>>NRXD"A$汪NYnW=<" E;4m۶SOW/jnnfΝ ]IR &''.Q(HR;B;K)Tkq~.cEn$E#B4UaMsW6fX.P$,!1B!n)*B! HŪU>WTP(\u]XR`Yuuui*.SWWƍpw歷bÆ d+?>>Ή'ؽ{7\7No0cTiw\CyΎ9?UaQ5QGOct殅1\SW1TBqsI|B!ݐ2 4(R`)U(=qֵoE=騁eZ-&WT,]#fiD K\!KCsB!s X,RVweʕXyŵe8X:hT w}YOSokڵkP(ӧ sN}YZZZ,& C0q *S`AC/$@QpKQqe[ *>5΁ L񂐈ґɲ5E[:T ]!y1&7fj K1!˚ !B!ZA4scx3gllr|388믿 olld͚5]#?XzHyZZZصkSSS߿eqtwwO}V"RB< 0r C@+Q8>2 Ð3pt&KLLM7cCk5-IVdbM VT1kQSTtMEB!n+B!jŨg~lq:;;1M]vq1J.~֯_g?Y)!t]'H#Nb>}Q*\r" ]wO?ƍr3UW!B뺌s;9y$GG!,~mmm:Jl۶[2>>0sh+VFcc#ai2ML?wǸe?6ymM ׀eY|坳۟J&f]<2Cg}SWQVR04,B!@sB!@-v.\ץc6'O>qF?J333ض}׍D"$Ib%4 0>9[a455H$hmmj7g;JceIbDžpg+Dk>>"i*k VÅaH2Ɗsnht5Yݜ`}kLe"\T54"&1B!na+B!L4zq8x 㸮KRR\q]TUG__\fz!> q a}D""ȇv02207oa!ĝ+p:GkW-f7ζ9XDW&TP/۞LcP G*! K3wձ5K1t(Jjh*8;4bbr*V'EQ!B!Yźuf_N>kF* (˼;LMMňDj7g=u] dtwwb L4͛2T\_?Cyyy{x饗ȿۿ|fw*K)fj]zeV ŻËoT銋,nciKGIFu4)-THXI nihBAdB!,ˢ UUyxw9pU󘙙aΝ<쳴LLL͎F)J8p6::: ybj:;ujbx]P%3Es(n455|3Z^X__>B^{ݻw366vɚٸF>^w<|ߗa!/$0Bή~@-O8x~3 K!FOcVӓӘ)-]UеZTnԈ[:QBq\!B!i*a\늢*444\AƻÇLkt>У,vJ|3o'~Ru}Npb"m#w5%hOGIF tU?*EKWkQcrM]E87TB; !B YEGGԧ8x ϟ_UX,ƶmظq#mCz-9B.bWݻwsС'O`ppݻw3==}A`6B={[l!,<ömX,( ht:M[[T UUFb۵~iLcc#y~0wCu̙3k׮ j7UU塇cŊ ?۶{?Cu:;;y?ϽދeY'N`׮]?fll0 f|ӟfΝl۶SSS>|7xcǎ3>>N,5kظ`@\?`v)T<*OB1ʟAGzB~jV2{MSLW\/ V6y;C= V^]HDtRRhBqG\!B!"dGGD\.JH&ar9q]wQ_W,GyH$2_XnV\yյ:HVV^O}}Ͷm>̷-^O̻Sfjj~s1z{{y衇P1y&''D"̝9|0"K<u]&''9}4S?SUg !n}A!%ۣd{T\ .:.;P}nh^У8v-[Y$յ}2<]`SE&.QC!tfbcކy+1u55"HwB; !B  IDATL)o|)78)O~ wؾ}%sjɓ'ټy3[la֭W]*aDhoo'D?1[la۶m} سgr~B,o^UgQv|<{s~jf=jYij@n~5c٥8!0Uv8+srt=O16c!qk[lИxQ 誂qqnhԈ:bBM!B!8JU*8vo&Wf͚5DQ8]]];KaÆ [nq"رclݺ[r_ȩ( Cw^ 'g>3Wd$OO߿g;. ./Ny'x抩۶mc||7x_|M6]իW4MCӴsްa> 7/KɤEMUBեPq Kg~P`&d7QͬC氦Ϡ9(opV[5Bͻ9qfhB[]{+MWCCSD׏UN]k1rYB,7RB!B̛89s0<<|CQV9<ɟ ;v젾Mӈb޵X ŽYwu_Whll0,1֬YCGGtzfyf ~25333p96lD"֭_~a dx>NZ] #X`{E67v}PT5wP5$ݱ3Y9:T %f(Bg}m)ֶnMZXz{DCStkXqqn\S!ˑB!B[X䷿-w) y7|ߧZ>L6]yG/|Tbbbqb֭[CCCyVZE&;9E6… }hiǙˎꢽ}|>?hޛw-h°6# n@f΋_. p™"93^LzQjJpOglI RT] :1K+ !˙B!B[Tb``;VX1pa-.ddd?}k^T*~dUUd2DQ嘜$ C2 Ld2444( SSSLNN[V"!D?i!MzLգW AH~9 婺1Sgkw=̰9EDGeh*1S.j`&1B!,B!B[aH$rĊu]ǹz!č (9eۧzm)T9;^`'K Tֺ(8w.BP zC4UkЈa*t !#B!B릪*HX,vEn^/,A`6r۶okzXu⼮먪mՊͭbg;]׽fupp{ꫯH$Xf oddA`BmW!Bضmskq]D"O?M&\.y`YHDH~_/sqOby ٳ^zGta8WB,0Q2Vv<ΌQNt`uSnIz4M!j1SUB!AsB!@co>1. k:uSN`Y+WGf&EeYĶm|ߧMeͭRD8sݰW؅ZO~(?\/mc{,AHrv'f8+S}mIqWSLDDǐn*`*1C#bjDtCWUUbrBy\!B!Pq89990dzz_|W^y .|oll^XRq[J2J$XE.x9R8d2b絞3H( Rbx1bˎI:~Ν;G[[=7n$GV*w;B\* k3B][r"}&\.$V0~:r*?yyv17t4MblLƅ!خt٥h{8^rVu~7ξ)Fgl\? 
jjxlu#tiHXzǷUK&LPkK1!6$B!B,suuu|cݻwk{TU,[neŊ7bBllۦ]vq96m-VCC۷o'HtS^G~FGGT*D"E!sYƈFl޼k>g*9u<%ǜ:uZ[[inn&J}mjj3gCc>ի?RD7ry8< J}|66oLoo%ѵoo~oGGs___x7 r!|Mb_[788[o׿uٶmu]N:5wi$IH$:!-!ZT=ʎo4_i~7ČM (HcuI$*:) B2btJLBdB!})o_~2A<)t]' Y*O__yyᇁZˇ__W|_]h,{_*{̙3|_' C `<<~lۦX,2==8Ev/XѪ3Vrlɱ"%FUTVdlh+dIEt4,k**1B!BH !B!n٢ɓ'{xG)Ab&HQ;Egg[|;AUU6oO?͎;5 +D鹮uW__Aww7>,uuu۷ϣiMMM]'|իW_v3[bY---|-rJz)2 o6SSSaHKK ۷og˖-tww_lYܹs뺗u]<ϻ7.C ʎO(WQsG (T<++\ S*O]Ԡ=aM6&U%Rл"T]StŘ\PQf!!B8B!B, V477s~5ApqL&b>---D""x\n8FDhmmy:LӴ=SIuŲ,4M CS~,B+B!XReJk!\7߿RtŢ( mc6aRVm]ٴi]7g>X,FXQiu?i?0 )t ‹ݡקdT\˯ xLJ (1<] &ZSlhPYg4 *U $TMXv]/喭8nh5_w9P0tQʵtSW1ZATެ%B,> B!mTUN8??STR}BX8ϟ'_jEsȑ>r&''ijj²E߷bÐ ^,l[^?#fC26SdSa쒊lgK퓻IQ4ǰSŎǨ4M`&m稪 QC%nD ]\!f\!B!-i6vRǏYf\0 4Z4:{|Y0s (r99BlذT*u3.X&9]q(Vk6[>>\I!l`SGS/M7յBjoSv}ʶG piAths.W` q8S4&hOGIBu (wg QB3kT08S4U!jbr#k*1B!Ē\!B!-\.o>^yK: =cjj m4 W_u=>(r]]]su]gddH$B:kEAUUt][ s7.B |G8+AH,9/qls9*XM-Ž+IFtL%@u Dr'LC j+~;jNBL~ItMT,]%fiLKW*B"(*B!eN8o[zzzHR躎mۨʹs( r9 [X& økc%VTUE$X,w^Μ9áCoSNQT(J>}__QQ.r _Qw :W__71MS"rυQ3B(BbzRS9q@AQ|gvvz8y dRwP7np卸7nyeddw^f.]DІR^H+:ݡ4?,]®/~ĈRKczUʭj۾"c8fFKͮ2ybD˰o ǁc=r Rb;E+?=@hg0:-^Jziw[!#6I:&i"X$l21 \KKK3y;edl6 }}}E^~eN:8wt(T*p%|ߧGc̙3>}+WH;LӤݻwsaqam<'m /h!f{~D;v-d$;fؼKéy"7BPitfV9?i6;ioж<{ry[_Wc%LME(~~ ihv' [/4Q՞v-2 TwV\&''կ~EOOm>(JyuZm+H0::7 &&&C(l6}e àlk׮b|yQ}8ƲU^$*o6?<{,+UYm6vTV##7 F!w ,fqvv/0Sj )o+~Ik;bX"Q=߼kd6M¶Lݱ"""S||i/^=w1AEQ׹p333uD"ݻ*QtA333bO<ݺޑ*_Oɓ\z!N88CCCѺ177Dž x뭷W^a||Ç388W"qS IDAT"`3BAHnj(A6[n`~v;:2C"=;=4\Cy ՗a{Wca[w8ymGz?lD鮋~vq*OOlZxYA6II"ᘸP-O"""""=|ghh^y0dyyɓLOOcǎ:mtwwEOtww3>>;CV#r1L7o~˗I5sqqZ۶M&YqHӟŒT*8p08w (JKLLLt<<=7-حرcݻ 4 ]8P(`YMٖea&w^Ο?뛰ryQDԼC 6B[T$h/]| j8*?pW\=;2L0Lb&0mb;#b%tCpe0 RM5I:ZgyVEDDdKLQڅT*EOOXXXTd\|-ds7p=ϣZ7i6LMM#Rg9XR{˧OR!ۋiw[,) T*O(#^H-{3 <|1'v2p%0 FS9p$ֽ. x܏3vgk لQfDaq~O&*{ل~}>l299ɕ+W+N8AOO]dw\4Ml&;*&Oض}w;D䁷^-}̀CZ ]9iZ]xAB'*c?h#2< 9< bS<їOdqmAnTmO:d6)4 tDDD:sEDDDDhE7nܴ+o~~\vmV>+++˷|mEd2I?\EEۗJevvS*f}zŋettt:}W-"M?Z!-? ㇺ fe?[!7JD4[>wr)"]qȘyC=J&rnK,EұH9& ±̍nZ3Π\044D__7K) M/...r9:ānڱ++++꫔J%Onª{zSN/{nd2SXXX`bbuqzzz62D v1ZC573`P9H^e~qk NE#enbLFE3M"裄vҲzfЎɵ-mNؤ]mbwyΪl  i-RܷA"lD"eYDQĵk(J8q'Nc(V1<<4bb״Z-gMRFCCC<\twyk׮}'N~ \p7|_sA}Y7*D^ÈP0]Ff;s= ~;+M.>9=Fcqf/b)"I6erǬȃM"""""mT*a?3===8aq ZT*EWW-;{zz(u,Dy\r .t˵suvn?=,ԩS7̙3tuuH$2rrqu<(#~HՎm!QDD^BŹU.Ζ\0ߊ1l]ÅY%'&B^nTvV M\6vATݡ"""M"""""I&ڵׯk}4M,lSVW^߲4U2 L~۶1M7nl6 ˲H&r9=3<÷mƴ-0񂈆Rm4 R}X{̔a{S%&jxEwa[ᑢb.~ Dw ئk$L&67T;DDDdsEDDDD䁓Ny'ַ-" Cfff?VLMMdя~M7NR%Wݻygx78{,SSS4 ]8q۷յɫ-bjrݧQgF߇w&y2*44'm=ː 6e`facMʵ3C7{a"""Q<$дm0 I$ ׮]^gffV8C6ŲdYreanqg#.7Ns*Al6KXdppP\E"]u/AY8ngK ,8;S& ?d{w=Yʳa0"ژxӀcrLREb32 uM7{Tjy|gaaa8f&k p&''7QމzիWq_|Jz0O<'N=87{)"r] f6jwXT.DTj s>.sz0I;{sh[CeYc8I&av-RN}Ϣ"""""rA2X,rU~ӟNY^^T*}汎wN:*_d,Hq" `YAh4(J]bꭐJӧ NTi\…yިs1YCyg'MWV1RI.OXZm)*"""""eYtwwsq^u^{58jQV?|>s=i6_h ?ɓZ-cǡZo />i\|/ru}8q]bgϞ=^|^Cj^@ӏ΋Rb s./T\ṯ/hOLƱ^ű N+4XmۦGEDDDD侳,~sh4hZLMM àP(`Ya~5twww^Ν;G 1::JE\|ŋ<__SSS|k:u?G˲c9r}k:t1@"(?i!V@F bdbe.̮Xmt, Ive9:ž,|ckQI5I6P+"""""&ϓdVa!z/eYx yWu]:ΝR{m۶ݗ5=(g?;#yqiEWWWdjj[ / +k!Vk%~M}<@wYz -mȗ\4mbVCƲ,ؿ?NK:&0 cC1ܗ5=x9y$ 3>>D󘝝ŋ[LNN+0>>Σ>&_8 ׎m!Nv /B?SuJu480T`|0ϾۻSd\\Tw6I)MLSݡ""")>WDDDDD:V6+ .W 3 ++`;{%&^L I&avma*""""""""%u&tׯSՈOoȻK:&NEGFF63MscV7}'l45[Nd35զj# b vr㽩pˋU~@>;{86 IRgǘAڵȧ2m)JXDDDZ4M}]Μ9 P.}E۷sፎ;8bbx7/kR###q-gBZ׹x"|"|9~Cj^HiAؙVR]*.֘-7 |ys2ON,] M)µMSݡ"""ri<2/*iNiZax7MFFF*twwcos)^z%vE?lnA@Vcaa N:E^s_Eފ0 ZAøq#[mquʻ+aj3rݗaw_ƺӗ%1UlLI6av-EDDD犈'"("J {2==M^ih*P(|hys,g˼;xg8q'cr/orA}Y72D:BF@QCEkCLy{bJעrc]<CyRmzw(d6MƵ1M\ u&''Vtuud>fyܸqccT|>O6g[|{4MN>ͯkΜ9CwwFy0774_Ww?NQ{(? {! /GQD+M&o`̇5K *\ʳ/pWlT, ܈M:ebF|eY vӧx"DFիW'? 
RFA4OO8vؽ L&C&駟ƶm,ƍ\rfkYd\.ѣGygؘ6E\?hZwhЙEAQCXpvfw'Whe0V̰ ǡWDDDDD(\\.q!\47U_~eΞ=KV4??ȑ#E8{,|~oN>etOD6kNF1^\m1\烙U\/3Sn`ŬXOۻ88\`+٘ip\ݡ""""2犈P(022B__A¥Kx'8v]]]{|>}X̛oɏ~#?s=ǿw "[C 4}Vawdg:/Y8yqwV)5񂈄c2\H}}b Zo>4H:&CڵI&R."""["""""+ |\r(}~vE*T*EP UDža17Զ-? iQY 0T|sLިTY?c'M_.A1;!jCӮMʱH&ebR,"""[f}7nlW u]ŋJ%0LNN233ss:tGVGDbH"쥈tABj^@tpwh)7|V?]2*8Vq/Ñ.m+/QkQkL:ވ֤"""""r!!/"Оs{tn 7ߤll69s o&JnI;44D}0pqnkf|l6˞={h4,..nrDzZ1ix!^ul1tbř%~saJj]$I.b6AƵ4vA4ȧҮc**WDDD6emd2ٳ~~j/"zfI\&c~m6Աl|>sqLEͱ@^u]Tw]0<<__ӧOo~f/M1A^HiQGwJ s.WpLA <2g@ IvgD l Z\cnD;TDDD6犈mf|[,..2==_뾼w&[̙3_7>H$ضm;viQ0>;1"2|MU 2 vq&&&'x vI*¶#ާlQ^CvQ :1~Sm̯6P.թ|IB9HOTetLIca[!,"""犈m3M|>O*bhhL&C..m`GQDVcvv˲A077|(_+عs'Ln^F*˜9s\.4˯~+vΝ;$ɛ!ggel aS z+uU^g+TZ>QӓqH'v7#;3fZK\4""""#犈, ˲pt:_i$I4ih4hZK/QՈEJ)<ȓO>qsz{{W0==OSVVVX^^RppntÇ(*& IDATbbh i! "c:VU.VXeԠ9s1THO9FgG&cr-kXCEDD\RL$J,ꫯR(H=EF0 I&twwX\\^СCr9E8 Ð\.G.cllSq1~#*M=4+-ywr sHwў4_f|0GO6aَ$,2Qwhy)>WDDDDD۶^czzX^^? N9}gzz?a>fe|My-4Mm&Gy_˗:ǡCjDvTnSiԽ(P\eޚX ˀC9ёn2 2;>ֵL2 B3t%"""r(>WDDDDDq( o9z(O /i 2::zsN[R.vryZ*vz꺯Q[Mww7ǎ47R~DQ'CTRU>\2\T)Fܖgw_]iNHlCӮMҵHڦrEDD(*"""""[i222Baiwt>qb׮]={gbY籴īᅬ8:zzl6aJHR -%b(cꭀRktr-4b~HqFse޹BcC]Ivf88\X7CVLl$a]lƵLlξO< 4STDDDDD$۶q]ތi y*aR*(0::J28u]$=== ߕu|FZ ;;*#fJ ^\[ b80cµ-2 pmB!ItLQxiJ8v<y \v 0p]D"qgllt:뺛x"v4n iZTnHѱ8{!s&V8WaFJ 4[2>g@}.ɵNlLH)'i[Q""""L2IӤi s1{=Ξ=Z#:XDDDf+"""""[V&a``]=|L&CEeYXuWSD:GDŽQLk-*aD}8cRbL,ՙ[mBF{q`0ǎ $iNHӄcvQ ±LLu|sEDDDDdJ<#?~^ziQ?>Rr̡CfJdmNDvGaDy!V;*ӟ[j+`t˜)qmxt{G80ǵM-YcI6avmNF犈Ȗ.۶mcϞ=EQ~:Νc~~~EV d8<!H*"qLˏ(7|j/ౡP[ \[iP{aP><:PW|:{?+m[-qƿj5y~_l6q0ZiWDDDDDǏs:رi4-6<9z(z˲,rT^^lA~Qk>ZghC7M%V> "WDDDDDd2ɾ}LTbvv_r Z===|3J)"_XExAD i!M?D*VlUP7-Ş 1ZLS$-ֻCSʵM١ڷ{+"""""[8twwS(6A}]gyCmREd( C̀VvzV. " +u.UxkbJ ?$I1>牝=f 2L2H8&GQ*SÇ "jz0 oyN4Id2,˺/!"(nC+̀j+Pg J'y .Uia0\H"_b'C11;86ev^\yK.#LjxyG! CJ:׮]hYebLA0ܕbWoCܡBlyZ14ڤ\cb&*""""B3EEDDDD䡓J8p T*LMM122B&qM^l8 ÈRm4?T%tg.<.U`fVÈms_^XK,l$d6MPͦ"""""aHu]lۦf!zyvE"e"!b*MJ3Dq?Ro\^>\Xm]i9tbbűM:gt, iLBQ""""犈Ȧ<7:9?mA@T*jS.$ɰsNml6K6yvu/EDx B^O+hG!bV>7jpyu*̀kqh`=Yvf%m˾LҎEj-*7i8a;TDDDA\8&"癟VqeH$H&h4h4LLL/裏u@D8ވʭB^; ] #h`)[ QD+QQe>)3± s v3PBsm~s'L4p\&㶣r;>l. 
CJ?O8y$}ZWL?O>+A VaE홢8©0U"'KU ms N^\Եs-0"XlI}^/ı -l!ν""""[sEDDDD例}xgn4M!HyT*._omۘy@D/hG!-?;tS&r{i F[ ?Jg?7IQbK .UQFEnj?c@bl#"- a)"X+"""""]x*{駟fxxs,d2I&arrZF\X,rh4 Dd+0jG6vLnC[ 5Bӯ+䮿*aޝ=u>iFI#ٲdG87c'qR) E\*PTRd%رk_FH}}=.ƚX3zTͅ?L{yMh:NBa `$ 5\}98tϱh9}>5ʶ5fEؖ]X&\ADDDdR|ZZFj5 Aioo{XE,#LR*bqJwP(篺a88*YAHXЋZ!ZN~ĻA# /Ycu"џ`gM|㝑f5A}-1>'6vҖpf <ۤ%l<ۢ oȊ\(" CǙP(PT*mO<\w}oo/%CA[T!AzC}ͮ]u+,|A2-?(ZWz@BC8i9%3D?)f4p!r$L8h6]);"M sHzcbw'DDDDV;*":|[OӧOzK{EN>,Cq:::dppPѹ"+\E4¥bhXɭ!AxW#^Ĭ1^%rX 7QCzNGqd>IyvSvEyiTmbFH"""""rq`qq۶Vy{zl63==MGG_җhmm4ͫ.tZQ,"JR-D4k8_:6,|`QqϚyJބ9Iҵ]\IJ+""")>WDDDDD:˲hmmexxz7xㆻq.0 -[dXn"rwVZ#u~B=A-Okaq*p AN}̑y|<΃(i2A-E&iZ&q",Eze;TDDDdj</\,~੧"qy333LNN{Q,1 41MJ###ܹKA@VcjjJayt.RMgfYVK]N/۳T*1;;KTT*QTT*DuoQTlF@P6(C*WRw,xX Y9X7&a1(O[PT[6SK#ZaHXk\tƳ-)"""":(>WDDDDD:0p]͛7nݺMm>Dgr)Ν;Ǒ#G#8\ovZl{z0bq6J4/2o6###ضM?;wk_;v츩Bcx뭷~ Q?3ٳg)J< [lv]FPBr:jJ=$z- |s028:TpIRc/[y,5L"^B-)Dm ۤ}c+*WDDD(>WDDDDD}[E,#Lftvvo---XETԩS9r .099,\زe [n%NUVȑ#;gϞq^x /a0??s=wÝ~C֭[#188HKK'?ZRimmevvyjaޑ{q 0brZHP" ((T/9:diQfuOs`yN#K $Ŏ}I:L0TQIGر}sT*<ȅ xw1 ݻwH$n(~yffgDQ /͛1 'N+]R,D"Aoo/D۶YXXqڍ0!jR-RobÈzPqȃ_B h;lE-4l ʸQNzBSz>M-9oe;&)!Zؖ憊4#EEDDDDdŊ;v,\.ٳgٿ?j܄HRXm4(Vbr,j32G/S8IGM<ҟ?0VyB'A#E53D&-9O0 Y|ocjm"""""b]8ˏaH__CCC=zi֭[G__eih;}4SSS 100@WWb===<|{cjjzݢhP`rrSNsϱm64bӃ>ȉ'|d2axGWWi6Z#dTR~+WEaqnG'r+1P ؝bKw 1"p('1 /T ] ٖo=c9&Pf"""""뺗=>d>G}tStuunݺxSY2zT*uY⿑}qrzX<}X,' Aa^xRC=/32tAўtғbSW Iֶʼn9:^iXݥ߱p,KQ ʍ%K~& C,oMO\.>a,O}۷/O%NEww7#?v%ȟk|[_s>wx7d2dY* GT*I*bӦMXYYAd{gw~ѹFmmٍlwL&"eL!Yئ|ADDDD犈Ȫe~zr'O;viؾ};.\ĉ8q۶I$* CCC<|c'&&1:: b*dN֭[׿uEWRlǧ)00gKO)6t&L$ܥ/b4Siw-Q,ˢ'xlٲbLOO۶m^_VL&lܸ/| ;0??OEgv%V (Jy2i2<<|Z%/G/"0"[3XtG83[[ 9`{߱h`[moMwDDDDS|4 qXf 79~88###8q??G!N/FSԽ>m|ߧ|_hKqЮ^uns=Ǟ={j^q-[~z^zeYyW, 45M۶\!0Xm3s|8za@:P;ZЙw,l9cl2120+""""wsEDDDDI&aƍ<;wA@GG֭[.vyYGߦD"A"<1EQDLd˜)rt""e :}-1:lM#Io'Mi&r}ijM CQ""""r(>WDDDDDaXEKK O>$mmm߿G֭[I$^DnH=)V,jqB/c1g[_-lVuEXRTnܵH<Ķ+"""""MqIӤR)8~+~_i&4k׮}uu+uOy4%f 5j4Oo-NkŵM6 Mڷ96e4}{C"""""Ҵ\ץ͛7SOq1(˴裏qFbؽ>UabΙ"&svDm: wؕd=A³qXA[ʍ&ma%n|[T8|dllwy{XEoo"rFR+ѹEL-Tldž$he;EWkyiXm9$\2>A3EEDDDD陦IKK T.~i2ݤ{}z"r).,y{dcYf5jAHµXߑlKӖ8*I?z&)!X٬7BDDDD )*"""""^TP(PV6\u]|WG t<ǧΕX(0 ض& 'hOtC,MEܱ Z n{!""""犈Ȫ!J_=z>6L#sNǹg)"_$/u&s~5,gfָKOg`+fX4/"06 |"ZcebuDDDD\k*|l6{sJB@(";p <:6N344D|DdeP Z@@{Ok3i\i#b$IcZo+"""""waβ~&&&.[W\.߶Ð|>υ _uQ-wڶ88C:&L޶Tk0pl"ωgfLf+@[i6u'lOНq,*CEܵCEDDD>\LӤ͛7o wB:fڵ]\dY~0d #0EYE!j2'flj#$;ғbv>I&?Z"A#la>Gvfw-,S3EDDD\.E|\.wuzi^ykh4kA$j0Ð\.GP9FI~ӟ{nd27w"bQ\ƾ f3EDНZlO[눌"Z/w($tT~}]5 HzKФoc|"""""sEDDDD3 uٲe 4+T*9sWݫ^366믿ί~+"7MEyfttSNqKf^=vL͛7J}_]O"MZȖ12L2FHgcKO=I:}R1nYƈ_8Sl,uGK=Cerק |76 <$\ 1*""""+sEDDDD}|\fqq˲R022¿˿8]s}EejSNx6 C2A022EaJ\E~yv)f#Pg]mlL[*"n, ?'yMbsGy>cIR a(6L$iimI³Ii4 UYTZ233ɓ'+lB, ,,,7 6lذ\l4LLLOO>}"wD. j=dE>]EJ0m|zzS]2o59RߢUbX1Y̠Jttl_ \ۤ%lWDDDDDV-0H޽˲8vQQ8pSSSݣwgg'[ne˖-wE6|&szH\dzM6w'N;ٝFPǪ.`6\?ןDAToy}mI[ m TwU4M$>,CCC?P(sIj}7L&UY0,j/qh,YƳeLZ. ^¶ k2-Dao#l`ܡoLc).7Z$WDDDDDV=uXRd2L&ܣ^f1 T*eMyagg.pl*3١C]I6u-v$JyÆNZjzc6@tY{BC+Fo$pS|L$Y]kX/K.mmm;0??OE?Ν;G^;2c{n]:sJ= _3[".d9:õMRú8h᡾6u';+3<n Ԓx0Ku sya[mlbo[ئCEDDDhw.%4VFKkk+k֬< Qa6PV)˗W*8|0:gΜY~|۶mK*%Q"*'|pv3IϦ;3ܝmmwi/ vJ0<a:m!w ӈwR\n̵Hx6阃e,HDDDDf}gaa3g\VrLMMަidذacqqD"A>g?fnnc0\.H$ :;;qL&]-86Ssʓ+i! 
2sA/[S<^0 |gR,9uϳyf~wK^waajJ?x>@YM(" s*<&sL$=5->k<-=)Y rb4 4lk懚fsEDDDD>aYT^x={PV/[/~Cnbmmmc6###tuuh4h4ɚ5k,_WDnLadG^7of ܣYY(Xmpzȡ,')0a0ԙ`;CI.qw;.vƽܸkZ&*"""""ץ4=4d2ڵ}1::JP [ݡ"""""7E""""""3M4I$lذ??errZ@J%{=<ɓ'aϞ=<@de0 6I6)6ޗa{_ ؖą?0g8*""""rK+"""""^ZRPo0 Y~=axEtwws9 LT*Q߂ik[<3$jGwp,.9AʷI14 鍈ȭQ|ZQQ9qΝ#~x 6SOh4ضM"t"a,ֶIx6 ib7 2 E[_wK"""""F"""""jaH.^#RGgGG/"/"LES&`P4ˆf޶icI:pmlKCEDDDDn'犈ȪEl3g044O?}KLu&LƳfs-RMw-穊)'"fgg#_u]LLLP,gxx5k,?_T(k$ 8`YmiNmp-⮍ZR'.""""r)>WDDDDDV qhiiz{8UׇaH>gzzm۶Ǘ<+ Of߾}LMM]u7mΝ;켭#"LME&&ռTEDDDDEEDDDDd}!򕯰w^fffVMEeje oضMaxZXn===RqV"r3, -Ce/TDDDDLQY1WDDDDDV-4I<\>Vh4( r9&&&V8s}GGGq]gsNo~K#"+cgs-<\DDDDD>犈ȪeX;wuVsE&&&(J%uF1Wں+"ic\w-ۼק%""""+"""""^<'_XgE|3'_+"0$ K^4MZ.IƱM+""""rP|4ZF.100peEdklbkO(>WDDDDDR"ÇlIL.~.\X~|֭<|]/"L2H6I!Y*܇+"""""M)Lca|c޽H$njzBiG2W "m]87 Ty[l<*hPVoShPp0Mq,vCi& "eoi4-HӤi GeϞ=Y+JGx<e˖Q^^bf TEi1g5I2T!BB!Bt]p( v] B(9rzn   1228E*B,&xâa6iH\!B!^R>W!BqO4 jEUUhnn,ОHMM^ ##cho|G4Ivv g !nE]Sq5v]-+B!{I\!B!qy{=]$!??Lrrr&}dϞ=9sarrrxgxg%)*JS&]M7ΰB!&sB!bGGG###_5iiin|Fx<ܹQڈD"S:VZżyHOO'33sw !,FDUGS$*B! OB!B "`EQ(++#77j|[SZ}]nL8fdd>RRRp:a\.$ flnѰ&Uf !BC#IQ!B!8N\.פg4l6sp8]w4 QG}}=EEEH! WZtEe3ڡB!BɚB!B(ng̛7)یLWW---aZyGIMM g-ISpXM&lf oHB!5EB!⏢(Hfsvvl6ԩS PZZȔG3KUn6a7k8?dB!?`R>W!B!=o>Hee%QVV=P0 f]m3ᰘ0k$BB!GH\!B!=Muod2ߏ7(2666ii4d2ʕ+|ddd`2T!nⴚ5UB!"sB!4LFFO<W\a``K ᾡPn;F{{;`pҘ;w.gϖ3LlbfְTEJ !Bqq!B!=d2ƍ={pQB>z^裏F躎iS+))k !&MnpYf !B!~x|B!➧inmp8Loo/>oD'55577t:lp8տT&xblҐjB!Bۤ|B!BLcpp[ىvH$r2Nۍ)դb3k-6YAB!Bۤ|B!BLc||:VZEvv66m6`ndVZŜ9sfԅ'iISpZL8&f*B!0HRT!B!D `6  Bb$ *fBbb"v B34ڡvk* B!BL$k !B!& Պfaa``)ۇaĖ'q8QPP sbk:,&f لIUPeQ!B!5dMQ!B!f1k,rsscll}QYYI__;ٸq#˗/tY q1.Uפ\B!⺤|B!BL`٘7oׯKKK ɼꫤ;V+X8c!=qVqVECS$*B!6R>W!B!&PUNRRDaa!Dkp8l6t]YB ?uMXu P!B! I\!B!l&!!tΝ;GWWɌPSSCkk+@`~sΥpBp) hլⴘp̲vB!;B!Bq LFF֭P(ҥKl۶ ٌDQᰱ6?Lg1X B!B|GR>W!B!a6W_-[000@]]W\oo5k㌎eeeew>MU5 لŤbR}ZB!BB!BihF||]qp8L0!Qٌ$)) ݎ(y}ˑ  FuXStNX!*$X,ަB!B |B!B v; .P__Okk+ deeOk&$$Sx<>c<ȥK4Lyg(++l6ﮯڵ C4%))׳f,Y2mR4saѣGzl6rrrxgB B!)+B!EQ#??|k={6nPU˅jݧ,(PWW֭[ill$ b 08pEQbڵ7=3sxxV>3,X!:D?vlnoww7رX,~:;;/eIZ!B!IRT!B!D6"^z%jkk9|0>/J[[cӧyw)--eŊ_qΞ=֭[ٿ?H%K`1nxطo$ /0gTU>ɣ>j$ECW\a֭4551k,6mDJJ 8pC188Ȓ%K(..&))VI!B!~PdMQ!B!L&RSSYf x<<w殮INNPv'O244c=SO=ŬYFdffr~Ǚ3ghjj"//~o[[EuVZ/jEQ,YB__d֭0{lhjjbݬ[Gy] ,^ضm/$EB!;ReMQ!B!i"?? N'MMM SPP@nn.qqq4.]< nt:bժU$&&Ouu57Q:;;'--瓔D\\N$ȠFGGFtttpe(**ۍvŜ9sp:TVV7QB!B)+B!&EQp\lܸKrYm޽{ äqOSQ[[222(((M~~>YYYxpN[[Knnϓ8rD"ENB!^'sB!{p8!!!ٳgKJJ YYYx^ϟn(hFBB6v|>M}g__hD$,ǶOOO!N'.k9$&&Goo/cccB!t]!B!ϽdddEl6:iƦ*&ڙa4TU5ړjEuuTl-,iSSibAQ6v+ևš:pvd2aXPU8">;ibHa6lƌCç( & EQHMM%11lOHyuٌi3v|EQu2cǼx^.˸'%2v;&a zlgl6#z^/~~ݶb۱ZA|>~[pH$2 AQUB!B|~?@`FH{(4M7i&.w|x6ucK]1cۧ%&6I#rn)T0$cZ{n*d> c։s}q;%ҥKDQ9y$υB!3. rś}B!B!̉'رc;v옔ԍD"x^B+k !B!f\4%xfR>W!B!=l6c2|-ø\Xb6'n&١ŘywⱮ'775k֐2ih4% JB g裏pBt]ϐ`0Hcc#}֭\? MMM|駬YŋKgX$! 
W_xxqRd{n:;;yIHHϐXkZZZxIJJϐXOSSO?4)))g!--펊x|^!B!v]={^ 7L@8ؔm&np8Pݎb1: è:@l Rłfab ذaÔ/ z5Lŭعs'۷o#S IDATfRJ~k.vC=ďcc oq덏o>vɃ>o!apGgg''?!;;[loA~~?88_H?CbJFGG?1%%%8+̛7[:#e6gd9FI !B!$&&biooN<000###NHH 11EQ刺I/#HRRq>nQz8I(III84 UUtFDQ⌲Eb( f.I{m6v]?Cb,yPfXV3 nߏX]%A,oy8St dMQ!B!pq#i5<͛G|| _H1q֯(:e&ub01ֱKgĘO7w!nkۼfE^ŭF' “?mdMQ!B!"ٳg ̙3,"W\ %appQHHHMOO ٳgy>j(++###(:k,IMMŋTUUlG}}=>X'T!B!w#sA@`0hҎFf,˴u0@@ @$0JWFFh<|ƶӉMq7hh4J$a||P(do2X,xqg*'Dbbiڔ6( & <ӱv}cXۘnP(dh"]׍sѼwp8l+UU65]bĶm#m*7jf[OjS}kW0$ :f~zmd2aZҍ BkDQ80=bד`0h3sߘoDĿq=];$}YFL&l6u"xtm{GYY---|ܹ~{19s ~!qqq,\V={عs'<g<|$&&RZZ(_*:~N~ajkkOl_͹seƍdffhB!@ q>/cǎqe._07nd,[l>---|ܹvTU%??kkrCCCٳ_ttt\'O5k֭cӦMݲ؈ﮩ#Gpbɲex(,,h \tÇx,\_ Lkj۷clcNIEQ裏صkD"yyʺ17' R]]9uqKlذzI qi~PWW8fbժUMZ*m6N:ECC--- 0|;ñcpPVVƫJyy9.H~񔗗WW\pۍyRR=k֬fMJFcc####7%%իWӧ9tO^333Y`V4S4rssټy3_~%.]O?Eu ,Y˗SPP0OTU%--͸^*)))]lH5b_f6 L;wz_RR80Dqq1~)a&ٳg3::*a˗r4D!Fwߥh4… pB&sI{=mFvv6K.!9ŋ)--vi3|,Y2؋;+BVV&H$=ٳHKKTRRR$8K,[x<Zkk+eeeO*}v~Q__Ϝ9sXr%466rYFFFXt)pB9t,Y2ǏIQQ@ ؾ};o|2VEQpgϞμ3oVQzzz7o^A>3~BII +V 99 .p9-`0ȉ'/~AUU%%%,^tؽ{7I0B[nߧ5kְh"0ΝID{n,]bOb??@~~>+V`֬Y9sj\.SNsN Y|9tttPYYI \=8::/~ vڅf林*++ deeMCz8s fٸϧJ%>>UUd޽;X W̉'(++#>>3կ! Wl6|Ib0ռ?~R,YBrr2/_f޽dfflAäiqqq3>>N?͌vY|9O<-RaA(7nbd2ڊattڵky)++4ni._L__plx ֬YCZZT[ EQV+ , 55UTaϟ? FX,ʤFHYf1gΜVN86"fF}Yjj*%%%S[KQ")))`%3,Lii @ EjrJ sԩiz>|̋/H~~>PSNg駟rp8HIIbPPP@RRQ*ܕ+Wp80w\rgg'f޽[oe4?Vt~SOrV֯_ߏ#--m;w.<=i8ǴɀH+Wh"|>hEQu8KiFVV>,v]ͰX~iv *aO?fŋ B|_QQA(r3,voh"B… %!wL&,X@II?$srx饗3X,nx8w###+_xٟJ$aΝ߿ٳg#TvٜHVzzzlh|~E;+¢Ep:DQ233ٿ?'Odҥw}2:999g?Pn!\UUE~~>?8WÁa۶mڵl#)сe͚5<lFGG9q]]]IǏS__Ovv6JQQQq֭FM^, EEE?#1Ktʕ+TVVb qxsJkk+۷t]'Ob6O~ٳTUUk.jjjXl:tK.QZZ/lBFFFO>矗٢wf?p6nٳikk/$))M6rJL&p<:9qℑꢪ ϦMxᇍl6y7!==4ꫯgݺulܸd(԰k.8O?=ZbfYVi,**ʕ+lٲk:'OsksssZ|'ݻ5]y׌>z(޷>544PYY딖bۉF̚58b .uRRҴevpfff}]qqqӖw/EQS ^_VR~:^/SJ3[: n6ÄaBHMӰlJUWWz),,$??hEyy9tvvwGCC~)/E__ǎ#Cww7IIIOJl6.]\x&ӧO )((0pMQZZJCC]]] fnvi8NRRR# k$ֲlٲI5M#%%y h4J4ܹstvvR'aٲeD"x<յ?Nrr2EEEFEQPUX1Vf2HQU5mBQuq8I(|>Il6q?+WOqq`( Nu1<>,"---}A|2F2Dϟva,K(an1/j墽k|>W\!++4QFF"I;磣arrrc``FvvY OZngtt(;lJoo>M!B!w?)/(%%8hjjx644DSShaǯ=~6Ξ=ˬYR&NUU1Lx^I8܅t]';;Lgg'wttM(Hhܹs_whf7xG}tRc, .h2dXB4顠 C0dppИv~j$H~?|>&5Øieuڢ(FR_ܙN<ɧ~J(⩧"++qFGGQU=mra6x^\.A,ˤe,].###FB 7m5Ӊj%Lj_p1شi3::GiKL&IU'GvOIjFBBMӄ?Ltvvߏ7)**bΜ9טQ:;;a, 3gΜ9O]] F%Evv6 .$##5eP(DWWO~0dn]SwddĈGGOaaϽccctvvRSSCSS# c7)GDKK 10VYfQPP=wI7N'̞=bt]6Ν!fO{A#팏t:d…deeݰD9\]|>EEE,^'@Xl@ߕ+Whoo6EEEc6'tNrr2Ŕ\7CCCp:ddd񏏏v_.\C(2ֽ7o%%%ܩhmmXU,AnnX,I|?44$%%ȘF|8I^_GGgϞc}̴4JKK5^L4tcb'E#F3333Vq㔕Q^^N~~uwogΜ C8&!!\,YBRR]|>"Nl-k hhh`xxx^盚蠷A~?F}G粒f<###AV+2|Iw8o'OАjm1\EGGLjII S*}a")))btɚB|222(((ٳܹ9s搟O(ɓ|FIo{i;44bƍSJFIފMwl<._7|Cff& W_QYY 0=uuu `IJJBuhootuucP((ӮNfaݝl6Ѧjkk9p̚5~KΞ=;i2\-\ZZJuu5[laƍ0669p###D"IbTlsXҦ\|裏f 1g)+MӦmľ&/wIޙ9~8}p~%KQy00H[s#;sA#Kղ$d bݿ?׋뤦RZZʓO>jrM~?sΟ?OGG@F~~>===Yb,ˤ}cksA"ny (//'33EQ58|0gϞ I Naa18++kRۍǖuJKKٸq444_p9ۍի)))1?::J}}=^FGGhlـ`0ܹsIMMdh4n cIXaĿֈrr2<liȶm͈ۨNNݬ]sNi~\bTG|[&):88ѣG$K⟟Oyy93)]]]F/\d2Dii)?8V=)###\xxkmm%`ZΦ5k0o޼)ɓ>|jvINN㓢;vӧOs%#D^^-BQrssD?DΝ?n<ر3gH IDATڊjEww7WfF"<}%=޽{X,tR|A˧$D###?1)dc…@ @ww7;w׋i$&&RZZʆ SȮ].xg#++I^OhllH$B\\~,YB^^(:u/2005⟛˂ D"LOO066f$JKKyGq8Ix"_}Eww76m2\#GpI<FDQQ> 5?TUUQUUEccLlM@ СC… vO7q]kk(2eԳ3aO$;;_>l4MchhJPUGyĘ 7007|Ν;9~8c=vW\Cz{{ꫯBdee#rp.\˗я~Ċ+?r/IMMeٲeTTTOk֬kjjرc|\ /C=\UU,Z FGGc׮]X˜8qqHHH`pp[ri.\gժUddd=z}qY~K/]w0dxx}CJJ V")) :h?<<pߊ_UU.\ޙuidK$@ HrZ,ٖ[ULwLGL11z]o%[#=L2{Ȗ|"!=o<ك餻7nGC. 
QWW&22-)-H-Çh4?#/_"ٹs'x<aqLNNr= Z4 jZd ZvlA?Q|׿ȇf޽466VŗŁn߾wGIIxe׮]?\ jnhxll); Q*_%j?_W~j5ǎ6C`>vHOO筷`Y2 ,..t:]nh5yw|2!X kmm$p G# ߏeqq0( FcrR0 .KD?s|M^u &,vsHvU!!!ۢjQ*luAv>Ng A!&&'OFtt4qqqj͛7ٿ?999jZ[[illQRRѣGIIIfa2TWWSRRL&cuuOcc#mmmrAʈ0?Ç$&&@hh(.y;vqs|---sfV+h4|>qmJJJCVFCCsqY\\$:: z{{>:~hii!%%p"""HKK#**J$8'%%L&#,,ݻwR$))IT 9s=bΝl۶mo28~v](xwe޽Vioo޽{yINNfii޽{۷/ )&9p؈OOOd2100 ȂUwK6"p՞xp\\Z---4668|0ddd`2ѣG$''c6E* ܺu LFii)}JjwhD+͎\.d2@b ɸ>ٳBj5rD_JJ N&''Ν;cGkk+Q^^Nee%dggc6Μ9BrGOO.3gPRRBZZr f3NFFƦ߷1;vPGӉB\ν{bj5]]]TWWt:ٿ?O&%%Ebb"SSSr- G DL.344DkkW^e~~Vػw/T*%""V6#G_ włBΝ;ձ{nJJJPp}8|0gϞ%99#'&&y&zݻE\.gllNFCNN$FN۷y"{3NEE>/`0??J֭[466KYYj^ (++㥗^"99Krr2333LNNru ڵ )۟o{{;w!!!ǃr_ h(++#??y, zHFGGSZZJttilldqq 7<.""ep:DEE=M͏^ ϳ@\\;ܽ{Rq÷ZLMMP(HOOKTTCCCLMM%$$0003 u //xטnKxx8@cc#v ĉTTT011@ף멮F.з ))ffgg]je||LFrrr0qc111k'' Dxx8fLﱱ1BBB$5hb`IHHBѰm6XZZz-6ahjjz*o&#n4Eo'~"""}S"## cxxx=륯˅dl6$HA||<*Zv d<Ռp8P*MZZСC"ycrrO?l6:ۙ^"I$99Yl ɓ'8<//zfİʥK顽;vl\\\Ν{X,~mYZZBT#:;;ٶm:Zп*Ie3g8x (J{1<\ l/?u9ysBؔ*BBB cbb+W‚(9͌_XX 6߿(uuu?p/:l6 s1jf^z Zd~~7BRIGGmmm$&&R^^NEEHbʕ+466gsSSStbCp8444BQQ?UٳOrgttTN>|HBBl"Wrjhh^8ꘝl6 -E`zii&,|>l6---<|cǎhhmmݒA A_$;HHrr2999rppa6?00@MM *C=9}ppǏUnBӑLXXFd2ݻIJJbppz=gΜZMXX? odHCBBp8vm)..^@1 cl6CǴZ-c R<nCQ YPP`jeΝl6z{{Dv\GGG+ v(V{KFyz{{}}}΢h())plII zE갰θ8q[׽.zH&t:bPTT$F"##VI__333sׇY7}6ϳm۶UO.$U/266{!...``(,r`ffR޽{7[\\璘ۍp [VHUO?**f2>>,)..d2v`zzB޽{7w^BBBë|>VWW p [ZT*p% -z:J_~:;;q8T*Z-n!]ȈڵWo3??OVV Yn޼(*))&? 144իW_'&&&Xɐ2Gq8LNN4]]]LOO!6x<"#r144ĕ+WXXXuc||\ظIPӧ)** 2+//ǏSNhڵk8x-c@ww7}W\AՊĜ MɄV%))SN\t -oܸ(ݻW5166˗ILL$99;wf߾}է~ʍ7P*#x!qС`}zinnwޡ$***Pr\lvݻwկ~Eii)z~^ G ؄̈/nFj5fYXeeeQ\\̃կ~ӧٶm6ƙ3gعs禯 $wG{VJhh(:\b%mۆ^dt뙝ettJv\&_HU( l6RdxxXTbg5is-99n述@hh(ZB6 2Q8,,,@IC^8cccjm:- tu״Y:9(iɄVFCjj*aaa 066&DZT*RRR*kbncX1n7###\|VKAA"i|v]lhKV͆r166FFF2WT⽏9~?*QP(0Ldee}uʳ/II$ CjZhrgrT*INN'韒Zb`X0Lh4233err۷oBff mjjbffvܹ%}VlXV|>p *'&&X\\ (>ɴ.+%'&&277Ė}X]]KHHHX׺FRv.PsqqYVVVh4~,kݎjeyyo4xu ?ΣGXYY!66%z{{ĉ,aq8\|YWK/ÁBsssر_~9YMqQ|#˱Z<|4=`[ѣGZܻw%t:LMMw^^~倌 ʇ~frֆᠥeZ-ɓT*KiNٳӧOc2$/_d2T*qLMM111Aaa!/"cx|2w=fe߾}9s&سoNgg'۷ogllW; IDATW_%++~Bgg'6xMMMر#G܇T*YoILLt244VܹsI^Ϟ={8{,TUUcd2T****(--iq|gܾ}E\\tvvO~J%;wرcsejLLL022©S8tP@RԛÇX,&''Z\.^Jkk+ =z;w 2=zÇY^^l6cgǎQ-8::dd2VrP(^NhzUfffpDFF΁ER#̌*mnT*f3=jnJѧ!?00"80==DP`6/ZVo-JٌJֽR2FHfBALLH٪HKVBN'rk2\.J1UT,--aXIss3 Bn7zdhh(4>OMi]jBӉFO_Vb!!!Av;nAhoobyyy:u ^n!?44Bә2K?_^^fnn)d{GTT:NV X˅h$33<}6"i&r9۷o'??KEgyyaQn2tbyr(t:+++ Wo>—nROҵHԏv+T*> IDGG뙞p{9 tUEuVEttǏz^?OzRMyhiicǎa4: كAioo?F Jkk+fW_}Ux ebN8AQQSqN'yyy?~+dgg___FΝ;HAAA@Y?r*0ioonP(HMM7СC2 OMM苕s=k&6"9{,111\p{xSNAH!>>G}q'++ ZMvv67}]qjڵGGIUU=BV9{l}R.^׹u~T^u\Z^+SPt:YZZ +4i nwI000@yy9reeLV]d/P(XWVHs}bbbرciii[RiH!~DB4jZX}'99ݻwov3==M]]===].ˢzޯ+ T*Նc`xin0>>/K xILLddd6>cq-[+z:;;)..&==]Ϫ%˿V ̙3DFF__,//#..7|JtP ),,$##Cأz^Q|s$Un+w8-4$hkk#??LQ,+JjvuuK}}=׮]m{X^^fjjJ$ڵ,aURϝNfC>3صk999[Zg L?߇V o?ōP*W(_T*//g߾}R6BPgdsa6~e\.ÇS^^.zHrJ"55*Ju Er/VHKK?oiNp1***{ N=}޵kucn]j>|2-S'[_%nmS./~ VVVĺu4?? 
#7`08}[v====d2!D^CCa6mg d2qI].4jPTzEۉĉر[xY]]eaawR]]ĉٳgK;^ .^Hkk+~)alq}}c"&U(^|@蟘]#+fŋ`۶m:uJokojj|> 99YTw8477 6yo=E $H A $H |><.]ի\.N: /^ =eʊC<Ţt4V&6kJvr^/Et:|d?4n1| NsYÅk=U@vkǮ__|.]^zgϒ%/76n7~).]biiW^y_~KyWVV]{εVCCC;( ֘ݠTvqykyJ]z>ٳg9wI')󬬬_{tzV׾ZlѮ@Bӑ-H*ɌR5k׸pV7|Sɒ kml9oXiooh4tz=!!!dgg(|gٳ%tX,kDGG#Y^^KZ ώU Iׯ{1;;˹sx7dM]\xyWعs'}͛pΟ?O\\7:C.\n ^U\r=f{ $H A $H Ah|puݻ7$%%RdBѰ azzZl Ĉj8FvԔFEE111`Z72*J"""L|(7nΝ;ɢS_ղ N3\՛DTFj4<333hڀMT}j&immmTUU100@EEڵ Vt|q n߾W_СC?aHH (Z-+++/iVicl6jYZZbff޹njnBCCLOOkjjN>͛7}6.sqaRRR|n8gggq8( f3:NhX,`cϔvEEEhp\ĠX^^~j/A)zEz+ܺu%^z%***ضmۆi]J%bmۙl6z􏌌Dղ@?n8RRRՋ:xhiiaxxx]f1==͍7y&63gPYYIjj**J)ܜH,//o䪵yzzzX,|HQQGtĉ=z4j3뿼ܜd2}{㡯YV+  ֆ륯'NmFl $H A $H AHI333444x^IHH000n p:$Q!Dxx8sss !R[?+++F"##QTh4mFCCl63T^ZZbxxFCllЇ,MMM;\.vILL _Ph$$$ '&&Q($&&DFF*NVVV0LDDDXvzzzpmmmF^{5=*oYZZ"//l~JKhhb``EZ_V $::zx^DV#1 ,,,088`X,8\. ,//S\\LTTԦJo[v;;w7 555@\hh4"ɾ* SSS '6ޭFQZMJJ y^7HkI IyZZZxwYXX ++_ *w$'FCBB044$_n;@"px2B&{KV󤧧kL&( 4|RJ")) BVh0#U"HHH'&--W^y;vlhDP066:[`liiA> Ӕ'kokk>`bbT^~errr2 @DDJ1,K^aPߋ2pfffcll,VE<6 F~~~e $H A $H AQϸt?OO8s lǤj$hhhȑ#,,,l6dffNHHYYYL&#)))`3oaa:T*iiiB)<<ݻws5xtp:LOOIyy9颊hϹt===曜={u=o0hllXVjkkuff&233zILL [WWB -- Rikkʕ+\t2N8AYY٦*~?7n࣏>W_}_~m۶OLL$""f:pf"##A쌌 g'%%rERjj*5|ᇴ́(++#--{P]}6~!]]]K;wukÇ>-as~!9ssΑnH'$$Mkk+,..䕈zinn&--1n@FFPr9QQQ*'w=###|> ~ݻ|@{{;'OW_%33s]BT-l}KKK466277'j֭[ގ`,ݤ޵Yܿ6~kAIII1&1}g%ҥKdffr :%$~?555{RYYo;\DK||<]]]#>7g?fm*ݻGJJ %%%8pu׿  $H A $H A蘚>a @233 6QUUŭ[ %55JSSׯ_'"""ENcLMMqmzDFF222_|ǏXdeeE}}=>V\J%>Ν;~v]Ħ̌pp|>;;Tw0W^( RSS477su Æ166ݻwbyy(FGGCQRR"ҋ SUUEuu5deebFAբj7}п]vEXXssshaaaa^^CCCBZ-ommz1DEEbllzzz(--TEF#O~XvMAAfQ__ϵk#77W???‚8v 駟r4 iii,--t $sssH~?QQQS[[Kww7{e¶QP`2ͥ:ޙL&ijjDqq񖰒X,BvIVVłjǮ?''^ݻwp8hooƍj \oo/"XtuuOYY#nxw),,$99L$HAAH/,,رcBA^/*wAQQW^޽{墳7nT*Kl6V+EX^^n3<ikk[wlnn.F򘞞t:cffnxٿ?JRr9EEELOOSSSn'>>Nh4g'٠4IDAT@\.gbbFCmm-l߾2226}@ J_tZ)**'!-- @NNпM^^sssBӧOSVV&K w,..Oww7ݨT*vMaa@l466r=:;;!$$dC+$RRRHII333|%$$011Aww7Vr˷DW\FAAn{Co.>n0:t^/ޣ^T Ihh(yyy۷O(**!$;;\N?]]]8NnfOb㡽}ݱYYYdff - tvvB^^144ġC ,,,^855EGG"(v Jxx8"*Fg``GbACINN۷o kڵ ׻"a ++2ҥK AOee%OOO?22bzzfl6xap8hmm/d2SSSLMM\sw+m6}gjjJa?9spbcc9x ܼysU7ܹs>}: )/ͼ\.oo8p* ;Fdd$|ܻwGtt4={-! _ZM>zyN'/^|Wŋ/HXX1116֭[PUUJ">>_|ӧO6%v-|ᇸ\.z=_Q&m&NFss3===|QNxk׮R̙3:u* )'ojj ?==gNGBBol{餳v;W\Vx?7xNGtt4ϭ[hll֭[rf3='NtxOO},--zY\\* Hqq1 -//'"">DDDcΞ=KAA\.j駟r Ct:&{۷o۷DGGsQ?Nyy: p)'%$y^L066&/,, szzA[[8y$ћ3rfzzJUUpxW_}Gt:ػw/?Osa]QQ9p: ^._,򖗗E8J("""~_rAY^^^  $H A $H A/bqq`eee`2 p˺WTL&EʍXXX~p݄Onn. mmmaZhJJ w`0l@a]f~履Mhh(N)nT*7jcvqqq +333;vsk~ھ>llc@y2a$օavҖHrM; ӤviNVTMke)Y Lll~~n{Q~åD|\|jttTXpΝ;&Yc;33rܮrn=3'{Z}]ϫYΝ;lmmMz?22!~t:p8Y---)Lng!統QKKJ%ZZZ*N~եJFGG}Ο?a555fO?HnX=naaAd29{X,P(2EԨI ǦHDo~fS&)qq[NMOOkbbB`O_v@>W,VVVB ׫r\Wz7p٬߿%Iy:1833 |bt5yrR‚677?ή.9sFgΜ94F^D"' z}?$W^飏>Rssc( D/O89RT D#>F|.\'N4sh88l+kwwWPHCuuujlli. ;E￯GO^{M{{{. 
Specifying equilibrium frequencies
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Another way to address problems associated with biased sampling is to specify the equilibrium frequencies directly using the flag ``--weights``. This parameter expects a csv or tsv file specifying the relative weights of each discrete state (they will be normalized to 1.0). These weights correspond to the equilibrium frequencies of a time-reversible model. This sometimes has slightly unintuitive implications and should be used with caution.
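For orientation, a weights file could look like the following. The states and numbers here are made up for illustration and have to match the attribute states in your metadata; the example data ship a real weights file as ``data/zika/zika.country_weights.csv``.

.. code-block:: bash

    brazil,4.0
    colombia,2.0
    french_polynesia,1.0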
treetime-0.8.6/docs/source/tutorials/timetree.rst000066400000000000000000000140141417362145000221740ustar00rootroot00000000000000
Estimation of time scaled phylogenies
-------------------------------------

The principal functionality of TreeTime is estimating time trees from an initial tree topology, a set of date constraints (e.g. tip dates), and an alignment (optional). This tutorial uses data provided in the github repository `github.com/neherlab/treetime_examples <https://github.com/neherlab/treetime_examples>`_.

.. code-block:: bash

    treetime --tree data/h3n2_na/h3n2_na_20.nwk --dates data/h3n2_na/h3n2_na_20.metadata.csv --aln data/h3n2_na/h3n2_na_20.fasta --outdir h3n2_timetree

The tree can be in newick or nexus format, the alignment in fasta or phylip, and the dates should be given as a tsv or csv file. TreeTime will attempt to parse dates; preferred formats are "%Y-%m-%d" or numerical as in 2016.45.
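For orientation, a dates file could look like the following (tip names and dates are invented for illustration and have to match the tip names in the tree; both accepted date formats are shown):

.. code-block:: bash

    name,date
    strain_one,2016.45
    strain_two,2016-06-12
    strain_three,2017-01-03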
This command will estimate a GTR model, a molecular clock model, and a time-stamped phylogeny. The results are saved to several files in the directory specified as `outdir` and printed to standard out:

.. code-block:: bash

    Inferred GTR model:
    Substitution rate (mu): 1.0

    Equilibrium frequencies (pi_i):
      A: 0.2983
      C: 0.1986
      G: 0.2353
      T: 0.2579
      -: 0.01

    Symmetrized rates from j->i (W_ij):
         A        C        G        T        -
      A  0        0.8273   2.8038   0.4525   1.031
      C  0.8273   0        0.5688   2.8435   1.0561
      G  2.8038   0.5688   0        0.6088   1.0462
      T  0.4525   2.8435   0.6088   0        1.0418
      -  1.031    1.0561   1.0462   1.0418   0

    Actual rates from j->i (Q_ij):
         A        C        G        T        -
      A  0        0.2468   0.8363   0.135    0.3075
      C  0.1643   0        0.1129   0.5646   0.2097
      G  0.6597   0.1338   0        0.1432   0.2462
      T  0.1167   0.7332   0.157    0        0.2686
      -  0.0103   0.0106   0.0105   0.0104   0

    Root-Tip-Regression:
     --rate:  2.613e-03
     --chi^2: 22.16
     --r^2:   0.98

    --- saved tree as h3n2_timetree/timetree.pdf
    --- alignment including ancestral nodes saved as h3n2_timetree/ancestral_sequences.fasta
    --- saved divergence times in h3n2_timetree/dates.tsv
    --- tree saved in nexus format as h3n2_timetree/timetree.nexus
    --- root-to-tip plot saved to h3n2_timetree/root_to_tip_regression.pdf

Other output files include an alignment with reconstructed ancestral sequences and an annotated tree in nexus format in which branch lengths correspond to years; mutations and node dates are added as comments to each node. In addition, the root-to-tip vs time regression and the tree are drawn and saved to file.

.. image:: figures/timetree.png
   :target: figures/timetree.png
   :alt: rtt


Accounting for phylogenetic covariance
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

The root-to-tip distances of samples are expected to increase with sampling date, and TreeTime uses this behavior to estimate clock rates. However, these root-to-tip distances are correlated due to shared ancestry. This can be accounted for efficiently if the sequence data set is consistent with a simple strict molecular clock model, but doing so can give misleading results when the molecular clock model is violated. This feature is hence off by default and can be switched on using the flag

.. code-block:: bash

    --covariation


Fixed evolutionary rate
^^^^^^^^^^^^^^^^^^^^^^^

If the temporal signal in the data is weak and the clock rate can't be estimated confidently from the data, it is advisable to specify the rate explicitly. This can be done using the argument

.. code-block:: bash

    --clock-rate <rate>


Specify or estimate coalescent models
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

TreeTime can be run either without a tree prior or with a Kingman coalescent tree prior. The latter is parameterized by a time scale 'Tc' which can vary in time. This time scale is often called 'effective population size' Ne, but the appropriate Tc has very little to do with census population sizes. To activate the Kingman coalescent model in TreeTime, you need to add the flag

.. code-block:: bash

    --coalescent <arg>

where the argument is either a floating point number giving the time scale of coalescence in units of divergence, 'const' to have TreeTime estimate a constant merger rate, or 'skyline'. In the latter case, TreeTime will estimate a piece-wise linear merger rate trajectory and save this in files ending in 'skyline.tsv' and 'skyline.pdf'. The following command will run TreeTime on the ebola example data set and estimate a time tree along with a skyline (this will take a few minutes).

.. code-block:: bash

    treetime --tree data/ebola/ebola.nwk --dates data/ebola/ebola.metadata.csv --aln data/ebola/ebola.fasta --outdir ebola --coalescent skyline

.. image:: figures/ebola_skyline.png
   :target: figures/ebola_skyline.png
   :alt: ebola_skyline


Confidence intervals
^^^^^^^^^^^^^^^^^^^^

In its default setting, ``treetime`` does not estimate confidence intervals of divergence times. Such estimates require calculation of the marginal probability distributions of the dates of the internal nodes, which takes about 2-3 times as long as calculating only the jointly maximally likely dates. To switch on confidence estimation, pass the flag ``--confidence``. TreeTime will run another round of marginal timetree reconstruction and determine the region that contains 90% of the marginal probability distribution of each node date. These intervals are drawn into the tree graph and written to the dates file.
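For example, the influenza run from the beginning of this tutorial can be repeated with confidence estimation switched on (same input files as above):

.. code-block:: bash

    treetime --tree data/h3n2_na/h3n2_na_20.nwk --dates data/h3n2_na/h3n2_na_20.metadata.csv --aln data/h3n2_na/h3n2_na_20.fasta --outdir h3n2_timetree --confidence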
VCF files as input
^^^^^^^^^^^^^^^^^^

In addition to standard fasta files, TreeTime can ingest sequence data in the form of vcf files, which is common for bacterial data sets where short reads are mapped against a reference and only variable sites are reported. The following example with a set of MtB sequences uses a fixed evolutionary rate of 1e-7 per site and year.

.. code-block:: bash

    treetime --aln data/tb/lee_2015.vcf.gz --vcf-reference data/tb/tb_ref.fasta --tree data/tb/lee_2015.nwk --clock-rate 1e-7 --dates data/tb/lee_2015.metadata.tsv

For many bacterial data sets where the temporal signal in the data is weak, it is advisable to fix the rate of the molecular clock explicitly. Divergence times, however, will depend on this choice.
treetime-0.8.6/docs/source/vcf_utils.rst000066400000000000000000000002121417362145000203210ustar00rootroot00000000000000
*************
VCF Utilities
*************

.. autofunction:: treetime.vcf_utils.read_vcf

.. autofunction:: treetime.vcf_utils.write_vcf
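As a rough sketch of programmatic use: the two-argument call below mirrors how the command line pairs ``--aln`` with ``--vcf-reference``; the file names are the ones used in the tutorial, and the exact return structure should be checked against the docstrings above rather than assumed from this snippet.

.. code-block:: python

    from treetime.vcf_utils import read_vcf

    # parse the variable sites relative to the fasta reference the vcf was mapped to
    vcf_data = read_vcf('data/tb/lee_2015.vcf.gz', 'data/tb/tb_ref.fasta')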
treetime-0.8.6/setup.py000066400000000000000000000032271417362145000150640ustar00rootroot00000000000000
import os
from setuptools import setup

def get_version():
    v = "0.0.0"
    with open('treetime/__init__.py') as ifile:
        for line in ifile:
            if line[:7]=='version':
                v = line.split('=')[-1].strip()[1:-1]
                break
    return v

with open("README.md", "r") as fh:
    long_description = fh.read()

setup(
    name = "phylo-treetime",
    version = get_version(),
    author = "Pavel Sagulenko, Emma Hodcroft, and Richard Neher",
    author_email = "richard.neher@unibas.ch",
    description = ("Maximum-likelihood phylodynamic inference"),
    long_description = long_description,
    long_description_content_type="text/markdown",
    license = "MIT",
    keywords = "Time-stamped phylogenies, phylogeography, virus evolution",
    url = "https://github.com/neherlab/treetime",
    packages=['treetime'],
    install_requires = [
        'biopython>=1.67,!=1.77,!=1.78',
        'numpy>=1.10.4',
        'pandas>=0.17.1',
        'scipy>=0.16.1'
    ],
    extras_require = {
        ':python_version < "3.6"':['matplotlib>=2.0, ==2.*'],
        ':python_version >= "3.6"':['matplotlib>=2.0'],
    },
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Topic :: Scientific/Engineering :: Bio-Informatics",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3.5",
        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8"
    ],
    scripts=['bin/treetime']
)
treetime-0.8.6/test/000077500000000000000000000000001417362145000143255ustar00rootroot00000000000000
treetime-0.8.6/test/command_line_tests.sh000066400000000000000000000036541417362145000205370ustar00rootroot00000000000000
all_tests=0

treetime homoplasy --aln treetime_examples/data/h3n2_na/h3n2_na_20.fasta --tree treetime_examples/data/h3n2_na/h3n2_na_20.nwk
retval="$?"
if [ "$retval" == 0 ]; then
    echo "homoplasy_scanning ok"
else
    echo "homoplasy_scanning failed $retval"
    ((all_tests++))
fi

treetime ancestral --aln treetime_examples/data/h3n2_na/h3n2_na_20.phylip --tree treetime_examples/data/h3n2_na/h3n2_na_20.nwk
retval="$?"
if [ "$retval" == 0 ]; then
    echo "ancestral_reconstruction ok"
else
    ((all_tests++))
    echo "ancestral_reconstruction failed $retval"
fi

treetime clock --tree treetime_examples/data/h3n2_na/h3n2_na_20.nex --dates treetime_examples/data/h3n2_na/h3n2_na_20.metadata.csv --sequence-length 1400
retval="$?"
if [ "$retval" == 0 ]; then
    echo "temporal_signal ok"
else
    ((all_tests++))
    echo "temporal_signal failed $retval"
fi

treetime --aln treetime_examples/data/h3n2_na/h3n2_na_20.fasta --tree treetime_examples/data/h3n2_na/h3n2_na_20.nwk --dates treetime_examples/data/h3n2_na/h3n2_na_20.metadata.csv
retval="$?"
if [ "$retval" == 0 ]; then
    echo "timetree_inference ok"
else
    ((all_tests++))
    echo "timetree_inference failed $retval"
fi

treetime mugration --tree treetime_examples/data/zika/zika.nwk --states treetime_examples/data/zika/zika.metadata.csv --weights treetime_examples/data/zika/zika.country_weights.csv --attribute country
retval="$?"
if [ "$retval" == 0 ]; then
    echo "mugration ok"
else
    ((all_tests++))
    echo "mugration failed $retval"
fi

treetime --aln treetime_examples/data/tb/lee_2015.vcf.gz --vcf-reference treetime_examples/data/tb/tb_ref.fasta --tree treetime_examples/data/tb/lee_2015.nwk --clock-rate 1e-7 --dates treetime_examples/data/tb/lee_2015.metadata.tsv
retval="$?"
if [ "$retval" == 0 ]; then
    echo "timetree_inference on vcf data ok"
else
    ((all_tests++))
    echo "timetree_inference on vcf data failed $retval"
fi

if [ "$all_tests" == 0 ];then
    echo "All tests passed"
    exit 0
else
    exit "$all_tests"
fi
treetime-0.8.6/test/run_tests.py000066400000000000000000000003071417362145000167250ustar00rootroot00000000000000
from test_treetime import *

test_import_short()
test_GTR()
test_ancestral()
test_seq_joint_reconstruction_correct()
test_seq_joint_lh_is_max()

print('\n\n TESTS HAVE FINISHED SUCCESSFULLY\n\n')
treetime-0.8.6/test/test_treetime.py000066400000000000000000000233461417362145000175630ustar00rootroot00000000000000
from __future__ import print_function
from io import StringIO

# Tests
def test_import_short():
    print("testing short imports")
    from treetime import GTR
    from treetime import TreeTime
    from treetime import TreeAnc
    from treetime import seq_utils

def test_GTR():
    from treetime import GTR
    import numpy as np
    for model in ['Jukes-Cantor']:
        print('testing GTR, model:', model)
        myGTR = GTR.standard(model, alphabet='nuc')
        print('Frequency sum:', myGTR.Pi.sum())
        assert (myGTR.Pi.sum() - 1.0)**2<1e-14
        # the matrix is the rate matrix
        assert abs(myGTR.Q.sum(0)).sum() < 1e-14
        # eigendecomposition is made correctly
        n_states = myGTR.v.shape[0]
        assert abs((myGTR.v.dot(myGTR.v_inv) - np.identity(n_states)).sum() < 1e-10)
        assert np.abs(myGTR.v.sum()) > 1e-10 # **and** v is not zero

def test_ancestral():
    import os
    from Bio import AlignIO
    import numpy as np
    from treetime import TreeAnc, GTR

    root_dir = os.path.dirname(os.path.realpath(__file__))
    fasta = str(os.path.join(root_dir, 'treetime_examples/data/h3n2_na/h3n2_na_20.fasta'))
    nwk = str(os.path.join(root_dir, 'treetime_examples/data/h3n2_na/h3n2_na_20.nwk'))

    for marginal in [True, False]:
        print('loading flu example')
        t = TreeAnc(gtr='Jukes-Cantor', tree=nwk, aln=fasta)

        print('ancestral reconstruction ' + ("marginal" if marginal else "joint"))
        t.reconstruct_anc(method='ml', marginal=marginal)
        assert t.data.compressed_to_full_sequence(t.tree.root.cseq, as_string=True) == 'ATGAATCCAAATCAAAAGATAATAACGATTGGCTCTGTTTCTCTCACCATTTCCACAATATGCTTCTTCATGCAAATTGCCATCTTGATAACTACTGTAACATTGCATTTCAAGCAATATGAATTCAACTCCCCCCCAAACAACCAAGTGATGCTGTGTGAACCAACAATAATAGAAAGAAACATAACAGAGATAGTGTATCTGACCAACACCACCATAGAGAAGGAAATATGCCCCAAACCAGCAGAATACAGAAATTGGTCAAAACCGCAATGTGGCATTACAGGATTTGCACCTTTCTCTAAGGACAATTCGATTAGGCTTTCCGCTGGTGGGGACATCTGGGTGACAAGAGAACCTTATGTGTCATGCGATCCTGACAAGTGTTATCAATTTGCCCTTGGACAGGGAACAACACTAAACAACGTGCATTCAAATAACACAGTACGTGATAGGACCCCTTATCGGACTCTATTGATGAATGAGTTGGGTGTTCCTTTTCATCTGGGGACCAAGCAAGTGTGCATAGCATGGTCCAGCTCAAGTTGTCACGATGGAAAAGCATGGCTGCATGTTTGTATAACGGGGGATGATAAAAATGCAACTGCTAGCTTCATTTACAATGGGAGGCTTGTAGATAGTGTTGTTTCATGGTCCAAAGAAATTCTCAGGACCCAGGAGTCAGAATGCGTTTGTATCAATGGAACTTGTACAGTAGTAATGACTGATGGAAGTGCTTCAGGAAAAGCTGATACTAAAATACTATTCATTGAGGAGGGGAAAATCGTTCATACTAGCACATTGTCAGGAAGTGCTCAGCATGTCGAAGAGTGCTCTTGCTATCCTCGATATCCTGGTGTCAGATGTGTCTGCAGAGACAACTGGAAAGGCTCCAATCGGCCCATCGTAGATATAAACATAAAGGATCATAGCATTGTTTCCAGTTATGTGTGTTCAGGACTTGTTGGAGACACACCCAGAAAAAACGACAGCTCCAGCAGTAGCCATTGTTTGGATCCTAACAATGAAGAAGGTGGTCATGGAGTGAAAGGCTGGGCCTTTGATGATGGAAATGACGTGTGGATGGGAAGAACAATCAACGAGACGTCACGCTTAGGGTATGAAACCTTCAAAGTCATTGAAGGCTGGTCCAACCCTAAGTCCAAATTGCAGATAAATAGGCAAGTCATAGTTGACAGAGGTGATAGGTCCGGTTATTCTGGTATTTTCTCTGTTGAAGGCAAAAGCTGCATCAATCGGTGCTTTTATGTGGAGTTGATTAGGGGAAGAAAAGAGGAAACTGAAGTCTTGTGGACCTCAAACAGTATTGTTGTGTTTTGTGGCACCTCAGGTACATATGGAACAGGCTCATGGCCTGATGGGGCGGACCTCAATCTCATGCCTATA'

    print('testing LH normalization')
    from Bio import Phylo, AlignIO
    tiny_tree = Phylo.read(StringIO("((A:0.60100000009,B:0.3010000009):0.1,C:0.2):0.001;"), 'newick')
    tiny_aln = AlignIO.read(StringIO(">A\nAAAAAAAAAAAAAAAACCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTT\n"
                                     ">B\nAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTTAAAACCCCGGGGTTTT\n"
                                     ">C\nACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGTACGT\n"), 'fasta')

    mygtr = GTR.custom(alphabet = np.array(['A', 'C', 'G', 'T']),
                       pi = np.array([0.9, 0.06, 0.02, 0.02]), W=np.ones((4,4)))
    t = TreeAnc(gtr=mygtr, tree=tiny_tree, aln=tiny_aln)
    t.reconstruct_anc('ml', marginal=True, debug=True)
    lhsum = np.exp(t.sequence_LH(pos=np.arange(4**3))).sum()
    print(lhsum)
    assert(np.abs(lhsum-1.0)<1e-6)

    t.optimize_branch_len()

def test_seq_joint_reconstruction_correct():
    """
    evolve the random sequence, get the alignment at the leaf nodes.
    Reconstruct the sequences of the internal nodes (joint)
    and prove the reconstruction is correct.
    In addition, compute the likelihood of the particular realization of the
    sequences on the tree and prove that this likelihood is exactly the same
    as calculated in the joint reconstruction
    """
    from treetime import TreeAnc, GTR
    from treetime import seq_utils
    from Bio import Phylo, AlignIO
    import numpy as np
    from collections import defaultdict

    def exclusion(a, b):
        """
        Elements of list a that are not in list b
        """
        return list(set(a) - set(b))

    tiny_tree = Phylo.read(StringIO("((A:.060,B:.01200)C:.020,D:.0050)E:.004;"), 'newick')
    mygtr = GTR.custom(alphabet = np.array(['A', 'C', 'G', 'T']),
                       pi = np.array([0.15, 0.95, 0.05, 0.3]), W=np.ones((4,4)))
    seq = np.random.choice(mygtr.alphabet, p=mygtr.Pi, size=400)

    myTree = TreeAnc(gtr=mygtr, tree=tiny_tree, aln=None, verbose=4)

    # simulate evolution, set resulting sequence as ref_seq
    tree = myTree.tree
    seq_len = 400
    tree.root.ref_seq = np.random.choice(mygtr.alphabet, p=mygtr.Pi, size=seq_len)
    print("Root sequence: " + ''.join(tree.root.ref_seq.astype('U')))
    mutation_list = defaultdict(list)
    for node in tree.find_clades():
        for c in node.clades:
            c.up = node
        if hasattr(node, 'ref_seq'):
            continue
        t = node.branch_length
        p = mygtr.evolve(seq_utils.seq2prof(node.up.ref_seq, mygtr.profile_map), t)
        # normalize profile
        p = (p.T/p.sum(axis=1)).T
        # sample mutations randomly
        ref_seq_idxs = np.array([int(np.random.choice(np.arange(p.shape[1]), p=p[k])) for k in np.arange(p.shape[0])])

        node.ref_seq = np.array([mygtr.alphabet[k] for k in ref_seq_idxs])

        node.ref_mutations = [(anc, pos, der) for pos, (anc, der) in
                              enumerate(zip(node.up.ref_seq, node.ref_seq)) if anc!=der]
        for anc, pos, der in node.ref_mutations:
            mutation_list[pos].append((node.name, anc, der))
        print(node.name, len(node.ref_mutations), node.ref_mutations)

    # set as the starting sequences to the terminal nodes:
    alnstr = ""
    i = 1
    for leaf in tree.get_terminals():
        alnstr += ">" + leaf.name + "\n" + ''.join(leaf.ref_seq.astype('U')) + '\n'
        i += 1
    print(alnstr)
    myTree.aln = AlignIO.read(StringIO(alnstr), 'fasta')

    # reconstruct ancestral sequences:
    myTree.infer_ancestral_sequences(final=True, debug=True, reconstruct_leaves=True)

    diff_count = 0
    mut_count = 0
    for node in myTree.tree.find_clades():
        if node.up is not None:
            mut_count += len(node.ref_mutations)
            diff_count += np.sum(node.sequence != node.ref_seq)
            if np.sum(node.sequence != node.ref_seq):
                print("%s: True sequence does not equal inferred sequence. parent %s"%(node.name, node.up.name))
            else:
                print("%s: True sequence equals inferred sequence. parent %s"%(node.name, node.up.name))

    # the assignment of mutations to the root node is probabilistic. Hence some differences are expected
    assert diff_count/seq_len<2*(1.0*mut_count/seq_len)**2

    # prove the likelihood value calculation is correct
    LH = myTree.ancestral_likelihood()
    LH_p = (myTree.tree.sequence_LH)
    print("Difference between reference and inferred LH:", (LH - LH_p).sum())
    assert ((LH - LH_p).sum())<1e-9

    return myTree

def test_seq_joint_lh_is_max():
    """
    For a single-char sequence, perform joint ancestral sequence reconstruction
    and prove that this reconstruction is the most likely one by comparing to all
    possible reconstruction variants (brute-force).
    """
    from treetime import TreeAnc, GTR
    from treetime import seq_utils
    from Bio import Phylo, AlignIO
    import numpy as np

    mygtr = GTR.custom(alphabet = np.array(['A', 'C', 'G', 'T']),
                       pi = np.array([0.91, 0.05, 0.02, 0.02]), W=np.ones((4,4)))
    tiny_tree = Phylo.read(StringIO("((A:.0060,B:.30)C:.030,D:.020)E:.004;"), 'newick')

    # terminal node sequences (single nuc)
    A_char = 'A'
    B_char = 'C'
    D_char = 'G'

    # for brute-force, expand them to the strings
    A_seq = ''.join(np.repeat(A_char,16))
    B_seq = ''.join(np.repeat(B_char,16))
    D_seq = ''.join(np.repeat(D_char,16))

    #
    def ref_lh():
        """
        reference likelihood - LH values for all possible variants
        of the internal node sequences
        """
        tiny_aln = AlignIO.read(StringIO(">A\n" + A_seq + "\n"
                                         ">B\n" + B_seq + "\n"
                                         ">D\n" + D_seq + "\n"
                                         ">C\nAAAACCCCGGGGTTTT\n"
                                         ">E\nACGTACGTACGTACGT\n"), 'fasta')

        myTree = TreeAnc(gtr=mygtr, tree=tiny_tree, aln=tiny_aln, verbose=4)
        logLH_ref = myTree.ancestral_likelihood()
        return logLH_ref

    #
    def real_lh():
        """
        Likelihood of the sequences calculated by the joint ancestral
        sequence reconstruction
        """
        tiny_aln_1 = AlignIO.read(StringIO(">A\n"+A_char+"\n"
                                           ">B\n"+B_char+"\n"
                                           ">D\n"+D_char+"\n"), 'fasta')

        myTree_1 = TreeAnc(gtr=mygtr, tree=tiny_tree, aln=tiny_aln_1, verbose=4)
        myTree_1.reconstruct_anc(method='ml', marginal=False, debug=True)
        logLH = myTree_1.tree.sequence_LH
        return logLH

    ref = ref_lh()
    real = real_lh()

    print(abs(ref.max() - real))
    # joint chooses the most likely realization of the tree
    assert(abs(ref.max() - real) < 1e-10)
    return ref, real
treetime-0.8.6/treetime/000077500000000000000000000000001417362145000151645ustar00rootroot00000000000000
treetime-0.8.6/treetime/__init__.py000066400000000000000000000015611417362145000173000ustar00rootroot00000000000000
from __future__ import print_function, division, absolute_import
version="0.8.6"

class TreeTimeError(Exception):
    """TreeTimeError class"""
    pass

class MissingDataError(TreeTimeError):
    """MissingDataError class raised when tree or alignment are missing"""
    pass

class UnknownMethodError(TreeTimeError):
    """UnknownMethodError class raised when an unknown method is requested"""
    pass

class NotReadyError(TreeTimeError):
    """NotReadyError class raised when results are requested before inference"""
    pass

from .treeanc import TreeAnc
from .treetime import TreeTime, plot_vs_years
from .clock_tree import ClockTree
from .treetime import ttconf as treetime_conf
from .gtr import GTR
from .gtr_site_specific import GTR_site_specific
from .merger_models import Coalescent
from .treeregression import TreeRegression
from .argument_parser import make_parser
treetime-0.8.6/treetime/aa_models.py000066400000000000000000000107571417362145000174720ustar00rootroot00000000000000
from __future__ import division, print_function, absolute_import
import numpy as np
from .seq_utils import alphabets

def JTT92(mu=1.0):
    from .gtr import GTR
    # stationary concentrations:
    pis = np.array([
        0.07674789, 0.05169087, 0.04264509, 0.05154407, 0.01980301,
        0.04075195, 0.06182989, 0.07315199, 0.02294399, 0.05376110,
        0.09190390, 0.05867583, 0.02382594, 0.04012589, 0.05090097,
        0.06876503, 0.05856501, 0.01426057, 0.03210196, 0.06600504])

    # attempt matrix (FIXME)
    Q = np.array([
        [-1.247831,0.044229,0.041179,0.061769,0.042704,0.043467,0.08007,0.136501,0.02059,0.027453,0.022877,0.02669,0.041179,0.011439,0.14794,0.288253,0.362223,0.006863,0.008388,0.227247],
        [0.029789,-1.025965,0.023112,0.008218,0.058038,0.159218,0.014895,0.070364,0.168463,0.011299,0.019517,0.33179,0.022599,0.002568,0.038007,0.051874,0.032871,0.064714,0.010272,0.008731],
        [0.022881,0.019068,-1.280568,0.223727,0.014407,0.03644,0.024576,0.034322,0.165676,0.019915,0.005085,0.11144,0.012712,0.004237,0.006356,0.213134,0.098304,0.00339,0.029661,0.00678],
        [0.041484,0.008194,0.270413,-1.044903,0.005121,0.025095,0.392816,0.066579,0.05736,0.005634,0.003585,0.013316,0.007682,0.002049,0.007682,0.030217,0.019462,0.002049,0.023559,0.015877],
        [0.011019,0.022234,0.00669,0.001968,-0.56571,0.001771,0.000984,0.011609,0.013577,0.003345,0.004526,0.001377,0.0061,0.015348,0.002755,0.043878,0.008264,0.022628,0.041124,0.012199],
        [0.02308,0.125524,0.034823,0.019841,0.003644,-1.04415,0.130788,0.010528,0.241735,0.003644,0.029154,0.118235,0.017411,0.00162,0.066406,0.021461,0.020651,0.007288,0.009718,0.008098],
        [0.064507,0.017816,0.035632,0.471205,0.003072,0.198435,-0.944343,0.073107,0.015973,0.007372,0.005529,0.111197,0.011058,0.003072,0.011058,0.01843,0.019659,0.006143,0.0043,0.027646],
        [0.130105,0.099578,0.058874,0.09449,0.042884,0.018898,0.086495,-0.647831,0.016717,0.004361,0.004361,0.019625,0.010176,0.003634,0.017444,0.146096,0.023986,0.039976,0.005815,0.034162],
        [0.006155,0.074775,0.089138,0.025533,0.01573,0.1361,0.005927,0.005243,-1.135695,0.003648,0.012767,0.010259,0.007523,0.009119,0.026217,0.016642,0.010487,0.001824,0.130629,0.002508],
        [0.01923,0.011752,0.025106,0.005876,0.009081,0.004808,0.00641,0.003205,0.008547,-1.273602,0.122326,0.011218,0.25587,0.047542,0.005342,0.021367,0.130873,0.004808,0.017094,0.513342],
        [0.027395,0.0347,0.010958,0.006392,0.021003,0.065748,0.008219,0.005479,0.051137,0.209115,-0.668139,0.012784,0.354309,0.226465,0.093143,0.053877,0.022829,0.047485,0.021916,0.16437],
        [0.020405,0.376625,0.153332,0.015158,0.004081,0.170239,0.105525,0.015741,0.026235,0.012243,0.008162,-0.900734,0.037896,0.002332,0.012243,0.027401,0.06005,0.00583,0.004664,0.008162],
        [0.012784,0.010416,0.007102,0.003551,0.007339,0.01018,0.004261,0.003314,0.007812,0.113397,0.091854,0.015388,-1.182051,0.01018,0.003788,0.006865,0.053503,0.005682,0.004261,0.076466],
        [0.00598,0.001993,0.003987,0.001595,0.031098,0.001595,0.001993,0.001993,0.015948,0.035484,0.098877,0.001595,0.017144,-0.637182,0.006778,0.03668,0.004784,0.021131,0.213701,0.024719],
        [0.098117,0.037426,0.007586,0.007586,0.007081,0.082944,0.009104,0.012138,0.058162,0.005058,0.051587,0.010621,0.008092,0.008598,-0.727675,0.144141,0.059679,0.003035,0.005058,0.011632],
        [0.258271,0.069009,0.343678,0.040312,0.152366,0.036213,0.020498,0.137334,0.049878,0.02733,0.040312,0.032113,0.019814,0.06286,0.194728,-1.447863,0.325913,0.023914,0.043045,0.025964],
        [0.276406,0.037242,0.135003,0.022112,0.02444,0.029677,0.018621,0.019203,0.026768,0.142567,0.014548,0.059936,0.131511,0.006983,0.068665,0.27757,-1.335389,0.006983,0.01222,0.065174],
        [0.001275,0.017854,0.001134,0.000567,0.016295,0.002551,0.001417,0.007793,0.001134,0.001275,0.007368,0.001417,0.003401,0.00751,0.00085,0.004959,0.0017,-0.312785,0.010061,0.003542],
        [0.003509,0.006379,0.022328,0.014673,0.066664,0.007655,0.002233,0.002552,0.182769,0.010207,0.007655,0.002552,0.005741,0.170967,0.00319,0.020095,0.006698,0.022647,-0.605978,0.005103],
        [0.195438,0.011149,0.010493,0.020331,0.040662,0.013117,0.029512,0.030824,0.007214,0.630254,0.11805,0.009182,0.211834,0.040662,0.015084,0.024922,0.073453,0.016396,0.010493,-1.241722]])

    Spis = np.sqrt(pis[None, :] / pis[:,None])
    W = Q * Spis

    gtr = GTR(alphabet=alphabets['aa_nogap'])
    gtr.assign_rates(mu=mu, pi=pis, W=W)
    return gtr
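# Illustrative usage sketch (an assumption for orientation, not part of the
# module's API surface): the returned object is a regular GTR instance, so
#
#   from treetime.aa_models import JTT92
#   jtt = JTT92(mu=1.0)   # amino-acid substitution model with JTT-like rates
#   print(jtt.Pi.sum())   # equilibrium frequencies are normalized, sums to ~1
#
# and it can be passed wherever TreeAnc/TreeTime accept a `gtr` argument,
# analogous to the GTR.custom objects used in test/test_treetime.py.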
GTR(alphabet=alphabets['aa_nogap']) gtr.assign_rates(mu=mu, pi=pis, W=W) return gtr treetime-0.8.6/treetime/argument_parser.py000066400000000000000000000426671417362145000207530ustar00rootroot00000000000000#!/usr/bin/env python from __future__ import print_function, division, absolute_import import sys, argparse, os from .wrappers import ancestral_reconstruction, mugration, scan_homoplasies, timetree, estimate_clock_model from . import version py2 = sys.version_info.major==2 def set_default_subparser(self, name, args=None, positional_args=0): """default subparser selection. Call after setup, just before parse_args() name: is the name of the subparser to call by default args: if set is the argument list handed to parse_args() https://stackoverflow.com/questions/6365601/default-sub-command-or-handling-no-sub-command-with-argparse """ subparser_found = False if len(sys.argv)==1: sys.argv.append('-h') else: for x in self._subparsers._actions: if not isinstance(x, argparse._SubParsersAction): continue for sp_name in x._name_parser_map.keys(): if sp_name in sys.argv[1:]: subparser_found = True if not subparser_found: # insert default subcommand in first position if args is None: sys.argv.insert(1, name) else: args.insert(1, name) if py2: argparse.ArgumentParser.set_default_subparser = set_default_subparser treetime_description = \ "TreeTime: Maximum Likelihood Phylodynamics\n\n" subcommand_description = \ "In addition, TreeTime implements several sub-commands:\n\n"\ "\t ancestral\tinfer ancestral sequences maximizing the joint or marginal likelihood.\n"\ "\t homoplasy\tanalyze patterns of recurrent mutations aka homoplasies.\n"\ "\t clock\t\testimate molecular clock parameters and reroot the tree.\n"\ "\t mugration\tmap discrete character such as host or country to the tree.\n\n"\ "(note that 'tt' is a default subcommand in python2 that doesn't need to be specified).\n"\ "To print a description and argument list of the individual sub-commands, type:\n\n"\ "\t treetime -h\n\n" ref_msg = \ "If you use results from treetime in a publication, please cite:"\ "\n\n\tSagulenko et al. TreeTime: Maximum-likelihood phylodynamic analysis"\ "\n\tVirus Evolution, vol 4, https://academic.oup.com/ve/article/4/1/vex042/4794731\n" timetree_description=\ "TreeTime infers a time scaled phylogeny given a tree topology, an alignment, "\ "and tip dates. Reconstructs ancestral sequences and infers a molecular clock tree. "\ "TreeTime will reroot the tree and resolve polytomies by default. "\ "In addition, treetime will infer ancestral sequences and a GTR substitution model. "\ "Inferred mutations are included as comments in the output tree.\n\n" gtr_description = "GTR model to use. '--gtr infer' will infer a model "\ "from the data. Alternatively, specify the model type. If the specified model "\ "requires additional options, use '--gtr-params' to specify those." gtr_params_description = "GTR parameters for the model specified by "\ "the --gtr argument. The parameters should be feed as 'key=value' "\ "list of parameters. Example: '--gtr K80 --gtr-params kappa=0.2 "\ "pis=0.25,0.25,0.25,0.25'. See the exact definitions of the "\ "parameters in the GTR creation methods in treetime/nuc_models.py "\ "or treetime/aa_models.py" reroot_description = "Reroot the tree using root-to-tip regression. Valid choices are "\ "'min_dev', 'least-squares', and 'oldest'. 'least-squares' adjusts the root to "\ "minimize residuals of the root-to-tip vs sampling time regression, " \ "'min_dev' minimizes variance of root-to-tip distances. 
"\ "'least-squares' can be combined with --covariation to account for shared ancestry. "\ "Alternatively, you can specify a node name or a list of node names "\ "to be used as outgroup or use 'oldest' to reroot to the oldest node. "\ "By default, TreeTime will reroot using 'least-squares'. "\ "Use --keep-root to keep the current root." tree_description = "Name of file containing the tree in "\ "newick, nexus, or phylip format. If none is provided, "\ "treetime will attempt to build a tree from the alignment "\ "using fasttree, iqtree, or raxml (assuming they are installed)" aln_description = "alignment file (fasta)" dates_description = "csv file with dates for nodes with 'node_name, date' where date is float (as in 2012.15)" coalescent_description = \ "coalescent time scale -- sensible values are on the order of the average "\ "hamming distance of contemporaneous sequences. In addition, 'opt' "\ "'skyline' are valid options and estimate a constant coalescent rate "\ "or a piecewise linear coalescent rate history" ancestral_description = \ "Reconstructs ancestral sequences and maps mutations to the tree. "\ "The output consists of a file 'ancestral.fasta' with ancestral sequences "\ "and a tree 'annotated_tree.nexus' with mutations added as comments "\ "like A45G,G136T,..., number in SNPs used 1-based index by default. "\ "The inferred GTR model is written to stdout." homoplasy_description = \ "Reconstructs ancestral sequences and maps mutations to the tree. "\ "The tree is then scanned for homoplasies. An excess number of homoplasies "\ "might suggest contamination, recombination, culture adaptation or similar." mugration_description = \ "Reconstructs discrete ancestral states, for example "\ "geographic location, host, or similar. In addition to ancestral states, "\ "a GTR model of state transitions is inferred." def add_seq_len_aln_group(parser): parser.add_argument('--sequence-length', type=int, help="length of the sequence, " "used to calculate expected variation in branch length. " "Not required if alignment is provided.") add_aln_group(parser, required=False) # seq_group_ex.add_argument('--aln', type=str, help=aln_description) def add_aln_group(parser, required=True): parser.add_argument('--aln', required=required, type=str, help=aln_description) parser.add_argument('--vcf-reference', type=str, help='only for vcf input: fasta file of the sequence the VCF was mapped to.') def add_reroot_group(parser): parser.add_argument('--clock-filter', type=float, default=3, help="ignore tips that don't follow a loose clock, " "'clock-filter=number of interquartile ranges from regression'. " "Default=3.0, set to 0 to switch off.") reroot_group = parser.add_mutually_exclusive_group() reroot_group.add_argument('--reroot', nargs='+', default='best', help=reroot_description) reroot_group.add_argument('--keep-root', required = False, action="store_true", default=False, help ="don't reroot the tree. 
Otherwise, reroot to minimize the " "the residual of the regression of root-to-tip distance and sampling time") parser.add_argument('--tip-slack', type=float, default=3, help="excess variance associated with terminal nodes accounting for " " overdisperion of the molecular clock") parser.add_argument('--covariation', action='store_true', help="Account for covariation when estimating rates " "or rerooting using root-to-tip regression, default False.") def add_gtr_arguments(parser): parser.add_argument('--gtr', default='infer', help=gtr_description) parser.add_argument('--gtr-params', nargs='+', help=gtr_params_description) parser.add_argument('--aa', action='store_true', help="use aminoacid alphabet") def add_anc_arguments(parser): parser.add_argument('--keep-overhangs', default = False, action='store_true', help='do not fill terminal gaps') parser.add_argument('--zero-based', default = False, action='store_true', help='zero based mutation indexing') parser.add_argument('--reconstruct-tip-states', default = False, action='store_true', help='overwrite ambiguous states on tips with the most likely inferred state') parser.add_argument('--report-ambiguous', default=False, action="store_true", help='include transitions involving ambiguous states') def add_common_args(parser): parser.add_argument('--verbose', default=1, type=int, help='verbosity of output 0-6') parser.add_argument('--outdir', type=str, help='directory to write the output to') def make_parser(): parser = argparse.ArgumentParser(description = "", usage=treetime_description) subparsers = parser.add_subparsers() if py2: t_parser = subparsers.add_parser('tt', description=timetree_description) else: t_parser = parser t_parser.add_argument('--tree', type=str, help=tree_description) add_seq_len_aln_group(t_parser) t_parser.add_argument('--dates', type=str, help=dates_description) t_parser.add_argument('--name-column', type=str, help="label of the column to be used as taxon name") t_parser.add_argument('--date-column', type=str, help="label of the column to be used as sampling date") add_reroot_group(t_parser) add_gtr_arguments(t_parser) t_parser.add_argument('--clock-rate', type=float, help="if specified, the rate of the molecular clock won't be optimized.") t_parser.add_argument('--clock-std-dev', type=float, help="standard deviation of the provided clock rate estimate") t_parser.add_argument('--branch-length-mode', default='auto', type=str, choices=['auto', 'input', 'joint', 'marginal'], help="If set to 'input', the provided branch length will be used without modification. " "Note that branch lengths optimized by treetime are only accurate at short evolutionary distances.") t_parser.add_argument('--confidence', action='store_true', help="estimate confidence intervals of divergence times.") t_parser.add_argument('--keep-polytomies', default=False, action='store_true', help="Don't resolve polytomies using temporal information.") t_parser.add_argument('--relax',nargs=2, type=float, help='use an autocorrelated molecular clock. Strength of the gaussian priors on' ' branch specific rate deviation and the coupling of parent and offspring' ' rates can be specified e.g. as --relax 1.0 0.5. Values around 1.0 correspond' ' to weak priors, larger values constrain rate deviations more strongly.' ' Coupling 0 (--relax 1.0 0) corresponds to an un-correlated clock.') t_parser.add_argument('--max-iter', default=2, type=int, help='maximal number of iterations the inference cycle is run. 
Note that for polytomy resolution and coalescence models max_iter should be at least 2') t_parser.add_argument('--coalescent', default="0.0", type=str, help=coalescent_description) t_parser.add_argument('--n-skyline', default="20", type=int, help="number of grid points in skyline coalescent model") t_parser.add_argument('--plot-tree', default="timetree.pdf", help = "filename to save the plot to. Suffix will determine format" " (choices pdf, png, svg, default=pdf)") t_parser.add_argument('--plot-rtt', default="root_to_tip_regression.pdf", help = "filename to save the plot to. Suffix will determine format" " (choices pdf, png, svg, default=pdf)") t_parser.add_argument('--tip-labels', action='store_true', help = "add tip labels (default for small trees with <30 leaves)") t_parser.add_argument('--no-tip-labels', action='store_true', help = "don't show tip labels (default for small trees with >=30 leaves)") add_anc_arguments(t_parser) add_common_args(t_parser) t_parser.add_argument("--version", action="version", version="%(prog)s " + version) def toplevel(params): if (params.aln or params.tree) and params.dates: timetree(params) else: print(treetime_description+timetree_description+subcommand_description+ "'--dates' and '--aln' or '--tree' are REQUIRED inputs, type 'treetime -h' for a full list of arguments.\n") t_parser.set_defaults(func=toplevel) ## HOMOPLASY SCANNER h_parser = subparsers.add_parser('homoplasy', description=homoplasy_description) add_aln_group(h_parser) h_parser.add_argument('--tree', type = str, help=tree_description) h_parser.add_argument('--const', type = int, default=0, help ="number of constant sites not included in alignment") h_parser.add_argument('--rescale', type = float, default=1.0, help ="rescale branch lengths") h_parser.add_argument('--detailed', required = False, action="store_true", help ="generate a more detailed report") add_gtr_arguments(h_parser) h_parser.add_argument('--zero-based', default = False, action='store_true', help='zero based mutation indexing') h_parser.add_argument('-n', default = 10, type=int, help='number of mutations/nodes that are printed to screen') h_parser.add_argument('--drms', type=str, help='TSV file containing DRM info. columns headers: GENOMIC_POSITION, ALT_BASE, DRUG, GENE, SUBSTITUTION') add_common_args(h_parser) h_parser.set_defaults(func=scan_homoplasies) ## ANCESTRAL RECONSTRUCTION a_parser = subparsers.add_parser('ancestral', description=ancestral_description) add_aln_group(a_parser) a_parser.add_argument('--tree', type=str, help=tree_description) add_gtr_arguments(a_parser) a_parser.add_argument('--marginal', default=False, action="store_true", help ="marginal reconstruction of ancestral sequences") add_anc_arguments(a_parser) add_common_args(a_parser) a_parser.set_defaults(func=ancestral_reconstruction) ## MUGRATION m_parser = subparsers.add_parser('mugration', description=mugration_description) m_parser.add_argument('--tree', required = True, type=str, help=tree_description) m_parser.add_argument('--name-column', type=str, help="label of the column to be used as taxon name") m_parser.add_argument('--attribute', type=str, help ="attribute to reconstruct, e.g. country") m_parser.add_argument('--states', required = True, type=str, help ="csv or tsv file with discrete characters." "\n#name,country,continent\ntaxon1,micronesia,oceania\n...") m_parser.add_argument('--weights', type=str, help="csv or tsv file with probabilities of that a randomly sampled " "sequence at equilibrium has a particular state. E.g. 
population of different continents or countries. E.g.:" "\n#country,weight\nmicronesia,0.1\n...") m_parser.add_argument('--confidence', action="store_true", help="output confidence of mugration inference") m_parser.add_argument('--pc', type=float, default=1.0, help ="pseudo-counts higher numbers will results in 'flatter' models") m_parser.add_argument('--missing-data', type=str, default='?', help ="string indicating missing data") m_parser.add_argument('--sampling-bias-correction', type=float, help='a rough estimate of how many more events would have been observed' ' if sequences represented an even sample. This should be' ' roughly the (1-sum_i p_i^2)/(1-sum_i t_i^2), where p_i' ' are the equilibrium frequencies and t_i are apparent ones.' '(or rather the time spent in a particular state on the tree)') add_common_args(m_parser) m_parser.set_defaults(func=mugration) ## CLOCKSIGNAL c_parser = subparsers.add_parser('clock', description="Calculates the root-to-tip regression and quantifies the 'clock-i-ness' of the tree. " "It will reroot the tree to maximize the clock-like " "signal and recalculate branch length unless run with --keep_root.") c_parser.add_argument('--tree', required=True, type=str, help=tree_description) c_parser.add_argument('--dates', required=True, type=str, help=dates_description) c_parser.add_argument('--date-column', type=str, help="label of the column to be used as sampling date") c_parser.add_argument('--name-column', type=str, help="label of the column to be used as taxon name") add_seq_len_aln_group(c_parser) add_reroot_group(c_parser) c_parser.add_argument('--allow-negative-rate', required = False, action="store_true", default=False, help="By default, rates are forced to be positive. For trees with little temporal " "signal it is advisable to remove this restriction to achieve essentially mid-point rooting.") c_parser.add_argument('--plot-rtt', default="root_to_tip_regression.pdf", help = "filename to save the plot to. Suffix will determine format" " (choices pdf, png, svg, default=pdf)") add_common_args(c_parser) c_parser.set_defaults(func=estimate_clock_model) # make a version subcommand v_parser = subparsers.add_parser('version', description='print version') v_parser.set_defaults(func=lambda x: print("treetime "+version)) ## call the relevant function and return if py2: parser.set_default_subparser('tt') return parser treetime-0.8.6/treetime/branch_len_interpolator.py000066400000000000000000000163341417362145000224420ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import numpy as np from . import config as ttconf from .distribution import Distribution class BranchLenInterpolator (Distribution): """ This class defines the methods to manipulate the branch length probability distributions. 
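    A rough usage sketch (illustrative only; it assumes a tree node that
    already carries a `mutation_length` attribute and, for the default 'joint'
    mode, a `branch_state` attribute, as set up elsewhere in this package):

        bli = BranchLenInterpolator(node, gtr, one_mutation=1e-4,
                                    pattern_multiplicity=multiplicity)
        neg_log_p = bli(0.01)   # -log P of observing this branch at length 0.01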
""" def __init__(self, node, gtr, one_mutation=None, min_width=ttconf.MIN_INTEGRATION_PEAK, branch_length_mode = 'joint', pattern_multiplicity = None, n_grid_points = ttconf.BRANCH_GRID_SIZE, ignore_gaps=True): self.node = node self.gtr = gtr if node.up is None: raise Exception("Cannot create branch length interpolator for the root node.") self._gamma = 1.0 self._merger_cost = None if one_mutation is None: L = node.sequence.shape[0] one_mutation = 1.0/L # optimal branch length mutation_length = node.mutation_length if mutation_length < np.min((1e-5, 0.1*one_mutation)): # zero-length short_range = 10*one_mutation grid = np.concatenate([short_range*(np.linspace(0, 1.0 , n_grid_points//2)[:-1]), (short_range + (ttconf.MAX_BRANCH_LENGTH - short_range)*(np.linspace(0, 1.0 , n_grid_points//2+1)**2))]) else: # branch length is not zero sigma = mutation_length #np.max([self.average_branch_len, mutation_length]) # from zero to optimal branch length grid_left = mutation_length * (1 - np.linspace(1, 0.0, n_grid_points//3)**2.0) grid_zero = grid_left[1]*np.logspace(-20,0,6)[:5] grid_zero2 = grid_left[1]*np.linspace(0,1,10)[1:-1] # from optimal branch length to the right (--> 3*branch lengths), grid_right = mutation_length + (3*sigma*(np.linspace(0, 1, n_grid_points//3)**2)) # far to the right (3*branch length ---> MAX_LEN), very sparse far_grid = grid_right.max() + ttconf.MAX_BRANCH_LENGTH*np.linspace(0, 1, n_grid_points//3)**2 grid = np.concatenate((grid_zero,grid_zero2, grid_left,grid_right[1:],far_grid[1:])) grid.sort() # just for safety if branch_length_mode=='input': # APPROXIMATE HANDLING OF BRANCH LENGTH PROPAGATOR WHEN USING INPUT BRANCH LENGTH # branch length are estimated from as those maximizing the likelihood and the # sensitivity of the likelihood depends on the branch length (gets soft for long branches) # observed differences scale as p = p_0 (1-exp(-l/p_0)) where p_0 is the distance of random sequence # (3/4 for nucleotides, more like 0.9 for amino acids). The number of observable # substitutions fluctuates by dp = \sqrt{p(1-p)/L} which corresponds to fluctuation # in branch length of dp = dl exp(-l/p0). A Gaussian approximation for the branch length would # therefore have variance p(1-p)e^{2l/p0}/L. Substituting p results in # p_0(1-exp(-l/p0))(1-p_0(1-exp(-l/p0)))e^{2l/p0}/L which can be slightly rearranged to # p_0(exp(l/p0)-1)(exp(l/p0)-p_0(exp(l/p0)-1))/L p0 = 1.0-np.sum(self.gtr.Pi**2) # variance_scale = one_mutation*ttconf.OVER_DISPERSION if mutation_length<0.05: # for short branches, the number of mutations is poissonian. the prob of a branch to have l=mutation_length*L # mutations when its length is k, is therefor e^{-kL}(kL)^(Ll)/(Ll)!. 
Ignoring constants, the log is # -kL + lL\log(k) log_prob = np.array([ k - mutation_length*np.log(k+ttconf.MIN_BRANCH_LENGTH*one_mutation) for k in grid])/one_mutation log_prob -= log_prob.min() else: # make it a Gaussian #sigma_sq = (mutation_length+one_mutation)*variance_scale l = (mutation_length+one_mutation) nm_inv = np.exp(l/p0) sigma_sq = p0*(nm_inv-1)*(nm_inv - p0*(nm_inv-1))*one_mutation sigma = np.sqrt(sigma_sq+ttconf.MIN_BRANCH_LENGTH*one_mutation) log_prob = np.array(np.min([[ 0.5*(mutation_length-k)**2/sigma_sq for k in grid], 100 + np.abs([(mutation_length-k)/sigma for k in grid])], axis=0)) elif branch_length_mode=='marginal': if hasattr(node, 'profile_pair'): log_prob = np.array([-self.gtr.prob_t_profiles(node.profile_pair, pattern_multiplicity, k, return_log=True) for k in grid]) else: raise Exception("profile pairs need to be assigned to node") elif branch_length_mode=='joint': if not hasattr(node, 'branch_state'): raise Exception("branch state pairs need to be assigned to nodes") log_prob = np.array([-self.gtr.prob_t_compressed(node.branch_state['pair'], node.branch_state['multiplicity'], k, return_log=True) for k in grid]) else: raise Exception("unknown branch length mode! "+branch_length_mode) # tmp_dis = Distribution(grid, log_prob, is_log=True, kind='linear') # norm = tmp_dis.integrate(a=tmp_dis.xmin, b=tmp_dis.xmax, n=200) super(BranchLenInterpolator, self).__init__(grid, log_prob, is_log=True, kind='linear', min_width=min_width) @property def gamma(self): return self._gamma @gamma.setter def gamma(self, value): self._gamma = max(ttconf.TINY_NUMBER, value) @property def merger_cost(self): return self._merger_cost @merger_cost.setter def merger_cost(self, cost_func): self._merger_cost = cost_func self._peak_idx = np.argmin(self.__call__(self.x)) self._peak_pos = self.x[self._peak_idx] if self.kind=='linear': # can't mess like this with non-linear interpolation deltay = self.__call__(self.peak_pos) - self._peak_val self._peak_val += deltay self._func.y -= deltay @property def peak_pos(self): return super(BranchLenInterpolator,self).peak_pos/self.gamma @property def support(self): return self._support/self.gamma @property def fwhm(self): return super(BranchLenInterpolator,self).fwhm/self.gamma def __call__(self, x, tnode=None, multiplicity=None): res = super(BranchLenInterpolator, self).__call__(x*self.gamma) if self.merger_cost is not None: if tnode is None: tnode = self.node.time_before_present if multiplicity is None: multiplicity = len(self.node.up.clades) res += self.merger_cost(tnode, x, multiplicity=multiplicity) return res def __mul__(self, other): res = BranchLenInterpolator(super(BranchLenInterpolator, self).__mul__(other), gtr=self.gtr) return res treetime-0.8.6/treetime/clock_tree.py000066400000000000000000001261231417362145000176550ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import numpy as np from . import config as ttconf from . import MissingDataError from .treeanc import TreeAnc from .utils import numeric_date, DateConversion, datestring_from_numeric from .distribution import Distribution from .branch_len_interpolator import BranchLenInterpolator from .node_interpolator import NodeInterpolator class ClockTree(TreeAnc): """ ClockTree is the main class to perform the optimization of the node positions given the temporal constraints of (some) leaves. The optimization workflow includes the inference of the ancestral sequences and branch length optimization using TreeAnc. 
After the optimization is done, the nodes with date-time information are
    arranged along the time axis, and the conversion between branch-length
    units and date-time units is determined. Then, for each internal node, we
    compute the probability distribution of the node's position conditional on
    the fixed positions of those leaves that have temporal information. In the
    end, the most probable position of each internal node is converted into
    the most likely time of that node.
    """

    def __init__(self, *args, dates=None, debug=False, real_dates=True,
                 precision='auto', branch_length_mode='joint',
                 use_covariation=False, **kwargs):
        """
        ClockTree constructor

        Parameters
        ----------
        dates : dict
            :code:`{leaf_name:leaf_date}` dictionary

        debug : bool
            If True, the debug mode is ON, which means little or no clean-up
            of obsolete parameters, so that program execution can be inspected
            in intermediate states. In debug mode, the python debugger is also
            allowed to interrupt program execution with an interactive shell
            if an error occurs.

        real_dates : bool
            If True, some additional sanity checks of the input dates will be
            performed.

        precision : int
            Precision can be 0 (rough), 1 (default), 2 (fine), or 3 (ultra fine).
            This parameter determines the number of grid points that are used
            for the evaluation of the branch length interpolation objects.
            When not specified, this will default to 1 for short sequences and
            2 for long sequences with L>1e4

        branch_length_mode : str
            determines whether branch lengths are calculated using the 'joint'
            ML, 'marginal' ML, or the branch lengths of the input tree ('input').

        use_covariation : bool
            determines whether root-to-tip regression accounts for covariance
            introduced by shared ancestry.

        **kwargs:
            Keyword arguments needed to construct the parent class (TreeAnc)

        """
        super(ClockTree, self).__init__(*args, **kwargs)
        if dates is None:
            raise ValueError("ClockTree requires date constraints!")

        self.debug = debug
        self.real_dates = real_dates
        self.date_dict = dates
        self._date2dist = None  # we do not know anything about the conversion
        self.tip_slack = ttconf.OVER_DISPERSION  # extra number of mutations added
                                                 # to terminal branches in covariance calculation
        self.rel_tol_prune = ttconf.REL_TOL_PRUNE
        self.rel_tol_refine = ttconf.REL_TOL_REFINE

        self.branch_length_mode = branch_length_mode
        self.clock_model = None
        self.use_covariation = use_covariation  # if False, covariation will be ignored in rate estimates.
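        # What follows translates these settings into state: _set_precision
        # chooses grid sizes for the numerical distributions (finer grids when
        # one mutation corresponds to a smaller branch length), and
        # _assign_dates attaches the raw date constraints to the tree nodes.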
self._set_precision(precision)
        self._assign_dates()

    def _assign_dates(self):
        """assign dates to nodes

        Returns
        -------
        str
            success/error code
        """
        if self.tree is None:
            raise MissingDataError("ClockTree._assign_dates: tree is not set, can't assign dates")

        bad_branch_counter = 0
        for node in self.tree.find_clades(order='postorder'):
            if node.name in self.date_dict:
                tmp_date = self.date_dict[node.name]
                if np.isscalar(tmp_date) and np.isnan(tmp_date):
                    self.logger("WARNING: ClockTree.init: node %s has a bad date: %s"%(node.name, str(tmp_date)), 2, warn=True)
                    node.raw_date_constraint = None
                    node.bad_branch = True
                else:
                    try:
                        tmp = np.mean(tmp_date)
                        node.raw_date_constraint = tmp_date
                        node.bad_branch = False
                    except:
                        self.logger("WARNING: ClockTree.init: node %s has a bad date: %s"%(node.name, str(tmp_date)), 2, warn=True)
                        node.raw_date_constraint = None
                        node.bad_branch = True
            else: # nodes without date constraints
                node.raw_date_constraint = None

                if node.is_terminal():
                    # Terminal branches without date constraints are marked as 'bad'
                    node.bad_branch = True
                else:
                    # If all branches downstream are 'bad', and there is no date
                    # constraint for this node, the branch is marked as 'bad'
                    node.bad_branch = np.all([x.bad_branch for x in node])

            if node.is_terminal() and node.bad_branch:
                bad_branch_counter += 1

        if bad_branch_counter > self.tree.count_terminals() - 3:
            raise MissingDataError("ERROR: ALMOST NO VALID DATE CONSTRAINTS")

        self.logger("ClockTree._assign_dates: assigned date constraints to {} out of {} tips.".format(self.tree.count_terminals()-bad_branch_counter, self.tree.count_terminals()), 1)
        return ttconf.SUCCESS

    def _set_precision(self, precision):
        '''
        function that sets precision to a (hopefully) reasonable guess based
        on the length of the sequence if not explicitly set
        '''
        # if precision is explicitly specified, use it.
        if self.one_mutation:
            self.min_width = 10*self.one_mutation
        else:
            self.min_width = 0.001
        if precision in [0,1,2,3]:
            self.precision = precision
            if self.one_mutation and self.one_mutation<1e-4 and precision<2:
                self.logger("ClockTree._set_precision: FOR LONG SEQUENCES (>1e4) precision>=2 IS RECOMMENDED."
                            " precision %d was specified by the user"%precision, level=0)
        else:
            # otherwise adjust it depending on the minimal sensible branch length
            if self.one_mutation:
                if self.one_mutation>1e-4:
                    self.precision = 1
                else:
                    self.precision = 2
            else:
                self.precision = 1
            self.logger("ClockTree: Setting precision to level %s"%self.precision, 2)

        if self.precision==0:
            self.node_grid_points = ttconf.NODE_GRID_SIZE_ROUGH
            self.branch_grid_points = ttconf.BRANCH_GRID_SIZE_ROUGH
            self.n_integral = ttconf.N_INTEGRAL_ROUGH
        elif self.precision==2:
            self.node_grid_points = ttconf.NODE_GRID_SIZE_FINE
            self.branch_grid_points = ttconf.BRANCH_GRID_SIZE_FINE
            self.n_integral = ttconf.N_INTEGRAL_FINE
        elif self.precision==3:
            self.node_grid_points = ttconf.NODE_GRID_SIZE_ULTRA
            self.branch_grid_points = ttconf.BRANCH_GRID_SIZE_ULTRA
            self.n_integral = ttconf.N_INTEGRAL_ULTRA
        else:
            self.node_grid_points = ttconf.NODE_GRID_SIZE
            self.branch_grid_points = ttconf.BRANCH_GRID_SIZE
            self.n_integral = ttconf.N_INTEGRAL

    @property
    def date2dist(self):
        return self._date2dist

    @date2dist.setter
    def date2dist(self, val):
        if val is None:
            self._date2dist = None
        else:
            self.logger("ClockTree.date2dist: Setting new molecular clock."
" rate=%.3e, R^2=%.4f"%(val.clock_rate, val.r_val**2), 2) self._date2dist = val def setup_TreeRegression(self, covariation=True): """instantiate a TreeRegression object and set its tip_value and branch_value function to defaults that are sensible for treetime instances. Parameters ---------- covariation : bool, optional account for phylogenetic covariation Returns ------- TreeRegression a TreeRegression instance with self.tree attached as tree. """ from .treeregression import TreeRegression tip_value = lambda x:np.mean(x.raw_date_constraint) if (x.is_terminal() and (x.bad_branch is False)) else None branch_value = lambda x:x.mutation_length if covariation: om = self.one_mutation branch_variance = lambda x:((x.clock_length if hasattr(x,'clock_length') else x.mutation_length) +(self.tip_slack**2*om if x.is_terminal() else 0.0))*om else: branch_variance = lambda x:1.0 if x.is_terminal() else 0.0 Treg = TreeRegression(self.tree, tip_value=tip_value, branch_value=branch_value, branch_variance=branch_variance) Treg.valid_confidence = covariation return Treg def get_clock_model(self, covariation=True, slope=None): Treg = self.setup_TreeRegression(covariation=covariation) self.clock_model = Treg.regression(slope=slope) if not np.isfinite(self.clock_model['slope']): raise ValueError("Clock rate estimation failed. If your data lacks temporal signal, please specify the rate explicitly!") if not Treg.valid_confidence or (slope is not None): if 'cov' in self.clock_model: self.clock_model.pop('cov') self.clock_model['valid_confidence']=False else: self.clock_model['valid_confidence']=True self.clock_model['r_val'] = Treg.explained_variance() self.date2dist = DateConversion.from_regression(self.clock_model) def init_date_constraints(self, ancestral_inference=False, clock_rate=None, **kwarks): """ Get the conversion coefficients between the dates and the branch lengths as they are used in ML computations. The conversion formula is assumed to be 'length = k*numdate + b'. For convenience, these coefficients as well as regression parameters are stored in the 'dates2dist' object. .. Note:: The tree must have dates set to all nodes before calling this function. Parameters ---------- ancestral_inference: bool If True, reinfer ancestral sequences when ancestral sequences are missing clock_rate: float If specified, timetree optimization will be done assuming a fixed clock rate as specified """ self.logger("ClockTree.init_date_constraints...",2) self.tree.coalescent_joint_LH = 0 if self.aln and (not self.sequence_reconstruction): self.infer_ancestral_sequences('probabilistic', marginal=self.branch_length_mode=='marginal', sample_from_profile='root',**kwarks) # set the None for the date-related attributes in the internal nodes. 
# make interpolation objects for the branches self.logger('ClockTree.init_date_constraints: Initializing branch length interpolation objects...',3) has_clock_length = [] for node in self.tree.find_clades(order='postorder'): if node.up is None: node.branch_length_interpolator = None else: has_clock_length.append(hasattr(node, 'clock_length')) # copy the merger rate and gamma if they are set if hasattr(node,'branch_length_interpolator') and node.branch_length_interpolator is not None: gamma = node.branch_length_interpolator.gamma merger_cost = node.branch_length_interpolator.merger_cost else: gamma = 1.0 merger_cost = None if self.branch_length_mode=='marginal': node.profile_pair = self.marginal_branch_profile(node) elif self.branch_length_mode=='joint' and (not hasattr(node, 'branch_state')): self.add_branch_state(node) node.branch_length_interpolator = BranchLenInterpolator(node, self.gtr, pattern_multiplicity = self.data.multiplicity, min_width=self.min_width, one_mutation=self.one_mutation, branch_length_mode=self.branch_length_mode) node.branch_length_interpolator.merger_cost = merger_cost node.branch_length_interpolator.gamma = gamma # use covariance in clock model only after initial timetree estimation is done use_cov = (np.sum(has_clock_length) > len(has_clock_length)*0.7) and self.use_covariation self.get_clock_model(covariation=use_cov, slope=clock_rate) # make node distribution objects for node in self.tree.find_clades(order="postorder"): # node is constrained if hasattr(node, 'raw_date_constraint') and node.raw_date_constraint is not None: # set the absolute time before present in branch length units if np.isscalar(node.raw_date_constraint): tbp = self.date2dist.get_time_before_present(node.raw_date_constraint) node.date_constraint = Distribution.delta_function(tbp, weight=1.0, min_width=self.min_width) else: tbp = self.date2dist.get_time_before_present(np.array(node.raw_date_constraint)) node.date_constraint = Distribution(tbp, np.ones_like(tbp), is_log=False, min_width=self.min_width) if hasattr(node, 'bad_branch') and node.bad_branch is True: self.logger("ClockTree.init_date_constraints -- WARNING: Branch is marked as bad" ", excluding it from the optimization process." " Date constraint will be ignored!", 4, warn=True) else: # node without sampling date set node.raw_date_constraint = None node.date_constraint = None def make_time_tree(self, time_marginal=False, clock_rate=None, **kwargs): ''' Use the date constraints to calculate the most likely positions of unconstrained nodes. Parameters ---------- time_marginal : bool If true, use marginal reconstruction for node positions **kwargs Key word arguments to initialize dates constraints ''' self.logger("ClockTree: Maximum likelihood tree optimization with temporal constraints",1) self.init_date_constraints(clock_rate=clock_rate, **kwargs) if time_marginal: self._ml_t_marginal(assign_dates = time_marginal=="assign") else: self._ml_t_joint() self.convert_dates() def _ml_t_joint(self): """ Compute the joint maximum likelihood assignment of the internal nodes positions by propagating from the tree leaves towards the root. Given the assignment of parent nodes, reconstruct the maximum-likelihood positions of the child nodes by propagating from the root to the leaves. The result of this operation is the time_before_present value, which is the position of the node, expressed in the units of the branch length, and scaled from the present-day. The value is assigned to the corresponding attribute of each node of the tree. 
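        In graphical-model terms this is max-product message passing on the
        tree: on the way up, each node stores the maximal subtree likelihood
        as a function of the parent position (joint_pos_Lx) together with the
        maximizing branch length (joint_pos_Cx); on the way down, these
        arg-max values are read off to fix the node positions.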
Returns ------- None Every internal node is assigned the probability distribution in form of an interpolation object and sends this distribution further towards the root. """ def _cleanup(): for node in self.tree.find_clades(): del node.joint_pos_Lx del node.joint_pos_Cx self.logger("ClockTree - Joint reconstruction: Propagating leaves -> root...", 2) # go through the nodes from leaves towards the root: for node in self.tree.find_clades(order='postorder'): # children first, msg to parents # Lx is the maximal likelihood of a subtree given the parent position # Cx is the branch length corresponding to the maximally likely subtree if node.bad_branch: # no information at the node node.joint_pos_Lx = None node.joint_pos_Cx = None else: # all other nodes if node.date_constraint is not None and node.date_constraint.is_delta: # there is a time constraint # subtree probability given the position of the parent node # Lx.x is the position of the parent node # Lx.y is the probablity of the subtree (consisting of one terminal node in this case) # Cx.y is the branch length corresponding the optimal subtree bl = node.branch_length_interpolator.x x = bl + node.date_constraint.peak_pos node.joint_pos_Lx = Distribution(x, node.branch_length_interpolator(bl), min_width=self.min_width, is_log=True) node.joint_pos_Cx = Distribution(x, bl, min_width=self.min_width) # map back to the branch length else: # all nodes without precise constraint but positional information msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else [] msgs_to_multiply.extend([child.joint_pos_Lx for child in node.clades if child.joint_pos_Lx is not None]) # subtree likelihood given the node's constraint and child messages if len(msgs_to_multiply) == 0: # there are no constraints node.joint_pos_Lx = None node.joint_pos_Cx = None continue elif len(msgs_to_multiply)>1: # combine the different msgs and constraints subtree_distribution = Distribution.multiply(msgs_to_multiply) else: # there is exactly one constraint. 
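                    # (Distribution.multiply combines messages by adding their
                    # stored negative log-probabilities on a shared grid, i.e.
                    # it multiplies the underlying densities; with a single
                    # message there is nothing to combine.)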
subtree_distribution = msgs_to_multiply[0] if node.up is None: # this is the root, set dates subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune) # set root position and joint likelihood of the tree node.time_before_present = subtree_distribution.peak_pos node.joint_pos_Lx = subtree_distribution node.joint_pos_Cx = None node.clock_length = node.branch_length else: # otherwise propagate to parent res, res_t = NodeInterpolator.convolve(subtree_distribution, node.branch_length_interpolator, max_or_integral='max', inverse_time=True, n_grid_points = self.node_grid_points, n_integral=self.n_integral, rel_tol=self.rel_tol_refine) res._adjust_grid(rel_tol=self.rel_tol_prune) node.joint_pos_Lx = res node.joint_pos_Cx = res_t # go through the nodes from root towards the leaves and assign joint ML positions: self.logger("ClockTree - Joint reconstruction: Propagating root -> leaves...", 2) for node in self.tree.find_clades(order='preorder'): # root first, msgs to children if node.up is None: # root node continue # the position was already set on the previous step if node.joint_pos_Cx is None: # no constraints or branch is bad - reconstruct from the branch len interpolator node.branch_length = node.branch_length_interpolator.peak_pos elif node.date_constraint is not None and node.date_constraint.is_delta: node.branch_length = node.up.time_before_present - node.date_constraint.peak_pos elif isinstance(node.joint_pos_Cx, Distribution): # NOTE the Lx distribution is the likelihood, given the position of the parent # (Lx.x = parent position, Lx.y = LH of the node_pos given Lx.x, # the length of the branch corresponding to the most likely # subtree is node.Cx(node.time_before_present)) # subtree_LH = node.joint_pos_Lx(node.up.time_before_present) node.branch_length = node.joint_pos_Cx(max(node.joint_pos_Cx.xmin, node.up.time_before_present)+ttconf.TINY_NUMBER) node.time_before_present = node.up.time_before_present - node.branch_length node.clock_length = node.branch_length # just sanity check, should never happen: if node.branch_length < 0 or node.time_before_present < 0: if node.branch_length<0 and node.branch_length>-ttconf.TINY_NUMBER: self.logger("ClockTree - Joint reconstruction: correcting rounding error of %s"%node.name, 4) node.branch_length = 0 self.tree.positional_joint_LH = self.timetree_likelihood() # cleanup, if required if not self.debug: _cleanup() def timetree_likelihood(self): ''' Return the likelihood of the data given the current branch length in the tree ''' LH = 0 for node in self.tree.find_clades(order='preorder'): # sum the likelihood contributions of all branches if node.up is None: # root node continue LH -= node.branch_length_interpolator(node.branch_length) # add the root sequence LH and return if self.aln and self.sequence_reconstruction: LH += self.gtr.sequence_logLH(self.tree.root.cseq, pattern_multiplicity=self.data.multiplicity) return LH def _ml_t_marginal(self, assign_dates=False): """ Compute the marginal probability distribution of the internal nodes positions by propagating from the tree leaves towards the root. The result of this operation are the probability distributions of each internal node, conditional on the constraints on all leaves of the tree, which have sampling dates. The probability distributions are set as marginal_pos_LH attributes to the nodes. 
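        This is the standard belief-propagation scheme on a tree: after the
        leaves-to-root pass, a second root-to-leaves pass combines each node's
        subtree distribution with the message arriving from the complementary
        part of the tree (msg_from_parent), so the marginal at a node is
        proportional to msg_from_parent * subtree_distribution.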
Parameters ---------- assign_dates : bool, default False If True, the inferred dates will be assigned to the nodes as :code:`time_before_present' attributes, and their branch lengths will be corrected accordingly. .. Note:: Normally, the dates are assigned by running joint reconstruction. Returns ------- None Every internal node is assigned the probability distribution in form of an interpolation object and sends this distribution further towards the root. """ def _cleanup(): for node in self.tree.find_clades(): try: del node.marginal_pos_Lx del node.subtree_distribution del node.msg_from_parent #del node.marginal_pos_LH except: pass self.logger("ClockTree - Marginal reconstruction: Propagating leaves -> root...", 2) # go through the nodes from leaves towards the root: for node in self.tree.find_clades(order='postorder'): # children first, msg to parents if node.bad_branch: # no information node.marginal_pos_Lx = None else: # all other nodes if node.date_constraint is not None and node.date_constraint.is_delta: # there is a hard time constraint # initialize the Lx for nodes with precise date constraint: # subtree probability given the position of the parent node # position of the parent node is given by the branch length # distribution attached to the child node position node.subtree_distribution = node.date_constraint bl = node.branch_length_interpolator.x x = bl + node.date_constraint.peak_pos node.marginal_pos_Lx = Distribution(x, node.branch_length_interpolator(bl), min_width=self.min_width, is_log=True) else: # all nodes without precise constraint but positional information # subtree likelihood given the node's constraint and child msg: msgs_to_multiply = [node.date_constraint] if node.date_constraint is not None else [] msgs_to_multiply.extend([child.marginal_pos_Lx for child in node.clades if child.marginal_pos_Lx is not None]) # combine the different msgs and constraints if len(msgs_to_multiply)==0: # no information node.marginal_pos_Lx = None continue elif len(msgs_to_multiply)==1: node.subtree_distribution = msgs_to_multiply[0] else: # combine the different msgs and constraints node.subtree_distribution = Distribution.multiply(msgs_to_multiply) if node.up is None: # this is the root, set dates node.subtree_distribution._adjust_grid(rel_tol=self.rel_tol_prune) node.marginal_pos_Lx = node.subtree_distribution node.marginal_pos_LH = node.subtree_distribution self.tree.positional_marginal_LH = -node.subtree_distribution.peak_val else: # otherwise propagate to parent res, res_t = NodeInterpolator.convolve(node.subtree_distribution, node.branch_length_interpolator, max_or_integral='integral', n_grid_points = self.node_grid_points, n_integral=self.n_integral, rel_tol=self.rel_tol_refine) res._adjust_grid(rel_tol=self.rel_tol_prune) node.marginal_pos_Lx = res self.logger("ClockTree - Marginal reconstruction: Propagating root -> leaves...", 2) from scipy.interpolate import interp1d for node in self.tree.find_clades(order='preorder'): ## The root node if node.up is None: node.msg_from_parent = None # nothing beyond the root # all other cases (All internal nodes + unconstrained terminals) elif (node.date_constraint is not None) and (not node.bad_branch) and node.date_constraint.is_delta: node.marginal_pos_LH = node.date_constraint else: parent = node.up # messages from the complementary subtree (iterate over all sister nodes) complementary_msgs = [sister.marginal_pos_Lx for sister in parent.clades if (sister != node) and (sister.marginal_pos_Lx is not None)] # if parent itself got smth from 
the root node, include it if parent.msg_from_parent is not None: complementary_msgs.append(parent.msg_from_parent) if len(complementary_msgs): msg_parent_to_node = NodeInterpolator.multiply(complementary_msgs) msg_parent_to_node._adjust_grid(rel_tol=self.rel_tol_prune) else: x = [parent.numdate, numeric_date()] msg_parent_to_node = NodeInterpolator(x, [1.0, 1.0],min_width=self.min_width) # integral message, which delivers to the node the positional information # from the complementary subtree res, res_t = NodeInterpolator.convolve(msg_parent_to_node, node.branch_length_interpolator, max_or_integral='integral', inverse_time=False, n_grid_points = self.node_grid_points, n_integral=self.n_integral, rel_tol=self.rel_tol_refine) node.msg_from_parent = res if node.marginal_pos_Lx is None: node.marginal_pos_LH = node.msg_from_parent else: node.marginal_pos_LH = NodeInterpolator.multiply((node.msg_from_parent, node.subtree_distribution)) self.logger('ClockTree._ml_t_root_to_leaves: computed convolution' ' with %d points at node %s'%(len(res.x),node.name),4) if self.debug: tmp = np.diff(res.y-res.peak_val) nsign_changed = np.sum((tmp[1:]*tmp[:-1]<0)&(res.y[1:-1]-res.peak_val<500)) if nsign_changed>1: import matplotlib.pyplot as plt plt.ion() plt.plot(res.x, res.y-res.peak_val, '-o') plt.plot(res.peak_pos - node.branch_length_interpolator.x, node.branch_length_interpolator(node.branch_length_interpolator.x)-node.branch_length_interpolator.peak_val, '-o') plt.plot(msg_parent_to_node.x,msg_parent_to_node.y-msg_parent_to_node.peak_val, '-o') plt.ylim(0,100) plt.xlim(-0.05, 0.05) import ipdb; ipdb.set_trace() # assign positions of nodes and branch length only when desired # since marginal reconstruction can result in negative branch length if assign_dates: node.time_before_present = node.marginal_pos_LH.peak_pos if node.up: node.clock_length = node.up.time_before_present - node.time_before_present node.branch_length = node.clock_length # construct the inverse cumulant distribution to evaluate confidence intervals if node.marginal_pos_LH.is_delta: node.marginal_inverse_cdf=interp1d([0,1], node.marginal_pos_LH.peak_pos*np.ones(2), kind="linear") else: dt = np.diff(node.marginal_pos_LH.x) y = node.marginal_pos_LH.prob_relative(node.marginal_pos_LH.x) int_y = np.concatenate(([0], np.cumsum(dt*(y[1:]+y[:-1])/2.0))) int_y/=int_y[-1] node.marginal_inverse_cdf = interp1d(int_y, node.marginal_pos_LH.x, kind="linear") node.marginal_cdf = interp1d(node.marginal_pos_LH.x, int_y, kind="linear") if not self.debug: _cleanup() def convert_dates(self): ''' This function converts the estimated "time_before_present" properties of all nodes to numerical dates stored in the "numdate" attribute. This date is further converted into a human readable date string in format %Y-%m-%d assuming the usual calendar. 
Returns ------- None All manipulations are done in place on the tree ''' from datetime import datetime, timedelta now = numeric_date() for node in self.tree.find_clades(): years_bp = self.date2dist.to_years(node.time_before_present) if years_bp < 0 and self.real_dates: if not hasattr(node, "bad_branch") or node.bad_branch is False: self.logger("ClockTree.convert_dates -- WARNING: The node is later than today, but it is not " "marked as \"BAD\", which indicates the error in the " "likelihood optimization.",4 , warn=True) else: self.logger("ClockTree.convert_dates -- WARNING: node which is marked as \"BAD\" optimized " "later than present day",4 , warn=True) node.numdate = now - years_bp node.date = datestring_from_numeric(node.numdate) def branch_length_to_years(self): ''' This function sets branch length to reflect the date differences between parent and child nodes measured in years. Should only be called after :py:meth:`timetree.ClockTree.convert_dates` has been called. Returns ------- None All manipulations are done in place on the tree ''' self.logger('ClockTree.branch_length_to_years: setting node positions in units of years', 2) if not hasattr(self.tree.root, 'numdate'): self.logger('ClockTree.branch_length_to_years: infer ClockTree first', 2,warn=True) self.tree.root.branch_length = 0.1 for n in self.tree.find_clades(order='preorder'): if n.up is not None: n.branch_length = n.numdate - n.up.numdate def calc_rate_susceptibility(self, rate_std=None, params=None): """return the time tree estimation of evolutionary rates +/- one standard deviation form the ML estimate. Returns ------- TreeTime.return_code : str success or failure """ params = params or {} if rate_std is None: if not (self.clock_model['valid_confidence'] and 'cov' in self.clock_model): raise ValueError("ClockTree.calc_rate_susceptibility: need valid standard deviation of the clock rate to estimate dating error.") rate_std = np.sqrt(self.clock_model['cov'][0,0]) current_rate = np.abs(self.clock_model['slope']) upper_rate = self.clock_model['slope'] + rate_std lower_rate = max(0.1*current_rate, self.clock_model['slope'] - rate_std) for n in self.tree.find_clades(): if n.up: n._orig_gamma = n.branch_length_interpolator.gamma n.branch_length_interpolator.gamma = n._orig_gamma*upper_rate/current_rate self.logger("###ClockTree.calc_rate_susceptibility: run with upper bound of rate estimate", 1) self.make_time_tree(**params) self.logger("###ClockTree.calc_rate_susceptibility: rate: %f, LH:%f"%(upper_rate, self.tree.positional_joint_LH), 2) for n in self.tree.find_clades(): n.numdate_rate_variation = [(upper_rate, n.numdate)] if n.up: n.branch_length_interpolator.gamma = n._orig_gamma*lower_rate/current_rate self.logger("###ClockTree.calc_rate_susceptibility: run with lower bound of rate estimate", 1) self.make_time_tree(**params) self.logger("###ClockTree.calc_rate_susceptibility: rate: %f, LH:%f"%(lower_rate, self.tree.positional_joint_LH), 2) for n in self.tree.find_clades(): n.numdate_rate_variation.append((lower_rate, n.numdate)) if n.up: n.branch_length_interpolator.gamma = n._orig_gamma self.logger("###ClockTree.calc_rate_susceptibility: run with central rate estimate", 1) self.make_time_tree(**params) self.logger("###ClockTree.calc_rate_susceptibility: rate: %f, LH:%f"%(current_rate, self.tree.positional_joint_LH), 2) for n in self.tree.find_clades(): n.numdate_rate_variation.append((current_rate, n.numdate)) n.numdate_rate_variation.sort(key=lambda x:x[1]) # sort estimates for different rates by numdate return 
ttconf.SUCCESS

    def date_uncertainty_due_to_rate(self, node, interval=(0.05, 0.95)):
        """use previously calculated variation of the rate to estimate
        the uncertainty in a particular numdate due to rate variation.

        Parameters
        ----------
        node : PhyloTree.Clade
            node for which the confidence interval is to be calculated
        interval : tuple, optional
            Array of length two, or tuple, defining the bounds of the confidence interval
        """
        if hasattr(node, "numdate_rate_variation"):
            from scipy.special import erfinv
            nsig = [np.sqrt(2.0)*erfinv(-1.0 + 2.0*x) if x*(1.0-x) else 0
                    for x in interval]
            l,c,u = [x[1] for x in node.numdate_rate_variation]
            return np.array([c + x*np.abs(y-c) for x,y in zip(nsig, (l,u))])
        else:
            return None

    def combine_confidence(self, center, limits, c1=None, c2=None):
        if c1 is None and c2 is None:
            return np.array(limits)
        elif c1 is None:
            min_val, max_val = c2
        elif c2 is None:
            min_val, max_val = c1
        else:
            min_val = center - np.sqrt((c1[0]-center)**2 + (c2[0]-center)**2)
            max_val = center + np.sqrt((c1[1]-center)**2 + (c2[1]-center)**2)

        return np.array([max(limits[0], min_val), min(limits[1], max_val)])

    def get_confidence_interval(self, node, interval=(0.05, 0.95)):
        '''
        If temporal reconstruction was done using the marginal ML mode, the
        entire distribution of times is available. This function determines
        the 90% (or other) confidence interval, defined as the range that
        excludes 5% of the probability mass below the lower bound and 5%
        above the upper bound. Note that this does not necessarily contain
        the highest probability position. In absence of marginal
        reconstruction, it will return uncertainty based on rate variation.
        If both are present, the wider interval will be returned.

        Parameters
        ----------
        node : PhyloTree.Clade
            The node for which the confidence interval is to be calculated
        interval : tuple, list
            Array of length two, or tuple, defining the bounds of the confidence interval

        Returns
        -------
        confidence_interval : numpy array
            Array with two numerical dates delineating the confidence interval
        '''
        rate_contribution = self.date_uncertainty_due_to_rate(node, interval)

        mutation_contribution = None  # remains None when no marginal reconstruction is available
        if hasattr(node, "marginal_inverse_cdf"):
            min_date, max_date = [self.date2dist.to_numdate(x) for x in
                                  (node.marginal_pos_LH.xmax, node.marginal_pos_LH.xmin)]
            if node.marginal_inverse_cdf == "delta":
                return np.array([node.numdate, node.numdate])
            else:
                mutation_contribution = self.date2dist.to_numdate(
                            node.marginal_inverse_cdf(np.array(interval))[::-1])
        else:
            min_date, max_date = [-np.inf, np.inf]

        return self.combine_confidence(node.numdate, (min_date, max_date),
                                       c1=rate_contribution, c2=mutation_contribution)

    def get_max_posterior_region(self, node, fraction=0.9):
        '''
        If temporal reconstruction was done using the marginal ML mode, the
        entire distribution of times is available. This function determines
        the interval around the highest posterior probability region that
        contains the specified fraction of the probability mass. In absence
        of marginal reconstruction, it will return uncertainty based on rate
        variation. If both are present, the wider interval will be returned.
        Parameters
        ----------
        node : PhyloTree.Clade
            The node for which the posterior region is to be calculated
        fraction : float
            Fraction of the posterior probability mass to be contained in the region

        Returns
        -------
        max_posterior_region : numpy array
            Array with two numerical dates delineating the high posterior region
        '''
        if node.marginal_inverse_cdf == "delta":
            return np.array([node.numdate, node.numdate])

        min_max = (node.marginal_pos_LH.xmin, node.marginal_pos_LH.xmax)
        min_date, max_date = [self.date2dist.to_numdate(x) for x in min_max][::-1]
        if node.marginal_pos_LH.peak_pos == min_max[0]: # peak on the left
            return self.get_confidence_interval(node, (0, fraction))
        elif node.marginal_pos_LH.peak_pos == min_max[1]: # peak on the right
            return self.get_confidence_interval(node, (1.0-fraction, 1.0))
        else: # peak in the center of the distribution
            rate_contribution = self.date_uncertainty_due_to_rate(node,
                                        ((1-fraction)*0.5, 1.0-(1.0-fraction)*0.5))

            # construct height to position interpolators left and right of the peak
            # this assumes there is only one peak --- might fail in odd cases
            from scipy.interpolate import interp1d
            from scipy.optimize import minimize_scalar as minimize

            pidx = np.argmin(node.marginal_pos_LH.y)
            pval = np.min(node.marginal_pos_LH.y)

            # check that the distribution has at least 3 points and that the peak
            # is not at either of the two end points -- otherwise, the
            # interpolation objects below cannot be initialized.
            if node.marginal_pos_LH.y.shape[0]<3 or pidx==0 or pidx==node.marginal_pos_LH.y.shape[0]-1:
                value_str = "values: " + ','.join([str(x) for x in node.marginal_pos_LH.y])
                self.logger("get_max_posterior_region: peak on boundary or array too short." + value_str, 1, warn=True)
                mutation_contribution = None
            else:
                left  = interp1d(node.marginal_pos_LH.y[:(pidx+1)]-pval, node.marginal_pos_LH.x[:(pidx+1)],
                                 kind='linear', fill_value=min_max[0], bounds_error=False)
                right = interp1d(node.marginal_pos_LH.y[pidx:]-pval, node.marginal_pos_LH.x[pidx:],
                                 kind='linear', fill_value=min_max[1], bounds_error=False)

                # function to minimize -- squared difference between prob mass and desired fraction
                def func(x, thres):
                    interval = np.array([left(x), right(x)]).squeeze()
                    return (thres - np.diff(node.marginal_cdf(np.array(interval))))**2

                # minimize and check success
                sol = minimize(func, bracket=[0,10], args=(fraction,))
                if sol['success']:
                    mutation_contribution = self.date2dist.to_numdate(np.array([right(sol['x']), left(sol['x'])]).squeeze())
                else: # on failure, return standard confidence interval
                    mutation_contribution = None

            return self.combine_confidence(node.numdate, (min_date, max_date),
                                           c1=rate_contribution, c2=mutation_contribution)


if __name__=="__main__":
    pass

treetime-0.8.6/treetime/config.py
VERBOSE = 3
BIG_NUMBER = 1e10
TINY_NUMBER = 1e-12
SUPERTINY_NUMBER = 1e-24
MIN_LOG = -1e8 # minimal log value
MIN_BRANCH_LENGTH = 1e-3 # fraction of length 'one_mutation' that is used as lower cut-off for branch lengths in GTR
OVER_DISPERSION = 10

# distribution parameters
BRANCH_GRID_SIZE_ROUGH = 200
NODE_GRID_SIZE_ROUGH = 60
N_INTEGRAL_ROUGH = 60

BRANCH_GRID_SIZE = 250
NODE_GRID_SIZE = 100
N_INTEGRAL = 100

BRANCH_GRID_SIZE_FINE = 300
NODE_GRID_SIZE_FINE = 180
N_INTEGRAL_FINE = 150

BRANCH_GRID_SIZE_ULTRA = 400
NODE_GRID_SIZE_ULTRA = 400
N_INTEGRAL_ULTRA = 250

MIN_INTEGRATION_PEAK = 0.001

# clocktree parameters
BRANCH_LEN_PENALTY = 0
MAX_BRANCH_LENGTH = 4.0 # only relevant for branch length optimization and time trees - upper boundary of
interpolator objects NINTEGRAL = 300 REL_TOL_PRUNE = 0.01 REL_TOL_REFINE = 0.05 NIQD = 3 # SUCCESS = "success" ERROR = "error" # treetime # autocorrelated molecular clock coefficients MU_ALPHA = 1 MU_BETA = 1 treetime-0.8.6/treetime/distribution.py000066400000000000000000000260601417362145000202610ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import import numpy as np from scipy.interpolate import interp1d try: from collections.abc import Iterable except ImportError: from collections import Iterable from copy import deepcopy as make_copy from scipy.ndimage import binary_dilation from .config import BIG_NUMBER, MIN_LOG, MIN_INTEGRATION_PEAK, TINY_NUMBER from .utils import clip class Distribution(object): """ Class to implement the probability distribution. This class wraps the scipy linear interpolation object, and implements some additional operations, needed to manipulate distributions for tree nodes positions, branch lengths, etc. This class is callable, so it can be treated similarly to the scipy interpolation object. """ @staticmethod def calc_fwhm(distribution, is_neg_log=True): """ Assess the width of the probability distribution. This returns full-width-half-max """ if isinstance(distribution, interp1d): if is_neg_log: ymin = distribution.y.min() log_prob = distribution.y-ymin else: log_prob = -np.log(distribution.y) log_prob -= log_prob.min() xvals = distribution.x elif isinstance(distribution, Distribution): # Distribution always stores neg log-prob with the peak value subtracted xvals = distribution._func.x log_prob = distribution._func.y else: raise TypeError("Error in computing the FWHM for the distribution. " " The input should be either Distribution or interpolation object") L = xvals.shape[0] # 0.69... is log(2), there is always one value for which this is true since # the minimum is subtracted tmp = np.where(log_prob < 0.693147)[0] x_l, x_u = tmp[0], tmp[-1] if L < 2: print ("Not enough points to compute FWHM: returning zero") return min(TINY_NUMBER, distribution.xmax - distribution.xmin) else: # need to guard against out-of-bounds errors return max(TINY_NUMBER, xvals[min(x_u+1,L-1)] - xvals[max(0,x_l-1)]) @classmethod def delta_function(cls, x_pos, weight=1., min_width=MIN_INTEGRATION_PEAK): """ Create delta function distribution. 
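        For example, `Distribution.delta_function(0.25)` represents a
        distribution with all of its mass concentrated at position 0.25.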
""" distribution = cls(x_pos,0.,is_log=True, min_width=min_width) distribution.weight = weight return distribution @classmethod def shifted_x(cls, dist, delta_x): return Distribution(dist.x+delta_x, dist.y, kind=dist.kind) @staticmethod def multiply(dists): ''' multiplies a list of Distribution objects ''' if not all([isinstance(k, Distribution) for k in dists]): raise NotImplementedError("Can only multiply Distribution objects") n_delta = np.sum([k.is_delta for k in dists]) min_width = np.max([k.min_width for k in dists]) if n_delta>1: raise ArithmeticError("Cannot multiply more than one delta functions!") elif n_delta==1: delta_dist_ii = np.where([k.is_delta for k in dists])[0][0] delta_dist = dists[delta_dist_ii] new_xpos = delta_dist.peak_pos new_weight = np.prod([k.prob(new_xpos) for k in dists if k!=delta_dist_ii]) * delta_dist.weight res = Distribution.delta_function(new_xpos, weight = new_weight,min_width=min_width) else: new_xmin = np.max([k.xmin for k in dists]) new_xmax = np.min([k.xmax for k in dists]) x_vals = np.unique(np.concatenate([k.x for k in dists])) x_vals = x_vals[(x_vals>new_xmin-TINY_NUMBER)&(x_vals self._xmin-TINY_NUMBER) & (x < self._xmax+TINY_NUMBER) res = np.full(np.shape(x), BIG_NUMBER+self.peak_val, dtype=float) tmp_x = x[valid_idxs] res[valid_idxs] = self._peak_val + self._func(clip(tmp_x, self._xmin+TINY_NUMBER, self._xmax-TINY_NUMBER)) return res elif np.isreal(x): if x < self._xmin or x > self._xmax: return BIG_NUMBER+self.peak_val # x is within interpolation range elif self._delta == True: return self._peak_val else: return self._peak_val + self._func(x) else: raise TypeError("Wrong type: should be float or array") def __mul__(self, other): return Distribution.multiply((self, other)) def _adjust_grid(self, rel_tol=0.01, yc=10): updated = True n_iter=0 while len(self.y)>200 and updated and n_iter<5: interp_err = 2*self.y[1:-1] - self.y[2:] - self.y[:-2] ind = np.ones_like(self.y, dtype=bool) dy = self.y-self.peak_val prune = interp_err[::2] > rel_tol*(1+ (dy[1:-1:2]/yc)**4) ind[1:-1:2] = prune if np.mean(prune)<1.0: self._func.y = self._func.y[ind] self._func.x = self._func.x[ind] updated=True n_iter+=1 else: updated=False n_iter+=1 self._peak_idx = self.__call__(self._func.x).argmin() self._peak_pos = self._func.x[self._peak_idx] self._peak_val = self.__call__(self.peak_pos) def prob(self,x): return np.exp(-1 * self.__call__(x)) def prob_relative(self,x): return np.exp(-1 * (self.__call__(x)-self.peak_val)) def x_rescale(self, factor): self._func.x*=factor self._peak_pos*=factor if factor>=0: self._xmin*=factor self._xmax*=factor else: tmp = self.xmin self._xmin = factor*self.xmax self._xmax = factor*tmp self._func.x = self._func.x[::-1] self._func.y = self._func.y[::-1] def integrate(self, return_log=False ,**kwargs): if self.is_delta: return self.weight else: integral_result = self.integrate_simpson(**kwargs) if return_log: if integral_result==0: return -self.peak_val - BIG_NUMBER else: return -self.peak_val + max(-BIG_NUMBER, np.log(integral_result)) else: return np.exp(-self.peak_val)*integral_result def integrate_trapez(self, a=None, b=None,n=None): mult = 0.5 if a>b: b,a = a,b mult=-0.5 x = np.linspace(a,b,n) dx = np.diff(x) y = self.prob_relative(x) return mult*np.sum(dx*(y[:-1] + y[1:])) def integrate_simpson(self, a=None,b=None,n=None): if n % 2 == 0: n += 1 mult = 1.0/6 dpeak = max(10*self.fwhm, self.min_width) threshold = np.array([a,self.peak_pos-dpeak, self.peak_pos+dpeak,b]) threshold = threshold[(threshold>=a)&(threshold<=b)] 
threshold.sort() res = [] for lw, up in zip(threshold[:-1], threshold[1:]): x = np.linspace(lw,up,n) dx = np.diff(x[::2]) y = self.prob_relative(x) res.append(mult*(dx[0]*y[0]+ np.sum(4*dx*y[1:-1:2]) + np.sum((dx[:-1]+dx[1:])*y[2:-1:2]) + dx[-1]*y[-1])) return np.sum(res) treetime-0.8.6/treetime/gtr.py000066400000000000000000001073341417362145000163420ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import from collections import defaultdict import numpy as np from . import config as ttconf from .seq_utils import alphabets, profile_maps, alphabet_synonyms def avg_transition(W,pi, gap_index=None): if gap_index is None: return np.einsum('i,ij,j', pi, W, pi) else: return (np.einsum('i,ij,j', pi, W, pi) - np.sum(pi*W[:,gap_index])*pi[gap_index])/(1-pi[gap_index]) class GTR(object): """ Defines General-Time-Reversible model of character evolution. """ def __init__(self, alphabet='nuc', prof_map=None, logger=None): """ Initialize empty evolutionary model. Parameters ---------- alphabet : str, numpy.array Alphabet of the sequence. If a string is passed, it is understood as an alphabet name. In this case, the alphabet and its profile map are pulled from :py:obj:`treetime.seq_utils`. If a numpy array of characters is passed, a new alphabet is constructed, and the default profile map is atached to it. prof_map : dict Dictionary linking characters in the sequence to the likelihood of observing characters in the alphabet. This is used to implement ambiguous characters like 'N'=[1,1,1,1] which are equally likely to be any of the 4 nucleotides. Standard profile_maps are defined in file seq_utils.py. logger : callable Custom logging function that should take arguments (msg, level, warn=False), where msg is a string and level an integer to be compared against verbose. """ self.debug=False self.is_site_specific=False if isinstance(alphabet, str): if alphabet not in alphabet_synonyms: raise AttributeError("Unknown alphabet type specified") else: tmp_alphabet = alphabet_synonyms[alphabet] self.alphabet = alphabets[tmp_alphabet] self.profile_map = profile_maps[tmp_alphabet] else: # not a predefined alphabet self.alphabet = np.array(alphabet) if prof_map is None: # generate trivial unambiguous profile map is none is given self.profile_map = {s:x for s,x in zip(self.alphabet, np.eye(len(self.alphabet)))} else: self.profile_map = {x if type(x) is str else x:k for x,k in prof_map.items()} self.state_index={s:si for si,s in enumerate(self.alphabet)} self.state_index.update({s:si for si,s in enumerate(self.alphabet)}) if logger is None: def logger_default(*args,**kwargs): """standard logging function if none provided""" if self.debug: print(*args) self.logger = logger_default else: self.logger = logger self.ambiguous = None self.gap_index = None self.n_states = len(self.alphabet) self.assign_gap_and_ambiguous() # init all matrices with dummy values self.logger("GTR: init with dummy values!", 3) self.v = None # right eigenvectors self.v_inv = None # left eigenvectors self.eigenvals = None # eigenvalues self.assign_rates() def assign_gap_and_ambiguous(self): n_states = len(self.alphabet) self.logger("GTR: with alphabet: "+str([x for x in self.alphabet]),1) # determine if a character exists that corresponds to no info, i.e. 
all one profile if any([x.sum()==n_states for x in self.profile_map.values()]): amb_states = [c for c,x in self.profile_map.items() if x.sum()==n_states] self.ambiguous = 'N' if 'N' in amb_states else amb_states[0] self.logger("GTR: ambiguous character: "+self.ambiguous,2) else: self.ambiguous=None # check for a gap symbol try: self.gap_index = self.state_index['-'] except: self.logger("GTR: no gap symbol!", 4, warn=True) self.gap_index=None @property def mu(self): return self._mu @property def Pi(self): return self._Pi @property def W(self): return self._W @W.setter def W(self, value): self.assign_rates(mu=self.mu, pi=self.Pi, W=value) @Pi.setter def Pi(self, value): self.assign_rates(mu=self.mu, pi=value, W=self.W) @mu.setter def mu(self, value): self.assign_rates(mu=value, pi=self.Pi, W=self.W) @property def Q(self): """function that return the product of the transition matrix and the equilibrium frequencies to obtain the rate matrix of the GTR model """ Q_tmp = (self.W*self.Pi).T Q_diag = -np.sum(Q_tmp, axis=0) np.fill_diagonal(Q_tmp, Q_diag) return Q_tmp ###################################################################### ## constructor methods ###################################################################### def __str__(self): ''' String representation of the GTR model for pretty printing ''' multi_site = len(self.Pi.shape)==2 if multi_site: eq_freq_str = "Average substitution rate (mu): "+str(np.round(self.average_rate,6))+'\n' else: eq_freq_str = "Substitution rate (mu): "+str(np.round(self.mu,6))+'\n' if not multi_site: eq_freq_str += "\nEquilibrium frequencies (pi_i):\n" for a,p in zip(self.alphabet, self.Pi): eq_freq_str+=' '+a+': '+str(np.round(p,4))+'\n' W_str = "\nSymmetrized rates from j->i (W_ij):\n" W_str+='\t'+'\t'.join(self.alphabet)+'\n' for a,Wi in zip(self.alphabet, self.W): W_str+= ' '+a+'\t'+'\t'.join([str(np.round(max(0,p),4)) for p in Wi])+'\n' if not multi_site: Q_str = "\nActual rates from j->i (Q_ij):\n" Q_str+='\t'+'\t'.join(self.alphabet)+'\n' for a,Qi in zip(self.alphabet, self.Q): Q_str+= ' '+a+'\t'+'\t'.join([str(np.round(max(0,p),4)) for p in Qi])+'\n' return eq_freq_str + W_str + Q_str def assign_rates(self, mu=1.0, pi=None, W=None): """ Overwrite the GTR model given the provided data Parameters ---------- mu : float Substitution rate W : nxn matrix Substitution matrix pi : n vector Equilibrium frequencies """ n = len(self.alphabet) self._mu = mu self.is_site_specific=False if pi is not None and len(pi)==n: Pi = np.array(pi) else: if pi is not None and len(pi)!=n: self.logger("length of equilibrium frequency vector does not match alphabet length", 4, warn=True) self.logger("Ignoring input equilibrium frequencies", 4, warn=True) Pi = np.ones(shape=(n,)) self._Pi = Pi/np.sum(Pi) if W is None or W.shape!=(n,n): if (W is not None) and W.shape!=(n,n): self.logger("Substitution matrix size does not match alphabet size", 4, warn=True) self.logger("Ignoring input substitution matrix", 4, warn=True) # flow matrix W = np.ones((n,n)) np.fill_diagonal(W, 0.0) np.fill_diagonal(W, - W.sum(axis=0)) else: W=np.array(W) self._W = 0.5*(W+W.T) np.fill_diagonal(W,0) average_rate = avg_transition(W, self.Pi, gap_index=self.gap_index) self._W = W/average_rate self._mu *=average_rate self._eig() @classmethod def custom(cls, mu=1.0, pi=None, W=None, **kwargs): """ Create a GTR model by specifying the matrix explicitly Parameters ---------- mu : float Substitution rate W : nxn matrix Substitution matrix pi : n vector Equilibrium frequencies **kwargs: Key word arguments to 
be passed Keyword Args ------------ alphabet : str Specify alphabet when applicable. If the alphabet specification is required, but no alphabet is specified, the nucleotide alphabet will be used as default. """ gtr = cls(**kwargs) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr @staticmethod def standard(model, **kwargs): """ Create standard model of molecular evolution. Parameters ---------- model : str Model to create. See list of available models below **kwargs: Key word arguments to be passed to the model **Available models** - JC69: Jukes-Cantor 1969 model. This model assumes equal frequencies of the nucleotides and equal transition rates between nucleotide states. For more info, see: Jukes and Cantor (1969). Evolution of Protein Molecules. New York: Academic Press. pp. 21-132. To create this model, use: :code:`mygtr = GTR.standard(model='jc69', mu=, alphabet=)` :code:`my_mu` - substitution rate (float) :code:`my_alph` - alphabet (str: :code:`'nuc'` or :code:`'nuc_nogap'`) - K80: Kimura 1980 model. Assumes equal concentrations across nucleotides, but allows different rates between transitions and transversions. The ratio of the transversion/transition rates is given by kappa parameter. For more info, see Kimura (1980), J. Mol. Evol. 16 (2): 111-120. doi:10.1007/BF01731581. Current implementation of the model does not account for the gaps. :code:`mygtr = GTR.standard(model='k80', mu=, kappa=)` :code:`mu` - overall substitution rate (float) :code:`kappa` - ratio of transversion/transition rates (float) - F81: Felsenstein 1981 model. Assumes non-equal concentrations across nucleotides, but the transition rate between all states is assumed to be equal. See Felsenstein (1981), J. Mol. Evol. 17 (6): 368-376. doi:10.1007/BF01734359 for details. :code:`mygtr = GTR.standard(model='F81', mu=, pi=, alphabet=)` :code:`mu` - substitution rate (float) :code:`pi` - : nucleotide concentrations (numpy.array) :code:`alphabet' - alphabet to use. (:code:`'nuc'` or :code:`'nuc_nogap'`) - HKY85: Hasegawa, Kishino and Yano 1985 model. Allows different concentrations of the nucleotides (as in F81) + distinguishes between transition/transversion substitutions (similar to K80). Link: Hasegawa, Kishino, Yano (1985), J. Mol. Evol. 22 (2): 160-174. doi:10.1007/BF02101694 Current implementation of the model does not account for the gaps :code:`mygtr = GTR.standard(model='HKY85', mu=, pi=, kappa=)` :code:`mu` - substitution rate (float) :code:`pi` - : nucleotide concentrations (numpy.array) :code:`kappa` - ratio of transversion/transition rates (float) - T92: Tamura 1992 model. Extending Kimura (1980) model for the case where a G+C-content bias exists. Link: Tamura K (1992), Mol. Biol. Evol. 9 (4): 678-687. DOI: 10.1093/oxfordjournals.molbev.a040752 Current implementation of the model does not account for the gaps :code:`mygtr = GTR.standard(model='T92', mu=, pi_GC=, kappa=)` :code:`mu` - substitution rate (float) :code:`pi_GC` - : relative GC content :code:`kappa` - ratio of transversion/transition rates (float) - TN93: Tamura and Nei 1993. The model distinguishes between the two different types of transition: (A <-> G) is allowed to have a different rate to (C<->T). Transversions have the same rate. The frequencies of the nucleotides are allowed to be different. Link: Tamura, Nei (1993), MolBiol Evol. 10 (3): 512-526. 
DOI:10.1093/oxfordjournals.molbev.a040023 :code:`mygtr = GTR.standard(model='TN93', mu=, kappa1=, kappa2=)` :code:`mu` - substitution rate (float) :code:`kappa1` - relative A<-->C, A<-->T, T<-->G and G<-->C rates (float) :code:`kappa` - relative C<-->T rate (float) .. Note:: Rate of A<-->G substitution is set to one. All other rates (kappa1, kappa2) are specified relative to this rate """ from .nuc_models import JC69, K80, F81, HKY85, T92, TN93 from .aa_models import JTT92 if model.lower() in ['jc', 'jc69', 'jukes-cantor', 'jukes-cantor69', 'jukescantor', 'jukescantor69']: model = JC69(**kwargs) elif model.lower() in ['k80', 'kimura80', 'kimura1980']: model = K80(**kwargs) elif model.lower() in ['f81', 'felsenstein81', 'felsenstein1981']: model = F81(**kwargs) elif model.lower() in ['hky', 'hky85', 'hky1985']: model = HKY85(**kwargs) elif model.lower() in ['t92', 'tamura92', 'tamura1992']: model = T92(**kwargs) elif model.lower() in ['tn93', 'tamura_nei_93', 'tamuranei93']: model = TN93(**kwargs) elif model.lower() in ['jtt', 'jtt92']: model = JTT92(**kwargs) else: raise KeyError("The GTR model '{}' is not in the list of available models." "".format(model)) model.mu = kwargs['mu'] if 'mu' in kwargs else 1.0 return model @classmethod def random(cls, mu=1.0, alphabet='nuc'): """ Creates a random GTR model Parameters ---------- mu : float Substitution rate alphabet : str Alphabet name (should be standard: 'nuc', 'nuc_gap', 'aa', 'aa_gap') """ alphabet=alphabets[alphabet] gtr = cls(alphabet) n = gtr.alphabet.shape[0] pi = 1.0*np.random.randint(0,100,size=(n)) W = 1.0*np.random.randint(0,100,size=(n,n)) # with gaps gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr @classmethod def infer(cls, nij, Ti, root_state, fixed_pi=None, pc=1.0, gap_limit=0.01, **kwargs): r""" Infer a GTR model by specifying the number of transitions and time spent in each character. The basic equation that is being solved is :math:`n_{ij} = pi_i W_{ij} T_j` where :math:`n_{ij}` are the transitions, :math:`pi_i` are the equilibrium state frequencies, :math:`W_{ij}` is the "substitution attempt matrix", while :math:`T_i` is the time on the tree spent in character state :math:`i`. To regularize the process, we add pseudocounts and also need to account for the fact that the root of the tree is in a particular state. the modified equation is :math:`n_{ij} + pc = pi_i W_{ij} (T_j+pc+root\_state)` Parameters ---------- nij : nxn matrix The number of times a change in character state is observed between state j and i Ti :n vector The time spent in each character state root_state : n vector The number of characters in state i in the sequence of the root node. pc : float Pseudocounts, this determines the lower cutoff on the rate when no substitutions are observed **kwargs: Key word arguments to be passed Keyword Args ------------ alphabet : str Specify alphabet when applicable. If the alphabet specification is required, but no alphabet is specified, the nucleotide alphabet will be used as default. 
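
        For illustration, a rough usage sketch with hand-made inputs (all
        numbers are arbitrary and only demonstrate the expected shapes for
        the five-letter 'nuc' alphabet):

            import numpy as np
            nij = 10.0 * np.ones((5, 5))       # observed i<->j transition counts
            np.fill_diagonal(nij, 0)
            Ti = 1000.0 * np.ones(5)           # time spent in each state
            root_state = np.array([250., 250., 250., 250., 0.])
            gtr = GTR.infer(nij, Ti, root_state, pc=1.0, alphabet='nuc')
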
""" from scipy import linalg as LA gtr = cls(**kwargs) gtr.logger("GTR: model inference ",1) dp = 1e-5 Nit = 40 pc_mat = pc*np.ones_like(nij) np.fill_diagonal(pc_mat, 0.0) np.fill_diagonal(nij, 0.0) count = 0 pi_old = np.zeros_like(Ti) if fixed_pi is None: pi = np.ones_like(Ti) else: pi = np.copy(fixed_pi) pi/=pi.sum() W_ij = np.ones_like(nij) mu = nij.sum()/Ti.sum() # if pi is fixed, this will immediately converge while LA.norm(pi_old-pi) > dp and count < Nit: gtr.logger(' '.join(map(str, ['GTR inference iteration',count,'change:',LA.norm(pi_old-pi)])), 3) count += 1 pi_old = np.copy(pi) W_ij = (nij+nij.T+2*pc_mat)/mu/(np.outer(pi,Ti) + np.outer(Ti,pi) + ttconf.TINY_NUMBER + 2*pc_mat) np.fill_diagonal(W_ij, 0) scale_factor = avg_transition(W_ij,pi, gap_index=gtr.gap_index) W_ij = W_ij/scale_factor if fixed_pi is None: pi = (np.sum(nij+pc_mat,axis=1)+root_state)/(ttconf.TINY_NUMBER + mu*np.dot(W_ij,Ti)+root_state.sum()+np.sum(pc_mat, axis=1)) pi /= pi.sum() mu = nij.sum()/(ttconf.TINY_NUMBER + np.sum(pi * (W_ij.dot(Ti)))) else: mu = nij.sum()/(ttconf.TINY_NUMBER + np.sum(pi * (W_ij.dot(pi)))*Ti.sum()) if count >= Nit: gtr.logger('WARNING: maximum number of iterations has been reached in GTR inference',3, warn=True) if LA.norm(pi_old-pi) > dp: gtr.logger('the iterative scheme has not converged',3,warn=True) elif np.abs(1-np.max(pi.sum(axis=0))) > dp: gtr.logger('the iterative scheme has converged, but proper normalization was not reached',3,warn=True) if gtr.gap_index is not None: if pi[gtr.gap_index] .9 * ttconf.MAX_BRANCH_LENGTH: self.logger("WARNING: GTR.optimal_t_compressed -- The branch length seems to be very long!", 4, warn=True) if opt["success"] != True: # return hamming distance: number of state pairs where state differs/all pairs new_len = np.sum(multiplicity[seq_pair[:,1]!=seq_pair[:,0]])/np.sum(multiplicity) return new_len def prob_t_profiles(self, profile_pair, multiplicity, t, return_log=False, ignore_gaps=True): ''' Calculate the probability of observing a node pair at a distance t Parameters ---------- profile_pair: numpy arrays Probability distributions of the nucleotides at either end of the branch. pp[0] = parent, pp[1] = child multiplicity : numpy array The number of times an alignment pattern is observed t : float Length of the branch separating parent and child ignore_gaps: bool If True, ignore mutations to and from gaps in distance calculations return_log : bool Whether or not to exponentiate the result ''' if t<0: logP = -ttconf.BIG_NUMBER else: Qt = self.expQt(t) if len(Qt.shape)==3: # site specific GTR model res = np.einsum('ai,ija,aj->a', profile_pair[1], Qt, profile_pair[0]) else: res = np.einsum('ai,ij,aj->a', profile_pair[1], Qt, profile_pair[0]) if ignore_gaps and (self.gap_index is not None): # calculate the probability that neither outgroup/node has a gap non_gap_frac = (1-profile_pair[0][:,self.gap_index])*(1-profile_pair[1][:,self.gap_index]) # weigh log LH by the non-gap probability logP = np.sum(multiplicity*np.log(res+ttconf.SUPERTINY_NUMBER)*non_gap_frac) else: logP = np.sum(multiplicity*np.log(res+ttconf.SUPERTINY_NUMBER)) return logP if return_log else np.exp(logP) def propagate_profile(self, profile, t, return_log=False): """ Compute the probability of the sequence state of the parent at time (t+t0, backwards), given the sequence state of the child (profile) at time t0. Parameters ---------- profile : numpy.array Sequence profile. Shape = (L, a), where L - sequence length, a - alphabet size. 
t : double Time to propagate return_log: bool If True, return log-probability Returns ------- res : np.array Profile of the sequence after time t in the past. Shape = (L, a), where L - sequence length, a - alphabet size. """ Qt = self.expQt(t) res = profile.dot(Qt) return np.log(res) if return_log else res def evolve(self, profile, t, return_log=False): """ Compute the probability of the sequence state of the child at time t later, given the parent profile. Parameters ---------- profile : numpy.array Sequence profile. Shape = (L, a), where L - sequence length, a - alphabet size. t : double Time to propagate return_log: bool If True, return log-probability Returns ------- res : np.array Profile of the sequence after time t in the future. Shape = (L, a), where L - sequence length, a - alphabet size. """ Qt = self.expQt(t).T res = profile.dot(Qt) return np.log(res) if return_log else res def _exp_lt(self, t): """ Parameters ---------- t : float time to propagate Returns -------- exp_lt : numpy.array Array of values exp(lambda(i) * t), where (i) - alphabet index (the eigenvalue number). """ return np.exp(self.mu * t * self.eigenvals) def expQt(self, t): ''' Parameters ---------- t : float Time to propagate Returns -------- expQt : numpy.array Matrix exponential of exo(Qt) ''' eLambdaT = np.diag(self._exp_lt(t)) # vector length = a Qs = self.v.dot(eLambdaT.dot(self.v_inv)) # This is P(nuc1 | given nuc_2) return np.maximum(0,Qs) def expQs(self, s): return self.expQt(s**2) def expQsds(self, s): r''' Returns ------- Qtds : Returns 2 V_{ij} \lambda_j s e^{\lambda_j s**2 } V^{-1}_{jk} This is the derivative of the branch probability with respect to s=\sqrt(t) ''' lambda_eLambdaT = np.diag(2.0*self._exp_lt(s**2)*self.eigenvals*s) return self.v.dot(lambda_eLambdaT.dot(self.v_inv)) def sequence_logLH(self,seq, pattern_multiplicity=None): """ Returns the log-likelihood of sampling a sequence from equilibrium frequency. Expects a sequence as numpy array Parameters ---------- seq : numpy array Compressed sequence as an array of chars pattern_multiplicity : numpy_array The number of times each position in sequence is observed in the initial alignment. If None, sequence is assumed to be not compressed """ if pattern_multiplicity is None: pattern_multiplicity = np.ones_like(seq, dtype=float) return np.sum([np.sum((seq==state)*pattern_multiplicity*np.log(self.Pi[si])) for si,state in enumerate(self.alphabet)]) def average_rate(self): return self.mu*avg_transition(self.W, self.Pi, gap_index=self.gap_index) def save_to_npz(self, outfile): full_gtr = self.mu * np.dot(self.Pi, self.W) desc=np.array(["GTR matrix description\n", "Substitution rate: " + str(self.mu)]) np.savez(outfile, description=desc, full_gtr=full_gtr, char_dist=self.Pi, flow_matrix=self.W) if __name__ == "__main__": pass treetime-0.8.6/treetime/gtr_site_specific.py000066400000000000000000000372741417362145000212400ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import from collections import defaultdict import numpy as np from . 
import config as ttconf from .seq_utils import alphabets, profile_maps, alphabet_synonyms from .gtr import GTR class GTR_site_specific(GTR): """ Defines General-Time-Reversible model of character evolution that allows for different models at different sites in the alignment """ def __init__(self, seq_len=1, approximate=True, **kwargs): """constructor for site specfic GTR models Parameters ---------- seq_len : int, optional number of sites, determines dimensions of frequency vectors etc approximate : bool, optional use linear interpolation for exponentiated matrices to speed up calcuations **kwargs Description """ self.seq_len=seq_len self.approximate = approximate super(GTR_site_specific, self).__init__(**kwargs) self.is_site_specific=True @property def Q(self): """function that return the product of the transition matrix and the equilibrium frequencies to obtain the rate matrix of the GTR model """ tmp = np.einsum('ia,ij->ija', self.Pi, self.W) diag_vals = np.sum(tmp, axis=0) for x in range(tmp.shape[-1]): np.fill_diagonal(tmp[:,:,x], -diag_vals[:,x]) return tmp def assign_rates(self, mu=1.0, pi=None, W=None): """ Overwrite the GTR model given the provided data Parameters ---------- mu : float Substitution rate W : nxn matrix Substitution matrix pi : n vector Equilibrium frequencies """ if not np.isscalar(mu) and pi is not None and len(pi.shape)==2: if mu.shape[0]!=pi.shape[1]: raise ValueError("GTR_site_specific: length of rate vector (got {}) and equilibrium frequency vector (got {}) must match!".format(mu.shape[0], pi.shape[1])) n = len(self.alphabet) if np.isscalar(mu): self._mu = mu*np.ones(self.seq_len) else: self._mu = np.copy(mu) self.seq_len = mu.shape[0] if pi is not None and pi.shape[0]==n and len(pi.shape)==2: self.seq_len = pi.shape[1] Pi = np.copy(pi) else: if pi is not None: if len(pi)==n: Pi = np.repeat([pi], self.seq_len, axis=0).T else: raise ValueError("GTR_site_specific: length of equilibrium frequency vector (got {}) does not match alphabet length {}".format(len(pi), n)) else: Pi = np.ones(shape=(n,self.seq_len)) self._Pi = Pi/np.sum(Pi, axis=0) if W is None or W.shape!=(n,n): if (W is not None) and W.shape!=(n,n): raise ValueError("GTR_site_specific: Size of substitution matrix (got {}) does not match alphabet length {}".format(W.shape, n)) W = np.ones((n,n)) np.fill_diagonal(W, 0.0) np.fill_diagonal(W, - W.sum(axis=0)) else: W=0.5*(np.copy(W)+np.copy(W).T) np.fill_diagonal(W,0) average_rate = np.einsum('ia,ij,ja',self.Pi, W, self.Pi)/self.seq_len # average_rate = W.dot(avg_pi).dot(avg_pi) self._W = W/average_rate self._mu *=average_rate self.is_site_specific=True self._eig() self._make_expQt_interpolator() @classmethod def random(cls, L=1, avg_mu=1.0, alphabet='nuc', pi_dirichlet_alpha=1, W_dirichlet_alpha=3.0, mu_gamma_alpha=3.0): """ Creates a random GTR model Parameters ---------- L : int, optional number of sites for which to generate a model avg_mu : float Substitution rate alphabet : str Alphabet name (should be standard: 'nuc', 'nuc_gap', 'aa', 'aa_gap') pi_dirichlet_alpha : float, optional parameter of dirichlet distribution W_dirichlet_alpha : float, optional parameter of dirichlet distribution mu_gamma_alpha : float, optional parameter of dirichlet distribution Returns ------- GTR_site_specific model with randomly sampled frequencies """ from scipy.stats import gamma alphabet=alphabets[alphabet] gtr = cls(alphabet=alphabet, seq_len=L) n = gtr.alphabet.shape[0] # Dirichlet distribution == l_1 normalized vector of samples of the Gamma distribution if 
pi_dirichlet_alpha: pi = 1.0*gamma.rvs(pi_dirichlet_alpha, size=(n,L)) else: pi = np.ones((n,L)) pi /= pi.sum(axis=0) if W_dirichlet_alpha: tmp = 1.0*gamma.rvs(W_dirichlet_alpha, size=(n,n)) else: tmp = np.ones((n,n)) tmp = np.tril(tmp,k=-1) W = tmp + tmp.T if mu_gamma_alpha: mu = gamma.rvs(mu_gamma_alpha, size=(L,)) else: mu = np.ones(L) gtr.assign_rates(mu=mu, pi=pi, W=W) gtr.mu *= avg_mu/np.mean(gtr.average_rate()) return gtr @classmethod def custom(cls, mu=1.0, pi=None, W=None, **kwargs): """ Create a GTR model by specifying the matrix explicitly Parameters ---------- mu : float Substitution rate W : nxn matrix Substitution matrix pi : n vector Equilibrium frequencies **kwargs: Key word arguments to be passed to the constructor Keyword Args ------------ alphabet : str Specify alphabet when applicable. If the alphabet specification is required, but no alphabet is specified, the nucleotide alphabet will be used as default. """ gtr = cls(**kwargs) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr @classmethod def infer(cls, sub_ija, T_ia, root_state, pc=1.0, gap_limit=0.01, Nit=30, dp=1e-5, **kwargs): r""" Infer a GTR model by specifying the number of transitions and time spent in each character. The basic equation that is being solved is :math:`n_{ij} = pi_i W_{ij} T_j` where :math:`n_{ij}` are the transitions, :math:`pi_i` are the equilibrium state frequencies, :math:`W_{ij}` is the "substitution attempt matrix", while :math:`T_i` is the time on the tree spent in character state :math:`i`. To regularize the process, we add pseudocounts and also need to account for the fact that the root of the tree is in a particular state. the modified equation is :math:`n_{ij} + pc = pi_i W_{ij} (T_j+pc+root\_state)` Parameters ---------- nija : nxn matrix The number of times a change in character state is observed between state j and i at position a Tia :n vector The time spent in each character state at position a root_state : np.array probability that site a is in state i. pc : float Pseudocounts, this determines the lower cutoff on the rate when no substitutions are observed **kwargs: Key word arguments to be passed Keyword Args ------------ alphabet : str Specify alphabet when applicable. If the alphabet specification is required, but no alphabet is specified, the nucleotide alphabet will be used as default. 
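
        As a sanity check on the dimension conventions used throughout this
        class, a randomly sampled model can be inspected (a sketch; the
        sequence length 100 is arbitrary):

            model = GTR_site_specific.random(L=100, alphabet='nuc')
            model.Pi.shape              # (5, 100): one frequency vector per site
            model.mu.shape              # (100,): one rate per site
            model.average_rate().shape  # (100,): per-site average rate
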
""" from scipy import linalg as LA gtr = cls(**kwargs) gtr.logger("GTR: model inference ",1) q = len(gtr.alphabet) L = sub_ija.shape[-1] n_iter = 0 n_ija = np.copy(sub_ija) n_ija[range(q),range(q),:] = 0 n_ij = n_ija.sum(axis=-1) m_ia = np.sum(n_ija,axis=1) + root_state + pc n_a = n_ija.sum(axis=1).sum(axis=0) + pc Lambda = np.sum(root_state,axis=0) + q*pc p_ia_old=np.zeros((q,L)) p_ia = np.ones((q,L))/q mu_a = np.ones(L) W_ij = np.ones((q,q)) - np.eye(q) while (LA.norm(p_ia_old-p_ia)>dp) and n_itera', p_ia, W_ij, T_ia)) if n_iter >= Nit: gtr.logger('WARNING: maximum number of iterations has been reached in GTR inference',3, warn=True) if LA.norm(p_ia_old-p_ia) > dp: gtr.logger('the iterative scheme has not converged',3,warn=True) if gtr.gap_index is not None: for p in range(p_ia.shape[-1]): if p_ia[gtr.gap_index,p]2: W = np.copy(self.W[:,:,pi]) np.fill_diagonal(W, 0) elif pi==0: np.fill_diagonal(self.W, 0) W=self.W ev, evec, evec_inv = self._eig_single_site(W,self.Pi[:,pi]) eigvals.append(ev) vec.append(evec) vec_inv.append(evec_inv) self.eigenvals = np.array(eigvals).T self.v = np.swapaxes(vec,0,-1) self.v_inv = np.swapaxes(vec_inv, 0,-1) def _make_expQt_interpolator(self): """Function that evaluates the exponentiated substitution matrix at multiple time points and constructs a linear interpolation object """ self.rate_scale = self.average_rate().mean() t_grid = (1.0/self.rate_scale)*np.concatenate((np.linspace(0,.1,11)[:-1], np.linspace(.1,1,21)[:-1], np.linspace(1,5,21)[:-1], np.linspace(5,10,11))) stacked_expQT = np.stack([self._expQt(t) for t in t_grid], axis=0) from scipy.interpolate import interp1d self.expQt_interpolator = interp1d(t_grid, stacked_expQT, axis=0, assume_sorted=True, copy=False, kind='linear') def _expQt(self, t): """Raw numerical matrix exponentiation using the diagonalized matrix. This is the computational bottleneck in many simulations. Parameters ---------- t : float time Returns ------- np.array stack of matrices for each site """ eLambdaT = np.exp(t*self.mu*self.eigenvals) return np.einsum('jia,ja,kja->ika', self.v, eLambdaT, self.v_inv) def expQt(self, t): if t*self.rate_scale<10 and self.approximate: return self.expQt_interpolator(t) else: return self._expQt(t) def prop_t_compressed(self, seq_pair, multiplicity, t, return_log=False): print("NOT IMPEMENTED") def propagate_profile(self, profile, t, return_log=False): """ Compute the probability of the sequence state of the parent at time (t+t0, backwards), given the sequence state of the child (profile) at time t0. Parameters ---------- profile : numpy.array Sequence profile. Shape = (L, a), where L - sequence length, a - alphabet size. t : double Time to propagate return_log: bool If True, return log-probability Returns ------- ` res : np.array Profile of the sequence after time t in the past. Shape = (L, a), where L - sequence length, a - alphabet size. """ Qt = self.expQt(t) res = np.einsum('ai,ija->aj', profile, Qt) return np.log(np.maximum(ttconf.TINY_NUMBER,res)) if return_log else np.maximum(0,res) def evolve(self, profile, t, return_log=False): """ Compute the probability of the sequence state of the child at time t later, given the parent profile. Parameters ---------- profile : numpy.array Sequence profile. Shape = (L, a), where L - sequence length, a - alphabet size. t : double Time to propagate return_log: bool If True, return log-probability Returns ------- res : np.array Profile of the sequence after time t in the future. Shape = (L, a), where L - sequence length, a - alphabet size. 
""" Qt = self.expQt(t) res = np.einsum('ai,jia->aj', profile, Qt) return np.log(res) if return_log else res def prob_t(self, seq_p, seq_ch, t, pattern_multiplicity = None, return_log=False, ignore_gaps=True): """ Compute the probability to observe seq_ch (child sequence) after time t starting from seq_p (parent sequence). Parameters ---------- seq_p : character array Parent sequence seq_c : character array Child sequence t : double Time (branch len) separating the profiles. pattern_multiplicity : numpy array If sequences are reduced by combining identical alignment patterns, these multplicities need to be accounted for when counting the number of mutations across a branch. If None, all pattern are assumed to occur exactly once. return_log : bool It True, return log-probability. Returns ------- prob : np.array Resulting probability """ if t<0: logP = -ttconf.BIG_NUMBER else: tmp_eQT = self.expQt(t) bad_indices=(tmp_eQT==0) logQt = np.log(tmp_eQT + ttconf.TINY_NUMBER*(bad_indices)) logQt[np.isnan(logQt) | np.isinf(logQt) | bad_indices] = -ttconf.BIG_NUMBER seq_indices_c = np.zeros(len(seq_ch), dtype=int) seq_indices_p = np.zeros(len(seq_p), dtype=int) for ai, a in enumerate(self.alphabet): seq_indices_p[seq_p==a] = ai seq_indices_c[seq_ch==a] = ai if len(logQt.shape)==2: logP = np.sum(logQt[seq_indices_p, seq_indices_c]*pattern_multiplicity) else: logP = np.sum(logQt[seq_indices_p, seq_indices_c, np.arange(len(seq_ch))]*pattern_multiplicity) return logP if return_log else np.exp(logP) def average_rate(self): if self.Pi.shape[1]>1: return np.einsum('a,ia,ij,ja->a',self.mu, self.Pi, self.W, self.Pi) else: return self.mu*np.einsum('ia,ij,ja->a',self.Pi, self.W, self.Pi) treetime-0.8.6/treetime/merger_models.py000066400000000000000000000304531417362145000203670ustar00rootroot00000000000000""" methods to calculate merger models for a time tree """ from __future__ import print_function, division, absolute_import import numpy as np import scipy.special as sf from scipy.interpolate import interp1d from Bio import AlignIO, Phylo try: from collections.abc import Iterable except ImportError: from collections import Iterable from . import config as ttconf from .utils import clip class Coalescent(object): """docstring for Coalescent""" def __init__(self, tree, Tc=0.001, logger=None, date2dist=None): super(Coalescent, self).__init__() self.tree = tree self.calc_branch_count() self.set_Tc(Tc) self.date2dist = date2dist if logger is None: def f(*args): print(*args) self.logger = f else: self.logger = logger def set_Tc(self, Tc, T=None): ''' initialize the merger model with a coalescent time Args: - Tc: a float or an iterable, if iterable another argument T of same shape is required - T: an array like of same shape as Tc that specifies the time pivots corresponding to Tc note that this array is ordered past to present corresponding to decreasing 'time before present' values Returns: - None ''' if isinstance(Tc, Iterable): if len(Tc)==len(T): x = np.concatenate(([ttconf.BIG_NUMBER], T, [-ttconf.BIG_NUMBER])) y = np.concatenate(([Tc[0]], Tc, [Tc[-1]])) self.Tc = interp1d(x,y) else: self.logger("need Tc values and Timepoints of equal length",2,warn=True) self.Tc = interp1d([-ttconf.BIG_NUMBER, ttconf.BIG_NUMBER], [1e-5, 1e-5]) else: self.Tc = interp1d([-ttconf.BIG_NUMBER, ttconf.BIG_NUMBER], [Tc+ttconf.TINY_NUMBER, Tc+ttconf.TINY_NUMBER]) self.calc_integral_merger_rate() def calc_branch_count(self): ''' calculates an interpolation object that maps time to the number of concurrent branches in the tree. 
The result is stored in self.nbranches.
        '''
        # make a list of (time, merger or loss event) via a root-first iteration
        self.tree_events = np.array(sorted([(n.time_before_present, len(n.clades)-1)
                                for n in self.tree.find_clades() if not n.bad_branch], key=lambda x:-x[0]))

        # collapse multiple events at one time point into sum of changes
        from collections import defaultdict
        dn_branch = defaultdict(int)
        for (t, dn) in self.tree_events:
            dn_branch[t]+=dn
        unique_mergers = np.array(sorted(dn_branch.items(), key = lambda x:-x[0]))

        # calculate the branch count at each point summing the delta branch counts
        nbranches = [[ttconf.BIG_NUMBER, 1], [unique_mergers[0,0]+ttconf.TINY_NUMBER, 1]]
        for ti, (t, dn) in enumerate(unique_mergers[:-1]):
            new_n = nbranches[-1][1]+dn
            next_t = unique_mergers[ti+1,0]+ttconf.TINY_NUMBER
            nbranches.append([t, new_n])
            nbranches.append([next_t, new_n])

        new_n += unique_mergers[-1,1]
        nbranches.append([next_t, new_n])
        nbranches.append([-ttconf.BIG_NUMBER, new_n])
        nbranches=np.array(nbranches)

        self.nbranches = interp1d(nbranches[:,0], nbranches[:,1], kind='linear')

    def calc_integral_merger_rate(self):
        '''
        calculates the integral int_0^t (k(t')-1)/2Tc(t') dt' and stores it as
        self.integral_merger_rate. Differences of this quantity evaluated at
        different time points give the cost of a branch.
        '''
        # integrate the piecewise constant branch count function.
        tvals = np.unique(self.nbranches.x[1:-1])
        rate = self.branch_merger_rate(tvals)
        avg_rate = 0.5*(rate[1:] + rate[:-1])
        cost = np.concatenate(([0],np.cumsum(np.diff(tvals)*avg_rate)))
        # make interpolation objects for the branch count and its integral
        # the latter is scaled by 0.5/Tc
        # need to add extra point at very large time before present to
        # prevent 'out of interpolation range' errors
        self.integral_merger_rate = interp1d(np.concatenate(([-ttconf.BIG_NUMBER], tvals,[ttconf.BIG_NUMBER])),
                                np.concatenate(([cost[0]], cost,[cost[-1]])), kind='linear')

    def branch_merger_rate(self, t):
        # returns the rate at which one particular branch merges with any other branch
        # note that we always have a positive merger rate by capping the
        # number of branches at 0.5 from below. in these regions, the
        # function should only be called if the tree changes.
        return 0.5*np.maximum(0.5,self.nbranches(t)-1.0)/self.Tc(t)

    def total_merger_rate(self, t):
        # returns the rate at which any branch merges with any other branch
        # note that we always have a positive merger rate by capping the
        # number of branches at 0.5 from below. in these regions, the
        # function should only be called if the tree changes.
        nlineages = np.maximum(0.5,self.nbranches(t)-1.0)
        return 0.5*nlineages*(nlineages+1)/self.Tc(t)

    def cost(self, t_node, branch_length, multiplicity=2.0):
        '''
        returns the cost associated with a branch starting at t_node;
        t_node is time before present, the branch goes back in time

        Args:
         - t_node: time of the node
         - branch_length: branch length, determines when this branch merges with its sister
         - multiplicity: 2 if the merger is binary, higher if this is a polytomy
        '''
        merger_time = t_node+branch_length
        return self.integral_merger_rate(merger_time) - self.integral_merger_rate(t_node)\
                - np.log(self.total_merger_rate(merger_time))*(multiplicity-1.0)/multiplicity

    def attach_to_tree(self):
        '''
        attaches the merger cost to each branch length interpolator in the tree.
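
        For reference, the attached cost is the negative log of the coalescent
        likelihood contribution of a branch (a restatement of cost() above,
        not new behavior): for a branch starting at time t with length tau and
        merger multiplicity m,

            cost(t, tau) = integral_merger_rate(t+tau) - integral_merger_rate(t)
                           - log(total_merger_rate(t+tau)) * (m-1)/m
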
        '''
        for clade in self.tree.find_clades():
            if clade.up is not None:
                clade.branch_length_interpolator.merger_cost = self.cost

    def total_LH(self):
        LH = 0.0 #np.log(self.total_merger_rate([node.time_before_present for node in self.tree.get_nonterminals()])).sum()
        for node in self.tree.find_clades():
            if node.up:
                LH -= self.cost(node.time_before_present, node.branch_length)
        return LH

    def optimize_Tc(self):
        '''
        determines the coalescent time scale that optimizes the coalescent
        likelihood of the tree
        '''
        from scipy.optimize import minimize_scalar
        initial_Tc = self.Tc
        def cost(Tc):
            self.set_Tc(Tc)
            return -self.total_LH()

        sol = minimize_scalar(cost, bounds=[ttconf.TINY_NUMBER,10.0])
        if "success" in sol and sol["success"]:
            self.set_Tc(sol['x'])
        else:
            self.logger("merger_models:optimize_Tc: optimization of coalescent time scale failed: " + str(sol), 0, warn=True)
            self.set_Tc(initial_Tc.y, T=initial_Tc.x)

    def optimize_skyline(self, n_points=20, stiffness=2.0, method = 'SLSQP',
                         tol=0.03, regularization=10.0, **kwarks):
        '''
        optimize the trajectory of the merger rate 1./T_c to maximize the
        coalescent likelihood.

        parameters:
            n_points       -- number of pivots of the Tc interpolation object
            stiffness      -- penalty for rapid changes in log(Tc)
            method         -- optimization method passed to scipy
            tol            -- optimization tolerance
            regularization -- cost of moving logTc outside of the range [-100,0].
                              The merger rate is measured in branch length units;
                              plausible rates should never be outside this window.
        '''
        self.logger("Coalescent:optimize_skyline:... current LH: %f"%self.total_LH(),2)
        from scipy.optimize import minimize
        initial_Tc = self.Tc
        tvals = np.linspace(self.tree_events[0,0], self.tree_events[-1,0], n_points)
        def cost(logTc):
            # cap log Tc to avoid under or overflow and nan in logs
            self.set_Tc(np.exp(clip(logTc, -200, 100)), tvals)
            neglogLH = -self.total_LH() + stiffness*np.sum(np.diff(logTc)**2) \
                       + np.sum((logTc>0)*logTc)*regularization\
                       - np.sum((logTc<-100)*logTc)*regularization
            return neglogLH

        sol = minimize(cost, np.ones_like(tvals)*np.log(self.Tc.y.mean()), method=method, tol=tol)
        if "success" in sol and sol["success"]:
            dlogTc = 0.1
            opt_logTc = sol['x']
            dcost = []
            for ii in range(len(opt_logTc)):
                tmp = opt_logTc.copy()
                tmp[ii]+=dlogTc
                cost_plus = cost(tmp)
                tmp[ii]-=2*dlogTc
                cost_minus = cost(tmp)
                dcost.append([cost_minus, cost_plus])

            dcost = np.array(dcost)
            optimal_cost = cost(opt_logTc)
            self.confidence = dlogTc/np.sqrt(np.abs(2*optimal_cost - dcost[:,0] - dcost[:,1]))
            self.logger("Coalescent:optimize_skyline:...done. new LH: %f"%self.total_LH(),2)
        else:
            self.set_Tc(initial_Tc.y, T=initial_Tc.x)
            self.confidence = [np.nan for i in initial_Tc.x]
            self.logger("Coalescent:optimize_skyline:...failed:"+str(sol),0, warn=True)

    def skyline_empirical(self, gen=1.0, n_points = 20):
        '''
        returns the skyline, i.e., an estimate of the inverse rate of
        coalescence. Here, the skyline is estimated from a sliding window
        average of the observed mergers, i.e., without reference to the
        coalescence likelihood.

        parameters:
            gen -- number of generations per year.
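
        A hypothetical end-to-end sketch (assumes `tree` is a dated phylogeny
        in which every node has time_before_present set, e.g. by a TreeTime run):

            coal = Coalescent(tree, Tc=0.01)
            coal.optimize_skyline(n_points=20)   # fit a piecewise log(Tc) by maximum likelihood
            coal.skyline_empirical(gen=50.0)     # sliding-window estimate from observed mergers
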
''' mergers = self.tree_events[:,1]>0 merger_tvals = self.tree_events[mergers,0] nlineages = self.nbranches(merger_tvals-ttconf.TINY_NUMBER) expected_merger_density = nlineages*(nlineages-1)*0.5 nmergers = len(mergers) et = merger_tvals ev = 1.0/expected_merger_density # reduce the window size if there are few events in the tree if 2*n_points>len(expected_merger_density): n_points = len(ev)//4 # smoothes with a sliding window over data points avg = np.sum(ev)/np.abs(et[0]-et[-1]) dt = et[0]-et[-1] mid_points = np.concatenate(([et[0]-0.5*(et[1]-et[0])], 0.5*(et[1:] + et[:-1]), [et[-1]+0.5*(et[-1]-et[-2])])) # this smoothes the ratio of expected and observed merger rate self.Tc_inv = interp1d(mid_points[n_points:-n_points], [np.sum(ev[(et>=l)&(etg.xmin and t-tauf.xmin tau_max = min(t_val - f.xmin, g.xmax) else: ## tau>g.xmin and t+tau>f.xmin tau_min = max(f.xmin-t_val, g.xmin) ## tau4*center_width: grid_right = grid_center[-1] + right_range*(np.linspace(0, 1, n)**2.0) elif right_range>0: # use linear grid the right_range is comparable to center_width grid_right = grid_center[-1] + right_range*np.linspace(0,1, int(min(n,1+0.5*n*right_range/center_width))) else: grid_right =[] left_range = grid_center[0]-tmin if left_range>4*center_width: grid_left = tmin + left_range*(np.linspace(0, 1, n)**2.0) elif left_range>0: grid_left = tmin + left_range*np.linspace(0,1, int(min(n,1+0.5*n*left_range/center_width))) else: grid_left =[] if tmin>-1: grid_zero_left = tmin + (tmax-tmin)*np.linspace(0,0.01,11)**2 else: grid_zero_left = [tmin] if tmax<1: grid_zero_right = tmax - (tmax-tmin)*np.linspace(0,0.01,11)**2 else: grid_zero_right = [tmax] # make grid and calculate convolution t_grid_0 = np.unique(np.concatenate([grid_zero_left, grid_left[:-1], grid_center, grid_right[1:], grid_zero_right])) t_grid_0 = t_grid_0[(t_grid_0 > tmin-ttconf.TINY_NUMBER) & (t_grid_0 < tmax+ttconf.TINY_NUMBER)] # res0 - the values of the convolution (integral or max) # t_0 - the value, at which the res0 achieves maximum # (when determining the maximum of the integrand, otherwise meaningless) res_0, t_0 = np.array([conv_in_point(t_val) for t_val in t_grid_0]).T # refine grid as necessary and add new points # calculate interpolation error at all internal points [2:-2] bc end points are sometime off scale interp_error = np.abs(res_0[3:-1]+res_0[1:-3]-2*res_0[2:-2]) # determine the number of extra points needed, criterion depends on distance from peak dy dy = (res_0[2:-2]-res_0.min()) dx = np.diff(t_grid_0) refine_factor = np.minimum(np.minimum(np.array(np.floor(np.sqrt(interp_error/(rel_tol*(1+(dy/yc)**4)))), dtype=int), np.array(100*(dx[1:-2]+dx[2:-1])/min_fwhm, dtype=int)), 10) insert_point_idx = np.zeros(interp_error.shape[0]+1, dtype=int) insert_point_idx[1:] = refine_factor insert_point_idx[:-1] += refine_factor # add additional points if there are any to add if np.sum(insert_point_idx): add_x = np.concatenate([np.linspace(t1,t2,n+2)[1:-1] for t1,t2,n in zip(t_grid_0[1:-2], t_grid_0[2:-1], insert_point_idx) if n>0]) # calculate convolution at these points add_y, add_t = np.array([conv_in_point(t_val) for t_val in add_x]).T t_grid_0 = np.concatenate((t_grid_0, add_x)) res_0 = np.concatenate ((res_0, add_y)) t_0 = np.concatenate ((t_0, add_t)) # instantiate the new interpolation object and return res_y = cls(t_grid_0, res_0, is_log=True, kind='linear') # the interpolation object, which is used to store the value of the # grid, which maximizes the convolution (for 'max' option), # or flat -1 distribution (for 'integral' option) # 
this grid is the optimal branch length res_t = Distribution(t_grid_0, t_0, is_log=True, min_width=node_interp.min_width, kind='linear') return res_y, res_t treetime-0.8.6/treetime/nuc_models.py000066400000000000000000000162561417362145000177000ustar00rootroot00000000000000#!/usr/local/bin/python # -*- coding: utf-8 -*- from __future__ import division, print_function, absolute_import import numpy as np from .seq_utils import alphabets, profile_maps from .gtr import GTR def get_alphabet(a): if type(a)==str and a in alphabets: return alphabets[a] else: try: return np.array(a) except: raise TypeError def JC69 (mu=1.0, alphabet="nuc", **kwargs): """ Jukes-Cantor 1969 model. This model assumes equal concentrations of the nucleotides and equal transition rates between nucleotide states. For more info, see: Jukes and Cantor (1969). Evolution of Protein Molecules. New York: Academic Press. pp. 21–132 Parameters ----------- mu : float substitution rate alphabet : str or character array specify alphabet to use. Available alphabets are: 'nuc_nogap' - nucleotides only, gaps ignored 'nuc' - nucleotide alphabet with gaps, gaps can be ignored optionally """ num_chars = len(get_alphabet(alphabet)) W, pi = np.ones((num_chars,num_chars)), np.ones(num_chars) gtr = GTR(alphabet=alphabet) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr def K80(mu=1., kappa=0.1, **kwargs): """ Kimura 1980 model. Assumes equal concentrations across nucleotides, but allows different rates between transitions and transversions. The ratio of the transversion/transition rates is given by kappa parameter. For more info, see Kimura (1980), J. Mol. Evol. 16 (2): 111–120. doi:10.1007/BF01731581. Current implementation of the model does not account for the gaps. Parameters ----------- mu : float Overall substitution rate kappa : float Ratio of transversion/transition rates """ num_chars = len(alphabets['nuc_nogap']) pi = np.ones(len(alphabets['nuc_nogap']), dtype=float)/len(alphabets['nuc_nogap']) W = _create_transversion_transition_W(kappa) gtr = GTR(alphabet=alphabets['nuc_nogap']) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr def F81(mu=1.0, pi=None, alphabet="nuc", **kwargs): """ Felsenstein 1981 model. Assumes non-equal concentrations across nucleotides, but the transition rate between all states is assumed to be equal. See Felsenstein (1981), J. Mol. Evol. 17 (6): 368–376. doi:10.1007/BF01734359 for details. Current implementation of the model does not account for the gaps (treatment of gaps as characters is possible if specify alphabet='nuc_gap'). Parameters ----------- mu : float Substitution rate pi : numpy.array Nucleotide concentrations alphabet : str Alphabet to use. POsiible values are: ['nuc', 'nuc_gap'] Default 'nuc', which discounts al gaps. 'nuc_gap' alphabet enables treatmen of gaps as characters. """ if pi is None: pi=0.25*np.ones(4, dtype=float) num_chars = len(get_alphabet(alphabet)) pi = np.array(pi, dtype=float) if num_chars != len(pi) : pi = np.ones((num_chars, ), dtype=float) print ("GTR: Warning!The number of the characters in the alphabet does not match the " "shape of the vector of equilibrium frequencies Pi -- assuming equal frequencies for all states.") W = np.ones((num_chars,num_chars)) pi /= (1.0 * np.sum(pi)) gtr = GTR(alphabet=get_alphabet(alphabet)) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr def HKY85(mu=1.0, pi=None, kappa=0.1, **kwargs): """ Hasegawa, Kishino and Yano 1985 model. 
    Allows different concentrations of the nucleotides (as in F81)
    and distinguishes between transition/transversion substitutions
    (similar to K80).
    Link: Hasegawa, Kishino, Yano (1985), J. Mol. Evol. 22 (2): 160–174. doi:10.1007/BF02101694

    Current implementation of the model does not account for the gaps.

    Parameters
    -----------
     mu : float
        Substitution rate

     pi : numpy.array
        Nucleotide concentrations

     kappa : float
        Ratio of transversion/transition substitution rates
    """
    if pi is None: pi=0.25*np.ones(4, dtype=float)
    num_chars = len(alphabets['nuc_nogap'])
    if num_chars != pi.shape[0] :
        pi = np.ones((num_chars, ), dtype=float)
        print ("GTR: Warning! The number of the characters in the alphabet does not match the "
               "shape of the vector of equilibrium frequencies Pi -- assuming equal frequencies for all states.")
    W = _create_transversion_transition_W(kappa)
    pi /= pi.sum()
    gtr = GTR(alphabet=alphabets['nuc_nogap'])
    gtr.assign_rates(mu=mu, pi=pi, W=W)
    return gtr


def T92(mu=1.0, pi_GC=0.5, kappa=0.1, **kwargs):
    """
    Tamura 1992 model. Extends the Kimura (1980) model to the case where
    a G+C-content bias exists.
    Link: Tamura K (1992), Mol. Biol. Evol. 9 (4): 678–687. DOI: 10.1093/oxfordjournals.molbev.a040752

    Current implementation of the model does not account for the gaps.

    Parameters
    -----------
     mu : float
        substitution rate

     pi_GC : float
        relative GC content

     kappa : float
        relative transversion/transition rate
    """
    W = _create_transversion_transition_W(kappa)
    if pi_GC >=1.:
        raise ValueError("The relative GC content specified is larger than 1.0!")
    #                   A              C          G          T
    pi = np.array([(1.-pi_GC)*0.5, pi_GC*0.5, pi_GC*0.5, (1-pi_GC)*0.5])
    gtr = GTR(alphabet=alphabets['nuc_nogap'])
    gtr.assign_rates(mu=mu, pi=pi, W=W)
    return gtr


def TN93(mu=1.0, kappa1=1., kappa2=1., pi=None, **kwargs):
    """
    Tamura and Nei 1993. The model distinguishes between the two different types
    of transition: (A <-> G) is allowed to have a different rate to (C <-> T).
    Transversions have the same rate. The frequencies of the nucleotides are
    allowed to be different.
    Link: Tamura, Nei (1993), Mol Biol Evol. 10 (3): 512–526. DOI:10.1093/oxfordjournals.molbev.a040023

    Parameters
    -----------
     mu : float
        Substitution rate

     kappa1 : float
        relative A<-->C, A<-->T, T<-->G and G<-->C rates

     kappa2 : float
        relative C<-->T rate

    Note
    ----
    Rate of A<-->G substitution is set to one.
All other rates (kappa1, kappa2) are specified relative to this rate """ if pi is None: pi=0.25*np.ones(4, dtype=float) W = np.ones((4,4)) W = np.array([ [1, kappa1, 1, kappa1], [kappa1, 1, kappa1, kappa2], [1, kappa1, 1, kappa1], [kappa1, kappa2, kappa1, 1]], dtype=float) pi /=pi.sum() num_chars = len(alphabets['nuc_nogap']) if num_chars != pi.shape[0] : pi = np.ones((num_chars, ), dtype=float) print ("GTR: Warning!The number of the characters in the alphabet does not match the " "shape of the vector of equilibrium frequencies Pi -- assuming equal frequencies for all states.") gtr = GTR(alphabet=alphabets['nuc']) gtr.assign_rates(mu=mu, pi=pi, W=W) return gtr def _create_transversion_transition_W(kappa): """ Alphabet = [A, C, G, T] """ W = np.ones((4,4)) W[0, 2]=W[1, 3]=W[2, 0]=W[3,1]=kappa return W if __name__ == '__main__': pass treetime-0.8.6/treetime/seq_utils.py000066400000000000000000000333041417362145000175510ustar00rootroot00000000000000import numpy as np from Bio import Seq, SeqRecord alphabet_synonyms = {'nuc':'nuc', 'nucleotide':'nuc', 'aa':'aa', 'aminoacid':'aa', 'nuc_nogap':'nuc_nogap', 'nucleotide_nogap':'nuc_nogap', 'aa_nogap':'aa_nogap', 'aminoacid_nogap':'aa_nogap', 'DNA':'nuc', 'DNA_nogap':'nuc_nogap'} alphabets = { "nuc": np.array(['A', 'C', 'G', 'T', '-']), "nuc_nogap":np.array(['A', 'C', 'G', 'T']), "aa": np.array(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y', '*', '-']), "aa_nogap": np.array(['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y']) } profile_maps = { 'nuc':{ 'A': np.array([1, 0, 0, 0, 0], dtype='float'), 'C': np.array([0, 1, 0, 0, 0], dtype='float'), 'G': np.array([0, 0, 1, 0, 0], dtype='float'), 'T': np.array([0, 0, 0, 1, 0], dtype='float'), '-': np.array([0, 0, 0, 0, 1], dtype='float'), 'N': np.array([1, 1, 1, 1, 1], dtype='float'), 'X': np.array([1, 1, 1, 1, 1], dtype='float'), 'R': np.array([1, 0, 1, 0, 0], dtype='float'), 'Y': np.array([0, 1, 0, 1, 0], dtype='float'), 'S': np.array([0, 1, 1, 0, 0], dtype='float'), 'W': np.array([1, 0, 0, 1, 0], dtype='float'), 'K': np.array([0, 0, 1, 1, 0], dtype='float'), 'M': np.array([1, 1, 0, 0, 0], dtype='float'), 'D': np.array([1, 0, 1, 1, 0], dtype='float'), 'H': np.array([1, 1, 0, 1, 0], dtype='float'), 'B': np.array([0, 1, 1, 1, 0], dtype='float'), 'V': np.array([1, 1, 1, 0, 0], dtype='float') }, 'nuc_nogap':{ 'A': np.array([1, 0, 0, 0], dtype='float'), 'C': np.array([0, 1, 0, 0], dtype='float'), 'G': np.array([0, 0, 1, 0], dtype='float'), 'T': np.array([0, 0, 0, 1], dtype='float'), '-': np.array([1, 1, 1, 1], dtype='float'), # gaps are completely ignored in distance computations 'N': np.array([1, 1, 1, 1], dtype='float'), 'X': np.array([1, 1, 1, 1], dtype='float'), 'R': np.array([1, 0, 1, 0], dtype='float'), 'Y': np.array([0, 1, 0, 1], dtype='float'), 'S': np.array([0, 1, 1, 0], dtype='float'), 'W': np.array([1, 0, 0, 1], dtype='float'), 'K': np.array([0, 0, 1, 1], dtype='float'), 'M': np.array([1, 1, 0, 0], dtype='float'), 'D': np.array([1, 0, 1, 1], dtype='float'), 'H': np.array([1, 1, 0, 1], dtype='float'), 'B': np.array([0, 1, 1, 1], dtype='float'), 'V': np.array([1, 1, 1, 0], dtype='float') }, 'aa':{ 'A': np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Alanine Ala 'C': np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Cysteine Cys 'D': np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 
dtype='float'), #Aspartic AciD Asp 'E': np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamic Acid Glu 'F': np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Phenylalanine Phe 'G': np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glycine Gly 'H': np.array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Histidine His 'I': np.array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Isoleucine Ile 'K': np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Lysine Lys 'L': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Leucine Leu 'M': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Methionine Met 'N': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #AsparagiNe Asn 'P': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Proline Pro 'Q': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamine Gln 'R': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #ARginine Arg 'S': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype='float'), #Serine Ser 'T': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype='float'), #Threonine Thr 'V': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype='float'), #Valine Val 'W': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], dtype='float'), #Tryptophan Trp 'Y': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype='float'), #Tyrosine Tyr '*': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype='float'), #stop '-': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype='float'), #gap 'X': np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype='float'), #not specified/any 'B': np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Asparagine/Aspartic Acid Asx 'Z': np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamine/Glutamic Acid Glx }, 'aa_nogap':{ 'A': np.array([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Alanine Ala 'C': np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Cysteine Cys 'D': np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Aspartic AciD Asp 'E': np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamic Acid Glu 'F': np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Phenylalanine Phe 'G': np.array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Glycine Gly 'H': np.array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Histidine His 'I': np.array([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Isoleucine Ile 'K': np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Lysine Lys 'L': np.array([0, 0, 0, 0, 0, 0, 0, 0, 
0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Leucine Leu 'M': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Methionine Met 'N': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #AsparagiNe Asn 'P': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Proline Pro 'Q': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamine Gln 'R': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], dtype='float'), #ARginine Arg 'S': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], dtype='float'), #Serine Ser 'T': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], dtype='float'), #Threonine Thr 'V': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], dtype='float'), #Valine Val 'W': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], dtype='float'), #Tryptophan Trp 'Y': np.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], dtype='float'), #Tyrosine Tyr 'X': np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype='float'), #not specified/any 'B': np.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], dtype='float'), #Asparagine/Aspartic Acid Asx 'Z': np.array([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], dtype='float'), #Glutamine/Glutamic Acid Glx } } def extend_profile(gtr, aln, logger=None): tmp_unique_chars = [] for seq in aln: tmp_unique_chars.extend(np.unique(seq)) unique_chars = np.unique(tmp_unique_chars) for c in unique_chars: if c not in gtr.profile_map: gtr.profile_map[c] = np.ones(gtr.n_states) if logger: logger("WARNING: character %s is unknown. Treating it as missing information"%c,1,warn=True) def guess_alphabet(aln): total=0 nuc_count = 0 for seq in aln: total += len(seq) for n in np.array(list('acgtACGT-N')): nuc_count += np.sum(seq==n) if nuc_count>0.9*total: return 'nuc' else: return 'aa' def seq2array(seq, word_length=1, convert_upper=False, fill_overhangs=False, ambiguous='N'): """ Take the raw sequence, substitute the "overhanging" gaps with 'N' (missequenced), and convert the sequence to the numpy array of chars. Parameters ---------- seq : Biopython.SeqRecord, str, iterable Sequence as an object of SeqRecord, string or iterable word_length : int, optional 1 for nucleotide or amino acids, 3 for codons etc. convert_upper : bool, optional convert the sequence to upper case fill_overhangs : bool If True, substitute the "overhanging" gaps with ambiguous character symbol ambiguous : char Specify the character for ambiguous state ('N' default for nucleotide) Returns ------- sequence : np.array Sequence as 1D numpy array of chars """ if isinstance(seq, str): seq_str = seq elif isinstance(seq, Seq.Seq): seq_str = str(seq) elif isinstance(seq, SeqRecord.SeqRecord): seq_str = str(seq.seq) else: raise TypeError("seq2array: sequence must be Bio.Seq, Bio.SeqRecord, or string. 
Got "+str(seq)) if convert_upper: seq_str = seq_str.upper() if word_length==1: seq_array = np.array(list(seq_str)) else: if len(seq_str)%word_length: raise ValueError("sequence length has to be multiple of word length") seq_array = np.array([seq_str[i*word_length:(i+1)*word_length] for i in range(len(seq_str)/word_length)]) # substitute overhanging unsequenced tails if fill_overhangs: gaps = np.where(seq_array != '-')[0] if len(gaps): seq_array[:gaps[0]] = ambiguous seq_array[gaps[-1]+1:] = ambiguous else: seq_array[:] = ambiguous return seq_array def seq2prof(seq, profile_map): """ Convert the given character sequence into the profile according to the alphabet specified. Parameters ---------- seq : numpy.array Sequence to be converted to the profile profile_map : dic Mapping valid characters to profiles Returns ------- idx : numpy.array Profile for the character. Zero array if the character not found """ return np.array([profile_map[k] for k in seq]) def prof2seq(profile, gtr, sample_from_prof=False, normalize=True): """ Convert profile to sequence and normalize profile across sites. Parameters ---------- profile : numpy 2D array Profile. Shape of the profile should be (L x a), where L - sequence length, a - alphabet size. gtr : gtr.GTR Instance of the GTR class to supply the sequence alphabet collapse_prof : bool Whether to convert the profile to the delta-function Returns ------- seq : numpy.array Sequence as numpy array of length L prof_values : numpy.array Values of the profile for the chosen sequence characters (length L) idx : numpy.array Indices chosen from profile as array of length L """ # normalize profile such that probabilities at each site sum to one if normalize: tmp_profile, pre=normalize_profile(profile, return_offset=False) else: tmp_profile = profile # sample sequence according to the probabilities in the profile # (sampling from cumulative distribution over the different states) if sample_from_prof: cumdis = tmp_profile.cumsum(axis=1).T randnum = np.random.random(size=cumdis.shape[1]) idx = np.argmax(cumdis>=randnum, axis=0) else: idx = tmp_profile.argmax(axis=1) seq = gtr.alphabet[idx] # max LH over the alphabet prof_values = tmp_profile[np.arange(tmp_profile.shape[0]), idx] return seq, prof_values, idx def normalize_profile(in_profile, log=False, return_offset = True): """return a normalized version of a profile matrix Parameters ---------- in_profile : np.array shape Lxq, will be normalized to one across each row log : bool, optional treat the input as log probabilities return_offset : bool, optional return the log of the scale factor for each row Returns ------- tuple normalized profile (fresh np object) and offset (if return_offset==True) """ if log: tmp_prefactor = in_profile.max(axis=1) tmp_prof = np.exp(in_profile.T - tmp_prefactor).T else: tmp_prefactor = 0.0 tmp_prof = in_profile norm_vector = tmp_prof.sum(axis=1) return (np.einsum('ai,a->ai',tmp_prof,1.0/norm_vector), (np.log(norm_vector) + tmp_prefactor) if return_offset else None) treetime-0.8.6/treetime/seqgen.py000066400000000000000000000063311417362145000170230ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import from collections import defaultdict import numpy as np from . import config as ttconf from .seq_utils import alphabets, profile_maps, alphabet_synonyms, seq2array, seq2prof from .gtr import GTR from .treeanc import TreeAnc class SeqGen(TreeAnc): ''' Evolve sequences along a given tree with a specific GTR model. This class inherits from TreeAnc. 
'''
    def __init__(self, L, *args, **kwargs):
        """Instantiate. Mandatory arguments are the sequence length,
        the tree, and the GTR model.
        """
        super(SeqGen, self).__init__(seq_len=L, compress=False, **kwargs)

    def sample_from_profile(self, p):
        """returns a sequence sampled from a profile (column wise state probabilities)

        Parameters
        ----------
        p : np.array
            sequence profile with dimensions (L,q)

        Returns
        -------
        np.array
            (character) sequence as character array array(['A', 'C', 'G',...])
        """
        cum_p = p.cumsum(axis=1).T
        prand = np.random.random(self.seq_len)
        seq = self.gtr.alphabet[np.argmax(cum_p>prand, axis=0)]
        return seq

    def evolve(self, root_seq=None):
        """Evolve a root sequence along the tree. If no root sequence is
        provided, one will be sampled from the equilibrium probabilities
        of the GTR model.

        Parameters
        ----------
        root_seq : numpy character array, optional
            sequence to be used as the root sequence of the tree. If not
            given, a sequence will be sampled from the equilibrium
            probabilities of the GTR model.
        """
        # set root if not given; compare against None explicitly since
        # root_seq may be a numpy array with ambiguous truth value
        if root_seq is not None:
            self.tree.root.ancestral_sequence = seq2array(root_seq)
        else:
            if len(self.gtr.Pi.shape)==2:
                self.tree.root.ancestral_sequence = self.sample_from_profile(self.gtr.Pi.T)
            else:
                self.tree.root.ancestral_sequence = self.sample_from_profile(np.repeat([self.gtr.Pi], self.seq_len, axis=0))

        # generate sequences in preorder
        for n in self.tree.get_nonterminals(order='preorder'):
            profile_p = seq2prof(n.ancestral_sequence, self.gtr.profile_map)
            for c in n:
                profile = self.gtr.evolve(profile_p, c.branch_length)
                c.ancestral_sequence = self.sample_from_profile(profile)

        self.aln = self.get_aln()

    def get_aln(self, internal=False):
        """assemble a multiple sequence alignment from the evolved sequences.
        Optionally include internal sequences.

        Parameters
        ----------
        internal : bool, optional
            include sequences of internal nodes in the alignment

        Returns
        -------
        Bio.Align.MultipleSeqAlignment
            multiple sequence alignment
        """
        from Bio import SeqRecord, Seq
        from Bio.Align import MultipleSeqAlignment

        tmp = []
        # iterate over all clades (not only terminals), otherwise the
        # `internal` flag would have no effect
        for n in self.tree.find_clades():
            if n.is_terminal() or internal:
                tmp.append(SeqRecord.SeqRecord(id=n.name, name=n.name, description='',
                                   seq=Seq.Seq(''.join(n.ancestral_sequence.astype('U')))))

        return MultipleSeqAlignment(tmp)

treetime-0.8.6/treetime/sequence_data.py

from __future__ import division, print_function, absolute_import
import sys
from os.path import isfile
from collections import defaultdict
import numpy as np
from Bio import SeqRecord, Seq, AlignIO, SeqIO
from . import config as ttconf
from .
import MissingDataError from .seq_utils import seq2array, guess_alphabet, alphabets string_types = [str] if sys.version_info[0]==3 else [str, unicode] def simple_logger(*args, **kwargs): print(args) class SequenceData(object): """docstring for SeqData Attributes ---------- additional_constant_sites : int length of the sequence without variation not included in the alignment aln : dict sequences, either sparse of full ambiguous : byte character signifying missing data compress : bool compress the alignment compressed_alignment : dict dictionary mapping sequence names to compressed sequences compressed_to_full_sequence_map : dict for each compressed position, contain a list of positions in the full alignment fill_overhangs : bool treat gaps at either end of sequence as missing data full_length : int length of the sequence full_to_compressed_sequence_map : np.array a map of each position in the full sequence to the compressed sequence inferred_const_sites : list list of positions that are constant but differ from the reference, or contain ambiguous characters is_sparse : bool whether the representation of the alignment is sparse (dict) or fill (array) likely_alphabet : str simply guess as to whether the sequence alignment is nucleotides or amino acids logger : callable function writting log messages multiplicity : np.array specifies for each column of the compressed alignment how often this pattern occurs nonref_positions : list positions where at least one sequence differs from the reference ref : np.array reference sequence (stored as np.array(dtype="S")) seq_multiplicity : dict store the multiplicity of sequence, for example read count in a deep sequencing experiment sequence_names : list list of all sequences in a fixed order word_length : int length of state (typically 1 A,C,G,T, but could be 3 for codons) """ def __init__(self, aln, ref=None, logger=None, convert_upper=True, sequence_length=None, compress=True, word_length=1, sequence_type=None, fill_overhangs=True, seq_multiplicity=None, ambiguous=None, **kwargs): """construct an sequence data object Parameters ---------- aln : Bio.Align.MultipleSeqAlignment, str alignment or file name ref : Seq, str sequence or file name logger : callable, optional logging function convert_upper : bool, optional convert all sequences to upper case, default true sequence_length : None, optional length of the sequence, only necessary when no alignment or ref is given compress : bool, optional compress identical alignment columns into one word_length : int length of state (typically 1 A,C,G,T, but could be 3 for codons) fill_overhangs : bool treat gaps at either end of sequence as missing data seq_multiplicity : dict store the multiplicity of sequence, for example read count in a deep sequencing experiment ambiguous : byte character signifying missing data **kwargs Description """ self.logger = logger if logger else simple_logger self._aln = None self._ref = None self.likely_alphabet = None self.compressed_to_full_sequence_map = None self.multiplicity = None self.is_sparse = None self.convert_upper = convert_upper self.compress = compress self.seq_multiplicity = seq_multiplicity or {} # possibly a dict mapping sequences to their read cound/sample count self.additional_constant_sites = kwargs['additional_constant_sites'] if 'additional_constant_sites' in kwargs else 0 # if not specified, this will be set as the alignment_length or reference length self._full_length = None self.full_length = sequence_length self._compressed_length = None self.word_length = 
word_length self.fill_overhangs = fill_overhangs self.ambiguous = ambiguous self.sequence_type = sequence_type self.ref = ref self.aln = aln @property def aln(self): """ The multiple sequence alignment currently used by the TreeAnc :setter: Takes in alignment as MultipleSeqAlignment, str, or dict/defaultdict \ and attaches sequences to tree nodes. :getter: Returns alignment as MultipleSeqAlignment or dict/defaultdict """ return self._aln @aln.setter def aln(self,in_aln): """ Reads in the alignment (from a dict, MultipleSeqAlignment, or file, as necessary), sets tree-related parameters, and attaches sequences to the tree nodes. Parameters ---------- in_aln : MultipleSeqAlignment, str, dict/defaultdict The alignment to be read in """ # load alignment from file if necessary from Bio.Align import MultipleSeqAlignment self._aln, self.is_sparse = None, None if in_aln is None: return elif type(in_aln) in [defaultdict, dict]: #if input is sparse (i.e. from VCF) self._aln = in_aln self.is_sparse = True elif type(in_aln) in string_types and isfile(in_aln): if any([in_aln.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]) and (self.ref is not None): from .vcf_utils import read_vcf compress_seq = read_vcf(in_aln) in_aln = compress_seq['sequences'] else: for fmt in ['fasta', 'phylip-relaxed', 'nexus']: try: in_aln=AlignIO.read(in_aln, fmt) except: continue if type(in_aln) is MultipleSeqAlignment: # check whether the alignment is consistent with a nucleotide alignment. self._aln = {} for s in in_aln: if s.id==s.name: tmp_name = s.id elif ' in_aln.get_alignment_length(): self.logger("SequenceData.aln: specified sequence length doesn't match alignment length. Treating difference as constant sites.", 2, warn=True) self.additional_constant_sites = max(0, self.full_length - in_aln.get_alignment_length()) else: if self.is_sparse: self.full_length = len(self.ref) else: self.full_length = in_aln.get_alignment_length() self.sequence_names = list(self.aln.keys()) self.make_compressed_alignment() @property def full_length(self): """length of the uncompressed sequence """ return self._full_length @full_length.setter def full_length(self,L): """set the length of the uncompressed sequence. its inverse 'one_mutation' is frequently used as a general length scale. This can't be changed once it is set. 
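        For example (toy number, for illustration only): with a sequence
        length of L = 1000, the natural branch-length scale is
        one_mutation = 1/L = 0.001 expected substitutions per site.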
Parameters ---------- L : int length of the sequence alignment """ if (not hasattr(self, '_full_length')) or self._full_length is None: if L: self._full_length = int(L) else: self.logger("Alignment: one_mutation and sequence length can only be specified once!",1) @property def compressed_length(self): return self._compressed_length @property def ref(self): """ :setter: Sets the string reference sequence :getter: Returns the string reference sequence """ return self._ref @ref.setter def ref(self, in_ref): """ Parameters ---------- in_ref : file name, str, Bio.Seq.Seq, Bio.SeqRecord.SeqRecord reference sequence will read and stored a byte array """ read_from_file=False if in_ref and isfile(in_ref): for fmt in ['fasta', 'genbank']: try: in_ref = SeqIO.read(in_ref, fmt) self.logger("SequenceData: loaded reference sequence as %s format"%fmt,1) read_from_file=True break except: continue if not read_from_file: raise TypeError('SequenceData.ref: reference sequence file %s could not be parsed, fasta and genbank formats are supported.') if in_ref: self._ref = seq2array(in_ref, fill_overhangs=False, word_length=self.word_length) self.full_length = self._ref.shape[0] self.compressed_to_full_sequence_map = None self.multiplicity = None def check_alphabet(self, seqs): self.likely_alphabet = guess_alphabet(seqs) if self.sequence_type: if self.likely_alphabet!=self.sequence_type: if self.sequence_type=='nuc': self.logger("POSSIBLE ERROR: This does not look like a nucleotide alignment!", 0, warn=True) elif self.sequence_type=='aa': self.logger("POSSIBLE ERROR: This looks like a nucleotide alignment, you indicated amino acids!", 0, warn=True) if self.ambiguous is None: self.ambiguous = 'N' if self.likely_alphabet=='nuc' else 'X' def make_compressed_alignment(self): """ Create the compressed alignment from the full sequences. This method counts the multiplicity for each column of the alignment ('alignment pattern'), and creates the compressed alignment, where only the unique patterns are present. The maps from full sequence to compressed sequence and back are also stored to allow compressing and expanding the sequences. Notes ----- full_to_compressed_sequence_map : (array) Map to reduce a sequence compressed_to_full_sequence_map : (dict) Map to restore sequence from compressed alignment multiplicity : (array) Numpy array, which stores the pattern multiplicity for each position of the compressed alignment. compressed_alignment : (2D numpy array) The compressed alignment. Shape is (N x L'), where N is number of sequences, L' - number of unique alignment patterns """ if not self.compress: # self.multiplicity = np.ones(self.full_length, dtype=float) self.full_to_compressed_sequence_map = np.arange(self.full_length) self.compressed_to_full_sequence_map = {p:np.array([p]) for p in np.arange(self.full_length)} self._compressed_length = self._full_length self.compressed_alignment = self._aln return ttconf.SUCCESS self.logger("SeqData: making compressed alignment...", 1) # bind positions in full length sequence to that of the compressed (compressed) sequence self.full_to_compressed_sequence_map = np.zeros(self.full_length, dtype=int) # bind position in compressed sequence to the array of positions in full length sequence self.compressed_to_full_sequence_map = {} #if alignment is sparse, don't iterate over all invarible sites. #so pre-load alignment_patterns with the location of const sites! #and get the sites that we want to iterate over only! 
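        # Illustrative sketch (toy alignment and hypothetical names, not part
        # of this method): the column-pattern compression below can be
        # mimicked with numpy alone:
        #
        #   toy = np.array([list("ACGTAC"), list("ACGTAA"), list("ACCTAC")])
        #   patterns, f2c, counts = np.unique(toy.T, axis=0,
        #                                     return_inverse=True,
        #                                     return_counts=True)
        #
        # `counts` plays the role of self.multiplicity and `f2c` that of
        # self.full_to_compressed_sequence_map; the real implementation
        # additionally gives variable columns private patterns (see below).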
if self.is_sparse: from .vcf_utils import process_sparse_alignment tmp = process_sparse_alignment(self.aln, self.ref, self.ambiguous) compressed_aln_transpose = tmp["constant_columns"] alignment_patterns = tmp["constant_patterns"] variable_positions = tmp["variable_positions"] self.inferred_const_sites = tmp["constant_up_to_ambiguous"] self.nonref_positions = tmp["nonref_positions"] else: # transpose real alignment, for ease of iteration alignment_patterns = {} compressed_aln_transpose = [] aln_transpose = np.array([self.aln[k] for k in self.sequence_names]).T variable_positions = np.arange(aln_transpose.shape[0]) for pi in variable_positions: if self.is_sparse: pattern = np.array([self.aln[k][pi] if pi in self.aln[k] else self.ref[pi] for k in self.sequence_names]) else: # pylint: disable=unsubscriptable-object pattern = np.copy(aln_transpose[pi]) # if the column contains only one state and ambiguous nucleotides, replace # those with the state in other strains right away unique_letters = list(np.unique(pattern)) if len(unique_letters)==2 and self.ambiguous in unique_letters: other = [c for c in unique_letters if c!=self.ambiguous][0] #also replace in original pattern! pattern[pattern == self.ambiguous] = other unique_letters = [other] str_pattern = "".join(pattern.astype('U')) # if there is a mutation in this column, give it its private pattern # this is required when sampling mutations from reconstructed profiles. # otherwise, all mutations corresponding to the same pattern will be coupled. # FIXME: this could be done more efficiently if len(unique_letters)>1: str_pattern += '_%d'%pi # if the pattern is not yet seen, if str_pattern not in alignment_patterns: # bind the index in the compressed aln, index in sequence to the pattern string alignment_patterns[str_pattern] = (len(compressed_aln_transpose), [pi]) # append this pattern to the compressed alignment compressed_aln_transpose.append(pattern) else: # if the pattern is already seen, append the position in the real # sequence to the compressed aln<->sequence_pos_indexes map alignment_patterns[str_pattern][1].append(pi) # add constant alignment column not in the alignment. We don't know where they # are, so just add them to the end. First, determine sequence composition. 
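        # Worked micro-example of the allocation below (toy numbers, not
        # library data): 10 additional constant columns distributed over a
        # composition of 60% A and 40% T:
        #
        #   character_counts = {'A': 60, 'T': 40}
        #   total = 100
        #   [(c, int(np.round(10*n/total))) for c, n in character_counts.items()]
        #   # -> [('A', 6), ('T', 4)]
        #
        # the last character absorbs any rounding remainder (columns_left).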
if self.additional_constant_sites: character_counts = {c:np.sum(aln_transpose==c) for c in alphabets[self.likely_alphabet+'_nogap'] if c not in [self.ambiguous, '-']} total = np.sum(list(character_counts.values())) additional_columns_per_character = [(c,int(np.round(self.additional_constant_sites*n/total))) for c, n in character_counts.items()] columns_left = self.additional_constant_sites pi = np.max(variable_positions)+1 for c,n in additional_columns_per_character: if c==additional_columns_per_character[-1][0]: # make sure all additions add up to the correct number to avoid rounding n = columns_left str_pattern = c*len(self.sequence_names) pos_list = list(range(pi, pi+n)) if n: if str_pattern in alignment_patterns: alignment_patterns[str_pattern][1].extend(pos_list) else: alignment_patterns[str_pattern] = (len(compressed_aln_transpose), pos_list) compressed_aln_transpose.append(np.array(list(str_pattern))) pi += n columns_left -= n # count how many times each column is repeated in the real alignment self.multiplicity = np.zeros(len(alignment_patterns)) for p, pos in alignment_patterns.values(): self.multiplicity[p]=len(pos) # create the compressed alignment as a dictionary linking names to sequences tmp_compressed_alignment = np.array(compressed_aln_transpose).T # pylint: disable=unsubscriptable-object self.compressed_alignment = {k: tmp_compressed_alignment[i] for i,k in enumerate(self.sequence_names)} # create map to compress a sequence for p, pos in alignment_patterns.values(): self.full_to_compressed_sequence_map[np.array(pos)]=p # create a map to reconstruct full sequence from the compressed (compressed) sequence for p, val in alignment_patterns.items(): self.compressed_to_full_sequence_map[val[0]]=np.array(val[1], dtype=int) self.logger("SequenceData: constructed compressed alignment...", 1) self._compressed_length = len(self.multiplicity) return ttconf.SUCCESS def full_to_sparse_sequence(self, sequence): """turn a sequence into a dictionary of differences from a reference sequence Parameters ---------- sequence : str, numpy.ndarray sequence to convert Returns ------- dict dictionary of difference from reference """ if self.ref is None: raise TypeError("SequenceData: sparse sequences can only be constructed when a reference sequence is defined") if type(sequence) is not np.ndarray: aseq = seq2array(sequence, fill_overhangs=False) else: aseq = sequence differences = np.where(self.ref!=aseq)[0] return {p:aseq[p] for p in differences} def compressed_to_sparse_sequence(self, sequence): """turn a compressed sequence into a list of difference from a reference Parameters ---------- sequence : numpy.ndarray compressed sequence stored as array Returns ------- dict dictionary of difference from reference """ if self.ref is None: raise TypeError("SequenceData: sparse sequences can only be constructed when a reference sequence is defined") sparse_seq = {} compressed_nonref_positions = self.full_to_compressed_sequence_map[self.nonref_positions] compressed_nonref_values = sequence[compressed_nonref_positions] mismatches = (compressed_nonref_values != self.ref[self.nonref_positions]) return dict(zip(self.nonref_positions[mismatches], compressed_nonref_values[mismatches])) def compressed_to_full_sequence(self, sequence, include_additional_constant_sites=False, as_string=False): """expand a compressed sequence Parameters ---------- sequence : np.ndarray compressed sequence include_additional_constant_sites : bool, optional add sites assumed constant as_string : bool, optional return a string 
instead of an array

        Returns
        -------
        array, str
            expanded sequence
        """
        if include_additional_constant_sites:
            L = self.full_length
        else:
            L = self.full_length - self.additional_constant_sites

        tmp_seq = sequence[self.full_to_compressed_sequence_map[:L]]
        if as_string:
            return "".join(tmp_seq.astype('U'))
        else:
            return tmp_seq

    def differences(self, seq1, seq2, seq1_compressed=True, seq2_compressed=True):
        diffs = []
        if self.is_sparse:
            if seq1_compressed:
                seq1 = self.compressed_to_sparse_sequence(seq1)
            if seq2_compressed:
                seq2 = self.compressed_to_sparse_sequence(seq2)

            for pos in set(seq1.keys()).union(seq2.keys()):
                ref_state = self.ref[pos]
                s1 = seq1.get(pos, ref_state)
                s2 = seq2.get(pos, ref_state)
                if s1!=s2:
                    diffs.append((s1, pos, s2))
        else:
            if seq1_compressed:
                seq1 = self.compressed_to_full_sequence(seq1)
            if seq2_compressed:
                seq2 = self.compressed_to_full_sequence(seq2)

            diff_pos = np.where(seq1 != seq2)[0]
            for pos in diff_pos:
                diffs.append((seq1[pos], pos, seq2[pos]))

        return sorted(diffs, key=lambda x: x[1])

treetime-0.8.6/treetime/treeanc.py

from __future__ import print_function, division, absolute_import
import time, sys
import gc
from collections import defaultdict
import numpy as np
from Bio import Phylo
from Bio.Phylo.BaseTree import Clade
from Bio import AlignIO
from . import config as ttconf
from . import MissingDataError, UnknownMethodError
from .seq_utils import seq2prof, seq2array, prof2seq, normalize_profile, extend_profile
from .gtr import GTR
from .gtr_site_specific import GTR_site_specific
from .sequence_data import SequenceData


def compressed_sequence(node):
    if node.name in node.tt.data.compressed_alignment and (not node.tt.reconstructed_tip_sequences):
        return node.tt.data.compressed_alignment[node.name]
    elif hasattr(node, '_cseq'):
        return node._cseq
    elif node.is_terminal():
        # terminal node without a stored sequence when tip-reconstruction is off
        return None
    else:
        raise ValueError('Ancestral sequences are not yet inferred')


def mutations(node):
    """
    Get the mutations on a tree branch. Take compressed sequences from both
    sides of the branch (attached to the node), compute mutations between
    them, and expand these mutations to the positions in the real sequences.
    """
    if node.up is None:
        return []
    elif (not node.tt.reconstructed_tip_sequences) and node.name in node.tt.data.aln:
        return node.tt.data.differences(node.up.cseq, node.tt.data.aln[node.name],
                                        seq2_compressed=False)
    elif node.is_terminal() and (node.name not in node.tt.data.aln):
        return []
    else:
        return node.tt.data.differences(node.up.cseq, node.cseq)


string_types = [str] if sys.version_info[0]==3 else [str, unicode]

Clade.sequence = property(lambda x: x.tt.sequence(x, as_string=False))
Clade.cseq = property(compressed_sequence)
Clade.mutations = property(mutations)


class TreeAnc(object):
    """
    This class defines a simple tree object with basic interface methods:
    reading and saving from/to files, initializing the leaves with sequences
    from the alignment, and making ancestral state inference.
    """

    def __init__(self, tree=None, aln=None, gtr=None, fill_overhangs=True,
                 ref=None, verbose=ttconf.VERBOSE, ignore_gaps=True,
                 convert_upper=True, seq_multiplicity=None, log=None,
                 compress=True, seq_len=None, ignore_missing_alns=False,
                 **kwargs):
        """
        TreeAnc constructor. It prepares the tree, attaches sequences to the
        leaf nodes, and sets some configuration parameters.

        Parameters
        ----------
        tree : str, Bio.Phylo.Tree
            Phylogenetic tree.
String passed is interpreted as a filename with a tree in a standard format that can be parsed by the Biopython Phylo module. aln : str, Bio.Align.MultipleSequenceAlignment, dict Sequence alignment. If a string passed, it is interpreted as the filename to read Biopython alignment from. If a dict is given, this is assumed to be the output of vcf_utils.read_vcf which specifies for each sequence the differences from a reference gtr : str, GTR GTR model object. If string passed, it is interpreted as the type of the GTR model. A new GTR instance will be created for this type. fill_overhangs : bool, default True In some cases, the missing data on both ends of the alignment is filled with the gap sign('-'). If set to True, the end-gaps are converted to "unknown" characters ('N' for nucleotides, 'X' for aminoacids). Otherwise, the alignment is treated as-is ref : None, optional Reference sequence used in VCF mode verbose : int, default 3 Verbosity level as number from 0 (lowest) to 10 (highest). ignore_gaps : bool, default True Ignore gaps in branch length calculations convert_upper : bool, default True Convert all sequences to upper case seq_multiplicity : dict If individual nodes in the tree correspond to multiple sampled sequences (i.e. read count in a deep sequencing experiment), these can be specified as a dictionary. This currently only affects rooting and can be used to weigh individual tips by abundance or important during root search. compress : bool, default True reduce identical alignment columns to one (not useful when inferring site specific GTR models). seq_len : int, optional length of the sequence. this is inferred from the input alignment or the reference sequence in most cases but can be specified for other applications. ignore_missing_alns : bool, default False **kwargs Keyword arguments to construct the GTR model .. Note:: Some GTR types require additional configuration parameters. If the new GTR is being instantiated, these parameters are expected to be passed as kwargs. If nothing is passed, the default values are used, which might cause unexpected results. Raises ------ AttributeError If no tree is passed in """ if tree is None: raise TypeError("TreeAnc requires a tree!") self.t_start = time.time() self.verbose = verbose self.log = log self.ok = False self.data = None self.log_messages = set() self.logger("TreeAnc: set-up",1) self._internal_node_count = 0 self.use_mutation_length = False self.ignore_gaps = ignore_gaps self.reconstructed_tip_sequences = False self.sequence_reconstruction = None self.ignore_missing_alns = ignore_missing_alns self._tree = None self.tree = tree if tree is None: raise ValueError("TreeAnc: tree loading failed! exiting") # set up GTR model self._gtr = None self.set_gtr(gtr or 'JC69', **kwargs) # set alignment and attach sequences to tree on success. # otherwise self.data.aln will be None self.data = SequenceData(aln, ref=ref, logger=self.logger, compress=compress, convert_upper=convert_upper, fill_overhangs=fill_overhangs, ambiguous=self.gtr.ambiguous, sequence_length=seq_len) if self.gtr.is_site_specific and self.data.compress: raise TypeError("TreeAnc: sequence compression and site specific gtr models are incompatible!" ) if self.data.aln and self.tree: self._check_alignment_tree_gtr_consistency() def logger(self, msg, level, warn=False, only_once=False): """ Print log message *msg* to stdout. Parameters ----------- msg : str String to print on the screen level : int Log-level. 
Only the messages with a level higher than the current verbose level will be shown. warn : bool Warning flag. If True, the message will be displayed regardless of its log-level. """ if only_once and msg in self.log_messages: return self.log_messages.add(msg) lw=80 if level self.tree.count_terminals()/3: raise MissingDataError("TreeAnc._check_alignment_tree_gtr_consistency: At least 30\\% terminal nodes cannot be assigned a sequence!\n" "Are you sure the alignment belongs to the tree?") else: # could not assign sequence for internal node - is OK pass if failed_leaves: self.logger("***WARNING: TreeAnc: %d nodes don't have a matching sequence in the alignment." " POSSIBLE ERROR."%failed_leaves, 0, warn=True) # extend profile to contain additional unknown characters extend_profile(self.gtr, [self.data.ref] if self.data.is_sparse else self.data.aln.values(), logger=self.logger) self.ok = True def prepare_tree(self): """ Set link to parent and calculate distance to root for all tree nodes. Should be run once the tree is read and after every rerooting, topology change or branch length optimizations. """ self.sequence_reconstruction = False self.tree.root.branch_length = 0.001 self.tree.root.mutation_length = self.tree.root.branch_length self.tree.ladderize() self._prepare_nodes() self._leaves_lookup = {node.name:node for node in self.tree.get_terminals()} def _prepare_nodes(self): """ Set auxilliary parameters to every node of the tree. """ self.tree.root.up = None self.tree.root.tt = self self.tree.root.bad_branch=self.tree.root.bad_branch if hasattr(self.tree.root, 'bad_branch') else False name_set = {n.name for n in self.tree.find_clades() if n.name} internal_node_count = 0 for clade in self.tree.get_nonterminals(order='preorder'): # parents first if clade.name is None: tmp = "NODE_" + format(internal_node_count, '07d') while tmp in name_set: internal_node_count += 1 tmp = "NODE_" + format(internal_node_count, '07d') clade.name = tmp name_set.add(clade.name) internal_node_count+=1 for c in clade.clades: c.up = clade c.tt = self for clade in self.tree.find_clades(order='postorder'): # children first if clade.is_terminal(): clade.bad_branch = clade.bad_branch if hasattr(clade, 'bad_branch') else False else: clade.bad_branch = all([c.bad_branch for c in clade]) self._calc_dist2root() self._internal_node_count = max(internal_node_count, self._internal_node_count) def _calc_dist2root(self): """ For each node in the tree, set its root-to-node distance as dist2root attribute """ self.tree.root.dist2root = 0.0 for clade in self.tree.get_nonterminals(order='preorder'): # parents first for c in clade.clades: c.dist2root = clade.dist2root + c.mutation_length #################################################################### ## END SET-UP #################################################################### ################################################################### ### ancestral reconstruction ################################################################### def reconstruct_anc(self,*args, **kwargs): """Shortcut for :py:meth:`treetime.TreeAnc.infer_ancestral_sequences` """ return self.infer_ancestral_sequences(*args,**kwargs) def infer_ancestral_sequences(self, method='probabilistic', infer_gtr=False, marginal=False, reconstruct_tip_states=False, **kwargs): """Reconstruct ancestral sequences Parameters ---------- method : str Method to use. 
Supported values are "parsimony", "fitch", "probabilistic" and "ml" infer_gtr : bool Infer a GTR model before reconstructing the sequences marginal : bool Assign sequences that are most likely after averaging over all other nodes instead of the jointly most likely sequences. reconstruct_tip_states : bool, optional Reconstruct sequences of terminal nodes/leaves, thereby replacing ambiguous characters with the inferred base/state. default: False **kwargs additional keyword arguments that are passed down to :py:meth:`TreeAnc.infer_gtr` and :py:meth:`TreeAnc._ml_anc` Returns ------- N_diff : int Number of nucleotides different from the previous reconstruction. If there were no pre-set sequences, returns N*L """ if not self.ok: raise MissingDataError("TreeAnc.infer_ancestral_sequences: ERROR, sequences or tree are missing") self.logger("TreeAnc.infer_ancestral_sequences with method: %s, %s"%(method, 'marginal' if marginal else 'joint'), 1) if not reconstruct_tip_states: self.logger("WARNING: Previous versions of TreeTime (<0.7.0) RECONSTRUCTED sequences" " of tips at positions with AMBIGUOUS bases. This resulted in" " unexpected behavior is some cases and is no longer done by default." " If you want to replace those ambiguous sites with their most likely state," " rerun with `reconstruct_tip_states=True` or `--reconstruct-tip-states`.", 0, warn=True, only_once=True) if method.lower() in ['ml', 'probabilistic']: if marginal: _ml_anc = self._ml_anc_marginal else: _ml_anc = self._ml_anc_joint elif method.lower() in ['fitch', 'parsimony']: _ml_anc = self._fitch_anc else: raise ValueError("Reconstruction method needs to be in ['ml', 'probabilistic', 'fitch', 'parsimony'], got '{}'".format(method)) if infer_gtr: self.infer_gtr(marginal=marginal, **kwargs) N_diff = _ml_anc(reconstruct_tip_states=reconstruct_tip_states, **kwargs) else: N_diff = _ml_anc(reconstruct_tip_states=reconstruct_tip_states, **kwargs) return N_diff ################################################################### ### FITCH ################################################################### def _fitch_anc(self, **kwargs): """ Reconstruct ancestral states using Fitch's algorithm. It implements the iteration from leaves to the root constructing the Fitch profiles for each character of the sequence, and then by propagating from the root to the leaves, reconstructs the sequences of the internal nodes. Keyword Args ------------ Returns ------- Ndiff : int Number of the characters that changed since the previous reconstruction. These changes are determined from the pre-set sequence attributes of the nodes. If there are no sequences available (i.e., no reconstruction has been made before), returns the total number of characters in the tree. 
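        Example
        -------
        A toy illustration of the Fitch rule used below (not library code):
        a node's state set is the intersection of its children's state sets
        if that intersection is non-empty, and their union otherwise.

        >>> child_states = [{'A'}, {'A', 'C'}]
        >>> child_states[0] & child_states[1] or child_states[0] | child_states[1]
        {'A'}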
""" # set fitch profiiles to each terminal node for l in self.tree.get_terminals(): l.state = [[k] for k in l.cseq] L = self.data.compressed_length self.logger("TreeAnc._fitch_anc: Walking up the tree, creating the Fitch profiles",2) for node in self.tree.get_nonterminals(order='postorder'): node.state = [self._fitch_state(node, k) for k in range(L)] ambs = [i for i in range(L) if len(self.tree.root.state[i])>1] if len(ambs) > 0: for amb in ambs: self.logger("Ambiguous state of the root sequence " "in the position %d: %s, " "choosing %s" % (amb, str(self.tree.root.state[amb]), self.tree.root.state[amb][0]), 4) self.tree.root._cseq = np.array([k[np.random.randint(len(k)) if len(k)>1 else 0] for k in self.tree.root.state]) self.logger("TreeAnc._fitch_anc: Walking down the self.tree, generating sequences from the " "Fitch profiles.", 2) N_diff = 0 for node in self.tree.get_nonterminals(order='preorder'): if node.up != None: # not root sequence = np.array([node.up._cseq[i] if node.up._cseq[i] in node.state[i] else node.state[i][0] for i in range(L)]) if self.sequence_reconstruction: N_diff += (sequence!=node.cseq).sum() else: N_diff += L node._cseq = sequence del node.state # no need to store Fitch states self.sequence_reconstruction = 'parsimony' self.logger("Done ancestral state reconstruction",3) return N_diff def _fitch_state(self, node, pos): """ Determine the Fitch profile for a single character of the node's sequence. The profile is essentially the intersection between the children's profiles or, if the former is empty, the union of the profiles. Parameters ---------- node : PhyloTree.Clade: Internal node which the profiles are to be determined pos : int Position in the node's sequence which the profiles should be determinedf for. Returns ------- state : numpy.array Fitch profile for the character at position pos of the given node. """ state = self._fitch_intersect([k.state[pos] for k in node.clades]) if len(state) == 0: state = np.concatenate([k.state[pos] for k in node.clades]) return state def _fitch_intersect(self, arrays): """ Find the intersection of any number of 1D arrays. Return the sorted, unique values that are in all of the input arrays. Adapted from numpy.lib.arraysetops.intersect1d """ def pairwise_intersect(arr1, arr2): s2 = set(arr2) b3 = [val for val in arr1 if val in s2] return b3 arrays = list(arrays) # allow assignment N = len(arrays) while N > 1: arr1 = arrays.pop() arr2 = arrays.pop() arr = pairwise_intersect(arr1, arr2) arrays.append(arr) N = len(arrays) return arrays[0] ################################################################### ### Maximum Likelihood ################################################################### def sequence_LH(self, pos=None, full_sequence=False): """return the likelihood of the observed sequences given the tree Parameters ---------- pos : int, optional position in the sequence, if none, the sum over all positions will be returned full_sequence : bool, optional does the position refer to the full or compressed sequence, by default compressed sequence is assumed. 
Returns ------- float likelihood """ if not hasattr(self.tree, "total_sequence_LH"): self.logger("TreeAnc.sequence_LH: you need to run marginal ancestral inference first!", 1) self.infer_ancestral_sequences(marginal=True) if pos is not None: if full_sequence: compressed_pos = self.data.full_to_compressed_sequence_map[pos] else: compressed_pos = pos return self.tree.sequence_LH[compressed_pos] else: return self.tree.total_sequence_LH def ancestral_likelihood(self): """ Calculate the likelihood of the given realization of the sequences in the tree Returns ------- log_lh : float The tree likelihood given the sequences """ log_lh = np.zeros(self.data.multiplicity.shape[0]) for node in self.tree.find_clades(order='postorder'): if node.up is None: # root node # 0-1 profile profile = seq2prof(node.cseq, self.gtr.profile_map) # get the probabilities to observe each nucleotide profile *= self.gtr.Pi profile = profile.sum(axis=1) log_lh += np.log(profile) # product over all characters continue t = node.branch_length indices = np.array([(self.gtr.state_index[a], self.gtr.state_index[b]) for a, b in zip(node.up.cseq, node.cseq)]) logQt = np.log(self.gtr.expQt(t)) lh = logQt[indices[:, 1], indices[:, 0]] log_lh += lh return log_lh def _branch_length_to_gtr(self, node): """ Set branch lengths to either mutation lengths of given branch lengths. The assigend values are to be used in the following ML analysis. """ if self.use_mutation_length: return max(ttconf.MIN_BRANCH_LENGTH*self.one_mutation, node.mutation_length) else: return max(ttconf.MIN_BRANCH_LENGTH*self.one_mutation, node.branch_length) def _ml_anc_marginal(self, sample_from_profile=False, reconstruct_tip_states=False, debug=False, **kwargs): """ Perform marginal ML reconstruction of the ancestral states. In contrast to joint reconstructions, this needs to access the probabilities rather than only log probabilities and is hence handled by a separate function. Parameters ---------- sample_from_profile : bool or str assign sequences probabilistically according to the inferred probabilities of ancestral states instead of to their ML value. This parameter can also take the value 'root' in which case probabilistic sampling will happen at the root but at no other node. reconstruct_tip_states : bool, default False reconstruct sequence assigned to leaves, will replace ambiguous characters with the most likely definite character. Note that this will affect the mutations assigned to branches. """ self.logger("TreeAnc._ml_anc_marginal: type of reconstruction: Marginal", 2) self.postorder_traversal_marginal() # choose sequence characters from this profile. 
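        # Illustrative sketch of the profile mentioned above (toy numbers,
        # not library code): the per-site posterior at a node is the product
        # of the subtree likelihood and the message from the rest of the
        # tree, renormalized so that every row sums to one:
        #
        #   subtree_LH  = np.array([[0.9, 0.1], [0.2, 0.8]])
        #   outgroup_LH = np.array([[0.5, 0.5], [0.7, 0.3]])
        #   posterior = subtree_LH * outgroup_LH
        #   posterior /= posterior.sum(axis=1, keepdims=True)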
# treat root node differently to avoid piling up mutations on the longer branch if sample_from_profile=='root': root_sample_from_profile = True other_sample_from_profile = False elif isinstance(sample_from_profile, bool): root_sample_from_profile = sample_from_profile other_sample_from_profile = sample_from_profile self.total_LH_and_root_sequence(sample_from_profile=root_sample_from_profile, assign_sequence=True) N_diff = self.preorder_traversal_marginal(reconstruct_tip_states=reconstruct_tip_states, sample_from_profile=other_sample_from_profile, assign_sequence=True) self.logger("TreeAnc._ml_anc_marginal: ...done", 3) self.reconstructed_tip_sequences = reconstruct_tip_states # do clean-up: if not debug: for node in self.tree.find_clades(): try: del node.marginal_log_Lx del node.marginal_subtree_LH_prefactor except: pass gc.collect() self.sequence_reconstruction = 'marginal' return N_diff def total_LH_and_root_sequence(self, sample_from_profile=False, assign_sequence=False): self.logger("Computing root node sequence and total tree likelihood...",3) # Msg to the root from the distant part (equ frequencies) if len(self.gtr.Pi.shape)==1: self.tree.root.marginal_outgroup_LH = np.repeat([self.gtr.Pi], self.data.compressed_length, axis=0) else: self.tree.root.marginal_outgroup_LH = np.copy(self.gtr.Pi.T) self.tree.root.marginal_profile, pre = normalize_profile(self.tree.root.marginal_outgroup_LH*self.tree.root.marginal_subtree_LH) marginal_LH_prefactor = self.tree.root.marginal_subtree_LH_prefactor + pre self.tree.sequence_LH = marginal_LH_prefactor self.tree.total_sequence_LH = (self.tree.sequence_LH*self.data.multiplicity).sum() self.tree.sequence_marginal_LH = self.tree.total_sequence_LH if assign_sequence: seq, prof_vals, idxs = prof2seq(self.tree.root.marginal_profile, self.gtr, sample_from_prof=sample_from_profile, normalize=False) self.tree.root._cseq = seq def postorder_traversal_marginal(self): L = self.data.compressed_length n_states = self.gtr.alphabet.shape[0] self.logger("Attaching sequence profiles to leafs... ", 3) # set the leaves profiles. This doesn't ever need to be reassigned for leaves for leaf in self.tree.get_terminals(): if not hasattr(leaf, "marginal_subtree_LH"): if leaf.name in self.data.compressed_alignment: leaf.marginal_subtree_LH = seq2prof(self.data.compressed_alignment[leaf.name], self.gtr.profile_map) else: leaf.marginal_subtree_LH = np.ones((L, n_states)) if not hasattr(leaf, "marginal_subtree_LH_prefactor"): leaf.marginal_subtree_LH_prefactor = np.zeros(L) self.logger("Postorder: computing likelihoods... 
", 3) # propagate leaves --> root, set the marginal-likelihood messages for node in self.tree.get_nonterminals(order='postorder'): #leaves -> root # regardless of what was before, set the profile to ones tmp_log_subtree_LH = np.zeros((L,n_states), dtype=float) node.marginal_subtree_LH_prefactor = np.zeros(L, dtype=float) for ch in node.clades: ch.marginal_log_Lx = self.gtr.propagate_profile(ch.marginal_subtree_LH, self._branch_length_to_gtr(ch), return_log=True) # raw prob to transfer prob up tmp_log_subtree_LH += ch.marginal_log_Lx node.marginal_subtree_LH_prefactor += ch.marginal_subtree_LH_prefactor node.marginal_subtree_LH, offset = normalize_profile(tmp_log_subtree_LH, log=True) node.marginal_subtree_LH_prefactor += offset # and store log-prefactor def preorder_traversal_marginal(self, reconstruct_tip_states=False, sample_from_profile=False, assign_sequence=False): self.logger("Preorder: computing marginal profiles...",3) # propagate root -->> leaves, reconstruct the internal node sequences # provided the upstream message + the message from the complementary subtree N_diff = 0 for node in self.tree.find_clades(order='preorder'): if node.up is None: # skip if node is root continue if hasattr(node, 'branch_state'): del node.branch_state # integrate the information coming from parents with the information # of all children my multiplying it to the prev computed profile node.marginal_outgroup_LH, pre = normalize_profile(np.log(np.maximum(ttconf.TINY_NUMBER, node.up.marginal_profile)) - node.marginal_log_Lx, log=True, return_offset=False) if node.is_terminal() and (not reconstruct_tip_states): # skip remainder unless leaves are to be reconstructed continue tmp_msg_from_parent = self.gtr.evolve(node.marginal_outgroup_LH, self._branch_length_to_gtr(node), return_log=False) node.marginal_profile, pre = normalize_profile(node.marginal_subtree_LH * tmp_msg_from_parent, return_offset=False) # choose sequence based maximal marginal LH. if assign_sequence: seq, prof_vals, idxs = prof2seq(node.marginal_profile, self.gtr, sample_from_prof=sample_from_profile, normalize=False) if self.sequence_reconstruction: N_diff += (seq!=node.cseq).sum() else: N_diff += self.data.compressed_length #assign new sequence node._cseq = seq return N_diff def _ml_anc_joint(self, sample_from_profile=False, reconstruct_tip_states=False, debug=False, **kwargs): """ Perform joint ML reconstruction of the ancestral states. In contrast to marginal reconstructions, this only needs to compare and multiply LH and can hence operate in log space. Parameters ---------- sample_from_profile : str This parameter can take the value 'root' in which case probabilistic sampling will happen at the root. otherwise sequences at ALL nodes are set to the value that jointly optimized the likelihood. reconstruct_tip_states : bool, default False reconstruct sequence assigned to leaves, will replace ambiguous characters with the most likely definite character. Note that this will affect the mutations assigned to branches. """ N_diff = 0 # number of sites differ from perv reconstruction L = self.data.compressed_length n_states = self.gtr.alphabet.shape[0] self.logger("TreeAnc._ml_anc_joint: type of reconstruction: Joint", 2) self.logger("TreeAnc._ml_anc_joint: Walking up the tree, computing likelihoods... 
", 3) # for the internal nodes, scan over all states j of this node, maximize the likelihood for node in self.tree.find_clades(order='postorder'): if hasattr(node, 'branch_state'): del node.branch_state if node.up is None: node.joint_Cx=None # not needed for root continue branch_len = self._branch_length_to_gtr(node) # transition matrix from parent states to the current node states. # denoted as Pij(i), where j - parent state, i - node state log_transitions = np.log(np.maximum(ttconf.TINY_NUMBER, self.gtr.expQt(branch_len))) if node.is_terminal(): if node.name in self.data.compressed_alignment: tmp_prof = seq2prof(self.data.compressed_alignment[node.name], self.gtr.profile_map) msg_from_children = np.log(np.maximum(tmp_prof, ttconf.TINY_NUMBER)) else: msg_from_children = np.zeros((L, n_states)) msg_from_children[np.isnan(msg_from_children) | np.isinf(msg_from_children)] = -ttconf.BIG_NUMBER else: # Product (sum-Log) over all child subtree likelihoods. # this is prod_ch L_x(i) msg_from_children = np.sum(np.stack([c.joint_Lx for c in node.clades], axis=0), axis=0) if not debug: # Now that we have calculated the current node's likelihood # from its children, clean up likelihood matrices attached # to children to save memory. for c in node.clades: del c.joint_Lx # for every possible state of the parent node, # get the best state of the current node # and compute the likelihood of this state # preallocate storage node.joint_Lx = np.zeros((L, n_states)) # likelihood array node.joint_Cx = np.zeros((L, n_states), dtype=np.uint16) # max LH indices for char_i, char in enumerate(self.gtr.alphabet): # Pij(i) * L_ch(i) for given parent state j msg_to_parent = (log_transitions[:,char_i].T + msg_from_children) # For this parent state, choose the best state of the current node: node.joint_Cx[:, char_i] = msg_to_parent.argmax(axis=1) # compute the likelihood of the best state of the current node # given the state of the parent (char_i) node.joint_Lx[:, char_i] = msg_to_parent.max(axis=1) # root node profile = likelihood of the total tree msg_from_children = np.sum(np.stack([c.joint_Lx for c in self.tree.root], axis = 0), axis=0) # Pi(i) * Prod_ch Lch(i) self.tree.root.joint_Lx = msg_from_children + np.log(self.gtr.Pi).T normalized_profile = (self.tree.root.joint_Lx.T - self.tree.root.joint_Lx.max(axis=1)).T # choose sequence characters from this profile. # treat root node differently to avoid piling up mutations on the longer branch if sample_from_profile=='root': root_sample_from_profile = True elif isinstance(sample_from_profile, bool): root_sample_from_profile = sample_from_profile seq, anc_lh_vals, idxs = prof2seq(np.exp(normalized_profile), self.gtr, sample_from_prof = root_sample_from_profile) # compute the likelihood of the most probable root sequence self.tree.sequence_LH = np.choose(idxs, self.tree.root.joint_Lx.T) self.tree.sequence_joint_LH = (self.tree.sequence_LH*self.data.multiplicity).sum() self.tree.root._cseq = seq self.tree.root.seq_idx = idxs self.logger("TreeAnc._ml_anc_joint: Walking down the tree, computing maximum likelihood sequences...",3) # for each node, resolve the conditioning on the parent node nodes_to_reconstruct = self.tree.get_nonterminals(order='preorder') if reconstruct_tip_states: nodes_to_reconstruct += self.tree.get_terminals() #TODO: Should we add tips without sequence here? 
for node in nodes_to_reconstruct: # root node has no mutations, everything else has been already set if node.up is None: continue # choose the value of the Cx(i), corresponding to the state of the # parent node i. This is the state of the current node node.seq_idx = np.choose(node.up.seq_idx, node.joint_Cx.T) # reconstruct seq, etc tmp_sequence = np.choose(node.seq_idx, self.gtr.alphabet) if self.sequence_reconstruction: N_diff += (tmp_sequence!=node.cseq).sum() else: N_diff += L node._cseq = tmp_sequence self.logger("TreeAnc._ml_anc_joint: ...done", 3) self.reconstructed_tip_sequences = reconstruct_tip_states # do clean-up if not debug: for node in self.tree.find_clades(order='preorder'): # Check for the likelihood matrix, since we might have cleaned # it up earlier. if hasattr(node, "joint_Lx"): del node.joint_Lx del node.joint_Cx if hasattr(node, 'seq_idx'): del node.seq_idx self.sequence_reconstruction = 'joint' return N_diff ############################################################### ### sequence and mutation storing ############################################################### def get_branch_mutation_matrix(self, node, full_sequence=False): """uses results from marginal ancestral inference to return a joint distribution of the sequence states at both ends of the branch. Parameters ---------- node : Phylo.clade node of the tree full_sequence : bool, optional expand the sequence to the full sequence, if false (default) the there will be one mutation matrix for each column in the compressed alignment Returns ------- numpy.array an Lxqxq stack of matrices (q=alphabet size, L (compressed)sequence length) """ pp,pc = self.marginal_branch_profile(node) # calculate pc_i [e^Qt]_ij pp_j for each site expQt = self.gtr.expQt(self._branch_length_to_gtr(node)) + ttconf.SUPERTINY_NUMBER if len(expQt.shape)==3: # site specific model mut_matrix_stack = np.einsum('ai,aj,ija->aij', pc, pp, expQt) else: mut_matrix_stack = np.einsum('ai,aj,ij->aij', pc, pp, expQt) # normalize this distribution normalizer = mut_matrix_stack.sum(axis=2).sum(axis=1) mut_matrix_stack = np.einsum('aij,a->aij', mut_matrix_stack, 1.0/normalizer) # expand to full sequence if requested if full_sequence: return mut_matrix_stack[self.data.full_to_compressed_sequence_map] else: return mut_matrix_stack def marginal_branch_profile(self, node): ''' calculate the marginal distribution of sequence states on both ends of the branch leading to node, Parameters ---------- node : PhyloTree.Clade TreeNode, attached to the branch. Returns ------- pp, pc : Pair of vectors (profile parent, pp) and (profile child, pc) that are of shape (L,n) where L is sequence length and n is alphabet size. note that this correspond to the compressed sequences. 
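        Examples
        --------
        Toy shapes only (hypothetical arrays, not library data): profiles of
        shape (L, n) combine with the branch propagator into the (L, n, n)
        joint distribution assembled in `get_branch_mutation_matrix` above.

        >>> import numpy as np
        >>> pc, pp, expQt = np.ones((5, 4)), np.ones((5, 4)), np.eye(4)
        >>> np.einsum('ai,aj,ij->aij', pc, pp, expQt).shape
        (5, 4, 4)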
''' parent = node.up if parent is None: raise Exception("Branch profiles can't be calculated for the root!") if not hasattr(node, 'marginal_outgroup_LH'): raise Exception("marginal ancestral inference needs to be performed first!") pc = node.marginal_subtree_LH pp = node.marginal_outgroup_LH return pp, pc def add_branch_state(self, node): """add a dictionary to the node containing tuples of state pairs and a list of their number across the branch Parameters ---------- node : tree.node attaces attribute :branch_state: """ seq_pairs, multiplicity = self.gtr.state_pair( node.up.cseq, node.cseq, pattern_multiplicity = self.data.multiplicity, ignore_gaps = self.ignore_gaps) node.branch_state = {'pair':seq_pairs, 'multiplicity':multiplicity} ################################################################### ### Branch length optimization ################################################################### def optimize_branch_len(self, **kwargs): """Deprecated in favor of 'optimize_branch_lengths_joint'""" return self.optimize_branch_lengths_joint(**kwargs) def optimize_branch_len_joint(self, **kwargs): """Deprecated in favor of 'optimize_branch_lengths_joint'""" return self.optimize_branch_lengths_joint(**kwargs) def optimize_branch_lengths_joint(self, **kwargs): """ Perform optimization for the branch lengths of the entire tree. This method only does a single path and needs to be iterated. **Note** this method assumes that each node stores information about its sequence as numpy.array object (node.sequence attribute). Therefore, before calling this method, sequence reconstruction with either of the available models must be performed. Parameters ---------- **kwargs : Keyword arguments Keyword Args ------------ store_old : bool If True, the old lengths will be saved in :code:`node._old_dist` attribute. Useful for testing, and special post-processing. """ self.logger("TreeAnc.optimize_branch_length: running branch length optimization using jointML ancestral sequences",1) if (self.tree is None) or (self.data.aln is None): raise MissingDataError("TreeAnc.optimize_branch_length: ERROR, alignment or tree are missing.") store_old_dist = kwargs['store_old'] if 'store_old' in kwargs else False max_bl = 0 for node in self.tree.find_clades(order='postorder'): if node.up is None: continue # this is the root if store_old_dist: node._old_length = node.branch_length new_len = max(0,self.optimal_branch_length(node)) self.logger("Optimization results: old_len=%.4e, new_len=%.4e" " Updating branch length..."%(node.branch_length, new_len), 5) node.branch_length = new_len node.mutation_length=new_len max_bl = max(max_bl, new_len) if max_bl>0.15: self.logger("TreeAnc.optimize_branch_lengths_joint: THIS TREE HAS LONG BRANCHES." " \n\t ****TreeTime's JOINT IS NOT DESIGNED TO OPTIMIZE LONG BRANCHES." " \n\t ****PLEASE OPTIMIZE BRANCHES USING: " " \n\t ****branch_length_mode='input' or 'marginal'", 0, warn=True) # as branch lengths changed, the distance to root etc need to be recalculated self.tree.root.up = None self.tree.root.dist2root = 0.0 self._prepare_nodes() return ttconf.SUCCESS def optimal_branch_length(self, node): ''' Calculate optimal branch length given the sequences of node and parent Parameters ---------- node : PhyloTree.Clade TreeNode, attached to the branch. 
        Returns
        -------
        new_len : float
            Optimal length of the given branch
        '''
        if node.up is None:
            return self.one_mutation

        if not hasattr(node, 'branch_state'):
            self.add_branch_state(node)

        return self.gtr.optimal_t_compressed(node.branch_state['pair'],
                                             node.branch_state['multiplicity'])


    def optimal_marginal_branch_length(self, node, tol=1e-10):
        '''
        calculate the optimal branch length of the branch leading to node
        using the marginal distributions of sequence states at both of its ends.

        Parameters
        ----------
        node : PhyloTree.Clade
            TreeNode, attached to the branch.

        Returns
        -------
        branch_length : float
            branch length of the branch leading to the node.
            note: this can be unstable on iteration
        '''
        if node.up is None:
            return self.one_mutation
        else:
            pp, pc = self.marginal_branch_profile(node)
            return self.gtr.optimal_t_compressed((pp, pc), self.data.multiplicity,
                                                 profiles=True, tol=tol)


    def optimize_tree_marginal(self, max_iter=10, infer_gtr=False, pc=1.0, damping=0.75,
                               LHtol=0.1, site_specific_gtr=False, **kwargs):
        self.infer_ancestral_sequences(marginal=True)
        oldLH = self.sequence_LH()
        self.logger("TreeAnc.optimize_tree_marginal: initial, LH=%1.2f, total branch_length %1.4f"%
                    (oldLH, self.tree.total_branch_length()), 2)
        for i in range(max_iter):
            if infer_gtr:
                self.infer_gtr(site_specific=site_specific_gtr, marginal=True,
                               normalized_rate=True, pc=pc)
                self.infer_ancestral_sequences(marginal=True)

            old_bl = self.tree.total_branch_length()
            tol = 1e-8 + 0.01**(i+1)
            for n in self.tree.find_clades():
                if n.up is None:
                    continue
                if n.up.up is None and len(n.up.clades)==2: # children of a bifurcating root!
                    n1, n2 = n.up.clades
                    total_bl = n1.branch_length + n2.branch_length
                    bl_ratio = n1.branch_length/total_bl
                    prof_c = n1.marginal_subtree_LH
                    prof_p = normalize_profile(n2.marginal_subtree_LH*self.tree.root.marginal_outgroup_LH)[0]
                    new_bl = self.gtr.optimal_t_compressed((prof_p, prof_c), self.data.multiplicity,
                                                           profiles=True, tol=tol)
                    update_val = new_bl*(1-damping**(i+1)) + total_bl*damping**(i+1)
                    n1.branch_length = update_val*bl_ratio
                    n2.branch_length = update_val*(1-bl_ratio)
                else:
                    new_val = self.optimal_marginal_branch_length(n, tol=tol)
                    update_val = new_val*(1-damping**(i+1)) + n.branch_length*damping**(i+1)
                    n.branch_length = update_val

            self.infer_ancestral_sequences(marginal=True)
            LH = self.sequence_LH()
            deltaLH = LH - oldLH
            oldLH = LH
            dbl = self.tree.total_branch_length() - old_bl
            self.logger("TreeAnc.optimize_tree_marginal: iteration %d, LH=%1.2f (%1.2f), delta branch_length=%1.4f, total branch_length %1.4f"%
                        (i, LH, deltaLH, dbl, self.tree.total_branch_length()), 2)
            if deltaLH < LHtol:
                break

        return ttconf.SUCCESS


    def prune_short_branches(self):
        """
        If the branch length is less than the minimal value, remove the branch
        from the tree. **Requires** ancestral sequence reconstruction.
        """
        self.logger("TreeAnc.prune_short_branches: pruning short branches (max prob at zero)...", 1)
        for node in self.tree.find_clades():
            if node.up is None or node.is_terminal():
                continue

            # probability of the two sequences being separated by zero time is not small
            if self.gtr.prob_t(node.up._cseq, node._cseq, 0.0,
                               pattern_multiplicity=self.data.multiplicity) > 0.1:
                # re-assign the node children directly to its parent
                node.up.clades = [k for k in node.up.clades if k != node] + node.clades
                for clade in node.clades:
                    clade.up = node.up


    #####################################################################
    ## GTR INFERENCE
    #####################################################################
    def infer_gtr(self, marginal=False, site_specific=False, normalized_rate=True,
                  fixed_pi=None, pc=5.0, **kwargs):
        """
        Calculates a GTR model given the multiple sequence alignment and the tree.
        It performs ancestral sequence inference (joint or marginal), followed by
        branch length optimization. Then, the numbers of mutations are counted in
        the optimal tree and related to the time within which they happened. From
        these statistics, the relative state transition probabilities are inferred,
        and the transition matrix is computed.
        The result is used to construct the new GTR model of type 'custom'.
The model is assigned to the TreeAnc and is used in subsequent analysis. Parameters ----------- print_raw : bool If True, print the inferred GTR model marginal : bool If True, use marginal sequence reconstruction normalized_rate : bool If True, sets the mutation rate prefactor to 1.0. fixed_pi : np.array Provide the equilibrium character concentrations. If None is passed, the concentrations will be inferred from the alignment. pc: float Number of pseudo counts to use in gtr inference Returns ------- gtr : GTR The inferred GTR model """ if site_specific and self.data.compress: raise TypeError("TreeAnc.infer_gtr(): sequence compression and site specific GTR models are incompatible!" ) if not self.ok: raise MissingDataError("TreeAnc.infer_gtr: ERROR, sequences or tree are missing", 0) # if ancestral sequences are not in place, reconstruct them if marginal and self.sequence_reconstruction!='marginal': self._ml_anc_marginal(**kwargs) elif not self.sequence_reconstruction: self._ml_anc_joint(**kwargs) n = self.gtr.n_states L = len(self.tree.root._cseq) # matrix of mutations n_{ij}: i = derived state, j=ancestral state n_ija = np.zeros((n,n,L)) T_ia = np.zeros((n,L)) self.logger("TreeAnc.infer_gtr: counting mutations...", 2) for node in self.tree.get_nonterminals(): for c in node: if marginal: mut_stack = np.transpose(self.get_branch_mutation_matrix(c, full_sequence=False), (1,2,0)) T_ia += 0.5*self._branch_length_to_gtr(c) * mut_stack.sum(axis=0) * self.data.multiplicity T_ia += 0.5*self._branch_length_to_gtr(c) * mut_stack.sum(axis=1) * self.data.multiplicity n_ija += mut_stack * self.data.multiplicity else: for a,pos, d in c.mutations: try: i,j = self.gtr.state_index[d], self.gtr.state_index[a] except: # ambiguous positions continue cpos = self.data.full_to_compressed_sequence_map[pos] n_ija[i,j,cpos]+=1 T_ia[j,cpos] += 0.5*self._branch_length_to_gtr(c) T_ia[i,cpos] -= 0.5*self._branch_length_to_gtr(c) for i, nuc in enumerate(self.gtr.alphabet): cseq = c.cseq if cseq is not None: ind = cseq==nuc T_ia[i,ind] += self._branch_length_to_gtr(c)*self.data.multiplicity[ind] self.logger("TreeAnc.infer_gtr: counting mutations...done", 3) if site_specific: if marginal: root_state = self.tree.root.marginal_profile.T else: root_state = seq2prof(self.tree.root.cseq, self.gtr.profile_map).T self._gtr = GTR_site_specific.infer(n_ija, T_ia, pc=pc, root_state=root_state, logger=self.logger, alphabet=self.gtr.alphabet, prof_map=self.gtr.profile_map) else: root_state = np.array([np.sum((self.tree.root.cseq==nuc)*self.data.multiplicity) for nuc in self.gtr.alphabet]) n_ij = n_ija.sum(axis=-1) self._gtr = GTR.infer(n_ij, T_ia.sum(axis=-1), root_state, fixed_pi=fixed_pi, pc=pc, alphabet=self.gtr.alphabet, logger=self.logger, prof_map = self.gtr.profile_map) if normalized_rate: self.logger("TreeAnc.infer_gtr: setting overall rate to 1.0...", 2) if site_specific: self._gtr.mu /= self._gtr.average_rate().mean() else: self._gtr.mu=1.0 return self._gtr def infer_gtr_iterative(self, max_iter=10, site_specific=False, LHtol=0.1, pc=1.0, normalized_rate=False): """infer GTR model by iteratively estimating ancestral sequences and the GTR model Parameters ---------- max_iter : int, optional maximal number of iterations site_specific : bool, optional use a site specific model LHtol : float, optional stop iteration when LH improvement falls below this cutoff pc : float, optional pseudocount to use normalized_rate : bool, optional set the overall rate to 1 (makes sense when optimizing branch lengths as well) Returns ------- str 
success/failure code """ self.infer_ancestral_sequences(marginal=True) old_p = np.copy(self.gtr.Pi) old_LH = self.sequence_LH() for i in range(max_iter): self.infer_gtr(site_specific=site_specific, marginal=True, normalized_rate=normalized_rate, pc=pc) self.infer_ancestral_sequences(marginal=True) dp = np.abs(self.gtr.Pi - old_p).mean() if self.gtr.Pi.shape==old_p.shape else np.nan deltaLH = self.sequence_LH() - old_LH old_p = np.copy(self.gtr.Pi) old_LH = self.sequence_LH() self.logger("TreeAnc.infer_gtr_iterative: iteration %d, LH=%1.2f (%1.2f), deltaP=%1.4f"% (i, old_LH, deltaLH, dp), 2) if deltaLH0: slope = (Q[dtavgii] - Q[tavgii]*Q[davgii]/Q[sii]) \ /(Q[tsqii] - Q[tavgii]**2/Q[sii]) else: raise ValueError("No variation in sampling dates! Please specify your clock rate explicitly.") only_intercept=False else: only_intercept=True intercept = (Q[davgii] - Q[tavgii]*slope)/Q[sii] if (Q[tsqii] - Q[tavgii]**2/Q[sii])>0: chisq = 0.5*(Q[dsqii] - Q[davgii]**2/Q[sii] - (Q[dtavgii] - Q[davgii]*Q[tavgii]/Q[sii])**2/(Q[tsqii] - Q[tavgii]**2/Q[sii])) else: chisq = 0.5*(Q[dsqii] - Q[davgii]**2/Q[sii]) if only_intercept: return {'slope':slope, 'intercept':intercept, 'chisq': chisq} estimator_hessian = np.array([[Q[tsqii], Q[tavgii]], [Q[tavgii], Q[sii]]]) return {'slope':slope, 'intercept':intercept, 'chisq':chisq, 'hessian':estimator_hessian, 'cov':np.linalg.inv(estimator_hessian)} class TreeRegression(object): """TreeRegression This class implements an efficient regression method for quantity associated with tips and one that changes in an additive manner along the branches of the tree, e.g. the distance to the root. This implemented algorithm take into account the correlation structure of the data under the assumptions that variance increase linearly along branches as well. """ def __init__(self, tree_in, tip_value = None, branch_value = None, branch_variance = None): """ Parameters ---------- T : (Bio.Phylo.tree) Tree for which the covariances and regression are to be calculated. tip_value : (callable) function that for each tip returns the value to be used in the regression. branch_value : (callable) function that for each node of the tree returns the contribution of this branch to the value of the subtending tips. variance_function : (callable) function that for each node of the tree returns the accumulated variance """ super(TreeRegression, self).__init__() self.tree = tree_in # prep tree for li, l in enumerate(self.tree.get_terminals()): l._ii = np.array([li]) total_bl = 0 for n in self.tree.get_nonterminals(order='postorder'): n._ii = np.concatenate([c._ii for c in n]) n._ii.sort() for c in n: c.up=n total_bl+=c.branch_length self.tree.root.up=None self.N = self.tree.root._ii.shape[0] if tip_value is None: self.tip_value = lambda x:np.mean(x.numdate) if x.is_terminal() else None else: self.tip_value = tip_value if branch_value is None: self.branch_value = lambda x:x.branch_length else: self.branch_value = branch_value if branch_variance is None: # provide a default equal to the branch_length (Poisson) and add # a tenth of the average branch length to avoid numerical instabilities and division by 0. self.branch_variance = lambda x:x.branch_length + 0.05*total_bl/self.N else: self.branch_variance = branch_variance def Cov(self): """ calculate the covariance matrix of the tips assuming variance has accumulated along branches of the tree accoriding to the the provided Returns ------- M : (np.array) covariance matrix with tips arranged standard transersal order. 
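            For two tips i and j, the entry M[i,j] equals the sum of
            branch_variance(b) over all branches b that are ancestral to both
            tips, i.e. the variance accumulated on the shared part of their
            root-to-tip paths. This is what the 'squares' added in the loop
            below compute.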
""" # accumulate the covariance matrix by adding 'squares' M = np.zeros((self.N, self.N)) for n in self.tree.find_clades(): if n == self.tree.root: continue M[np.meshgrid(n._ii, n._ii)] += self.branch_variance(n) return M def CovInv(self): """ Inverse of the covariance matrix Returns ------- H : (np.array) inverse of the covariance matrix. """ self.recurse(full_matrix=True) return self.tree.root.cinv def recurse(self, full_matrix=False): """ recursion to calculate inverse covariance matrix Parameters ---------- full_matrix : bool, optional if True, the entire inverse matrix is calculated. otherwise, only the weighing vector. """ for n in self.tree.get_nonterminals(order='postorder'): n_leaves = len(n._ii) if full_matrix: M = np.zeros((n_leaves, n_leaves), dtype=float) r = np.zeros(n_leaves, dtype=float) c_count = 0 for c in n: ssq = self.branch_variance(c) nc = len(c._ii) if c.is_terminal(): if full_matrix: M[c_count, c_count] = 1.0/ssq r[c_count] = 1.0/ssq else: if full_matrix: M[c_count:c_count+nc, c_count:c_count+nc] = c.cinv - ssq*np.outer(c.r,c.r)/(1+ssq*c.s) r[c_count:c_count+nc] = c.r/(1+ssq*c.s) c_count += nc if full_matrix: n.cinv = M n.r = r #M.sum(axis=1) n.s = n.r.sum() def _calculate_averages(self): """ calculate the weighted sums of the tip and branch values and their second moments. """ for n in self.tree.get_nonterminals(order='postorder'): Q = np.zeros(6, dtype=float) for c in n: tv = self.tip_value(c) bv = self.branch_value(c) var = self.branch_variance(c) Q += self.propagate_averages(c, tv, bv, var) n.Q=Q for n in self.tree.find_clades(order='preorder'): O = np.zeros(6, dtype=float) if n==self.tree.root: n.Qtot = n.Q continue for c in n.up: if c==n: continue tv = self.tip_value(c) bv = self.branch_value(c) var = self.branch_variance(c) O += self.propagate_averages(c, tv, bv, var) if n.up!=self.tree.root: c = n.up tv = self.tip_value(c) bv = self.branch_value(c) var = self.branch_variance(c) O += self.propagate_averages(c, tv, bv, var, outgroup=True) n.O = O if not n.is_terminal(): tv = self.tip_value(n) bv = self.branch_value(n) var = self.branch_variance(n) n.Qtot = n.Q + self.propagate_averages(n, tv, bv, var, outgroup=True) def propagate_averages(self, n, tv, bv, var, outgroup=False): """ This function implements the propagation of the means, variance, and covariances along a branch. It operates both towards the root and tips. Parameters ---------- n : (node) the branch connecting this node to its parent is used for propagation tv : (float) tip value. Only required if not is terminal bl : (float) branch value. 
The increment of the tree associated quantity' var : (float) the variance increment along the branch Returns ------- Q : (np.array) a vector of length 6 containing the updated quantities """ if n.is_terminal() and outgroup==False: if tv is None or np.isinf(tv) or np.isnan(tv): res = np.array([0, 0, 0, 0, 0, 0]) elif var==0: res = np.array([np.inf, np.inf, np.inf, np.inf, np.inf, np.inf]) else: res = np.array([ tv/var, bv/var, tv**2/var, bv*tv/var, bv**2/var, 1.0/var], dtype=float) else: tmpQ = n.O if outgroup else n.Q denom = 1.0/(1+var*tmpQ[sii]) res = np.array([ tmpQ[tavgii]*denom, (tmpQ[davgii] + bv*tmpQ[sii])*denom, tmpQ[tsqii] - var*tmpQ[tavgii]**2*denom, tmpQ[dtavgii] + tmpQ[tavgii]*bv - var*tmpQ[tavgii]*(tmpQ[davgii] + bv*tmpQ[sii])*denom, tmpQ[dsqii] + 2*bv*tmpQ[davgii] + bv**2*tmpQ[sii] - var*(tmpQ[davgii]**2 + 2*bv*tmpQ[davgii]*tmpQ[sii] + bv**2*tmpQ[sii]**2)*denom, tmpQ[sii]*denom] ) return res def explained_variance(self): """calculate standard explained variance Returns ------- float r-value of the root-to-tip distance and time. independent of regression model, but dependent on root choice """ self.tree.root._v=0 for n in self.tree.get_nonterminals(order='preorder'): for c in n: c._v = n._v + self.branch_value(c) raw = np.array([(self.tip_value(n), n._v) for n in self.tree.get_terminals() if self.tip_value(n) is not None]) return np.corrcoef(raw.T)[0,1] def regression(self, slope=None): """regress tip values against branch values Parameters ---------- slope : None, optional if given, the slope isn't optimized Returns ------- dict regression parameters """ self._calculate_averages() clock_model = base_regression(self.tree.root.Q, slope=slope) clock_model['r_val'] = self.explained_variance() return clock_model def find_best_root(self, force_positive=True, slope=None): """ determine the position on the tree that minimizes the bilinear product of the inverse covariance and the data vectors. 
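        Parameters
        ----------
        force_positive : bool, optional
            if True, only root placements that imply a positive clock rate
            estimate are accepted
        slope : float, optional
            if given, the clock rate is fixed to this value during the root search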
        Returns
        -------
        best_root : (dict)
            dictionary with the node, the fraction `x` at which the branch
            is to be split, and the regression parameters
        '''
        self._calculate_averages()

        best_root = {"chisq": np.inf}
        for n in self.tree.find_clades():
            if n==self.tree.root:
                continue

            tv = self.tip_value(n)
            bv = self.branch_value(n)
            var = self.branch_variance(n)
            x, chisq = self._optimal_root_along_branch(n, tv, bv, var, slope=slope)
            if chisq<best_root["chisq"]:
                tmpQ = self.propagate_averages(n, tv, bv*x, var*x) \
                     + self.propagate_averages(n, tv, bv*(1-x), var*(1-x), outgroup=True)
                reg = base_regression(tmpQ, slope=slope)
                if reg["slope"]>=0 or (force_positive==False):
                    best_root = {"node":n, "split":x}
                    best_root.update(reg)

        if 'node' not in best_root:
            print("TreeRegression.find_best_root: No valid root found!", force_positive)
            return None

        if 'hessian' in best_root:
            # calculate differentials with respect to x
            deriv = []
            n = best_root["node"]
            tv = self.tip_value(n)
            bv = self.branch_value(n)
            var = self.branch_variance(n)
            for dx in [-0.001, 0.001]:
                # y needs to be bounded away from 0 and 1 to avoid division by 0
                y = min(0.9999, max(0.0001, best_root["split"]+dx))
                tmpQ = self.propagate_averages(n, tv, bv*y, var*y) \
                     + self.propagate_averages(n, tv, bv*(1-y), var*(1-y), outgroup=True)
                reg = base_regression(tmpQ, slope=slope)
                deriv.append([y, reg['chisq'], tmpQ[tavgii], tmpQ[davgii]])

            estimator_hessian = np.zeros((3,3))
            estimator_hessian[:2,:2] = best_root['hessian']
            estimator_hessian[2,2] = (deriv[0][1] + deriv[1][1] - 2.0*best_root['chisq'])/(deriv[0][0] - deriv[1][0])**2
            # estimator_hessian[2,0] = (deriv[0][2] - deriv[1][2])/(deriv[0][0] - deriv[1][0])
            # estimator_hessian[2,1] = (deriv[0][3] - deriv[1][3])/(deriv[0][0] - deriv[1][0])
            estimator_hessian[0,2] = estimator_hessian[2,0]
            estimator_hessian[1,2] = estimator_hessian[2,1]
            best_root['hessian'] = estimator_hessian
            best_root['cov'] = np.linalg.inv(estimator_hessian)

        return best_root


    def _optimal_root_along_branch(self, n, tv, bv, var, slope=None):
        from scipy.optimize import minimize_scalar
        def chisq(x):
            tmpQ = self.propagate_averages(n, tv, bv*x, var*x) \
                 + self.propagate_averages(n, tv, bv*(1-x), var*(1-x), outgroup=True)
            return base_regression(tmpQ, slope=slope)['chisq']

        if n.bad_branch or (n!=self.tree.root and n.up.bad_branch):
            return np.nan, np.inf

        chisq_prox = np.inf if n.is_terminal() else base_regression(n.Qtot, slope=slope)['chisq']
        chisq_dist = np.inf if n==self.tree.root else base_regression(n.up.Qtot, slope=slope)['chisq']

        grid = np.linspace(0.001, 0.999, 6)
        chisq_grid = np.array([chisq(x) for x in grid])
        min_chisq = chisq_grid.min()
        if chisq_prox<=min_chisq:
            return 0.0, chisq_prox
        elif chisq_dist<=min_chisq:
            return 1.0, chisq_dist
        else:
            ii = np.argmin(chisq_grid)
            bounds = (0 if ii==0 else grid[ii-1], 1.0 if ii==len(grid)-1 else grid[ii+1])
            sol = minimize_scalar(chisq, bounds=bounds, method="bounded")
            if sol["success"]:
                return sol['x'], sol['fun']
            else:
                return np.nan, np.inf


    def optimal_reroot(self, force_positive=True, slope=None):
        """
        determine the best root and reroot the tree to this node.
        Note that this can change the parent-child relations of the tree
        and values associated with branches rather than nodes
        (e.g. confidence) might need to be re-evaluated afterwards

        Parameters
        ----------
        force_positive : bool, optional
            if True, the search for a root will only consider positive rate estimates
        slope : float, optional
            if given, it will find the optimal root given a fixed rate.
If slope==0, this corresponds to minimal root-to-tip variance rooting (min_dev) Returns ------- dict regression parameters """ best_root = self.find_best_root(force_positive=force_positive, slope=slope) if best_root is None: raise ValueError("Rerooting failed!") best_node = best_root["node"] x = best_root["split"] if x<1e-5: new_node = best_node elif x>1.0-1e-5: new_node = best_node.up else: # create new node in the branch and root the tree to it new_node = Phylo.BaseTree.Clade() # insert the new node in the middle of the branch # by simple re-wiring the links on the both sides of the branch # and fix the branch lengths new_node.branch_length = best_node.branch_length*(1-x) new_node.up = best_node.up new_node.clades = [best_node] new_node.up.clades = [k if k!=best_node else new_node for k in best_node.up.clades] best_node.branch_length *= x best_node.up = new_node new_node.rtt_regression = best_root self.tree.root_with_outgroup(new_node) self.tree.ladderize() for n in self.tree.get_nonterminals(order='postorder'): for c in n: c.up=n return best_root def clock_plot(self, add_internal=False, ax=None, regression=None, confidence=True, n_sigma = 2, fs=14): """Plot root-to-tip distance vs time as a basic time-tree diagnostic Parameters ---------- add_internal : bool, optional add internal nodes. this will only work if the tree has been dated already ax : None, optional an matplotlib axis to plot into. if non provided, a new figure is opened regression : None, optional a dict containing parameters of a root-to-tip vs time regression as returned by the function base_regression confidence : bool, optional add confidence area to the regression line n_sigma : int, optional number of standard deviations for the confidence area. fs : int, optional fontsize """ import matplotlib.pyplot as plt if ax is None: plt.figure() ax=plt.subplot(111) self.tree.root._v=0 for n in self.tree.get_nonterminals(order='preorder'): for c in n: c._v = n._v + self.branch_value(c) tips = self.tree.get_terminals() internal = self.tree.get_nonterminals() # get values of terminals xi = np.array([self.tip_value(n) for n in tips]) yi = np.array([n._v for n in tips]) ind = np.array([n.bad_branch if hasattr(n, 'bad_branch') else False for n in tips]) if add_internal: xi_int = np.array([n.numdate for n in internal]) yi_int = np.array([n._v for n in internal]) ind_int = np.array([n.bad_branch if hasattr(n, 'bad_branch') else False for n in internal]) if regression: # plot regression line t_mrca = -regression['intercept']/regression['slope'] if add_internal: time_span = np.max(xi_int[~ind_int]) - np.min(xi_int[~ind_int]) x_vals = np.array([max(np.min(xi_int[~ind_int]), t_mrca) - 0.1*time_span, np.max(xi[~ind])+0.05*time_span]) else: time_span = np.max(xi[~ind]) - np.min(xi[~ind]) x_vals = np.array([max(np.min(xi[~ind]), t_mrca) - 0.1*time_span, np.max(xi[~ind]+0.05*time_span)]) # plot confidence interval if confidence and 'cov' in regression: x_vals = np.linspace(x_vals[0], x_vals[1], 100) y_vals = regression['slope']*x_vals + regression['intercept'] dev = n_sigma*np.array([np.sqrt(regression['cov'][:2,:2].dot(np.array([x, 1])).dot(np.array([x,1]))) for x in x_vals]) dev_slope = n_sigma*np.sqrt(regression['cov'][0,0]) ax.fill_between(x_vals, y_vals-dev, y_vals+dev, alpha=0.2) dp = np.array([regression['intercept']/regression['slope']**2,-1./regression['slope']]) dev_rtt = n_sigma*np.sqrt(regression['cov'][:2,:2].dot(dp).dot(dp)) else: dev_rtt = None dev_slope = None ax.plot(x_vals, regression['slope']*x_vals + 
regression['intercept'], label = r"$y=\alpha + \beta t$"+"\n"+ r"$\beta=$%1.2e"%(regression["slope"]) + ("+/- %1.e"%dev_slope if dev_slope else "") + "\nroot date: %1.1f"%(-regression['intercept']/regression['slope']) + ("+/- %1.2f"%dev_rtt if dev_rtt else "")) ax.scatter(xi[~ind], yi[~ind], label=("tips" if add_internal else None)) if ind.sum(): try: # note: this is treetime specific tmp_x = np.array([np.mean(n.raw_date_constraint) if n.raw_date_constraint else None for n in self.tree.get_terminals()]) ax.scatter(tmp_x[ind], yi[ind], label="ignored tips", c='r') except: pass if add_internal: ax.scatter(xi_int[~ind_int], yi_int[~ind_int], label="internal nodes") ax.set_ylabel('root-to-tip distance', fontsize=fs) ax.set_xlabel('date', fontsize=fs) ax.ticklabel_format(useOffset=False) ax.tick_params(labelsize=fs*0.8) ax.set_ylim([0, 1.1*np.max(yi)]) plt.tight_layout() plt.legend(fontsize=fs*0.8) if __name__ == '__main__': import matplotlib.pyplot as plt import time plt.ion() # tree_file = '../data/H3N2_NA_allyears_NA.20.nwk' # date_file = '../data/H3N2_NA_allyears_NA.20.metadata.csv' tree_file = '../data/ebola.nwk' date_file = '../data/ebola.metadata.csv' T = Phylo.read(tree_file, 'newick') #T.root_with_outgroup('A/Canterbury/58/2000|CY009150|09/05/2000|New_Zealand||H3N2/8-1416') dates = {} with open(date_file, 'r', encoding='utf-8') as ifile: ifile.readline() for line in ifile: if line[0]!='#': fields = line.strip().split(',') dates[fields[0]] = float(fields[1]) for l in T.get_terminals(): l.numdate = dates[l.name] branch_variance = lambda x:(x.branch_length+(0.0005 if x.is_terminal() else 0.0))/19000.0 #branch_variance = lambda x:(x.branch_length+(0.005 if x.is_terminal() else 0.0))/1700.0 #branch_variance = lambda x:1.0 if x.is_terminal() else 0.0 tstart = time.time() mtc = TreeRegression(T, branch_variance = branch_variance) print(time.time()-tstart) reg = mtc.optimal_reroot() print(time.time()-tstart) print(reg) plt.figure() ti = [] rtt = [] T.root.rtt=0 for n in T.get_nonterminals(order='preorder'): for c in n: c.rtt = n.rtt + c.branch_length for l in T.get_terminals(): ti.append(l.numdate) rtt.append(l.rtt) ti = np.array(ti) rtt = np.array(rtt) plt.plot(ti, rtt) plt.plot(ti, reg["slope"]*ti + reg["intercept"]) plt.show() Phylo.draw(T) treetime-0.8.6/treetime/treetime.py000066400000000000000000001236261417362145000173660ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import numpy as np from scipy import optimize as sciopt from Bio import Phylo from . import config as ttconf from . 
import MissingDataError,UnknownMethodError,NotReadyError from .utils import tree_layout from .clock_tree import ClockTree rerooting_mechanisms = ["min_dev", "best", "least-squares"] deprecated_rerooting_mechanisms = {"residual":"least-squares", "res":"least-squares", "min_dev_ML": "min_dev", "ML":"least-squares"} class TreeTime(ClockTree): """ TreeTime is a wrapper class to ClockTree that adds additional functionality such as reroot, detection and exclusion of outliers, resolution of polytomies using temporal information, and relaxed molecular clock models """ def __init__(self, *args,**kwargs): """ TreeTime constructor Parameters ----------- *args Arguments to construct ClockTree **kwargs Keyword arguments to construct the GTR model """ super(TreeTime, self).__init__(*args, **kwargs) def run(self, root=None, infer_gtr=True, relaxed_clock=None, n_iqd = None, resolve_polytomies=True, max_iter=0, Tc=None, fixed_clock_rate=None, time_marginal=False, sequence_marginal=False, branch_length_mode='auto', vary_rate=False, use_covariation=False, tracelog_file=None, **kwargs): """ Run TreeTime reconstruction. Based on the input parameters, it divides the analysis into semi-independent jobs and conquers them one-by-one, gradually optimizing the tree given the temporal constarints and leaf node sequences. Parameters ---------- root : str Try to find better root position on a given tree. If string is passed, the root will be searched according to the specified method. If none, use tree as-is. See :py:meth:`treetime.TreeTime.reroot` for available rooting methods. infer_gtr : bool If True, infer GTR model relaxed_clock : dic If not None, use autocorrelated molecular clock model. Specify the clock parameters as :code:`{slack:, coupling:}` dictionary. n_iqd : float If not None, filter tree nodes which do not obey the molecular clock for the particular tree. The nodes, which deviate more than :code:`n_iqd` interquantile intervals from the molecular clock regression will be marked as 'BAD' and not used in the TreeTime analysis resolve_polytomies : bool If True, attempt to resolve multiple mergers max_iter : int Maximum number of iterations to optimize the tree Tc : float, str If not None, use coalescent model to correct the branch lengths by introducing merger costs. If Tc is float, it is interpreted as the coalescence time scale If Tc is str, it should be one of (:code:`opt`, :code:`const`, :code:`skyline`) fixed_clock_rate : float Fixed clock rate to be used. If None, infer clock rate from the molecular clock. time_marginal : bool If True, perform a final round of marginal reconstruction of the node's positions. sequence_marginal : bool, optional use marginal reconstruction for ancestral sequences branch_length_mode : str Should be one of: :code:`joint`, :code:`marginal`, :code:`input`. If 'input', rely on the branch lengths in the input tree and skip directly to the maximum-likelihood ancestral sequence reconstruction. Otherwise, perform preliminary sequence reconstruction using parsimony algorithm and do branch length optimization vary_rate : bool or float, optional redo the time tree estimation for rates +/- one standard deviation. if a float is passed, it is interpreted as standard deviation, otherwise this standard deviation is estimated from the root-to-tip regression use_covariation : bool, optional default False, if False, rate estimates will be performed using simple regression ignoring phylogenetic covaration between nodes. 
If vary_rate is True, use_covariation is true by default **kwargs Keyword arguments needed by the downstream functions Returns ------- TreeTime error/succces code : str return value depending on success or error """ # register the specified covaration mode self.use_covariation = use_covariation or (vary_rate and (not type(vary_rate)==float)) if (self.tree is None) or (self.aln is None and self.data.full_length is None): raise MissingDataError("TreeTime.run: ERROR, alignment or tree are missing") if self.aln is None: branch_length_mode='input' self._set_branch_length_mode(branch_length_mode) # determine how to reconstruct and sample sequences seq_kwargs = {"marginal_sequences":sequence_marginal or (self.branch_length_mode=='marginal'), "branch_length_mode": self.branch_length_mode, "sample_from_profile":"root", "reconstruct_tip_states":kwargs.get("reconstruct_tip_states", False)} tt_kwargs = {'clock_rate':fixed_clock_rate, 'time_marginal':False} tt_kwargs.update(kwargs) seq_LH = 0 if "fixed_pi" in kwargs: seq_kwargs["fixed_pi"] = kwargs["fixed_pi"] if "do_marginal" in kwargs: time_marginal=kwargs["do_marginal"] # initially, infer ancestral sequences and infer gtr model if desired if self.branch_length_mode=='input': if self.aln: self.infer_ancestral_sequences(infer_gtr=infer_gtr, marginal=seq_kwargs["marginal_sequences"], **seq_kwargs) self.prune_short_branches() else: self.optimize_tree(infer_gtr=infer_gtr, max_iter=1, prune_short=True, **seq_kwargs) avg_root_to_tip = np.mean([x.dist2root for x in self.tree.get_terminals()]) # optionally reroot the tree either by oldest, best regression or with a specific leaf if n_iqd or root=='clock_filter': if "plot_rtt" in kwargs and kwargs["plot_rtt"]: plot_rtt=True else: plot_rtt=False reroot_mechanism = 'least-squares' if root=='clock_filter' else root self.clock_filter(reroot=reroot_mechanism, n_iqd=n_iqd, plot=plot_rtt, fixed_clock_rate=fixed_clock_rate) elif root is not None: self.reroot(root=root, clock_rate=fixed_clock_rate) if self.branch_length_mode=='input': if self.aln: self.infer_ancestral_sequences(**seq_kwargs) else: self.optimize_tree(max_iter=1, prune_short=False,**seq_kwargs) # infer time tree and optionally resolve polytomies self.logger("###TreeTime.run: INITIAL ROUND",0) self.make_time_tree(**tt_kwargs) if self.aln: seq_LH = self.tree.sequence_marginal_LH if seq_kwargs['marginal_sequences'] else self.tree.sequence_joint_LH self.LH =[[seq_LH, self.tree.positional_joint_LH, 0]] if root is not None and max_iter: new_root = self.reroot(root='least-squares' if root=='clock_filter' else root, clock_rate=fixed_clock_rate) self.logger("###TreeTime.run: rerunning timetree after rerooting",0) self.make_time_tree(**tt_kwargs) # iteratively reconstruct ancestral sequences and re-infer # time tree to ensure convergence. 
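        # Each pass below re-optimizes branch lengths and ancestral states
        # given the current time tree, optionally adds the coalescent prior
        # and resolves polytomies, and then re-runs the time tree inference;
        # iteration ends after max_iter rounds, or earlier once the
        # reconstruction no longer changes.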
niter = 0 ndiff = 0 # Initialize the tracelog dict attribute self.trace_run = [] self.trace_run.append(self.tracelog_run(niter=0, ndiff=0, n_resolved=0, time_marginal = tt_kwargs['time_marginal'], sequence_marginal = seq_kwargs['marginal_sequences'], Tc=None, tracelog=tracelog_file)) need_new_time_tree=False while niter < max_iter: self.logger("###TreeTime.run: ITERATION %d out of %d iterations"%(niter+1,max_iter),0) # add coalescent prior tmpTc=None if Tc: if Tc=='skyline' and niter0.1: bl_mode = 'input' else: bl_mode = 'joint' self.logger("TreeTime._set_branch_length_mode: maximum branch length is %1.3e, using branch length mode %s"%(max_bl, bl_mode),1) self.branch_length_mode = bl_mode else: self.branch_length_mode = 'input' def clock_filter(self, reroot='least-squares', n_iqd=None, plot=False, fixed_clock_rate=None): r''' Labels outlier branches that don't seem to follow a molecular clock and excludes them from subsequent molecular clock estimation and the timetree propagation. Parameters ---------- reroot : str Method to find the best root in the tree (see :py:meth:`treetime.TreeTime.reroot` for options) n_iqd : float Number of iqd intervals. The outlier nodes are those which do not fall into :math:`IQD\cdot n_iqd` interval (:math:`IQD` is the interval between 75\ :sup:`th` and 25\ :sup:`th` percentiles) If None, the default (3) assumed plot : bool If True, plot the results ''' if n_iqd is None: n_iqd = ttconf.NIQD if type(reroot) is list and len(reroot)==1: reroot=str(reroot[0]) terminals = self.tree.get_terminals() if reroot: self.reroot(root='least-squares' if reroot=='best' else reroot, covariation=False, clock_rate=fixed_clock_rate) else: self.get_clock_model(covariation=False, slope=fixed_clock_rate) clock_rate = self.clock_model['slope'] icpt = self.clock_model['intercept'] res = {} for node in terminals: if hasattr(node, 'raw_date_constraint') and (node.raw_date_constraint is not None): res[node] = node.dist2root - clock_rate*np.mean(node.raw_date_constraint) - icpt residuals = np.array(list(res.values())) iqd = np.percentile(residuals,75) - np.percentile(residuals,25) bad_branch_count = 0 for node,r in res.items(): if abs(r)>n_iqd*iqd and node.up.up is not None: self.logger('TreeTime.ClockFilter: marking %s as outlier, residual %f interquartile distances'%(node.name,r/iqd), 3, warn=True) node.bad_branch=True bad_branch_count += 1 else: node.bad_branch=False if bad_branch_count>0.34*self.tree.count_terminals(): self.logger("TreeTime.clock_filter: More than a third of leaves have been excluded by the clock filter. 
Please check your input data.", 0, warn=True) # reassign bad_branch flags to internal nodes self.prepare_tree() # redo root estimation after outlier removal if reroot: self.reroot(root=reroot, clock_rate=fixed_clock_rate) if plot: self.plot_root_to_tip() return ttconf.SUCCESS def plot_root_to_tip(self, add_internal=False, label=True, ax=None): """ Plot root-to-tip regression Parameters ---------- add_internal : bool If true, plot inte`rnal node positions label : bool If true, label the plots ax : matplotlib axes If not None, use the provided matplotlib axes to plot the results """ Treg = self.setup_TreeRegression() if self.clock_model and 'cov' in self.clock_model: cf = self.clock_model['valid_confidence'] else: cf = False Treg.clock_plot(ax=ax, add_internal=add_internal, confidence=cf, n_sigma=1, regression=self.clock_model) def reroot(self, root='least-squares', force_positive=True, covariation=None, clock_rate=None): """ Find best root and re-root the tree to the new root Parameters ---------- root : str Which method should be used to find the best root. Available methods are: :code:`best`, `least-squares` - minimize squared residual or likelihood of root-to-tip regression :code:`min_dev` - minimize variation of root-to-tip distance :code:`oldest` - reroot on the oldest node :code:`` - reroot to the node with name :code:`` :code:`[, , ...]` - reroot to the MRCA of these nodes force_positive : bool only consider positive rates when searching for the optimal root covariation : bool account for covariation in root-to-tip regression """ if type(root) is list and len(root)==1: root=str(root[0]) if root=='best': root='least-squares' use_cov = self.use_covariation if covariation is None else covariation slope = 0.0 if type(root)==str and root.startswith('min_dev') else clock_rate old_root = self.tree.root self.logger("TreeTime.reroot: with method or node: %s"%root,0) for n in self.tree.find_clades(): n.branch_length=n.mutation_length if (type(root) is str) and \ (root in rerooting_mechanisms or root in deprecated_rerooting_mechanisms): if root in deprecated_rerooting_mechanisms: if "ML" in root: use_cov=True self.logger('TreeTime.reroot: rerooting mechanisms %s has been renamed to %s' %(root, deprecated_rerooting_mechanisms[root]), 1, warn=True) root = deprecated_rerooting_mechanisms[root] self.logger("TreeTime.reroot: rerooting will %s covariance and shared ancestry."%("account for" if use_cov else "ignore"),0) new_root = self._find_best_root(covariation=use_cov, slope = slope, force_positive=force_positive and (not root.startswith('min_dev'))) else: if isinstance(root,Phylo.BaseTree.Clade): new_root = root elif isinstance(root, list): new_root = self.tree.common_ancestor(root) elif root in self._leaves_lookup: new_root = self._leaves_lookup[root] elif root=='oldest': new_root = sorted([n for n in self.tree.get_terminals() if n.raw_date_constraint is not None], key=lambda x:np.mean(x.raw_date_constraint))[0] else: raise UnknownMethodError('TreeTime.reroot -- ERROR: unsupported rooting mechanisms or root not found') #this forces a bifurcating root, as we want. Branch lengths will be reoptimized anyway. #(Without outgroup_branch_length, gives a trifurcating root, but this will mean #mutations may have to occur multiple times.) 
self.tree.root_with_outgroup(new_root, outgroup_branch_length=new_root.branch_length/2) self.get_clock_model(covariation=use_cov, slope = slope) self.logger("TreeTime.reroot: Tree was re-rooted to node " +('new_node' if new_root.name is None else new_root.name), 2) self.tree.root.branch_length = self.one_mutation self.tree.root.clock_length = self.one_mutation self.tree.root.raw_date_constraint = None if hasattr(new_root, 'time_before_present'): self.tree.root.time_before_present = new_root.time_before_present if hasattr(new_root, 'numdate'): self.tree.root.numdate = new_root.numdate # set root.gamma bc root doesn't have a branch_length_interpolator but gamma is needed if not hasattr(self.tree.root, 'gamma'): self.tree.root.gamma = 1.0 for n in self.tree.find_clades(): n.mutation_length = n.branch_length if not hasattr(n, 'clock_length'): n.clock_length = n.branch_length self.prepare_tree() self.get_clock_model(covariation=self.use_covariation, slope=slope) return new_root def resolve_polytomies(self, merge_compressed=False): """ Resolve the polytomies on the tree. The function scans the tree, resolves polytomies if present, and re-optimizes the tree with new topology. Note that polytomies are only resolved if that would result in higher likelihood. Sometimes, stretching two or more branches that carry several mutations is less costly than an additional branch with zero mutations (long branches are not stiff, short branches are). Parameters ---------- merge_compressed : bool If True, keep compressed branches as polytomies. If False, return a strictly binary tree. Returns -------- poly_found : int The number of polytomies found """ self.logger("TreeTime.resolve_polytomies: resolving multiple mergers...",1) poly_found=0 for n in self.tree.find_clades(): if len(n.clades) > 2: prior_n_clades = len(n.clades) self._poly(n, merge_compressed) poly_found+=prior_n_clades - len(n.clades) obsolete_nodes = [n for n in self.tree.find_clades() if len(n.clades)==1 and n.up is not None] for node in obsolete_nodes: self.logger('TreeTime.resolve_polytomies: remove obsolete node '+node.name,4) if node.up is not None: self.tree.collapse(node) if poly_found: self.logger('TreeTime.resolve_polytomies: introduces %d new nodes'%poly_found,3) else: self.logger('TreeTime.resolve_polytomies: No more polytomies to resolve',3) return poly_found def _poly(self, clade, merge_compressed): """ Function to resolve polytomies for a given parent node. If the number of the direct decendants is less than three (not a polytomy), does nothing. Otherwise, for each pair of nodes, assess the possible LH increase which could be gained by merging the two nodes. The increase in the LH is basically the tradeoff between the gain of the LH due to the changing the branch lenghts towards the optimal values and the decrease due to the introduction of the new branch with zero optimal length. 
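        Resolution proceeds greedily: the pair of children with the largest
        expected likelihood gain is merged first, and the procedure is then
        repeated on the reduced set of children (see merge_nodes below).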
""" from .branch_len_interpolator import BranchLenInterpolator zero_branch_slope = self.gtr.mu*self.data.full_length def _c_gain(t, n1, n2, parent): """ cost gain if nodes n1, n2 are joined and their parent is placed at time t cost gain = (LH loss now) - (LH loss when placed at time t) """ cg2 = n2.branch_length_interpolator(parent.time_before_present - n2.time_before_present) - n2.branch_length_interpolator(t - n2.time_before_present) cg1 = n1.branch_length_interpolator(parent.time_before_present - n1.time_before_present) - n1.branch_length_interpolator(t - n1.time_before_present) cg_new = - zero_branch_slope * (parent.time_before_present - t) # loss in LH due to the new branch return -(cg2+cg1+cg_new) def cost_gain(n1, n2, parent): """ cost gained if the two nodes would have been connected. """ try: cg = sciopt.minimize_scalar(_c_gain, bounds=[max(n1.time_before_present,n2.time_before_present), parent.time_before_present], method='Bounded',args=(n1,n2, parent)) return cg['x'], - cg['fun'] except: self.logger("TreeTime._poly.cost_gain: optimization of gain failed", 3, warn=True) return parent.time_before_present, 0.0 def merge_nodes(source_arr, isall=False): mergers = np.array([[cost_gain(n1,n2, clade) if i1 1 + int(isall): # max possible gains of the cost when connecting the nodes: # this is only a rough approximation because it assumes the new node positions # to be optimal new_positions = mergers[:,:,0] cost_gains = mergers[:,:,1] # set zero to large negative value and find optimal pair np.fill_diagonal(cost_gains, -1e11) idxs = np.unravel_index(cost_gains.argmax(),cost_gains.shape) if (idxs[0] == idxs[1]) or cost_gains.max()<0: self.logger("TreeTime._poly.merge_nodes: node is not fully resolved "+clade.name,4) return LH n1, n2 = source_arr[idxs[0]], source_arr[idxs[1]] LH += cost_gains[idxs] new_node = Phylo.BaseTree.Clade() # fix positions and branch lengths new_node.time_before_present = new_positions[idxs] new_node.branch_length = clade.time_before_present - new_node.time_before_present new_node.clades = [n1,n2] n1.branch_length = new_node.time_before_present - n1.time_before_present n2.branch_length = new_node.time_before_present - n2.time_before_present # set parameters for the new node new_node.up = clade new_node.tt = self n1.up = new_node n2.up = new_node if hasattr(clade, "_cseq"): new_node._cseq = clade._cseq self.add_branch_state(new_node) new_node.mutation_length = 0.0 if self.branch_length_mode=='marginal': new_node.marginal_subtree_LH = clade.marginal_subtree_LH new_node.marginal_outgroup_LH = clade.marginal_outgroup_LH new_node.profile_pair = self.marginal_branch_profile(new_node) new_node.branch_length_interpolator = BranchLenInterpolator(new_node, self.gtr, pattern_multiplicity = self.data.multiplicity, min_width=self.min_width, one_mutation=self.one_mutation, branch_length_mode=self.branch_length_mode) clade.clades.remove(n1) clade.clades.remove(n2) clade.clades.append(new_node) self.logger('TreeTime._poly.merge_nodes: creating new node as child of '+clade.name,3) self.logger("TreeTime._poly.merge_nodes: Delta-LH = " + str(cost_gains[idxs].round(3)), 3) # and modify source_arr array for the next loop if len(source_arr)>2: # if more than 3 nodes in polytomy, replace row/column for ii in np.sort(idxs)[::-1]: tmp_ind = np.arange(mergers.shape[0])!=ii mergers = mergers[tmp_ind].swapaxes(0,1) mergers = mergers[tmp_ind].swapaxes(0,1) source_arr.remove(n1) source_arr.remove(n2) new_gains = np.array([[cost_gain(n1,new_node, clade) for n1 in source_arr]]) mergers = 
np.vstack((mergers, new_gains)).swapaxes(0,1) source_arr.append(new_node) new_gains = np.array([[cost_gain(n1,new_node, clade) for n1 in source_arr]]) mergers = np.vstack((mergers, new_gains)).swapaxes(0,1) else: # otherwise just recalculate matrix source_arr.remove(n1) source_arr.remove(n2) source_arr.append(new_node) mergers = np.array([[cost_gain(n1,n2, clade) for n1 in source_arr] for n2 in source_arr]) return LH stretched = [c for c in clade.clades if c.mutation_length < c.clock_length] compressed = [c for c in clade.clades if c not in stretched] if len(stretched)==1 and merge_compressed is False: return 0.0 LH = merge_nodes(stretched, isall=len(stretched)==len(clade.clades)) if merge_compressed and len(compressed)>1: LH += merge_nodes(compressed, isall=len(compressed)==len(clade.clades)) return LH def print_lh(self, joint=True): """ Print the total likelihood of the tree given the constrained leaves Parameters ---------- joint : bool If true, print joint LH, else print marginal LH """ try: u_lh = self.tree.unconstrained_sequence_LH if joint: s_lh = self.tree.sequence_joint_LH t_lh = self.tree.positional_joint_LH c_lh = self.tree.coalescent_joint_LH else: s_lh = self.tree.sequence_marginal_LH t_lh = self.tree.positional_marginal_LH c_lh = 0 print ("### Tree Log-Likelihood ###\n" " Sequence log-LH without constraints: \t%1.3f\n" " Sequence log-LH with constraints: \t%1.3f\n" " TreeTime sequence log-LH: \t%1.3f\n" " Coalescent log-LH: \t%1.3f\n" "#########################"%(u_lh, s_lh,t_lh, c_lh)) except: print("ERROR. Did you run the corresponding inference (joint/marginal)?") def add_coalescent_model(self, Tc, **kwargs): """Add a coalescent model to the tree and optionally optimze Parameters ---------- Tc : float,str If this is a float, it will be interpreted as the inverse merger rate in molecular clock units, if its is a """ from .merger_models import Coalescent self.logger('TreeTime.run: adding coalescent prior with Tc='+str(Tc),1) self.merger_model = Coalescent(self.tree, date2dist=self.date2dist, logger=self.logger) if Tc=='skyline': # restrict skyline model optimization to last iteration self.merger_model.optimize_skyline(**kwargs) self.logger("optimized a skyline ", 2) else: if Tc in ['opt', 'const']: self.merger_model.optimize_Tc() self.logger("optimized Tc to %f"%self.merger_model.Tc.y[0], 2) else: try: self.merger_model.set_Tc(Tc) except: self.logger("setting of coalescent time scale failed", 1, warn=True) self.merger_model.attach_to_tree() def relaxed_clock(self, slack=None, coupling=None, **kwargs): """ Allow the mutation rate to vary on the tree (relaxed molecular clock). Changes of the mutation rates from one branch to another are penalized. In addition, deviation of the mutation rate from the mean rate is penalized. 
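        For each branch, a rate multiplier gamma is chosen to minimize a
        quadratic cost of the approximate form
        stiffness*(gamma*b - b_opt)^2 + slack*(gamma - 1)^2 + coupling*sum_c (gamma - gamma_c)^2,
        where b is the current branch length, b_opt the optimal one, and the
        sum runs over the child branches (see the comments in the code below).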
Parameters ---------- slack : float Maximum change in substitution rate between parent and child nodes coupling : float Maximum difference in substitution rates in sibling nodes """ if slack is None: slack=ttconf.MU_ALPHA if coupling is None: coupling=ttconf.MU_BETA self.logger("TreeTime.relaxed_clock: slack=%f, coupling=%f"%(slack, coupling),2) c=1.0/self.one_mutation for node in self.tree.find_clades(order='postorder'): opt_len = node.mutation_length act_len = node.clock_length if hasattr(node, 'clock_length') else node.branch_length # opt_len \approx 1.0*len(node.mutations)/node.profile.shape[0] but calculated via gtr model # stiffness is the expectation of the inverse variance of branch length (one_mutation/opt_len) # contact term: stiffness*(g*bl - bl_opt)^2 + slack(g-1)^2 = # (slack+bl^2) g^2 - 2 (bl*bl_opt+1) g + C= k2 g^2 + k1 g + C node._k2 = slack + c*act_len**2/(opt_len+self.one_mutation) node._k1 = -2*(c*act_len*opt_len/(opt_len+self.one_mutation) + slack) # coupling term: \sum_c coupling*(g-g_c)^2 + Cost_c(g_c|g) # given g, g_c needs to be optimal-> 2*coupling*(g-g_c) = 2*child.k2 g_c + child.k1 # hence g_c = (coupling*g - 0.5*child.k1)/(coupling+child.k2) # substituting yields for child in node.clades: denom = coupling+child._k2 node._k2 += coupling*(1.0-coupling/denom)**2 + child._k2*coupling**2/denom**2 node._k1 += (coupling*(1.0-coupling/denom)*child._k1/denom \ - coupling*child._k1*child._k2/denom**2 \ + coupling*child._k1/denom) for node in self.tree.find_clades(order='preorder'): if node.up is None: node.gamma = max(0.1, -0.5*node._k1/node._k2) else: if node.up.up is None: g_up = node.up.gamma else: g_up = node.up.branch_length_interpolator.gamma node.branch_length_interpolator.gamma = max(0.1,(coupling*g_up - 0.5*node._k1)/(coupling+node._k2)) def tracelog_run(self, niter=0, ndiff=0, n_resolved=0, time_marginal=False, sequence_marginal=False, Tc=None, tracelog=None): """ Create a dictionary of parameters for the current iteration of the run function. Parameters ---------- niter : int The current iteration. ndiff : int The number of sequence changes. n_resolved : int The number of polytomy changes time_marginal : bool True if marginal position estimation was requested, else False sequence_marginal : bool True if marginal sequence estimation was requested, else False Tc : float, str The coalescent model that was used for the current iteration. tracelog : str The output file to write the trace log to. Returns ------- trace_dict : str A dictionary of parameters for the current iteration. 
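            The returned dictionary contains the keys 'Sample', 'ndiff',
            'n_resolved', 'seq_mode', 'seq_LH', 'pos_mode', 'pos_LH',
            'coal_mode', and 'coal_LH'; if `tracelog` is given, the same
            fields are written as one tab-separated row per iteration.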
""" # Store the run parameters in a dictionary trace_dict = { 'Sample' : niter, 'ndiff' : ndiff, 'n_resolved' : n_resolved, 'seq_mode' : ('marginal' if sequence_marginal else 'joint') if self.aln else 'no sequences given', 'seq_LH' : (self.tree.sequence_marginal_LH if sequence_marginal else self.tree.sequence_joint_LH) if self.aln else 0, 'pos_mode' : 'marginal' if time_marginal else 'joint', 'pos_LH' : self.tree.positional_marginal_LH if time_marginal else self.tree.positional_joint_LH, 'coal_mode' : Tc, 'coal_LH' : self.tree.coalescent_joint_LH, } # Write the current iteration to a file if tracelog: # Only on the initial round, write the headers if niter == 0: with open(tracelog, "w") as outfile: header = "\t".join(trace_dict.keys()) outfile.write(header + "\n") # Write the parameters with open(tracelog, "a") as outfile: params_str = [str(p) for p in trace_dict.values()] params = "\t".join(params_str) outfile.write(params + "\n") return trace_dict ############################################################################### ### rerooting ############################################################################### def _find_best_root(self, covariation=True, force_positive=True, slope=None, **kwarks): ''' Determine the node that, when the tree is rooted on this node, results in the best regression of temporal constraints and root to tip distances. Parameters ---------- infer_gtr : bool If True, infer new GTR model after re-root covariation : bool account for covariation structure when rerooting the tree force_positive : bool only accept positive evolutionary rate estimates when rerooting the tree ''' for n in self.tree.find_clades(): n.branch_length=n.mutation_length self.logger("TreeTime._find_best_root: searching for the best root position...",2) Treg = self.setup_TreeRegression(covariation=covariation) return Treg.optimal_reroot(force_positive=force_positive, slope=slope)['node'] def plot_vs_years(tt, step = None, ax=None, confidence=None, ticks=True, **kwargs): ''' Converts branch length to years and plots the time tree on a time axis. Parameters ---------- tt : TreeTime object A TreeTime instance after a time tree is inferred step : int Width of shaded boxes indicating blocks of years. Will be inferred if not specified. To switch off drawing of boxes, set to 0 ax : matplotlib axes Axes to be used to plot, will create new axis if None confidence : tuple, float Draw confidence intervals. This assumes that marginal time tree inference was run. Confidence intervals are either specified as an interval of the posterior distribution like (0.05, 0.95) or as the weight of the maximal posterior region , e.g. 
0.9 **kwargs : dict Key word arguments that are passed down to Phylo.draw ''' import matplotlib.pyplot as plt tt.branch_length_to_years() nleafs = tt.tree.count_terminals() if ax is None: fig = plt.figure(figsize=(12,10)) ax = plt.subplot(111) else: fig = None # draw tree if "label_func" not in kwargs: kwargs["label_func"] = lambda x:x.name if (x.is_terminal() and nleafs<30) else "" Phylo.draw(tt.tree, axes=ax, **kwargs) offset = tt.tree.root.numdate - tt.tree.root.branch_length date_range = np.max([n.numdate for n in tt.tree.get_terminals()])-offset # estimate year intervals if not explicitly specified if step is None or (step>0 and date_range/step>100): step = 10**np.floor(np.log10(date_range)) if date_range/step<2: step/=5 elif date_range/step<5: step/=2 step = max(1.0/12,step) # set axis labels if step: dtick = step min_tick = step*(offset//step) extra = dtick if dtick=1: tick_labels = ["%d"%(int(x)) for x in tick_vals] else: tick_labels = ["%1.2f"%(x) for x in tick_vals] ax.set_xlim((0,date_range)) ax.set_xticklabels(tick_labels) ax.set_xlabel('year') ax.set_ylabel('') # put shaded boxes to delineate years if step: ylim = ax.get_ylim() xlim = ax.get_xlim() from matplotlib.patches import Rectangle for yi,year in enumerate(np.arange(np.floor(tick_vals[0]), tick_vals[-1]+.01, step)): pos = year - offset r = Rectangle((pos, ylim[1]-5), step, ylim[0]-ylim[1]+10, facecolor=[0.7+0.1*(1+yi%2)] * 3, edgecolor=[1,1,1]) ax.add_patch(r) if year in tick_vals and pos>=xlim[0] and pos<=xlim[1] and ticks: label_str = "%1.2f"%(step*(year//step)) if step<1 else str(int(year)) ax.text(pos,ylim[0]-0.04*(ylim[1]-ylim[0]), label_str, horizontalalignment='center') ax.set_axis_off() # add confidence intervals to the tree graph -- grey bars if confidence: tree_layout(tt.tree) if not hasattr(tt.tree.root, "marginal_inverse_cdf"): raise NotReadyError("marginal time tree reconstruction required for confidence intervals") elif type(confidence) is float: cfunc = tt.get_max_posterior_region elif len(confidence)==2: cfunc = tt.get_confidence_interval else: raise NotReadyError("confidence needs to be either a float (for max posterior region) or a two numbers specifying lower and upper bounds") for n in tt.tree.find_clades(): if not n.bad_branch: pos = cfunc(n, confidence) ax.plot(pos-offset, np.ones(len(pos))*n.ypos, lw=3, c=(0.5,0.5,0.5)) return fig, ax def treetime_to_newick(tt, outf): Phylo.write(tt.tree, outf, 'newick') if __name__=="__main__": pass treetime-0.8.6/treetime/utils.py000066400000000000000000000431521417362145000167030ustar00rootroot00000000000000from __future__ import division, print_function, absolute_import import os,sys import datetime import pandas as pd import numpy as np from scipy.interpolate import interp1d from scipy.integrate import quad from scipy import stats from scipy.ndimage import binary_dilation from . import TreeTimeError class DateConversion(object): """ Small container class to store parameters to convert between branch length as it is used in ML computations and the dates of the nodes. 
    It is assumed that the conversion formula is 'length = k*date + b'.
    """
    def __init__(self):
        self.clock_rate = 0
        self.intercept = 0
        self.chisq = 0
        self.r_val = 0
        self.cov = None
        self.sigma = 0
        self.valid_confidence = False

    def __str__(self):
        if self.cov is not None and self.valid_confidence:
            dslope = np.sqrt(self.cov[0,0])
            outstr = ('Root-Tip-Regression:\n --rate:\t%1.3e +/- %1.2e (one std-dev)\n --chi^2:\t%1.2f\n --r^2:  \t%1.2f\n'
                      %(self.clock_rate, dslope, self.chisq**2, self.r_val**2))
        else:
            outstr = ('Root-Tip-Regression:\n --rate:\t%1.3e\n --r^2:  \t%1.2f\n'
                      %(self.clock_rate, self.r_val**2))
        return outstr

    @classmethod
    def from_regression(cls, clock_model):
        """
        Create the conversion object automatically from the tree

        Parameters
        ----------
        clock_model : dict
            dictionary as returned from TreeRegression with fields intercept and slope
        """
        dc = cls()
        dc.clock_rate = clock_model['slope']
        dc.intercept = clock_model['intercept']
        dc.chisq = clock_model['chisq'] if 'chisq' in clock_model else None
        dc.valid_confidence = clock_model['valid_confidence'] if 'valid_confidence' in clock_model else False
        if 'cov' in clock_model and dc.valid_confidence:
            dc.cov = clock_model['cov']
        dc.r_val = clock_model['r_val']
        return dc

    def get_branch_len(self, date1, date2):
        """
        Compute branch length given the dates of the two nodes.

        Parameters
        -----------
        date1 : int
            date of the first node (days before present)
        date2 : int
            date of the second node (days before present)

        Returns
        --------
        branch length : double
            Branch length, assuming that the dependence between the node date
            and the node depth in the tree is linear.
        """
        return abs(date1 - date2) * self.clock_rate

    def get_time_before_present(self, numdate):
        """
        Convert the numeric date to the branch-len scale
        """
        return (numeric_date() - numdate) * abs(self.clock_rate)

    def to_years(self, abs_t):
        """
        Convert the time before present measured in branch length units to years
        """
        return abs_t / abs(self.clock_rate)

    def to_numdate(self, tbp):
        """
        Convert time before present measured in clock rate units to numeric calendar dates
        """
        return numeric_date() - self.to_years(tbp)

    def numdate_from_dist2root(self, d2r):
        """
        estimate the numerical date based on the distance to root.
        -> crude dating of internal nodes
        """
        return (d2r-self.intercept)/self.clock_rate

    def clock_deviation(self, numdate, d2r):
        """
        calculate the deviation of the node date from the expectation
        under the root-to-tip regression
        """
        return (self.numdate_from_dist2root(d2r) - numdate)*self.clock_rate


def min_interp(interp_object):
    """
    Find the global minimum of a function represented as an interpolation object.
    """
    try:
        return interp_object.x[interp_object(interp_object.x).argmin()]
    except Exception as e:
        s = "Cannot find minimum of the interpolation object" + str(interp_object.x) + \
            "Minimal x: " + str(interp_object.x.min()) + "Maximal x: " + str(interp_object.x.max())
        raise e


def median_interp(interp_object):
    """
    Find the median of the function represented as an interpolation object.
    """
    new_grid = np.sort(np.concatenate([interp_object.x[:-1] + 0.1*ii*np.diff(interp_object.x)
                                       for ii in range(10)]).flatten())
    tmp_prop = np.exp(-(interp_object(new_grid)-interp_object.y.min()))
    tmp_cumsum = np.cumsum(0.5*(tmp_prop[1:]+tmp_prop[:-1])*np.diff(new_grid))
    median_index = min(len(tmp_cumsum)-3, max(2,np.searchsorted(tmp_cumsum, tmp_cumsum[-1]*0.5)+1))
    return new_grid[median_index]


def numeric_date(dt=None):
    """
    Convert datetime object to the numeric date.
The numeric date format is YYYY.F, where F is the fraction of the year passed Parameters ---------- dt: datetime.datetime, None date of to be converted. if None, assume today """ from calendar import isleap if dt is None: dt = datetime.datetime.now() days_in_year = 366 if isleap(dt.year) else 365 try: res = dt.year + (dt.timetuple().tm_yday-0.5) / days_in_year except: res = None return res def datetime_from_numeric(numdate): """convert a numeric decimal date to a python datetime object Note that this only works for AD dates since the range of datetime objects is restricted to year>1. Parameters ---------- numdate : float numeric date as in 2018.23 Returns ------- datetime.datetime datetime object """ from calendar import isleap days_in_year = 366 if isleap(int(numdate)) else 365 # add a small number of the time elapsed in a year to avoid # unexpected behavior for values 1/365, 2/365, etc days_elapsed = int(((numdate%1)+1e-10)*days_in_year) date = datetime.datetime(int(numdate),1,1) + datetime.timedelta(days=days_elapsed) return date def datestring_from_numeric(numdate): """convert a numerical date to a formated date string YYYY-MM-DD Parameters ---------- numdate : float numeric date as in 2018.23 Returns ------- str date string YYYY-MM-DD """ try: return datetime.datetime.strftime(datetime_from_numeric(numdate), "%Y-%m-%d") except: year = int(np.floor(numdate)) dt = datetime_from_numeric(1900+(numdate%1)) return "%04d-%02d-%02d"%(year, dt.month, dt.day) def parse_dates(date_file, name_col=None, date_col=None): """ parse dates from the arguments and return a dictionary mapping taxon names to numerical dates. Parameters ---------- date_file : str name of file to parse meta data from Returns ------- dict dictionary linking fields in a column interpreted as taxon name (first column that contains 'name', 'strain', 'accession') to a numerical date inferred from a column that contains 'date'. It will first try to parse the column as float, than via pandas.to_datetime and finally as ambiguous date such as 2018-05-XX """ print("\nAttempting to parse dates...") dates = {} if not os.path.isfile(date_file): print("\n\tERROR: file %s does not exist, exiting..."%date_file) return dates # separator for the csv/tsv file. If csv, we'll strip extra whitespace around ',' full_sep = '\t' if date_file.endswith('.tsv') else r'\s*,\s*' try: # read the metadata file into pandas dataframe. df = pd.read_csv(date_file, sep=full_sep, engine='python', dtype='str', index_col=False) # check the metadata has strain names in the first column # look for the column containing sampling dates # We assume that the dates might be given either in human-readable format # (e.g. ISO dates), or be already converted to the numeric format. potential_date_columns = [] potential_numdate_columns = [] potential_index_columns = [] # Scan the dataframe columns and find ones which likely to store the # dates for ci,col in enumerate(df.columns): d = df.iloc[0,ci] # strip quotation marks if type(d)==str and d[0] in ['"', "'"] and d[-1] in ['"', "'"]: for i,tmp_d in enumerate(df.iloc[:,ci]): df.iloc[i,ci] = tmp_d.strip(d[0]) if 'date' in col.lower(): potential_date_columns.append((ci, col)) if any([x==col.lower() for x in ['name', 'strain', 'accession']]): potential_index_columns.append((ci, col)) if date_col and date_col not in df.columns: raise TreeTimeError("ERROR: specified column for dates does not exist. 
\n\tAvailable columns are: "\
                                +", ".join(df.columns)+"\n\tYou specified '%s'"%date_col)

        if name_col and name_col not in df.columns:
            raise TreeTimeError("ERROR: specified column for the taxon name does not exist. \n\tAvailable columns are: "\
                                +", ".join(df.columns)+"\n\tYou specified '%s'"%name_col)

        dates = {}
        # if a potential numeric date column was found, use it
        # (use the first, if there are more than one)
        if not (len(potential_index_columns) or name_col):
            raise TreeTimeError("ERROR: Cannot read metadata: need at least one column that contains the taxon labels."
                                " Looking for the first column that contains 'name', 'strain', or 'accession' in the header.")
        else:
            # use the first column that is either 'name', 'strain', 'accession'
            if name_col is None:
                index_col = sorted(potential_index_columns)[0][1]
            else:
                index_col = name_col
            print("\tUsing column '%s' as name. This needs to match the taxon names in the tree!!"%index_col)

        if len(potential_date_columns)>=1 or date_col:
            #try to parse the csv file with dates in the idx column:
            if date_col is None:
                date_col = potential_date_columns[0][1]
            print("\tUsing column '%s' as date."%date_col)
            for ri, row in df.iterrows():
                date_str = row.loc[date_col]
                k = row.loc[index_col]
                # try parsing as a float first
                try:
                    if date_str:
                        dates[k] = float(date_str)
                    else:
                        dates[k] = None
                    continue
                except ValueError:
                    # try whether the date string can be parsed as [2002.2:2004.3]
                    # to indicate general ambiguous ranges
                    if date_str[0]=='[' and date_str[-1]==']' and len(date_str[1:-1].split(':'))==2:
                        try:
                            dates[k] = [float(x) for x in date_str[1:-1].split(':')]
                            continue
                        except ValueError:
                            pass
                    # try date format parsing 2017-08-12
                    try:
                        tmp_date = pd.to_datetime(date_str)
                        dates[k] = numeric_date(tmp_date)
                    except ValueError:
                        # try ambiguous date format parsing 2017-XX-XX
                        lower, upper = ambiguous_date_to_date_range(date_str, '%Y-%m-%d')
                        if lower is not None:
                            dates[k] = [numeric_date(x) for x in [lower, upper]]
        else:
            raise TreeTimeError("ERROR: Metadata file has no column which looks like a sampling date!")

        if all(v is None for v in dates.values()):
            raise TreeTimeError("ERROR: Cannot parse dates correctly! Check date format.")
        return dates
    except TreeTimeError as err:
        raise err
    except:
        raise


def ambiguous_date_to_date_range(mydate, fmt="%Y-%m-%d", min_max_year=None):
    """parse an ambiguous date such as 2017-XX-XX to [2017,2017.999]

    Parameters
    ----------
    mydate : str
        date string to be parsed
    fmt : str
        format descriptor. default is %Y-%m-%d
    min_max_year : None, optional
        if date is completely unknown, use this as bounds.

    Returns
    -------
    tuple
        lower and upper bounds on the date. return (None, None) if errors
    """
    sep = fmt.split('%')[1][-1]
    min_date, max_date = {}, {}
    today = datetime.date.today()
    for val, field in zip(mydate.split(sep), fmt.split(sep+'%')):
        f = 'year' if 'y' in field.lower() else ('day' if 'd' in field.lower() else 'month')
        if 'XX' in val:
            if f=='year':
                if min_max_year:
                    min_date[f]=min_max_year[0]
                    if len(min_max_year)>1:
                        max_date[f]=min_max_year[1]
                    elif len(min_max_year)==1:
                        max_date[f]=4000 #will be replaced by 'today' below.
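                # Illustrative sketch (hypothetical inputs, not part of the original code):
                # ambiguous_date_to_date_range('2017-XX-XX') should resolve via the
                # month/day branches below to (date(2017,1,1), date(2017,12,31)),
                # while a fully unknown year 'XXXX-XX-XX' needs min_max_year,
                # e.g. min_max_year=(1900, 2000), to bound the result.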
else: return None, None elif f=='month': min_date[f]=1 max_date[f]=12 elif f=='day': min_date[f]=1 max_date[f]=31 else: try: min_date[f]=int(val) max_date[f]=int(val) except ValueError: print("Can't parse date string: "+mydate, file=sys.stderr) return None, None max_date['day'] = min(max_date['day'], 31 if max_date['month'] in [1,3,5,7,8,10,12] else 28 if max_date['month']==2 else 30) lower_bound = datetime.date(year=min_date['year'], month=min_date['month'], day=min_date['day']) upper_bound = datetime.date(year=max_date['year'], month=max_date['month'], day=max_date['day']) return (lower_bound, upper_bound if upper_bound","tmp.nwk", "2>", "fasttree_stderr"]) os.system(" ".join(tree_cmd)) return Phylo.read("tmp.nwk", 'newick') def build_newick_raxml(aln_fname, nthreads=2, raxml_bin="raxml", **kwargs): import shutil,os print("Building tree with raxml") from Bio import Phylo, AlignIO AlignIO.write(AlignIO.read(aln_fname, 'fasta'),"temp.phyx", "phylip-relaxed") cmd = raxml_bin + " -f d -T " + str(nthreads) + " -m GTRCAT -c 25 -p 235813 -n tre -s temp.phyx" os.system(cmd) return Phylo.read('RAxML_bestTree.tre', "newick") def build_newick_iqtree(aln_fname, nthreads=2, iqtree_bin="iqtree", iqmodel="HKY", **kwargs): import os from Bio import Phylo, AlignIO print("Building tree with iqtree") aln = None for fmt in ['fasta', 'phylip-relaxed']: try: aln = AlignIO.read(aln_fname, fmt) break except: continue if aln is None: raise ValueError("failed to read alignment for tree building") aln_file = "temp.fasta" seq_names = set() for s in aln: tmp = s.id for c, sub in zip('/|()', 'VWXY'): tmp = tmp.replace(c, '_%s_%s_'%(sub,sub)) if tmp in seq_names: print("A sequence with name {} already exists, skipping....".format(s.id)) continue s.id = tmp s.name = s.id s.description = '' seq_names.add(s.id) AlignIO.write(aln, aln_file, 'fasta') fast_opts = [ "-ninit", "2", "-n", "2", "-me", "0.05" ] call = ["iqtree"] + fast_opts +["-nt", str(nthreads), "-s", aln_file, "-m", iqmodel, ">", "iqtree.log"] os.system(" ".join(call)) T = Phylo.read(aln_file+".treefile", 'newick') for n in T.get_terminals(): tmp = n.name for c, sub in zip('/|()', 'VWXY'): tmp = tmp.replace('_%s_%s_'%(sub,sub), c) n.name = tmp return T def clip(a, min_val, max_val): return np.maximum(min_val, np.minimum(a, max_val)) if __name__ == '__main__': pass treetime-0.8.6/treetime/vcf_utils.py000066400000000000000000000574771417362145000175600ustar00rootroot00000000000000import gzip import numpy as np from collections import defaultdict from textwrap import fill ## Functions to read in and print out VCF files def read_vcf(vcf_file, ref_file=None): """ Reads in a vcf/vcf.gz file and associated reference sequence fasta (to which the VCF file is mapped). Parses mutations, insertions, and deletions and stores them in a nested dict, see 'returns' for the dict structure. Calls with heterozygous values 0/1, 0/2, etc and no-calls (./.) are replaced with Ns at the associated sites. Positions are stored to correspond the location in the reference sequence in Python (numbering is transformed to start at 0) Parameters ---------- vcf_file : string Path to the vcf or vcf.gz file to be read in ref_file : string, optional Path to the fasta reference file to be read in Returns -------- compress_seq : nested dict In the format: :: { 'reference':'AGCTCGA..A', 'sequences': { 'seq1':{4:'A', 7:'-'}, 'seq2':{100:'C'} }, 'insertions': { 'seq1':{4:'ATT'}, 'seq3':{1:'TT', 10:'CAG'} }, 'positions': [1,4,7,10,100...] 
} references : string String of the reference sequence read from the Fasta, to which the variable sites are mapped sequences : nested dict Dict containing sequence names as keys which map to dicts that have position as key and the single-base mutation (or deletion) as values insertions : nested dict Dict in the same format as the above, which stores insertions and their locations. The first base of the insertion is the same as whatever is currently in that position (Ref if no mutation, mutation in 'sequences' otherwise), so the current base can be directly replaced by the bases held here. positions : list Python list of all positions with a mutation, insertion, or deletion. """ #Programming Note: # Note on VCF Format # ------------------- # 'Insertion where there are also deletions' (special handling) # Ex: # REF ALT Seq1 Seq2 # GC GCC,G 1/1 2/2 # Insertions formatted differently - don't know how many bp match # the Ref (unlike simple insert below). Could be mutations, also. # 'Deletion' # Ex: # REF ALT # GC G # Alt does not have to be 1 bp - any length shorter than Ref. # 'Insertion' # Ex: # REF ALT # A ATT # First base always matches Ref. # 'No indel' # Ex: # REF ALT # A G #define here, so that all sub-functions can access them sequences = defaultdict(dict) insertions = defaultdict(dict) #Currently not used, but kept in case of future use. #TreeTime handles 2-3 base ambig codes, this will allow that. def getAmbigCode(bp1, bp2, bp3=""): bps = [bp1,bp2,bp3] bps.sort() key = "".join(bps) return { 'CT': 'Y', 'AG': 'R', 'AT': 'W', 'CG': 'S', 'GT': 'K', 'AC': 'M', 'AGT': 'D', 'ACG': 'V', 'ACT': 'H', 'CGT': 'B' }[key] #Parses a 'normal' (not hetero or no-call) call depending if insertion+deletion, insertion, #deletion, or single bp subsitution def parseCall(snps, ins, pos, ref, alt): #Insertion where there are also deletions (special handling) if len(ref) > 1 and len(alt)>len(ref): for i in range(len(ref)): #if the pos doesn't match, store in sequences if ref[i] != alt[i]: snps[pos+i] = (alt[i] if alt[i] != '.' else 'N') #'.' = no-call #if about to run out of ref, store rest: if (i+1) >= len(ref): ins[pos+i] = alt[i:] #Deletion elif len(ref) > 1: for i in range(len(ref)): #if ref is longer than alt, these are deletion positions if i+1 > len(alt): snps[pos+i] = '-' #if not, there may be mutations else: if ref[i] != alt[i]: snps[pos+i] = (alt[i] if alt[i] != '.' else 'N') #'.' = no-call #Insertion elif len(alt) > 1: ins[pos] = alt #No indel else: snps[pos] = alt #Parses a 'bad' (hetero or no-call) call depending on what it is def parseBadCall(gen, snps, ins, pos, ref, ALT): #Deletion # REF ALT Seq1 Seq2 Seq3 # GCC G 1/1 0/1 ./. # Seq1 (processed by parseCall, above) will become 'G--' # Seq2 will become 'GNN' # Seq3 will become 'GNN' if len(ref) > 1: #Deleted part becomes Ns if gen[0] == '0' or gen[0] == '.': if gen[0] == '0': #if het, get first bp alt = str(ALT[int(gen[2])-1]) else: #if no-call, there is no alt, so just put Ns after 1st ref base alt = ref[0] for i in range(len(ref)): #if ref is longer than alt, these are deletion positions if i+1 > len(alt): snps[pos+i] = 'N' #if not, there may be mutations else: if ref[i] != alt[i]: snps[pos+i] = (alt[i] if alt[i] != '.' else 'N') #'.' = no-call #If not deletion, need to know call type #if het, see if proposed alt is 1bp mutation elif gen[0] == '0': alt = str(ALT[int(gen[2])-1]) if len(alt)==1: #alt = getAmbigCode(ref,alt) #if want to allow ambig alt = 'N' #if you want to disregard ambig snps[pos] = alt #else a het-call insertion, so ignore. 
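        #Illustrative sketch (hypothetical call, not from the original code):
        #for REF='A', ALT=['G','GT'] and genotype '0/1', the proposed alt is
        #ALT[int('1')-1] == 'G'; being a single bp, the site is masked to 'N' above,
        #whereas a het call selecting the insertion 'GT' is simply ignored.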
#else it's a no-call; see if all alts have a length of 1 #(meaning a simple 1bp mutation) elif len(ALT)==len("".join(ALT)): alt = 'N' snps[pos] = alt #else a no-call insertion, so ignore. #House code is *much* faster than pyvcf because we don't care about all info #about coverage, quality, counts, etc, which pyvcf goes to effort to parse #(and it's not easy as there's no standard ordering). Custom code can completely #ignore all of this. import gzip from Bio import SeqIO import numpy as np nsamp = 0 posLoc = 0 refLoc = 0 altLoc = 0 sampLoc = 9 #Use different openers depending on whether compressed opn = gzip.open if vcf_file.endswith(('.gz', '.GZ')) else open with opn(vcf_file, mode='rt') as f: for line in f: if line[0] != '#': #actual data - most common so first in 'if-list'! dat = line.strip().split('\t') POS = int(dat[posLoc]) REF = dat[refLoc] ALT = dat[altLoc].split(',') calls = np.array(dat[sampLoc:]) #get samples that differ from Ref at this site recCalls = {} for sname, sa in zip(samps, calls): if ':' in sa: #if proper VCF file (followed by quality/coverage info) gt = sa.split(':')[0] else: #if 'pseudo' VCF file (nextstrain output, or otherwise stripped) gt = sa if gt == '0' or gt == '1': #for haploid calls in VCF gt = '0/0' if gt == '0' else '1/1' #ignore if ref call: '.' or '0/0', depending on VCF if ('/' in gt and gt != '0/0') or ('|' in gt and gt != '0|0'): recCalls[sname] = gt #store the position and the alt for seq, gen in recCalls.items(): ref = REF pos = POS-1 #VCF numbering starts from 1, but Reference seq numbering #will be from 0 because it's python! #Accepts only calls that are 1/1, 2/2 etc. Rejects hets and no-calls if gen[0] != '0' and gen[2] != '0' and gen[0] != '.' and gen[2] != '.': alt = str(ALT[int(gen[0])-1]) #get the index of the alternate if seq not in sequences.keys(): sequences[seq] = {} parseCall(sequences[seq],insertions[seq], pos, ref, alt) #If is heterozygote call (0/1) or no call (./.) else: #alt will differ here depending on het or no-call, must pass original parseBadCall(gen, sequences[seq],insertions[seq], pos, ref, ALT) elif line[0] == '#' and line[1] == 'C': #header line, get all the information header = line.strip().split('\t') posLoc = header.index("POS") refLoc = header.index('REF') altLoc = header.index('ALT') sampLoc = header.index('FORMAT')+1 samps = header[sampLoc:] samps = [ x.strip() for x in samps ] #ensure no leading/trailing spaces nsamp = len(samps) #else you are a comment line, ignore. #Gather all variable positions positions = set() for seq, muts in sequences.items(): positions.update(muts.keys()) #One or more seqs are same as ref! (No non-ref calls) So haven't been 'seen' yet if nsamp > len(sequences): missings = set(samps).difference(sequences.keys()) for s in missings: sequences[s] = {} if ref_file: refSeq = SeqIO.read(ref_file, format='fasta') refSeq = refSeq.upper() #convert to uppercase to avoid unknown chars later refSeqStr = str(refSeq.seq) else: refSeqStr = None compress_seq = {'reference':refSeqStr, 'sequences': sequences, 'insertions': insertions, 'positions': sorted(positions)} return compress_seq def write_vcf(tree_dict, file_name):#, compress=False): """ Writes out a VCF-style file (which seems to be minimally handleable by vcftools and pyvcf) of the alignment. This is created from a dict in a similar format to what's created by :py:meth:`treetime.vcf_utils.read_vcf` Positions of variable sites are transformed to start at 1 to match VCF convention. 
    Parameters
    ----------
    tree_dict: nested dict
        A nested dict with keys 'sequences', 'reference' and 'positions',
        as is created by :py:meth:`treetime.TreeAnc.get_tree_dict`

    file_name: str
        File to which the new VCF should be written out. File names ending with
        '.gz' will result in the VCF automatically being gzipped.
    """

    # Programming Logic Note:
    #
    # For a sequence like:
    # Pos     1 2 3 4 5 6
    # Ref     A C T T A C
    # Seq1    A C - - - G
    #
    # In a dict it is stored:
    # Seq1:{3:'-', 4:'-', 5:'-', 6:'G'}  (Numbering from 1 for simplicity)
    #
    # In a VCF it needs to be:
    # POS REF   ALT  Seq1
    # 2   CTTA  C    1/1
    # 6   C     G    1/1
    #
    # If a position is deleted (pos 3), need to get invariable position preceding it
    #
    # However, in alternative case, the base before a deletion is mutant, so need to check
    # that next position isn't a deletion (as otherwise won't be found until after the
    # current single bp mutation is written out)
    #
    # When deleted position found, need to gather up all adjacent mutant positions with deletions,
    # but not include adjacent mutant positions that aren't deletions (pos 6)
    #
    # Don't run off the 'end' of the position list if deletion is the last thing to be included
    # in the VCF file

    sequences = tree_dict['sequences']
    ref = tree_dict['reference']
    positions = tree_dict['positions']

    def handleDeletions(i, pi, pos, ref, delete, pattern):
        refb = ref[pi]
        if delete: #Need to get the position before
            i-=1 #As we'll next go to this position again
            pi-=1
            pos = pi+1
            refb = ref[pi]
            #re-get pattern
            pattern = []
            for k,v in sequences.items():
                try:
                    pattern.append(sequences[k][pi])
                except KeyError:
                    pattern.append(ref[pi])
            pattern = np.array(pattern).astype('U')

        sites = []
        sites.append(pattern)

        #Gather all positions affected by deletion - but don't run off end of position list
        while (i+1) < len(positions) and positions[i+1] == pi+1:
            i+=1
            pi = positions[i]
            pattern = []
            for k,v in sequences.items():
                try:
                    pattern.append(sequences[k][pi])
                except KeyError:
                    pattern.append(ref[pi])
            pattern = np.array(pattern).astype('U')

            #Stops 'greedy' behaviour from adding mutations adjacent to deletions
            if any(pattern == '-'): #if part of deletion, append
                sites.append(pattern)
                refb = refb+ref[pi]
            else: #this is another mutation next to the deletion!
                i-=1 #don't append, break this loop

        #Rotate them into 'calls'
        align = np.asarray(sites).T

        #Get rid of '-', and put '.' for calls that match ref
        #Only removes trailing '-'. This breaks VCF convention, but the standard
        #VCF way of handling this* is really complicated, and the situation is rare.
        #(*deletions and mutations at the same locations)
        fullpat = []
        for pt in align:
            gp = len(pt)-1
            while pt[gp] == '-':
                pt[gp] = ''
                gp-=1
            pat = "".join(pt)
            if pat == refb:
                fullpat.append('.')
            else:
                fullpat.append(pat)
        pattern = np.array(fullpat)

        return i, pi, pos, refb, pattern

    #prepare the header of the VCF & write out
    header=["#CHROM","POS","ID","REF","ALT","QUAL","FILTER","INFO","FORMAT"]+list(sequences.keys())
    opn = gzip.open if file_name.endswith(('.gz', '.GZ')) else open
    out_file = opn(file_name, 'w')
    out_file.write( "##fileformat=VCFv4.2\n"+
                    "##source=NextStrain\n"+
                    "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n")
    out_file.write("\t".join(header)+"\n")

    vcfWrite = []
    errorPositions = []
    explainedErrors = 0

    #Why so basic? Because we sometimes have to back up a position!
    i=0
    while i < len(positions):
        #Get the 'pattern' of all calls at this position.
        #Look out specifically for current (this pos) or upcoming (next pos) deletions
        #But also distinguish these two, as handled differently.
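        #Illustrative sketch (hypothetical toy input, not from the original code):
        #with ref='ACTTAC' and sequences={'Seq1':{2:'-',3:'-',4:'-',5:'G'}} (0-based),
        #reaching position 2 sets delete=True (the invariant base at position 1 must
        #serve as the VCF anchor), while a substitution at position 1 followed by the
        #deletion at position 2 would set deleteGroup=True instead, since the anchor
        #base is already variant.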
pi = positions[i] pos = pi+1 #change numbering to match VCF, not python, for output refb = ref[pi] #reference base at this position delete = False #deletion at this position - need to grab previous base (invariable) deleteGroup = False #deletion at next position (mutation at this pos) - do not need to get prev base #try/except is much more efficient than 'if' statements for constructing patterns, #as on average a 'variable' location will not be variable for any given sequence pattern = [] #pattern2 gets the pattern at next position to check for upcoming deletions #it's more efficient to get both here rather than loop through sequences twice! pattern2 = [] for k,v in sequences.items(): try: pattern.append(sequences[k][pi]) except KeyError: pattern.append(ref[pi]) try: pattern2.append(sequences[k][pi+1]) except KeyError: pattern2.append(ref[pi+1]) pattern = np.array(pattern).astype('U') pattern2 = np.array(pattern2).astype('U') #If a deletion here, need to gather up all bases, and position before if any(pattern == '-'): if pos != 1: deleteGroup = True delete = True else: #If theres a deletion in 1st pos, VCF files do not handle this well. #Proceed keeping it as '-' for alt (violates VCF), but warn user to check output. #(This is rare) print(fill("WARNING: You have a deletion in the first position of your" " alignment. VCF format does not handle this well. Please check" " the output to ensure it is correct.")) else: #If a deletion in next pos, need to gather up all bases if any(pattern2 == '-'): deleteGroup = True #If deletion, treat affected bases as 1 'call': if delete or deleteGroup: i, pi, pos, refb, pattern = handleDeletions(i, pi, pos, ref, delete, pattern) #If no deletion, replace ref with '.', as in VCF format else: pattern[pattern==refb] = '.' #Get the list of ALTs - minus any '.'! uniques = np.unique(pattern) uniques = uniques[np.where(uniques!='.')] #Convert bases to the number that matches the ALT j=1 for u in uniques: pattern[np.where(pattern==u)[0]] = str(j) j+=1 #Now convert these calls to #/# (VCF format) calls = [ j+"/"+j if j!='.' else '.' for j in pattern ] #What if there's no variation at a variable site?? #This can happen when sites are modified by TreeTime - see below. printPos = True if len(uniques)==0: #If we expect it (it was made constant by TreeTime), it's fine. if 'inferred_const_sites' in tree_dict and pi in tree_dict['inferred_const_sites']: explainedErrors += 1 printPos = False #and don't output position to the VCF else: #If we don't expect, raise an error errorPositions.append(str(pi)) #Write it out - Increment positions by 1 so it's in VCF numbering #If no longer variable, and explained, don't write it out if printPos: output = ["MTB_anc", str(pos), ".", refb, ",".join(uniques), ".", "PASS", ".", "GT"] + calls vcfWrite.append("\t".join(output)) i+=1 #Note: The number of 'inferred_const_sites' passed back by TreeTime will often be longer #than the number of 'site that were made constant' that prints below. This is because given the site: # Ref Alt Seq # G A AANAA #This will be converted to 'AAAAA' and listed as an 'inferred_const_sites'. However, for VCF #purposes, because the site is 'variant' against the ref, it is variant, as expected, and so #won't be counted in the below list, which is only sites removed from the VCF. if 'inferred_const_sites' in tree_dict and explainedErrors != 0: print(fill("Sites that were constant except for ambiguous bases were made" + " constant by TreeTime. This happened {} times. 
These sites are".format(explainedErrors) + " now excluded from the VCF."))

    if len(errorPositions) != 0:
        print ("\n***WARNING: vcf_utils.py")
        print(fill("\n{} sites were found that had no alternative bases.".format(str(len(errorPositions)))+
                   " If this data has been run through TreeTime and contains ambiguous bases,"
                   " try calling get_tree_dict with var_ambigs=True to see if this clears the error."))
        print(fill("\nAlternative causes:"
                   "\n- Not all sequences in your alignment are in the tree"
                   " (if you are running TreeTime via the command line this is most likely)"
                   "\n- In TreeTime, can be caused by overwriting variants in tips with small branch lengths (debug)"
                   "\n\nThese are the positions affected (numbering starts at 0):"))
        print(fill(", ".join(errorPositions)))

    out_file.write("\n".join(vcfWrite))
    out_file.close()


def process_sparse_alignment(aln, ref, ambiguous_char):
    return process_alignment_dictionary(aln, ref, ambiguous_char)


def process_alignment_dictionary(aln, ref, ambiguous_char):
    """
    prepare the dictionary specifying differences from a reference
    sequence to construct the reduced alignment with variable sites only.

    NOTE:
        - sites can be constant but different from the reference
        - sites can be constant apart from ambiguous sites

    assigns
    -------
    - self.nonref_positions: at least one sequence is different from ref

    Returns
    -------
    reduced_alignment_const
        reduced alignment accounting for non-variable positions
    alignment_patterns_const
        dict pattern -> (pos in reduced alignment, list of pos in full alignment)
    variable_positions
        list of variable positions needed to construct remaining
    """
    # number of sequences in alignment
    nseq = len(aln)

    inv_map = defaultdict(list)
    for k,v in aln.items():
        for pos, bs in v.items():
            inv_map[pos].append(bs)

    nonref_positions = np.sort(list(inv_map.keys()))

    constant_up_to_ambiguous = []

    nonref_const = []
    nonref_alleles = []
    ambiguous_const = []
    variable_pos = []
    for pos, bs in inv_map.items(): #loop over positions and patterns
        bases = list(np.unique(bs))
        if len(bs) == nseq: #every sequence is different from reference
            if (len(bases)<=2 and ambiguous_char in bases) or len(bases)==1:
                # all sequences different from reference, but only one state
                # (other than ambiguous_char) in column
                nonref_const.append(pos)
                if len(bases)==1:
                    nonref_alleles.append(bases[0])
                else:
                    nonref_alleles.append([x for x in bases if x!=ambiguous_char][0])
                if ambiguous_char in bases: #keep track of sites 'made constant'
                    constant_up_to_ambiguous.append(pos)
            else:
                # at least two non-reference alleles
                variable_pos.append(pos)
        else:
            # not every sequence different from reference
            if len(bases)==1 and bases[0]==ambiguous_char:
                ambiguous_const.append(pos)
                constant_up_to_ambiguous.append(pos) #keep track of sites 'made constant'
            else:
                # at least one non ambiguous non-reference allele not in
                # every sequence
                variable_pos.append(pos)

    refMod = np.copy(ref)
    # place constant non reference positions by their respective allele
    refMod[nonref_const] = nonref_alleles
    # mask variable positions
    states = np.unique(refMod)
    refMod[variable_pos] = '.'

    # for each base in the gtr, make constant alignment pattern and
    # assign it to all const positions in the modified reference sequence
    constant_columns = []
    constant_patterns = {}
    for base in states:
        if base==ambiguous_char:
            continue
        p = np.repeat(base, nseq)
        pos = list(np.where(refMod==base)[0])
        #if the alignment doesn't have a const site of this base, don't add! (ex: no '----' site!)
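        #Illustrative sketch (hypothetical case, not from the original code): with
        #nseq=3 and base 'A', p becomes ['A','A','A'] and pos collects every position
        #whose (possibly substituted) state in refMod is 'A', so a single constant
        #column serves all those positions in the reduced alignment.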
if len(pos): constant_patterns["".join(p.astype('U'))] = [len(constant_columns), pos] constant_columns.append(p) return {"constant_columns": constant_columns, "constant_patterns": constant_patterns, "variable_positions": variable_pos, "nonref_positions": nonref_positions, "constant_up_to_ambiguous": constant_up_to_ambiguous} treetime-0.8.6/treetime/wrappers.py000066400000000000000000001305041417362145000174040ustar00rootroot00000000000000from __future__ import print_function, division, absolute_import import os, shutil, sys import numpy as np import pandas as pd from textwrap import fill from Bio.SeqRecord import SeqRecord from Bio.Seq import Seq from Bio.Align import MultipleSeqAlignment from Bio import Phylo, AlignIO from Bio import __version__ as bioversion from . import TreeAnc, GTR, TreeTime from . import config as ttconf from . import utils from .vcf_utils import read_vcf, write_vcf from .seq_utils import alphabets from . import TreeTimeError, MissingDataError def assure_tree(params, tmp_dir='treetime_tmp'): """ Function that attempts to load a tree and build it from the alignment if no tree is provided. """ if params.tree is None: params.tree = os.path.basename(params.aln)+'.nwk' print("No tree given: inferring tree") utils.tree_inference(params.aln, params.tree, tmp_dir = tmp_dir) if os.path.isdir(tmp_dir): shutil.rmtree(tmp_dir) try: tt = TreeAnc(params.tree) except (ValueError, TreeTimeError, MissingDataError) as e: print(e) print("Tree loading/building failed.") return 1 return 0 def create_gtr(params): """ parse the arguments referring to the GTR model and return a GTR structure """ model = params.gtr gtr_params = params.gtr_params if model == 'infer': gtr = GTR.standard('jc', alphabet='aa' if params.aa else 'nuc') else: try: kwargs = {} if gtr_params is not None: for param in gtr_params: keyval = param.split('=') if len(keyval)!=2: continue if keyval[0] in ['pis', 'pi', 'Pi', 'Pis']: keyval[0] = 'pi' keyval[1] = list(map(float, keyval[1].split(','))) elif keyval[0] not in ['alphabet']: keyval[1] = float(keyval[1]) kwargs[keyval[0]] = keyval[1] else: print ("GTR params are not specified. 
Creating GTR model with default parameters") gtr = GTR.standard(model, **kwargs) infer_gtr = False except KeyError as e: print("\nUNKNOWN SUBSTITUTION MODEL\n") raise e return gtr def get_outdir(params, suffix='_treetime'): if params.outdir: if os.path.exists(params.outdir): if os.path.isdir(params.outdir): return params.outdir.rstrip('/') + '/' else: print("designated output location %s is not a directory"%params.outdir, file=sys.stderr) else: os.makedirs(params.outdir) return params.outdir.rstrip('/') + '/' from datetime import datetime outdir_stem = datetime.now().date().isoformat() outdir = outdir_stem + suffix.rstrip('/')+'/' count = 1 while os.path.exists(outdir): outdir = outdir_stem + '-%04d'%count + suffix.rstrip('/')+'/' count += 1 os.makedirs(outdir) return outdir def get_basename(params, outdir): # if params.aln: # basename = outdir + '.'.join(params.aln.split('/')[-1].split('.')[:-1]) # elif params.tree: # basename = outdir + '.'.join(params.tree.split('/')[-1].split('.')[:-1]) # else: basename = outdir return basename def read_in_DRMs(drm_file, offset): import pandas as pd DRMs = {} drmPositions = [] df = pd.read_csv(drm_file, sep='\t') for mi, m in df.iterrows(): pos = m.GENOMIC_POSITION-1+offset #put in correct numbering drmPositions.append(pos) if pos in DRMs: DRMs[pos]['alt_base'][m.ALT_BASE] = m.SUBSTITUTION else: DRMs[pos] = {} DRMs[pos]['drug'] = m.DRUG DRMs[pos]['alt_base'] = {} DRMs[pos]['alt_base'][m.ALT_BASE] = m.SUBSTITUTION DRMs[pos]['gene'] = m.GENE drmPositions = np.array(drmPositions) drmPositions = np.unique(drmPositions) drmPositions = np.sort(drmPositions) DRM_info = {'DRMs': DRMs, 'drmPositions': drmPositions} return DRM_info def read_if_vcf(params): """ Checks if input is VCF and reads in appropriately if it is """ ref = None aln = params.aln fixed_pi = None if hasattr(params, 'aln') and params.aln is not None: if any([params.aln.lower().endswith(x) for x in ['.vcf', '.vcf.gz']]): if not params.vcf_reference: print("ERROR: a reference Fasta is required with VCF-format alignments") return -1 compress_seq = read_vcf(params.aln, params.vcf_reference) sequences = compress_seq['sequences'] ref = compress_seq['reference'] aln = sequences if not hasattr(params, 'gtr') or params.gtr=="infer": #if not specified, set it: alpha = alphabets['aa'] if params.aa else alphabets['nuc'] fixed_pi = [ref.count(base)/len(ref) for base in alpha] if fixed_pi[-1] == 0: fixed_pi[-1] = 0.05 fixed_pi = [v-0.01 for v in fixed_pi] return aln, ref, fixed_pi def plot_rtt(tt, fname): tt.plot_root_to_tip() from matplotlib import pyplot as plt plt.savefig(fname) print("--- root-to-tip plot saved to \n\t"+fname) def export_sequences_and_tree(tt, basename, is_vcf=False, zero_based=False, report_ambiguous=False, timetree=False, confidence=False, reconstruct_tip_states=False): seq_info = is_vcf or tt.aln if is_vcf: outaln_name = basename + 'ancestral_sequences.vcf' write_vcf(tt.get_reconstructed_alignment(reconstruct_tip_states=reconstruct_tip_states), outaln_name) elif tt.aln: outaln_name = basename + 'ancestral_sequences.fasta' AlignIO.write(tt.get_reconstructed_alignment(reconstruct_tip_states=reconstruct_tip_states), outaln_name, 'fasta') if seq_info: print("\n--- alignment including ancestral nodes saved as \n\t %s\n"%outaln_name) # decorate tree with inferred mutations terminal_count = 0 offset = 0 if zero_based else 1 if timetree: dates_fname = basename + 'dates.tsv' fh_dates = open(dates_fname, 'w', encoding='utf-8') if confidence: fh_dates.write('#Lower and upper bound delineate the 
90% max posterior region\n') fh_dates.write('#node\tdate\tnumeric date\tlower bound\tupper bound\n') else: fh_dates.write('#node\tdate\tnumeric date\n') for n in tt.tree.find_clades(): if timetree: if confidence: if n.bad_branch: fh_dates.write('%s\t--\t--\t--\t--\n'%(n.name)) else: conf = tt.get_max_posterior_region(n, fraction=0.9) fh_dates.write('%s\t%s\t%f\t%f\t%f\n'%(n.name, n.date, n.numdate,conf[0], conf[1])) else: if n.bad_branch: fh_dates.write('%s\t--\t--\n'%(n.name)) else: fh_dates.write('%s\t%s\t%f\n'%(n.name, n.date, n.numdate)) n.confidence=None # due to a bug in older versions of biopython that truncated filenames in nexus export # we truncate them by hand and make them unique. if n.is_terminal() and len(n.name)>40 and bioversion<"1.69": n.name = n.name[:35]+'_%03d'%terminal_count terminal_count+=1 n.comment='' if seq_info and len(n.mutations): if report_ambiguous: n.comment= '&mutations="' + ','.join([a+str(pos + offset)+d for (a,pos, d) in n.mutations])+'"' else: n.comment= '&mutations="' + ','.join([a+str(pos + offset)+d for (a,pos, d) in n.mutations if tt.gtr.ambiguous not in [a,d]])+'"' if timetree: n.comment+=(',' if n.comment else '&') + 'date=%1.2f'%n.numdate # write tree to file fmt_bl = "%1.6f" if tt.data.full_length<1e6 else "%1.8e" if timetree: outtree_name = basename + 'timetree.nexus' print("--- saved divergence times in \n\t %s\n"%dates_fname) Phylo.write(tt.tree, outtree_name, 'nexus') else: outtree_name = basename + 'annotated_tree.nexus' Phylo.write(tt.tree, outtree_name, 'nexus', format_branch_length=fmt_bl) print("--- tree saved in nexus format as \n\t %s\n"%outtree_name) if timetree: for n in tt.tree.find_clades(): n.branch_length = n.mutation_length outtree_name = basename + 'divergence_tree.nexus' Phylo.write(tt.tree, outtree_name, 'nexus', format_branch_length=fmt_bl) print("--- divergence tree saved in nexus format as \n\t %s\n"%outtree_name) def print_save_plot_skyline(tt, n_std=2.0, screen=True, save='', plot=''): if plot: import matplotlib.pyplot as plt skyline, conf = tt.merger_model.skyline_inferred(gen=50, confidence=n_std) if save: fh = open(save, 'w', encoding='utf-8') header1 = "Skyline assuming 50 gen/year and approximate confidence bounds (+/- %f standard deviations of the LH)\n"%n_std header2 = "date \tN_e \tlower \tupper" if screen: print('\t'+header1+'\t'+header2) if save: fh.write("#"+ header1+'#'+header2+'\n') for (x,y, y1, y2) in zip(skyline.x, skyline.y, conf[0], conf[1]): if screen: print("\t%1.3f\t%1.3e\t%1.3e\t%1.3e"%(x,y, y1, y2)) if save: fh.write("%1.3f\t%1.3e\t%1.3e\t%1.3e\n"%(x,y, y1, y2)) if save: print("\n --- written skyline to %s\n"%save) fh.close() if plot: plt.figure() plt.fill_between(skyline.x, conf[0], conf[1], color=(0.8, 0.8, 0.8)) plt.plot(skyline.x, skyline.y, label='maximum likelihood skyline') plt.yscale('log') plt.legend() plt.ticklabel_format(axis='x',useOffset=False) plt.savefig(plot) def scan_homoplasies(params): """ the function implementing treetime homoplasies """ if assure_tree(params, tmp_dir='homoplasy_tmp'): return 1 gtr = create_gtr(params) ########################################################################### ### READ IN VCF ########################################################################### #sets ref and fixed_pi to None if not VCF aln, ref, fixed_pi = read_if_vcf(params) is_vcf = True if ref is not None else False ########################################################################### ### ANCESTRAL RECONSTRUCTION 
########################################################################### treeanc = TreeAnc(params.tree, aln=aln, ref=ref, gtr=gtr, verbose=1, fill_overhangs=True) if treeanc.aln is None: # if alignment didn't load, exit return 1 if is_vcf: L = len(ref) + params.const else: L = treeanc.data.full_length + params.const N_seq = len(treeanc.aln) N_tree = treeanc.tree.count_terminals() if params.rescale!=1.0: for n in treeanc.tree.find_clades(): n.branch_length *= params.rescale n.mutation_length = n.branch_length print("read alignment from file %s with %d sequences of length %d"%(params.aln,N_seq,L)) print("read tree from file %s with %d leaves"%(params.tree,N_tree)) print("\ninferring ancestral sequences...") ndiff = treeanc.infer_ancestral_sequences('ml', infer_gtr=params.gtr=='infer', marginal=False, fixed_pi=fixed_pi) print("...done.") if is_vcf: treeanc.recover_var_ambigs() ########################################################################### ### analysis of reconstruction ########################################################################### from collections import defaultdict from scipy.stats import poisson offset = 0 if params.zero_based else 1 if params.drms: DRM_info = read_in_DRMs(params.drms, offset) drms = DRM_info['DRMs'] # construct dictionaries gathering mutations and positions mutations = defaultdict(list) positions = defaultdict(list) terminal_mutations = defaultdict(list) for n in treeanc.tree.find_clades(): if n.up is None: continue if len(n.mutations): for (a,pos, d) in n.mutations: if '-' not in [a,d] and 'N' not in [a,d]: mutations[(a,pos+offset,d)].append(n) positions[pos+offset].append(n) if n.is_terminal(): for (a,pos, d) in n.mutations: if '-' not in [a,d] and 'N' not in [a,d]: terminal_mutations[(a,pos+offset,d)].append(n) # gather homoplasic mutations by strain mutation_by_strain = defaultdict(list) for n in treeanc.tree.get_terminals(): for a,pos,d in n.mutations: if pos+offset in positions and len(positions[pos+offset])>1: if '-' not in [a,d] and 'N' not in [a,d]: mutation_by_strain[n.name].append([(a,pos+offset,d), len(positions[pos])]) # total_branch_length is the expected number of substitutions # corrected_branch_length is the expected number of observable substitutions # (probability of an odd number of substitutions at a particular site) total_branch_length = treeanc.tree.total_branch_length() corrected_branch_length = np.sum([np.exp(-x.branch_length)*np.sinh(x.branch_length) for x in treeanc.tree.find_clades()]) corrected_terminal_branch_length = np.sum([np.exp(-x.branch_length)*np.sinh(x.branch_length) for x in treeanc.tree.get_terminals()]) expected_mutations = L*corrected_branch_length expected_terminal_mutations = L*corrected_terminal_branch_length # make histograms and sum mutations in different categories multiplicities = np.bincount([len(x) for x in mutations.values()]) total_mutations = np.sum([len(x) for x in mutations.values()]) multiplicities_terminal = np.bincount([len(x) for x in terminal_mutations.values()]) terminal_mutation_count = np.sum([len(x) for x in terminal_mutations.values()]) multiplicities_positions = np.bincount([len(x) for x in positions.values()]) multiplicities_positions[0] = L - np.sum(multiplicities_positions) ########################################################################### ### Output the distribution of times particular mutations are observed ########################################################################### print("\nThe TOTAL tree length is %1.3e and %d mutations were observed." 
%(total_branch_length,total_mutations)) print("Of these %d mutations,"%total_mutations +"".join(['\n\t - %d occur %d times'%(n,mi) for mi,n in enumerate(multiplicities) if n])) # additional optional output this for terminal mutations only if params.detailed: print("\nThe TERMINAL branch length is %1.3e and %d mutations were observed." %(corrected_terminal_branch_length,terminal_mutation_count)) print("Of these %d mutations,"%terminal_mutation_count +"".join(['\n\t - %d occur %d times'%(n,mi) for mi,n in enumerate(multiplicities_terminal) if n])) ########################################################################### ### Output the distribution of times mutations at particular positions are observed ########################################################################### print("\nOf the %d positions in the genome,"%L +"".join(['\n\t - %d were hit %d times (expected %1.2f)'%(n,mi,L*poisson.pmf(mi,1.0*total_mutations/L)) for mi,n in enumerate(multiplicities_positions) if n])) # compare that distribution to a Poisson distribution with the same mean p = poisson.pmf(np.arange(3*len(multiplicities_positions)),1.0*total_mutations/L) print("\nlog-likelihood difference to Poisson distribution with same mean: %1.3e"%( - L*np.sum(p*np.log(p+1e-100)) + np.sum(multiplicities_positions*np.log(p[:len(multiplicities_positions)]+1e-100)))) ########################################################################### ### Output the mutations that are observed most often ########################################################################### if params.drms: print("\n\nThe ten most homoplasic mutations are:\n\tmut\tmultiplicity\tDRM details (gene drug AAmut)") mutations_sorted = sorted(mutations.items(), key=lambda x:len(x[1])-0.1*x[0][1]/L, reverse=True) for mut, val in mutations_sorted[:params.n]: if len(val)>1: print("\t%s%d%s\t%d\t%s"%(mut[0], mut[1], mut[2], len(val), " ".join([drms[mut[1]]['gene'], drms[mut[1]]['drug'], drms[mut[1]]['alt_base'][mut[2]]]) if mut[1] in drms else "")) else: break else: print("\n\nThe ten most homoplasic mutations are:\n\tmut\tmultiplicity") mutations_sorted = sorted(mutations.items(), key=lambda x:len(x[1])-0.1*x[0][1]/L, reverse=True) for mut, val in mutations_sorted[:params.n]: if len(val)>1: print("\t%s%d%s\t%d"%(mut[0], mut[1], mut[2], len(val))) else: break # optional output specifically for mutations on terminal branches if params.detailed: if params.drms: print("\n\nThe ten most homoplasic mutation on terminal branches are:\n\tmut\tmultiplicity\tDRM details (gene drug AAmut)") terminal_mutations_sorted = sorted(terminal_mutations.items(), key=lambda x:len(x[1])-0.1*x[0][1]/L, reverse=True) for mut, val in terminal_mutations_sorted[:params.n]: if len(val)>1: print("\t%s%d%s\t%d\t%s"%(mut[0], mut[1], mut[2], len(val), " ".join([drms[mut[1]]['gene'], drms[mut[1]]['drug'], drms[mut[1]]['alt_base'][mut[2]]]) if mut[1] in drms else "")) else: break else: print("\n\nThe ten most homoplasic mutation on terminal branches are:\n\tmut\tmultiplicity") terminal_mutations_sorted = sorted(terminal_mutations.items(), key=lambda x:len(x[1])-0.1*x[0][1]/L, reverse=True) for mut, val in terminal_mutations_sorted[:params.n]: if len(val)>1: print("\t%s%d%s\t%d"%(mut[0], mut[1], mut[2], len(val))) else: break ########################################################################### ### Output strains that have many homoplasic mutations ########################################################################### # TODO: add statistical criterion if params.detailed: if 
params.drms:
            print("\n\nTaxa that carry positions that mutated elsewhere in the tree:\n\ttaxon name\t#of homoplasic mutations\t# DRM")
            mutation_by_strain_sorted = sorted(mutation_by_strain.items(), key=lambda x:len(x[1]), reverse=True)
            for name, val in mutation_by_strain_sorted[:params.n]:
                if len(val):
                    print("\t%s\t%d\t%d"%(name, len(val),
                                          len([mut for mut,l in val if mut[1] in drms])))
        else:
            print("\n\nTaxa that carry positions that mutated elsewhere in the tree:\n\ttaxon name\t#of homoplasic mutations")
            mutation_by_strain_sorted = sorted(mutation_by_strain.items(), key=lambda x:len(x[1]), reverse=True)
            for name, val in mutation_by_strain_sorted[:params.n]:
                if len(val):
                    print("\t%s\t%d"%(name, len(val)))

    return 0


def timetree(params):
    """
    implementing the main treetime command (time-tree inference)
    """
    if params.relax is None:
        relaxed_clock_params = None
    elif params.relax==[]:
        relaxed_clock_params=True
    elif len(params.relax)==2:
        relaxed_clock_params={'slack':params.relax[0], 'coupling':params.relax[1]}
    else:
        # '--relax' takes either no value or exactly two values (slack, coupling)
        print("ERROR: '--relax' expects either no arguments or two values: slack and coupling.", file=sys.stderr)
        return 1

    dates = utils.parse_dates(params.dates, date_col=params.date_column, name_col=params.name_column)
    if len(dates)==0:
        print("No valid dates -- exiting.")
        return 1

    if assure_tree(params, tmp_dir='timetree_tmp'):
        print("No tree -- exiting.")
        return 1

    outdir = get_outdir(params, '_treetime')

    gtr = create_gtr(params)
    infer_gtr = params.gtr=='infer'

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = True if ref is not None else False
    branch_length_mode = params.branch_length_mode
    #variable-site-only trees can have big branch lengths, the auto setting won't work.
    if is_vcf or (params.aln and params.sequence_length):
        if branch_length_mode == 'auto':
            branch_length_mode = 'joint'

    ###########################################################################
    ### SET-UP and RUN
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1
    myTree = TreeTime(dates=dates, tree=params.tree, ref=ref,
                      aln=aln, gtr=gtr, seq_len=params.sequence_length,
                      verbose=params.verbose, fill_overhangs=not params.keep_overhangs)
    myTree.tip_slack=params.tip_slack
    if not myTree.one_mutation:
        print("TreeTime setup failed, exiting")
        return 1

    # coalescent model options
    try:
        coalescent = float(params.coalescent)
        if coalescent<10*myTree.one_mutation:
            coalescent = None
    except:
        if params.coalescent in ['opt', 'const', 'skyline']:
            coalescent = params.coalescent
        else:
            print("unknown coalescent model specification, has to be either "
                  "a float, 'opt', 'const' or 'skyline' -- exiting")
            return 1

    # determine whether confidence intervals are to be computed and how the
    # uncertainty in the rate estimate should be treated
    calc_confidence = params.confidence
    if params.clock_std_dev:
        vary_rate = params.clock_std_dev if calc_confidence else False
    elif params.confidence and params.covariation:
        vary_rate = True
    elif params.confidence:
        print(fill("Outside of covariation aware mode TreeTime cannot estimate confidence intervals "
                   "without a specified standard deviation of the clock rate. Please specify '--clock-std-dev' "
                   "or rerun with '--covariation'.
Will proceed without confidence estimation")) vary_rate = False calc_confidence = False else: vary_rate = False # RUN root = None if params.keep_root else params.reroot try: success = myTree.run(root=root, relaxed_clock=relaxed_clock_params, resolve_polytomies=(not params.keep_polytomies), Tc=coalescent, max_iter=params.max_iter, fixed_clock_rate=params.clock_rate, n_iqd=params.clock_filter, time_marginal="assign" if calc_confidence else False, vary_rate = vary_rate, branch_length_mode = branch_length_mode, reconstruct_tip_states=params.reconstruct_tip_states, fixed_pi=fixed_pi, use_covariation = params.covariation, n_points=params.n_skyline, tracelog_file=os.path.join(outdir, "trace_run.log")) except TreeTimeError as e: print("\nTreeTime run FAILED: please check above for errors and/or rerun with --verbose 4.\n") raise e ########################################################################### ### OUTPUT and saving of results ########################################################################### if infer_gtr: fname = outdir+'sequence_evolution_model.txt' with open(fname, 'w', encoding='utf-8') as ofile: ofile.write(str(myTree.gtr)+'\n') print('\nInferred sequence evolution model (saved as %s):'%fname) print(myTree.gtr) fname = outdir+'molecular_clock.txt' with open(fname, 'w', encoding='utf-8') as ofile: ofile.write(str(myTree.date2dist)+'\n') print('\nInferred sequence evolution model (saved as %s):'%fname) print(myTree.date2dist) basename = get_basename(params, outdir) if coalescent in ['skyline', 'opt', 'const']: print("Inferred coalescent model") if coalescent=='skyline': print_save_plot_skyline(myTree, plot=basename+'skyline.pdf', save=basename+'skyline.tsv', screen=True) else: Tc = myTree.merger_model.Tc.y[0] print(" --T_c: \t %1.2e \toptimized inverse merger rate in units of substitutions"%Tc) print(" --T_c: \t %1.2e \toptimized inverse merger rate in years"%(Tc/myTree.date2dist.clock_rate)) print(" --N_e: \t %1.2e \tcorresponding 'effective population size' assuming 50 gen/year\n"%(Tc/myTree.date2dist.clock_rate*50)) # plot import matplotlib.pyplot as plt from .treetime import plot_vs_years leaf_count = myTree.tree.count_terminals() label_func = lambda x: (x.name if x.is_terminal() and ((leaf_count<30 and (not params.no_tip_labels)) or params.tip_labels) else '') plot_vs_years(myTree, show_confidence=False, label_func=label_func, confidence=0.9 if calc_confidence else None) tree_fname = (outdir + params.plot_tree) plt.savefig(tree_fname) print("--- saved tree as \n\t %s\n"%tree_fname) plot_rtt(myTree, outdir + params.plot_rtt) if params.relax: fname = outdir+'substitution_rates.tsv' print("--- wrote branch specific rates to\n\t %s\n"%fname) with open(fname, 'w', encoding='utf-8') as fh: fh.write("#node\tclock_length\tmutation_length\trate\tfold_change\n") for n in myTree.tree.find_clades(order="preorder"): if n==myTree.tree.root: continue g = n.branch_length_interpolator.gamma fh.write("%s\t%1.3e\t%1.3e\t%1.3e\t%1.2f\n"%(n.name, n.clock_length, n.mutation_length, myTree.date2dist.clock_rate*g, g)) export_sequences_and_tree(myTree, basename, is_vcf, params.zero_based, timetree=True, confidence=calc_confidence, reconstruct_tip_states=params.reconstruct_tip_states) return 0 def ancestral_reconstruction(params): """ implementing treetime ancestral """ # set up if assure_tree(params, tmp_dir='ancestral_tmp'): return 1 outdir = get_outdir(params, '_ancestral') basename = get_basename(params, outdir) gtr = create_gtr(params) 
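    # A minimal programmatic sketch of the equivalent library calls (assumed file
    # names, not part of the original command-line flow):
    #
    #   from treetime import TreeAnc
    #   ta = TreeAnc(tree='my_tree.nwk', aln='my_aln.fasta', gtr='JC69')
    #   ta.infer_ancestral_sequences('ml', marginal=True)
    #
    # mirroring the TreeAnc construction and inference performed below.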
    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = True if ref is not None else False

    treeanc = TreeAnc(params.tree, aln=aln, ref=ref, gtr=gtr, verbose=1,
                      fill_overhangs=not params.keep_overhangs)

    try:
        ndiff = treeanc.infer_ancestral_sequences('ml', infer_gtr=params.gtr=='infer',
                                                  marginal=params.marginal, fixed_pi=fixed_pi,
                                                  reconstruct_tip_states=params.reconstruct_tip_states)
    except TreeTimeError as e:
        print("\nAncestral reconstruction failed, please see above for error messages and/or rerun with --verbose 4\n")
        raise e

    ###########################################################################
    ### OUTPUT and saving of results
    ###########################################################################
    if params.gtr=='infer':
        fname = outdir+'/sequence_evolution_model.txt'
        with open(fname, 'w', encoding='utf-8') as ofile:
            ofile.write(str(treeanc.gtr)+'\n')
        print('\nInferred sequence evolution model (saved as %s):'%fname)
        print(treeanc.gtr)

    export_sequences_and_tree(treeanc, basename, is_vcf, params.zero_based,
                              report_ambiguous=params.report_ambiguous,
                              reconstruct_tip_states=params.reconstruct_tip_states)

    return 0


def reconstruct_discrete_traits(tree, traits, missing_data='?', pc=1.0, sampling_bias_correction=None,
                                weights=None, verbose=0, iterations=5):
    """take a set of discrete states associated with tips of a tree
    and reconstruct their ancestral states along with a GTR model that
    approximately maximizes the likelihood of the states on the tree.

    Parameters
    ----------
    tree : str, Bio.Phylo.Tree
        name of tree file or Biopython tree object
    traits : dict
        dictionary linking tips to traits
    missing_data : str, optional
        string indicating missing data
    pc : float, optional
        number of pseudo-counts to be used during GTR inference, default 1.0
    sampling_bias_correction : float, optional
        factor to inflate overall switching rate by to counteract sampling bias
    weights : str, optional
        name of file with equilibrium frequencies
    verbose : int, optional
        level of verbosity in output
    iterations : int, optional
        number of times non-linear optimization of overall rate and
        transmission estimation are iterated

    Returns
    -------
    tuple
        tuple of treeanc object, forward and reverse alphabets

    Raises
    ------
    TreeTimeError
        raise error if ancestral reconstruction errors out
    """
    ###########################################################################
    ### make a single character alphabet that maps to discrete states
    ###########################################################################
    unique_states = set(traits.values())
    n_observed_states = len(unique_states)

    # load weights from file and convert to dict if supplied as string
    if type(weights)==str:
        try:
            tmp_weights = pd.read_csv(weights, sep='\t' if weights[-3:]=='tsv' else ',',
                                      skipinitialspace=True)
            weight_dict = {row[0]:row[1] for ri,row in tmp_weights.iterrows() if not np.isnan(row[1])}
        except:
            raise ValueError("Loading of weights file '%s' failed!"%weights)
    elif type(weights)==dict:
        weight_dict = weights
    else:
        weight_dict = None

    # add weights to unique states for alphabet construction
    if weight_dict is not None:
        unique_states.update(weight_dict.keys())
        missing_weights = [c for c in unique_states if c not in weight_dict and c is not missing_data]
        if len(missing_weights):
            print("Missing weights for values: " + ", ".join(missing_weights))

        if
len(missing_weights)>0.5*n_observed_states: print("More than half of discrete states missing from the weights file") print("Weights read from file are:", weights) raise TreeTimeError("More than half of discrete states missing from the weights file") unique_states=sorted(unique_states) # make a map from states (excluding missing data) to characters in the alphabet # note that gap character '-' is chr(45) and will never be included here reverse_alphabet = {state:chr(65+i) for i,state in enumerate(unique_states) if state!=missing_data} alphabet = list(reverse_alphabet.values()) # construct a look up from alphabet character to states letter_to_state = {v:k for k,v in reverse_alphabet.items()} # construct the vector with weights to be used as equilibrium frequency if weight_dict is not None: mean_weight = np.mean(list(weight_dict.values())) weights = np.array([weight_dict[letter_to_state[c]] if letter_to_state[c] in weight_dict else mean_weight for c in alphabet], dtype=float) weights/=weights.sum() # consistency checks if len(alphabet)<2: print("mugration: only one or zero states found -- this doesn't make any sense", file=sys.stderr) return None, None, None n_states = len(alphabet) missing_char = chr(65+n_states) reverse_alphabet[missing_data]=missing_char letter_to_state[missing_char]=missing_data ########################################################################### ### construct gtr model ########################################################################### # set up dummy matrix W = np.ones((n_states,n_states), dtype=float) mugration_GTR = GTR.custom(pi = weights, W=W, alphabet = np.array(alphabet)) mugration_GTR.profile_map[missing_char] = np.ones(n_states) mugration_GTR.ambiguous=missing_char ########################################################################### ### set up treeanc ########################################################################### treeanc = TreeAnc(tree, gtr=mugration_GTR, verbose=verbose, ref='A', convert_upper=False, one_mutation=0.001) treeanc.use_mutation_length = False pseudo_seqs = {n.name: {0:reverse_alphabet[traits[n.name]] if n.name in traits else missing_char} for n in treeanc.tree.get_terminals()} valid_seq = np.array([s[0]!=missing_char for s in pseudo_seqs.values()]) print("Assigned discrete traits to %d out of %d taxa.\n"%(np.sum(valid_seq),len(valid_seq))) treeanc.aln = pseudo_seqs try: ndiff = treeanc.infer_ancestral_sequences(method='ml', infer_gtr=True, store_compressed=False, pc=pc, marginal=True, normalized_rate=False, fixed_pi=weights, reconstruct_tip_states=True) treeanc.optimize_gtr_rate() except TreeTimeError as e: print("\nAncestral reconstruction failed, please see above for error messages and/or rerun with --verbose 4\n") raise e for i in range(iterations): treeanc.infer_gtr(marginal=True, normalized_rate=False, pc=pc, fixed_pi=weights) treeanc.optimize_gtr_rate() if sampling_bias_correction: treeanc.gtr.mu *= sampling_bias_correction treeanc.infer_ancestral_sequences(infer_gtr=False, store_compressed=False, marginal=True, normalized_rate=False, reconstruct_tip_states=True) print(fill("NOTE: previous versions (<0.7.0) of this command made a 'short-branch length assumption. " "TreeTime now optimizes the overall rate numerically and thus allows for long branches " "along which multiple changes accumulated. 

def mugration(params):
    """
    implementing treetime mugration
    """
    ###########################################################################
    ### Parse states
    ###########################################################################
    if os.path.isfile(params.states):
        states = pd.read_csv(params.states, sep='\t' if params.states[-3:]=='tsv' else ',',
                             skipinitialspace=True)
    else:
        print("file with states does not exist")
        return 1
    outdir = get_outdir(params, '_mugration')

    if params.name_column:
        if params.name_column in states.columns:
            taxon_name = params.name_column
        else:
            print("Error: specified column '%s' for taxon name not found in meta data file with columns: "%params.name_column
                  + " ".join(states.columns))
            return 1
    elif 'name' in states.columns:
        taxon_name = 'name'
    elif 'strain' in states.columns:
        taxon_name = 'strain'
    elif 'accession' in states.columns:
        taxon_name = 'accession'
    else:
        taxon_name = states.columns[0]
    print("Using column '%s' as taxon name. This needs to match the taxa in the tree!"%taxon_name)

    if params.attribute:
        if params.attribute in states.columns:
            attr = params.attribute
        else:
            print("The specified attribute was not found in the metadata file "+params.states, file=sys.stderr)
            print("Available columns are: "+", ".join(states.columns), file=sys.stderr)
            return 1
    else:
        attr = states.columns[1]
        print("Attribute for mugration inference was not specified. Using "+attr, file=sys.stderr)

    leaf_to_attr = {x[taxon_name]:str(x[attr]) for xi, x in states.iterrows()
                    if x[attr]!=params.missing_data and x[attr]}

    mug, letter_to_state, reverse_alphabet = reconstruct_discrete_traits(params.tree, leaf_to_attr,
            missing_data=params.missing_data, pc=params.pc,
            sampling_bias_correction=params.sampling_bias_correction,
            verbose=params.verbose, weights=params.weights)

    if mug is None:
        print("Mugration inference failed, check error messages above and your input data.")
        return 1

    unique_states = sorted(letter_to_state.values())
    ###########################################################################
    ### output
    ###########################################################################
    print("\nCompleted mugration model inference of attribute '%s' for"%attr, params.tree)

    basename = get_basename(params, outdir)
    gtr_name = basename + 'GTR.txt'
    with open(gtr_name, 'w', encoding='utf-8') as ofile:
        ofile.write('Character to attribute mapping:\n')
        for state in unique_states:
            ofile.write('  %s: %s\n'%(reverse_alphabet[state], state))
        ofile.write('\n\n'+str(mug.gtr)+'\n')
    print("\nSaved inferred mugration model as:", gtr_name)
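    # The mapping section written above makes the single-letter GTR alphabet
    # interpretable; with hypothetical states it would look like:
    #   Character to attribute mapping:
    #     A: asia
    #     B: europe
    # followed by the string representation of the inferred GTR model.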
    terminal_count = 0
    for n in mug.tree.find_clades():
        n.confidence = None
        if n.up is None:
            continue
        # due to a bug in older versions of biopython that truncated filenames in nexus export
        # we truncate them by hand and make them unique.
        if n.is_terminal() and len(n.name)>40 and bioversion<"1.69":
            n.name = n.name[:35]+'_%03d'%terminal_count
            terminal_count += 1
        n.comment = '&%s="'%attr + letter_to_state[n.cseq[0]] + '"'

    if params.confidence:
        conf_name = basename+'confidence.csv'
        with open(conf_name, 'w', encoding='utf-8') as ofile:
            ofile.write('#name, '+', '.join(mug.gtr.alphabet)+'\n')
            for n in mug.tree.find_clades():
                ofile.write(n.name + ', '+', '.join([str(x) for x in n.marginal_profile[0]])+'\n')
        print("Saved table with ancestral state confidences as:", conf_name)

    # write tree to file
    outtree_name = basename+'annotated_tree.nexus'
    Phylo.write(mug.tree, outtree_name, 'nexus')
    print("Saved annotated tree as:", outtree_name)
    print("---Done!\n")

    return 0


def estimate_clock_model(params):
    """
    implementing treetime clock
    """
    if assure_tree(params, tmp_dir='clock_model_tmp'):
        return 1
    dates = utils.parse_dates(params.dates, date_col=params.date_column, name_col=params.name_column)
    if len(dates)==0:
        return 1

    outdir = get_outdir(params, '_clock')

    ###########################################################################
    ### READ IN VCF
    ###########################################################################
    #sets ref and fixed_pi to None if not VCF
    aln, ref, fixed_pi = read_if_vcf(params)
    is_vcf = True if ref is not None else False

    ###########################################################################
    ### ESTIMATE ROOT (if requested) AND DETERMINE TEMPORAL SIGNAL
    ###########################################################################
    if params.aln is None and params.sequence_length is None:
        print("one of arguments '--aln' and '--sequence-length' is required.", file=sys.stderr)
        return 1

    basename = get_basename(params, outdir)
    try:
        myTree = TreeTime(dates=dates, tree=params.tree, aln=aln, gtr='JC69',
                          verbose=params.verbose, seq_len=params.sequence_length, ref=ref)
    except TreeTimeError as e:
        print("\nTreeTime setup failed. Please see above for error messages and/or rerun with --verbose 4\n")
        raise e

    myTree.tip_slack = params.tip_slack
    if params.clock_filter:
        n_bad = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        myTree.clock_filter(n_iqd=params.clock_filter, reroot=params.reroot or 'least-squares')
        n_bad_after = [n.name for n in myTree.tree.get_terminals() if n.bad_branch]
        if len(n_bad_after)>len(n_bad):
            print("The following leaves don't follow a loose clock and "
                  "will be ignored in rate estimation:\n\t"
                  + "\n\t".join(set(n_bad_after).difference(n_bad)))

    if not params.keep_root:
        # reroot to optimal root, this assigns clock_model to myTree
        if params.covariation: # this requires branch length estimates
            myTree.run(root="least-squares", max_iter=0, use_covariation=params.covariation)

        try:
            res = myTree.reroot(params.reroot, force_positive=not params.allow_negative_rate)
        except TreeTimeError as e:
            print("ERROR: unknown root or rooting mechanism!")
            raise e

        myTree.get_clock_model(covariation=params.covariation)
    else:
        myTree.get_clock_model(covariation=params.covariation)
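    # The root date computed below follows from the root-to-tip regression
    # dist2root = slope*(t - t_root), i.e. t_root = -intercept/slope. When
    # covariation is on, its uncertainty is propagated via the gradient
    #   dp = [d t_root/d slope, d t_root/d intercept] = [intercept/slope**2, -1/slope],
    # so var(t_root) = dp . C . dp with C the covariance matrix of (slope, intercept).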
    d2d = utils.DateConversion.from_regression(myTree.clock_model)
    print('\n', d2d)
    print(fill('The R^2 value indicates the fraction of variation in '
               'root-to-tip distance explained by the sampling times. '
               'Higher values correspond to more clock-like behavior (max 1.0).')+'\n')

    print(fill('The rate is the slope of the best fit of the date to '
               'the root-to-tip distance and provides an estimate of '
               'the substitution rate. The rate needs to be positive! '
               'Negative rates suggest an inappropriate root.')+'\n')

    print('\nThe estimated rate and tree correspond to a root date:')
    if params.covariation:
        reg = myTree.clock_model
        dp = np.array([reg['intercept']/reg['slope']**2, -1./reg['slope']])
        droot = np.sqrt(reg['cov'][:2,:2].dot(dp).dot(dp))
        print('\n--- root-date:\t %3.2f +/- %1.2f (one std-dev)\n\n'%(-d2d.intercept/d2d.clock_rate, droot))
    else:
        print('\n--- root-date:\t %3.2f\n\n'%(-d2d.intercept/d2d.clock_rate))

    if not params.keep_root:
        # write rerooted tree to file
        outtree_name = basename+'rerooted.newick'
        Phylo.write(myTree.tree, outtree_name, 'newick')
        print("--- re-rooted tree written to \n\t%s\n"%outtree_name)

    table_fname = basename+'rtt.csv'
    with open(table_fname, 'w', encoding='utf-8') as ofile:
        ofile.write("#Dates of nodes that didn't have a specified date are inferred from the root-to-tip regression.\n")
        ofile.write("name, date, root-to-tip distance, clock-deviation\n")
        for n in myTree.tree.get_terminals():
            if hasattr(n, "raw_date_constraint") and (n.raw_date_constraint is not None):
                clock_deviation = d2d.clock_deviation(np.mean(n.raw_date_constraint), n.dist2root)
                if np.isscalar(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint)
                elif len(n.raw_date_constraint):
                    tmp_str = str(n.raw_date_constraint[0])+'-'+str(n.raw_date_constraint[1])
                else:
                    tmp_str = ''
                ofile.write("%s, %s, %f, %f\n"%(n.name, tmp_str, n.dist2root, clock_deviation))
            else:
                # no date constraint -> no clock deviation; write 0.0 instead of
                # reusing a stale (or undefined) value from a previous iteration
                ofile.write("%s, %f, %f, 0.0\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
        for n in myTree.tree.get_nonterminals(order='preorder'):
            ofile.write("%s, %f, %f, 0.0\n"%(n.name, d2d.numdate_from_dist2root(n.dist2root), n.dist2root))
    print("--- wrote dates and root-to-tip distances to \n\t%s\n"%table_fname)

    ###########################################################################
    ### PLOT AND SAVE RESULT
    ###########################################################################
    plot_rtt(myTree, outdir+params.plot_rtt)

    return 0
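# Typical invocation of the subcommand implemented above (a sketch; file names
# and the sequence length are placeholders):
#   treetime clock --tree tree.nwk --dates dates.csv --sequence-length 30000 --outdir clock_results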