pax_global_header00006660000000000000000000000064145267317150014525gustar00rootroot0000000000000052 comment=2ada87c43f1aecd67e5919ae6675e0a9be614914 pairtools-1.0.3/000077500000000000000000000000001452673171500135425ustar00rootroot00000000000000pairtools-1.0.3/.flake8000066400000000000000000000006711452673171500147210ustar00rootroot00000000000000[flake8] exclude = __init__.py __main__.py max-line-length = 120 ignore = # whitespace before ':' E203 # too many leading '#' for block comment E266 # line too long E501 # line break before binary operator W503 select = # mccabe complexity C # pycodestyle E # pyflakes error F # pyflakes warning W # bugbear B # line exceeds max-line-length + 10% B950 pairtools-1.0.3/.github/000077500000000000000000000000001452673171500151025ustar00rootroot00000000000000pairtools-1.0.3/.github/workflows/000077500000000000000000000000001452673171500171375ustar00rootroot00000000000000pairtools-1.0.3/.github/workflows/python-package.yml000066400000000000000000000023511452673171500225750ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Python package on: push jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip wheel setuptools pip install numpy cython pysam pip install -r requirements-dev.txt pip install -e . - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. 
The GitHub editor is 127 chars wide flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pip install pytest pytest pairtools-1.0.3/.github/workflows/python-publish-test.yml000066400000000000000000000016531452673171500236310ustar00rootroot00000000000000 # This workflows will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Publish Python Package to Test PyPI on: release: types: [prereleased] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine cython numpy pysam - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist twine upload --repository-url https://test.pypi.org/legacy/ dist/* pairtools-1.0.3/.github/workflows/python-publish.yml000066400000000000000000000015501452673171500226500ustar00rootroot00000000000000# This workflow will upload a Python Package using Twine when a release is created # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries name: Upload Python Package on: release: types: [created] jobs: deploy: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python uses: actions/setup-python@v2 with: python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel twine cython pysam numpy - name: Build and publish env: TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} run: | python setup.py sdist twine 
upload dist/* pairtools-1.0.3/.gitignore000066400000000000000000000022501452673171500155310ustar00rootroot00000000000000# vim undos *.un~ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so *.c *.cpp # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # cython compiled C extension _*.c *.DS_Store # VS code settings .vscode/* # Files generated as the examples examples/* pairtools-1.0.3/CHANGES.md000066400000000000000000000115641452673171500151430ustar00rootroot00000000000000### 1.0.3 (2023-11-20) ### - [x] `pairtools dedup`: update default chunksize to 10,000 to prevent memory overflow on datasets with high duplication rate ### 1.0.2 (2022-11-XX) ### - [x] `pairtools select` regex update (string substitutions failed when the column name was a substring of another) - [x] Warnings capture in dedup: pairs lines are always split after rstrip newline - [x] Important fixes of splitting schema - [x] Dedup comment removed (failed when the read qualities contained "#") - [x] Remove dbist build out of wheel - [x] pairtools scaling: fixed an issue with scaling maximum range 
value https://github.com/open2c/pairtools/issues/150#issue-1439106031 ### 1.0.1 (2022-09-XX) ### - [x] Fixed issue with pysam dependencies on pip and conda - [x] pytest test engine instead of nose - [x] Small fixes in teh docs and scaling ### 1.0.0 (2022-08-XX) ### This is a major release of pairtools since last release (April 2019!) #### Post merge: - [x] sphinx docs update with incorporated walkthroughs #### New tools: - [x] parse2 module with CLI for parsing complex walks - [x] scaling and header modules with CLI #### Fixes by modules: pairtools dedup - [x] finalize detection of optical duplicates https://github.com/open2c/pairtools/issues/106 and https://github.com/open2c/pairtools/issues/59, also related to https://github.com/open2c/pairtools/issues/54 - [x] chunked dedup by @Phlya - [x] improvement of dedup to include reporting of the parent readID by @Phlya and @agalitsyna pairtools stats/scaling - [x] split dedup stats and regular stats - [x] output chromosome size to the stats output https://github.com/open2c/pairtools/issues/83 - [x] pairtools stats: YAML output? https://github.com/open2c/pairtools/issues/111 and https://github.com/open2c/pairtools/issues/79 - [x] pairtools scaling tool which takes into account chromosome sizes: https://github.com/open2c/pairtools/issues/81, https://github.com/open2c/pairtools/issues/56? pairtools parse - [x] parse complex walks engine and tools: https://github.com/open2c/pairtools/pull/109 - [x] stdin and stdout reporting defaults: https://github.com/open2c/pairtools/issues/48 - [x] flipping issue: https://github.com/open2c/pairtools/issues/91 pairtools phase - [x] make work with both pip and github versions of bwa: https://github.com/open2c/pairtools/pull/114 pairtools restrict - [x] Handle empty pairs with "!" 
chromosomes: https://github.com/open2c/pairtools/issues/76 - [x] Problem with restriction sites header/first rfrag: https://github.com/open2c/pairtools/issues/73 - [x] Suggestions by @golobor: https://github.com/open2c/pairtools/issues/16 pairtools merge - [x] do not require sorting? https://github.com/open2c/pairtools/issues/23 - [x] headers handling: https://github.com/open2c/pairtools/issues/18 #### General improvements: Headers maintenance - [x] allow adding a header to a headerless file https://github.com/open2c/pairtools/issues/119 or broader addition of the headed module, draft: https://github.com/open2c/pairtools/pull/121 Code maintenance - [x] transfer pairlib into sandbox of pairtools lib - [x] separate cli and lib - [x] Remove OrderedDict: https://github.com/open2c/pairtools/issues/113 - [x] Clean up deprecation warnings, e.g. https://github.com/open2c/pairtools/issues/71 - [x] Fix input errors without explanations, e.g. https://github.com/open2c/pairtools/issues/61 #### Specific changes: Docs improvements - [x] pairtools walkthrough - [x] phasing walkthrough - [x] parse docs update Tests proposals - [x] add tests for dedup @Phlya : https://github.com/open2c/pairtools/issues/5 - [x] add tests for stats, and merge: https://github.com/open2c/pairtools/issues/5 Enhancements - [x] add summaries: https://github.com/open2c/pairtools/pull/105 - [x] support of [bwa mem2]( https://github.com/bwa-mem2/bwa-mem2), which is 2-3 times faster than usual bwa mem: https://github.com/open2c/pairtools/discussions/118 - [x] I/O single utility instead of repetitive code in each module ### 0.3.1 (2021-02-XX) ### * sample: a new tool to select a random subset of pairs * parse: add --readid-transform to edit readID * parse: add experimental --walk-policy all (note: it will be moved to a separate tool in future!) 
* all tools: use bgzip if pbgzip not available Internal changes: * parse: move most code to a separate _parse module * _headerops: add extract_chromosomes(header) * all tools: drop py3.5 support * switch from travis CI to github actions ### 0.3.0 (2019-04-23) ### * parse: tag pairs with missing FASTQ/SAM on one side as corrupt, pair type "XX" ### 0.2.2 (2019-01-07) ### * sort: enable lz4c compression of sorted chunks by default ### 0.2.1 (2018-12-21) ### * automatically convert mapq1 and mapq2 to int in `select` ### 0.2.0 (2018-09-03) ### * add the `flip` tool ### 0.1.1 (2018-07-19) ### * Bugfix: include _dedup.pyx in the Python package ### 0.1.0 (2018-07-19) ### * First release. pairtools-1.0.3/LICENSE000066400000000000000000000020541452673171500145500ustar00rootroot00000000000000MIT License Copyright (c) 2017-2021 Open2C Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
pairtools-1.0.3/MANIFEST.in000066400000000000000000000005641452673171500153050ustar00rootroot00000000000000include CHANGES.md include README.md include requirements.txt include requirements_doc.txt include LICENSE graft tests graft doc prune doc/_build prune doc/_templates global-include *.pyx global-include *.pxd global-exclude __pycache__/* global-exclude *.so global-exclude *.pyd global-exclude *.pyc global-exclude .git* global-exclude .deps/* global-exclude .DS_Store pairtools-1.0.3/Makefile000066400000000000000000000012661452673171500152070ustar00rootroot00000000000000.PHONY: init install clean-pyc clean-build build test publish docs-init docs init: conda install --file requirements.txt install: pip install -e . test: nosetests clean-pyc: find . -name '*.pyc' -exec rm --force {} + find . -name '*.pyo' -exec rm --force {} + find . -name '*~' -exec rm --force {} + clean-build: rm -rf build/ rm -rf dist/ clean: clean-pyc clean-build build: clean-build python setup.py sdist # python setup.py bdist_wheel publish: build twine upload dist/* publish-test: twine upload --repository-url https://test.pypi.org/legacy/ dist/* #docs-init: # conda install --file docs/requirements.txt # #docs: # cd docs && python make_cli_rst.py && make html pairtools-1.0.3/README.md000066400000000000000000000217631452673171500150320ustar00rootroot00000000000000# pairtools [![Documentation Status](https://readthedocs.org/projects/pairtools/badge/?version=latest)](http://pairtools.readthedocs.org/en/latest/) [![Build Status](https://travis-ci.org/mirnylab/pairtools.svg?branch=master)](https://travis-ci.org/mirnylab/pairtools) [![Join the chat on Slack](https://img.shields.io/badge/chat-slack-%233F0F3F?logo=slack)](https://bit.ly/2UaOpAe) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1490831.svg)](https://doi.org/10.5281/zenodo.1490831) ## Process Hi-C pairs with pairtools `pairtools` is a simple and fast command-line framework to process sequencing data from a Hi-C experiment. 
`pairtools` process pair-end sequence alignments and perform the following operations: - detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end sequences of Hi-C DNA molecules - sort .pairs files for downstream analyses - detect, tag and remove PCR/optical duplicates - generate extensive statistics of Hi-C datasets - select Hi-C pairs given flexibly defined criteria - restore .sam alignments from Hi-C pairs - annotate restriction digestion sites - get the mutated positions in Hi-C pairs To get started: - Visit [pairtools tutorials](https://pairtools.readthedocs.io/en/latest/examples/pairtools_walkthrough.html), - Take a look at a [quick example](https://github.com/open2c/pairtools#quick-example), - Check out the detailed [documentation](http://pairtools.readthedocs.io). ## Data formats `pairtools` produce and operate on tab-separated files compliant with the [.pairs](https://github.com/4dn-dcic/pairix/blob/master/pairs_format_specification.md) format defined by the [4D Nucleome Consortium](https://www.4dnucleome.org/). All pairtools properly manage file headers and keep track of the data processing history. Additionally, `pairtools` define the [.pairsam format](https://pairtools.readthedocs.io/en/latest/formats.html#pairsam), an extension of .pairs that includes the SAM alignments of a sequenced Hi-C molecule. .pairsam complies with the .pairs format, and can be processed by any tool that operates on .pairs files. `pairtools` produces a set of additional extra columns, which describe properties of alignments, phase, mutations, restriction and complex walks. The full list of possible extra columns is provided in the [`pairtools` format specification](https://pairtools.readthedocs.io/en/latest/formats.html#extra-columns). ## Installation Requirements: - Python 3.x - Python packages `cython`, `pysam`, `bioframe`, `pyyaml`, `numpy`, `scipy`, `pandas` and `click`. 
- Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`) and `samtools`. If available, `pairtools` can compress outputs with `pbgzip` and `lz4`. For the full list of recommended versions, see [requirements in the the GitHub repo](https://github.com/open2c/pairtools/blob/detect_mutations/requirements.txt). We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager. With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel. ```sh $ conda install -c conda-forge -c bioconda pairtools ``` Alternatively, install non-Python dependencies and `pairtools` with Python-only dependencies from PyPI using pip: ```sh $ pip install numpy pysam cython $ pip install pairtools ``` ## Quick example Setup a new test folder and download a small Hi-C dataset mapped to sacCer3 genome: ```bash $ mkdir /tmp/test-pairtools $ cd /tmp/test-pairtools $ wget https://github.com/open2c/distiller-test-data/raw/master/bam/MATalpha_R1.bam ``` Additionally, we will need a .chromsizes file, a TAB-separated plain text table describing the names, sizes and the order of chromosomes in the genome assembly used during mapping: ```bash $ wget https://raw.githubusercontent.com/open2c/distiller-test-data/master/genome/sacCer3.reduced.chrom.sizes ``` With `pairtools parse`, we can convert paired-end sequence alignments stored in .sam/.bam format into .pairs, a TAB-separated table of Hi-C ligation junctions: ```bash $ pairtools parse -c sacCer3.reduced.chrom.sizes -o MATalpha_R1.pairs.gz --drop-sam MATalpha_R1.bam ``` Inspect the resulting table: ```bash $ less MATalpha_R1.pairs.gz ``` ## Pipelines - We provide a simple working example of a mapping bash 
pipeline in /examples/. - [distiller](https://github.com/open2c/distiller-nf) is a powerful Hi-C data analysis workflow, based on `pairtools` and [nextflow](https://www.nextflow.io/). ## Tools - `parse`: read .sam/.bam files produced by bwa and form Hi-C pairs - form Hi-C pairs by reporting the outer-most mapped positions and the strand on the either side of each molecule; - report unmapped/multimapped (ambiguous alignments)/chimeric alignments as chromosome "!", position 0, strand "-"; - perform upper-triangular flipping of the sides of Hi-C molecules such that the first side has a lower sorting index than the second side; - form hybrid pairsam output, where each line contains all available data for one Hi-C molecule (outer-most mapped positions on the either side, read ID, pair type, and .sam entries for each alignment); - report .sam tags or mutations of the alignments; - print the .sam header as #-comment lines at the start of the file. - `parse2`: read .sam/.bam files with long paired-and or single-end reads and form Hi-C pairs from complex walks - identify and rescue chrimeric alignments produced by singly-ligated Hi-C molecules with a sequenced ligation junction on one of the sides; - annotate chimeric alignments by restriction fragments and report true junctions and hops (One-Read-Based Interactions Annotation, ORBITA); - perform intra-molecule deduplication of paired-end data when one side reads through the DNA on the other side of the read; - report index of the pair in the complex walk; - make combinatorial expansion of pairs produced from the same walk; - `sort`: sort pairs files (the lexicographic order for chromosomes, the numeric order for the positions, the lexicographic order for pair types). - `merge`: merge sorted .pairs files - merge sort .pairs; - combine the .pairs headers from all input files; - check that each .pairs file was mapped to the same reference genome index (by checking the identity of the @SQ sam header lines). 
- `select`: select pairs according to specified criteria - select pairs entries according to the provided condition. A programmable interface allows for arbitrarily complex queries on specific pair types, chromosomes, positions, strands, read IDs (including matches to a wildcard/regexp/list). - optionally print the non-matching entries into a separate file. - `dedup`: remove PCR duplicates from a sorted triu-flipped .pairs file - remove PCR duplicates by finding pairs of entries with both sides mapped to similar genomic locations (+/- N bp); - optionally output the PCR duplicate entries into a separate file; - detect optical duplicates from the original Illumina read ids; - apply filtering by various properties of pairs (MAPQ; orientation; distance) together with deduplication; - output yaml or convenient tsv deduplication stats into text file. - NOTE: in order to remove all PCR duplicates, the input must contain \*all\* mapped read pairs from a single experimental replicate; - `maskasdup`: mark all pairs in a pairsam as Hi-C duplicates - change the field pair_type to DD; - change the pair_type tag (Yt:Z:) for all sam alignments; - set the PCR duplicate binary flag for all sam alignments (0x400). - `split`: split a .pairsam file into .pairs and .sam. - `flip`: flip pairs to get an upper-triangular matrix - `header`: manipulate the .pairs/.pairsam header - generate new header for headerless .pairs file - transfer header from one .pairs file to another - set column names for the .pairs file - validate that the header corresponds to the information stored in .pairs file - `stats`: calculate various statistics of .pairs files - `restrict`: identify the span of the restriction fragment forming a Hi-C junction - `phase`: phase pairs mapped to a diploid genome ## Contributing [Pull requests](https://akrabat.com/the-beginners-guide-to-contributing-to-a-github-project/) are welcome. For development, clone and install in "editable" (i.e. 
development) mode with the `-e` option. This way you can also pull changes on the fly. ```sh $ git clone https://github.com/open2c/pairtools.git $ cd pairtools $ pip install -e . ``` ## Citing `pairtools` Open2C*, Nezar Abdennur, Geoffrey Fudenberg, Ilya M. Flyamer, Aleksandra A. Galitsyna*, Anton Goloborodko*, Maxim Imakaev, Sergey V. Venev. "Pairtools: from sequencing data to chromosome contacts" bioRxiv, February 13, 2023. ; doi: https://doi.org/10.1101/2023.02.13.528389 ## License MIT pairtools-1.0.3/doc/000077500000000000000000000000001452673171500143075ustar00rootroot00000000000000pairtools-1.0.3/doc/Makefile000066400000000000000000000012571452673171500157540ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @cd ..; python setup.py build_ext --inplace; cd docs @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)pairtools-1.0.3/doc/_static/000077500000000000000000000000001452673171500157355ustar00rootroot00000000000000pairtools-1.0.3/doc/_static/hic-processing-pipeline.png000066400000000000000000001514661452673171500232000ustar00rootroot00000000000000PNG  IHDR~3sBIT|d pHYsgRtEXtSoftwarewww.inkscape.org< IDATxwE׽IH#$!B)Jޫt/ "_"_JP;kBzr~|v~3g={MxGNi O+ "l hmuDDzǀG[\l l 涰^",6KG])D`1Bb@`w`LwVJDd LmTf_fX|2[aJuoQDz3m:X0v;ZJ\ Z\NJ,[x8[b7KDdIq8vӮ؁7F~zuAw_ %`zc" <+iFliVmLޛZ\/I,߈Mj""K[X`=X|pHEqx|86dF~YWł<""v\݁wUm6}u- KEDDDz''&f:k'g;u`=c%ʻ1G˾l&QD$;fM\ $H{ץ;^⺈t'Bcs 2;t-"Yݓ̐Jneezs-"RO|A]L/❋kq]DDDDsI+H/Փ'NngUuR""u܉GU콝z, FY"K4@DD6`M`p66)3ؚQZr7l^,ՁLEXP{q:Xv[ xȊȒ?0({< XP``5`%l䍃eym3 XXkzc>3l?YeGewg >,}iƭ[$"Kdn/&,2Q_f(u1vBG+Ў^`,vlX;_#ۿ2ul _r =3{`;`&AP=;&ŽAb ?;^C ۧHp6x$ax<́lMRξ_Ifཛྷ&WD'%pAcTW=yKF$X:Wbñ bj78䍃s~&{ĶΫ׳ǯǽ&eDN0pubX_מ |-OEς/a=S܄|XЭٲe|߃e 駲$eh;+ 7'EV̼D]ATHD&|{t#5낉koVFѲeĆ*jÍ'4X`.>}F+#Û{pXNpS#{2bC@,bw0u]QM4XzHOuvG[..?o\n^@x8x]bƚ2="Y|~> ꇲICu us'#cEˆaજ߂VrA9 Oc#ͩHoˎ6/DͽÂɻaAX<v[NѲ}_~7^)R+9W=;n]PN"f`9e6Ʒ5H,+[5zm;OpmnjSY&a޸"-n,ڀ(;.8G}|-Zvh,o菝2߉f0BD' Dn ~8q=^+׮ %YTOHeSr\Ph'eqrLؙ>R\j,˷H, ?*(75+L0F T| _r'eMKrM~}}Lb=Ûe `YniTbo ǹG/X66g'g|*i2+x3v""""=Կ0un( ;̔=ZV쬜IM%"jc8-;έ~8E2J,,`MP8d8q؃g,$?+`+5u(yhǃeGNnhqcm^|CaOe;ˎ2g:E>y2W?愓`n X'/=eo`'Q}cr6+Xz޶Ԭf8-sC?_ "k{ zqqֵgs=,N]yaRCչsu궘Iw)afU>H? v2s yu {ryLuwC=u 5Ed 6k!Y86#4 qbv JDqD %6xh"<=~`.33d Aq`+u׽)"vDuy=|@Wug-]Ò.BEh#~:BNd&aj8nOR=[wzκòky|y늭$qV-cλiZ4dƚ[GT t #SYDzٿIg'Cu7diq} ύ( XwԾ=LFѸj""=kRX^!x<`}wco&3?Vfm<'?|bq& SMmFlEc-{<  N:F~6έ):oH?q j8E^OXxwk n.:8Trߣ-;qF"iLm\9o\N'[3eePEZ&iE'js9 QD/X:_8o>v:΂mgcP. q22P$x\/+l{1q%~b('AlmƮwA_Svؘ`3;zyX\NOEl¨:Xw띕SXV_!< L|߳epcJنrFOP8ۋp蝑Ī'cݰ=jX ~3}@wsvXnx?\7Ac? 
&uvz]W]MTaǰSI3PT1b2k}:8 ̻C{MHYnM,}cě""""-7NNffa'SfdNz !5>.ƭrjMT쵂w}Vfv2*"Ӹ ޴L 5DխK믁ex{?x.BnbNƏv5+7K(hk`]͝=뇙3u-8s"ngbNjishm2'l9?9uEf=UⱮ6<c%6u+ޘV ;,geNN,?4{G`ߜ튈Pq`Bua3w|^^,m/80,`uVg%%gԄ""ֆ8/VCGS&`cͶ>sa|Tv޺.gak͂OvP(";do.6T.%(Ol`Yv`7+Uo"p.qCSfk|fqvMPt]QdY>RNnfV7^-y ; x)e~:}neƪ+6Bp v|X.{hea=#oځ+1f5 ]WԳ! ߗ1g,<~ +a1gaåT{,ߍkQ>H:n=61Tߊ4(̪u-x^}.}3O~,_ܧf ISf~"' p7Hj5"""""""9&΍QDDZlx`cp $~Jlґ%yW-&""Ӟ!1~SƍG^&i9M("=a1o c>Mf&""=N'X9ֆM6;DNwR=Q,}@`VWBDDDDDDDDI.muEzk@%׺jH/p#pF""""""""K)XVW:h[W5ƷApz`_7֫+gb7XOjq]:$gb$⺈ȒkV1`ץ.Ǝ/_nq]DDDd)zݴd]`>|.?5Rz5b9`嶤&]Y.*߶i~qLMp\WVJDjķ%.]ay%R""ҷ.MpsݾoNNma]DDx7xRl؎#ED: 8,VhuZh߹o[Y.ձazF)WtI4[[ e;*"g`m /Z\fkxU`'Hus} i&bVWDD$SoHAKZXqTED]w<ꊈH""""""""DCjHOxh`S` l¢$6c&a?V˖\:J;uSe/`ԭ &wa38{cCS籬_bcc6bx#XW/W|;QY>}sVvޛyL?Kguүn]: o+%~/̉zÁ}m5X[92d#l\屮|n~h~ k~ ˌ? <}ҹǾ+bk~< < 7_/a`6XJln#9Xs)w#c=9~WS]wۼg_>w6ֵ/Xzl:{>Wq^xÁӲw lJsc;_Z=\dva ޾ `{9`cazf?ґlx3fl`-5c$pvCX uX[^\I͊?p0,{EYbD->@83n};%~Kyv2}7 1,w>KDe"KX pv.=;H;g;?[;=vzD}Wcזع?=y,㿱9kx-#3u.M:^mrFwN*vwud`}( ~J,_[s:o` ֫`OwH.> leO]eYSamcթX r?v,w/9܎'r[`^ NB.IJ>RoeCY;X< O{{9:ytkVTTaDg4v18?agyYݝM Dzb#b2G`'o/?.xm`[vGak2bK6`,,Y N,k2D F_,K9uaٌnkm N>v,ӱ}X,l-{r!6V#N=^Y.@a7t8'|%sTVϏc=*3X`m2>xֆ|ם3|6oeb߉;/c&m 콺 .}~[ÂK'' l ׼ Ѹ -m3/cm }_~ñTM{>}ڱE9^+R\83W[Pp7|7Xcv,y.]=kW~m;/s,s=v,8k{g8Xnw"3L²Ďñ,-عY8w֓ ?zVk+u#0 k/ηǮIq_C/Ŏ ^a ֖;ރms{ ""D37Sv8v`)Eۨ|`, H:/| ?idw'Se:>Fcw Fg~ ;t;& |OljE:vF CXvZ{%|?^[p Ӱح>N#č?߭O`An#۞  }߁B+3.o9awKufwev^ǛX(ԆݰpH0{!n2ĶgNt*X;UbgQb41~V,`Ne891XNjV>%2*mIA}{f8G_\Hwf8N,_Xc۸t}kt ~ؐ<2VLyԶ3a^|cc >3AoJHtg9 (sebH|{4KRHF' ,pg] Lb {L-VFDDd H9 &q=EF/ ,0Y4rѰh$|Yry'a)MܛX^6܆eȜrhY#}`2XP|=^,ghY7\f1%XvQ[` ȖŲS ],%=(c^ re 2.ZvD,/<2Ҙ3gvX3͛ qsP.. 8%ʤziu3te !^xP3azA6K\ƋUHk_e>H%if~|[7Z_,Ǖ$\x^˶+x"" Ȓ?b]S'ېƲ`&/WmϦt7z^GmhWꇿZ' E'wWoHLJ,K˚k܈0H^NaOq p6v2YOlQ*&+> g_Qv'o#+X[< ƿgȟ1wwt8oLGesC|.Jl]qg;M ǹfƂ.. 
›kqKaƺCܟFѱq?q,{[OYV"Α瓟ҕVurhì5TKx]z"!$"h gY꺛'-enuaÏk79R=`l\"vAWt>OYgr.?LXP޾,)1Io,C/eX0 q}h-3؉jXfgC㇂eƠ7:9N2 (~z=(yLヌsL,n|aaO7N)ӎ [}_\3os\6:~̙ݔK-ߑ\`dcn[ L ǵ@݁ݸ^榪9k)IU\WKǎ#VkSbcx\@βXI[rTtLWkpE[ ,v`u{6&ϩ>h0^v]7S`y,;p^!x7]}uT;㸛 >.&?;{CqN-U,&ѽg{бQx5.gkƤuImʖPݩ,lt]u<x#)y]\&SfaXEǵf,90{p<,0-"זopu]WM{5{q&s1v0n_lϾpG8ˈ@z<03-87fkaV| eʝ=*,6iWvFº24;Y6" ]Nz"fu%] 6hpӂaE5K+Y/Y&Q=Ӱq1iw.`.*S .i䷲6T^ wxE,Ua,cCF*z]yhl# Xkܰ7 hMզpZNXkzkʼɁUmblx+v켱pDtQ"w-ո =w?nzV>uؚa`:eϮN`Dahڱ.py_ǞnջA;t1.U z];*ޯaT ,=CXҀ;M$lSѹsȧ==>XފJ0ۑsfH[wv\k z@x)כ3n ;~ݨ骹'D\:g5?TVOkz{ڹcN:iTDzeifom.ÏALdPnX PP[㿗G.ڇ~Ȋcoҍh ?ߦz"oo:mlڕ3ˮqrm7wH=f6:l5ߐ>+؆c]`1/3|lws:n-l/S~0nN!L)(EejpdNY"}\: s ՕW`דf"}|#{\>u)~Xfβ49{,6_w|D ~Xg(.kpj/př>)a'X)AiCǿgQ,vlwa ϒhԹ ώ~Z:{;a˰퇿`ewÂFyu'ҁ69>Hu6eJ72  ;Ul2)Xwj~O;ep 5)ew{  uXf`J8!#B(w#j4;''QHL)"]+uE]l Ǔ=O玏`筱F ;95UfAY6UDri gYL2|.N ~eF݆͞>[;ZbǰA C:`؝>|EAb8$vB4c>9/Xb'`(> 8j?ރ3۰}b'Ckvr>N,|.m2$IVbXݏ²_QX0x(pb]2^a㛱.`߉! pw:˚v~l,R,sPV{XXPfK,~|/^,vnb<ƺxe޵+,,=vq#Zw`C(SX>,?@+}ڄ@m7n2gfy.ڄXI~;zS>ca{C,~/kxʇnw݄ v̜}>uC=ƆZ ^֖cU؍%q7և_0b7DtvǴ`ǎ3c-)v6PnAع;vSFb=aM"3]v"v&vYþamo߆]}\\;W ǎ.x}v8,v>oQ>c,uHS]&*(c_)[HmVi,U{0g%_ȝQb[)eֹ`{өkd[%:6dOm$Y-`uw^b݃凖@_j\ff$9[8)zX&qOi&aSgGXfȭg,~wX_P~'X;u1(҆M9`ϒ?|^A90(2 =˚Kw`IxFcG=s s nO{:۹ ~w7.(j} LmJ9(W44MY{Yކ˼7$c<76"(sqN6h˸+u|^5X6uYc> =:o g`܂u}h46 hKC@ȇ)odɷ*6"lț矣ܤ""""""+&X5Ui_Q7Eu\MCKk$" mL,"""""d}XLMw ,Ƃ`_?MXajdM4v̺LODDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD9/ܝY Di#7&қҰ_3}V Jt~ס"""lnV̞[xX\Ңz4o`ץ'ikuzU[9F-HO2 F/ݙX[6;~ 6iDD:|ik[Xq-XI`d 8;GavB87  k {35 ۵c좴e]o+-Q͡=iFyel{a$""]m7`14l5:~V'k~xLεzCy0ݯD}>x#ڗ NŒ+zu 
&iq]RޡiFv,{O`&l;uE`'4i""yڜ^Kp8߮uXJV)\7_sYXD3vޢ6]N:aT)XvHo5k3*T3;;lnͣ:U6,8Rb7>EDخ-Ks;pOVH# צ2k)\kx`.Y][{cCy.B._nq]lπ| ۜFuVgcvmvEDBm-KW)g4N1?n IDATpsZ x 1gw}ThÏNz~+yzfvbhsDzƺq TzR xK@VGD=AD́oXo@^OV5 ƃXc(ez21 = lR][[mvVMڔ tC764vMH66^f6g=kdO\\l ?u%8vE]cuaѮ}\ZDَ} {uڰwd Mݕ]ðcmG!V.{:s/"""K؉B,p0?"૤/T/nnY]Vlp+ 1Ď%a;? f Jc﹈,' | Lpww/oe)L|jz=NBCuo1ڊ2nNZs OQ|섴(qu<pSq0dF|jXIdyyaF ğXف[9:4Xc7=S/c?=-Ur_hm~Yb3=mq))p~Q4qݯ'2 :2伿8; `ԟPܩ?S&^}(g݋wbWe/IDDDO N)bcq-hf> ʜSO`iXX7ma힝ݎΏbe/C2s}⃸R3k?$==Ӱ7|Q)Q epˮͩgc=yzkq]Dw9e 8 L. kV[: 86$ >lXX%V)#"ۗU ]]6їvs:Ι;߽XOm.g೯c-;`rk}_,?K{?NƮb Y9^"""8?,Lp'lm O6 L}wY"+3ޣy㞅lw'Elj Msؚ@" g|qb̓ݳ-kt\CnGK&|Ip45⺈t) 8&+",ۀ co~O%cԑر}XƯʯEM }u,x/Hٯ&"&6N~ j 8(1'Qc?1R;Iq {x}۸ 7]VxlkA|G4U "3mm}`]6+.b,H-vq{]Xm 6˔~X)(mҋXwオȟ4lGmE,Cl5؉j_,!哰n#~Xv`/!LcITrtm.e AG+BonPj!cNlg.p#ĉZv0_]߉t!Ԏ1X[eYu7xn1v 0 MƸ8#,o@VX(x|gVz}o](|O^jY-D_}'Ǟulq;;1l+rwx.\aTo'Uzn0 4 ò۱@ba]4Xw o 獜ۻktߒ>>k`y[t"=[ Ռq+'FۀO=]`tZ쩬,PqˁR}nUyYDe̯U ,ղ+ IJvC2{#^b>$ <&wb[Ǻ8%7 'o"ݯǞբ_P}.:kC‰V/n\u8[=-+CvN09ƬEm lؑP^F7. l¿``7C?"p'T("&wq/Nk L:< a2ՂuW:aCuxv]D:MleO CXw*> n{idMwibIml㉩\cYX)+R;ՃG`nOcjPțhu!l0Q(;'=lXkYP}AR4SҦYǞ!Nx_EuU:eo0j?pv=[vBӱ WǎաT0QDTmQNw;Y2fL[ack<s \wT`4 0O,o٪e,\X>W@QPE}52`ew sLq* 6 mPuFDJqOC^D6r,!;F\BD֧xnʯ=kbm||3fWE۰Oʼ ?:Wߵm͛Q>nְseL,騇y_*]wn;sb8>"x|<_w{*yˏ&ww;ܓXp]7e+?;\~7HE (|(bGEE"(("]D H=@H#g~ݝSu͙lyg|U\Ъy= MWz܏>7A.!{O+C=*fl_6߃"/?O~s<{{j%Q`3FpΟQۻ  u((][ KTiNdEE`s%EaC:Ffo]ǩJ?<&c؜03k(4u\S6zTOXٙa\>O|e`]ڛ/B#ޓs3^>?_6BT]=n icmqscwϺifffCІ$OPϲpU9{?6"8l\€sca֨.p{yqgOQ;Qj ΠE# ~#;qjcҏqyB< [^`R66z_mGfCJgmLyjgY$8yAZ6H&Yth<^hsA啟6 2yBԎ$l4?Y+ mX߇P[(?K/>=7;>O[ "YV}5$^D?seKff8ue2U9}Ehf!,B +Q"nGoE=O|M(A\d4 5NC'ynv/F5)~td,V~ }3[|BF8I9ˑ,u%ًm[z}|=4T%(ʥ~D~.ږgI^oWhgzd,Mq(dMgR?|o(Y4 6]HTa0fYM." 
7vB:h('{Ѣ;|g%_ʊ'P~v7wtjB]FkHujf5zl:Pʺ%k BZ}CwHؒdOWw]7-HnzWqZl ܃?Uf[Q+GW333X_vEC'%g>t l n* kh[6ͬ6Gk،:!CF7N5`c```"0XoYSMuت.'fS[N<ʉyMufX-wqTVLFx')3wt3fff6، l X矇7ס P9&TV,nBeu(`ffQ.OZ^󛙙Yz&uq75L_N|+ܡl x/{Gi13\!6j?p V_=z<*k(0?:ǒ1+!իqy磊^~؁iOGy4T>XQ/PyQTN<8x+23NvF{дy^D7/D@S2FSMV] K,Ϩ]qlfff1;+`!p+p2;L`T|5'аRx03z p0v(N#́O礣8Meffݵj34ppx4@knB $t` jAvl6[㬛F i^Es`03֍F7n%;nV,崍v~Y{/̆-_Y[Qn[ 8ƒcpiffff-*{ϑݓ!hU w <6umf6\C=!8 M1:=lh33k]YAGњ,TzS|ZJ z|+VӁSQl [84} lffY*]`RW=݇pp7&F+zQҜx޶ԙ p#EP|BekM/Zez54ՆW63+0Z/>ЋU^+-+[gkV єD,s4/Q gYնM}CB9꩖!03+c {,]]fE4&x T]̪p>b<|zjvB W^ ffi+gS_N<ojnXt d#єvө_8{ uTVDϧC +p͟FU럋S]4Eki4P/^tLff7wQ_.B[V0]ݲGoT^:̬ik7PhǙ _D=/s\a̪4 F'g#v0]ff{ l_a3QkfffPܫ9h 5I&_1433+#՜ry8[ k045fGK~9-_YcЈtAx333¦R߫y6=zjg?fZ IDAT}v2Qff]*exC^U&̬FҽSd-ԏ*̆}_h1$pWb̬S^~zU&j|r"T U7R|D pE`*efff֪qϨ½ڞE37=ͬ?DSWs~Qљ!15m_\v-\H"'v4nifffj+5UAh< P϶v-Zp6̬;_r*5CeBj8 t>3f><8D RS?f4F|0333k3 55~(ܿu~tp.32̠6O; 65S?(_g,hfMS9ygYgО 5p>3333z 4, \Bm5W9<,u-F3"=hnwp`BΣ~mz@|3ffemG}_ᛓЃʆt'+/:433n 2j+,W!P+g|2s^ifVc"jh0^{}/^G9e '(Nz#mf/P_^ikJlxچg+M1)YhFN'R|5akGj(Z|  Q%.:X3n6LFS_H^./ffffA*MU9]iՁ[bc~PtzѕTPj|X o6K<,ˉOu%f6lAm38 o#oR[Ej?;d333F?/`*e*'R[9 _I}! 
Z̬Y=SVrb.I9 \Er1T'03]]u`JSdQ$c>Fغ 733e6zl0Zڛ\.')6}lD٠w?e*M5F3Sd333&(,4E֌4wj3d3dn'74E֬h85h:&4j̆{H* +M53^)CIy/3㛒_3>e=9̊?6O4L^e]O (S q`g)fmA*}p3 `!-8 7Aepmp>oy崛Dsv㡎fVos`:'f{TkHW;ʉ̲%3NA&=JrlxضXΣ}>4Gf?/'=ͬˋg9Y_fl͋0333Y A+}N]33v,xG:Δgw:̬wЙJa]u(f:WN\ٽK13+/+tR̬U?s>4wZ]33korba׮:e`>-3ڵ1333 >ӝrݺ 33%|y0KdffffMZxW Q%It23t5Su̬IJSkО`gѰl33\S}+p33XT6dS|{P̆̆%UJd`192  ^/\2̬"S[N68sjKUm4*e v%"̆&W@33Xj dsyU%^t._`ߪhfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffv7K*NK@rKU33kl7[@oi=еꄘ B ꄘY \ Я'0,j,~_qZ:+WN;Z8-ff6>p<0ⴔuI=C׹ꄘ BW[v-E]sҊI)׀J#cӀ+NY7=(y̦1;nO2^=x}R^k0V: L m$;ⴔsc7@o/BgS (pQ`&^kMgfC$y'*NKY8YEQ(X%w1]p9ɵ|ⴴ$?TpŬ&_[|>m0+ؾg?ӷ;`Qpμ,`~`p0xx}f8-O7T бpI^~z`߽/Fϧ'8x_Pp7GKgfC(3FKh6htcÅ yy*KEEhܛ>p0~fU8M>lJ𷻁ŀ5mQ5x7XF̮:!9Baa{`R{87OpJץ9 |F(/wi\{Ͻ+Ma4Wm9p(p>p7N({spo>cPi6F/Σ`p+W}ػQ2,rd_kAھڿ-"2;gQõԮ>Fv;^g^tmx_s\DE Y'r~pLVN3CsSjA>H: ̺Iuwyjp+LmEN/8: Wrn9uߟ)5>B~0 Uвr_"QzZ_{J 5R$9G79c^V>vp 9'-Pc7/yS"MSܦs`plI9~^0jcʵ-n 7\h[?}'? 
Gi8^:lT畏_.w α m{`VSj\lBV-_F/uDGp:u\ϽʱSIzBqC}^(o=Iuf)5Eӏo_ D-}k$_CDՕ x"'kVy]π3.?:Wk2{gy + 㴓}Gߑkx\QPfl[yHuk5 d7Jߋ[~ܫy&A3g/ՙ7@nF^蚮EՀ,D-~2?q&kWeA>4DOZ#;V1glOGk5_3c}ۖ3k0}M7*ḑm5<?dxa`tyjgȒu!63U]OX[k׌G\OvՀ ԾWjC2 +PϴP~}_ xJZvS[Q3kN[ظ wY.݀z4:{vp}&7 8/Ame_a~_εWW:k#OΛōd-E=Bm9.E碀A, 8Axhi~ڃR[s|ޘʭksq_s2oAFOǴ;F&q9 Y#.Ү9{P|(~e`ChMHj{{s0Tj[/py>#P^7'2gl_gS$2 -*6I%c֟eyo 8_m$n }Hp=.8 QG#Jy ȋ/|f^esє"0\1Or#֑( w:< 3hf6T6܃zM18%#8ߒmIH*FR?;cEjհ|u*M=yqmI*U𺰢=H {%pޒܟF} $tTW٬g kLۑscM8倳YpHM{h&{4߻HovElD;C {2AsyƯqs4p>zSp_MUmf lp~wXR>g#lol}sz4 έ<p^%8&-k Io\zPG8Z=Qj?E'`sej[I3McWFΠr!.˯9NJ$u}\aԶFgxE߇R -HE$M7G 8_uzHe.eibԶwt|;3Z0U%=H~9N^p'$_ "UAڃYp!dCN7OPopi7ۖ.ƺpYS}6 o{.NXG fY$n?V&lޟ_Ґg!q6U >xA KZBEGSbɄ㏐_KgYOҕy_CЁ"\бl%zhf=Qߋ=f}$Y=-2 95=ɨ3,DP{St<ܡOPɳeT\6/A2ٍ48'xJuɂU2j1iG7̬ "k̟?@f;M4DHN%頴qgJ'm@YrGFTɿZ/{r=}hW[<3|HxܽjmM2ZQ/ٰlI],A{o*n(6'ͼh{z*Vާ*dh$,ط' 0AtO3ݮipkQt`2Ci.?L#ٍK`2;Fx;뇫dN!|'ɪ8^ӱcP>Ita,٫y Dtp;+nw[xuv}ReWR7R$#P6ni3ΚG^x۠FQU|->dv ˨ÞVa޿Ifh֥/^+k+\,'e-De]rZV3ѨG,him8窨.*I/0/TtC0vE~o@=y:x)9M\dM4Wos̚17KT>g<m;Dt"i]KM0 yӚҟc^B^?cQohh$ӂ^Ӭ#?:-wjFFVȜE™H0.nM?#(oY#jZyP#O>hx"4+13nMƋC7EtƢi8F#|fX訁7+A/ao/ HpZ4ͤ;(;w,3)It%^^ `Xd~ IDAT(c,Cǜw/v8 $a-gǠKaѼjBKv`ߴV'wj}v ?eHFX6"QpԴ!/PY4T1&v 5w[eѼyGR܊}vVs3=, %~.g/ūuC2'd-th~x\=WA?Nxx z5f7FϧP;J.$$ rihEҫz:ZѪi4Y$"θ; U6x~%ioA% ^ܗ^ :%g?oPMezOھ©L^ݫSyٙDZ(^DvyHVm] uq9…~Bk=6ʈ3^|3uZx?w/Ds Gm"$C~G)<"@CIͬ9K]!ql2jZ)*7FypK|ߋH;P06>_$E?ݑ)(z-Vv-cFӑ~{ *.|˛sFӭ~Mw{)J'$qєۋ:PnL=)$km:6# L$pgtcXwH*>ߏ?<Pt:ur;7'R}>_^ot#laCwz_l_CIOzB#m{goQײ" Xe.;/ǧ5@Y|etieA(C<'s({l/qhآ5 ˺+xv}6nj?> e^_燨0z)اS4`ߝ 19u~l۾b? bǙY^&=Qpeo^s?*oA= oTlKWUZ֥6O I(Pp={0PE](~;p8r{x{FU.M=8hߣ}KUs%е>m/oc=Wa I~ ;u/ _ͪCN8|d2?-gpfַ9087]oPK8>N.Ho +oP,=vn>I3t(#}K`A?PownDž퍢!W5$9 /o$eޣ w:p+ysMmйѣBA?NIcНjut|AwGc€ҨgA t73f5y5' 8=&C\ 8nȾX"y.kzZi ffێ3LS4ဳY+Wo)a( A\Ʊ=λY_mpe5j"Zecى34~I=7\ Qts|1K6x>>&jH.8f ::t>lL=A9nhs(_|{S}~W+WԱpBjw@tԑnnby L8*|2cQ>(}fY yFD7پ6 ߈:¼ާuL}o0EPMdKPhFtWg72gXϠJ(\4'Ok{Ep;+';fh[^"b +^/cGi_Q< ̢YhĵWpא=Z$YVI~^P"zUm濹@2Yh;R~);(K6! TXǯfV^p z|)˷h({ ʇsYvo=^:&i)4tTZPyq{뙏ˀ/ѿbPs3ql/mHޗ>,اZ=Ĭ5ϣg~hh*Qox;5gdzC=dAuMsy,?ScD:zFCmR<-B}fC=]j? 
5/Cut{ ]#׃=Q} j_կO ] Q#졹rE='Fi}<ϡ@|a{c?"`! 6B2 _a(nf@w@hơ\#НNfSU=k53Zp]SeK[ D#&DiJq<(Tfz|YcPC**'xߺuf6o, tQnaq4+lFSmo(Ԏ[jIʎ2C#b}\ vj$I5ff8Yg33 +lff_8G!O8ME < GC>܎;ffffffɅhܔϣtif?"ز4ޮASՙ0瀳 %K9`h: 4vh! bfffOaff03xVG Х=/ܪ3}?!lfff XN"YxHz12GIC+皙 B*NՊ^-:M0t vBw'i̺mD 0~ 2333맛Izs[qZ '<I򯬀xm|8ꄘ _fp^=)y̑1Jm[=ڐ5:wXa>233KqgkM*K uBe xOi1|#IcBcnjIm{_5?LߧQ,#K#33fY.fT33nG ⴘ G7wп] f,}MױV}RpfWW33ffVJֲrcaZ5e\zcXF/lW^yQ=cI`C}eSwyZdYocf 鏥u{֢#CCʍ'6؇gB@yJV>w^ M=\zR݀{ ο3Ivd~oF pJL`׎ڛqG3|,ڞ.;1[: Q(!;d:{e7D=ċ lu[=nCyhZQyUǟu{4y=8_m{ɴĮ ߩEGL9G<2^0lߞ:$wx74x/̆hn zi|:")pGwǡ _&u +n׃ 8l*1Ps9؅2w@PðsҸű=mz0uLـ=MtR{tmt}G=_S軶NwPơ" ?CvO^kxM+$7\0؁dAoO:`]hDVE90f7_*'IB_".ދPOHn'{:ߨ7[Po]Aթ$+(?\ : ϠU߾}Snm"<[_-g:SDž?Q~0 8'jmM6c{pΛkq^_ _xc"^Eـe9oE͚bOU9od, ^Nv;>oO dj[/cjO:43fEƒv#LcqtS1m?ۯ@ld=$yfUIYA0Z#Q]>Wۏ.$md4 TEE. ΛZd/KQ՜/E2%<5 baO9<'G[zL =Iw/ 8qHjG)Co T䝷fl 8O$3Dzќ67y>iwyTz:%8o&Ͽ Ie΍ϟ`_33-oJN > ǐl C}@toaTFO24TkMA>NA nNK_ǂǢUoP@8koJ!dD/I~j'4h΋rQha3,#H]ltPϓػ8t*E=.No!}}.ف't@j7 ta=ԛ4gi':^yY_\,z$nMgQ/ePpyelCplmH{P<%KF߽f3uý><›:R$#AӶ5zM ϳP햙hd_4?iR8I:=/]vq 4*^* TkFt=coR?gViѿ)n-Qya; 4z9Mjy xSO8;"4#iڴy;˳PXw?M bff6(A٬h3Q0ۨ7ND46~~[x4wynX3x~9VE.B< {K¹#LWy5?@ӕHz/L-b XY& lܧoߑ{e IG=u@om6|} FzJ IDAT(B45FU˒L}1S?eS, 7Ni%Cގ%{Xx,Ѩ1]74>9V^)t8ܯ]€h˰ϤWyApfh,58{-+2d4g{T[u`yڛDEu&H|OF=WF[;/ȧpf(2_784z>PVy;܎^zB(ދbÊ*UkZEk/zQQE@ EtI%̝v>'~g?gΞf֞Ϛ߬1x F[C1'9|d$c 8Ct1؊H;kb|廒 :;"ߌ5^~ a?E^b51vJԧcW7Zξok7 P/ӈ.U3rKқ{kS41Tbl}<szNZ{:PA<ͭ,_Lwv.7:b,iפA'{c"w;ѩ|#MznJgu4Vc׭:c&16p0'ƥĽHyxusҹR%iu}5;ꂸcU?Z\5ǰX6D~$,% 1d"}q?9b~CI:π3XXsCH9xńy阓_pMyޢst·A2U+)&hIi'YE] 6aқ/R݊/!.$&R [ :$ $g{zqˉbRܲk qt?-3nJӖC?\6D:ӪgDnN徙Y;ƇOHlxYhՍH/:9G54|z s]C%WusnGDyZb櫉F A#@<lJ[׾^uDzNݞ豕?x5p*pؔgg8X!D{SQ4; ѥk駈]Ҡz8?NiNS3Bdb F7\g91D7׹rM!oC:|O=C2~?Ѫ8bcn{%ZAKjeSL6qÞׯLWdK?YF-. 
#\ow!ךDsgeCw͌L9}(,ڪ*]J&m zm"͹({}hl;-<*wP+d۩Yy1n725y_D^G~S&d?p,u("s-!/@!^U%I35i`+֖8[RYenV:=Ímt C~AmwVs2#8M1~x6hIyV҉^eV7@Rf{0n_[1"ɑf\4OzשJX2!ZzOJ֮3C\Z T୕)zfqW5cנha۸ x3E5mRr:sD|u눇eiKFvӱ[~r@SӿɏG([J×&y.IZ"ұNLD7'Q>H)Dn<)kp:wuiG)u4NOp߷I̛vӛþg˯foʯWoCIsug\o ~<12 toIq߀ 1sH}V>GLZEj'ԯ??6Ǣ(ZIN?=74lN[~0q>TF-i / +(ߣNǁ_a=$ƺN '[D}cs|w*5ZC!`z׺S{-|W77O+u U4 ߉'Od]_YĤi;J7(nˎ/{0-r XiD'dZ{1W--#Yn  5x?/)moBH4G}$)o7 :k5h*?֊|siˎ-mIsUɺ_~9{ %9ĘyAimvtq㼔;7#l̫&__BSb^_Pz=JQo/,;"$bgY@Eo'뷬)6cx虖ቬ RWvfZD%7Sҝy+q^98ߖDEˈ`xn&rU3U]^{b]nd/''~K4'֤4IShlu߽UV!$I tS(΍w!y2%ݏ- <gK֗J~"Y{}RGܛypCW9*sx (ΙC-(M?r8=[I;q}i U&K2߅i]ʟ= $ZA w}q+f;$^A,n7uͶ&foOZn@\U[IPΓ+U6* MG"P_J/F+ >q]G{/\ۀ3*{$ U[gʿ{ym,ETewZ 7Dk*7R+Z7qiD]TWK[?}ϱbDhZ6gףp3}"ߖ Z_ .88=f ΛXFt>^g,_cvg]!ZR_bj ,Bdc O,[RthݭNL1ǟiCfG[ǨQoi "zx0|l+2̣8:+ry%ZZG3w#6ԪmL5[ݓMmb:w>cfINÔap!!x8Ue,YDຉ$-|+r;gU +l]ImJ|?='ͱ\K$]:Ed]z޻b$ ~@ѕ҂6!fي87=D.8N#ken'zG܀4v O'O#;(ZGu:r~iFlbED0)ƆIv!u2a9ڙAԝuts/"/7mĹ@⼲A9!Z=\AL9q]B7YT~[v#O#SD|9iw}"Iư|ȏ-?; 5$uVS|/sY$I##oP$ik@cbu~ybIa}"\5bHwgN|$I$4&$Mt)\t`D8ICG..s!6ҭއݐ%I$iTpX 1&D`4ˈ?2Z$93=W=dNV$I$iugC3;WMs1IDk (&';xIr)KYSf狵VηChMwJƟN˓I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$k}0i]I$I$IZUxa1ZVz``>Q/$I$I$[}.hű8d$I$I$IZי}- 8 ܕ >Œ$I$I-zsYFS]%Q,$I$I$[S}.hkpQ*$I$I$[h>e 8ؼu$I$I$IֵD@`R2'wgi6Je0H$I$I!lkkSP:1y"$&0 +("} 8#[ rۓ$I$IU{)xs,nԼs( 6݃_n DK*Z8C)Ơխپ$I$I/J/qGe+S Z0 T2"[YgAD8X+da[z|qAۑ$I$IU߉o ,ɶ Oyf<ˀC*< XY<"͟)Z(M4U餅3Iy/mN$I$IV[:$ϱҺg$.`p9$; 8_M:I$I$IZ-Mƞ6h<l68KبӀYL>R$I$IZ3Y~V\N܉28z2[U n(9@L2؋L gI$I$i䥭u!L"&XA{ v۩F7c+qƀ$I$I4+Yh$:̳2I0Mfvэ,cY$I$Iy O0$ߜo,/Ynt ;g1Ȁ$I$I4:n~nBO܍=v]"ݺؿ~.묈M&QL6xk 8K$I$I `a$hH N.;bhA*/vΖ>E$I$IƥCc.HM $I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IVIkcw> KƷ9--ʐ ؿ߅4j>lBa_"ӁIm^< /ixm ױi}،~D$)E Èr`s\41ٻe1D,Fۉ=\h]`)q~a<,W?C}.UODraxR`Qt:X?$Iji?"8~ED: 2/nWo1ǭ 8e46"߯8c;({ plK[IwSԛsY:5(tx=i$I838Y"(.l6OM"Z=sne' -c>e~@YsY$RR٧eVwY@~U8H#4=lUD w<؂8OD$١Eo2eAm\CuEw>eĀ* XFe6V'Y#>J`>Eq:Y}.K7&%}+Q oI5\I\ L2ޚa.p - r |,DV-ߡ߮e>HqCYOKfMbpQҋIӉ1&Q w"WI/>˼Sr p~>1H16h$Ҫe##B:ﲼ:!Z|7Ė}-=k\E}.$I,ǮjMݘX"xفZn\>&cE/$( uʶxE9D ޵m?FLw[}^l0Oj-n׷($`kbi{w3W?RJ>ؐ{ 
1~(.>e8͈%zΚĹa 19D=+{M#&mJ!&Eԉw\`lW npE $)q&S;ȿqgax=TJ9},e >ee|=#Id!lg bȿ{(:1k7&8ԙIԁ ެ%QoAViw!S{;H#B3[A$iM"nMn<)FI٪t+S *kOLU6[WPM]zxvMxil %ZލJy$5y<^v>K[&i:^La 8弊>U'$lj*(Z|I2mcgS\Wn!Ljz?kԟ'=Vn{ߢ|'#˝k.8 %I*bOۀu0FZɥt.Ҁ;n^4728y뷇dEˑv$eM{$hn<9&o$nȃDwDk4|Hă˃)&Nj7眵%43jip>$Ή{gSݲ{q،uU 8ϮXo}<7IsF 弙[3]c$ӀDAI@k4wXvI8CNnFt7&yzup~ST.K6&c؜d@&" [m3B'EK- ~en[&l@1ɘkdsB^wM &݇av|g DWoqu>o( pOű;^PL@[wf5i$J~..K]۩N浩$IR 8KJGy*V7#),pLdy"ݍ?S|OgtkRxVM;$dTbȆn8M$ꢼpݹSe?7jQ;?ڟ"nG ߤMI jcSb+)u+T Iu:cM~G@a3L!CG8?&z<`=,I҈0\/9]A< ZILq?1Dghu t88[vkBlwDݸ?"լL_cx[Q #j~{2Ug^C~*a ^ ܘ.1 D7J#"h$-dO 9aEn?&{:1ޟiI?x[њkg.ږhu#Chm0򭛡!p =D;sQDPyݫNW;g8JZ}Ylahn쒶,D 36%E 2 Γ[nU%IR{zWmҞ[b_ҴL)+1ks^SKOKٮ?,Y> pqJIb;{$~Ou 4jK_^L0m3H  Q?64 +}?n]Ϗ%iZ̝Rl݊igd?k6W-rP?EY;*4)CC4uEqzVvMM[tO\/޿ac]Y +֝dTbHe牝*K=G$a z7';ִp7T乥d "p;hez=Eknޟ~@ۙ,K}50ևƆmi aҺ:?7C?^-O1)HxF|v^cMg,Sؓޟl]jR2=2b HσĤe齝$ kՅ DCjZ 6Smy,t loqmek0QbE}츴[]ML(OOZ{HãW4]y}453lHE*7ne*j;?|>?^[0'ʯ)gA$;Iݹh:bhVnԍC?V'$K)oʶ3ܙ] 9a dݷ*>_iDA\ۃ*=SN[n)k]Շ1.ɶ8:Y`}I_ҡ2F x *n@L Lhm: As.Rl BŃ'|8~ 14sjKRusPC9TSyHyfӒ472=3YUP#9]lGܫy~E|{AsI &JUk;1 8ܝo]n^%K=F| m¾o".dyˍR'K1۵N6 b܇;+i4-D eɺ_u U9uGC^d bxtLDFE[8Ϣ|^7uoP{7Qݕ|ˊQ 8c&$1kMz# 8(ܭb{CW ('u#pBq8B %~s cԮ$I҈ؓ?3D˵7Ui@"1gp97"8Z~If?9N~OAR\.y"S@z@Зupx7yU-K'`hNH+AequuDQ"mΛC:ѽĹj DV!E=z;yWI#[>8<  ĹbIE9i#\q8T婲9rND~"Oz,:`ŘHmBs8 ԽDùDx;$]g_ z=.msަ5BRu^ؔD=A1znqKg[C ]$iHx }.XJH[8އrm@sYIZ5G14\~ڌ\x-Ź$um֯"_#P.E$ ΥIܡMYUy"\ab%(i|KǤc/X/sYƃR/ 4TS(XH0@esya/sY$I("%-ӱL?ߢHakW,n;$ͤdxǠN=b:̳s(`ٵő$iWWnޟ-ϧR~ѷRxX.RSXk8n`ldxZ XҒ4T%Fi c7[U},$I' ?c]GLtq(>klK4:'zt2 dw_YK.B4o͖/!&]K$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IrNIENDB`pairtools-1.0.3/doc/_static/hic-processing-pipeline.svg000066400000000000000000000563741452673171500232150ustar00rootroot00000000000000 image/svg+xml paired-end DNA sequencing reads(.fastq/.sra) ligation junctionsaka Hi-C pairs(.pairs) paired-endsequence alignments(.sam/.bam) Hi-C contact maps(.cool) align reads to reference genome(bwa, 
bowtie2, ...) identifyligation junctions(pairtools) bin and count junctions(cooler) pairtools-1.0.3/doc/_static/read-vs-alignment-vs-pairs.svg000066400000000000000000011223441452673171500235440ustar00rootroot00000000000000 image/svg+xml side 1 side 2 5' 3' 5' 3' reads (.fastq) alignments (.sam/.bam) ligations (.pairs) --report-alignment-end 3 In cis pairs, we reporte the side with a lower coordinate first In trans pairs, we report the side with a "lower" chromosome first Pair reporting order Reporting chimeric reads Other options Unmapped reads Reads can have no alignments on either side ! Reads can have multiplealignments on either side When reads have multiple alignments at either side, we need a rule to select the one to report By default, we report the 5'-most unique alignment at each side Chimeric rescue --max-inter-align-gap ! ! Some reads with three alignments mayin fact represent Hi-C molecules formedthrough a single ligation event UU ! NU ? MU ! MM ! ! NM ! ! ! WW ! ! WW ! NN ? ? ! ? ? ! ! WW UU UR ? 
UR UR pairtools-1.0.3/doc/_static/read_pair_MU.png000066400000000000000000000410371452673171500207770ustar00rootroot00000000000000PNG  IHDR%sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxux?(A!$+BԨS[گWzNFP$!.+?Xlv"z<]֟[e|N5^c'bK%~2CqcKR yQ%i3}ڦwo mL=ۼAqq\l<v2h 8G gܟsY[O[ y8fhBS̢l汃Ŗ$U.p3?;lIL9lh xs#_Ht`z>1-߰}{p,/pVnZW۲d]8*\ŚWoډهu8TmYn% ?)lxk_S%nhr>#dC:i7(gGNb7_k).⍵& OGj_M Gi%yqJ[lIc;Jxnr[lIaeYNS_U]ǥoQ}17TdųWiP^> ˣtĮ2VL^y"{2'l\ջY9m F[u哶⮭bŴ)k+HJ HG 9e` L/Yoۊ{S8=}~u*~.j-Ǧ`Zuki[lҏs3hwÖB[l9o glXW lEx.V=ue^CiFU/\O] @ &gsFmj/@ư-3,#[^D=eaI6ekhŖBPnq%/zw| *iƍi.w/Wbu9#K% h-Ss׻~pB.Ŗ+g3O_?[I{p02j'6x5k DɖM銟KGZ-fiGRœXny"6}?[l81HTW =wjݮ`rqϣt[B3h@w*o/:PF^mǨQ{ExlczKk<Go--?N.sao(Ezܕ v.M3iG_FCβǖ?v#.gb?>32ƌ8yܷhvHj""n˦ ɖp{ªJ[Q\x8x2/Tȱaa<8 "skQDww*7dݛwkH;:[l*\7m:l-yJƓqRS5U%.sμ[[Q EŖ=aK񈤞\=Bk\a 9Iܐ)ֺ5bR\dvl~v^^[2,ܱW~vrT]$;GD$՟6 \x 0YWs\Hj* {}38ltU٫qU^dMy[öe215Mk?r9 vlŖqRZ~qJmf-S8-=Ckv5$t;.ugedj7֮fN=Bc0Y6BWV66)Fk_7ב{O|Sovf@I|E$5Q$uEj ' O?-k_GH(YW?MDB1ؚ޽nE%%Kp'e~=)\^2y%+mr}7kOC&GLw/kqͷ-?FY-6YoSLpxM ǎE_k'6cf(CJ|rk--a[g\PYcKhɽ|x-l?KkI=&[ *ypB5\EH#ym9<(MLj\.3 $EDGi`Rv3uZXSbD/O8Q暹Y].p^p$)px|Zo5=|6fCj`>;Hg{÷<8FNq6V?=ocCȉdOy֖$i Gzp,9S-גG&ksIE/ؖkgisه{ëeZC[qb˶?f+m#pggKG\PCN7^ۖ,:bkU%ZGppm]D}W{a^Bqu+|4V빃8e"sݬ@IxUvz[$z(Oډݖi9 3(]#_<?{CLjC/~/-1GY5RW6~۔{"LYe$ݺd% x^[YE0"ɞL,o[Ï[lWƤ$潼u4d.g?-)I!/7\cIJ"tOy1}I.Iʄ{9Zۚ"9t~ֶ&&SHY뱦|,9߽l-F2=%߱mGZC._el{l ڻVΥ'mHwo]mʔtˈMkM"rIg. xMF$pc_nWg&4#\>,TIa-a^Z akl[lw&qƏkJTL? z\gOPG[l}i;b髧ٱp-hSqth(! 
b\^/5."SiTjE[BN-JXMBˬ@tWwmrt8aK-dKHdbKomÆin0/x<Mc7b\ǖܸOCzmBw6IAhyd\\ de|~vXAhqaJϜn=>'4>~:;Fri5V.]$SPbd/LB7m4;b(4=wrQ_fXvM6vgPwX"v\|b^+QAKչzGN_V`9M.Bw$uIso%ʳ㋍m씠[*m|eC^Ah4T7\{FZ,İ]QV%拦Uhka@V&ND oR B{UnCz[/汌 (,:З:0֣n=3u{(7~D=8 B A#fu,}Pt {7no'c'ty2Q+o\[~ºBp~j.!UI`~Wf>eme-'{ם,g9C' x kJ"!`JCMqM>^}*hn(WFDr V5n?ڞxEu^`AT Poi}X4V=~zB0pŽC+^YwVơ)AojN`pӁ肉Wh: ^ FM9>ϭWGj(b3Qڊ =`|c, e:$OGƗ^՗2u2}ٸ{ `tF {E[SOw"h:\ N" g(o܄fc{s9%UFL**'1Pf] @%xAT jT'o< 8Mv'&r[Ò&h}`-}ӨnL4 %gwB'%U2o6>^T,{v#,I#5hC%507gZ3da>AmQKÒ5X^5HJ7p)j`7~5*znCڀ8esF4R=GӋпDoB\]'wލ aM{Q1sQ)$~ctlߍ؁Uo"LP?FW]aI2n)B2e759.zs1f[š4=6E==|Bu~4kdV#ݓpW A0ZCi.Gl]XlKpNQn֎yEF)7_618o-=4Yc;NT{(/XGu/P't1](U;):Ul}1z?F`?ǾD4v8x566s9וKf\2[hznJTG{O#',芺Gm%8Q;o:C,pԅVJ KDl`?I(ϐU+0u/[|d ~B!:l%$*HG=Mduw@˝-tOVaxΟZvŨd$1OyͶEO?QtQ l|!QnnhcqͶ09atn{Pi߀CM]T׽6ґ\;(XO JH$竰'{|/8QaEjl,YK=nC: w.MDspc<|]#,)PRnCdtRwtj>%c}(MЍ n-lԨ_A{?b5%mޛEg§nCOOT֤\IIP٩|'ȝ6ґ CeU8|SQO֋3(;Q] :5>oC7݉J{+P'< $$ iPE֚$۝󀂰7 C Q㽽(Z_#]5Z w Qњ/,G;vf,{:1cԵx:cwU-B GG]0_{| 7mЩlպLQ( *B?6^2p+ @y23 X_QDx)hj^PixzA@a r-t3{zՠ7 BPٝ![(!_k75w(1IT;+žY؍5 . 
s=݆t@bQ#&HA=a B؂ YH*T4ROEHmC:@؊n D ]|xEZAA[6Ġt7GmHnon:hAԨA2i t^Qޯuv tM G *='^*$d$, Ay6߷ې=rb!D j=Ĩhj U(ynneAWZTeeBCITlx?SPYAMɈ8S&z%$Wx\YIIM ^ot4105VVz %;u'Vir vю|V.AEO8W;ha26:(FiFTzbg7ƌc&xtm q }W*+xp8%I#$~'x\l9 OCQ{٦o=ɀG3_Υ?hL&O)A6;GD&4EuzDHƨq[, ,/>JjPn1_~ȶكΰfN+vrt"C3A|x إUU OHd@uu:MY{[XMs5wq 7Uʖ8_ÛOO\ yE/.QHNK4=kw=LzXGtG54f IDATבDm`#^jg%$ÖBG+vr Cϟ|W_ԍ|ou+iTs"L1$ Gwz9 aŏ?d,sعGwsCMm nݟKM>8ꕦ|zl,7_mmt˱~ec=Hk[u z^_M|>e^}X~& \igبБH;Z"R੯qGkht* ƐO;ӚMlU>2'ߍ3T^>Op1vOP:TrPsQIMS]W s1u1g!7`#rFTnDcK5|>i`:+kVQZǫ̌Lpvim z^^coO-_U ߱hX՛oLJP{;ùt~#˯(&#&o3+!(Q;J(Μpy^L&6]hӚk W.[;Sg]k,3#s P z 0_E@qzPN@{juSgӚԅ[KKwskRjEwS+ɽ84I&ˇf|oetr/{;>-HÍc}c~{;22dpghqgq=ys 7͜F].HJ!+vy"JBBN2ɝYܜvfwi/aI5Qk~ʂZύtᖏQZZP9d`t3q~\Frj vqjz6ƊrM9|7%9R=9Ą65"hh &4Iz謄$>-؈ lχ%e+w2i`~?"j*2'TYi>M{;9)-P53nmn*ۃ7+^CA},޳ ʌˡ 2Gmp#5^n $;Ǹ̑lc5Sq1)5"R.@VXYÉHT34Иv݄DDٱe^೴NyBe ; :g}a *-~vmg/=4LAQTΓ/; BPnj#dm0ДukV?$!Z{YN~Z}7x;EKgz4/ޣǑNń((ZX?|֔i`Mƙw޽zx *g>GN} -"/iN?7,b/8jnήf?wv.8̛~IP&t 2 rb/#.:ZV;Ӌ O^M˨u7I=9wVѡ\eZ]}Xe:Աֽ[xc?,;k{B%<.Oڕk18dp>OC-Ǵ_MdOݙÇp͸A=vۇTlSkDDq_v]V e߿ЙANT)9+k;>.5Mk[zYdHWe]oBym-qe __wv?lюfeB9a@Xa9)1kjԂxaYfoaM{Z{G]H˥3{| $۵vݮb }Ѵ4ɝ3s]lYL4ΉyxSpQ\KL HהM\ns/<w+Svi$ICfn.`.A:r8p\]zPdYԴYwR@ -=lEP&toGM4) Z;<.ڕVP1_ѿMx=)IqlۼOL2Shxˉl:oR mϯZAKL'0p3 숐dYoOn^I|TkM.VLզԱ6w{o_KI.bixﺜ&w wCϬ\frg ݛsQu5ჇMsywFK#-K3,3x]Y1~δ3nΖs` +$&; wF2;jk5h>znF?h}t^:ͷwTi~zR(N 5.XԼBCIջal` SqXd-bjR\ljg'&m+6MoĄ129Ykob!нNbsɷ]k+w>7Pigz_ț^O4%)%Ju՝]Y|JDsp'9abx|9_k ~M|^ojO[Xj 415M75f#֔$DCK u:9a@Bm[sqF zEN&tf\` 0ˆذ:=ѕóM >.'8~ >pETsxtLSn,9!$ M$1lUyEKix_M|^ϺݺҡM-UUQަtV"k FhZ\4-bpf opy<,VN=J"ºb6ϜICz0ͭ 4A #AgiulS{B& 2,dcr5UZ;5(l%J?~AJ?~q\2܁ネyl֮CXQnrҏ{Eó+I\4D>xn * F?l)dN}JCLSBP }M`\9w*ĩD$ImJlyE1';zƐO㏅.O}lX0&vd;n 0gYhhUCgiͮV蕂œN1Z@ֱ %3b8y`i'/J 9L5}^: ϫr.cf-g/O倞psμWd1 ݝ߼` E8vܯ5A2sYC&6|YRh׽yBB'~0&"VU򑡂Ϙ^֔i6ZXV9+#45:^\m2byVf1Kx~?ap?+3gZv`jb yAk Iq+XtXK0B$@,[K0kT-o-(t /p^V1(<~~ֶֶ=oмmbDlZǺݻb>b=_F&3 ϭEYIMFϭ\NUcH/]ip?[KjkxҖcye9nF pk0Fcەߚ20Ԭ-uÈ5=6S,'/3<"ֳRwy^5B$Q>oQޘR":i+-f3+goa-rlͨцdZϨU\jq?tam& ƓD @: ]D-%1\mu9#REC9aH5/u6iM_=cLYecovaGm~=~G^dG6~:MSo&GЦQkgm<Fk|C6df `X_/6k#R3.nh)n 
>ԁgZ~77*6]4d},gcZI=9^W~ *Kf5kŦrF AyxGN^kOŴ/B?3k/.Ik;"b-Wn6}֎NLʄ񛵉\J`q0 p#u<Ѧ'z Pwsgp1pDc`!V 1ƈ9JKZ-)nSܚX-u BX  4UbqL7\f-T*C-K\xtr/&C|xm6L#<91F7yLi7#h̳#xG/k{`!PNߨAjG13 h9g5v:4-,6T9&--.Ϧ ڮmfMY^^TSw:20^S:Blo\N9|bR Wn 辙;b66LݟשڢG z a1μhE?{p,9񛵉GЫ[Y7||^4˨v xhX j\߀Tn}:8KZ£z@n &4d:P\u !iQR߸PID-.Z׋ HڇKڰEt@1tƂ tibRӍ.FdXv4={ }w9PpAv pCk9Bxo(!%0joX.ODB_R̸hJ|T4@/Hg,B`S K6.z(T3C8pNuhWXnAHd i']o\Eۄ?Bu~ӀE{tƂ tR?E16#9{XƿvHg,B ![┅΋x}pub~66!qKCY3;\&B du8=hX>n &3<춴](% m$nHr0.UI輼s<%xzߔθ`팭mAZICE)ml2G e/5,7.jθ3Qfib tfOC.JC * 1\g7.[ n]B:΃u$, ek~gOfdX^38Hhlމ^={ZQtM-uo^ZlIB`5 >=4SBFƝ >RSl/0.b9B '7K q-3LX-k)Ey 6#o24 jK[Qԝρc v"uoߍ׭UsWi)~@ϸUƾ'O ͭ,q硸%B:]/4.7Mn%;}6;FKS_^BWIDAT1.JU==k<#x5b#22K{pUWU6#tN7J{>jx!PD #bm-@:cA,gcL7QZAh+/PDTkO\JH,wriZs)j.B ֿcP &s#9 WKZoQ8Ԉzg`j,t1P1tƗ}U x#z'; z\jBDӴcn522QU?ī?Ӂ/JUݒZBXĬC}I 0N%dd,Bgת_` 3+xn#%dd,Bgg-05^J9QSȿ:'IENDB`pairtools-1.0.3/doc/_static/read_pair_MU_MM_NM.png000066400000000000000000001250171452673171500217630ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxexWր_;8 NN1-oq˘eN)M))lymگRJI@ӆf;fq,lHw$y$#y=3xIEWLK$Ci E12FQEQE?]T hKx)`\X 7-(Q`pmcvV뭁9ÀMK$SFEI&F%G* hNqKS`ik4x)P,#RKqE]yFAgCj~@]“L.yQ޿(JD5((({MK(Jh`R?N&18FKx%ہ}((ԉ ((((Vgҁ+UۅӘYfU. \&@4Af9{-Ⱦ˶9YosiI22{DY@mel%4ex2Ǽ5(0f^kQI9eJ>e;w4={KO(zs/./m 5ŭ%~q¥&( wDc[wmeҧcE䭻ZIKҧ_]=l3Y\R?GvqD' !ZSP:WdWy99iisW2SSX+^/_YŎ2uťIZG''VT[1G?Gk,^vCi!.@C. 
lh9 u|y\V>ͫTKDnD≷BB:1o .Gņ]Gty3u90Nv=i=FgҙlVC'w.ː#wAmrN>܉ݺsRTQAQEw}GjwnݹewEvtq:[.?Fp\s_vC0y4zߍui9\ie<{v^ <طº\.~~O l3$ \VgW AѸyڄ˙juQ\OM{J|2II^vnbKQc.'\̟p%ފj~FŔK8bRl݄{EJhv=+2.mp S7;v,ޱG (gۼX3:v#O7:v#Py?rSˁgvgs9iEi]srQ36os%)w t0o.A.37obQpQKy66{F:ˋ1o~=ae(e./{H'|E qDKiEIݶK |rA;mr"!Eq OHkҜWHjV'O;U.Y@jV1\r(5^]\R1]o݆KTof[2k\.rps|^L(x p5>o%z wPe ^6T=̻Tl\vL3S9}ٻz%zc.gv8˶⭥msy{b>_ʐK{.y+?JJw%5 F?OZfF\xm&.Jb %sbZiٶFrym.c\00m)uQƒfǒIOݦ!{رjIf\޾Ѥ}/#.KÎ, .5)'ݗtX޲9 ՜ӻ/tTO.gZqs~wo\QYy㯠bh֊W.qQŲb©iҡw˶y?t5'XX`oUow;ZZ3.S?dݬWow=2&xԏ7-cP3.~ȟbХ6b:(n^D?Cfvf\^E]°՛(\5ri2̸,~ WIOJQR<:UJ*+mT věs@VVrSa{1Ǽ6Nگ^|~;\ܘ_U.\#52 LzzsQD!pj=obH!pͥ\3r7}^p\iEt]m-+]̸Ǣ7-Ws.uii<8brZZN덵 ii ge;Ws.{3yqKkdE1Oњ,~]Ң~t;3.k{:ae,|F˾t?F#.JXv7l5re_1C. '9yzLaVݽ>'/˂),j_:<3. \:r%:nޜ ޞi,Z.5ACݴw˿ l֜]⢘b捿dwߥOIZX]d@ch'sʒB8H?4&B]A:M0x}5!n~U |˕Ѥ}>"ynrp2qZ q"c47\knᵆp;C;Nf%~f086dڀ¹]Ϲ9\etك>tǥtv yfQuA%٩ sm咐E ׼koe޶m|iK-iNm+]Zq!۷e3rxs"H0$7t[jx6(E M2/ ^Ml:%#ϓk QlKz&sy:77@'޴ \20ʐ$B\Yۣʄ*9y A\"m3G ĥM#j}l `Z'U[:qX-۾:.*6-.CKt3cSKw5t^vl F\"ؠ@zJB!aB]-%wKk" +?'۪{z)N3e@n%%=K]&][-SosD{+z|hAz+\8{W7pn|-.vZl}>VmIZFNjo^ETovOn~j%!.voeڐɠBl]yԟCHqt-]yfϋ[]LHsc2[vEO0p{κ.c O2eU9%/{9O][I᪹rYzpc˵O֝v3҂yw\oȋ.u9sjfkrB7w)6w2e;7r{d4jU`]gۜ\5L'\?&SsSrl;}\xՃii6d+\RӸ}pc.̚Q="35[SsyU.]Ğ]sC,u\:{H]sXt,Ɋ=Ac7?Ug9/.X',xGbߑ>μGǮsQc֠0G.qy2B`^o*O%UH` aˢZ^X~嵱8;$|W}4~]5Z!.]<%.߽).#O?yʉerլp9niuڍ<#.`dP 8ۀ\ *!.y-nO3.ws]1xMiނS1v3NTQۏ8;nU /CeMf^; FM`g(h|~ ."e _˟iexf?VR$ЄKF./@wQ˄se@:x(=&m. 
F浥TˍsyahL~5<x"6WlGk<֌\'h};ƟHU=GnZ kZkye<9 h=h3.gWXr 0rVfW/ϋcbjt:bLzɸhEߐ1]m߇mpC]m;u^i>w6^%">z':p-Ks9Ҵt=v߿&S?3.^Clych6ܼKytGfwk#Tvɀ]LwHS.tms_L/$Zh>zĂuiyhf=[Y,8m"M a9yzҷe_櫗tc(=O%ocsĐ,ynqk&D#._'o-&7ݫt-?|\ՇC;qaҳk\H)qYǿ/˧HNcK`GH4yŬ>Fǩ=O5 $4WoI^4;.=N5]z9{q96g©]N,n&C#.O?m]3x޶ /]x$ $d .&ϋbO /ǬObxK7a4Co89f<)d:Tb x{c^4cқpye \l祢yϏ6R1},ڱ ߛ6ٱUz>rJg[ɂdj#.V\vݳeK&]ƸEqT*b6 AP]R nfQ_\ᢸf(9R$Q,|9ȱ.Loπ+_$%=rf\r(5rٸyIib("IOHW lW"8`@b优ryRoEqMVݾ` ?xĘK\Naev=w.;LgfK%3XfDfnyVh#:u=cuZ]ԙS u C]غ%ֱ3eEq')\1<ʋ1oh%p?J]3)5zcEvq)+Njx}>1dfFJ*؛N%?.syp ViEqMToo>{Π˃.g%z YN5+?}ƌKSEG#.+aHuQ Y\ f.0䢸VC E'F\A#.\&Ħ_'s9r˿0L:eW/s GL6siTVͺլ97JZgJ;Bkecq1:ڥ K$Vi<)cp\q;\VS e_Tˢ7M< qh?oX_}kA\g:+ %.tyu|[.;i)?{|gdx?q92Μ>'sWN|:ɢ 6ElۏtXyԠI ǀw %.'7Eq1=N^MU @|c23ry% y%QRed^Ll+5RrSRR ;[iz:py\b[Zۏn'Zu S?dOuƩworٗ'mrm*NuhA{rLp9{O͈=L۴!n=8֠+.qQMмpyY(޸E{\7r{b(d'xx {5 HrRKC d ؀ˏ}^oW1 ~R}lg!t<4[2n.$UrLfZ xaRJQ;2'͇T*!pQ#yHz0PX+x_ٶ5%|?zNj ֞VbK Sh"rʪ5/Izxn+s95%O7/242[.5;!b$퀛H  [e $=5A*%d&ۄ-w!1T LaEq_~ `&ݜZB x5Կ(N+Vx@:/7LBOU۵uĮAꂭB(ьTz3xl;&8 O)?ߊc 68Vq? (ܬHOg*(Bm:ʪZ:"bp!i[V &A?TҝUFkx4ٿL % #緶lYHPC3dPj3'4zcj_y?{3nX,*d~ۊVgSm#z @y-?6=NCWǵ'IQS!: Xwxȗjj瞉@L 9T{d IC:Y*gLaEm9YWK$v|q%co>p!'+wQ~CG'k)XB‡ go]iWl.6$}O@' 2?x 7 !/%i\Ykw"9q:)VlƩ)I?> d-3>>9jJ=ؗ끣)FRFL$p&Xz|305C*P`|HH%U]5%yw a R( Ϊ'6r}ggc;75\MCǑՠb>*rQ#CTϵǣ%absr:=?s?JM |_֗HXNj!%RZ 3|?C*!hVMzajrQ6y7;يwO9!D? \M; 3%HI>,'۬XܯH~<JL+8œېk9΁}Cc߃-Jv2s_sq~:-0-p:#>$GcѤBbF7 wfk H!h 6_@^FTB+fLف'ؗ}?dT}¿(I p,>q{U M$ȊdA&]Q )"2_!a4H]џ? jr3RNR uJ)C3i>q#nژO>RR'J䴍Aݐ.sѤMJd|LX9i+\kk6:PINRB+Éؗ9v7R$x y+nD'MyHJgj_W?6"a1LL+.kډEu.Q:[ґ$Bːf x(ߘ|ⓋTʂlBZ:|V'd m=ߡ2=+>2$u`Œl#B*5)iE+~&_ hMҁb 2YJ| LQ*>dIEt yO2$V{8p'9n Z(!);Zg͌Q 3>Fsx]Cfu&sI8Cbt|UڥV41q1 iXKU5`DXO0 b$Lfu)3~D?+J,W8T̑tnAʇE36|624Gq?atθ|6HGpS%pySXH LcZcWǎ wNM%>IC&' ƙ KB5~UrXz#a҃s 4 vt'|?f%x6#'V`Ӱ2(Ѡ VLXuB6uW #&VxݾKi5U ?Һy2l1?ԏJ9L`n }a!m~Hy\D wk'G ̩% h[pMHɍrNDM(>,EJ+;R& LCJM nljؑ!`Hv͐DY\0dؗY`o(5 j.j ͐R/{poCB,nfU{ aO`%x-!E8Q|ACuR{$ EmX27vV< `6IW/Y CA(\dZD*ÐY-ng:#/?:ч ރ4v. 
I|3]"%x5fuvHY` RƸ}XN*6܅uqN3B'+b(HZI_#c (2YRqڙթBODxrB;gykN7> 蒕=F8!*6"D+" W ϿC[DX_:XFMgٴHL)EffV''YcV'UnE- ׽45U$1 s}< SbK}ƴH"dŭ[o6<$4y,;BB#63]dVK"N;$⠠n샶Xy5b.p$Xg"Ed"1&m'e-L$j#þ]n#+ώ&{Ek,Ss"DBw~' (l q#HsWFvX_譐NBa2Hw6j -r,6b rB%CDADp=0!eRH$g~`7vIa.ϴA/?eZ4trE'+)MB:CIn@M9yӬN %qWT7k `-"aX,ƇyݍK" Ҝd ԤJ0 \B[|W2LJ9+xmNƙy{WT3-#C mýzC:X,A&d3gvI"E\@s׿mVVڸca]a Q|iP㑉Y]c IHA:)bGB*\\=MT6 l_sI; h#^H:y8$o ?&2$w#`% N |s-658-&^1 )G81"73-$@f5\E nQ,X.cЉU& %6x1J'~š$v62hop&%ƿƼ}fr'+ݰ΅&T) 0 NQIEB >{2HYJ*:_ANX!F<jZ$Eʊc p_5/RNF^ĽU4.8M+BdUV?.D?B'K>Q}5EC?^di$ :dٸvZ &~4ɤH(0R] M D_pOlo?mO|2Ei,o# n/+ZF~UE\L$:=fUavo@wǾ2F?Ā1 !,yʜ⾯a%97(Q#Ei*wS #͐y?"u{"A,C;Npu!r h[p_ckT |HԌY}3.KΡ %W ?'6%V G4E(͑v}D$OcB #eYcdOI2= W?j_~lpR(8\S+%~AEQC R?xkUY uz'"kq:4ni "KR|yF?C欲(6. 8n?%p 2WԶq>_\Khd3:($B̹U N,! GWZu%d#sV%+^ː%y (A/Yu|2%s+q6(xﰾwEQ:ETbh) FGw}g!H}󺊢a+NAOEViJ.EQEINr6c=(IP2fHp#IQbCQf_ <p g=pt4}(J l*uH:s*Yz$+ÑDė e׾)'n@c+(p>F6,;Va5ϩgJ +] xץ4MWQXO/T.8WW$ȯ~|c(q;i+??"t#1VI"7C:Jh(Ӆ`dAyE?=" 7(X[{vW#~(&Pƿ?P?z$d+\L$U^BW l J9ߛQӐXzñVH~Ua"}Y,i,nBˢ2m`?sjqXR%u7nZE ӐKppW<+us]tK =ҨDӰqo_?F"Kk>34}gN:^"XG.\F*7%k *B+ꋿii~ IDAT%a52$7P3wgj1rRQUd}v}ejr9K s+ꇿa<8~j(X_\?uP q EZ7Hل_Y|!/;dɵuñ$O6v2/||gґ/L 1) li%e1dA/EWzc}n0#HC.$νȸLrֹj%H!4DSقhPXx.n0rxg%_MarYWL\92R>ك~QJ)e\2-%yH`z ?nXHE Z9j_]8:g $:eӭ H=ٟSd!+ˇ$&mƍX+8'DP{cG`yzV/d}0(3މ:?j". 
[TzQ"ն}jy9O[L(qC3\d nD B͐їc%( ʳHy]V6XX \Gc|g aUb]ε{%hj(]}]܂u2 (Ɂ (=%2N=)$h/6Ch3m(+J5ާ,oGhrȹrbf~ z4 M DBp&!(}hx8m-E:I>`)Rrν7_"s25h%5]g C:Δ p0@;hCOjQ͑B;SG$;+%>}H${>d&n }D5b^HIXI:nNˤĜ,d .BTv w^G:A(u dr)#zvH"`NA 1㍣Ers<]E\@>2cl >n\zX}@(s<MdP[Qљ@ 0WugE 0rS ϐi :vQ̓5ЁmX]msm_Fo"AM?"咉J6HXFv ` Ϳ]& yi%tFG{'L#[+(" 1' Y%8~cͩ՛ 4ds2Y!Le௴|1?Q LϤx{c<+=)`UJp6!8|18n2;>pWDGaXs4eS%7d"2'295# :vIt L"a%2!/"m-\xGr"( uiFdB>̜48,!t}\NºHGv5M8 ÿ(8:'&Tu/F##,?u(J(+Wv5J7uNm5gɶ6,.yôHĥ[N`",Ʊ@'sj1!NEQ9)_26fZPsh2:֡cE\bݔOvHӴN;1DI$lswa^ +NCbf{qӉ(t߉H:p*k#15݊5O$jNX%]ϑ<մHL($n̢CGb$oD(2kZ"r6R{Nb׽A:OBB2n_Luh, DX"nj5,$|5~D c-eD S (au*YWD 1CFE^ErrBlŲA~g "1)H!w$1x xH[-;.8԰K48 z~yY՘d#m"M2k(8RBD:효?Eq72?"$(Hh2:> /  p2H S9lH\}KJu@Wy8~4AC Li1!!#3ˑq<tBL|gȽۗІYb!^0 [ | l,YHRxxiBYRH(Ne? `>RY u>9D,D 9.D$HrDYI^dkXk!/t? Y]4-*YK|@$dN*( g;XvE&d+c7Qd~5(IdpC((ed۹PGf~^Ƚx.+ZQ%l@LDF~lFbFVv6YHGP2%VEW$Xi8oxj\R ӀO_QE c=&$ ["  $PҐk.4D(VQUO$E׹fuFtdպ( :_DVb2"s[x!uj NA_"3Ϭa!us;?e+CC耄"sbM2PEQC ZiZ7H Ɛx,̀I^JΫ5&EKt,Sܳ5:g2)ٴ~ ZU^{|׋ǓBv.v(/TSM"a>'vnZdRҭyśVyߦejQd@l}W &R #$_=rD液+HnJf~ǐ|[w1oy}%n=SgSXST-Hx8C'Y ezvWT0$5ss_|Lݸ%teh;h6|@vZtD˿7RlߑuAπxSGl9`@͸o|ss`}:W cpeή~}xacte6_/gQR)xyݫOثtMO=ZwkV|*vd&v|<ZCNI9Ľ]$97!ԟ w.JSRhT8{DFՌ4SRR|z{x<rBfKyUIZZ`mۗ|㟣 T6JTkv.*Ys'%xd^/wdGYssi$k ٰ{7>yFx}[i)+ wQž-#JK"U犳-/l\H!-'RO|^/)除\?II%-EH@e.G<Ҳ=f+?|nGjfr[II[5G0+{elD'!>x6to[ԗ uO,);lޚ^?`3j{[ط߼V= sxۻ-aͤN3U3ޣSHnֹV h"eҷs,2i- SA lmf4`չRRzu~dƘ'RRxHϱ,TbRzq7y߾dsWT͈~5'|}#3=CnO虖Se%{\Y< MeW^N6uH 7o $L7dd\mx.@:+^ndS*z9ۯ]!]+ w2v}IFe5WڢBnEٌwϟ?OvbsVTj3‰B?hjké#FZmAZ/?*YӤ>_~֧jnUɣqЧ~~;aСX: x.$Yl1TAC0vѿ`d}NX-ZbA18~xWdry效n5cvp8]/maףmcnBbVuP'h]>mAez[Tm|S7X-meY+O_1M3  C1#.AoQR L&>ޘQԄ3Jq焉n XoIazCjv=Gyg"q jTٽv`6J1/8y\·'5--x>i7A::c7Z-FYQ هPPەTԟoY+O7^>7ڂ8}U&䬒 75B[S=zH5*詝;ou*Àh? 
10]` m-CpE|خOka oe iz)} cbG}kRcl`<.B7S\w)ǶbmvMK ^/OyҧHSGFJ}UYȩJ6[CsQ,|Ҁ@աq'c'Ҁ@[c4hVVm0+],/B]q>07 73JA|a}GUNg@ดl[+KPM=lҀ҆F¡98q3Y$ZJ'G~/9悛82a=K  }Hhl 8 @D=<_,wc!k_9h(gkYGL*uMgAKQo;g^Wf6&3w 6Ǟ:ֺJozVoEo$/=^+z$4SvĔ:ژ giiDΚ'h$b.;/ghܡ&A& 췍 U-Kߍ6Q8IZW@?mćF{'G4}:Lץ?wQC3޴V|@=0tn'Fl} MBUfnM m`q$ۧX,xvo_"'w1 E%z$%_+mAڂE}-gݐTD9_o܌m_Ao7UʺoC[/~= w1B-Sm/\aТphlJUuNnU>ŹQј1Bo>dק^Sl)0}ܓ6ISs{䘮2IRE1 )H}SGlO>/G[y5X+}6u59z#(=$ ?٘E;o FHa_5]`vԴT >Ru}Ö9Z̩7EW+E|;< yḱ#5:ij䋤ʠLJU1l13ڂێv {vኈGEz\^i$%_']TcVAn Gerl'; wd ̦yp=-;\}u"NENƣFz/ǡ /WGģ!ӝBcc ✚jl˕O[,O9!:y4bߛVc8&NoZX_YҌwH@iqB)?+? gCs,I<[V,KEz|nB /߫@-T~]ZˎgMVꏢtu_=A?'8VG|~8kţҌ)30tiz㗤7{k|>vh/TCz5"ljG}}F :ːApҧؠ /I3ֻiXlܧX)eES\+) ҧc,ol;~6jbX8΋6&l)k͸USaҧX)vKNHgק`)=\ا蓲W=KK1!eR{ɶ3g#U7I"#qՠQ9B7{-û{aDڀև1y,ׂK:{@Ez;<ʘQZW'w"[7> $}<ɶ}Y0o±_ ovX58UL@i hS IDATdlq?-LD;բQ/l҄ĥbT`6Hz:_xz']-j+;Sf<(g?iS *=@` \. ydݩF^gus3/m,fhZ()xbH5f\3"0 TbXQ$3tSG,!`uJ( \"|QWۨ+2.%P33fBb'HjPW hy=eE'gC._̂Wh|5na}fqPSKUbH}sqj%RYQf!fp33C])X>+KbۍJ3{,b!qc^m (1C{i'^߀`_qMX*ȏ{mJo+crg̩wЃٻ.: |8X 5 7Pj]']@OؾjpQ58hp^*k vV5Mec.w1W=$Qwaሻ\YwF[XMu²]kh\hqB=VX_@"UgBَvԹxk]h :Aټ=;b[*; 3^gz&Cˇϩy-L^6 mV3ؔǯNy`{qnT4`Ø$&N?j?rÂ|}3{0ud^gZ!lz-f_":-ZI0Z,=fhTs>'! 
rWEb A4p%-|n[.kQ{`A"bL}mʸ-Jnm [V?Ym3$^Hsܬw V<X𤋮@^h u;MJ;ݪyZuh=D 3?#F`Hl?\ X [̐-J&=fhNBK&/>s{S_<> 20-j^ cS~.ೢBIH‰Stu)c9?OSX&)f'b]1X}An)ݶuƌRG@%_"lq*d![\X?rg_O@z8*l 6 Zj*ICT6 ?yYLV W }CW\wărmVw!cW;ÅeXH^r{7_ X/ۡ]r;c(%m d kţWAH}__h3Dl'3CW5fv3H/K#Ԙ; k|}qx˳Le5fh!5`Tp0T" zC C7qc][LmSʁ}*z@B{aGQfȅ:ݖY>f(a\!OMYd] 1>n#]hwu0Q$ ӗ1C:0fȘR fzjn }ՠ1-&T * z.xO dLPs ?h7f}jp1O ޭ(MB6R!,[BDOw[j@ W@g`?caYrQwXmHLUGDžX XkPcLn :p;XGЪzQLZa̐ pT $@Z fHcuc01,\oy@ZQE /R" - Ubh`Dz^~aBǝ.%_c3'T ֗Bܙp9f i:ڰJem8BQLp X^{׫n[FSy0b5bЧxA*}S>3TM>>g z 1CrqH'o29BoGOnaYjp4uR58I߻caYr (kcxcsVӨP5fbG4ǧ 'o ,YD@;6*haق_CNˆS+#WcߕX7g̓bښꑻΈè nW#O7AǛǎt1##q|uNrj hC m[$hI$hrш MG]kK7_޴:]c12#yv4'q^F *  9Euux?BzrfFo7ץm<_@Ph%W8oSh~Gi!mڡ-fq@ N=х%o;NZfY()+)KHOt4 ҆QFbKA> } [̐hqD 2ɳwj(sLb`p(f.#wݓJrOIU1, 7WAr9} ˒v@ѣ,h TKh(3s{zm~:Vo2CGOvU3hQ6wa/4Z ܩ²FY1tiz҈5N]O/\XK@*%t AsUN0y4xA4ϓre1irR5f(HC0SAНHG1C䚱hT+mG1CNieJ$J̐ H9 1C4Ey%36=-F!Ezr8˧z[xHok1COIH}-QZ@{P)@vڧua4F'f?j|a]-VUݣ -#)&&kKJt klk?|VT_ۮ B̐m-@:)嘡/KjYXۋz|LHZxop $,,jPXzQ e?lj9y;X Xը53.B|}wO܏q&^9˙!FIoLJ1C?[%@(f6'1CDɑjTy"e*O">t.f*'1CK O+բ4|v[crW$Jߔt.]m-fH^=iFP[+V1Cԯ@h>HSt͈KP#Pd\ DO}̐%_R̐8Qz_"9[˜X ,(f%fB9fhI3rqNcZ^Xc6=F#ꜫpOՠ;; R z[:Ko[Z֨4d8WCTaҎ gt9C=ízvi[;w| L.}UI9yXȩsX:{Ro;^8,-uo/bG /s^bnjR"nR#!rMI1NVU#aW̐Qi=1,fkvϥ!ʄn"ye yZ[~cK1CKŘ y>>ųJ&)4l)P!vK1C,-yRPٌ3J"}jiCG98q3Qcԅeժ oKEb퉅ebb w)3sV jЃ*ԧnw8K'_nl0Kn!| t²%Ugby?YZph:H}JiI$UG~HoG}cơ #L?h1,a175fkr:z6C>{Q#_crP]2Z:~_t(f(㘱0 Ҥg3Ԩ}~kj$#L~oV Lc3#$ΘY4W㦁q>F][cEҕm1C3 Ԙ!AtLpk'1-mEfѧ6ᖱFPQ]]ݩ!O1'! >+KK u S_ؗV>f{B__1_(f(M.x:c3qBtᛙBg@uEP ZZqhaܵOt;[ΪUYGv} oc*KQ'zAAk lyMFqZ5XuGIo vs[x>@JڣgۚXC2k&_3Cǫk~CvwdqeRbVc0P^|}hߓ#;7}vN1x10pWB' 12|-V+[[-#bRDdBLQR҂# s']-\SҊZaa 6bdafd`>؄a~DxM C "aE/|Ld•F|Us36A$A]7FpNatƭ#D"^DЇ0`rV =ʚ;*w80í$~ 6&V@X8Moo}S*< Kv]k)W?F6#{1, \Kq3HEs7Q#Η|ԇ_J}R'뤙{鯍Uy&^{3Ae;SЀrڰ0ϢTX927‚z ʏ\ 0ĩ? ^2QB`ycf*@̃(`,`LT_ݟI#(ʸgmmAѧmI:u|f41G]x9hٳe߯ۃOD,uB,Q_pۯíz\#"<@-8;! bCiC=.>ܮxaCl6cz|P<>k³"0QXb" I(⾽RZT*?ūSF#t-.ollc6Ʉaa8'*} ~#M^7i`ɗ`ZD ;vMnR?" 
9k6٫!Ug]zTŗ/ 3HU|@Pd2 +lE];'ikEoIN>BqZ7z, k`[9hn(˳{?O^p1EJDaPhx.9쇤ƺEjD!yVpxDPo!Zo4Oz[m(|ш ZqfaҵlCC wKxo=ONxu!Y53^#XXndqDq0މ'̪*l7%ϋS-mxNV|~oڣ5|)ດ1 QIٳB~70w ~cT ZZEA} R&~WuU#{māJy-1qHfT5*k٨ {$KN%pʸ|qҌ:o/+O5Ϥ/LkDهPXgTX6>!,U,cMmmAD@ mu|ܛ6 |#vm^V*E--M3` *,$> #ONga~J7_J5qTBo|$, ZWHH;)0k\~t)P]<+q}0 NY++(6,]KgАሻz6?5֢Т& J+KدeRӃ&Q=GyQsil< xIXx8:(خiaaubX#M5W.έ\c亥~>;jeesD 5*+IQ]==&ƶ6}S3vKLKxN?c_Q$-(RsT `7 $3ZmX*C{0HȆZW)6҃7cn:܊ȍ|LH^nI^|36<]MP͸[oחd %/|P/X,瘃C)D+m<$|(%JY#@ĥI)O0(Xh:VB%fCċy&aKӍ )/-8mTH9)@C횖r8y93vB@o2,džbzep1G]p.@Ŝ˔A&ج-^Vx1A0l c/M0lЇ$^!|Dbپ6>El[yDž-$~X3Ը§%j\I藤N%?) $$!y.9b ?=V}r\%"_<QACycN&@lmWHާMn9Jۨ3VUa\& ~!cwm^:1CĖ{/\oٻP&|/7#g爽N 5²W=. M, l[<=7 WܫPUMízq1dh#"" {~^mߣPs$/3RaR9W#?1CY߈hcxΘ=v1PF̐s&RPh1C yd=~%|S~w x dLu WZ6 f}R|/OJ[Ԙk 1 ӭ lԼ9m_#&_.]$4JYI( $ՓPNCGAbhf-Tbԙd(36J7o/Ycd˟2<g @mV+V okLQ&׭ʔbnw E`},U,D IDATJ,uJ,sJ, {Gň@F_F'\svg78qBUXm^~v1CBU\jyZ!`_q51CBF+iP{>cĘuB)f(R̐'}Ď6$"r7ظI;Z̐0fhR0뷝 ||61Pj ]r '/ k9K1Cn`gqԷNb;ێc##XV 47W60ݕ&eJƳX 5yi2D[KQ!c8X 2MTr?,1WF}yz ~Ug90q diK4hmn/e_Bp!h tx9CQ*>zB ~ @K@ xQX{bd` .I Oڂ.=>ERP';ڹ.f(i8`9 >|AGcs8ǦR~o>b̐X9)f?, 1ެh3a%f)$AцP /ցccb>yWuőKM8 aivo(B6"r#U8bԘ oq3ȩ v3M4S{WMڍ@f wr_k >.Cxe x;Sh $Vw,^̹;Xx@1ӕXcb,bAͰM^ T@qps:QhS;>X r,@J,PMK ^;` *$ Hr Qji!蚔1R,PYCUb. m3Mmf3n/yGJ_BOPT νVo*2c6/h2՘\бO^FcpiА1C߭+#WELO(@FJ?쟤5Ř!j/;fzo4 I9EZ! U j;ڐ\;b}f?д+ a \# g8$3t%h94M4Hk1d3ݎ^ M#-9!r|yJ;@H1Ci3t,}1&כ"avSE `3th#Ҍ7O?AZh 6$"r7al܂ƹ3/z3ȹ)Pb"}qޯQvQcԘ!%jIJ̐i=hp^k]?|k3Uq(kw].7` .X5bBc9 kTݓqXV<MuGƳ ,HmbF2?Kx@'v m؇nX>(cC>e]k ^*Gc ,:+\_qj노³"p/!fJ@:A,gEj,QWcqU<=h(f_dF^}!K1CuOȳGvnA36/Ĕ@wXx:]~ X7X!C1#.A_*4? wLO5McJ\~شXoq T].9<ECǞ!u  ME/ 0>wtڊbHWw ^̱r|3OIX:QA@^Hxɶ+4*W>a!+2 L4ƛv ?Cr4Ъkmf8g %:..hC^DZ@- %qhB\XKXV_Z$neFjqg ċvɗ6 EP -=3֊UjЍrz9f((z4"Z~ 8gxsLƛ3p6톺5;5ꂛ8ҘՕT <ӘW&T^o(Ti1Cy50J̐8fchdh(F7E@:Xƾ*B; ZqAQ/87 Hae:hу^mfqN}eqZ뫑A[PdxDh1Cߐba[U_ޓ e׭`yP SMAtۙPMK ^SbfM7b t^(Lb-m{?mL 9]q_t ^Xes{Rq'a\@mۡ03eCѦ+1P4f˜ 3@b5CD @dA^zGgQTm9Jzӯ[:n=?!ݿl[)S`;47`op-x轇Qu* ozF `ԑ5DW#A[:ض'U6 +~BX9n~/lTHF7!s\+sA o? 
=6M*X]o돛2Sh,GsXWh `thU8m?\AR'M?7j k dVUJOj}fAC[+^N"lV!X Ʉ#ǷިBʌ ~9,T d5x◟`ZPeQn_:RLhj_~/. -N}, E0}td Pg b.cfʱ@yI, BKzjNp!@ۉ|h5.ЪlW  #:'Wc&G-Ubq 4Q*kj[FCqy\|7_aق/b|0;! _`_^zil<66 Xm  -V?5QlYüddWigĎ|fv)rY/''^kȩǃc7 m5̷@҂?C FQVq[-8?бKy0B)hv'Т-ShGV+Z뫜n6td}Eɗ!0B{5#_Հ{QM3qL I: .ű?Uf[umBƵ߷“Xw""|A9PeW^aj>w:ٷՂ\Бk?E[EPRh @8@>?<0g [:&v꬝q.$V7 nuCG{QЦis`\MӭЦ CX_pӭ79~]k֦[- N܈ XZk(i֊ XN"#^зZRWZVCIf6-VA4Ե͛jV-UxO m``pFȕ}Q8ߗ Z@ O!ee@+Z^kmV+B`Y hlmEk,@˰onf6+'Z,hlkZ)O8Yڏ6 lk!j[Z`LHƭ^v 9 EJuS?㼭 hkx[B 턵wtl9 ?t4ТSS2W6BnUȁ`p&B;w?s]m2^$WpKߕ>CuD \GAHP\" yýI Ϝ8gah~_ua?4 bQGµ^&:YD[mu0IN~BNM n="6|p_ Hp1V;m!FQ6(n^p`+}m}K} Њ;z`hqBwC^{:h1څxVoVr툈Cqn2fh=ۡE-8۰ڬ˓} DD4hqm?:֡xXɕ jWQFO)>7) 4 `1%蟃?DDt zhz@c q T'@uGp h } }~B_S@ :"""Ȁh6h9Ͻbӛ|?<'"rOz \ТuUL6x@wR/ Qo2kPכ `r =(DL±ycV±FHS[ 2VK$ ߆WZ zDD۰#""հ?ثGDԻ^=""}DDv5}DDDDDDDDD370{ep$o߮A:e6{Ȉz Qxm$ 嶇 c^:<"""""""""KAxkEz{t90Uil;5 ^ p<|)}spr^_w+ <X徭2DDDv{`\(A] ^o t徟Ѕ> +yz7:nϴEf{`%ٰ}?1q]I&ǚ#O `qWwy#zDt,X9d{G?Y.7;^=" rt{d .fx&~9RwVNǢ.l;@PWa]8,2 "" _;'D];k'NSĻ~퍔'نۉw+n?"""" `# w}NdzO"""rs>Gc} Q4πa/u;mWFG/Z7_Ukv"L؉>{jg]'^ضPpP' KŻnáZ.U;~ox蘈)0U:~,So^QW}@a,60CDD-@7ZN=^|~{p'۟`|{ !qa25}݋C8)0ń+-CDDDdcS\dۛ|_Wxvlfh'DDD!/<[b€ΈR8A""\ {pk`itk.&O9kM*0 |֍}'cR~{HDDM/H-p` ;vA"0g.9 PIDAT8Tp>/F^%:#DDDԯ4X~{ x .Z\ `n/e?ȻZ3anG5Ovs}-Q8(@D9h(b/]k 3l'f8`~ h\ |_q!a Bćq|{ %u_bށ[pPﰸxub>Jq8_oQt`zQ_>Q7ҿ} `_k LvݰϷU".`["o!QUlo-n' oR4 ?6 JW"?gV+Ɇ#j60NQ_o+8rNO} 7*Fn_-σ-rplD:#l -Fȝ,0f%FHyF_w9(wT9iF6 #s :`ZQ 7ҿ crni@`vqDCod`xP}$7?0f;@ߡM8Ccyx8eQw#3.GO^:ǿt}@[T8v* M0 @Pmc!Q720| c ȑB -fXTx*q߅V6;8(@V X-045ܴz;yڶ0_}D9G'/Hd`oVlhݥ{~{ 7~H9 ۻ8_n$/ =)CwF$VtC0ꥲ CYZVeJtZ/6˼zu{8qִ;a9Μs+1r~`„(^0 I)?E(6 /3*:-J)]Y"5(g$}`@0mo88/ɿ lCR-Y"5( x+IF')@R@Ѿm'QuQ|H 'jGQlY*SH_#=YCqfpIiZSFȭԈ8 F9٦GB8Ap.!I9A^`BmӖ\VYߖ(=Y}qJA} 2'$?{+qIza۔Pk^\[7 r;yAR 6-ϋ^>Uj(=яϞ)} 25m$ _Vkem .C23jK+R`}.>~OգRśBBCRtɯ3'E(vFI~?Z?~'KoCg5 Qއ/^#]Y1x4ʷչ\.3,~,hjJh~g$~Eem)݋[OgMa?⢻ %yG_K}<|n:k<|6IKM3=!q(_ɇ HjjkٰdjUaPH—{ަt5$Iʇ t&/I?[)(ź73@$5ݬq?= E{ 3åXy _+ " $$I$I .'fHԴ:Mkݲh"sF!pBR6I2 NtEsfej}U}ۣQ]{kK}Ӏc3S=> mIQYgwUII:"ӈ51 
zFM]3jꪞ#.n,TCm(R*fӻOqYIek3HWfX.ӈ>r7s>?. |Q8ʿK$I$)4/gc,=G`0!$I$I⠀6kA$I$I!Y IuVAH$I$I•n~#$I$Ip5]AIENDB`pairtools-1.0.3/doc/_static/read_pair_NU.png000066400000000000000000000241271452673171500210010ustar00rootroot00000000000000PNG  IHDR!6msBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxw|׽BXa :qօTgk BUqW[jZZ:ZPS3@$?νq$=x܇s'$sg|NurH5$Zy IX)Z׃Mԁcyz&ǫ| ׄs)Kz8.x=p=B.mùt ¹Z% 7sHH%SxIMޛ}Ns I7ɹjYsn.LNKY3Nqt:h\-Bd{>N.~$?\:s ]=A{6Ɵy_tFox |FrhtN*,-a -2e/ؚ69R٬jg{<~w\پb. ec4!ľ#(> ug >A&E3|xOKq~~z'ׄ$ԷIY kc1sG \GX`'>\~4z/H¨\4se%Tji]24;63o*w\:f[iˋ0w*ˊHʼI(5)1hCVr()bTl( qC/qvN <Lķ }/z3-2_m)70W4Wr?~)[ HHm^&.TV0K(٘orIi^61υP% +&2x£$g婫(^=OOOˆXNrh+lVw?t=|\6<ݎ]+Kcp' Kq'~wB14sc3G߳mMu_=9^3*O|:bi60nئ=ncmDc;,"m1$7 81iޤ I$ ə|#bKMOR39#?B019fя1c`f(/O|'p\Lv e~?{13mx x_YOck-=.{{jΓiPe󟼒 +$K$IO^E3I*'Y$5+܉I\lMzv";, Ƞ&}Z^4uٗKJ0lWzs?o. +A[0kmΥɘ5uU'Q[J JksUwh4̪b\A3&Ҷϰi'n.%=$՟Ȧ?rܑWЮ>VrYd6|λq9گ~;NߓP+fc<sE9A_b{7162 \m'zIui: = wޚ/;kWcA#HkOF1?tIdW{_tZ݉~5[Hek,jb~Jѻ6USe)h%p fSs 0gV[ȥ-|^wɯM' +h#p,XJ7}\2,/`3jyTg>4SGջ%cf7;; rG6;TawpwRpl~\HdzׄǬU3#^GW4LIHyǵR D;v ѕb)EFW$X~dҶp.i6w\/`v*N%gtb"; X[ +tdG"K̸\f1x$If(x2?qS˦ڦc2#LYH9|<rL ʟ,nA LYry {3Ij fOaŝWPe#-SoM.!Ch?F}Ԏ= &eك(IlҡmU?֎>ڴwƯ YYՕvu%MDJv76~+wYy_gz+z̆ k`JoF؜$u9p'wXxބX nۀ" t'cݏ.|9X_\S,y.9܏ݜb!v8z7'껴ij0We3&9N%!K)r*Ҧp6R &Td!BT]VVKBj:+@EfKKJ:'PMq'֜rIHؤ\T,qR4PZ]BT(]5\Jb6HWەq!و4h%nìh- Q$ljt6t?v)fH4V ^$^Yfa&wWDĚp?V˱"`9 L|DDZn$S.yFx+p?f{Yv ۟j9VĆޘ՘Z!"--@\u Pu͍`? 
H uUǵs)SGR^mbH0!1ivӑV$BhggfW-"-@̆8;OZ L""-RI8%k9Fx6fRDDXJl:]@c""V A9˗`ʯHv s[ atH|xYs l?8b^""R'~l9037‹1m&""ܫt˹Hq[T>L̘ġ1WZEv.^H s1+dG&# 3ᮻżDD \a;˹ߛKp( wۼh-jSU 2 | =3}ț|; [}F_jnDVx⻰*&x6p$rXK:=;Q+p/ˀ MHX5͞=pe|'KIy}(/ξטly8a,isp+{̉{ !Up)bŒָ'= Ǭnc8?(Aɛ|sHLxR'3Z9sY;)Ӂƒ+wU1OD/c锻(n@~MpErꘉ\ D`kmƹ0XYOj0c6iQ6|7;q~GZN\VNr^{p,_sR*}F_{>'(٘}\~޾|kgĝ>6]3q+ `8-!gmy e#TSq䌎~^_w~^m.r>N\=}{x򟜉a4~M S9 \m53+ ܕY9&"Ҵ?xu˜W+RT'tH2ĕ;KX:NҾ+=cԿRQjswڙS['uR:tkVdR`0"ĿкO""p+|Cry^&Vj7غxBٖNw RxYl='v6rn'[sdDQie,}ݽbMCϨ% pX ~uʥ+X~n?P:{P,95SlONA`J{+{?aj SewˡgЮ>N\Q79cO{KMf Ho=Wؾܝ;23:5g ѻ8ĥ}qu8] G^Vd:qy6w9g/TIS29DCZ " ;!N)+t˴.}ss?cs@052ʋͿ+IJrW TQ9CvU=Kq`Rdzo bXGtq2""M#jrEvG5#u/yv*˜xͧ/Pj'$w;cbV}ϺɹU;<1#z "Bd? w5'pR&T_} D3&TYa&dyum ;;ڏnGě~Ms>i&-`UA`灁1NFDI;fwV2Q[(zuEc[|?p@0\7;r_wWugKݪkVbviR:tq:qo}~w.bQ)왌;%{xg/Qd{xGA?~?ī'fy4""-FOJ'Ub_mrԶ4%Wߙ7_|ΰsh|\Cܶ?ĩў fKobH &4rV5 J^wgw=|owugvEw_a4'6ٷ8qBEug@梫95ڜs:P0鱈HQd?W;Wlix}+ Rew(@/ug/y.( ]/S??\ρMvvӾE0I'()։2z<9K^;t:Э]]-s$8qU}ԮUV[}WM|`X_N$;7x#`W{Cr;D0)շqVz6S2+;3|:xaF{.n"m;$!xZaHO0iCC "s= &9T:wz*U'+}̷b~[,lw,;SJ3;v)ȮG[]A~?t3<{U\8i'x -qfBDD~5xX&P%[|zѩz3\! 
L UsFnL}e)/ߓst.uuϧÐI @01,grE6id19ǺoPb~6nt^sL÷K`p*06緥HiW#>A;7%c6GqX::)PQ݉{ !x7ou7NLJN/2o+E.DugQJ75ɖwb!_۞ÌcG0=WY˨i$O_*xIDAT`A^5>@X4ΕqBJ*Jؾ|N &=t; 7PV9&!-4(ȃJPy)k(߱$}\/Ce*)^kmYGey%WX1Ĵv}եWQ^yt:ݽ2>3.}!7s(L{Ж_X̫DczyCnUBDĞ1j50o7^(++Ft'6F9^`QTLܡ1+yO='v"Mdfطʅա5$"ҭve'c*?މ*uS, 7йCEDy x֣sm'HaQoH N@NccrF8~} ,""0>M*"+y@(jF"0v1v$m' "g%v""q,Hבfw`!15""u  %+{MD09 5'czEG""uw)/5iEBHݴnm{ ZLGZ5""us y#*CI""G5p{H \""Hsiҁ$O^צe@Q$G~(JB]l_<<6N6乭:7z^l5zkk/ >ŸTrG3VV{I45"" z09ӑ*\ tDC1Zpw$HjMc-#Hó,^1"AS$!5b:e*nx#5}'"u ҺgF5v~'4 cHp fp@V6C7Y̥AǏQqi&"+pu$c(ٿ8b:R9Av4%*UHm2j8T[p/LNSc?4g">di,wXʣAǏ8:s .,~Vʋ!ğqB+j㇮Ebgq㲭X,#X<"ڷJSc,[>?~%YLGjq/1|o긡8~Z$'Tɢo"TQn7#V௞ R.8~LTܮpi0'Pj!>|^6R{}X⹭Hq/C$Xփ_a1A1f2WxAYTV{W5""5+.&\驲E/nFRE9o_Qqz;c~o޲ԠS$b"F&DqZ5""utx$yo|Q6S1GcߧVjQT.UpKŢp' sZrk0k};S2i>nG*4C2nZ2x]?;k2EW@:%qJH|Kj4C03Q9e~ B$Qr8$T˱KU3 S!.hc_ H` 7 c?&2t!-'{d3!^2]K?şA9kIENDB`pairtools-1.0.3/doc/_static/read_pair_NU_NN.png000066400000000000000000000413041452673171500213700ustar00rootroot00000000000000PNG  IHDR,sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxu[ULgJ"nqY-^vq)H.n)@)5&&HnMzy$&77=ET;036"jWu!U@c!DrM`g!$t?\ eX!{c\ A֎ """"""""٤ Zn C-hk.Vq!JY\,:%6NѴE(=(FUD]yzr,Pkv:c~QLz$= <]4.+1,,gʒ,zf鱤cm67gkkoBK}Ynre]lUT\Jo_ux)YU?fm} c*Xe8p,Mz!Qr@,O!eiy Sidqʾ~3q^֟'?o8I^u@N fItxxawQH)O~oc_'[^>F @XQʲ%A(,cRw>$Y/V_'{˳喤,; |,^eYk'BՔ|9ztYF;q,8rpn'|&{TZVp7og7e)pDunv/+fOMCI$f./Ea' B\ twX˿DOSISziY4QaR,9K]:+B˝ۭ_޸p[,{8r;p},Nr,wpE"|2&6KhҾ;OFROVY1DwY}TRdI%vܳ/6eU"el _rQD \(D-H$ p`p 6rDLqV`ۈdR{c c06ͅc;rJ<8"E:mZ\ڄOVfxYf,Lot0˨"4Y:dH<k_d]<k_0|o_R^,8"78_eGYNNr,}1K"YNjNx.vZX k&s22BYw Que,1FGnkcM̲,3QgID)˝Uo. A7[;Imteobבʲβ,Nrw0Hka-e90BY@+Yfڭ2Qq Vqp4n {Yړgy")QD,5@ϛC cr_ ocg8\eW;;,9"; ?Fbt~f9͔s;NXM Z,fy2..%UdqQԘˊ\FE?r ,^Hv{ְ079ҲsVۃDQRYDjk'VteeI, V|xۮo/v?CGvٛzu=M1c+?e)j,Wj,Gc9ΩN! 
l\CEEp/~2u{|Yܳe]+^1ɅG3oeיeޤl5?άw`e h|g"ZmgCej8K|a#M[\yY< ~Dqhй$Bp\;')&1]Gu5r,Y&1]k Ǩc?pwkKa$j |A5?6E|VUIt ey{ila2{2DGԈ)!cSO(M\}L6,ws.P dɋVͭQYwŊ՝x 1!f,g* 0 fd9 2>wYD2Ύz$8eNY%?˭Ul,Wnb۰ygGYnb[)l.'FʵW"傈ey׾+t…IY u%\wX \kwcJgk߁%v.pd)jTQҸm._ĤGQj,'˲n923Y:,_IೈT76Λp,:ˣ+)DDY]fʱz.bmL6B,*.S}Y:D K3l,â/} -t!9'ؐKWYNʒpᗜK`,A[wbn/ݹld&{b[wbQ,gOa}se/6Kǔ,?H$bX{>VOk,GزP(]E~cvň} lĘ,c &gK{,^M3g뵙DHOY=Hs).x(LeMe.VŁDXC?x/^qP0)K6.ˀ$| fe(U7Zʀ.˾z$Hۀ<3vkIJrK,S(d+ ,921MK6Y'g DB:,%Ջ?Gg,+KZE$D\JmeN=y & c\,K&LǂԄ,O].GuD icw٨O'Rg瑘sk4Y^b۰ 8NK''exQ ;aKᎲ$a*$I\.:,&óLlYO:IYFJ!򜃻,'v-X ʊr~ V1YV}6HYKYznD, ?u&+esubbrTR;Yڿ [%ercRqHFbEb @GY+aKv +9_uw+p{wkJ7g)n Z@7eybl~gW꿬d h PڪI*ײzl*+*(.il'2BR %4I=Y2_Ml)l?b kqF@O`\YKW,=HpT'k,>wV+YY|lc%Y:cvT B܈wX4 h"D)$Y;rQUDDDDDDD Si$틱"G}X@"Yw'̗$rl[ؙ1 Ga']"`C3~ >8p$V{ |} Hu[b2wg x`26u`{ՂK&aT^ٷJ` 7 VN~Vc_E\UWA5Z |-Ӽ{KJH|8Hma}Ap24sM"{}FR[^6qDR_?bNU[r%Ӿd2O C59,]}(K4t>6rN]Jg ۊD./|uJMeOX|f$6Î|=ŽvA_j3K~%(\X댘GuXHa(Pw.ƪ)8;[uD6*[2I$v5/eX!UMXlQ9`E2ޑ|dFS%/g-E7}7" N֨^Lza&)|x- l4cr,"> y$UGV`4V$yNH⵶xw¿^ջ)'8>OHއnH K-VhX\>فhQD*a%ؙvNI!~m)m\ +%$l_W]S6~A9M#XgEUHNvmC$jSp|MWҽF$ðdG ^m%X}n#Fi]:+BJ5;!3ĉ=&XJW(,A9VX],ɲ)z "P}o%Yga{$rZcsʿ$8$:SI.~%%(`s^'u?EؼugV\[8X Me b Pg$ #ؼZ\x CqoV,3y I_`l:õm?Ӏ?('Q_gl˰"zҏ*4Ev#x |+ GO?jSh Rpc-v븋bl$U3>Sݰׂ7W<ݛG m$0YUk\"oHbVPm#0ض$͍fk3RP>I5^?ߑ~_hi\JpI2Z!NOnDZB{F C9y%hY߫V'4[&7]M)\(僩ߓ?CrI4iX^S/v?լ0-ȊR6^+~vz#1yHs/Kg5vb#Xxvָ\R6:OK {1U Da(~"{y)6vxo$0e(ҿ)waBc^"DͰؗ]zG:"UwTxh3eh<խ8;{UO]tqFSHzaS|2T=반 ƦtVh5cs~w!VDY.5$] e؋ ia+a5&0fQW;W%t-`!"RM٣r1lH!hx{FﰨFDC[&1a͒ؗIf# Hؼ}dGĨ{2 B~dJx1x;ƹ66Zb7OsۦG;yS#RDQ^M %y!${5F4uj;D}1M?fRa)qXgk%x菆.o8ڍHJj1Jp.'0Ӻཉ'Ct?nU! 0 Ϥ[WD$Wv:wQ! ؔ Y1\ lרh-!Yg8K& $^Q_Dݗk}fO' X*Y:V_R{ľ5q+Ha8ޚEV"شbR \Da-H9M.6)tFTj`)m`5! 
GNqt3󨥳tRc1sBPN^gIaoh+a_$D#ӼCicwoI56WRO*<@atyc'K`l옵PV| /A5ۗ.ZVF~$4\n4bDFLʃEs0x;LyIv-ѝ%n!QK潋44ؗIw;Rh t|)X|5ߑ#8rc,MQ`O(sI}0ϫ0Et]ghhkqۧv.yJ|R׏ԗMH!H>9/ɗx0$Nkz}3V6)1b V6y:9x?x~CEІ\ [-ie5~>룮)V,H>Mۜt&nOG )VO($3p闏jJݗɶ.K%v1۶jP$eYɵIpErvT| D"aju>RQ)l[JBe3zv0|!]--%74=)%R$ؙ\iD˰e0+nNW?H!9e:WMsGV~t'NwH_s޶Qqќe4_hjL⻎Hv5vuDK/lZD`® VDQͱbɣ)ʰ%PH>XD6S݂U$NnYx"{WfEbchO΄YzV$VY st5K"~qblם&h=^Fg }gY||"j6{/aسYKp4E,>H !?ޤ`ddHzclmHy0g;%"uS 샍 {^.&t0ܐ,?T8NGFv;REjG!Nj!?H[ 9\C3D$;.Ap?8RZޝtsW= 89w},S;%""R KXӏQgdSHvՅf """ z !o(*BDDDDDDDb}i~u+rпx0H$#zY Q_b s980],i\h%]0A| r p8708D] Ib`\"p u4YD$?%]7 H^:%sH$^+"^R+I*Mِc&|ӚNI "R mE%=Ӏ&>WBxa!"ElX0)hZ[}L:*vpHDDDDO"O*`nj:%] 4סDDDDD2Q䛻 ``$VyYiYD"""""Py $>qD$< F"J~m33c=#"RkI=FnC*,jsqDDDHT|8KTFT1VHSDDDDDDDr3 4b́ӱ_¤"""""""9untobsd<3yiy:bg+T}@\$MǦ xʁ#G$lhw2XrH\k&Jza{8uXxnNO8â-wݽɞΏn>x^K@Qq1Vݻ~x۰i6**+*h=`(=v96~{eE9SY邒x5Jg {94z7}A>qw',`sG&N(:"lZϮ>8I$""u[~@; }Ί~^F-w(O|ץt{4ND$-{o@R6 0Y}#Cͻ d!:_EJXq4XWwV;<:oot$M88tzqr=+Y[߸Z?g{mw)kQ .~ASlL|9Ί݀n∈H=NcEǺ ӇΊW0 C[$fw|BQaEf37fقxAce`w=vkb͒2~}xEtsLy *+l5; AW}p gX aC"ݰg:'gObK{j)4x`gȊB|Mꇴ:-#,./DHC,99L|zxF;~Ȱxr*.m՞{xOK^HQq`/OힻO}Uip9V}4"B_.N lk} ϯ%v,rGDD,D~sK`IW'd? 
)T/a#݀a;NcY'"k/nwHaTdR0p7cŜivΦybq3&1'펛I;_M56xqcT=n0s]ȁ5KoPwY"tx5GYDD$>#0T958WPJl}/ڊS=^NO~n^eFO-e87VUhO閞HDtojCKbDZ/ lqv`#,#hMHJH7cSz 8ZMXp63^wݲtjrl]#50u^;1$z][Ն""fzCr+p1c#sK;6|s>owlOڮU]'\GJTEE 8(*&nIZSo"J|Gp=wa+ys/N Uz,[|徴y=3CO 9" !XL+DDD?F[ lr"]TDԩ0zIQYϏ]6pjـ(dZ+WADDDw[V&6Hfqop٘qR9Rz1瓧YvᅵLI*d4w'` """ r_OŀyFn󈈄g͒2~}1QEt6aOXOo7iו_Lb`"_oUn%"""zי0""8BKjvRܸY9w|;Vc$ ##9#"~1tԩO^o7nۅ^{x1gS=z9*0ceL~{x^ݤ ]TVZSe#/pT 4]&aDDB1S=(*߈C}:FMfNf'O7ݍەL{j\]ڪ=v?1<TYoq>iA`r;g)$1/pTL4aDDBfh *+Y]O noֹMuLHvQq#ڬfB1 ~" ^seRH&:Fɭˊ qj)=w>f&\gѲtfD=XSӢ)}2V-͌x~#Kʨ`ʣAee.[@~}VV/4x"g)$1R*li㻼ﻢKÈ#k1;\JuD2#  """@lY1 OnC^e@SGS#HJ=NO{m|+/\áJ Hp gHYH9+K6ٕvCou7#ԑ:䵁#Ҡ<XRՆܷ o*Hpjbu>q0ДhK}Neʣ|OzpD|><>ud2Y1g`SG:#>e!tfy>O(.mJ#}=#?@:@[|`tvuDjd5gp8""@[j!9w`a P0` EDZd{lLtb oo?$xE;#fS;,)cƋoy7x )]1ċvv|/~x AmGF^60*WFϻc`AqEjY[a?#""YV q2`9#nAgCD$7Ak-o lS#oܓ#!tV/|ujkFp DZ6^F;"""w<;vkH.]0wX|*MnrF~z >SG<#]o#!ق((i}iq)A.sDje>} ""Z \ȑvujHy{4z$H 8KPYQǯ}#icx{3dͲvn6xFx[92:bgx뺖b" }d.GH]].:DD$]@pU#p{7ԗg4$ Q7߾lVw÷Y8H}SBDD|)@vUW,mіxcnQTTLmT]C ~q*|GaQUѼO~`/_y)miM5iNQq#|>ͣI8)_z_u,p!V<NpB1xDeؙ2gc'wUB`O|AWkTqy.v$k+hKY9v/FqFbND$otoOyr*֮ۮ 7)N:ҼMͦ4ϿʵkdpH Nyԑ,i$:C o>s%{c% @d7q>Ės5^BQQ;w߀PX[a/I - 0DX/e$.EDb Et$|.ťM>4n1~e TVT@QoK޾ŕ ,"ͺ㦻CMz~ @-HKo_1w:|2EJ8t! +yXFW 1](z8k!(tV>_ KL.V8$""6>|p8im^w}Xtw#;wb'v<_&1z.>xRg.ƆDI_ 9YI;Nش(v:}{+/ ! Ǒ,uZtO~%661 jORl>]*muZt]7U V3cN;([DDę/YQu6$ӆyj:özc7wqDDDbJ!zL7Wi;2Um;3 ""Q#ydѷX]+¦4T]WDDl.џҷ>žk llT3:ˣ4NDDrN`5;6$ߕcK~:ĦXE7sED$^:XGy! 
xX:HV:]@V9""""-A^I D:,\Su-4tls(Sn∈4hI ϷsEum[Ie#_ 'vy"p[۝ sp_  8ktZN.b-""""st|8H"W=-csciҾ8"""""84+'PHjJ['9Ӡ~-}kZ6}۴elr)""".]( 9A _MFOY+wi|Gsl~Vam2[lEDDD^`Fr6'XCBx@""iM;aZ:H"zE@,?"`}%6R5a!"R^^ (* cTÕh=݀BxMDžB'liZۄ0$;8e)7J8M+ r1<ϡ *\ Ƒ$|H{"v-qYt709vU 3uX^'"p8t aIN1';""nIp=_tgc-p}>Yɡ$*#"w %sǑ48[NTDc$9O!"٠sv  0x2v-0:(7H8|(:uF0Ur;LYDĝc$qHJ~?WCj|f+H8s- )1$:~6 9,NdAa?4B$7*+*+1TVS|2Uq"i-E7} ǷTZ>+BxbK 3uXyErG`X7HaEc뀋Hác$¡sx.&h^#96"hxHn]|5?}vG28YD$t$R8~t._lmɭUJ˘h5$K]nD8CE$t$R8~$V]Kq:vyl:SE h9m~ǗPիH`sݲUyzm/"czxq \f_'0 ;9| XY6H1۹/a -Q?.azjcrZ7yUi0:,DDW5V//8$:twv}.7T[Gr9a!"^cxH5~nA͇!^ϻr9 """U7aLa_QH X @e?=4C\DѮ-jxvmo-*"""ѵ;},htL'IY)IDAT&~oO]sJ)n ֕o-6fXxqA5N*"ٰ #w9HNEt" )#Fr708ULfj hnz +yf.D" Oc,|%PYܸYWS;Vn~kύ,TI}wʰmKˊjVYڑ S <<2GDkH9M$"u-.[""RsQ؇'W×M$UYJpci }7688wX`F-5;{H#,W`覑nWܯ󹺊@'D\ ¡"yJ"["ݗVuXGQlLϕ5%D$a!R8a!*)"R{  '1!`fu?~uÀ@{<x@:uX_%IU""< 6x)VW"9Ԥ떻""(W/{}@2y{-) E7 z.2Kx<$6BG+9/QRT@Bv47\ >J KkIK;ӓ}M;Wܣ^*;'uND|aFQ>Qc#"x{j8xrYA` +zIzHƫJh{kKQކ4ȚkBj~܂C41Nhґc?La~67hMq\<ԥfcWxuh?}iDqӈJ\ 8:9SFtJI=n'uC7Hf=%msNfU+Xq\2T0 p0a5"F5oǓF"R|d-SFD&ZcoI1kIA>"&NO-a{wW`btd-1^/?77N1 bb^4IP/’b^ÅA<ؽ9,_TReK؟\`*pDS. K\kEM&OtbSecdm\Z7H|6Zr2̥ϑи\~2ь\$Iͻje7oqo\~Zm{iŗ )*{%6B;ʣ˖PVrOdR$FEp K) PCw?r,p{Y@\DGy::Ғ_?gif|-[%;ߌ?ӆhd3nt40-dYĶ݌^N3ђKYBAhS~;)kViɥib"zI2Zr &H[BS.iX_65"BVl9Ɍz>MλEK.Gf?2:ΥEc䒛xS:eh%op/v[0u5'6LۑOkɥ&Nuephk&FqQb6㭯UɷSSF+2*蹔xY3N7FgNs䒹'ĩi8q);M1/6q*gJ&N>g^%ܝItި%2kǷv褥16c;swX㤣wލ1藶.0HS.y[["tG|CkɁ(ڧ%ZmN#i'3>{ԒKRSHjqZ}z$6 ^9tl ېt3ڴ#;hɥ*B;6*6dq4nhAK.AGDAFSq-|ꡃu+=VE`KEsTb+ 8"/bCi?œ~+Uu-m}D^ zs9MYdo^Ί6g;|:9ⅿ2v@;j}ߟ,ҡ]#>߿%m?/VŊ/-С1.zyfcɤ}ku0}Եb 2CfƻO,%u~îCK.ɭ{ f|oɘ\ZvH ZrIhҁv=cƇ9/iɥ"! FN=IIZrys͟,ٿ=h]B^P1m50TS.pV k US."d5.DŽM%g>6xKx(DŽϓz\Nе8-} 2Ne+GDmvlM7Q;rVFZr~>ۼɌiӎs5ђKK5V0((hirv>KJ<r;rq.y26(8;}ɓvž[wrt |ZݹAK.e ĩY}\kSX4M<#EV911EP)䈭4gJt z/Z*'t7}Q P#Q׎u b|@ T.E,˞\OTrNft2EtJZ'<MB*\D9HqU6V}z6yr G%#"&[nZt\ Ռ4a6B0Y N@! 
&ێ*i5ǍL^@q5k>jqz?+e: Vs*hMkMOrMX 739j r+58ύP<Ȃ(rB%犓XTXfwԑ` ?u$Q|$^pn%E}ճnz Hՙ>jJzgչ8؉`֛Ne+Wo}Q]3:uYxK/  dAu0Z ,BUZPC^TM%=5.Pwyya~F>CiUnD4F-yHJFM0 כN؋z`M""PKGm${/Fs޻X?ks~ĺy`&!r}#L\:Xs.j]f=f$j {`yVRKK~؛`_8ǩt*x]ʞ 9h]-D 9.ƭx3ju5ԐcSNu^z [Ea7KY0pjUH4HЛNX7> /MPmGxVr/X#Xof6wgu?jֳqRQ^E,koڲ w~I{4n5&Pk4p {+S,Q/ߍPTa+;gߵ_z5 n0֋J=Y.ԧ΍8_KQk*X9[o*!+'b<$c(= n@ȮC+uo_n/HԛD$BTcR@xK4м|*O>W E9PՔQ9[iGXJPi(KT)\ԥ9_)J&Z)kC[`2Ή_yH)N\>4^SC 56Ucwك& o8hs/2D.(QۃFvm &m&8Z񗂬 \@ԇ ע*muZi B5Xf˺eFFoBX ^?jtBP'ԆԤb$aښg/G˞B4ʞx?|;jXša g!k]ZftMp34bMd úoy 5A j-ew oS%pa\Ե'/=n\U8'k[`]^=d'*5SYԇ.N6M[v"O^@͜FU1yqkm}m1Qu~1̭Xe'8 xuX Sy}d*5X?}s9YMPTvrQ5Ձ܋^Vapvc:Rv\lRCᬧ|l+X;. ;&tzmhQ6QpzjE7ewNYuU5`%(BOsj?qs Q\yj:5\Fk!{V#QOk}@h].| VĢ:֡~ݨՑy DIYsej;'!ol*V%ߛ4^<͹X?X͹y.&z ?ghD܈IUܓwPk+Ʊ U9KF9 e\;@-T:.m:/՜KUQ)R}Gkm*oEm*kIKo2}#4*D-J^j&{'?<5i^]v e}X-vAod![M&ƺn|݂>'4ƹOT|}~*AD.} I-\T%A5R(^Kxq-ؘ$ոޏ=-ciN $QثÐL]'?QUr@bmjH[a8AƁ6~E x%j%TO8yVs.T>\CWT?qX/V 0UX)z)Da}Jo:"QK|E..D1/1j~/c5q|\G['j^Z±Wa՘We'|Gϐa kw/㼧OQ,-;!lVt«W,p-2L}ycMj =P-2u#ڲB^> ]4Rt@Cw"Uڜ`# 렮S'5"꒙qv5K}x%܇UjIQ=m]oAD5PkYE| 5qWE֖ U>W`!CݶƠߥAjpc9\94qwsC-3ze-O1r?@8 Z <ߠl$.] 
!4J,W"PBlB?]!,"0qu8k[87ڲB8fC)4@Bf>܃VGA5Ud7j۶I-B16Ew"*Rd&ΪZȞBQ],NG5Ρh"$E2 '5Or8묳2dRF˗3uT bEJ;%Xv Сw}Y =sNF_쇮iJ"O&c=Fڵ5trJJJꫯ},j1|Rܨ|ڶmOem*Dx5kÆ L9 ݄ cqxGw|>sLZrUo+[w&:qڟCfh<3fش#nxu#{r\Qt3INgǷևWMnlo1ܹFU3Gǎٶmqh;j<}YY?9P*7qfΜ7LVVm 5bkw΂ S%BT#I3P)KPWV\jSpU<讍_~z^h"5㨤4n"U;M1cW<@lj33>|4xIqav/:^:p#(WWY\\&Mk55Cm󴾬L.R7s(>>/φ FӦM4h999ᗁJPkNcQs$Vv1Z@e|-B g<P=V&8}qu4Gva2z=/v'kD[!}sf\f[Toy7Z_5[ >}ݏ811sVЧOf͚m6Z/{ JgSn}iErUT^Xk 64^1[#Aw`dYdMD\f9ȎX{bNf6f5hZ^ɱwgyrx"beZm{&r/ӿ^/yBQui\`̝;/S A1n8^|\ 1qhSQ=ztQ&7K\+7/b m uENCgk ow>e_a79777Ōu!$l вl όC޴KIIa޼yj驧b̙lݺ88j)T1XC7R:vs=9M!BDGnc5t-.kh;H2K޾fΡ1Ejgx HeoSizhsz#*ph}D5}K"kR3nzhbNg%66j:թS׈ &&G}~p=QƁ;֭eQ]jIt IDATQZ+]?;Kcch;)>oYЉ:ӠP9?{žkwo0Ì[޶1.V^f\gϛqLJwId"PZhO?7߄FURs=G+?9r$O<漎D ޷73jԨc@>p6j y%Э46_)_ I|z8{K$k3nzk=gHlޅ>ZOPPt˝ v̛Jfr"bjaܸq7Nw5ZDD(تqEDD0h ) b.ƹ hfbUs3R[>9 }dmZfM/غ͸ s߽cɭzP},_Q#FXtQ+,رcXWFobq^+X}J͕ ͟\5{[bD2j9͌K Hc݀>C[f>ckݥזCmwT!BS۶mi۶PP rGH׮UN - ȶũxW[ogs5U9cwt-.sNd Y3Z {wtgoYeV!U׷o_GA6[h܌e.T/&p AmbO>響`1D>_gn}jh6bXe, &c{fܺ*xKJ{AMe&YTlBhjlq,p->,L*suE/gfΞkqQ? EiH7:c˿̱M>h<+͸;U8!jpƶqbbm.JUw0жc{w\[^- 5VW>ڸ5FRs2VqyɌcSاABTMjj16sNA=56m g|%S~W)qˡ;z%gU QkaPtRnx{WD1ז3Ga~3nqݸ#OW1ܨڿQPR.SHrxx/8uQ-D'!Q;3>{"G.~9sBT]qq"7}1BiL!B7a[< k`3jFC? 
e$Zy1uԶ8{oɸܴں{=: q=նWYB=t &K|f\ !.++a7B'Q<_Pu/|g].#k}I"!>k^oHA7d!Dlܸnlo'6lvNBr[PdB!§j[Fӷj[~g/'ph5Ash۷j@;.;v !dR߁7M`$Dh-u%V)"ѷ%|g?x -g?3}*i}.3cߺXZVBTlɒ%p t}f͚ %DZدn6[ۺ[ou;?ͺvΝ|b ڶ{_BoÆ mbP "8*{e QCvvvA⎌V72@ˡ{FHu6c߽cRX#})!DMfQpGO!k wukdY*GRS3Θ.yqtr= Ìl_͞_f9Uq#(>Yw qe=qs-&LN:!s:ujIW3>~kJV… d-7A+A-s2>n5"鳟s|Œg?OQI#Yg]aKֆf쎊̸0;Mq<DZ=qж6mW^!333dn~;o_MP}lݺ~ L\H.Q3}FC?l%ҠUf|`dYdƮ(Z^q{rc[hy}D%Ϟ[bUBR[Y뼢XgO&藺r]w+N%(rssyꩧVa@f O/;6Xy ZԟZ.jf!pf@KDZWUӽ7x5{N=ڝ sOd,x4|'[f#;ְ,%fOmVd_ϟ^ k…̞4UNTQ 2n8<Ǐ~amܸql߾~QlW^%%%7~( V@Uhz?ެO?߶Z|n?}Dik}Q./^8/Ŷ1)c8b뜗ZurW?擹a\Q{rkuc6Рz⎡'n=$3'|oً4c/YL8GyD_F_s* [gIT7,8 z!^Z'.F>)|Tl%\NOy9uD%Ǽ\x|TQ )xr.{}FnuBCy{S..%zXhk7#ѣz?)B~g41kZp8i08B./XRىCN0scT~qJ\ B$ TaPWpƠ^ܛpn- '6!A]ּ/qLo1TrnYNA t!B J*ը sqB!%FyZ% !"@"yXOB!D6"|!\>اf1 !P f6Yp~ jNB!Suv@=' g~=xA_*BN@wU:x*=5."z`0BIC_ -[w"~1|\5ȭBm򀎔PjSw"p] &I !( ;r Pe8*Ԫ2'C|mxYf|ߟЯ=dݫ5O?6mhAQLf/6 Z/2ߣMw޼DDoI묬,Nʌ3|u/CY rԎcƌg!!!|D6k֌;v0UoҬj+rD4~veM68U{\&.^+Wk׮'!ĉڽ{7ڵȑ#ơ_6 Q=Ɓ~1w\jժ/+?1cFѣ!/p1jISܨ)xҤI<L5>UI3c+ZF8[͸ERtDm OrBaÆ?Gy8Ϡ[TlܹsIJJҗ >:up3j=ynA}r`ܸqaWE˫Vqrt478u*,419c!UsM86{Ք9NYsa޼yժ16 8)ScMo߾=O?t@ 7cmm|KNԊ69|5݌7hHiAQ!NT||<&M3~Fե3ws!.;wp9wp6WZl|Mbcu B>ڰΌ[$%sIs^_'^u%Nr]X^F믿ҥKC=tc7x`f͚U#ڜ^{\PCOj}8=;uĀ.o Eե+.kO{vx5[Vmh B8.W_}D 4ΐ!CjLc Ƙ1c쇆m!B]; Zr:d嶭f|FFN=%%O3um3E!O?I&^Aʣ>JTTT{W^dz gk v~Zjs(sglkvLb {! 
&Nƫ[.?77͙սz"%Bwb}f|U4I3 fܮV C5jB!#F6@kA>8ڧO`椅kVqFuVv5G<wu;Į!O \:Ǎ8ҹs`tnHF5=c" YfU}@&t.;N !ujբCC}@]d@PM?kVRRzm9&"::- !'wo]@} ˞efEE3 8xiӎ#Binl;:aZ{8o3M96]P\츶\?.k۴E!zBS؆m%#73nh\S͎3w`x6F!Bu:nc?RT:[sی ;w8p!_Jw~2C7g?bۯ9aczZ}g[׍uTZ9Z!𗬬,{x ٿP t3kUGc GB!!33vszǎVZ&%sQf[+N2'ߊ]B!?^fbl׭[{jgt$&kZйɄoMk!dٶXfZaY|yP!9:ZRs6'm}w}B!NƎ;عsRA^fYhp􊖭hd[ER24oiƾ" !'j̙Xl<Ï?ȠA^pE݌ܕ@va!mX]]ҡ?0WIzD: !D8x^mbcciԨ46m=hCMjWƯ=Y雹y Z&%jhvxuJdm[ڲDҷo_6mڤ;"##㏹uTK.eٲeC^q`ƌddd/;tfx^5 mٚV 4;V;"\!*D5rH>cݩ#h<1-e3p9_d-> [yKZ''BJqB-KJJ[x"^/wq1c,<]woU.k`oM4\Ҽmkr*8!+Z6=BϾܧ1Ok)z;v,/r 3#GW_Tlrsɀ9n3f iii%4k$$!'SJ;ֵI2_硃vhHjt~Q2ʕ+ڵ~$!8n ,`C)\dj>3L0A_F~vZn,Yb?EuM5.&(:uDjj Enmt}%11"@֬Yî]жPs 01ߟX}\/^LIIpp&Qv(A5 0FE +<0_^B!h#B sN #[nAy'UA64.*7c' !]J4RO5B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B`rN@i@ ?X\8/T "'R[m Q97iN sN@HYIqzNBP;qBn:MI5%DOALFt?t%6 D#hrQXR, cF K,j$?FpGDdn+FԢ јAzFнn=5t!Bң@#h9~)+>Fɶ8 ()D )ryO˓@jUo6BmFPt8]c:"Dގwf'}a4#|oـ * r\g4CJmqBb\Z^~tP$Ni(PO&˔Eܡt8naFߨt0؞xm=v@-`j_I.0+[Q|> (/QZ0"&ߥ9!L|TOB`(0IȐusc+-ň6U"$]7&Jtrt۱s9z :1![$DתOA7kLG  ēHb'a|C[Ҙ'VԤDZ\z71ℬ}] 6G5:v&-e"$ a t!BV<^|R=t9u]s7ј'QYհq#*8]u@<4ȡ1lՌFw"Wp,Ҿ/u/!N\g"[}AMU3ߠ!=ɌTvrܴl8~nbŶşdrhj SeO. 
W݈u!0X25\v,l DCM3]gt8!j-)f?[R9%!s]JB54\A^Qt8nJ kLG 2A]{`oQ>ذӅI }ޣ111nQ@3osz* iC;|y#kLGaGٓau\gͷLAgU (7><ǫiC[ p3/bVN7&Bw4#87\tD`-.ob_PCʆy~6e^4G&,ܽ*l!B69/kEy^}}>pW%'1qsz ?A/wŕpјmfeiLGbQA5гPE;lǛ7zgێrzS[0~%K^A~>_1!N$`y;4#BT1p5vl(e6p:q.>ٷ b /uJzQ{\{1íbKJ&w9ՃЪ9|z\5)Qs!=!ԈBTo&~p7-nA-.јyotJuLP!"XYTYr 9|b[,DUIO; !N1'"[w"BƸT!IENDB`pairtools-1.0.3/doc/_static/read_pair_UR_MorN.png000066400000000000000000001005671452673171500217430ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxwx?ZTEXbWk?;zQ!`EQ JB893M#{vy3;̙s(`[B8 T3-"B82-B̟@W"2y8?ۏ(L ?* ж+J0L Ҿ"[O>KB&AuEL  5 zQ^BITAM k G AAAA" P\צ-j/JzL^ڊ8Q }SmaŁ|Vk959~Mhx}r22F}``v8!)ˀ_l@'?c )T3Nۓ dϢκ:mzd_ejY8=~{Ş?g{в+v/i2:mO1e6|q9uڝZm{|[Yi!I-]Ԏm [9ƵZ]S>-߱glhuhٿ'33{ 1Wy _fnȠCҤF"5omTKvAsskO<(˯)7`OZvtShYusD^x墰+EZ}| :$6.)g\e䉡C~l$:1z^*aCkԈj=XԨcV˓: ?]R6'>jԒwkWjׁ #KdFlׁ6-O-]bLKxU 7=\);yӠ!l8s޺ ʛVzg1eFVmӒ֙!Ӳⵛpi9Kђw +_š%iyCkDZ_-Bէu$m"jNݻxk #ZF[b[wGZfŚI\QѴuhi;9#JpZ>zR$ß528/8Q.tF5)([rh (UD#}h‡_9Y,0*)Vڏ|H([AN^I~!KKQ/Ӳ|(2Q1qAR ;_kiR ej-GbD9MmiVU+LasS2e~wJnjDK苳dT 2ՠ!,:-vN|ѭ߿Î_&Fl=v2N:aFT;L#ZϛĎh-NݎgAZqq<ҽ' *n{VW %-A.0}6l>-Bv;ڦzlڍz:e|0@Z޽˩eKidLˑkVs'˴_$>Ɍ5%36[dPYN,JOu/dFxGÇh .jqQx 0JZкEqݰ4 hQ]s_RMiYu'%pCZ~Nw4=zcZM)NCӾ#hEH :@hU !߁y\bkK&ۖQUR>86Ϟ ePu73Zy͡%ݺ-[y ZGzmNz1-BŸ=hP |,#+hiXͮe>yU(.zP",wp]#jUi9oZͿ~0vXZ7JͿ4e+^\KK?eNDZ4ߨ$BfB+OjSi-^۪xw̅ABٿ6}N>y`@̛ÁqZKKH9 CZ~cvÞh|֕f >g_mDˡ1[tMιֈ!rz^yxџC]Z/^hDKh2ףMiGsp|(#_%:z He9UCC 洼v9FH :@h+%Z"{VV~eج4S!{VYcbU}XMK燊 ǦeHmqsmgلQ 򵖫 BQ׃i6 -:l5h 'sq&qjgPR~S5\QΜڏڅ6-P~k'kXZ KPX7eQEkzJ+3Z>mZzJk#ZJLRPaٓMV~>-N?A'r1k=ikt-A#e,(ppm A|Ȳi?q&m<JOgG^4 ,qB?oy,ue[n|ZrZj$)SBSZ&&>@jj?zQqhyթ)-,xy"hɊ+q\:tk0Z..eW.uUŪMtiYjӒ*V9[#y0뢏?؋Ք9)aTT??bOvǟ]}?Z.ǟmF>on~=#Zv>ms\ n~fm?gwgDKYsIr$ôd@K<풒ؽh#l<|I4Qn3_ Lښe@iINy(5*i))׆6L*pUnGj3V(oL˦ϟeWήe9Gn\E:,ڳWCos`b6Rk-b ihRoZʹR}atQ)G:-%eq/^qL^>pٿg6x+8iX+?IڠzִM]76L{!)ִ3,0JiY;?yF(]WFK3(%gwn=GˮZ#zT~*zy`HE@Gm} Wy(i2/JYW)^F@/lMQ/xl ^IcfSݯY~K,ҒŲWiiw,zdٻ3Fp}[#J x|='x/ siܣݵDǛ\7'S{[X-tk^7ii?U<>,l-]KYLzEna.9Ԋ=I+,侅?Ǽ/` 2Z( aMAwԡfw oKx'u;w1--/ϲk>̌J@ KXcZZ\_ךIc~ 
fo`V_mqfolfچuV68+-A*zғʐf09k -B] k4%3Z*a_xWڿg6}9-JX̦ϟ3-iY ?{ƈAgŊ*=h\F |rA-AcynqLې!$qEEnsYbAN'"?@I[Wm?в)-+'Z{2K,fi|)-Gе\g<"f<;%ֈ_:{deHK(^G'K&c(dSJchT :䞗rV{/oF49g]|{64>j1%,ڌk^aϟ_r!MzjoUv9ˈA[#u0FLT/ۮ(]4fRBkF?r#n#TҒKxʖQn8A}t 3[m- ,/)?-|rm_WhA9(j!XAȠCx` :BC5 F$KTLv   DeFޝ_@H.`:!r À+yuO<\ t(jO 56@[5+^uBeyX܄J^@QO{j֠Nc+Ы,`iɫ ~kU@,x{ .. i* QlBq<́4r㼥|{{1:vnչlfRT|>,!T yW0 * B} 8GH{&E)+c8Ө|c~Y9P&5Pu|&[)q{PWIawLLU9xwQÞ? Tr "SP TE?!)-B x/|=lLH^F3rC Bw1eG sU*<꣸Q^ar8} ߴCŹQO.ͤ Gtغ끢 @|>sQa*v p j0Bģ @=QS%:G-{3Šz7*@ 9.GEy& z!@b@_?C='nEna-UY޻}UޅQoI-tg&}Pnñ?ϯyP7P>m4 4!h.I'' Zv<*3c;Št`56mTQ1=Ix9_Р10㉎_caPυ3kY9U(lǢV)B`~P'y}|z-{L >_5ʒ*7 gz%Ȃ7CqFe-%},?5cF H@qz}@t1=pu P^PQz%yPMFme]~nhLq7(rHt,+G2* %!!t@<_(CQO >g2*$y.Gn"&v.Fͪ{W}="7 go |epfX^uQDqvnb&:Gk Ӆ HSនA!xLawp)U,Ti0=\rž7^eP݅L~*2+'"{QU!X G{BT)QaUXаpqVZ { OFy%Bb!3J~}}&ߥS/gzoP9S6\1@B㦅U|ĩju [$N]l2C&e(o,16Tm_B$YQA؇PvJX_zn V1Q717eD_}o_9jTz%Rzϸʔ8 BIDfra=/jGOP R {J IDAT݃J?!ymQ'Pչ |"P9% ByvaƏۋWAÿ y~.Ŀnq=m #evi!BL(1#~!*@=C(CU BIڋ: #4L`*|I BhZ@=tt}_C Dzxx5xsz*jA&vY9Bf3Z\mP8@.E#Us("UI?lO>pQ5BqaP8T1(f D󘇊ǹJۀǰ ϠrԽ<soAf#F& ]BE/PQM ޅ(syucN s6Q(?uYHi2ÝA'Vg3Pj P>Yv\@7j?P9c /TboY#;pc""tJ?*)&BUfܿͰPb!!p.ԀAp.(~ݫh:D=痢2Bp I&AE?C]ɔ(Ϗ32_]|:GpJn4OcSBԍd:o6c"T-R|u q"D`%:rB~kØ+mk(Uk`EjgP8xL M#|GU\<ˆ4OhbNy?l/]~9U| oxKSQNFD;}\ H p=0DzΨB\A &ycqhj<4ad/.k8~&^BFkwdTZwrBE9SQ!hmމpmqjo?R ű9Xf.tc?:IB¼  77 юTX_3}(n6G'&i4GDŽD8ˀѩn|WixP5MQ}f!=B[uL=B`4UÞZ` f05 B]UU=7`k'*AlX)@{;׹: ۷Gz^[P30ڄq{t@Mͣd?xp I۶(=۶S5{ap.WVGpI꣏6G' -exo$T*BX.Ӭ=@?hc#.ůǰH"T1׎//P)2Ryʰ`ɟ'R gؾ7p6 ¾Y* BF}:25  i$Ľ n{:" S#u`ҝ(c`!e|.h[} %GfxNB=xޯը4Ƞ_D~X-o3_#L$pF(yLV"m w<i&*pހנQ &onC>6%XDG@N2,W24_OiAމC NogTD pFT @]lU)P]QpQN?OhAዚ VFgq!x\Koqa`Bߧ@_r'l5 t@y]-,{!;5M B]rTS_o&.8 'pB7^ޗ:F˛qw)m~Yd8Š|S1,!4HDNDpx^ ATŴ `O°1Ay|K#E LBc[ M!55a'oOaTC !0Q lbUEF_ÍxTNY9B9H. 
AbM QkRx_cy!;@x"jyf8|z,is*d%_ٺ51;{4u|$JɅ;GfABZ k)~}ը""\E+J!45૟y!A%zTe8z_N";g>+ h.*~N 5Paj !i,O TE`ف yp5Y)TP7'TAUoE* -LL0#ǙP п+\LsAcѿ kJ?q_# hBpy؄ﴉ`WR9m$H1+RDBeʐ{&Zom=AW^o.0śdT=cL丩FL A& } Ƭ4+G(;@r"(ՙJ!ޯUJz O!}@x;֢&BGvj1×.6湨xȩǠj/Y( ͓*MQ:VU BuPK?2"HA<Mb3f.WT"'JBƒT=PnвQ i Ϭcb.J!CI>jSy仺5Hcv k5i"y jSx aGr"Ih؅g BXQm8X@x'!>nX  k)8<ńvK!' %4#\•(}s0 P6 k(#'jwnTu/P̃|,_G̕>t@<{Sn>G56SMAeg4| C}Ь@99?bZD.̞Gn MzrY9B^'D&i1pa-e:;½B34Pa[^cCLg`&\%2^Y8:nP"eGAГK {(jۍ*bJxX_yhP~N AjVNDs۠;6RvX)8/g聧ヰPa_USfA(a-1 ]ea-1*lOQ k G\(S}lEM @NBs"p'R = ;5ѥĶPBQ_0Zߥ ڏbVuW- w| LA%MEIi%`  ;Fxu kp t5Bw VLu|ݨ6Gj@oy|@nN BT7-Rfhd<_.yh%{PoN cѰpi)5Z4Q~бj?nw(:}#@0{c_+PcCU!y}-jXKAA{,B\5ĠJ?,dWA1?[G߰w`Xr-gʑ)~E ukO0Yi GąYHy^=llZAjzިȡ!*:sȭU-rg!h*MgL@.1\h0N+wofofOj7Ѩh Py}Ҡ^I֡|;H_xP^TdQ$ BXʅ +v 9.$wdVN@h ܈M+1u GָQQ#~ z LՋXA <-yڔ8A67AA5 t].B x֠4C9Pe2 80YDf6ZRTD\X ; 2-: Q!9.T|adPЂDy4o7jCOF}G_>74uU*"ʉ"IUdˠ~QP@ B$*]'9RU"ײO!B֨aHږ B8=8qAgeXڠCc TUr(/P4T]L;uh࿨Hj ~!"J!U"BĻ(ϒ̒V(i T/TB A10Դ8 UW(jU";EB<*\(bk7 39?ٻAQ);;hժk֬)w*ȑ#y |`wO,Y^zo*PzhѢ3g$%%|3}t>vm_+cS29<#rʸP ,-eWT "?e0TZhQsU+xsECYgf ֠CE(~^zYYp9E-7Itghkڦ s05J2y#> ]G?Ԍ>fƏoNhڴ)?ǀ%߰OVI9e,=ђش=ɧ 7O>IVTK4Sɰ؀C֭;wnpиqcfϞM.]o_ \꽮} 6qy/D I@[{=BNȴ`zp]^ E& } j;u;iyV@lz4;8H2q3,)ZV;n6=j'6HrK*q뭷Ҽys[`8o*ъ?~<<9Ew<#)!9J<*.F|w޿ ABS.P@h҆u,V$'@zL7kd̈́SsIjVǬvlz4;`çO8I闎JL7ll=zB*ڶmܹsIMM5*@Ԯ]3fPn]/e yrhذ!/BE !N?TaOOQՇ= i@7`|/N~F7| @G@4D}?OJ :8&!c˷~6Gwn-ܦܼ|lwOR;]0/ q\47ʱ Q%{z]vyޚ.#PС}%T&NȐ!C@6*0NYiAdoi\[,0c'6mOS8Q,m;˷| 堻ia|mڰEn!Ig2p\"zөS'n&ZA(waHM_iHسx#m"uTkj&XfH>ec<Bbh9ș^*VZ5QS^րCN􀃇ӫW/[aK% :Xz)))\y˞tZڳёuR< #8{#@a+./y6a^8+mb#m"6Fw$7ky3?<#.gy*r/r뭷4Zjŵ^k25EW3DEI4 2&LضeMxA饧M^NmӾדPWNǎޥҹsg;4hPUvd#qr_ F\ \ GE0,FؖEU() #;?#f"hl-GDKn/TwHj7v[kS¼l6|K7iDj+ń;Ü9s^9\.FqLzg?ZW/t膚P( <@TD=" FUX,<Ƭ'Q4 Eّ6OK79&;&_.{DiG8CM!OF hӦ ݻw?ٳ+A@c"ߟjL P?E]XĢ_6a'5@bi㜑饛H^]堻tŻ`>ĸ& 5G )%~SP}B\IH]|5 v~#;VVZgvי4A.C !>ѭ9 'MAS a3*Ž' #\i]P4hKrQ.vMiTDYiOwMwMΖ9oYZi9AȲ`7_=i+PY+`JR,mEXw:&{ (6wuN ?LanEeX*@ǎILLճuJcqh۶mi `=vM^Tv6^GġgVѩC9A щm]ҺB8 4!D(-6Ѭ Jhb}Mt]'EWW?Ih DEa% 5DJ {`]GJ'|`؞|\U#0mGߧsx]fD.4l˩q,'ԠHQFf >0`7opt6j9NwDnPu7&I&bO*6O9/# 
A(;IQ(%qt`T @>0k9@C[; #ys iQ QqըR_]?"gZp`oawGT ;i[جcv/\bԩS4#Wz b™:htPGD֎Zj'?xSf1LqRṈ|ߓ{pNy Q%BRRRPu P2PVuQ&v:ZTY l{l8U~.e˷o8;11%͵ gL4CvtB"MzjٺKq [Cc"33<\zB> 8ɧ!f=7q[?] ߑmހԕ0Mr#Gy'8?ôrۼI #GxXCYYY+Bqƙ>0[ ū8zGqӳ`:]0m>'vQebW:5i ahuVDAGA{ĺ0/j';z'k4#[V"B9p,'}AMbդgqvXbTt,O9 j4ic7}yvƭI9Jo[6jbbIr;/_HoxZ;v^A8gApQpo]%2 w[+wHL4{&4?n0LjwA_qxINf;߽G1) Bg{S^vYz $KIl#27}#ݡZf4>}`o]]QѪEYs}Vbuq[zzQÙV{ptzA^}(tim޼{Ap2]\sVg`}>P;8!TTfɶK,jP"@$6HrOPQ ] mm.dq4F fvƬ7JVƞų+!믿Mck.rrrJ[WShuΈϞu"2SӦ=zz[>UV;zm_xھm>p#m轸cp~rY,]jϳg7Eݲe˂I'֡ %g6N8NJޙ]c™RfyRy0Vw0i#m}_#* cCfʙ 41!{ee$N;4^ڛ;) O|=kw61gJr&Є$&>t3 Ps?ڟ?eΛ4:mUc:V}͆ 4:upn闎e.0G~#''ĆiRS/+VZgv]|ߦ+B萑ѣG^9\.^~eFmZJP0a |XUkҤI9`jpR97hvM:iǰw-ljw~lE@4:u}hyBd|䞗X4-_Ntȧ1eW\6D>rH$(gA;f{ Ôav0j*t wHĎѣG̀~wy'SLyϤIAY?P"fōy|wJ$:Rd 0-g>/x{r{Z12WB0$v,\ӧ׼c͛7STTv*Q3qDk58vXg-FMe #iE>Y8%Ipvxo|;0,VS˳s5Ӥ[;>jŎ /b7Z|Mt{B'1cưwsTGʥugɒ%L6\D&II%4ٛAZv[;^?k|pcleW) L5ecwa96 ?}ɟ}>nqW烾g]O?1lذg'm#-(¬Xai B^O&BfL.MxUzyhko7ʳc baeA9!a[j.z0..ޮ"#Gd„ cƼyBguӢW@9[Vq\Zw_XǓҐNoǥvenb+G0{A1c5-:*Ð1k,,:s> 8+˹3j+ txGCYf?!IJrl]˯)0p-e4D>4hJk>?3QaT Ȇa^kcOJà&w5YfJ3ǭ?1KvsѣGc Vk H⟫E]:DDbL+ \Bě?x1xFfhEq,cЬ aa"+--eԨQM[ {8ǥۯAo߾0g.\йy !8z1beeeH=)e\6B^P༽Jl"?e?̄ աW饢8[S/%uyyoMd&Ԧ-iuZ` =ĞEڏ{ˊpQ`ࢬ`p$7 |־1}͎HV~? E[VPQ8?Uf1kØ1cx(-[ƹˮ]Abއ蚋~%yyye$RyKj<4H99i)>`//hFY^Yג֬9;t$2+UV1+d&6g};7=Bξff .sΉU2|p^z)#y阵 |-77}f̘sirk$cɇyfps wر? #FԘC=zu5 mL81ojw˹;ϑ#Gy_zzzohY34n7~x؈$M6~G}ؽ{iӦwzAmB>}v/^0`@1`u&Mxb)UVVz7Ք^RRz ҨQj9x %%%<㍢B**N| S<ktgC!Q |@˖-v\o***شiiDT6mj<ד(** ]l܋M1߫O6dggӴiӪ^/7n ݼ3=A^1'""$(* t;&$+x=weLDDjmrOn'!ˁ/AA0mo4_DD$W8ׅ /0 ״ `p\DDʀ\`ۉԠ1fVn'F0+UT;A@%rԩ29Bmј?08DD"`If{Zc,i13;[ajpq@#̕)7afR4YvG(N$֜M uxk--"""$̚J>`jiìԱs1"E_5`pB].aߚ\=VDDD>0^&LDDD⏵ h>Sz]uyq9\EDDD$\ 0a˹K1kE}Ҩ]cNzss3R񙈈HD9{^znFEDDDB%P>l# |F=DDDֱ@)`G=J1KnD G9@F8bH=Sr9{I'.&0*wt| ^Sњ";*)ƺ$w@s 2]Nz`VlWK#>&LO1x$P.fšD ̠tA`, |Zv5H~v"aw+]ENJ0ED$NomX+^N :xKDD\ D v"RxxD 8DDDή/.동D"d:fj! 
x ɹжm[RScP^^бcG]ADJ&M<4FkCzz:[GرC*~h޼9ii=s޽߿5kFfz,"`ΝY& eTӀ&ֆۓ^FuTPP={B7\a T5e&M4aĉ 8+R0ԟ.u5)c' U8W:^XkWP1ω l0ݞ j>`Fkψ>'}̺%/"RCeԩ?6Y@  dX^&O\uU 8?ŋ[  Uq/z439 ɓ]L'z].$%1<7x`f0{OA駟UW]̙3M7bf;r/+fX IDATw/jٲ%f͢)}u%:'3o {8II |0a  oq"7ZnͨQ`m:XbJAڷoLnN%ƌÇ~ȢEMahV, ?ٸqc׿&զ|g7wlK:vKˌr\ngryN("R&Lwq~唂8&RS)xie$׃+#"X5jĄ N#HΝܹ\U=s8쳝~V.̵Wjذ!wY?zLвB`ǧjͩ-[E5Gp=z4-[t; &McqHIIGun @yũ@o=jɹiul*r0.ݍr"zv9ӈaÆ駟Za71?U?C`ugsWX^VM\R6%TKx< ;wTs ,>Smv*dffr)F :￟;wZ>εfggsgF;;\ĐeS:wM5E$q%%%qI'F7x`5kƾ}MCѠC$Z!C5q-tfHW;w]I!WvBƙī^zѫVqS :t(SL6] a}Nye_W. :ߝ\6m|;p0sWhED$񤥥q饗:7V.IK+$BXuDye%-юqq3 JDDׯkgk :5p  xoclӖ>?|gnD߀AW]J>g9hтvڹKTT61k:6,rK^4NIj""Gzziͩ T)S. *}ಉְ3Wc :K9֝={V[brj&vjs$rS<@k ۷p.;Kfפ)ED$ٓTGb[r\~S^Ys˫/xi2{Ѫ\""F}u<@3͛G5h*C& K)}t0x<nRd[wZH_ | 0S֞ekצv|vv"תUЊ-<@C疆 „20e Mv|nc{Lˆ{45MF#W6qQ},]L4Փ|HyH8x*4h@Z'~7dd 8Hdf-O">IչT6ѿEˠeeo/JƍiED~r=@sKIIIT{`grQt]FU+)(GDD$~kZ!]I[yu7IM嚮ҵ|uKTs!';p&.YY\ұz"""8">Is$>'WJ^^X4hGFe̞_#% DDDp;[mՄh3hݖ4xsڠL"""uzٳgsӾ:.l߾<"&M9[lfOYZr2<ގw;0a7xYGҿJOã_ݝۇwQ^Yc%L9}: /4V.vf@ċrLu;\vQ;^xݭ\R3`0y]M(lӖ-Zp.f[E:3 M”V2 V""u`/_vpuבv*Qz1cst|sq]wxHdf4N]tnn.ܱn0zឯ VpOᆴ,"r^|EFvdM 7v*Qx駝V/:AK_0p@Sspg2Wwƿ6mdK'e٦IĿseԌYf1{l6lv*Qk99gڵknރa%;z7< #/ CD$߿?OnrM71m4Sz :7=V.,KgN2TSfw̱/o݁)wa+-׬BD$ތ7.}.ժVyy9cǎunZi46 Nqq/ {z7l-*bֆuA KG CD$< zo^0oV^x1߹i#PF\Uw=gΜzsqǓ( G ۴g-ZײDDyg\٣ |C ߫z)֭ =C0 ֮]ѣ.CMVcD]FWtbłۣZn=tAC3D>;3SFǏwu65$2, n6]L':RS[;+,ݐw&ٿ)HE$q5R+?]uos墋.ip[ƌܴx B6&Ld~1=}IL[܄w{a<R!"xЃq,4v(30|pƍnV2n887R:`lڴ . 
gBs:efK+_bHNv<vk'""믿fM/av&``O9(H2M6qWR^n/\ _C*[p,-5zhFxKayy,$6m䇟؏{}>~8EEL_:5+*DD4|̙ c?8?#<ef^1c#87܀`,Qw#+9sYf)E^rRwゲ2^Y2h[z Zee""ƺ` [U(o( ֭[رc?ss1p!YuM13MN6s3tP:t@j/=n8&Oj)4gnf禇Xr s̱ߟ9s搑^VuT\\+¸qB''_Bkj[@zس7<Lt;*\Y9ו*.q]\9 DD>{b`l鈙*QxJDDb*`ILga%$DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD\vR'큖<(R."G# $WN`Kv@vkى3<ɽDDDDD_2rb_g.ئ-}9#&J0Yw@"p ~׽'SĕA:1}Uk`'_)-ogOIONJǻH] ڟYZ~]8͕,D[Дv\1ǽlDc@tR%.m”Z. k!}IN'r23]LGRVִ5Ǟ{Jx+Y$8_Wo5ʖx Zt!Y]Or1:YTcm_6F pq=\NGYAΥIUc0W;kJD$AFZͲ9MjvI1YHJn@eװH丟ԠCǔGvZCL`dۃ^V"1FW8BD&hJ{Qě3KYQ :{j$7oؐtfw :]1$OIѤסcVF#)z;pݶ([K4hFْ';xQ :Ħ{]0Y_$̎>L9D#/5fÁ"HbOIop[O5ʖspR7s1xp~ Nj"6}[6\LGY)ڲtDijNìe986q5ĜTq+HkښvtD$k#stMkpW?׫fl)<[Dsʺ]Yc}&P"zHP$A%a}Ohj-\veK u 1_:snt]V0mrH&YAaH SjT#*鸽9;u?7ht$J C1-tY>p9p~X_U܆bL%?T 3Xiz};][mZ;Ar0gw5E(g_i)/,_b:"Ge4 ־19b+y"V5:M5Ӭ?v鶭.#Rkϼ%X71w3X 9xny%M/8?M"ohN ,ڳtDjm/0 w.# ͏f) $9E@.DŽGo$ q1˫]$|iLEaI# 8/wsO$xiabC  +CDbk8m5 /r1SŎ|sgq{pux t}f--/+Q/c1oz!;_ɑ/|V´4sE2kw1vp׮v/s+`Nh|߇y+Hk\,Ͻae=X~cYغРC|xq?ՒW~ҽ{ӑ5qj8⿆5#="0h")e+xtDj-NJsE[WeF^|UN])ߘՠC$Si_^ԇW(XH ZǬy}0ݕ?۶Ʃ~ p7 .^Ll/@ye%O-4Ob_l|o*%7ęyَ0t0Dc#f%0sZ5f]U浇URJ`$aL}+ ocfZXi`A$|1XO{hI\ UTdt7#'b,#1\Ĝ˗xL]_Pe2`_~y .^`mYɶO^u1Aَm/1ŒS]hH8=lg-aYq[W" ՘խLq.f Mq8./AbzJ^&/ Wvyl6==]LGbP>Is@=PLMŜ3k6qscbt? X7w2oI\)Ü\LC\f$jfVALbfhr(fHktbnokc㹷| ?Ӄ?ؼor1Z+Q~WV sfwӧjcI a;,~ #Ӏ41̏/+s1Zl{~ƞ>p1/3'~G˛]W} El{ 0V԰ǹI٠930tsL6ΙY>6dI6fJ5hk1ח Or3Aqp\߂! 
wJV%X3H|Zw x~o_3lRwaj߇)C'4oi5ͨׄ$w dРěk3AŃYR<kgA78ɹ> :.IDATH I=MDD$LXm{)nMhAEL_"YiA%Ma6F=#iߤ)⪼ \DFk Nj#"""""" `UτLS3"""""".ѽ :H|Vb נ[DB IENDB`pairtools-1.0.3/doc/_static/read_pair_UR_criteria.png000066400000000000000000001276171452673171500226770ustar00rootroot00000000000000PNG  IHDRuuXWsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxyxTdOB6 " "Ƣ"(j[mnhEڪU[Z*(.("" !=3qrΜ̙Lu23'3PHy~CK.@#>0A_ \0p1$"""qw q0A^ EDD`nE ""D==~ .""" ~o߾\yt҅$%rlЁ$k;!$7x`C|s=wӷl7xOrM<;a +VOЯ1hs7pY$&&2eڵkވӧ :2G0VB{Yiӆ)SpAZӣ Kz7FXW_/ۘbSXXx׬Y '`$ume Ne;O[u3(*v|ǚ'=lz\~xՏ=HHaM/[笟W6 ~?{Nj?uS'[usߧl7h_p!#Fh=I~ իC{^` 99_c[Z3fp5װg. IDDDZ8 """Q/pYddd2tP$Y)@F}kOAVbV{rڲb/Uu>c~볎}­V`OǤߓz|'ۄ[InɱtU|Xum󞷍]_b yҌ|>=G6n9E|MƌsoiY."ϟϨQGLDDDݑ,; uZZ 70ueVcҝ~׊fUw:'f.~ƪ N Y7>;g坿QUͪO_bJߜ4٨Q ~1M1ZƬYb*7 8ӧY."""ri/"""W'ŭ]HN׋3up16:s#7ĆoůZu=̪lnu"1slx!UǤw.}߭.@bV]]RȖ^{҅MslLœ9sHyfάYbmYg-ƒ>h L^roT"""ڋ6ݻ7;&`4|v*. CɌ(-N&0v'mmǷ}Vқ3o%y.&IVd[ubt::kj*w*}?)O>iQD؛;LfϞͰa?{6m֭-CDDDĢ%y"""2vILLtq8".ζڃ17̤9NZv7דɪK moһ"%'n꽡7 t&kl~iG7!٪[ɮ3݈:oN=CfKwm۶̛7U s=C-KL{"##I&9w ,6ײz p&6:bkjOlGFcҝEξ>.5h{g[UBC/ {!J׋,O_NN?0/C?Og$d[u-|n^71Mo?A;]VKh'zA޽ZGS9* EDDZ>iiitͱDz𭭎.ե3h\U@n_]7Bmor:fsζ7M_Xu_kՕ6} VTiiEl{[^DDuh~ѥK}طx lI}\mtS0p`ۛKnty_)-7>ѪK^l{s`lU'an}jx}"rTD/"""[;ۻ98Vo78rBWԵ pM ~l{k3Ug7=*?oվ4 .56xǤmoJZĞ[u&qM|g"OAALEHb~8Dh V Tmt>i!ho{ۅqco{s R+>Mz/xѪA#::>5ql;irss88DDD$z)iݬi;Ouζ 9A=fQwU\ GۛMi~Iڮ`ϦYuBz.j%79߹ *vw=M9BhTfhM` :&gX?ݪ;7ZuBzϻo^CWcҝ1>jڟw{=Iٝzk_]Ѵ7& )(iݬ[n=y",QD V էJ\j&ª+v|ˎ^vc6:ZֿNc%9Uox} ueV֥FL]f7KD Zuui![fLYvTiڋn/oNYrvlufqRQ?$C/#ԖXuJ^)^5FO YumYmt9FX7ucFI{Uo4nr%"""""""`UV9*`.oBlT0c]V]W^8%7e dx6Yΰmʢ V&̪o1gj`F޲eC'"""B{[`Y,XDtf< i۾L\ؠujO\<'nյely҂mtDr""""QLe~b VXXD!ζ7iX`'XvytyU|=[lSMo=xP 诳chL6,\omb|W.w&tmxw}$WUUŋ/h?Fp/"""^DDD^iO>aNana8+9`#`ܯHHϵuS#PWcՙ}N%{hޱߔoڪR2(+?57h?88G$n^&3 7㏻= oc7on?MT/K/Ě5k\K^ |aG"0%QpѯrfqNIw:講zg^Ijzӌǩooӓg^i{>l㋣ewXu]y)ёp뭷RUU0CqM7 JؕCMs]Dv( kE>ذVƏ}*p~uOĎ^b&N¦ Kɹ]0lkP<Bmt>6:{Z_k +_?ao$mO8=_,`h$R: ŋ+Le <xG8(**"""rLڋ;.]7̣>D"&=o#wKIwj *gӛա66KߤfNO~gX33 'ij)^.;>/Of_KF߱}t}/3M?9`0o[L2 
0֬d/~:&~SIFEDD0GDDDL/ΟgnfCϰh{oˀYpZu{!wyVc46j_R*]/UW̶xl3mg~ 6Nt.r!{V/hfK/tRp?Xm?gy뮻.~ \pDDDi """,O@{:`恫-=[ee%wq=}C0DDD%Ph/""" Q_~ٳ' J3k֬O $ƍwsǜ9s"4"w9*ر ] ץ] <0|p:uވ({na?p5;D nDDDl)0x= 2' -{͡K!<7$w#""(L=؅9x 4+"""rD4^DDD\ GDD—=n=&a큈} B㐃2W@in@H*8 F nG>8#@i:4 M EDDDDDDDDB{j M uޤ^DDDDDDDD(X`oRp{{#^ґ&uIȑQh/Qh{8ޤ^DDDDDDDDq %k`oRp^؛܋x 1F~ 96iz`8kMCDDDDDDho5ʹf74^DDZ53DZka퉰񈈈N|k ?5E=H w5@b%"A3Ss""r-^ȉV[53c9{`=h Y_iƽHL{qU8{;͸opvq/""-[aƽl"͸h0oXϸ?H`o"QN&܁I w`oRp/""-[ZCpޤ^DD{917؛܋D1H&G'RID#npID# m\pIHRh/ޤD:7)ht4-I`oRp/""n;=1e`oRp/KD؛[IDcnh{{q˱m1&"QFDہI؛܋H4huK,؛܋H5%=G ؛܋D؛;EK`oRp/""nj떖7%7)Hi\-.oޤ^$J(ޤmI9[ZbpI[sl1}3&"Q@UGk`oRp/""떖7g`oRp/""Q܇)7)qB{ hM5ޤ^DD"!--!G`oRp/""-56s`oRp/"-%7ྥ&""Nn>I4H׊>BIKn@bOK <#?Ln--@YDDZH]x#c8_GDDbߗ0!+טi[ G(3Y\$KAj WQ-)7y 1 Ė:5.E{S}p?lGDD$6UA7Gqybb}`G5qRE"O4>Pt{,G+!Τ~PCw """>Fpɷ|8 H XtF5ED$0?}oW1_3KHZhQ8>zܯo/gPXDD$68w{ܻ0~|"""V@J<-RF{"B{ +S`/""Ҹ+X{K{(9r+X{MDS`/""rb5W`/"",ւ{"^"F}(9v+ V{" %܇{^&-=W`/"&q GHi{iZjp^Dڋ+7?"""ͯ ED5ki{iHF}Q`/"">--W`/""r{"r0G{Ӂˀ  JtJߙ&nh@C(+h qCZ| 3n% nK IDATk]4+ ^Jch)w"T*0 Ht{ ǠXP ޥoJ`i%aVH9큈HD{p^DD@+98]Kcn$'m\xmrzboŎo)rU?伮V, Ltǝj{ ;Πv/. N#dؿy1aI֯(k۶MM=?9ZgC~pym۶̝;!C7fRTTC̙c?L<9Z~؋:q2"`%v,Y$ kh1s{ h#G Awr< Xvp^H@#x 큄zKuB;a tO3<mڐ:jT Ք~{/)!;UmhUg<[u%ӭ:>=ܡo}V`xv ImCJZdm! 
ٻ ^`Κ4j(ƍe64_"&x 8нQ5|f͚5\ÿ/pY`{#ka|i0dpŸg 쥉?v{ 1cVzif]1RIcȱH"ܛ?^Fl7vKWqn$L 1`'Clf{ァ^"NY߶h0%!hL'{\*B+]˷Kϭ:ϩ_]ɮOfYuBzYǟx% IZS7Am<}-mʔ)̞=Z0;*1]fg޼yp * ^/?<%%%'?i;4YBNNqqZ/-KEEu3'$$5Cuu5{ zc\J`ߵk%ٵk}=qvn6~ر#MlG馃`F+h0sF͸qh.BL<իWEMSc:$cMx 8/ofztTWxJx}70|< 0;vҲc>-4h{/w^xf8䝋90i(NJ8kaH82_ӛ3'a#W^?K~OgcsuI!7A/xZo7{]f+W$++9s&^xYaսlKq϶;`wyn͛k1nF0 [`߳gOyN?tFu}Ph?`p']C2q&&Ob1_M@F^y1b_| _ PQadcp{aÈ푸I>{ByK_>h3klxa.ZP]UՏN| qmxT|k3JWϵ}0%_-'L@B‘I_s5ިJ&0 zLRwF%\ IAA|AK.SN9~f[&Z= X?@z!To}XhQv'N䡇_1"X̰{ѣGӷoN!}ŔmoBMMmw2S.c[nI -WW9VՎ>F2a5G^xU:t`̙4~r 8k5J;`,F˙hv&c7nnK0ǭo`ck/ekSu\>Nc0,f\:6]Z-lr:sժfd)Xwp'VK :9 znn0?`ɜx+u c{9e3ָ7*No-馛2eʡKOOW^OXT7܃ó>Kn`\ID|O{v!f]i?[4 9qYeoª ϽʝVTeuS'[u\J `ǨʤwM,=L~Drs4%%xZccE~7o^3Xeټu:߂%c5|׵>R[yhk 8Y~fyζ7%E|e-%K?:-6&1BAcsێvy\U`XjhƌCAA6m2]M7b|%33~ሸxVV u2$L%@FS(Ъlg5i\^_yC/b{F˜Uk7"6~N AFKo^'SCq?֘e3:lsbV;:kؾ+ٹ-NIbxMi=dԸ0ªk׮<?4uA;k*9z!I!]_$XZ+olW@v~Gj<^=&x|)Uw7ħfqTǪmP`]-_ oM ~wv/,_nŒMJ yb&q=s=G>}#8gQapD\ZZdCClRo<4ڶju%VcՉ{9v~zG~}_4Ӟ5RSj;Bf(%]Mu:NH#<W^y8 {2nqM$FBc6@:`Ű7B#r\/1t}Vݦ@R;dmtBKېsV]{ KJwQ^/Ǥ;~@m_w|O`ë9nKi׮v8;{W_͐!C~Ѳ(|xިl$4mtfDZ*G}8ö)j{VfqtR;R6Ҿ5M?4 3.9l{^]/tGim3Mݺu뮳D6q۷G$ZJ#?s-i0~FۮOf9 w)۴[ !3Ϫ*)4퍩x|>^^]r% "shr=n$$$p׺<գ\3΂eiꎣ!9Uזy֓VxڍN?tN֮]8iii<#dedygMi})qcH޿g96vmɗ morom FۛүYuZHڣ"Ӷ/9퀶7>yul{c*[[CK"Y8p IIITUUc!1fٛR?Fpoϸm3(]Z)k۴~uՎ>RZlߺN}&|)Mgf;O[:vC]eNfoZ3scXyI7/,99sF͞7iii 7mo=\z7|cCbj.w1~c90XQ_/Fk(b{jf Tl֪^r#7߲×:ijwUjY?=h|lsklx}ucҝx>k$ N:1f̱޺޽65 X {P`^篮`'ȴ͠(jmo7'9WocڏME޴6 ^($$$0d-n 9. 1 &T>Pd/M&FKz6Z| =^/Ƴe3qP\Oo7֯\:o(:'/?`I @Bzϻ qo^C`=&.`@mGK{n9޽{ӻwOꪫ.+ :c{kW˾bu .Dsc4fKKfUB|;!)BK<34>}/ڟ~9M?}w6&kE#UE[uI{XޘW_}=tV?:DR7`75Z1xgۛ)w `u)^5ת;y@{ۛ8s<Ԗ[uv("GhKw5j[qOζ70zM؄Ɠr3i]wl,_ŇTZuÍK6:~tǤ[3B3: 6'nj.h7bUZ1,m($`sF}tm0iMyFufKL U_w/cqt 7q]/t%zAۛ-.?5t F3'm­M{cҢ{x"W-qv: B dmMg Ӿg=^j'.C3~6f1P9^zo?v km{<? 
ݹi:PID]pm뭺[ö7ۿ𣗭:gX?ݪulG6W&5аͮoY:^[lza?4 $URR$|-z}z}/?trпmf6i#;BtqƮo5@+@MR*"rrs9}ΡsYW>}4Xd}muNo ^S$d8>HojKpݕXV]]/iYu8z6\ݰG-_RȾ1L~'5z27Oxi`.c&W`jyf_J^kfdGۛn:go}Th]r@ۛB{ۛZ ƤE6̱n`oi Zr#&4}#S[loQ[؀{`ue{#.-_[hcC%ػw1ϱPޱcG7!⎆mo7x|.V_m*̡`Y&m!JKN% BIҏOb`|,|Z3qx|΃;Ʊ,|PW9ޞH8pv"Ě߾}vww=: ;*1^}Xl|y}r +gSehNg_hXo7yڪӺݩ^0It[f?CU666|h*5Ilazrp;ĹalW`̜Omq)jS{ ߄^v'B}`]@f) >dmd'"GgϞ=ҭq4šf۠H[|cfF(cj8ucf{o|"9CBkv|{}dJo~M&chsҍo6:;HYumY1gޝHsr6nkj3V{pѱĒMI!{EJb~ Y֯(\8ͪR IDAT7͋ՎI yt9gڟWoT{ֆh{ܪ7 Q{veγV}{ JѰc-JKܜ^b5KN%{PJ8.K {?PK}U't"Ӷ3u+(\gյxu_."ruDup֜*rX |&B=š#T| g8mC WΦPYءOȫK.f;B˦;^_i^yѻ>{hGmh#"N eC 2D%Fa1ix`&ͶEU+W ?| uK՜6xq:/\*UXu㳵ܹɱwP~gs5|ڗBsx| ,7a6 [F|@"-V !34f򷩳IjGFSlߺ֪=>y.2Wxܡ%f/1zW'$uSI>{m˜g* a#^SV|Db[gp5`,60fax`85'Kr^hBo"t}W:ue?u<ENծ8'``]-e8w$ڴic/hTF,Ÿk:E8{sZi*.vvW^>XSC(oF9qԻW:Ƶ=LuOV$v&shã߭uPm򥿺/?r<=h.AiMj{^x1wnݺA%?,P_]Aq_yC/s6:])46:idxUWJѺOϩT1NkWʱK+VUnՇ;O$η[qnH p9(0f93]՞B~U6tz)?d=C_xڄes/4kgƽ{7??/  M u+JT3t^7!`MF4_7w7`@V׋HjԪ,1:ctaι(Ÿ P~Ny3>ڎx$}gfH&Ol/SO#Qy"6,qpl4b<ƈۄ*3ԯ:ypUZ 'V1"on;ЦoCvwB2mG=pi#ڎ|"ijéߡg0HϷj;/LAuzn0Vwcb7(vDX`oRp{=ߡVz7Nt`{+ #ى] + nUtq5F߁[ q9m,?XuAwª ?:94XPߝX/`ْ%K6OO%qfa7]E"47)W?N\rQ|c=h[IʡmΡKٳضPO>Fgct61` 5.x>[{&:fp_&lݞCgoRcu{O)Yr9/0;]2v~YcoӊfADMfprgk㓕ш^\)M2Nph,}a8~B_-(۷0BZW!fN^W/ț7Hz۳sޣ@W(+ټybs[T˖-c޼yXK;!=.'{Z̊C{9;oӤHwd>/'zu|V (X7zxϒ)$uˆMgSv7s{[1Ap'߱M}lmR'?w_|13gΤ}N6cƌT#TVVZZի֭=M16Y]pnUWctl+1WwtNn؂Ws@^csތz,`͡K%FlW:ASZ\=Hn:ve?#p`&$7)$xJwWo ϹZֺ+MA|c>E7P^1J͈GXch=h; 8X;Y~kCNߑ~1:M$1nĪF_比kw>mO,ؗ-z*-ctJvLVrigU0mb*l/ {,mf˖-׏VbŊ7~-"w4{XpN6VDt%f:8 v2pbsPkg]Bb9kmNѮ;:KJWm_<*"ÀJ1qJL4(ƠYD H5Gu} -{F:w|"mG8_=1F1:Z!>icNctv/.wՋB1:j< 4Wڗc~ 7E fVܻ\43- >r,B Ni mXbٽ`"-ΰ6;V]ٳu ǑfR@|J}Z_6?Jvm`|m}3X"w tU,⬿5g:#.cc~1sL:uW:vmx{G/:(29|/'jx/Z8p~s|<D( MZqvp`os5kČ^¢ț769Fct&L;߰w?ooϺ$pιؽx4D1`|\p̛7/$_D%&&G1bDj* xC0~܌1_ՋN;-lMK)9Hos57`|2;^ |wB {&ﻏ1:HϼLJ*!w=(7kXwߠWY+Sv7cֻ_1fӠkP?|9gZs}Hڵ3c vzόR=˖-zm"T+V 5U 12| ۸43(B`O,|F8fh)ϠqOWNHݛ99~.%Y9S^¦Y [6fܴ_"Oaa!7pyyy>9L>S+22>I| C? 
뮻n) &aY؛BK1~HޤԵ)WUɦ /^ys?tf62z\Ӫ 9}G7טg4w#O3i;Q־e)/cz؃}O3>$ϩ>䓈 Xvus5w]G-zWOpY۷| 3beTNF~UhߞuyVwT w뱩Ud ͺ`|-uժJs+QsƌXϧ,_ϊR/R`C휨OfoTfή7avC ]^@Wa1?.7ٷbUkܖv_c<NHI!߰գn~׀7$.i>{N^OZ|#6̛7AQPPt+axj.L:o'-ŋ/HQQ6NՕ[肢#IrNMJȹz޻l*|gծL[h3Q[u1:z B|mSPRf-.Azh2k}0랪Jn>c,X~j?wG8 w1gϞkvmRFbL$q>HlL^|Yvzm6}\j}ػ oe9;f793P|J܉x+9eE7o<2\q )99/$#k~/)Y7G<D`rp+K:u*={t(--塇p!pPLW'c|&L૯+p+'w@:@P3Cۖ1BBj&g>WUcIߑ`{Z_OMkx S2zsYM13аeVzU%z5L7oO XqS .K.aڴi4hB_g͚5CY ƿB5Uv=9 }IhX`oҊSvoٻx Ưqc\8vZa&k贼^6M|ɛ; +@_ٺew?wl2 oM쇞VN\'[onB[nžʾx,jbL}Y FNN{94]Yh֭>TR/SxrYKmoSή9ck|oEuW-MoK/$&OLE c}$ؿ?^z)?0G ֭[`]LW'We}deeDb*Z>paUl#}˦SYY8&? kʪYluѐ,ۈ;Kqϒ]Y+ awQͰIhD'x2'/yk%K0`ӧOQ?~'?S;;}8fo#U+խaKB#F{I! dzvK~7Y紿I<qrVK~i7BMBWX+Zu!c,LowB2mG==ct}\PPݻy u p{<~_RYYɝwyϋ:^?J#%"C/]6meR1s%(W ,ZP***˯a#7|ɣ~R*Ӎ<7۶mc̜9S= 1b 2HVl cFO%+C}+zMڒ >{Ө琀wTkl@í wx@…ד¿qxUALzC4wsʂ 7.=~` F,zE]̙3i֬۶m>|8*F⊕@[orM7S#Xr Sͱ7j 5`$8H)<JpsH>l*֯1Sfpگq1~R5p+ys?xNIѨRͽNE`ط|V!E1FݨY,)on?i;.Bxp'Y+sl9_Cmz9t(`p>*  rwSQQpkz;6`j[O>zڄGRfrpd`/*,R_ar~FWmqSƛ-[K5;9 ('L^b,`=5gݧ8fЮlhځ>"6i(+޾x{Pv !P8-p%$QTQ}nGB[Ɂϻ|ҿIk(X1CѶ'%H|>oj^Q>%bl>8+Q7c%_bÇg6clHWUU7̔)Sxi۶ݝo Qn jG.^/s=Oݛ6mHhxl,gÎ\2OY1_$>;.?w$98 NSp'Buh_]0a|_~>5ܿd7c\y6\.zAILL#9s?!t̛Oj^BhЀTXCݻI ߥ͍Qߧyf-Zt!5j(nOV\i\trCکx !CHI \p+..fƌ1` 쇱V %Ee{`/Shc\9,U,X 0Y 1S1FIJ1vpvs7cQBln4 1 t'p}svd?V0.$G8aH@ oNq{Fq0/`N{ =HEJ`Ƹ p' '_(F]S?XE"Z`L[H h{1FE1?q IDAT~O˜ߝZ{J{^DB&¿'R'DR`or~r*G%+n$H10F,0F$Łk4lj,n /8^A h1{v{I5w̳"åǂc^(VC/sAT~Ay|sH(0BFe"4BLcgtkZeC##ٽ|_)v/8&d F 1^F>Ne1$GX}5" "ޤ^DDԭ u^`Ι'.Pfo%|v>EDD&Z{{DB$Bx +kZAʕ.%ʡjl>௄wDߩ tAPL6:݈H,ޤ^DDh M ExB)YmGy1fڻRK w16ta+WyY""-7)(Z{{1+ x0HqR0ED 16ޮx gh M ED.ޤ^DA35fddW6mڄs6mv!{t--%ϡVr^=Co;!S(Y$$$вeK9u<<툈DhM ED.ޤ^DjKޟ^馛o~CCUJLL(zloV`PU]>;ybqO**n%,\IIO\=x|fz˂~GsfUW]`KСCk_m*:;JD$ԅNrDD$եNrDJ}k OT{Gdf- pXqӭ[}`_>SNrꫯgJؔVDDյ^DD>8؃s+Ր=ࢋ.k׮̇1g/VO!p;W_ ,ON>}n%l:w̍7h?tFx/""@tK'T[}V酿RX*X%| (D 3s)}'a,;믿ޙ"q1}L}[u4'`ʙ3gwޮ];ڵktaxbz:ޕۅsT{ƴ) Ǩ3?G΂M?pz:U:о,IsUM>h؟P̸guܙkךg(#17#W`/"":{S{gq%wS i5?#Aąbƽ8뮳H*0;Tb"9ϸW`/""Q1Ch{癡[gP+1pJˠ,`ވ>x7P+"")T}P ED$\GK`o W`/оycǎc\ײV82/BBSp;zE\\ц|I%7;W`/"":{S0{Ƙ}ah޼sĚV@[Xo*]ct(!==&mk;WD. 
Vpm)X{ S 57+,nA-F9JԳ5gk0>64n^6tHw}S ؋zBII؛N6W`/@@rrr-Iۀy5Ω9FgbQ CG}ޡ9 =7lp^DDX M'+\n ~ >>Seר֨30Fv>4'J`o:^D c-7op^$2ZNRs|&pys>̜\bQp<P:ՇH49>{ ED$+t^Hs%EEٴ48#$[Xh:ٷoޝ^caG_ EDӱX M ؋H$-T[p^$:1|Wv\7ccYS;1m"0<< -J[9aR uYF؛j ؋H$וT3W`/=oٲŹnbDc΂>V_4Ok GRRH8Xi^BshU3+fp^DD-/K*dXrCĘ<>Y%a+FJ`oRp/d^+""3{K ]GDfPkgW`/=~y㜲co8|Wc<S&J~,68ԊH]`晐#K=.0lQD'x}V_$ژ<@nn..e>^dt[8'młX M %TƎk/ C~N.N"""""K; }FXSQdr؛ `̸YrD؛x,a.x>2wQXXpgvq58F-Z~~Cz+ zCaYesA/p3hs, GM6e˖-$$$8UJLL:i]ˉ_{\E𚭾x_8pZNmu%FepJJ"&-Çל 1cSO9FX]}L0,9@oU e{-_,Pp/""""rܶ4?صk|DL[}V'CW=DC3f;a~iƌta|r&Nh?o؋#>pWQ9G \+5*GDDDD>:<# <ή>)qcl.k=ka+ͣr~ 6l  wcl ѣsЫ/9ؒHT-7ق\q_[`o#""""r 졽x`ΝՍ~o+}QQ'N$8Xk qK0vHx穬䥗b7~7oП|j M+7)9uSxxGQ4iB)LZn #P`(FL8"a2Hw+fcN|嗹+ckZ ""!r)Fo`oҨSs'̀U@y஻W_%---lEh{-m7}ctՕT̟ǝse]وV}as]v#`eW7@-L6+.qeﭷw*)S M EDDDDNўXZ?LHWUqӍR`a!#uN 0] {`9>B FDDM0{S4M EDDDDNL͙v0L S/R)?1YpyM펠Q9㍐.""""K) """'n0x1~z+p.0?\'""""m4GJ}iTA 8.799*'#qLn w(Q`> EDDNN] ؋D. Oɉ^HdSh/!9 EDDNN EDDDD"B{ )Sp/""rrb-W`/""""N7 +{>{PPt/'Id:݋=.to=;\w<Z*Zi/!큽fB`o+`[+EDDNHW`/""""]^nd a+Wܯt{'W+>Zi/A烸p^NPvѺ ׂ^D0C\ 8hD\8Eq@oHlh|.𺠪}[;O_k? ¹J'< $z8N:D,݋H-KHDap_'{S E"N_mӍ[}0R@WdQ8lzdsXw<#!OÉtJ ?*'6U`/Q\2CQ` /WB(}:3g4K/X\Gzx,իǣ>Nشip90ڙDDD$)NH ؋D0F /O\\{ɓ:tKNcϚz|` Ϫs, t“O>y,7Q9:ӳط|ўs"M]2Ū3;]@z[2v>O%ṁ~w/;p'$Ӭߍ7m[̕HkE[WiKV~yrogAIIyh%o"55?opEr~? g.x_NDDDDq$"tTj8*GH `[շ{.%F*09omN{_{ I4yisҚnixΥ$5hn} InԺ=M7:_`^wx*FFX{+(ٹBJxT؞?&Gֲ%)iѢo.F:NM.t꫘xW7>""""^#‚{5DRp^$$9Cjj*ӦM[nv%z+0Vs-,qU+C>w 94k$NΟ)<&}C=K Zz\A| Ο?ʪӚNZ+~`w&v;5VCm^k; VBGYfC/x\߰a_>_5^xs]`r؋Dfӽ{w KyS;3vňrBۖ}ҺМ $R[ugw iwx;C큷uEOj믿su=z4fbܹ{1\W""" KX9+?'{"Ʌ лwox.1>|3hnձ3{~M=O1V'fd"el:uV"K>qiVv#U%t3Kv뭷eˬ17;ُ]vv6ӦM7xwҥ 8ژ8Nq$891*GHĺ`=n:Hty +sKl oFPùkڪ.NeQU'g}f?.0F}FHYQ)Hd)6~ު ̎}`׷Z+%2n^}U8LF5kVN;~ @ 0W`¹^HD-Z0tP'{qֵu0v%\6! }FWU_Ҭߍ)g*1L$U'F)9mhs6-dv>z [PبG<Pg+;gKݵOr`UrǍCo-t+oرcҥnmTO*0xٮDDDI 1a ؟p E"ZO#l}HynkѢ]v_~yڋi]Q9 OQ(G(xgf1x`[Wc`2Wqa#DwEVm9` idߪK7%o#zκܴ~5FSVIjȯ VqĢn^vNs q! 
I({"Ql󃤤$zd/"!bwa<cੂ%Ka_zX#!~\8a }U^<9kds) iYGeEu{F4Kț;+:.96aVv|'(O$ 28EDDD^"B{A^H԰ wԉ'{1F&)j*|}nHHk`ջN Xܰ;߄;a %y@^C,J}Nј9}GަUoT܍.==Ν;)KRp>D+*ڶm{D7Ʀ`rsj%ܻi**-dߙ2c IDAT4}d8z\aĩ.}nNʡ;ƥ챯wi1:%ly"1rsCDDD^") CT{"QAf͎vH fg;lu 0V&8ת륓}@6,`frOER[YO_ WCrǿCwҪMpӏAz"}2ǩ>DDDy %dp>LN&W`/222C$2] W׸b|> Ӧ:tyؽt@bFC޽sdXΦJ>#/Y;!6#ylV}qI)F;"aÆS}KD:^}Hp^$j%$%%9هHdjklfqÒ/%7hFϵM?֘]BfoXyvø}>61>Jz۳z̮OʡۭƥY2Hj( OjEDDD^"q r<{XUlu:Ъ9k]x\>_zwBRzt^M0) 6eUfI pNѶ5.wO?#R~Q %#W`ﰣ E}]ZzOĮn[=gsmuW{0DBzsuUqhWͶ}hxe>gS->%6W=p[&V'qgX;+fo!iȑSEDD$)WKp>B)W`/8p>D"O!n qQI\Am[YΞ_ZuBFC 8'ThS3K Z_kXuI^.;UgugXu|j&)>2V#?T"""<j #=W`/3l۶h=xu>^ lGR^-ޞS*wI⭺xzd M/۫J?YuJN{%>6~<>O`%;[u=lt͵dzĨc"\KĞA:B0v-`Æ N!Ylu/>h3˨Uvi)e˖C8[NivmδjK|wtZ }0kl *Wشl߰ln6VR _zhzDbŋe~0HQh/Q[}ȑ`ƍd?"o 1n*;̎Z n?M>sy;ہKιC9Jvm`[76:>O6崳}l1&#gymm\ XpH`CHxn>_~T[qE؛9˴ ] CF=~-?GڹVLZtJ>#M0vLHϦU7?{5`x&`%_q`VrНkԯyc6>>Vj߾}L4~h,A\"""R)}$t[ikκO:˴ ]7?6&/̿>.&ܿ[*XK|Ju{Ѷ5Ī3γjoe9'3{$-[ƛouː!C4i\rӭ/Lqux(H8"""R|Hii8d-€{NHݨn>-Zh;qNIH=C0tP+[ ;wo}vZi/"""G&Ơ6lO?+pK"a ,} {SاZ4~P؛yVֺi-:ZaBq>'pE_!g6-'GAV{Ϡ;InCA۾49oxpg}ٳ&8)w1b>`KkDDD$B( \?pW8ەH8I7Sj _2_)>R{3,nkqV]w;@>94՛Ӯoݕ&dݞs؛M_S^b.{\7UTT裏tdc2rH _~iӦѡvDDD$ٴ%P x9rd͐A$67m^`/Cn^AuVݠkco ٷ|U'7lA|g4KѹouGKJCo{8;K$z7Y~S8***7n][oO :n/Cn宇D']ϰ!冻$ImK$I&LlQ~ <xbo1G /۲uLI$i0$I(>^$I? %I.+}$I?K$I)$IR3$IC_ S %Ifh/I$uR_S %Ieh/I$uB)$IRn2$I!{I$)K$I>^$I-$IAz`bp/I$C{I$%O1$Ir$IF S %I^$IJ_{I$o3$IZ>^$I %I$N`bp/I$M$IZ`bp/I$=Ѱ$I”HSG`[؁}K͕Lvg똉 Gex$IR1$IҀD<ݜ: g a)gBB6%I$"H$IS7"9*fTo/9lʉsK!I$:C{I$Eo}9O^$IC{I$)MoاFpo`/I$u$IFOاdpo`/I$C{I$=b`$It %INpˁ}Jw{{I$ka7 I$UgBS%v %Wg`/I$u$Iԁ8%]_#p|7$I8$IR'FDL"M4B ӥ9&3H ꀵ7$I+%IN8" C/ (W3o`/I$X H$I`-LN*#PX>-QFa>.%I$I!IqH@U>3o `"pZ" `}@5K$I:gK$IHӷG q$ZU+qE`+-(8B$IR`h/I$`}G %I1$Iѕ>?] 
S %I3$IN`}w{I$k %I4=اbp}$It %I=اRpߓ}$Itx %I$z'OɅ7{I$ %I4f`җ S %I1$IҀ>/S %IC݀$I895@glPdġƱ$I\$I8#y:f_>0HIluLI$)K$ih$IwK$I ^$I %I=7$I>C{I$)M %I`h/I$߂{{I$)wK$I/$I[ %I^$I=$IRr57$Ir$ItK$I^$I\ %Ifh/IBEu'KIJusx}74jBy~T+U lS=b-qMtR prMa7!I6C{I_.܋U<{W֌'/ˁ5xy}z8'&:#pR؍?$Il!Ir06V8 ?v;T ɣY:fɏ^]0Xv#(H3?o#I"$I5xrRW7W7'lS؍c4ذew_'I^$u$i |/fر֜=W\qEܥ{KK`^ү*ZcɼumEc~[z5~{P"ƛ8&j+(h &4V7+,&˼nDD Rz~!@ZV@$B H$! V0R"< hس+XqFῃzo[o!s0'cRϟϲe(*ۧݻ_?>ҿWKy^"I3鼿d`WWPFF|æN!!O8C&̤R3}s1XuZ VXI?ZAtPm.-vWn |,U:X7|pJK[{S7/~ ֭[#?Кtڰ`~a^W=o%KpuԮ7[N$%)IrSŢEx 5d (U39F2OTzwQ&o~سUvl>#oC[+z65mlxvx$Ġ3fyģɁ@[LW,e>oNT0vښDS]A`#|`nΜ9\GBrҕ$I$I bΜ9,_Q8KH@PXUO&d@ݫ <5^^N݅H1/WؿksPN-'. [%Rv'Hி BXa1:Euӹ… 7}it'>u9bŊO~*6msyy _[Iq$IRd@^^կ 50KUƀS]ןq_eP:y ֹ?u>fkA=l =) z56e7@iZee7f̲ٻط|_2dTϜz7O]w?S_+IK)t^{x,$d~f5`TyBkMm"1&_蓾dv[o(-hg/RfEP[p9C&YGMbY#秞3{lpw6l ݒ%K4P7}]woزeKjӏI񦴒$ `Ǒ$Ie Ho׉si ̴ *U̸ϲO9-sM{coN8aAPSA[O~Nt`"̆.cC|w2z4꩗ Ѽ梛naÆza]r%,[l@ 7ܐi6$Ih^$gu3ghM&||Q j*yC9MḆ7gQcoٽ:m}I?^^Nrh/v[#/n؛6u1fDy;F{|Ͱ[s/\j[$I!X$IRg榊+R?H贺XVO'V Uӳ'zឌ7ڎYMu5A=/9P}~&$O{̂)pLn%clo75Ƙ43Wsrƀ_έޚt -@x]I09G$d qg؊&2oY,j~ZcYihR}oʩ~%o W:Tʵ zAH}A`)*aecL㾪zDjhg^M3z+L/ƹ$Ix %IR{1y0{G2\[yO>Moɱ72JgfKhn 2f~挟/Qv!t؛aDhu^p&8;i`;w.%%w>5^$IR %IR{̚5+>pՓɌ]-x~coFz'VPi GfE38ƻ51e߾Q>'IF7o^&C{I0C{IԞ ћ0aB}Hḗ AZS}u,?gujMJ3痿Ӽ/SHZ_J*~:8㔌zo\_f#IS2ue(_\'eh F9 0ӗvl$fT$s$= a!eW)pFZ>͚%iv*`wQQ׾y -x%N$Ȉ| ģ16OC9cD51oM l{2O5De!@᫭M/@AJ~^$gAA.Ȝes0;~ yWa85k6NǭwǍ}J; Mu{xqf2yM u"Vr#Hă} zs̭΁`x&.^eת|N@V]]^%K^$<`.R.+O?nR ufwqt`yz߉/O"-t/2Ӄ⭕4)HQQ }=ؿo|D"LoeN=L\t-#M4c,ΩH/C$^$gk-[C iCz,ٴz=vz\}y0s@6y)c͘MZ ??cMZOvȤٌQPZ {_B𯃺j8;i`yf)>$IR %IR{6[.>pDiu`59F.3;0>eĬ34ltP~a Wϰ[ $OmX͌%7Ԕџg㨋:cΆ~f >/#cC[zuz`$IRgh/IzAee%7n )Gsgmiu)4*=Db1FŠn޿X3jy Zz`NQ] 2ӂ701e ictet?t?fÆ %I %IR{^%֛O xl2ۑ2s]5:5F疖֧>eR{{$$|i9܎u:<W=)s+(t:o=EPcT{ҙ s?`ϓW7ԔџsE쏺)_"7u'\H4M6;4yyyqD"=y;L/&y lI4@K'w裏%)`6W 8XR? 
|t 3Pۺ}[1dL"(c_GoȘS/ W}4;Hҙ Z,۞c?})@БL\*}[ֲ{z@?k׆J_*~;P /+d|*΃$Iy%$IׯI~D"mr;RH %iu9vbŴ .W>E zKI[o;d§gԱ"s6'g~q]F63O5A3/٧{H477zq?q 7$$I$I:* H~ۑB482~+!9F p8wR5c_L$'|1󆲥Otl-4VMgWug#ϔWP7`#gnưho~jn=\2}-ڑ$I}$IkD"}B6k8x "cN`.mMkzа`N=@Go˺D1Chm5[c{DsɎs6Nꏞ5wnSO~ݻ7]k3̝wɗ%{;vկ~5}S9O!#IC{Iԑ@0bժU\s5mH)vK@z]i j>D3nWX3jb%9bC& ~KG3ourNK'KB+e?Sشizkǁ{I~*݀uٲe,]_mm-\rI7N 8KJ$KC;Ңw\uUԄؒ X2gGq 2<9_>>J[hW 4;=ITn#[~P__*NԆ˗s󪫫9y7ϐZ$I}LK$I< 0i$z.]ʰaBkLꪇz?G+s7k,׼Km^7y m?< , nKk!Ԟ)J`rj… x}3`bv_u@S*&:i $&zSKK$Ia-Ll 1d^da *3"P=^-I$3 %IN꫁}$I %IN}$I %ICȕ>^$I]$IRr-O1$Ir$ItاK$I^$IjG)$IRn1$I/}$I; %I4-O1$Ir$IԢ)$IRgh/I${I$o3$IҀ7P{I$2$IҀ6{I$o݀$I8y'x&>0 fgx$IR.2$IҀrs J`2 v5h"{cI$I8$IfXXo1*'w$IR2$I/)K$Igh/I$> %Ich/I$> %Igh/I$}.)=K$I]ch/I$;}.) %I3$I+}SK$Ich/I$upاNpo`/I$u$It c`ҙ^$I$IR't>^$I9$IR'>^$IY$IaHR`K$I$IBƮNܜ?zFK$I k9 IENDB`pairtools-1.0.3/doc/_static/read_pair_WW.png000066400000000000000000001004401452673171500210050ustar00rootroot00000000000000PNG  IHDR(G%sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxw|SUǿIwK  D*8QQpx{{ (n=q#dSfJKMܓ$'IÓ$\n==&V6M T-ZFc"4e9[ V-0EFCՋB4Fh4Fhh4Fh4Fl|Yfc< 8aR8c޿1.,-%cLJvAk\/|:gxP ?<{Q$6+󏏍ݰ3ix|V}}`CO#Jm?mICFvIj[4ⶃN&iJ_s0'|izUYT Fq<~ELYAh&EHZRՋpZyBs:NA=Eg.Zc<4I3+]w+y?&t:*>w+\tȤөW+RQ==t5Xۄ'Wծ+Mb 񽖢l1 -]H?fi,.`{FMP lDĶM0wF@[0M|?I1}"BBxm*ٵZش L /"`0 -j-=:5 }&IN]Į0Gy`xղafĉOO+Rq k\&J@(؜Úo6G59%-Z$V4B8FLkM`{i)==S02߰(+e7h-*+"偾@>-0 1lzsj-;8yrczem%@ܺr+3es?i &P#E88Uy=E=e vVϼ2CKٓ)ag7P+EKJ&'p8X͔n_'J&'p8X-nZ,!d]B''< = {w. 
OZ; sl,v;/rk!Hr l&!JZ@Ohv[+_]FǓ=e:!GU3`B"c5e:!|^Ūj-2ШX5Z_C͆) \`#İz 5o*LM-׭w#ϯ\Β{hTvbrٽSq:>-*pIM`طWX,d^:EgVj m ˚n s:^=~=sr?])ђ4v2^⻫ɔsG@lX9{CQ+=Oth( 9Peoy9w-JZ|Fp%b'6_h -g{)޸T+=/{d5>6߿u Xȼ StW!qwE^bp|CK%C_%6~8+1F9V?e/qh;;f/LFNNV/tb8Je{^6ⴑnJh4uC'><-k)Q^Us-># DT{W]&ZKdu1IĻ_xJa7AnB#r;ek1wL׳oSlּv8ď7.c ީFˮMzZv:qw+Rw gN5ԋ>Dˡ cR3>CZ6d3 :_U89W ؿa1>xЈSh9u%=תѲ9oaĭ{%>Jf[K-YGh)ݑ+L10}-MCHAaWK)ђuEE# JjDiY|Hﰕ3m2UK᫤ʀiS*)GmRɞ2 K]KX9m 2r,̰Cy,%gdH;<-Y_s-rr]E}^W5,Wݛ(KemQ-j1\SF8\o*[ <_EoFWQ2'a-QySJ(m%zNzVeδ~aDi+-b ړ5IAF$%sYf9/]m QyE>ػkR^ڿa1N3Ar#0ch99qǑzx%ZJeOq^G x(ݾ=n =q(ruk}忾w?z&i^:mjCA)e:^k6ܴtF(ҢQN#!y[%%[VȾ%{P'=%=qv[ֱ5eT-y\7>mCU32ùu!4&^W`ɣPuB7)8X dV5pR 3;:h)m.CW&1r`.zFPClc:gu{;"Lld,|EA|I wZ!dMzN|e| ;-V.=Wrd;-z^{aU aX,dN|(ٺ0/aԅ_MP(+^#>PegY).j-㹹->elVbEx@=^=Zާk{Rļf\l4qz/vϢ㫤ܓ0-mֽ+ZE9{w:kM fk7wi\KLZO__VLM_<Flv;w/h6:4N z{ͮ2!Cib‡T`)ypْy4{;Ji3MP@$i,9+DW} OrA`VV|-}ZB"cȞ2ŏIUlZnxK^Sgb++h93MHHD4S36~UU4lFDW*Uܹo +Ľk\x8 9HZ|\̮gc6d%?\35e?qr̳ɣ&_zƍ=a`%톝E1r+os&*_|?bٳ;5ZCˌxwMŮ,ݻ :α7x~2Tkw D8%Z|f81A-.qp"-#&->br~'giIDUp&_AKZg'*ҕK42Z."Gy$w& lE~-u7O,[b Xx`PhyrRVV{!GВ<8H6i-&<`],;GmbX=ϵC -m|MػC7n6~.g&kd~i4w|l':GOh&-x̤$3cS; sd`jFZ~3zCO7FGv˂[ _D:S̙j\n@S2m?Ɏ?>ޣuQt}2v-/NqyF|mOTSWц\dWefh;aӀ i%􅮚2VRiSW\%$+_4j mEL4ӽvUy 9Ӧ(1t]U^iSJh94`)9&+3hjcmQ!/VGrqu{+*ⱥDvLTg؁8D+^vWa.w]IB"p8X-nS p+{-(\rhX#^WciG:uś)ђ~굴x{)XWg_ߢ|bўLs1<1#(2\^rC :2^[ϰ/!Z~dHZ4ADT-XJ'}F_ɖլ{v5ZȚtG._8Zڒ=%ݶlᓡ@KXLkL.vmb+W\F㎻eV{`>6_Q ZKZL+|->y#fNh KY9m˟!2EقJ˟&ZiS|FC 7?*lEo!C+-e6eJL(7hƍUh48^FqRߌi4pp)"5܎hR|%0ܗ׳3܎zh4 .mp4~C&ruqcY{0?6W?'"{jwυ CFw>_^hZ4|[*=PčxBܧ"!ZDg-0H"&yMz"p=ֵcTYko#ѩ8{<:mf2k8L͕r+ ^Aki  M?s9@m3?Jb-fĠ>f@Cj@dZZ#uE}j4^$ xN)z[SC "+ᣦ&CkĩulkAd:Ǥ?h QF>܋6#1hZ.ϑijCC?{k t l^n /"sk v@bB4-(l͏QipdM j4Ѱ$`+2zS!+J3pb-(XeaEĽBM$p:ic!r_6 }!gu-8D}ԥ 9Fmψ@j'4!Mx} a?b忝 YLŖIx1,54dr$ES iyb-icub-Ay=5Cc{{Ws1"P#~=Ո nU4&"Gk f] jz!.®ο} 3Mll^ x3Q,+p75=ty+Te442> ?{ IDAT74DD$ [!WR%y K"pޫ%b!kY(UǛ'#-k38jx+PMx |j3V 3 j{X@&'9Y`|>X&HC_v*ĽcۯaƎQ_'?K0z#n~E;o0 xa/ ٛƛ<&6}#Ev :4#NmbЇ@?X0~;+0 olLFSk.Z{6ԧ??OT sTߎ<}HRO[m 8z!&[w k}*h$Δ x.m"{qᜨz{Jς~.BuuZ4E";t1Q"?pt~}-E}M7`]tRCqSM@z 
5}%M5C{$1QZK>ۄ!&WV84W< PUM*~]眸~Q4M9)<S}4~IB4~KkĪjpU%PQDWqf"# >@ogގL} 8+>СyvVo%\Y8uQOQSڏy̽7]xEC _oKqz"| 2<MfG]ԜzfoJaFm!wQs8D)^w_/ Muѐֿa%юݩs?0ڗsbj4N40@brl~<&"KNv>C1fɭ1lj55qm_b-NxH{ ?V ̙n_Rp5aX :pl`k":983T xPd]jw^qhT:"S1w"n#ow9=?T玱[{&&b?nP-D5O -C{P@yAji;1*VLeU c\'.U%ك. Fz!~#۩yދk4Ƃ4:[>q#>Q錦ӐSEև.߿x# z@{ Y2IUh7 'nRES;=+1*~PF,Gڇ?`W":TXa}>TDJw; BfF~XKb}ZVZ##q/Z5kbmBP-ٙď%t $0EG(֢ Rq>?K:&{XKc菸tW!##JF+Snlmy6R&ʹa,B/T D!ggSgˁ홊Z4k̩Ɋx;iyE&>uJJS89 f`F{t承p?Ex!"(uHw+ <ܝ &,.S'M P TzvGGcX<63Ư6X"SႡ}N?Dөlr\9^-[hXK 7&xQH}׿F9^CJlWyc0F.ULEN8o-*)fx]i8UBZ(a>#J/B=PG`ϝs)N*ghh?[׻M943!aq{EԞ%4撀9irG[~]AZs$fτg aʨYNF ,LyOU^8"L![ ݁0Do v8 r?quC{Mab-31B.jiM@Utmv]D"LET>GXC9 g"i4`Ie o3 eW@Ǣ=Գ{[^X }0'i04wU!d@j4D' vyBe$-+.3UMp75'%* B&P8~}+=ƸyYO'T簵_9ѷ)X<O˪wQt 2l5Op8pc&F.907B΃Gt\s{pjB\SJ dvGhZ2nFWa6|":9oE6 O@Ll{"2I5_Z]rb-d͈ںth4r4`YD9971ja=VDHiVj @=0h4Krb-,@dyRo47Ka6M#s(:')@r豵x(bĽ.4TAIYc`b-&Qr1obV/h|kHWd)My̵?E<'WÁwOr>7#[B-Eق/Q[i4- uC*P]c* E:nG_.Mi{;sX_?ZJs/vj3TCs"ƻi8Yr" -Dghd{w"b96)&hp`'" .13sDK_b^yB7kU\G [iuqZM?jvq g!&F iMx艸0A\l|Cc b@;+ne1x8*Ԣ+|?5G"@1VZD:e OwB8k b,DK"" }E d (Q~[i7'K!@vGLbrl+@.NPj :Q0_ c>b=z9uc%4mG,VDM0dlv4t|ˀ3 ~F F<z_a,OPw聘l@p)j4M착R!ɼ`1R!́~o&(iy.FlؐJ*0a l 3gp8\_*M#A| ,,/ӱcGBCr5xHf8yוjh \s ~R ?8gRhϨQX~=ְ1hw NXwBGVw\> XGb 3/{iwCO :H>:tG}Dv3`x).6١,w5ɉs=iӦlqqg8&f֟ɛo5:rTRzgV,`atR}bb**HLLkWOo^'z_鋧yU6z\IC]Ē$ >?f<_U^‚I $Eδ)Y}{TI˺w6Osi#/^YWgۥygϞ=·DdKeWDQFgG/2guƍc~cB>`2a:.}ߦ_~~cR-Xqe,_FE@F\۴E{vM 291cY9[6ڈTV~:*Ÿ[\<#9تOJJ }QOxx'M&}z_,y|,P=2x9ީ0;b*?kKWwɜ&YT?oiKvjws,}l,E]ow97W*'dvؑ~n7[os3d·O@Kv}q\'Nd̙XV-=AFP6v;3V46Է?1#5&'n۟mMeRh|l3ӈ24xa'ze:$2g߼HE.#zɉ ƛ&'lFN5^ZCetGLN98OkLj2't}'x"p>*|Wj38׏szd6MRŃxo- 䎝L׮ U,wQ689Fŝ}[1kkV̮ᰳo0msNƹw&' W7&'R>49a+-"eQc8lX!t{oyꩧl Q hD ә;w.Ty Νˈ#XҸHD@@ 59Pf_˖c@ަɉE|%߈OбF <6|HUXw)qQIN:xlw>~zӈz#q+;<`ġ񤻏w|;M%$`<#nt{bbҤIK"bWnݺ믿䄓$fϞZUaAq5ꍈcƌJ'' 3k,Iir%<|)ň&gLFa݋d}qʑ۹W ̆Nt8~=r?zMuY!doe%[VqlY6\hu$mh7SChh(?8G(jaL#惀(%|w %͸qox-: ȤS(/"III8'/1;!!\mukr~uvMxKeWY֯3ޭpKKqDreKj}G%uqxgaۼw(ٺƈOJX+YT}=~yLjAFGqhL駪i?f͢З^b3dܹtPwA ӥK^|E.蠚(+ 8=CǎyWD`ϔÖ|n=HLFIu]u23f*g8:[T >S?8S9r$s%}Ʉ x7l6H<|gN( 
¸T?W8O>Yw'.I;ή2>ȕ7}$r\44RrʫVz39}|9}r ,.M`g/K2k[~2c/ CJȟGv1>4 #;Mo vl0.gИf~3u$%%qwr-8G}V)̝;y ._~~4d'Kǧ4$U9Fy j4F*㘩2AbT1כ>cӗԝʸ'S*c/!}J?3~7 В&'@ tc:٭]Vm6^^-ǒcftlWm{\KUcHc>_m)*Kx'c]ޥ?R#p%DH-;ENqf:u*ݻ׸%MN8yG\6xg IDAT00a]Nྦ)crh4GTZL)k2K7j2^_#q/qbi#v*X0bT`2歷"??}@BB&MRZJ믿{:&8ѵkWNFb\;,#N?ֽ݂VɆe'D:nlISKn_h ΛoɯZ .Lϑ ΰaØ? ~p}0aB7%MIe^2j4NTF7SJT^؀TMCTƮaz=7G\6mpꩧ駟:=A/XNm+-FܷM"Ǻt?;<n7-XURѣ'.Jpܷ&Zk4*͔5VRH/qtjwR6_s?xOh]ϾQsw9?F9##Nt 7Æ cذaex." #7X@wq7TFeҔTNc̙ 2.þrjňS[u&ef~3&\`:Qq#0FYǦ煕ˍV7wĚB#436?fcY!&]ͥϯ\fuzSh4@CL)7~y3.SfÞ߳/Ǩ'jw6ԋvGm~1a 9 ; b8+׏߫񩌝u*F$X2r36~b#wwP2j#FϲT`c, T&=u6#>-3⤏ہJf'Ŵ.ג)ftW_\~o ۍ]Ɉ L9M`S)eu}#n;p2bl=|Rx2湚45%WRo62dqS1y_L9 a1JOOwS NZ(xm* + m ol-)㍹FK]bB[YL\x80n}1[F%?P)9Rgw9VB"[qe3#N22S^Qf.hJGOKRXd,ƻ3ߒoJЭ)qgY)b[TF&ٵ+S*cQL;r)\NdR'{l]!1zo2mn\gTFOhdݞc< {i)n㝁mδK9+Gjjش z2GtcSq1_I#ڥط*J3erutLJ#V$96q@ö;L5Bv/ƈc;"x@k+P*++7:}۴5eC{([EpFvqF[ntN F\bgi'U%lhj5凗M';Y.xSJƄqP0HS+xgJSy |ϟfh|oI˄EWF{,Lۤ8wULXԴȳ{ &oZ$ I0e-p&~#ḍX:6:bb!ݳ_\( S]SQh4$4*gde256]ޅ5T **r?|1Iiy ڍ#.2(X>ψ-!dwWG4PTd*|G͈AޤtpVm{y_L>+`8l޼}Yrf{{i gՇp%[S{htOvMrr͡YlkI !k{wL w1u:{mMvrT]2re-Fln;+x-CEQIL >dZPI9#x/(ٲڈCIweo>#cXB`m]Lt{;TCn@u>r҉;b5qNa|F7#^Vy۶h$ m;⍲OHd|w>[nz܎5L CS>V齍8oTmQRr1LοЂCm)xh׵hX΢ZJ_si-ZbUhDIrSܧM"wn#g!1t>eANƹwT>||K_zbUn*ٷ l525J Jv\h<͂!B+˕eX3e6W0ms[)+Q^k4@9;<#rv_@ELLD.55ٳ;525[j䘩I >hZh<Ŋ+ҽ =A/!ŋ iH뱩sa.} "$+]2rw1kl>DV';[\P_kY ligt>zx'tF v &(6%%%̟αW6&~9e.%*4wQU?IX"B@DEADB}Cj[u[åR֥P @YdAd Ⱦ&3wL2ܙz3<,9sssgr$Gg*TZ1ͳ>n>p } ] ^ nÿQ\?ЮL=8u8W/32k<ņM$>`NS%&L(؊fmNu-yY_iwg6?a2¾p6&nIK4K_{^ 29 kaY雑YW>h!KAU`lڴ P30 1b;w{NiÔM>y HQ4+Yx4'"'OɁfʑ@fEgаln?.2+2kaɻPm&C^gۂ<3q+f臁@ziʰѯѰծ@2駟Xn]'AZ8s#|>Æ Fa!*&79~U\ӜOW,Axz4dV4Y^-[1g}髽Eed2r{$}> x}BpV[ZȗkV&..Z2+D,}QiG4|spY~/}9x3㝃NG;Ʃ&.V Ͼ*Y,}]7?@pkv/!ƶl>1N?|Wuq7d f/=k׎wde-O-%;v0nZ̊4SqTs7/3$^U39?-3_cƾI+ s}:E3 }F>pu yCؽdjr :]CcLU4ſghrײ9l%Yg^f^ -n |>sⳗ8k{raÆOO}b 5j+WF:W<1ŋ4i]v$g]S\xxVv /گ4fQYX4gULXk̊uʩ<5$o9-ٵCZ\lacgD`}l2$Pk͗92I3Ηعl6CHKa =՟V>yg]+>|ۜȬHfs#G:nݺ'PF C|>Kp#vu[h\V=p h]4 ǭs/ Z;Ml ^/hZ=g~|4R%S#/;0ab䳮 eoG{~2zjr'TxD$9p9`Gw=dVa__itՁvOٹlv^_H]m#kǽzshgYn/b'zo}:xwBz2Drxi||cְ;,tw.iڌ_$QQ]:jhs׬vsmK 
h""d.Z9r@x ue?@)8K_ md]Hk+fuwדhhg)..:kzRxb^u# 0@Jx9KۄYuÖ/e=1իsKϘukT-G癁vzL.`&֎q__ 8xón`گ^ԾHw.8?GǾb|u?Пy|A|I< Te˖C]+HByfw]-gYp8_ xcw'-Zc-Y~cVwbz\֬y=ize "YpڎuCҡO;:]C5_ `[0,}hZ>-G_U*!C0{lqDO2nݺs 'B~_s}=A1 4 3< Ksm!3h{^'iHr:GR@{Y[eyuO<;^7MoV'w{V/$'HKㄛ4#V}+w.>zu"Y̚5v7 5@W^y{7%')nʵ^˞=9pʞHTm+Vpuكfk7k)ukƘs) EE8ɽ䕐 "^fkNז7%8)Oz\H_~v E-.[ӧO/fR)qw2w\"#'?N~8xЛ1Gړ7n-vtY R+ȊоJ˨߳v:縫"-{#q=}^O2kլ9mg6ȡao9N <^|fx`&֌~x{uB|ɡ;8oR"{Bw2D8=NǬY ԙh>x wu=(Q,Y/T}yǸˏh ;ޤAf&6j\5EELܰŤGM:u]L/Lї5jЩaײ!^|E? "gÆ 4iލ5+0J1b={M4udTtvm;d$NEEE|GIAf˳,qnׯ29n!оqM/LF5jx,ݹ;]UF+m%{|W3xnk^(.{,kW&k;4zG/f5ФV-r?>X"o*#K6mXhQ'&￟W^y0JX~=[}ΛHGcC5ȧv&La~w}̜9ބYr,H_5t֭(۷od5j ӿ^qqqJ]6UDw^ :5T,N:I=AQXXntLȞWHjh#VٖG"'jժEjɿբ߮](*J^:5k,8 [ tfyQDusdرcGh/%(}-.pFD'""^ S"Q&ewcݐQ,#>fLFc R<S' ܁LFܺNX-]m e,s-{ciͻ9|g@`u+DTm\ " 0\%4,K_K$Wc8 yYYR j`2RaB| &+y") /L1f F$"""uD"""xshT1w 08DDD茹+LR\m8doxHua xm6f`SMDDD$r;^ 8Y*EDDv]Kuc( ?0"""Nј"N """"^kMHHTn]fɇ UW~)2/Uy I(b?O]q|]G ` 1n_<8H9/P1Eg H28 ?V鵫1ۘO IDATg0v{HnxD(>xTj@WB&"Au0fc>C`jS8{D|H0ҋ:1 +0""|:(: 6E#R p5ID$Y}7rρQcƣ(￴@`жsED$u:R< b1Km]DII=05=EDDʯ Xu 39Q@*X20SCHߺ:ɡAHٽL-[Rn<|J7oEEl^Sg~N#336m{ǚ5kزeKhh"dggӰaCVMEƍٰa14lؐOcİ|rvY &d] |Lp8m۶MϚ۷bŊD+ x`q_*U|}k֬Hrss}Nj_ zm(ƬgL4/`Yo^^}U_5Bߣ/xw-&ֿߕW^%~iC> >ܮ`ٿ*رc}͛775WwNq|t!")L=i`{.>L]v^`uxHTO3|p'' x 8ĸ tԮ]/.]D*IT^?tܙ._~y`*0ʻ$ 'b~fTR7xoۨDQ]Fw'q:p-pL$J*ٳ'*ӦMs`V l.*k15 @>}]r%̛7.7x>7=c8F1qDMNWC*+ׅ?cJ9 \m= 'G2~BYvv6}:a {- 7D:u3fLJLNڵkǸqB]($)Ե@FFHye{os~MVj1CHRax饗HK T8MԽ scM Gz/i@w#4eO>9T:c̘30b$׋kBz!|MVz;M[=P?N^z;:Ðb} 8[nuGC?q7{H :K|4}Bwr07v?a6M)>^z1tP뷘+:l4^}U>Cu2bڷoog;? nv.8+RI \ |of4L SBJ֊DFC\"3:twjkV W!gJv qHLdddбuh)?D+` gY8%"1ZWK`ԤI/U ,k4h f?n`t9WcnRI*YYYWqK`LT~XWJaÆv3+23='ZI@ծ{)`jEB |-xH#Z*R$HdvOF)"t=wd殁 j½DDF͚5]M@ULD<o~M\`I W"NtBJ8J3"9;zsUbdcݫ8ĥ9xHCŘGJn\hQr$"I!tEΝ;]M΁C" l>,xt`ݳ~$"~Vv` A9Z Eƍ_eP$M΁C"-3NNu&"oÆ vs[:.gժUqID8јmL>NJCDЮ]XԞGbիCIi*D$Νk79~rz~ᇸ$"K7¥2βJeIjgϦصOwΕZ̟?8D*}.LLG؇%"qA͛gw}LPzg̘נD*=2T:_܎CV"??sE"w2)9wŞ05D$)M6-zLPLuz,X.VDbI"cذav8Cr7-|>ƍ+t0ʪˀV{ 0jD$fj77W7c6I| <};>GG>gy={x 999s=Ub*~-+V&F:Wn0מs뭷zHepe! 
h:3qI6lX,V<u::p#F |* G%###QT&Ѥ2Ɲ2J :'x0\,XIO>u=vsW`'Nd…z$*ُT?a]&qKRڵkݻ7:/qq1xJ 4] 3sf>8&RIEh/S? h$%߿~yF osOhȔ5m4+kPQ8{^ꫯzHedOZbC]<qIӧOBMN̙3K.m۶yJ\ݻ]3\sc0EZ>ZXbΜ9tڕǓaHקO6olwR| bC֯_+`&Wr6n:K=8o 3?RXXSvo߾[yCs@bzw}Wb`վ8jŌqKR vcnn.*clq}0a= +vFIX{. |4}z衘(RD8jC\{10xw3I6jȐ!ՋzUkvE‘i,Yݻk׮R.tfmւ+̶9-q0l0f̘aw=`. ?^&v̙3ի=I>'\U_+M7֭[cHRV*$`վ -Rcv0DkbĈxFY"~ix{ ՞Ht0N>Ν;'Lw5w6S@8%))L-N=ۑ%Kڵk}tޝݻwK*0|p&NȭJ׮]iذaʦ)כGMCVE0`5%!ׄR_pٲe̙3'R<#vXw" }SO>뮻zðիy>}]\Oȇ$bL`Ro;v_W_M&M4HDa5xJw~Œy[aPu?| ~!W?jfG07oXl]twߥv-{n~m bgN x?ҊKt” ADpfׁP&g3r4f.:{丄6I^9""6<;1`WubZ tBh;^I&I^>asr^/n!Aa; fc,'*"zo{DbV&: xxHMץ<njza TVDD0.M*&cBR kgЪ@#Z9oDuSKRW6f%IIdUf^QA֡""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""4H!=!}M"jZ]B~ {H*-P-?@qC)1/yF$:~GkF$"K)3q|U4"J"DDDDDDDD4A!"""""""xN"""""""9{X:DqM#^ٵ,E ( pXu'g ϢFDDD\6f3(K7 iw^`9Ms}>#e ?|e L: :HhfS!ʷ;߭kf[oXc|'Bbx7e]39kDv-⑜rp5 &Q\8{!'E\uC5E@(kZGqM<,Vt N\xR~T-G ]DDD$bQ\sfu9Y㟀M\V;w&H$Xe&3iBDDbi j#?,MEpiڔ}GŎӉf IiS" """gl>=sLp^,cGqMN5n ?u`mF#zׁN vG0EDDbٓ LG3kC.esYE0 "z]1$-MPHY|w *us1Cuw@9bY*"""c/KugXhEu(^csC'sæ:'`;ýfRZu|6p/0\JCGu(v\WEADDDbpPt"XžoJϹwan(c<)}9 J97hBDDbm*աH#Xb:f#6j9'mU n/ Gu2 """gg4D;0"1HlJ>_CP8廀VSg=}y<&gߩGQ1$MPTpp}"rJ yqncv;tr`: ١v{6eO\1\&IœڞȑHli"}m֡w185:Y39ʈ~<9KN'@IwxEbG`?a/!'3whO|k? 
N"Qۡ""MXfY?Pػe?agw ;Vi1֡jo!O&(*R/H)"5L!s::i :N욘mAyQErpglmC_$P~&TeP<؞ȵM uTJ?#=W.,ު V԰u(N!r G:`&:L|L"0uP0bpb?1+Dy(Rq4O yֱ]ޱ+\]"79=w1sm=.q,VP IEG3"S@uz%<8{YwFvYV_&S÷ϼ;DbG`סxJ4"gA87%Nԡh)6c2:n dvM&.?p|N8A&[/0u(/ED΂Me!yCP@]8`֓BɕÉAHgHh\6Kv.fih-QvdgSb7}1 <;Cfb2}Di.pU "): jot$4gfUu(/Sɘ; 1ո}38 fʈ$4UUB72caE doGqȑd-oKȳδkjEc2p\!tiG)XBDD 30>`ub]BwC0snp^wCDDD$1Lz+0`HV]`=#ֵ1KB}u1IԥpÌADDD"@:[}SE+B q]׍)G """[?'Fq{s80ch!QQ\;cpaƐ4TBDDNgt>){a+ϯ,C!fҬV!1Hlm #?K[Z=`D_± w`96?f IC""oŵIE*k7gaR>C^9b !L61|M ןP̊ ҷq!} -e o\~M"Uv(`&>`N9b)av="rdNdm7jČʲ}X!x ̋LQoDŽr$mIDAT |-dDJŁbKcT( }WϘ#(ZdPnhB8&0)ݗywzoUYL\i\-%hRL¼G'pXZi}Ga؃Gz|i+"n;z^8|;䧯dDz$zN}B}bQDz]2[`5 MuSZnZzCi6ZKYa~c M51'uJAC>˿iP 8 8GVXD}1]'-05Fe{a?2Rv 8ۃۗ[ k+㡲B,jMZ|C?TL5[MZu/]C-F{- "CK8YڗfRm]N'_mpk3Z8f۵49 AQ U(qbӠ7a֚/RZ\ZEMgykՊ3#8R@pD4}f%8T+)S%<>3#$-/]M񮭦'}&AG,<*p(0L1{-:!5#{\0-:iAdd-Z޽*;(91!/>~ !*QM<ĞKM-f0o$5R,@=5陁Jug~~8PG?.EMר%M> Zg/Ex1Z}ϟ3/zv >c؉'Gc'jѲgOd|a'?&Ѳg6/zܰGP]Y|U*=_uǢ F.] R.سnFe mKaۏvʘk=l-]Hovh;;@bo5|^s;LWfNLVs__ ISi?,-Z PewWQ.5h)vUqTH Z|C0h^BJ-vF-R*9cKCr Z夳 -17Ѳ}_&@R'ܦEKLֽ8gEyNS,ֽxE>t?O#wTTe|QXIK"sP3A@UZ%0Dz]J;֪*@ BVϝNY 9 tZ҂=Zkڳ\KEi1kA~%4 g%(4OsPrJA:Peu,: xɘNTܡI7({hQZG,UE["ۥǵ\^Z QVjIeLk,zϘۨY#Pt5Psd_vCpVk)ڡ{⏯]o JC?^JGm(߸˞&B޺jL+fAQ@ޣR˦J; /G@Zzy5~{~\] +x=^T)κ^lznǩ/= yx?)|,/,Ds=9BbWT_[Su/Kn׀ɽrwq/#5iyUœњwoX~R{zѫۨZ}U˾~~.e? f';6Z&]ԛOb׊/ ۔i٥-[afnVq/s':6Zr-_e%ic-u P+-v \ ؝1oWI,㹋RMZZco Pú 'Bj=~©ufϛoV2[orڊ?f}rwqkܝݛaV[!mKLɣV['*zE8zk jP]ʄ23գU3{zPQWM4ձm-=km?Lc_?d|a?ǞEkXav OѢ0{>fmzG ђƦr{!ċh꠸5rMZE]:c\-7|j;j"h3m߿e4\Qc 7mK6R|Mm}.|{;b wP_D%vOQVWA8H{XҤe(5i ;;ӌ@iY={qsY"0Zb~#Z:M!' 
2ұ'mzE-cZD%{0޻wm^GKa$?}e-K]I @>{rս L/[Kco1=_`u, URPk;UoRJ;RK|M9%S+R8H5 MTBґ{6GL~w{}?od 8iNZ\]VC|E ;+>>ڷ;o?YƕKꠈ-t8(~v۴e&1@u#NE˶B+euty-]W̮ɣgT4/&BP*Z|5-A;8x!DrEOوUuP*x9TTJ_O8Ы^Rʄ@c8tkzd篇2"[[aWEKQN:+kv8HM|`u ZT%sb79rNj;tNƺK1z~G҉h36ٳ]ՓxyM>jKBqfEOAǑ0 8s/K5f-K%|Bs‰j49*qET|C&jE/`&-YΉJ8 {ϚbK;4Oђԃ2H-cGwЕZ7Iz"wų-OrPX[xoyu s4MZ|C$I*]NդE[sQ|YQκ6}M):jpSa>2Nr[G _ctӖ+up.C{w%S-CnfKjD^1b.ÌըE[ K f6|ƧOkzyI#.2-K:jOIv/:j'٢k~詸D[tmwo*z.7Z*r쀋[ <01t@O]WZhi7ҡEkB#l+K f oWƲ-1('u\IK[zOCPHҲ}AXCԝT)n-&:'\j.Ee R TC38iBLK$m'S3RZZp'W<7L}v-S/ZA"9^nZ֡nMEwWĵ򆯙*r Zhh[.$h1.gh;~_L7j:Mɮ_˵|‹Z$u9/|{ҟ=`jk[/yAD0SXJU`-Ajv*ծ?*6њMIgz&B[ U3E7kLDݙ{q& [CZ{sGUjb WjCDp +fzAPp=A 4h E='U*k;~ޓ T&]ZاHNF ;{ػł=JǓ. ;}UfZ$;N4,|ע3I8~ao^<ӢE*y՛Bzb4=XIo^^1]2vE1Sj^]t@; {__e\-ZZtCW}CƧҩ7X&'ؓDfi!=j/&VftfurRzhMY^ʍ锋 _i)ǽ FѠBu9)/.0=!-Jžş@y~8DKoT1-euGham3Z,a@䶛vMx.-]jt5*qM2{S, FQGP1s *QQvãqxmMWK`߷TZ(3Cj)-Ԣ%,GHX-g{JOڌ6fi%jŸە`şE]}T~\TşMWɛn(Emw/E(Ίr l%:ԫǿ_i9P ]Bt".AAh[ ƢPBb*ZOezOZ"^7w!fn`6*;Q8x\_C51$Ե8F~-Th*Uw<^:*#Q-a5n-^!bx^݂Iګw9 ġjM !Գkwv*@<p :5#BP?븖 vvF9+XE]܄ƱXYKsrsYWA8G5d.<]7P7iA˹h'* ]WǺyѬEBjq+P9^B8PNA Ѩ ;3Q7cW) >bĒjs]j>t{Cݠe6]|pToq6Qbnzmi6 .Զ5 W>.'b4FO#@ u7R@8xnQ #Ujk-4vi!c%`%zIp|lijVE&R]a&㰧U^_xP;=QG+'Η>o5~uMgY{E|ER P/nxf-M0S5k dFcgiw/TTɃCtN wҰjPAqϷ8C=|…} <GhEB%p*UӵA(,DDɸՒ>¼'YXZ]ſ]"E8` ~Ө:@= Np1%ݴx 6/B=*Aݜ >cżvޣYK2 ԬEL:&cKk53wlwÜm:Pa0 ַ$ЄHB5qj:u  :PxAQHdz~G]Cӑ ǼOuլEL^>Bu*9؀fߧbx#w$֠lԸ [ ,\[Tw]¸8[A7P$e~ 6.|ָBcn<4奨B5aAwbprwEV' ؉* oE@yѪHC̎3p0:|Ҭ3|@ՏߨWMʦF^7ƏQBUBQ0ɇ?x6㫚uǁh%PѨHtT=3z^ƼK5[zZa)J.:?P-_9zLL6>[5Z%E%R@(Q?=fqѿPm:"x0TWTHFMujWk<ⴰ5*A7N1|ZZj۸^E#&(dUZ&pl3AaVEM~ԱN+ǯp9QQo^h_{n' Ql 4}3+P|k]z4Iݟ%NAT=!ժ(0).+oHtެ@r/q4ɥ!9{Hx6(cR\ԱU#Lk}WhU8Et\3wiU?܃yYKS*PM}ռT oIZjUYOs5oPmn!Bi LG5gp'5SE" 2gZ.zhҔVX:-D(\I[)/ԊU:CRZ% 'Vz4ϥ^L 3bL\ Ρ%L~xh֖ I^WVEBS#RE+"Tr)ALB19H3R߿HBoGZ B9VEBS"ܑvmi \:T4Z6u fGҶ_^lT-A0m?mu󼩮Z[o%<} B}gU[D`=꼙[H3p 4vk)0! 
;ʻe}}SP3Y?($ϯĠZMT69Ƃ Btu<WOID^uZYKA+੭EhA(Ѩ:JMkTa5dsa)fn'c͹XcKi n'& Xp:$h^7w80͝4U:Z7SuT 'A"{9_tsZA/k#IsTAhJ\9.(3"-+0hU:FE Tx{oᓵ*ob~4k'a1p\NrT  3Ղ T "\YKSd*h  `&A 0Yo(ggcPzi⯄êyo0#GG_ A<``'}i6C} us$twAn-B04kݨ+KGAkF]܁yS VZ{LTDrA/p|&+9:E_*E`f-SY`h U]>kL8O@  |+G 'Q[4k$#~%n'Q[' i:W859~FE5=)j+xCը@#*r`+f-:* a=MOG_lp{?(@ TaΦ@[T@MSu\-u} ?AW('s [Y [(W B#^?8Lt&??]v/8( O[FDDGxxsM`Z'4Z".呛k<R15 8ĵ]vhBFRZZ֭[oCMLU;qVb<ZpI'q-0tP@HWdggdح{ !E:?WqKqVF˔jgYɞPV:xmY)Qa1t#ظQo7y@pD41]ef8{*EDu,g xbFݠ yOn]t%&95q*!J޷z+w>E pW;*r=ʄ5@jj*sƍ#2R 0Ú5kJNtbz|: ?WœtZu _xwdލp`AD|'Zv>(ݿٳg7uѷo_JK EX39Z0|p/^Ltt.w|rMʕ+j\ffwp-駟f#$VչY*:6Z5y;\ CΤөx-qY ;*/ΰ嬸 :xcI(S32G]ElTӊ :Kxg]BF Ts;)L4;SN9իW: G:Ή .g}A$y:㻖N>\f_s iJFs [wIGP=/m,J}3z& 3߳gvkA٪;ߏLd?uгgOM3s4cP էFׂO>>( Gq˖-cԩۮŝPcRW} D,VPP/3f >%?m;0{w8f9Q^\fvD|gF\dFRQ;I=6~|̵r6Dю {Ӱ:ؾ|Kpp0fͲ.j ܮI;Ps 焕$kwvݎt./7E .W^ >EOvd|gG\h['\P}t(\gɣ9' s nXsYQN{O=$5G1_K wum"xgaE7T^kȑ#YhQ@8'\3aZ#(ǻk /[AYAڻwSuV &e̵U|Le"u-y떱kp$;2½d,zܰ#%8<>޻v:_'ȑ#3C9Sű(x(kSM:0|LII { ` K;AK/I-A|Ndz9ӰN0[ ڰCtR.+KOvTB*,WAw}lT@;U߈o5R_?֧1-s̘1̟??D W_eժU_޵TK#pƯս{w^_iA0y21ӯ՞(ڹo^5= mtV6߼~D"SF$@;q?xD'[4%}QK@Y`\y啺xsr 7 D|ZOPWN _&$=#Vblhh0l6qKټIx[mr7eoQVGƧf骨$7q_L#W\qsaƍEOV3oyʖumu2N=Cj!tt@ et箻uֺeM60!s`5Jg27&;xgOvTB7cۆmJ[lg}E?hsP3OMc3vX [?7|YEyVgL(pAUt-:uj_B e<Ѱeڶ:CW}k eL~v2Zgҥl߮Ill,-s9wv-:?J8!CЯ_?ZAh8+IFh{ESZ`IUxm]a' 6)ݷ-Nt^t8l>6sMJ32l[]}@pp0Cmvut¨QXpk9g,:,+PFkFY)i?`ء-c|hvavae4k˜8q"s-& S%T* xH=d:Yvʘkm6y$1,+% ;eR764|N=>j؝Nv)b~DRRIIIe4{;<;p8Gpki۶m%R#1euDՄ21i-*'I`P,6#FeUASVǖO5쨄$?ٶN{"uo^h&2zB"[k٬HZ0;_WCӖNNo79-[Z߿?wT ejPƉ}r(_y״SPXX{֩EfȦf;gflחYawq1vllaԛG#{NPh8]ƻ98|][ Y7ѢLGa]t(EkIϞ=A%߽PƢ}]PF~3Ao]t.-v=A_ 7㻆v)}a2N#|7-|?>evǓ/%"a E,1xmpP$H.  -XR&t'a:("nS4xHmͿy=3)ڱٰy}e59?oG!M)^_x(miDx+̶ x- Z%.اAh2莽 {Q/װ#v" ;o2rζ //)ddzmwy9!ѭ ;mN6Eml[= AyyyVs. 
cCi^EHT+- 7w ػ(/(pY A/`My,:)g\MXT06}'p[d76qan@ozFRgY)iq!մSо}j[O$Ih Yڻw,+5C{0vڢ g4사lR'iϿ~҈lu6-|DA5Av hmmb~|vpD h{öԳ (DtV"QCtRX$œQ Hza] A4999VsOM >m۶MA!Q1$ְvduRϾ6Io$;C {mȣp)mo]a;BB:ތ(ݷ-F AHxAal|nH@qh|ag|<; ;23/0u dURViH>>s>~ev£ IDATrP+WtlxHm$7ɖyNdO0SAUv{*Hʘkj;>C ;(Io7Yba?$B3'<6'^ld% } R^\@Ƣ'lHuabNﳥ9'Qi>D"k 0'J xj*kZW)F~پ}{m x`Ͱˋ^[+yL{{w&vO} jEh32pFvx=f/_n5k(vZ},KN瑗h @at;/tGD2t0B7:V{*[hdӰ3>yVS(BQcY{7~YFdExlO-> @~ v)GRpzp:)/.`G8䂇}tyۗ}hC5sq V%q[NhSfѢE["##>}:aaab}_̟?YfY ͉xg35jxщxgr6O~dwOC*dv%ۖCSHRWyʑJ}N4eVZŒ%K^GL2N:s~mX Ap;(OFff&;v@A9gƂb6-|SM~3ؾJrUǡ pãIHٹr~вK?2d Bׯ>gE9I{ϨTbʔ)b[xٳִ;w`=:>"Y:~HI(~;$u3r~^@HRz-q`:G[T0μ%%?mK͚]c|ETTN B%-`3k ްKTMYڎZSA>pXlY"v-rM7s`ɒ%7W y衇#2XRnz) / $e{˶uw[x{g֞׉[osTÇ[ϸ;pQ<>r-|'Xz rMGPIڻӸ16Ȱ.yYަO԰W2 RRȦf4tyavlE;˗/w k|u =z9Ow2˗s"B3qt;ܰ,kD•jP b킶 RQjqnsmb9?KA9Mp-XGxhKȖ-[:t(כ]W%KpB9XER.b8 / ik Aܿ_/\ uPb6/|ԶNN#".ɰ7} %pLׁ;bao_6 8KFn6_/ɵ^k._70rHC|AK勶}t%Y%43i٥aolm梩`~'GPQs: ~N8 U8쳭T9B`*طoÇgZE ۢϰtsR_KYYYYqnԃBwmEHT { Pi%liަ|gO[ATBWoYMJGJ"%s ŻR2#TdOetkՎ=.Y^FaZxG;~_l|ɓ'ӣS_())aܸqeLjT*9|w?H<ҥK5jw.[lFGݻŋ3p@ 2gvTbG}?i3u "/Xt̽'USeAhz|;hi?࣍/jTG_|"k;@g_H;NKmX |d$gpVA!r[fp)S8,e M;IR$٘v mWR@=![%j\8s$paHځ7{Bs wBZPVT\\LD$~vckp"0|FǝCeر 8S#=oj 'pCM5u3f`Is p .~_VB-k/N:"+V!B D3qD DuunJyGSaȑkғ`ѢE_-S??uK,i?~ѕt)99wuWu7l'?}P2KʶG'ѣG3gvu׮!Hm4٨0l[c{ko5vjuM<ӧꫯ&]Wwmg.Bfg3f ˖-cҤI?Fڶ3Z)spBdAr9%g.a·J7^]lԩ7i(hpbv⮻*Duu5/8Z'/͠O>̘1. 
T8'K,:x+0C7$ګ1|_dڴiL."_^T|h58Q4h7n۶m0 ~ 4*?Ñ 'ec T!dS-*`!7zv6l>W\qEu00 -weƏbH]믿e˖rʤ{ au!{bpTҸK8rX^f)UcIO l%\B(u@UU?O7n\Rqj2rH^cCRcƌnr0'7Io|)#Ӂ S_x*ȱ=˹8Bj._V2KҌ'6`fϞG%lp wqIn+@t!X*SG t˻>,>:7[8ø~p"qNזUjNL^z)UUқAdgD< < 0`SLI;fjjj?~N$&Mħ>ۄM/&~_fw.ZRũ"0 e >Da2.^*/W^y%-b֭iw]QujjjMr4*%_!:S Ls1{BjN}UуKF=z4#GOS%7*Æ cĈŎOL' D@Ȥ";`xԞCT̤IYU>V,jfgR E N}*җy008,=cTjx 2` a*vꩧQd*!C~T9N25v")q HR9 P4Lr!ŌIڵބՉG.3vظهXj::ӌC=dOTRF.dWY$I)pTqFrD5082dHQD4?j/ JԐ],j%ⵤԣGw߸kpK~ӌCLN}*>9hJZMV Jm9 3f.Aiš,U zdvT :TMXܧI>R訽gIxSA$P χ2sdOJ%*MuK;JJɄDn(Ľd/%d+nli?$u^vtLWk~ xÒrχqϦMKy sW[(!YR)Y:e޺7X$gR:h3%{/n=kx;VJ6Bb?}${H dVH*)k׮j4y>$l}LaLGu+$۵q믿^Ԁ$5x+jگE^CRIYreܬki?UICRJ^DOQP83QC()$+q_,vLN|8*g۽kK*IoviŢ,M?χ4#SF`lٲdtSϓ#IEw;Ox=LeJZ·1@!)MS7r]̏ڹKJ*)|۶+d(&=/_TrS?󸩌RYٺu+rKuwZRHԕ}ywVk6H%3f$0ꫯ.ntR%ZF멌a*:͛93Yn]ڡ0tPZv(E5k,y睸kFKrڎ8ҍH ބDnm> M/GI; z5\{v(EUWWwgݣ4$J} ছn.cv*nR%1QE6m4nw,W_};b>o⮗Vv/8@SO0nܸCܭ~2qaʫ=SN흋䩧gk{2qUWiSz 4ް]l^ӧ/:h6Q;w?aa4`>O~C)3g|d {=ДrWT^G} 3jLTt ^{WL7|k6 xgrK.O>Ç+Nrʨ"KYvm+i_Z&,\O7Zb2 Խ\| py1z芛,uQ$``Ԯ-B\*K7o /N6?(z|୷b„ ,XC-_iӦ1k֬7SI:g75/OHQ/=uTFuɓ'K8mpV'K2qD}ݖ*A {ygS Im{FҨ^z)Ő2ԝDߓYW=!uдix! _~t1a„nSij3zL<9z 8?pLҘ?>cǎ' @5QE2g͛wM#{4C8m*sqQWW^Th͚5q\wuqk)ܘ4^}U`=avz! [l-a4`ҤIq0wܕjEDӢ*ܤL2W;۰aկr +6[jB$)-MfJ;.&p"_M;IR\A vځt-?7{G+#6nyAMIRx80bm&L:hʱt{iV W*=$ilN%gZA73c}Lځt p紴C!RY>!URL[JYaЍ N;NPO2PZK·\Lʯ'F̦3{iQ)?D'y?W0$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IRwWvek9}/?0pg H$I=wPHa$paN$u[l|PT?vm_T!$IRo6yHi2a@5Ti@$I%`X?಴*$Iv00< 4R6W=p6ewT$IRO)X75)~PRDTC9?*6 $u6n ]1{G퇁w0pb্gC x'Gōqp27.y)`=U~ H{/P墇%h_`XBXj%[s ֵ5FaKts 1!P_T#R.'3+"xܴ\?̉!:fnHĿif'' n7 :+>qrRUv{4?vwزgLIRW%ܙh19na 1&@ko BnT9g*'Rq% GOÑ*A2z^@&r pX}l7nƴ\;ȉacmO^R 8>HTb:Xi}ǟE=T$ueQ;73!.ۿk8n<ϴ-Մpл1ď'pJc"S ~R)~}wfIT$uvmƃ?u#Έ'd _'i^TK*mf %GUN' L0s=`Ñ*0>P$+n$ŭzxpm0P$g`ih+-$Gz.!LJQoBQdɇ'}HrBՖ{1+? 
aQd5"#v:]bt ՗LEd2P IWճ+a}R ;]HeI] IRW+(2/-OO&b3p}VO\G)Äh;d.+⤱^7cNI1I$8F۵-\$Ok"HJSo{ѣSR8 aaF"2ɬq,PZ!a L|ϵxq{3, p&QRwICgZ[R18@y'/zmա Mu(9.oa+1,$3՗u(=`B{,6Ek1Hċ&K5J bU ;ӍFR(:<}^@I#,h>U"^#h68b3a:FKz4?x =.o%| >s,"n!= =(p$<.Ij?_aa]XXFqaZ?/֞+7%['TIvij;Rwq05P]\%WN<K 4ʐJ!ŀ\o"{=6n֡ho88c"~9ǭ'ScBcA*kz \J,R~EXWnewIEY^E2Β(JQ\'=:R2+fĒ)5_8YX@ ȮCqdvo2'$~{` Uߒm IԝgyP_iLzc:zeΓ{׋'8ţO" -gAlmGjo|%,O_I1l$L3i H RX ܓ,R)ZW(SrHt)q tz`9}7_m8NX%i`"=/ףC2Iԩ{uq 5pR?}{]@ Rwdyupv2crg{}aϞo8vch?R ?[Z |q2J{^c:{eegۻ@=s@%i #r^ԽmnFF}xWFO$F}3 BFk·Z>T 3($ImăB%@u0_:ɛ ޠPH$)sfϢkO?~z̺$\@9&ՈUI5"I*/E2>ZtRqH\fT$ImXvRO;9ţVmJ# 4 0- "w)i"Uc#&qIENDB`pairtools-1.0.3/doc/_static/report-orientation.svg000077500000000000000000000473761452673171500223460ustar00rootroot00000000000000 report-orientation read { walk { pair { default for both parse --walks-policy all and parse2 junction { --report-orientation pairtools-1.0.3/doc/_static/report-positions.svg000077500000000000000000000732751452673171500220370ustar00rootroot00000000000000 report-positions * * * * * * read { * * * * * * walk { * * * * * * outer { default for parse --walks-policy all * * * * * * junction { default for parse2 --report-position pairtools-1.0.3/doc/_static/rescue_modes.svg000066400000000000000000000512541452673171500211420ustar00rootroot00000000000000 rescue_modes 3 UU 2 UU UU all mask 5any, 5unique --walks-policy walk_pair_index 1 R2 R1-2 walk_pair_type R1 UU 3any, 3unique UU WW ! ! { pairtools-1.0.3/doc/_static/rescue_modes_readthrough.svg000066400000000000000000000600761452673171500235400ustar00rootroot00000000000000 Group 2 all mask 5any, 5unique --walks-policy UU UU UU UU 3any, 3unique UU WW ! ! { 3 2 walk_pair_index 1 R2 R1&2 walk_pair_type R1 pairtools-1.0.3/doc/_static/terminology.png000066400000000000000000001007201452673171500210130ustar00rootroot00000000000000PNG  IHDR(sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxuUHx qu!aq]` / _'5JB L2q;3]=3ԓNUi:u=DDD$Ӿ|i8LLEjiOg)i_ ˠR%DDDD}@> .\`8pP ~Òuizv? 
6f =E kX5=)zɜ6 l]]0:wk50 uWs)|AXVw;`Y mGy8յ=EHi<,)#)<'<"iMeiql <l>DzlLRH|OhHK)6M$eDZ60`Ez̹w8 (%xtMhl6IUx {1MN1a-&;a _&aQ$y?a _.rd8Eb-| 3w/θiFhL7xNrUUDD$[N1x ;9u3pkϗnB&qXqX2kjG{Udn lݵD$ MbwX՝X@,LU s%"0 Kbb:7\/vݷxZ )os yWR& [E"""֮Rd'_b-.dmWa'y ű=-+ޥ++)ǵ1JjqI8ߗEw=X"vdOްmb*ϰoV+öJISl x`1NX6bT:EL4q|۵] E UqkMX+@#]&_ 9)fVa$3BDD*=#Ǧx.Mpq'.\A '"QUZ>u{607  6FrxBOBǞ^` )QDD$]'no&'_i'N 6c@$pn]]xJ9XAd6 m&ـLnu  ZaHl`c',}mNvƦޘuEJsMX6ԋ]7(D$3UpY|%aZR>NbDJhHK'_Yê_$ I2Ů}q\G̒ʀ( -$iMO2α.l}#q]zEk 1cUڮUUDD$܅aI_Im` ߇XpW"x'M$דj^ZH ,|/ %;̶79u]ؼGĔ`ㄎĺv 8O9 wj}]uUu{MqO 6׾1lV5څ2+30x#X%Ve5S%EDD$T]`8%6a}lV $9g޲G Đ&鏡ve})>Vb qMh%0;)k؂uŝU6 뮚JߕcAD,<Ϳ^d8Ǫ \Z퉧x{.֋W,zk}l9Bō5p6> ?>й&cУX""""""""""""""""""""""""""""""""""""""""""rCDD*u l7<`Sn|¸DcS kMS |p;0 `+`+MM1mccK|Md/nx:Ӱ9/EDD8&\6OG*^8K7s9Di2دy)v]=Hck%w D6N>dc;`u~a'pd_2TDDDDKc'XcXM~`wK^Ijo׽:p8l @m6D-v*"""Uѽ@1 vE> aÁDkChͦ Ķ Kja-L]wdZGGYtw."c_d .8He 聝xŪOTENGw+cc TCld)w(> Ǘ~w;64Xx$NGm%;#=,"""IWU l haZ7LBmWн:{@rƐĊ?l;;^K:EQ={ݓh\}Nsmك8ILBD4MߋNF8Heq :SU[u;o8fǰRb*aZ?Z& 6BmNs޿<$urVNy3PU v_>P]ԘWd&L-֝+j.9p7lD&9h _`Ķ8¾kIs*iHl\⑃ʕ Z$r\y3Z8Hp%Z]bM5>ȴ7`x|JS/{b*cX+)p6 VhϢDJ/aT% ?cJ 5"m[\ە`]K^&/^D,'w vxdb-@gcpzP̥XumE*ee,`2;WJ ͫXOa)kF/M^S|@uާב>WіT^bL8$;J&]k9ʄ;%C'=u6\?T(VT# yx<_?%rL\` p6=Ǫ-5q>7?/N XG[y8\zdp*^Ŧ8̵*Zo`i6_PwOB:@Nu_NTm y |._J%KiwU?__[}CvSXqc1+X?Sr{k׼L}x˼^Ϛcus4yPweW]UE$.Z^NknoO񱫪\GyE7@Kn4XnןXn<;wf<kkEXÛ5}ҽACJ}>2U^һ_B=S&b6Oba &ϺۛXc1'I#'I#@3,i\ix4`˝4G;u/gX aQ6@Gg2-󽰮~qwSaEPKX K~&o™v" *VHg.| \ %5Us`k{^uYK L26㽍7Ggh)l?-sЖp.c_Y;u6qGF3)gBRZ56( tʠ|2իc),yy7hh0bn4!hTϵXְkQ[yn=Ф7Nf*{,zΧ*.Mgه[|6-M,s~d{כ=VHvr6ⷜ4͆@$- h&͑y㐊D,{?`Q@>6.4sd {b l; k8Ʀ!Jbq0~e0%g-%TG9m8-vŵ9}N :]Bgm}`TbӽL,fK}IwUuajn4>UwrY֭?cٮϓX&_5XnӖ'{%5oU?}n"}d}7-3ZACuU,RsMbth7)s">p"wn^5r1e{v?:GFn~kJ7mӶ'nxjh*7vݴ>=,53KqZ;µnݕ~7E^ZqLLXW" \-RٌNssEt5@ҸKJwъUeNB;ax;b'CV@w8ρ@KlLZ88 KBIc1&O_]'D*9[sn~)tX܃}%{t+BHa-bwz9l6a[.b< s,n`xѸ/R+[.NA޷/#LԴt\ฺLؤt\l2YȓX4j̥ݝY}>_1xKFիxmAEUiϮ}Y0i^=F=IҸhG_LkBK$i,-їju=IKK1vz\6ړtw1=*4תO˞LYX^*#pl:Xw˷+)M~翽ku|K0Z;.*X}X+sX5VGb1=qFxYX46u`-Nr$ªf]G!۩Jw^/6c4ɾG=5n.>?ye_¸L,:枼EЇ]; &A`U:qkwnitW?8.۷  
<<׭Iឳte^a!_GfNCz9Sn'=O̚Azσ=t'̢S pd /\ǎ+cToco`yXz RS)Y_mˬ]Nn/yZz˂WogN,&xl]?ɥۅfvZ2i Ǻo#NkaؘA4NǙ  DMNHZ/_Ǧ_KqAh҈qFaAN8 yDJZhtO.+A8-c.ђi 0Pm36 op?㴄v>UJnNw DI\\gԳb9{n]{p_ ak aYU泧B М~;DՒ^{9ftI,+/k~|3<+@WX'\CއxK4jq`am}7a]_Ǿ/b'z7 K4a=, 8v IDAT51y NmܗwА/C&=r7,+hI9 `%9ֿؘ&X@VW2v~54Kw>y",Z=5JJذ <:lxxrwƢ"n48( \W >`je4y;h~5{~@ d])[qh⌫w$Vt.U Ou0U]-+@CCO!x_#9bh74mЛXMe;^I,(qd7S DRg1eXa8Lg1F?_;):I6`2c!s.ڒǝ\1{EsF` 2մp+RyOZ7}v@:uO?b S֯s5/SS?O(32Kundgm3WҢf-no O yy| j^ȵ`B9/a//!zx43"UMBǷDNo~2RVx%y:鷧Iٵ~yH,uWzm\ŜgXɾ>g4Ųy G__vntCዂ`pf?y9P]vÞGLbRb󹟏3@"ݍ؉ oc쏛{|+HqXf[e8.Z:c]zHw\!V&[=+NT 莽O`Xx l?KtnԋU%8G)5mY[tG/xPŋB*͎ޓMI?]w,_е;[yvcxV1HH ꎿqCq+EG_'eĭ# fϟ譿R}w5>y0:d|cgeK?xzz2S(?5?`OP~44<¹Yu0[287HHuwI|z"\c '`s=M{DZ0O)E& Т6e +TG`-c[< [Xf*oKpD?GnJ$5yZt.8M[N)3cFϞ\?>ѩ'ܴfMoъ:I,7miOZT*4O\ܴyT*pw'm_#1?7QgSg~甜7jon˝̴fN,9С75={&=ϥHGwWWI { WWjp\]c~Opm;Obim:t}p}X RS:rSp}XGj`ZbU"M{^> :NK'9qIbj@r=VЛMߣD}OhqIl'DID8f>Z\︼G/*d,]\[z˧˗^9Uy:G+Bgh]rX6"٫?Ө%c>펻& pdObKi:pDObis44"h}l;ޓXZv>-8%qַ1%R\wPuom$I$swLBl+ؕ[YS˱|TGɈzLx/=دkul,kng_ ȶ4 -@sGE_͠RU&i _X4i%)@N..|PZR'/"spO]0pWRɛXpG+/My<[!tl!.GcW"uY Nx'ca%݀3\'pXvNy s*= p;#qG/|`v.:EZ/e躿vx1JIhŒ=hiYw֒,6EZ*#1תO?=K^],ڸ9\4SpOdVNឭUp~ᤢ;x`dĒ؜{_KbU7acjcn-@+,9U%c.] 898Ϳ$n:Ͽ/t_ӱ֮ΡX+xNyGeSQ?(6$t|~a^%уX2U%a-*&9)g-X"wķ@qt*cn%"}֣iu箆>Zh _ܭm$>nwi)͝Ê6Iͼ|.ѓF52?1̝m8]#/Q{Ҵ'0Wl~Z,jf>ReB,\ҽ=4. 
ml~n.wA牔:1"JRvsL<&R%Xg imp&IƲd+>{ډ4?TObپb> >Hհǁ~F˯,X;VM,+/2VWcΛȪ_ xe~ D依1Nľ|݉%p{&F|9Xx1=%.$Qi0N6!t, O##V"ك^ys0zDZT5$pNP\r.]'t.Olr8΅5k$O#bnpޗRlxBS/q fӹ3+"P.(IoXފ]8L*Jh֧D.H6#mZwj#^/BbZ`$`VHf$8ޗ)o۱EHvjMoK$]a'bS]KyEDDLU&;!/y ^ęT$4J wشYSlGj"""iw?6@cA*?S\E26;0&I^-{ZTrmQXahM*@%-MX H3q>{3nhRq&}+<|>o."""ZyIi4RR=JWXB¥H,y5N@]EDDB^ iъTٲԉp8ӻL.8hnE* JشG"闇0Kώݱ[H 6DW,a'ߣ7U-vCnW80ȅm6c]S{dZ=`,e/4"VXPyt2(J_^% iE)3bH$PG;ZQM1A,췥A 9ؼ9 ] 39sHEp $o ӈk9ؼ}[q>NDTZw|Vc]Z%sj#8őƼH-pQiDXqg;TKE.1#NmˀDxgщHuC܃0<Hհ?r1pIeq*TJ*D'QH3؈s-ӈ$SK} hi^:J'ȭˁ'QIX5 ކ#t0pAHs"*&{ǿrDDD$C |jBk9+ƺI_|P8p9lj?bcEDDD<*>4`3VHCKrӈ8xD'JEDD$KnmRB + fﱊ%X^("""Rp]q,߉Z%{ƒEw8ˠҠ8{@%9""""5 STV*;܊ kd~B1T`YA肎TpNx=H"i|>bJKŒB&l\JTcp.3JGH 8 *N4"q;X,*4T$- mMxmj.MMv$NBV>6NE؉ix7^%RNq>;Fda-`߻Rwdhe@U2a-6HePPUBp0ca e}o^'O+,ɜWp^DDƺfJmlDN`]jOE]PEDDD*MR) 9XR_a7H:=O@?Hq1}6J)ɱKUgߛ$u)68_)"""R)MׁTB3D$KIwcا)5Ēj)WDDDD\bW%5r+>`#(P KbKºwA-D H8%jweP"Y3?BJ"ـ񊈈H)XKT`=Ήvq _cSjdB[6b{Kd)'c."ݷDNz$";rqEN4"Q8 x8&_uM}8>$t:VDDDDn$ De௄heP"Y#p% |mk\[CcEDDD*/8'Q|y=c''"UAp 0Na|T(38'%XI,q^Q zbŠ9Y\p8T,"""řG+:<މ*=JR^މnFKtv#8""""RN pN}<(cTf5o.}/21fEDDD$I ӈR5pAdPs-` 6bk`c3ZP.p PHhDNtj$DDjU$m 4V Z!8To_ G$mߪL"]\nL_ƥ8p46Ie0wG 8HEV]$TAD f'`VuIBUb`.""م@%""XL:Nޠr%`==it8NQ x8Mgv5DZHr™sKcfa] w"v_)0XGxehui &R yD~ u_$.;; \ao+؅W!{"]WgMık9(qu^RU)ʗ85;Q۰5&>lDYc3j}Ú,f[ /,d|p|ԩV-j v{f-X9X vur`!MY97P+?mۓ~Ɯ͛~]`u-*/D`AѧX/dĦku%UAyع`o"JXBӿ>^z$NxͻPsQc)𬇱TE$3wyGU5<{_*e!\;*IAM.aS5Q'f, l]6'w .޺k'{|>6q]_RZX}s 7qǀA!֒b>Zfm[~k$_\?[mwS3?<^[8ߝ8Tjm'^cNhsGQ5W)sȯUA~E: ,|V~zQCf\o}t:ָc#do}Ѽ|;j8E. \+FQy `4 x{CЕL녕t%͹@m{ %s~gQTlTx.juLÝҡm ?,;?[v߮^u׫PEE4nc+$|ttJ}ۄ$.qTY-NvK?|µMdž_>ۃwЎ|F\H~W0h@>$}er$ekI|n5؄99X5IyK4WArsSOtKq18 IDATk6^uã3gǕܲ6Y۶ըt +l Wrslq={F9H%k;gv'~]IO7(ޠ4wTp}}Ŝz~;"uڥtot ߸ # {.Ԅ {>ǹuC{8ni3gT-#2R?WDq^bm()8( J] uןuJ}7ึ_ZTuC^4Dzuz^Amڻ]떅t=tVs|K Ϡv֥UDBuzi[Z\F8F\ NձGDӰ74:Z*V|?Wq;:~U_Ըإg뾞㻗;uĺ>Օ@(&/):n@6} I^C;ht?b! ߝ`WUC {Of`䞥lYGDhT>?`Xm8N^`) l?Дka{>x&cɔ80 ptۥsb A:u9Cuk7z^NWrN@¼9!Ǹ[O껺>6k:KKhw,kXFH-|tIH%VQKqٲh k'}lK3\]Zv݇B?Q\8^JwwH߿֥N:;uX ~nIx)6Dlv>;&#7!X2ޥ؉/M7,ok`MN>wc'Ki῿[bs*{(߀}hb3wX&/Ӱ@XmL'XQ? 
?vb'˱X%I’Wl K2/` k"̀< ԟII 8W¾#<!)񨁽w5Y W,ڸIL(u@R|L`BWPA|22 nI-n%8g16pr ]IVĩ~-qVhF>vmk邝ODHD5Zaݨ\ڦX N5G`kcX8Iǰ>Hk/XX~7;)kp+O0N7aD`s? $˝OLb7wҸ{ޓi8[Y5,&Ih?ykaK,ig)N&K^ XK$.9XDj-k'EmDO.IaZ9+ v#O$K1u$NCM( v~5.tX}o-^L$|<֥CZ]]Zm ]]Z(#"j?ѹWi +L6NI$}>+b|I[,m\\Ѩ%mtoY3~v]ɥw,֓fslXbvk8v q,lu{`FSnt$NW W'r$֕~V,g_l,Tf<m閮cƮZac^NǒA؉ms&bކM^]8{/|k}{ͻu<@#B[v&X; 4kY<~ mP]ӕyuZO>dlpv4+X;{oyXQQԵgh0v9<`]eZOo3̿Ok,i @XDjY"={b1}gf%u{'m+u>LD`c{Ӱ߱ag }$vw6dGhcDtm 9Hֵk ~-?qqpU/%ckI ͝]{viڥ5^ lw\_Sڰ"^A \ܲh*k'ۂ_iNwi=2j4r/{Hztqb=R%{g&gujq8@)xBbmh(Phq/?J Vܝ !F{ M̽dٝyy;ye{L 3_'yZ̀9ۖn6>W@` m"lk=w9AC'\n`{lp}>.5o?n  3cVEXh27agUOpĭ1NF?N¾؀9603s\{wUv+s!.?/e[!7sB 0b2ƞ7띰0=T'^SnŒ+YM2\ dt& =%v'[aEKg2UXyNTǎ7Ѷ%x7bw>Gc^k٘%ЉO ycnHFx˹9j).*;RWWsߔwCE:i<=oTJ5YX߭8{Ӝ[=3z6zAgvs>Ҽcf j;l+gX=zqy}vY8W,b^g{-zVdяߋ ȓ ē.4qg2DP}e;Ƅd2=nL6"r_0ѐwM8N2DfOgŘ yrr5V]YU D)&c-Yhp\HbC$`ȧO ᎍC=5J߄"l2*UuA#ۧlxEZ3CG=9|͓(-`:WpȸW$}ߦm|)-[ql!DVeW{}/z *.kN.o\ ^+p@xHQs|}ws++{[;Ef=&Z|]J~rru)=|ae%cA,SM/l{Id +B6MC"ǒ:/݊{|ROq[gvl9>([FԗdHO aZ4, t><1g/ن/ñ e+ yI ?z3xpW9ï[e5|l)".TZUWsAֳBfγy/M!D 7?1Dj]PI·v'6,qARm*y,vD,*:짃6=9Ӭ' x6Vh8b&Bnj o5DZn CD]rۦl 2_-'!ko ?npVƗB7pwa%!Jo9׆'0̛]W*1ᪧ >/ ǰl*U Dp StFsW W=oy甭N_DRB0dPCg>6)[5~r\ntN8|Š弹k+CZ "8}@Hm&ԩ}:9zrJYnTU!RQֺc@ u|'m{B՛t0APRz/T9)Vnח|ĭz˞N ٧/kU\ZNc]ُMk1`Vc~EI0*ac! 
i}@Hت α؀6V%6*Tu u od3xNHCd{W-yB?'&$o8˃}4x܄v)[ՍȽsabP8CE2?x+a6=qϩ װcKBP:,_BPN]W; n.l^BP<1.S\Bl=ŷ\G~J6FQDj.yKg}66^_).sR|]{D Mr㜏 Φ!9Vq[R>ꢁy +caU$xWDzV3e.|-y1阡IZk>.raa¤nj>+$GRO l2baaI]Tehs:pq}a\ NH=gm1d ঘsk"oZ'0Z8m;< &O7\O:q ޘƇ >.DӦfzf=BPwIg<>MѠb A]|;>BۺH KFGjk,1JiN0ib\-UP_?+>ԩѥ….ٰӧׇ=@tm$-'W$ԂBlo ۀ'pO<'ҺzMZ}/d&v(6Kf}]}̘퍍UIPdm598;3Ō\3NKf p2[M !8"JU;rcgufgؤ}qva`K0ћX$P{d_2 0)Rb\1,d8e1y'\k^A=z1sVpWwDsxjqQ!9L^2bN=79!DO`McBP[KǘlZ{K)|ݷW3Do IDAT_͜o:n_>ګY@kȏ WayWenr՘WhoKk37E*ͬm6>pzRDpycaB[y}] m|&2nޱצk1uv wPT3]sO}:1o1qj,4s?–B@oƼD.\id6'e@̳/l5 Ԋݱg +r9*S2/ X4X\lT7dn!v>as;Ha^M')3N">Dž{ٜOKאd]!ݘ<ǮH`iV,͐v#{8ó[@}[536͢we}6 KAi1^ڢ-ޘؼ e;JK`$GasgjqAaH^36/Y 2ǧP J1^'!\ &sMB&/W`F{C%1o|.k3gɔu'{]iܾawɾWce#) ؽ8Flc0Kӭk߂%cсW1G ڏQ9s'}࡜>hp|}Ɇ kT՘}Dܱ>ACm$¹ÔU6lQZ#MsD'ثHӸ}R<'k.v 52x=>~&sՔ 8yWF]E|kybVo=/ҲXeWm?7_~4>Dw8a?K]|gbR\O3@ąe 0Sív(ÓTc 7zyEaQV325l"qϯ01fe\- gK-ui8i|k m?q),l\c\F4yh;`4nm]E NTAuX\T sčF3  ֭.`TʴF:I=G BPUo>/{˖_0I\k EE\4\THǢqn}1a~to|=RSGlGuygr!YTlhesKxwCo{ ,M);g#~I!%f?{SZ1_}Tt_rxk|Etgx| س_ZF. lX2;K)H cLJ]('pzbyu܁m`d8F1#CMXR8T!['15ݽmX ps2Ae@;hiy˖p rѡRm+erI3,<ظ)1JSc.eX>.] b ʻo!"7+4J1A25lD$ٔ G!"bBO@)ats_:RoXw$\oˁ/N!BSw'H G->Ju_4k~5qߥ4{Zz)o~m t~Ǘv*pO<%UXyȂmڲ_׭;K?gE_-!D +uUaЄ(`d8m7qޟܗmkVfypl]8*}YphތyW+ ]G! Vrm 96_%[viG42E6BZ?/`s_*Ǿ46pϒ<%w ey싨'H_u`)oӉo82O-0{ev)ڌt= "yvs_p?ou^׺jMBT`?UtLiyoP\”F̤n0 2`qt=W'Py%݇R u-v/M7CS f7$i c9 Q4lX``J=.":K?|?5J.{·d݂MWc5Aqg,'m mWaFx=l1twt ;cρ1U1#g)*.ﱗ:c>B 2E6Tb~(w +x Y星v#}[%ߍ ۦ8Nr"ݽ+1U6Oc^d3v->vnÌdNaqZc`iڵű7awйU ј)W؀̛,,/xoM;1юd-趮$l@> Ax\MD̎~b`v&(>=Ga3rڗh ^E%&̀6m9{O^y;B }-+9"`_B@(rO=0#6cy? }wYl}<{?ѿ LJ\p*A<kX~a7;tbD|)H $9/xrbɸ 햡_ah,1VNBl"yyo+[Hv}?f%3`qo= phu _ϱdcބymw&Z&}" cܢTԀMB zG6܆&|DPUKi?}Cډo4h?4c|L0XL$cDyaFFdy%,"1˟/~>/c6p  M6c|M0_н Xv*#|L$o9]H(`:WTp\@4h,ZQ=RyG%y}yq?WB3 v3 6COfHZ`s/{˃27ܷD*&˟Hn4/> v+NlXWjکa}6SW42LfH{-(Bj Qp{[$/ %XUMi WOF Ir/Jl;YYCR i8nƔD3񌷜M(n )-/`s•Kep0,\&BR~4pi$&y?U_ h.n@K|CmRVA&fqcRш)6#s\"|Xyr3񍷜LnoGX8XPjAFC_N oyhV敽ˋ\S9?]MW!݇=S`kv'j&Բ1}ңet](XJ[F GK+C'qY@ BC C7ړh[~f ' ˱3b8gOXCkR+lo9u?;&X% U]ݧybLOXX2Vd7_t PZ\C;BtT*:7!s9%! "FF$e a=~ ךk|#5HbR'3rB ! 
Қ>a&V424.[Àq27aQ!s{h{?&s3&?;zHci]Vбs;'k'( JJ(kH]uyD8D. ܺT1"؏$+O[|]KvB}y3*dwÅ7mu7#+f5+[͖5YDZ|tFi؉dA]ј4 ه%hʂT4:57#YB(}537S!q9e<,e ÕKM7HBsvzbt͘2`_CtH][z+aFQ3Wɓ+A\1u,Vt\o96JoyzN{'Oenf ! Mk2M͢MOQ=>(r؏̞R,.~pgBsry[ޓpaze%&BYc~-Xc3Omwbt %hd?aiD2Z2B(3#=F6E6:-_ [AH_1-Խ%QqUm1tAf휲Qa+882CJxo#WB2ul-7t1g8[O<߉3xW ElFc`cM|mNWVGVL|oz˻MD lƛmSN۳%'QZ|11gy;`Y*~NxϘ+Dc\o0ˆ=z4+epb )¼Wؙ0T\!X,6Vң V"WCv{ ΀r7Y^7O"FpAQ@nl/Q?LJW+m$zT#lXüA5B.[oavBSo/g8dLK0ǍZ`c~ 0qE \+GB ޶K;Oǐܓ3}OڣIeb^aU;ac00W R$?-z!&i]gLC <'@Sm{$1;7Vo'baBe7QlKH22g H]n̟܋b 懊,551%=v7`' `}V`63Bʣm L  ?'isV\=baOcP=l&g0cY~N}(BdǼNӰ~o {wos<?>1yq=3x_cֿ Iy 3{kG܏1C+l~mC[u'0Z@0 4*N.oq_E#qۤ )Ag!DnW,x/opSiXLX9xS-n`nr}9ɈF\n3b(lh&]Shc.w 3t9޶>X=1q#{Un]g>#XƥC0f4Fk.{?oҴ˗ʼ@dSX)!D/#R9߈*rO܊Kij9m1gBtYUM02k>vI<"ƅ8mą%*Q&-#Zy|iMVXXXY`p%9 `1!Jx3I5>Xa\q .5C \3K|Ƽ]1v&P i.1-R5L`#VB@/K0#az ֝]nG}v<`c,{BjoK1Ö8ļ3bI0-\[ _~6`ϝ@Yփ3t-|l96u/Q}ayKM1¢VcFH$e_biR 鈭1~r{Hx#&B0Xyc =OQ!$L65=3]^Y BѴ J}Nƅaf=~ˏ͗P'ǩ_.ljXn+l_,'r v documentation" by default. # html_title = 'pairtools v0.0.1' # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. 
# The empty string is equivalent to '%b %d, %Y'. # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. 
htmlhelp_basename = "pairtoolsdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', # Latex figure (float) alignment #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, "pairtools.tex", "pairtools Documentation", "Open2C", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "pairtools", "pairtools Documentation", [author], 1)] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "pairtools", "pairtools Documentation", author, "pairtools", "One line description of project.", "Miscellaneous", ), ] # Documents to append as an appendix to all manuals. 
# texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False pairtools-1.0.3/doc/examples/000077500000000000000000000000001452673171500161255ustar00rootroot00000000000000pairtools-1.0.3/doc/examples/benchmark/000077500000000000000000000000001452673171500200575ustar00rootroot00000000000000pairtools-1.0.3/doc/examples/benchmark/Snakefile000066400000000000000000000243371452673171500217140ustar00rootroot00000000000000cores_choices = [1, 2, 4] chromap = expand( "output/result.chromap.{cores}.pairs", cores=cores_choices, ) juicer = expand( "output/result.juicer.{cores}.pairs", cores=cores_choices, ) hicexplorer = expand( "output/result.hicexplorer.{cores}.cool", cores=cores_choices, ) fanc_bwa = expand( "output/result.fanc_bwa.{cores}.pairs", cores=cores_choices, ) fanc_bowtie = expand( "output/result.fanc_bowtie2.{cores}.pairs", cores=cores_choices, ) hicpro = expand( "output/result.hicpro.{cores}.pairs", cores=cores_choices, ) tadbit = expand( "output/result.tadbit.{cores}.reads", cores=cores_choices, ) tadbit_bowtie = expand( "output/result.tadbit_bowtie2.{cores}.reads", cores=cores_choices, ) pairtools = expand( "output/result.pairtools.{cores}.pairs", cores=cores_choices, ) pairtools_bwamem2 = expand( "output/result.pairtools_bwamem2.{cores}.pairs", cores=cores_choices, ) # mapping only: bowtie = expand( "output/result.bowtie.{cores}.sam", cores=cores_choices, ) bwamem = expand( "output/result.bwamem.{cores}.sam", cores=cores_choices, ) bwamem2 = expand( "output/result.bwamem2.{cores}.sam", cores=cores_choices, ) rule all: input: lambda wildcards: tadbit + tadbit_bowtie + bowtie + bwamem2 + pairtools + pairtools_bwamem2 + chromap + hicpro + fanc_bowtie + fanc_bwa + hicexplorer # + bowtie + bwamem + bwamem2 # + juicer # + pairtools + 
pairtools_bwamem2 + chromap + hicpro + fanc_bowtie + fanc_bwa + hicexplorer # hicexplorer # heavy because it creates coolers # juicer # run separately with the number of cores equal to tested, b/c multiplw juicers cannot be run with the same path rule test: input: fastq1="data/SRR6107789_1.fastq.gz", fastq2="data/SRR6107789_2.fastq.gz", genomefile="data/hg38/hg38.fa", chromsizes="data/hg38/hg38.fa.sizes", genome_index_bwa="data/hg38/index/bwa/hg38.fa", genome_index_chromap="data/hg38/index/chromap/hg38", genome_index_bwamem2="data/hg38/index/bwa-mem2/hg38", genome_index_bowtie2="data/hg38/index/bowtie2/hg38", genome_index_gem="data/hg38/index/gem/hg38.gem", genome_rsites="data/hg38/hg38.DpnII.bed", threads: lambda wildcards: int(wildcards.cores), output: file="output/result.{mode}.{cores}.{format}", benchmark: repeat( "benchmarks/result.{mode}.{cores}.{format}.benchmark", 5, ) run: if wildcards.mode == "pairtools_bwamem2": shell(""" soft/bwa-mem2/bwa-mem2 mem -t {wildcards.cores} -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} | \ soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \ soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \ soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \ -o {output.file} """) elif wildcards.mode == "pairtools": shell(""" soft/pairtools1.0.2/bin/bwa mem -t {wildcards.cores} -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} | \ soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \ soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \ soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \ -o {output.file} """) elif wildcards.mode == "chromap": shell(""" soft/chromap/bin/chromap --preset hic \ -t {wildcards.cores} -x 
{input.genome_index_chromap} -r {input.genomefile} \ -1 {input.fastq1} -2 {input.fastq2} -o {output.file} """) elif wildcards.mode == "fanc_bwa": shell(""" TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam) TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam) soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bwa} $TMP_FILE1 samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bwa} $TMP_FILE2 samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file} rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam """) elif wildcards.mode == "fanc_bowtie2": shell(""" TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam) TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam) soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bowtie2} $TMP_FILE1 samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bowtie2} $TMP_FILE2 samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file} rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam """) elif wildcards.mode == "hicpro": shell(""" cd soft/HiC-Pro_env/HiC-Pro/ mkdir -p output TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX) TMP_CONFIG=$(mktemp -u output/tmp.XXXXXXXX.config) cp config-hicpro.txt $TMP_CONFIG sed -i 's/N_CPU = 4/N_CPU = {wildcards.cores}/' $TMP_CONFIG bin/HiC-Pro -i rawdata/ -o $TMP_DIR -c $TMP_CONFIG # Cleanup: cp $TMP_DIR/hic_results/data/sample1/sample1.allValidPairs ../../../{output.file} rm -r $TMP_DIR; rm $TMP_CONFIG """) elif wildcards.mode == "juicer": # Note that this process is not guaranteed to work well in 
parallel mode; # recommended to run separately shell(""" soft/juicer-1.6/CPU/juicer.sh -g hg38 -d data/4juicer/ -s DpnII -S early \ -p {input.chromsizes} -y {input.genome_rsites} -z {input.genome_index_bwa} -t {wildcards.cores} -D soft/juicer-1.6/CPU # Cleanup: mv data/4juicer/aligned/merged_nodups.txt {output.file} rm -rf data/4juicer/aligned; rm -rf data/4juicer/splits/[^S]* """) elif wildcards.mode == "hicexplorer": shell(""" TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX) soft/hicexplorer/bin/hicBuildMatrix --samFiles \ <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq1} | samtools view -@ {wildcards.cores} -Shb -) \ <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq2} | samtools view -@ {wildcards.cores} -Shb -) \ --restrictionSequence GATC \ --danglingSequence GATC \ --restrictionCutFile {input.genome_rsites} \ --threads {wildcards.cores} \ --inputBufferSize 1000000 \ --QCfolder $TMP_DIR \ -o {output.file} # Cleanup: rm -r $TMP_DIR """) elif wildcards.mode == "tadbit": shell(""" TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX) soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq1} --read 1 --index {input.genome_index_gem} --renz DpnII || true soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq2} --read 2 --index {input.genome_index_gem} --renz DpnII || true soft/tadbit/bin/tadbit parse $TMP_DIR --genome data/hg38/hg38.fa || true soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file} rm -r $TMP_DIR """) elif wildcards.mode == "tadbit_bowtie2": shell(""" TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX) soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq1} --read 1 --index 
{input.genome_index_bowtie2} --renz DpnII || true soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq2} --read 2 --index {input.genome_index_bowtie2} --renz DpnII || true soft/tadbit/bin/tadbit parse $TMP_DIR --genome data/hg38/hg38.fa || true soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file} rm -r $TMP_DIR """) elif wildcards.mode == "bowtie": shell(""" soft/tadbit/bin/bowtie2 -p 4 -x {input.genome_index_bowtie2} -1 {input.fastq1} -2 {input.fastq2} -S {output.file} """) elif wildcards.mode == "bwamem": shell(""" soft/pairtools0.3.0/bin/bwa mem -t 4 -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} > {output.file} """) elif wildcards.mode == "bwamem2": shell(""" soft/bwa-mem2/bwa-mem2 mem -t 4 -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} > {output.file} """) pairtools-1.0.3/doc/examples/benchmark/benchmark.ipynb000066400000000000000000021104541452673171500230630ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "864d317a-4960-4315-846d-ba2f36014614", "metadata": { "tags": [] }, "source": [ "# Pairtools benchmarking\n", "\n", "Welcome to pairtools benchmarking. These are the instructions on how to test performance of different software for mapping Hi-C and Hi-C-like methods.\n", "Mapping usually results in the file with mapped pairs, which is then converted into binned matrix format. Pairs format is the \"rawest\" interpretable type of data after reads.\n", "\n", "Reviewing the literature suggests that there are at least 6 methods to map Hi-C and Hi-C-like data. These include:\n", "\n", "- **pairtools** is a lightweight Python CLI that extracts and manipulates Hi-C contacts post-alignment. 
Aslignment can be done by:\n", " - bwa mem\n", " - bwa-mem2, ahn optimized version of bwa mem, which [x2-2.5 improves speed over bwa](https://github.com/bwa-mem2/bwa-mem2)\n", "\n", "- **chromap** is a [fast alignment tool for chromatin profiles](https://www.nature.com/articles/s41467-021-26865-w), not specialized for Hi-C but [parameterized for a broad range of sequencing data including Hi-C short reads](https://github.com/haowenz/chromap#map-hi-c-short-reads). \n", "\n", " Does not require separate step of mapping.\n", "\n", "- **HiC-Pro** is a [pipeline for Hi-C and DNase-C mapping](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0831-x), \"optimized and flexible\".\n", "\n", " It calls mapping within. By default, creates the output cooler files with binned data, but the script can be tinkered in order to stop the processing at the step of pairs. \n", "\n", "- **Juicer** is a [platform for analysis of Hi-C data](https://github.com/aidenlab/juicer), which is already adapted to a wide range of cluster types.\n", "\n", " It calls mapping within. Has an option to stop the data processing at the step of pairs, without further construction of binned matrices. \n", "\n", "- **HiCExplorer** is a [broad-range set of tools for processing, normalization, analysis and visualization Hi-C and Hi-C-like methods](https://doi.org/10.1038/s41467-017-02525-w). \n", "\n", " It [builds Hi-C binned matrix post-alignment with bwa mem](https://hicexplorer.readthedocs.io/en/latest/content/tools/hicBuildMatrix.html#hicbuildmatrix). \n", "\n", "- **FAN-C** is a [set of CLI tools that runs the mapping (bowtie or bwa mem), extracts and manipulates Hi-C contacts](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02215-9). 
It also has the [tools for data visualization and downstream analysis](https://github.com/vaquerizaslab/fanc).\n", "\n", "- **TADbit** is [multi-task Python API](https://3dgenomes.github.io/TADbit/index.html) that handles all the steps from the alignment of paired-end reads to the detection of Topologically Associating Domain (TAD) borders, compartments and three-dimensional modeling of chromatin based on interaction matrices.\n", "\n", "\n", "*We benchmark these programs on one million of representative reads.*\n", "These reads are taken from random replicate from Rao SSP et al., [\"Cohesin Loss Eliminates All Loop Domains.\"](https://pubmed.ncbi.nlm.nih.gov/28985562/), Cell, 2017 Oct 5;171(2):305-320.e24\n", "
\n", "Generally, it is useful to assess how much computational time you need per million reads.\n", "
\n", "Once you have this estimate, you can multiply it by the size of your whole library (in millions of reads), because we expect the computational cost of read mapping to grow linearly with library size.\n", "\n", "\n", "The benchmarking consists of four general steps. If you want to reproduce it, you need to run steps 1 and 2 manually in order to create the working environment, and then use the snakemake script to run the benchmarks. \n", "
\n", "You may use the commands from the \"3. Run\" section to get an understanding of how each individual framework works and what parameters can be changed. \n", "
\n", "Note that you need a separate run of juicer with a single value of --ncores, because it does not support parallel launches (it writes to the default output).\n", "
\n", "Finally, there is a visualization section with a display of all the results that we calcualted on our machines. \n", "\n", "1. [Install software](#1.-Install-software)\n", "\n", "2. [Download data and genome](#2.-Download-data-and-genome). \n", "\n", "3. [Run](#3.-Run)\n", "\n", "4. [Visualize benchmarks](#4.-Visualize-benchmarks)\n" ] }, { "cell_type": "markdown", "id": "8ae7b1ea-f64b-4740-8694-2fdb1d7353c4", "metadata": {}, "source": [ "## 1. Install software\n", "\n", "We will use separate conda environments to install different utilities. Each utility will have its own environment and peth to the binaries." ] }, { "cell_type": "code", "execution_count": null, "id": "0f98ab45-3759-4260-ab9f-79e487410d5f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir ./soft" ] }, { "cell_type": "markdown", "id": "b9dc2f27-868f-4bfd-bd9f-d88d18d6655f", "metadata": {}, "source": [ "### pairtools" ] }, { "cell_type": "markdown", "id": "cd56a6ab-3836-445c-ab70-73eaa4e80da8", "metadata": {}, "source": [ "#### pairtools v1.0.2" ] }, { "cell_type": "code", "execution_count": null, "id": "f3e0038a-f034-4c40-8e5d-d50f2351679f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/pairtools1.0.2 python=3.9 pip\n", "conda activate soft/pairtools1.0.2\n", "pip install cython numpy pysam\n", "pip install git+https://github.com/open2c/pairtools.git@v1.0.2\n", "\n", "conda install -c conda-forge lz4-c # conda install -c anaconda lz4\n", "\n", "conda install -y -c bioconda \"bwa>=0.7.17\"" ] }, { "cell_type": "markdown", "id": "a7548c59-7cd2-40f8-85da-7a6b2ede143d", "metadata": {}, "source": [ "#### bwa-mem2" ] }, { "cell_type": "code", "execution_count": null, "id": "695cdebb-7a4b-4ca9-b2a5-f0a178874b77", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "conda activate soft/pairtools1.0.2\n", "\n", "# bwa-mem2: compile from source (not recommended for general users)\n", "\n", "# Get the source\n", "git clone --recursive 
https://github.com/bwa-mem2/bwa-mem2 soft/bwa-mem2\n", "cd soft/bwa-mem2\n", "\n", "# Compile\n", "make\n", "\n", "# Exit compilation folder\n", "cd ../../" ] }, { "cell_type": "markdown", "id": "0bc9befa-e4cc-4cf3-84d5-fbae94a2e6fb", "metadata": {}, "source": [ "### chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "2f02a8e1-998e-4383-bc8a-d9d493b425ef", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/chromap\n", "conda activate soft/chromap\n", "conda install -y -c bioconda -c conda-forge chromap" ] }, { "cell_type": "markdown", "id": "37f50ca4-74a2-44a3-8038-83a4d7b43c85", "metadata": {}, "source": [ "### HiC-Pro\n", "\n", "[HiC-Pro](https://github.com/nservant/HiC-Pro) is a popular software for Hi-C mapping, its now part of nf-core Hi-C pipeline, supports both fragment-based analysis of Hi-C and fragement-free analysis of DNase-based Hi-C." ] }, { "cell_type": "code", "execution_count": null, "id": "45c9697c-5f49-4a53-bbf0-18535f05e465", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "git clone https://github.com/nservant/HiC-Pro.git soft/HiC-Pro_env/HiC-Pro\n", "conda env create -f soft/HiC-Pro_env/HiC-Pro/environment.yml -p soft/HiC-Pro_env\n", "### Working environment will be soft/HiC-Pro_env\n", "\n", "conda activate soft/HiC-Pro_env\n", "\n", "# Install dependencies\n", "conda install -y -c bioconda bowtie2 samtools pysam numpy scipy bx-python\n", "conda install -y -c r r r-rcolorbrewer r-ggplot2\n", "\n", "# Copy prepared config:\n", "cp configs/config-hicpro_install.txt soft/HiC-Pro_env/HiC-Pro/config-install.txt\n", "cp configs/config-hicpro.txt soft/HiC-Pro_env/HiC-Pro/config-hicpro.txt\n", "\n", "# Configure and install:\n", "cd soft/HiC-Pro_env/HiC-Pro\n", "make configure\n", "make install\n", "\n", "cd ../../../\n", "\n", "# Patch the code to retain only data processing steps with no creating of maps:\n", "sed -i \"s/all : init mapping proc_hic merge_persample hic_qc build_raw_maps 
ice_norm/all : init mapping proc_hic merge_persample #hic_qc build_raw_maps ice_norm/\" soft/HiC-Pro_env/HiC-Pro/scripts/Makefile\n" ] }, { "cell_type": "code", "execution_count": null, "id": "18e29459-334a-458f-8244-ede873b25258", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Note that the configs should be adjusted for your system:\n", "cp configs/config-hicpro_install.txt soft/HiC-Pro_env/HiC-Pro/config-install.txt\n", "cp configs/config-hicpro.txt soft/HiC-Pro_env/HiC-Pro/config-hicpro.txt" ] }, { "cell_type": "markdown", "id": "d00d4aed-94b4-4de6-83b2-9950c9d7b949", "metadata": {}, "source": [ "### FAN-C" ] }, { "cell_type": "code", "execution_count": null, "id": "0ebd9a60-f0d0-4f0b-9a64-d2ea386a15f9", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/fanc python=3.9 pip hdf5\n", "conda activate soft/fanc\n", "pip install fanc\n", "conda install -y -c bioconda samtools" ] }, { "cell_type": "markdown", "id": "a2b58a8e-b828-47c7-87f2-86337657f5e4", "metadata": {}, "source": [ "### Juicer" ] }, { "cell_type": "code", "execution_count": null, "id": "c2611844-0e32-465f-befa-a8e296bf54d2", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/juicer\n", "conda activate soft/juicer\n", "\n", "conda install -y -c bioconda bwa java-jdk\n", "conda install -y -c conda-forge coreutils\n", "\n", "# Download the recommended stable version:\n", "wget https://github.com/aidenlab/juicer/archive/refs/tags/1.6.zip\n", "unzip 1.6.zip\n", "rm 1.6.zip\n", "mv juicer-1.6 soft/juicer-1.6\n", "\n", "# Download compile jar files of the stable version:\n", "wget http://hicfiles.tc4ga.com.s3.amazonaws.com/public/juicer/juicer_tools.1.6.2_jcuda.0.7.5.jar\n", "mv juicer_tools.1.6.2_jcuda.0.7.5.jar soft/juicer-1.6/CPU/scripts/common/juicer_tools.jar\n", "\n", "# Copy the scripts to some accessible location:\n", "mkdir -p soft/juicer-1.6/CPU/scripts/\n", "cp -r soft/juicer-1.6/CPU/[^s]* 
soft/juicer-1.6/CPU/scripts/" ] }, { "cell_type": "markdown", "id": "3e02b40b-1f5c-4bf8-89fc-36af2f485c55", "metadata": {}, "source": [ "### HiCExplorer" ] }, { "cell_type": "code", "execution_count": null, "id": "8d5e4ffd-7908-44eb-9d22-22cb24170207", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/hicexplorer python=3.9\n", "conda activate soft/hicexplorer\n", "conda install -y -c bioconda hicexplorer bwa" ] }, { "cell_type": "markdown", "id": "380efd65-263d-4b24-9921-8d0be8013c7d", "metadata": {}, "source": [ "### TADbit" ] }, { "cell_type": "code", "execution_count": null, "id": "040231d7-c0c4-425f-85a4-da0cfc62eec1", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/tadbit\n", "conda activate soft/tadbit\n", "\n", "# # Install mappers:\n", "conda install -y -q -c bioconda gem3-mapper bowtie2\n", "\n", "# install tadbit\n", "conda install -y -q -c bioconda tadbit" ] }, { "cell_type": "markdown", "id": "e325db7c-93d8-4e48-9ba6-8867956398cd", "metadata": {}, "source": [ "## 2. Download data and genome" ] }, { "cell_type": "code", "execution_count": null, "id": "aaea1786-7c9b-425c-9aac-de8ac709688c", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "mkdir data" ] }, { "cell_type": "markdown", "id": "d4372383-a702-44f5-89e7-66746700f765", "metadata": {}, "source": [ "### Download raw data\n", "\n", "Test data from Rao et al. 
2017, 1 mln pairs: " ] }, { "cell_type": "code", "execution_count": null, "id": "05b7140b-454d-4bd2-842f-a0d042701a4e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "fastq-dump -O data --gzip --split-files SRR6107789 --minSpotId 0 --maxSpotId 1000000" ] }, { "cell_type": "code", "execution_count": null, "id": "2d2952e1-528a-41dc-8efc-be866e958c89", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Put the data in accessible folder for juicer: \n", "mkdir -p data/4juicer/fastq/\n", "mkdir -p data/4juicer/splits/\n", "cp data/SRR6107789_1.fastq.gz data/4juicer/fastq/SRR6107789_R1.fastq.gz\n", "cp data/SRR6107789_2.fastq.gz data/4juicer/fastq/SRR6107789_R2.fastq.gz\n", "cp data/4juicer/fastq/* data/4juicer/splits/" ] }, { "cell_type": "code", "execution_count": null, "id": "083d9534-ce41-45b1-98f4-2007c64fb5f3", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Put the data in accessible folder for HiC-Pro:\n", "mkdir -p soft/HiC-Pro_env/HiC-Pro/rawdata/sample1\n", "cp data/S*fastq.gz soft/HiC-Pro_env/HiC-Pro/rawdata/sample1/" ] }, { "cell_type": "markdown", "id": "a4683297-4109-4786-8faa-26089fa8d3e4", "metadata": {}, "source": [ "### Install genome" ] }, { "cell_type": "markdown", "id": "1c29a2d6-cdf4-4552-b856-9316b8e332d4", "metadata": {}, "source": [ "#### Genomepy installation\n", "will install fasta, bwa and bowtie2 indexes:" ] }, { "cell_type": "code", "execution_count": null, "id": "3bf9ab65-da9c-41f0-adc6-f7f9d268e55e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Activate bwa plugin for genomepy:\n", "! genomepy plugin enable bwa bowtie2" ] }, { "cell_type": "code", "execution_count": null, "id": "2b7dc978-408d-4b32-8a32-119645b24c9f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Install hg38 genome by genomepy:\n", "! 
genomepy install hg38 -g data/" ] }, { "cell_type": "code", "execution_count": null, "id": "3fe79dca-fc66-4b81-904f-bfedc7cfd5b1", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Restrict the genome:\n", "! cooler digest data/hg38/hg38.fa.sizes data/hg38/hg38.fa DpnII --rel-ids 1 -o data/hg38/hg38.DpnII.bed" ] }, { "cell_type": "markdown", "id": "8db4bf50-7e32-4b01-bb2c-a2f1c02565f7", "metadata": { "tags": [] }, "source": [ "#### Build genome index: bwa-mem2" ] }, { "cell_type": "code", "execution_count": null, "id": "e2691bce-a469-495c-aa3e-2abb6105b1f4", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/bwa-mem2/\n", "soft/bwa-mem2/bwa-mem2 index -p data/hg38/index/bwa-mem2/hg38 data/hg38/hg38.fa" ] }, { "cell_type": "markdown", "id": "3274559c-b130-4d40-93f1-59efc3abb1ed", "metadata": { "tags": [] }, "source": [ "#### Build genome index: chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "993b7093-d896-4726-bfd5-77c86bb5d302", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/chromap\n", "chromap -i -r data/hg38/hg38.fa -o data/hg38/index/chromap/hg38" ] }, { "cell_type": "markdown", "id": "aecf762a-f9e4-4b87-863a-ca92ae88bb2e", "metadata": { "tags": [] }, "source": [ "#### Build genome index: GEM" ] }, { "cell_type": "code", "execution_count": null, "id": "3ccdc3c1-f762-4cac-9821-4479d44c2f78", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/gem\n", "gem-indexer -T 8 -i data/hg38/hg38.fa -o data/hg38/index/gem/hg38" ] }, { "cell_type": "markdown", "id": "5a9bd0a9-9dc0-4942-bb1b-cf7b77363bb6", "metadata": {}, "source": [ "## 3. Run\n", "\n", "The banchmarking is usually cumbersome, but it can be simplified by snakemake. 
We provide a Snakemake pipeline that will allow you to benchmark different approaches.\n", "\n", "The output of snakemake will consist of resulting Hi-C pairs/maps in `output` folder and benchmarking files in `benchmarks` folder. \n", "The file names have the information on parameters in their names:\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ad48c488-05f4-4b2d-a18d-2b399e8b03b0", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Running \n", "snakemake --cores 10" ] }, { "cell_type": "code", "execution_count": null, "id": "9b086bae-ef42-41bb-9254-42af10c9ab1b", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%bash\n", "\n", "# Cleanup (only if you want to erase all the output)\n", "rm output/*; rm benchmarks/*" ] }, { "cell_type": "markdown", "id": "e46dffea-87ac-4157-8938-ae032d50a591", "metadata": {}, "source": [ "## Manual run\n", "\n", "You may also run them to test individual steps of the pipeline." ] }, { "cell_type": "markdown", "id": "6dcbff7b-8caf-4512-9c44-375eac698730", "metadata": {}, "source": [ "### pairtools" ] }, { "cell_type": "code", "execution_count": null, "id": "7da2496b-fd21-4383-a3df-ba9fadb9e505", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/bwa-mem2/bwa-mem2 mem -t 5 -SP data/hg38/index/bwa-mem2/hg38 data/SRR6107789_1.fastq.gz data/SRR6107789_2.fastq.gz | \\\n", " soft/pairtools1.0.2/bin/pairtools parse --nproc-in 5 --nproc-out 5 --drop-sam --drop-seq -c data/hg38/hg38.fa.sizes | \\\n", " soft/pairtools1.0.2/bin/pairtools sort --nproc 5 | \\\n", " soft/pairtools1.0.2/bin/pairtools dedup -p 5 --backend cython \\\n", " -o output/result.pairtools.pairs" ] }, { "cell_type": "markdown", "id": "b0c9a7e3-8e08-42bf-9748-cd94eff6731a", "metadata": {}, "source": [ "### chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "e3012cdb-be1c-46ef-bb7a-20eff2d34fba", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/chromap/bin/chromap --preset hic 
--low-mem \\\n", " -t 5 -x data/hg38/index/chromap/hg38 -r data/hg38/hg38.fa \\\n", " -1 data/SRR6107789_1.fastq.gz -2 data/SRR6107789_2.fastq.gz -o output/result.chromap.pairs" ] }, { "cell_type": "markdown", "id": "32e60c83-1fab-4fcb-ba0b-8c1258e457c6", "metadata": {}, "source": [ "### HiC-Pro" ] }, { "cell_type": "code", "execution_count": null, "id": "5931f3a6-82f1-4fd8-b65c-9ed648b5f986", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "cd soft/HiC-Pro_env/HiC-Pro\n", "bin/HiC-Pro -i rawdata/ -o output -c config-hicpro.txt\n", "\n", "cd ../../../" ] }, { "cell_type": "markdown", "id": "43171a68-8928-418c-9779-268a5d4923d3", "metadata": {}, "source": [ "### FAN-C\n", "Based on [CLI tutorial](https://fan-c.readthedocs.io/en/latest/fanc-executable/fanc-generate-hic/fanc_modular_steps.html):" ] }, { "cell_type": "code", "execution_count": null, "id": "802e66a8-4c93-45d6-a735-4a68a1d9184a", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "fanc map -t 5 data/SRR6107789_1.fastq.gz data/hg38/index/bwa/hg38.fa output/fanc-output_1.bam\n", "fanc map -t 5 data/SRR6107789_2.fastq.gz data/hg38/index/bwa/hg38.fa output/fanc-output_2.bam\n", "samtools sort -@ 5 -n output/fanc-output_1.bam -o output/fanc-output_1.sorted.bam\n", "samtools sort -@ 5 -n output/fanc-output_2.bam -o output/fanc-output_2.sorted.bam\n", "fanc pairs output/fanc-output_1.sorted.bam output/fanc-output_2.sorted.bam output/fanc-output.pairs -g data/hg38/hg38.DpnII.bed" ] }, { "cell_type": "markdown", "id": "46f11121-bff6-4f92-8d80-aa86b01ffcc0", "metadata": {}, "source": [ "### Juicer" ] }, { "cell_type": "code", "execution_count": null, "id": "a1a5571c-b1da-4125-b915-34884be7299e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/juicer-1.6/CPU/juicer.sh -g hg38 -d data/4juicer/ -s DpnII -S early -p data/hg38/hg38.fa.sizes -y data/hg38/hg38.DpnII.bed -z data/hg38/index/bwa/hg38.fa -t 5 -D soft/juicer-1.6/CPU\n" ] }, { "cell_type": "markdown", "id": 
"871ac7b7-0180-4103-a8b3-bd49b7269d83", "metadata": {}, "source": [ "### HiCExplorer\n", "Based on the example: https://hicexplorer.readthedocs.io/en/latest/content/example_usage.html\n", "\n", "Note that it does not procude the pairs, but binned coolers." ] }, { "cell_type": "code", "execution_count": null, "id": "5f14967d-ed63-4d20-a006-bc038d1f1f6f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "hicBuildMatrix --samFiles \\\n", " <(bwa mem -t 4 -A1 -B4 -E50 -L0 data/hg38/index/bwa/hg38.fa data/SRR6107789_1.fastq.gz | samtools view -Shb -) \\\n", " <(bwa mem -t 4 -A1 -B4 -E50 -L0 data/hg38/index/bwa/hg38.fa data/SRR6107789_2.fastq.gz | samtools view -Shb -) \\\n", " --restrictionSequence GATC \\\n", " --danglingSequence GATC \\\n", " --restrictionCutFile data/hg38/hg38.DpnII.bed \\\n", " --threads 4 \\\n", " --inputBufferSize 100000 \\\n", " --QCfolder hicexplorer_tmp \\\n", " -o hicexplorer_output.cool" ] }, { "cell_type": "markdown", "id": "3a856970-2770-4d52-b170-2308b9864ea3", "metadata": {}, "source": [ "### TADbit" ] }, { "cell_type": "code", "execution_count": null, "id": "463b8edd-9b9b-405a-ba21-6b76429ece41", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "tadbit map tadbit_output --fastq data/SRR6107789_1.fastq.gz --read 1 --index data/hg38/index/gem/hg38.gem --renz DpnII \n", "tadbit map tadbit_output --fastq data/SRR6107789_2.fastq.gz --read 2 --index data/hg38/index/gem/hg38.gem --renz DpnII \n", "\n", "tadbit parse tadbit_output --genome data/hg38/hg38.fa \n", "\n", "tadbit filter tadbit_output --format short" ] }, { "cell_type": "markdown", "id": "f2b8fa6a-d282-4ae5-9154-4578be8418dd", "metadata": {}, "source": [ "### Read mapping only" ] }, { "cell_type": "code", "execution_count": null, "id": "6b48d56b-122d-4827-a384-0cfeb797e081", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# bwa mem\n", "soft/pairtools0.3.0/bin/bwa mem -t 4 -SP data/hg38/index/bwa/hg38.fa data/SRR6107789_1.fastq.gz 
data/SRR6107789_2.fastq.gz > bwa-mem.sam\n", "\n", "# bwa mem2\n", "soft/bwa-mem2/bwa-mem2 mem -t 4 -SP data/hg38/index/bwa-mem2/hg38 data/SRR6107789_1.fastq.gz data/SRR6107789_2.fastq.gz > bwa-mem2.sam\n", "\n", "# bowtie2 only\n", "soft/tadbit/bin/bowtie2 -p 4 -x data/hg38/index/bowtie2/hg38 -1 data/SRR6107789_1.fastq.gz -2 data/SRR6107789_2.fastq.gz -S bowtie2.sam\n" ] }, { "cell_type": "markdown", "id": "9b3b93e5-47b1-408f-a4d5-32a85060fd8a", "metadata": {}, "source": [ "## 4. Visualize benchmarks" ] }, { "cell_type": "code", "execution_count": 2, "id": "8eb57b57-db42-420a-a2e7-631fda0676e4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "36 CPUs at 1211 MHz\n" ] } ], "source": [ "# Check the CPU properties:\n", "import psutil\n", "print(f\"{psutil.cpu_count()} CPUs at {psutil.cpu_freq().current:.0f} MHz\") " ] }, { "cell_type": "code", "execution_count": 3, "id": "5a59f6bc-be2d-442b-b4ac-07237f38c38b", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "mpl.rcParams['font.family'] = \"sans-serif\"\n", "figsize_A4 = np.array([11.69, 8.27])\n", "plt.rcParams[\"figure.figsize\"] = figsize_A4.T\n", "plt.rcParams['figure.facecolor']='white'\n", "plt.rcParams['font.size']=16\n", "\n", "import glob" ] }, { "cell_type": "code", "execution_count": 4, "id": "986fae72-ac93-4bff-9749-3b3a70057e17", "metadata": {}, "outputs": [], "source": [ "## If you start from .csv. 
file: \n", "# df = pd.read_csv('benchmarking_1mln.csv')" ] }, { "cell_type": "code", "execution_count": 13, "id": "dd9e829a-f25e-4c66-b22d-01e008143396", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "39\n" ] } ], "source": [ "# If you start from your own benchmarks:\n", "files = glob.glob(\"benchmarks/*\") #+ glob.glob(\"benchmarks_v1_2022/*\")# + \n", "print(len(files))" ] }, { "cell_type": "code", "execution_count": 14, "id": "08707677-e087-44ca-8e8a-9d74ef4482a4", "metadata": {}, "outputs": [], "source": [ "def get_params(filename):\n", " split = filename.split('.')\n", " util= split[1]\n", " ncores = int(split[2])\n", " \n", " return util, ncores\n", "\n", "timings = []\n", "for f in files:\n", " t = pd.read_table(f)\n", " t[['util', 'ncores']] = get_params(f)\n", " timings.append(t)\n", "timings = pd.concat(timings)" ] }, { "cell_type": "code", "execution_count": 15, "id": "d43f8549-4765-441c-b94c-eb76a950ca4d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sh:m:smax_rssmax_vmsmax_ussmax_pssio_inio_outmean_loadcpu_timeutilncores
0482.10290:08:0217068.9720572.7316931.9116951.7116275.210.02102.18493.82pairtools_bwamem21
1482.52610:08:0217079.4620508.7316941.4616956.3432534.1139.25101.73498.88pairtools_bwamem21
2488.99970:08:0817055.0420508.5916920.9616939.6942104.6178.47100.17502.58pairtools_bwamem21
3484.34600:08:0416981.5920380.6016961.3016962.3745493.36117.7093.0650.54pairtools_bwamem21
4483.31590:08:0316969.0220595.5116944.7516945.7961922.79156.9399.61507.64pairtools_bwamem21
\n", "
" ], "text/plain": [ " s h:m:s max_rss max_vms max_uss max_pss io_in \\\n", "0 482.1029 0:08:02 17068.97 20572.73 16931.91 16951.71 16275.21 \n", "1 482.5261 0:08:02 17079.46 20508.73 16941.46 16956.34 32534.11 \n", "2 488.9997 0:08:08 17055.04 20508.59 16920.96 16939.69 42104.61 \n", "3 484.3460 0:08:04 16981.59 20380.60 16961.30 16962.37 45493.36 \n", "4 483.3159 0:08:03 16969.02 20595.51 16944.75 16945.79 61922.79 \n", "\n", " io_out mean_load cpu_time util ncores \n", "0 0.02 102.18 493.82 pairtools_bwamem2 1 \n", "1 39.25 101.73 498.88 pairtools_bwamem2 1 \n", "2 78.47 100.17 502.58 pairtools_bwamem2 1 \n", "3 117.70 93.06 50.54 pairtools_bwamem2 1 \n", "4 156.93 99.61 507.64 pairtools_bwamem2 1 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "timings.head()" ] }, { "cell_type": "code", "execution_count": 87, "id": "ba0c1c4f-ac4c-43f5-8245-e32d1d4cc3cf", "metadata": {}, "outputs": [], "source": [ "df = timings.sort_values(['ncores', 'util'])" ] }, { "cell_type": "code", "execution_count": 88, "id": "06fa2f0b-1c9e-473b-bbdd-3157f1d81a1a", "metadata": {}, "outputs": [], "source": [ "df.to_csv('benchmarking_1mln.csv')" ] }, { "cell_type": "code", "execution_count": 17, "id": "306e3829-ab40-4bfb-804b-775b5bf6a170", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['bowtie', 'bwamem', 'bwamem2', 'chromap', 'fanc_bowtie2',\n", " 'fanc_bwa', 'hicexplorer', 'hicpro', 'juicer', 'pairtools',\n", " 'pairtools_bwamem2', 'tadbit', 'tadbit_bowtie2'], dtype=object)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.unique(df.util)" ] }, { "cell_type": "code", "execution_count": 21, "id": "8b20d808-78aa-4efc-9c2d-999b4e393968", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'chromap', \n", " 'pairtools_bwamem2', \n", " 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'hicpro', \n", " 'hicexplorer', \n", " 'fanc_bwa', \n", " 
'fanc_bowtie2',\n", " 'bwamem2',\n", " 'bwamem',\n", " 'bowtie',\n", "]\n", "labels_mod = [\n", " 'Chromap', \n", " 'bwa-mem2 + pairtools', \n", " 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'HiC-Pro', \n", " 'HiCExplorer', \n", " 'bwa mem + FANC', \n", " 'bowtie2 + FANC',\n", " 'bwa-mem2',\n", " 'bwa mem',\n", " 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 24, "id": "8f31f3be-cf8f-4976-9a60-28e97c13593d", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 6000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.pdf\")" ] }, { 
"cell_type": "code", "execution_count": 25, "id": "1f41e7db-0f80-45f6-96f6-d1ae81055b83", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xscale('log')\n", "ax.set_xlim([1, 5e3])\n", "# ax.set_xticks(np.arange(0, 5000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln_log.pdf\")" ] }, { 
"cell_type": "code", "execution_count": 89, "id": "475f8c97-3de4-4ff1-b1e9-6a01331a6c52", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'chromap', \n", " 'pairtools_bwamem2', \n", " 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'hicpro', \n", " 'hicexplorer', \n", " 'fanc_bwa', \n", " 'fanc_bowtie2',\n", " # 'bwamem2',\n", " # 'bwamem',\n", " # 'bowtie',\n", "]\n", "labels_mod = [\n", " 'Chromap', \n", " 'bwa-mem2 + pairtools', \n", " 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'HiC-Pro', \n", " 'HiCExplorer', \n", " 'bwa mem + FANC', \n", " 'bowtie2 + FANC',\n", " # 'bwa-mem2',\n", " # 'bwa mem',\n", " # 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 90, "id": "3b2cd5e8-71bf-4039-a6b1-be7565b84759", "metadata": {}, "outputs": [], "source": [ "df = timings.sort_values(['ncores', 'util'])\n", "df.loc[:, \"max_rss_gb\"] = df.loc[:, \"max_rss\"]/1024\n", "df.loc[:, \"min\"] = df.loc[:, \"s\"]" ] }, { "cell_type": "code", "execution_count": 91, "id": "08e08c97-3e00-438e-810e-77eae9c6804b", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 5000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "\n", "# Add text, slowdown over chromap\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_reference = df.query(f'ncores=={ncores} and util==\"chromap\"')\n", " mean_reference = np.mean(df_reference['min'].values)\n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['min'].values)\n", " \n", " slowdown = mean_target / mean_reference\n", " \n", " w = b.patches[0].get_height()\n", " \n", " b.text( s=f\"x {slowdown:.1f}\", \n", " x=mean_target+150, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", 
"ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.pdf\")" ] }, { "cell_type": "code", "execution_count": 92, "id": "c7c68d19-7606-4d35-a150-580fd5c78cd6", "metadata": {}, "outputs": [], "source": [ "dct_mapper = {\n", " 'bowtie': 'bowtie', \n", " 'bwamem': 'bwamem', \n", " 'bwamem2': 'bwamem2', \n", " 'chromap': \"\", \n", " 'fanc_bowtie2': 'bowtie',\n", " 'fanc_bwa': 'bwamem', \n", " 'hicexplorer': 'bwamem', \n", " 'hicpro':'bowtie', \n", " 'juicer': 'bwamem', \n", " 'pairtools': 'bwamem',\n", " 'pairtools_bwamem2': 'bwamem2', \n", " 'tadbit': 'GEM', \n", " 'tadbit_bowtie2': 'bowtie'\n", "}\n", "df.loc[:, \"mapper\"] = df.util.replace(dct_mapper)" ] }, { "cell_type": "code", "execution_count": 93, "id": "2996f9c4-0ed4-4d81-80aa-7044eed59648", "metadata": {}, "outputs": [], "source": [ "df = pd.merge(df, df, left_on=['mapper', 'ncores'], right_on=['util', 'ncores'], suffixes=[\"\", \"_mapper\"])" ] }, { "cell_type": "code", "execution_count": 94, "id": "90943478-8a46-486d-b6d7-74a1306dbc70", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'pairtools_bwamem2', \n", " # 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'fanc_bwa', \n", " 'hicexplorer', \n", " 'hicpro', \n", " 'fanc_bowtie2',\n", " # 'bwamem2',\n", " # 'bwamem',\n", " # 'bowtie',\n", "]\n", 
"labels_mod = [\n", " 'bwa-mem2 + pairtools', \n", " # 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'bwa mem + FANC', \n", " 'HiCExplorer', \n", " 'HiC-Pro', \n", " 'bowtie2 + FANC',\n", " # 'bwa-mem2',\n", " # 'bwa mem',\n", " # 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 113, "id": "7d976913-dc1e-49f3-ac37-5bc2f16a13fd", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "cmap1 = ['#ECECEC', '#BFBFBF', '#868686']\n", "style_dict1 = dict(\n", " orient='h',\n", " palette=cmap1,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07, \n", " alpha=0.8)\n", "\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "\n", "b1 = sns.barplot(x=\"s_mapper\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict1\n", ")\n", "plt.setp(b1.patches, linewidth=0.5)\n", "\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 6000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "\n", "# Add text, runtime percentage out of mapper timing\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['min'].values)\n", " mean_mapper = np.mean(df_target['min_mapper'].values)\n", " \n", " prc = 100 * (mean_mapper) / mean_target\n", " \n", " w = b.patches[0].get_height()\n", " \n", " if prc>100:\n", " signature 
= f\"~0 : ~100 %\"\n", " else:\n", " signature = f\"{prc:.0f} : {100-prc:.0f} %\"\n", " \n", " b.text( s=signature, \n", " x=mean_target+150, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "\n", "b1 = sns.barplot(x=\"max_rss_mapper\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict1\n", ")\n", "plt.setp(b1.patches, linewidth=0.5)\n", "ax.get_legend().remove()\n", "\n", "\n", "\n", "# Add text, runtime percentage out of mapper timing\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['max_rss'].values)\n", " mean_mapper = np.mean(df_target['max_rss_mapper'].values)\n", " \n", " prc = 100 * (mean_mapper) / mean_target\n", " \n", " w = b.patches[0].get_height()\n", " \n", " if prc>=100:\n", " signature = f\"\"\n", " else:\n", " signature = f\"{prc:.0f} : {100-prc:.0f} %\"\n", " \n", " b.text( s=signature, \n", " x=mean_target+550, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "# fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 
iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "# ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.mappers.pdf\")" ] }, { "cell_type": "code", "execution_count": null, "id": "a08db361-28ab-4a3d-a69a-4d6869e28a39", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test", "language": "python", "name": "test" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.0.3/doc/examples/benchmark/benchmarking_1mln.csv000066400000000000000000000431411452673171500241560ustar00rootroot00000000000000,s,h:m:s,max_rss,max_vms,max_uss,max_pss,io_in,io_out,mean_load,cpu_time,util,ncores 0,444.776,0:07:24,3496.51,4314.64,3468.51,3472.45,0.0,780.64,398.88,1775.19,bowtie,1 1,446.6064,0:07:26,3495.83,4314.77,3466.35,3470.77,0.0,1568.73,396.66,1776.07,bowtie,1 2,439.4402,0:07:19,3495.91,4314.39,3468.05,3471.94,0.0,2373.96,403.7,1781.37,bowtie,1 3,449.5572,0:07:29,3497.03,4314.64,3469.36,3473.22,0.0,3148.84,394.6,1784.75,bowtie,1 4,449.9673,0:07:29,3490.93,4314.89,3470.16,3471.92,40.96,3942.04,393.64,1785.49,bowtie,1 0,280.5757,0:04:40,6019.11,6453.95,5994.93,5998.23,7.86,746.23,366.91,1031.28,bwamem,1 1,286.6525,0:04:46,6004.57,6453.95,5986.34,5988.55,1485.98,1679.14,356.05,1027.24,bwamem,1 2,300.468,0:05:00,6009.04,6517.95,6000.2,6000.56,6660.16,2736.5,375.23,1138.7,bwamem,1 3,336.5047,0:05:36,6027.58,6645.95,6018.34,6018.94,8138.28,3669.41,375.65,1280.85,bwamem,1 4,323.7325,0:05:23,6012.26,6453.95,6002.96,6003.52,8932.21,4602.32,391.87,1291.62,bwamem,1 
0,169.2031,0:02:49,17583.51,21017.69,17574.31,17574.8,13309.02,870.68,260.2,441.72,bwamem2,1 1,190.2805,0:03:10,17611.99,20913.7,17601.16,17601.72,29713.3,1554.78,185.67,357.92,bwamem2,1 2,198.262,0:03:18,17553.89,20425.7,17545.7,17546.26,46154.95,2487.69,161.87,328.04,bwamem2,1 3,117.1978,0:01:57,17591.75,20553.7,17583.7,17584.1,46179.89,3669.41,358.44,430.41,bwamem2,1 4,142.6662,0:02:22,17615.75,20688.82,17606.59,17607.45,62596.36,4602.32,301.67,447.83,bwamem2,1 0,155.4555,0:02:35,19062.05,20632.42,19039.34,19042.16,14756.57,0.02,78.1,122.2,chromap,1 1,129.4362,0:02:09,19034.41,20605.15,19011.44,19014.27,14756.57,88.36,86.69,116.07,chromap,1 2,130.3877,0:02:10,19034.32,20605.15,19011.52,19014.34,14756.57,176.69,86.2,121.96,chromap,1 3,133.456,0:02:13,19030.1,20598.45,19007.18,19010.0,14756.57,265.02,84.37,128.14,chromap,1 4,129.5292,0:02:09,19045.01,20615.43,19022.31,19025.15,14756.57,353.35,87.22,134.56,chromap,1 0,4261.7448,1:11:01,7186.76,8679.24,4290.61,5702.39,1719.48,10257.31,36.77,348.34,fanc_bowtie2,1 1,4077.2502,1:07:57,7188.25,8680.68,4290.98,5703.88,1719.5,20183.4,34.11,330.67,fanc_bowtie2,1 2,4131.9376,1:08:51,7189.56,8681.8,4292.67,5709.75,1719.51,30697.16,35.35,351.1,fanc_bowtie2,1 3,4050.4727,1:07:30,9027.23,10823.83,5831.42,7385.27,1719.51,42084.66,35.13,382.96,fanc_bowtie2,1 4,4020.6237,1:07:00,9032.61,10828.28,5837.03,7391.75,2343.53,52412.16,34.58,394.25,fanc_bowtie2,1 0,2731.6263,0:45:31,7185.57,8945.22,5718.36,5838.26,0.38,7029.16,35.71,346.69,fanc_bwa,1 1,2715.4035,0:45:15,9044.39,10839.64,5846.46,7385.01,0.42,14315.23,32.27,353.83,fanc_bwa,1 2,2769.2431,0:46:09,7188.17,9009.55,5735.71,5855.89,0.44,21877.82,37.54,374.56,fanc_bwa,1 3,2706.3695,0:45:06,9043.16,10838.74,5844.58,7393.22,0.44,29434.04,35.55,376.27,fanc_bwa,1 4,2682.8176,0:44:42,6172.31,8945.66,5679.21,5812.1,0.44,37010.45,30.23,380.22,fanc_bwa,1 0,961.0734,0:16:01,21569.48,23347.51,21492.25,21513.06,2618.56,0.02,138.26,532.15,hicexplorer,1 
1,908.2106,0:15:08,22286.37,23347.51,22206.49,22228.31,2618.56,0.54,140.61,522.7,hicexplorer,1 2,896.8171,0:14:56,21604.46,23347.51,21524.48,21546.3,2618.56,1.05,138.93,499.36,hicexplorer,1 3,910.574,0:15:10,22440.63,23347.51,22361.79,22382.79,2618.56,1.56,140.63,542.86,hicexplorer,1 4,895.9424,0:14:55,21516.94,23347.51,21469.93,21475.81,2618.56,2.07,138.9,529.28,hicexplorer,1 0,1111.7203,0:18:31,6730.91,7406.83,6675.2,6682.8,237.25,1090.37,186.34,87.74,hicpro,1 1,1130.0595,0:18:50,6730.21,7406.82,6675.14,6682.36,903.14,2215.98,186.08,80.43,hicpro,1 2,1123.3536,0:18:43,6712.22,7406.82,6674.77,6677.97,5265.45,3341.59,182.57,89.92,hicpro,1 3,1181.2436,0:19:41,6715.32,7406.82,6675.26,6680.12,6316.99,4467.2,177.76,100.38,hicpro,1 4,1116.4777,0:18:36,6715.52,7406.82,6674.92,6679.32,6490.31,5592.8,188.64,114.97,hicpro,1 0,951.7772,0:15:51,5457.95,5857.16,5432.96,5439.95,0.0,2882.08,95.15,7.25,juicer,1 1,946.1429,0:15:46,5458.0,16410.45,5433.02,5439.99,0.0,4613.73,92.49,14.04,juicer,1 2,950.1664,0:15:50,5458.07,5857.16,5433.16,5440.17,0.2,7180.02,95.32,26.17,juicer,1 3,1004.5055,0:16:44,5458.27,16410.45,5433.17,5439.37,0.2,10377.86,93.19,30.43,juicer,1 4,1088.6224,0:18:08,5458.32,5857.16,5433.14,5439.08,0.2,13611.21,94.37,43.31,juicer,1 0,1031.7979,0:17:11,5868.29,8533.19,5732.12,5752.16,0.57,0.02,101.37,1048.77,pairtools,1 1,1057.2271,0:17:37,5866.38,8533.06,5731.94,5746.23,5193.45,39.25,101.33,1078.93,pairtools,1 2,1020.0639,0:17:00,5753.98,8533.06,5731.69,5732.67,10586.02,78.47,102.21,1056.3,pairtools,1 3,1044.6887,0:17:24,5852.79,8533.19,5734.25,5766.39,15784.64,117.71,99.64,29.63,pairtools,1 4,1046.266,0:17:26,5824.86,8533.06,5734.24,5757.25,21050.2,156.94,99.64,38.13,pairtools,1 0,482.1029,0:08:02,17068.97,20572.73,16931.91,16951.71,16275.21,0.02,102.18,493.82,pairtools_bwamem2,1 1,482.5261,0:08:02,17079.46,20508.73,16941.46,16956.34,32534.11,39.25,101.73,498.88,pairtools_bwamem2,1 
2,488.9997,0:08:08,17055.04,20508.59,16920.96,16939.69,42104.61,78.47,100.17,502.58,pairtools_bwamem2,1 3,484.346,0:08:04,16981.59,20380.6,16961.3,16962.37,45493.36,117.7,93.06,50.54,pairtools_bwamem2,1 4,483.3159,0:08:03,16969.02,20595.51,16944.75,16945.79,61922.79,156.93,99.61,507.64,pairtools_bwamem2,1 0,611.4665,0:10:11,13519.86,15003.46,13507.46,13508.44,22937.93,2801.37,50.95,5.28,tadbit,1 1,442.9993,0:07:22,13559.35,15003.46,13509.66,13529.09,40809.44,6184.54,55.05,25.04,tadbit,1 2,385.3129,0:06:25,13561.27,15003.46,13508.5,13522.26,40809.46,9295.42,59.39,22.13,tadbit,1 3,390.4362,0:06:30,13561.43,15003.46,13509.03,13522.27,40809.5,12333.14,58.93,23.74,tadbit,1 4,447.8446,0:07:27,13560.76,15003.46,13509.25,13528.03,51014.88,15465.8,51.07,24.5,tadbit,1 0,862.2741,0:14:22,3639.61,4465.74,3612.88,3616.18,9401.06,3187.84,73.44,23.31,tadbit_bowtie2,1 1,815.1788,0:13:35,3640.85,4464.65,3582.08,3602.21,9401.11,6292.2,82.09,20.52,tadbit_bowtie2,1 2,817.2921,0:13:37,3736.97,4560.27,3712.91,3716.56,13567.1,9504.59,81.15,19.79,tadbit_bowtie2,1 3,819.7936,0:13:39,3640.98,4464.65,3617.2,3619.83,13567.22,12458.8,80.4,19.48,tadbit_bowtie2,1 4,760.4646,0:12:40,3641.41,4464.98,3621.55,3622.75,13567.24,15676.85,81.76,20.99,tadbit_bowtie2,1 0,444.1354,0:07:24,3496.54,4314.39,3479.12,3481.87,27.41,783.47,400.27,1779.49,bowtie,2 1,446.9715,0:07:26,3496.32,4314.39,3478.36,3481.27,27.41,1570.48,397.44,1782.37,bowtie,2 2,484.7029,0:08:04,3511.4,4338.64,3490.33,3495.52,27.41,2346.41,368.72,1797.04,bowtie,2 3,450.6565,0:07:30,3488.59,4313.88,3466.99,3471.75,27.41,3148.2,393.42,1786.37,bowtie,2 4,453.0468,0:07:33,3488.74,4314.02,3466.96,3470.63,27.41,3936.13,392.09,1793.93,bowtie,2 0,330.2381,0:05:30,6045.63,6645.95,6021.65,6027.11,5173.52,870.68,375.32,1240.4,bwamem,2 1,329.5853,0:05:29,6028.34,6645.95,6019.0,6019.59,6654.07,1803.59,382.07,1265.1,bwamem,2 2,297.1758,0:04:57,6003.03,6453.95,5994.1,5994.56,9493.45,2736.5,363.55,1091.44,bwamem,2 
3,263.866,0:04:23,5991.57,6453.95,5982.83,5983.36,10181.14,3669.41,388.27,1039.2,bwamem,2 4,258.637,0:04:18,5992.16,6453.95,5983.26,5983.78,10181.15,4477.87,351.12,927.49,bwamem,2 0,141.9624,0:02:21,17604.44,20094.61,17587.78,17591.4,16300.85,870.68,311.82,444.2,bwamem2,2 1,133.0894,0:02:13,17620.68,20849.7,17603.97,17607.69,26647.44,1554.78,267.76,361.73,bwamem2,2 2,119.1224,0:01:59,17591.03,21040.61,17573.69,17577.37,26647.45,2736.5,354.15,430.73,bwamem2,2 3,119.2461,0:01:59,17618.88,20912.61,17601.95,17605.61,26647.45,3669.41,353.61,437.38,bwamem2,2 4,119.4407,0:01:59,17610.04,20579.05,17592.86,17596.52,26647.46,4602.32,352.7,443.75,bwamem2,2 0,99.0323,0:01:39,19065.62,20682.54,19043.42,19050.13,14756.72,0.02,106.11,105.91,chromap,2 1,93.6757,0:01:33,19075.63,20687.33,19053.32,19060.02,25886.48,88.36,121.88,118.06,chromap,2 2,120.2376,0:02:00,19145.25,20776.39,19130.55,19133.61,37626.33,176.69,104.51,132.92,chromap,2 3,83.0685,0:01:23,18948.87,20569.96,18934.38,18937.44,41077.34,270.34,159.69,144.2,chromap,2 4,98.8891,0:01:38,19049.5,20677.47,19034.7,19037.84,41077.34,353.35,126.08,140.51,chromap,2 0,2463.1096,0:41:03,7184.84,8822.2,4290.22,5704.96,1790.3,9624.3,17.33,337.56,fanc_bowtie2,2 1,2388.2986,0:39:48,7183.59,8820.36,4839.33,5975.55,1790.35,20084.17,17.7,322.7,fanc_bowtie2,2 2,2313.5663,0:38:33,6195.27,8222.25,3496.78,4531.77,1790.41,30597.45,18.65,343.23,fanc_bowtie2,2 3,2351.3693,0:39:11,9008.62,11035.56,5810.7,7349.54,1790.42,41169.94,17.91,356.34,fanc_bowtie2,2 4,2369.8083,0:39:29,7186.87,8822.57,4288.14,5702.9,1790.45,51675.89,19.01,377.66,fanc_bowtie2,2 0,1774.8625,0:29:34,9036.59,11062.79,5886.57,7381.14,180.82,6716.99,24.32,331.93,fanc_bwa,2 1,1740.5138,0:29:00,7185.89,9381.23,5771.89,5896.22,180.85,14316.39,27.45,352.99,fanc_bwa,2 2,1693.8652,0:28:13,7188.38,9315.89,5720.5,5842.73,180.88,21905.75,22.64,337.38,fanc_bwa,2 3,1673.5139,0:27:53,7184.62,9376.55,5740.96,5861.3,180.95,29485.2,22.17,333.76,fanc_bwa,2 
4,1671.1137,0:27:51,7186.76,9379.55,5780.07,5902.28,180.97,37075.34,22.58,346.62,fanc_bwa,2 0,746.2236,0:12:26,22026.01,24493.83,21948.96,21969.79,2997.02,0.53,170.7,512.85,hicexplorer,2 1,710.6231,0:11:50,23024.07,24502.34,22945.46,22966.38,3015.5,0.54,171.76,495.71,hicexplorer,2 2,705.1923,0:11:45,22126.58,24493.83,22047.82,22069.79,3015.5,1.05,176.52,507.18,hicexplorer,2 3,712.7222,0:11:52,23066.12,24493.83,22987.89,23008.64,3015.5,1.56,171.49,518.58,hicexplorer,2 4,715.7957,0:11:55,22876.23,24493.83,22797.87,22819.07,3015.5,2.58,172.73,557.14,hicexplorer,2 0,1073.3043,0:17:53,6730.26,7406.83,6678.91,6685.25,9.88,1089.03,184.85,64.55,hicpro,2 1,1038.1439,0:17:18,6730.3,7406.82,6675.17,6685.6,9.89,2215.98,182.2,73.83,hicpro,2 2,1033.7443,0:17:13,6730.34,7406.82,6675.99,6686.95,221.47,3341.59,185.87,88.03,hicpro,2 3,1022.0066,0:17:02,6731.1,7406.83,6675.88,6688.85,222.14,4467.19,188.24,109.28,hicpro,2 4,1094.391,0:18:14,6730.64,7406.82,6676.12,6689.47,222.14,5592.8,175.63,105.96,hicpro,2 0,502.114,0:08:22,5634.02,34265.69,5609.24,5616.47,70.82,2785.59,175.44,10.71,juicer,2 1,502.401,0:08:22,5634.24,6057.17,5609.44,5616.5,70.89,5661.32,175.44,14.17,juicer,2 2,502.5511,0:08:22,5634.45,6057.17,5609.73,5616.78,70.92,8537.05,175.43,18.41,juicer,2 3,500.9428,0:08:20,5634.69,6057.17,5609.77,5616.84,70.92,11412.78,175.96,24.15,juicer,2 4,500.3197,0:08:20,5634.83,6057.17,5610.01,5617.05,70.92,14367.65,176.2,21.5,juicer,2 0,519.8764,0:08:39,6037.57,9285.21,5901.66,5924.11,5249.05,0.02,194.04,1009.67,pairtools,2 1,483.1521,0:08:03,6037.39,9285.21,5901.68,5924.15,5249.05,39.25,200.27,974.78,pairtools,2 2,480.6768,0:08:00,6037.79,9285.21,5903.83,5941.17,5249.05,78.47,201.33,980.38,pairtools,2 3,465.3507,0:07:45,6037.73,9285.21,5903.88,5941.19,5249.05,117.7,194.76,924.69,pairtools,2 4,466.1686,0:07:46,6038.33,9285.21,5903.87,5941.35,5249.05,156.93,194.4,930.11,pairtools,2 0,228.2886,0:03:48,17383.15,22336.79,17245.43,17268.2,0.0,0.02,186.3,427.67,pairtools_bwamem2,2 
1,221.768,0:03:41,17381.39,22167.71,17243.96,17266.91,0.0,39.25,193.13,437.76,pairtools_bwamem2,2 2,221.3263,0:03:41,17362.04,22295.71,17224.0,17246.68,0.0,78.47,193.49,445.19,pairtools_bwamem2,2 3,217.581,0:03:37,17340.12,22400.79,17203.5,17233.95,0.0,117.7,196.51,452.01,pairtools_bwamem2,2 4,223.3371,0:03:43,17393.16,22144.79,17255.85,17277.7,0.01,156.93,192.59,462.11,pairtools_bwamem2,2 0,610.9462,0:10:10,13529.42,15232.48,13516.34,13517.79,36766.11,2801.28,49.41,96.95,tadbit,2 1,376.1804,0:06:16,13569.33,15232.48,13545.14,13547.36,61864.18,5678.4,62.61,85.0,tadbit,2 2,300.593,0:05:00,13570.71,15232.47,13518.36,13536.02,61954.27,9211.43,31.59,15.96,tadbit,2 3,297.5085,0:04:57,13570.71,15232.48,13518.59,13531.8,61954.29,12339.2,53.0,24.9,tadbit,2 4,296.7615,0:04:56,13570.29,15232.48,13518.88,13531.85,61954.32,15451.43,72.1,28.21,tadbit,2 0,463.5859,0:07:43,3666.75,4713.83,3630.19,3636.92,0.02,2804.57,129.07,84.87,tadbit_bowtie2,2 1,455.2237,0:07:35,3666.76,4713.65,3630.33,3637.05,0.04,6087.06,128.93,11.28,tadbit_bowtie2,2 2,455.0799,0:07:35,3666.91,4713.66,3630.34,3637.1,0.07,9386.64,128.87,14.03,tadbit_bowtie2,2 3,458.9496,0:07:38,3667.79,4714.59,3631.29,3638.12,321.35,12458.88,123.9,12.09,tadbit_bowtie2,2 4,452.953,0:07:32,3667.88,4714.02,3597.93,3621.82,321.36,15940.75,129.9,20.51,tadbit_bowtie2,2 0,420.8718,0:07:00,3496.11,4314.01,3471.04,3476.4,15.42,768.89,393.0,1654.84,bowtie,4 1,422.8224,0:07:02,3496.45,4314.02,3471.73,3477.1,15.42,1557.63,391.27,1658.74,bowtie,4 2,419.9842,0:06:59,3494.73,4314.02,3470.36,3475.6,15.42,2354.96,393.91,1662.14,bowtie,4 3,420.7907,0:07:00,3495.93,4314.02,3471.43,3476.67,15.42,3145.49,393.27,1666.11,bowtie,4 4,412.5256,0:06:52,3497.47,4314.64,3472.82,3478.16,15.42,3952.15,401.55,1671.29,bowtie,4 0,252.8627,0:04:12,6006.18,6453.96,5982.69,5986.96,0.83,746.23,358.77,908.39,bwamem,4 1,261.662,0:04:21,6007.87,6453.96,5984.64,5988.93,5174.14,1803.59,380.15,999.85,bwamem,4 
2,274.3422,0:04:34,6024.01,6581.96,5999.88,6003.05,6023.34,2736.5,373.3,1032.61,bwamem,4 3,298.8224,0:04:58,6014.57,6453.96,5990.63,5993.79,11196.66,3669.41,359.87,1087.63,bwamem,4 4,285.7411,0:04:45,5997.78,6453.96,5988.62,5989.24,12674.78,4477.87,357.24,1036.2,bwamem,4 0,111.3764,0:01:51,17591.11,20784.61,17566.66,17572.18,0.02,932.93,377.1,420.58,bwamem2,4 1,111.2423,0:01:51,17569.3,20537.94,17544.68,17550.22,0.02,1865.84,376.76,425.65,bwamem2,4 2,112.9415,0:01:52,17579.97,20665.95,17555.33,17560.86,0.02,2798.75,371.87,432.32,bwamem2,4 3,111.3421,0:01:51,17573.66,20489.7,17548.98,17554.51,0.02,3731.66,376.83,437.97,bwamem2,4 4,111.3777,0:01:51,17544.64,20889.7,17520.3,17525.84,0.02,4353.51,270.46,325.61,bwamem2,4 0,63.9876,0:01:03,19049.48,20739.12,19027.24,19032.6,14756.71,0.02,122.87,79.43,chromap,4 1,46.2109,0:00:46,17981.29,19696.59,17959.17,17964.53,14756.71,88.36,86.01,43.67,chromap,4 2,46.4987,0:00:46,17964.62,19692.91,17941.91,17947.24,14756.71,176.69,85.48,49.23,chromap,4 3,46.4768,0:00:46,17972.1,19695.5,17949.51,17954.84,14756.71,265.02,85.79,55.0,chromap,4 4,46.1236,0:00:46,17970.75,19693.68,17948.35,17953.68,14756.71,353.35,86.4,60.65,chromap,4 0,1480.6598,0:24:40,6107.95,8576.53,3509.07,4446.13,0.04,9605.84,30.34,315.13,fanc_bowtie2,4 1,1496.7858,0:24:56,9031.23,11498.52,5833.15,7371.95,0.07,20128.0,29.31,332.72,fanc_bowtie2,4 2,1463.532,0:24:23,7184.77,9101.79,4217.2,5669.65,0.07,31165.73,31.89,344.45,fanc_bowtie2,4 3,1450.3658,0:24:10,6120.51,8586.21,3510.02,4468.34,0.07,41105.85,30.36,336.53,fanc_bowtie2,4 4,1452.4516,0:24:12,7187.97,9104.71,4292.93,5709.08,0.07,51577.66,30.16,338.06,fanc_bowtie2,4 0,1150.8569,0:19:10,7186.05,9953.48,5723.31,5859.42,5224.05,6719.89,32.08,312.36,fanc_bwa,4 1,1135.3056,0:18:55,7224.26,9950.27,5660.22,5824.15,5224.08,14390.72,35.73,329.42,fanc_bwa,4 2,1125.9794,0:18:45,9041.66,11507.65,5849.72,7432.53,5224.11,21886.05,34.66,314.55,fanc_bwa,4 
3,1128.931,0:18:48,9044.25,11511.71,5853.64,7435.73,5224.14,29477.74,34.51,319.67,fanc_bwa,4 4,1128.7103,0:18:48,9040.23,11509.34,5851.56,7432.95,5224.14,37075.91,34.49,326.24,fanc_bwa,4 0,596.2727,0:09:56,23080.44,26188.7,23034.84,23042.16,0.0,0.53,199.86,493.7,hicexplorer,4 1,594.7958,0:09:54,23219.5,26188.7,23173.16,23180.65,0.0,1.04,203.29,503.47,hicexplorer,4 2,609.9005,0:10:09,23002.46,26188.7,22956.89,22965.46,0.0,1.05,192.2,508.65,hicexplorer,4 3,597.4554,0:09:57,23259.77,26188.7,23215.77,23224.37,19.97,2.01,204.19,518.44,hicexplorer,4 4,622.1449,0:10:22,23509.05,26188.7,23466.36,23474.47,5281.18,2.58,171.82,538.96,hicexplorer,4 0,619.9254,0:10:19,6759.34,7631.43,6704.49,6717.01,3.34,1088.32,295.46,59.78,hicpro,4 1,558.5864,0:09:18,6759.78,7631.43,6705.0,6715.83,3.34,2216.0,327.68,66.81,hicpro,4 2,550.9692,0:09:10,6760.03,7631.43,6716.98,6725.62,3.34,3341.63,337.46,79.54,hicpro,4 3,549.0615,0:09:09,6759.72,7631.44,6717.38,6725.84,3.34,4467.26,338.57,88.86,hicpro,4 4,548.9341,0:09:08,6760.25,7631.43,6717.1,6725.58,3.34,5592.88,339.01,94.98,hicpro,4 0,297.0198,0:04:57,5996.96,6457.18,5977.55,5979.61,3412.56,2785.59,297.18,6.35,juicer,4 1,319.5797,0:05:19,5989.58,6457.18,5979.42,5981.48,9268.84,5739.07,309.19,7.78,juicer,4 2,327.0335,0:05:27,5988.18,6457.18,5977.84,5978.92,10206.69,8537.05,311.25,12.23,juicer,4 3,331.7378,0:05:31,5988.45,6457.18,5978.38,5979.71,16123.1,11364.07,297.09,18.28,juicer,4 4,334.7347,0:05:34,5989.59,6457.18,5979.36,5980.55,19292.39,14066.95,290.47,19.62,juicer,4 0,241.5991,0:04:01,6399.55,10581.27,6265.49,6303.07,0.17,0.02,385.61,935.31,pairtools,4 1,241.5127,0:04:01,6398.66,10581.27,6265.65,6303.01,0.17,39.25,386.39,940.88,pairtools,4 2,242.2225,0:04:02,6399.6,10581.27,6266.3,6303.79,0.17,78.47,384.74,943.77,pairtools,4 3,241.9401,0:04:01,6398.33,10581.27,6264.49,6301.69,0.17,117.7,385.22,947.96,pairtools,4 4,242.4722,0:04:02,6399.78,10581.27,6265.86,6303.43,0.17,156.93,384.24,951.72,pairtools,4 
0,149.1161,0:02:29,18032.98,24632.93,17895.58,17910.56,6662.34,0.02,321.06,482.55,pairtools_bwamem2,4 1,129.8502,0:02:09,18011.57,24911.85,17874.46,17889.42,7215.82,39.25,336.11,443.2,pairtools_bwamem2,4 2,149.7954,0:02:29,17943.55,24532.52,17806.29,17821.07,13922.12,78.47,316.37,488.94,pairtools_bwamem2,4 3,151.1137,0:02:31,17982.62,24665.18,17847.23,17861.77,30195.88,117.7,311.36,487.72,pairtools_bwamem2,4 4,151.164,0:02:31,18000.05,24454.96,17865.51,17883.09,46478.44,156.93,310.2,489.74,pairtools_bwamem2,4 0,258.4889,0:04:18,13609.79,15690.48,13542.78,13567.37,15901.3,3103.55,57.27,19.48,tadbit,4 1,226.7053,0:03:46,10086.89,15275.6,10018.9,10043.75,15902.2,5596.25,40.62,76.32,tadbit,4 2,228.1718,0:03:48,8939.3,15275.61,8872.51,8896.66,15902.23,9327.17,39.3,23.97,tadbit,4 3,226.9155,0:03:46,9170.13,15275.61,9102.29,9127.05,15902.26,11800.28,40.77,79.04,tadbit,4 4,225.4808,0:03:45,9664.37,15275.61,9635.06,9642.85,15902.27,14928.92,41.44,83.32,tadbit,4 0,320.7662,0:05:20,3689.32,5212.95,3650.62,3659.72,0.02,3171.16,145.76,25.25,tadbit_bowtie2,4 1,348.5605,0:05:48,3687.51,5211.26,3648.52,3653.68,3011.82,6430.47,170.42,20.75,tadbit_bowtie2,4 2,338.4871,0:05:38,3688.18,5211.85,3649.18,3654.37,3012.64,9240.8,172.35,8.57,tadbit_bowtie2,4 3,340.9527,0:05:40,3689.14,5212.57,3649.83,3654.76,3014.55,12098.38,165.3,65.98,tadbit_bowtie2,4 4,346.3,0:05:46,3688.78,5212.19,3649.61,3654.97,3060.71,15516.58,170.85,94.72,tadbit_bowtie2,4 pairtools-1.0.3/doc/examples/example_pipeline.sh000066400000000000000000000044211452673171500220020ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# -le 3 ] ; then echo "Usage: bash example_pipeline.sh BWA_INDEX FASTQ_1 FASTQ_2 OUTPUT_PREFIX" echo "" echo "A example of a bash pipeline to align the sequencing data from a " echo "single Hi-C experiment." echo "" echo "positional arguments:" echo "" echo "BWA_INDEX The path to a bwa index of the reference genome." echo "CHROM_SIZES The path to a file with chromosome sizes." 
echo "FASTQ_1 The path to a fastq file with the sequences of " echo " the first side of Hi-C molecules." echo "FASTQ_2 The path to a fastq file with the sequences of " echo " the second side of Hi-C molecules." echo "OUTPUT_PREFIX The prefix to the paths of generated outputs. " echo "" echo "" exit 0 fi set -o errexit set -o nounset set -o pipefail INDEX=$1 CHROM_SIZES=$2 FASTQ1=$3 FASTQ2=$4 OUTPREFIX=$5 N_THREADS=8 UNMAPPED_SAM_PATH=${OUTPREFIX}.unmapped.bam UNMAPPED_PAIRS_PATH=${OUTPREFIX}.unmapped.pairs.gz NODUPS_SAM_PATH=${OUTPREFIX}.nodups.bam NODUPS_PAIRS_PATH=${OUTPREFIX}.nodups.pairs.gz DUPS_SAM_PATH=${OUTPREFIX}.dups.bam DUPS_PAIRS_PATH=${OUTPREFIX}.dups.pairs.gz bwa mem -SP -t "${N_THREADS}" "${INDEX}" "${FASTQ1}" "${FASTQ2}" | { # Classify Hi-C molecules as unmapped/single-sided/multimapped/chimeric/etc # and output one line per read, containing the following, separated by \\v: # * triu-flipped pairs # * read id # * type of a Hi-C molecule # * corresponding sam entries pairtools parse --chroms-path "${CHROM_SIZES}" } | { # Block-sort pairs together with SAM entries pairtools sort --nproc 4 } | { # Remove duplicates, separate mapped and unmapped reads pairtools dedup \ --output \ >( pairtools split \ --output-pairs ${NODUPS_PAIRS_PATH} \ --output-sam ${NODUPS_SAM_PATH} ) \ --output-dups \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${DUPS_PAIRS_PATH} \ --output-sam ${DUPS_SAM_PATH} ) \ --output-unmapped >( pairtools split \ --output-pairs ${UNMAPPED_PAIRS_PATH} \ --output-sam ${UNMAPPED_SAM_PATH} ) } pairtools-1.0.3/doc/examples/example_singlecell_pipeline.sh000066400000000000000000000055271452673171500242130ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# -le 4 ] ; then echo "Usage: bash example_singlecell_pipeline.sh BWA_INDEX CHROM_SIZES FASTQ_1 FASTQ_2 OUTPUT_PREFIX" echo "" echo "A example of a bash pipeline to align the sequencing data from a " echo "single Hi-C experiment."
echo "" echo "positional arguments:" echo "" echo "BWA_INDEX The path to a bwa index of the reference genome." echo "CHROM_SIZES The path to a file with chromosome sizes." echo "FASTQ_1 The path to a fastq file with the sequences of " echo " the first side of Hi-C molecules." echo "FASTQ_2 The path to a fastq file with the sequences of " echo " the second side of Hi-C molecules." echo "OUTPUT_PREFIX The prefix to the paths of generated outputs. " echo "" echo "" exit 0 fi set -o errexit set -o nounset set -o pipefail INDEX=$1 CHROM_SIZES=$2 FASTQ1=$3 FASTQ2=$4 OUTPREFIX=$5 N_THREADS=8 UNMAPPED_SAM_PATH=${OUTPREFIX}.unmapped.bam UNMAPPED_PAIRS_PATH=${OUTPREFIX}.unmapped.pairs.gz NODUPS_SAM_PATH=${OUTPREFIX}.nodups.bam NODUPS_PAIRS_PATH=${OUTPREFIX}.nodups.pairs.gz DUPS_SAM_PATH=${OUTPREFIX}.dups.bam DUPS_PAIRS_PATH=${OUTPREFIX}.dups.pairs.gz LOWFREQ_SAM_PATH=${OUTPREFIX}.lowfreq.bam LOWFREQ_PAIRS_PATH=${OUTPREFIX}.lowfreq.pairs.gz HIGHFREQPAIRS_SAM_PATH=${OUTPREFIX}.highfreq.bam HIGHFREQPAIRS_PAIRS_PATH=${OUTPREFIX}.highfreq.pairs.gz bwa mem -SP -t "${N_THREADS}" "${INDEX}" "${FASTQ1}" "${FASTQ2}" | { # Classify Hi-C molecules as unmapped/single-sided/multimapped/chimeric/etc # and output one line per read, containing the following, separated by \\v: # * triu-flipped pairs # * read id # * type of a Hi-C molecule # * corresponding sam entries pairtools parse --chroms-path "${CHROM_SIZES}" } | { # Block-sort pairs together with SAM entries pairtools sort } | { # Set unmapped and ambiguous reads aside pairtools select '(pair_type == "UU") or (pair_type == "UR") or (pair_type == "RU")' \ --output-rest >( pairtools split \ --output-pairs ${UNMAPPED_PAIRS_PATH} \ --output-sam ${UNMAPPED_SAM_PATH} ) } | { # Remove duplicates pairtools dedup \ --output-dups \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${DUPS_PAIRS_PATH} \ --output-sam ${DUPS_SAM_PATH} ) } | { # Remove high frequency interactors pairtools multifilter \ --output \ >( pairtools split \
--output-pairs ${LOWFREQ_PAIRS_PATH} \ --output-sam ${LOWFREQ_SAM_PATH} ) \ --output-high-frequency-interactors \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${HIGHFREQPAIRS_PAIRS_PATH} \ --output-sam ${HIGHFREQPAIRS_SAM_PATH} ) } pairtools-1.0.3/doc/examples/pairtools_phase_walkthrough.ipynb000066400000000000000000002201251452673171500250050ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "112fe2d5-aaed-4eb1-b3f5-2f5889a9c89f", "metadata": {}, "source": [ "# Pairtools phase walkthrough\n", "\n", "Welcome to the pairtools phase walkthrough!\n", "\n", "Haplotype-resolved Hi-C is a popular technique that helps you to resolve contacts of homologous chromosomes. \n", "It relies on a simple idea that homologous chromosomes have variations (e.g., SNPs) that are inherited together as **haplotypes**. DNA reads in Hi-C will have the SNVs from one of two haplotypes, which can be used to distinguish the contacts on the same chromosome (*cis-homologous*) and contacts connecting two homologs (*trans-homologous*). \n", "\n", "The experimental challenge of the haplotype-resolved Hi-C is to increase the number of SNPs that distinguish reads from different chromosomes. This can be done by mating highly diverged strains. \n", "\n", "- Erceg et al. 2019 create highly heterozygous embryos of *Drosophila* [1] \n", "- Collombet et al. 2020 create highly polymorphic F1 hybrid embryos obtained by crossing female *Mus musculus domesticus* (C57Bl/6J) with male *Mus musculus castaneus* (CAST/EiJ) to resolve structures of individual chromosomes in the zygote and embryos [2] \n", "- Tan et al. 2018 uses available heterozygous positions to infer the 3D structures of single chromosomes by single-cell variant of the protocol Dip-C [3] \n", "- Duan et al.
use dikaryonic nuclei of fungi with 0.7% heterozygosity [4]" ] }, { "attachments": { "62e74fba-c1c1-44b5-a3e2-3699c3cac7ce.png": { "image/png": "" } }, "cell_type": "markdown", "id": "c3795661-e308-44e6-9b0f-3f0396541250", "metadata": {}, "source": [ "In `pairtools` we implement an approach to resolving haplotypes from Erceg et al. The outline of haplotype-resolved parsing of pairs:\n", "\n", "1. [Create the reference genome](#Create-the-reference-genome): create the concatenated reference genomes from two haplotypes. \n", "\n", " Usually the SNVs are known and can be obtained in .vcf format. We will incorporate the SNVs by [bcftools](https://samtools.github.io/bcftools/bcftools.html) into the reference and create updated fasta files with haplotype-corrected sequences.\n", " For each homologue we will add the suffixes that identify the type of homologue (`_hap1` or `_hap2`).\n", "\n", "2. Map the Hi-C data to the concatenated reference and parse allowing multimappers (mapq 0). \n", "\n", " We will also need the mapper to report two suboptimal alignments (aka the second and the third hit).\n", " When the Hi-C read is mapped to some location in the genome, it will have the suffix of the homologue reported as part of chromosome name.\n", " However, the true resolved pairs are not yet known at this step. \n", " \n", " See sections:\n", " \n", " (i) [Download data](#Download-data)\n", " \n", " (ii) [Map data with bwa mem to diploid genome](#Map-data-with-bwa-mem-to-diploid-genome)\n", " \n", " (iii) [pairtools parse](#pairtools-parse)\n", " \n", "\n", "3. [pairtools phase](#pairtools-phase): phase the pairs based on the reported suboptimal alignments. \n", "\n", " By checking the scores of two suboptimal alignments, we will distinguish the true multi-mappers from unresolved pairs (i.e.
cases when the read aligns to the location with no distinguishing SNV).\n", " Phasing procedure will remove the haplotype suffixes from chromosome names and add extra fields to the .pairs file with:\n", " \n", " '.' (non-resolved)\n", " \n", " '0' (first haplotype) or \n", " \n", " '1' (second haplotype). \n", " \n", " \n", " \n", " Phasing schema: \n", " \n", "![image.png](attachment:62e74fba-c1c1-44b5-a3e2-3699c3cac7ce.png)\n", "\n", "\n", "4. Post-processing. Do sorting, dedup and stats, as usual. \n", "\n", " See sections:\n", " \n", " (i) [pairtools dedup](#pairtools-dedup)\n", " \n", " (ii) [Stats](#Stats)" ] }, { "cell_type": "markdown", "id": "9dc8a020-7c4b-471d-9dfd-a5e346f10a27", "metadata": {}, "source": [ "[1] Erceg, J., AlHaj Abed, J., Goloborodko, A., Lajoie, B. R., Fudenberg, G., Abdennur, N., Imakaev, M., McCole, R. B., Nguyen, S. C., Saylor, W., Joyce, E. F., Senaratne, T. N., Hannan, M. A., Nir, G., Dekker, J., Mirny, L. A., & Wu, C. T. (2019). The genome-wide multi-layered architecture of chromosome pairing in early Drosophila embryos. Nature communications, 10(1), 4486. https://doi.org/10.1038/s41467-019-12211-8\n", "\n", "[2] Collombet, S., Ranisavljevic, N., Nagano, T., Varnai, C., Shisode, T., Leung, W., Piolot, T., Galupa, R., Borensztein, M., Servant, N., Fraser, P., Ancelin, K., & Heard, E. (2020). Parental-to-embryo switch of chromosome organization in early embryogenesis. Nature, 580(7801), 142–146. https://doi.org/10.1038/s41586-020-2125-z\n", "\n", "[3] Tan, L., Xing, D., Chang, C. H., Li, H., & Xie, X. S. (2018). Three-dimensional genome structures of single diploid human cells. Science (New York, N.Y.), 361(6405), 924–928. https://doi.org/10.1126/science.aat5641\n", "\n", "[4] Duan, H., Jones, A. W., Hewitt, T., Mackenzie, A., Hu, Y., Sharp, A., Lewis, D., Mago, R., Upadhyaya, N. M., Rathjen, J. P., Stone, E. A., Schwessinger, B., Figueroa, M., Dodds, P. N., Periyannan, S., & Sperschneider, J. (2022).
Physical separation of haplotypes in dikaryons allows benchmarking of phasing accuracy in Nanopore and HiFi assemblies with Hi-C data. Genome biology, 23(1), 84. https://doi.org/10.1186/s13059-022-02658-2\n" ] }, { "cell_type": "markdown", "id": "a0b4c550-8168-4780-82e0-1e18493135af", "metadata": {}, "source": [ "We will test on a sample from Collombet et al. 2020 [2], example of mouse single-cell Hi-C on embryos obtained from highly heterozygous parents. We will take some cell from the dataset, GSM3691125_2CSE_70. \n", "Note that because the procedure is not strictly Hi-C, the properties of this dataset may differ from what you may obtain on bulk data. " ] }, { "cell_type": "markdown", "id": "5ab026af-fe25-4a70-82ef-52af6fb25371", "metadata": {}, "source": [ "## Create the reference genome\n", "\n", "For phasing, map the data to the concatenated genome with two haplotypes. \n", "Obtaining such a genome is not a simple task. You will need a reference genome, and one or two lists of mutations to introduce into the reference.\n", "\n", "#### Download reference genome" ] }, { "cell_type": "code", "execution_count": null, "id": "9ec0743f-a299-43f0-b568-7e963ed95df8", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "! wget ftp://ftp-mouse.sanger.ac.uk/ref/GRCm38_68.fa" ] }, { "cell_type": "markdown", "id": "7683d63a-bc2f-4c49-8371-fd57f4111072", "metadata": {}, "source": [ "#### Download .vcf file with variants" ] }, { "cell_type": "code", "execution_count": null, "id": "4a347a3b-2ee7-4824-a209-8377edddf640", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "!
wget ftp://ftp-mouse.sanger.ac.uk/current_snps/strain_specific_vcfs/CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz" ] }, { "cell_type": "markdown", "id": "88363fb6-c233-4a07-a208-a5e5a2679038", "metadata": {}, "source": [ "#### Index the variants" ] }, { "cell_type": "code", "execution_count": null, "id": "84cebce3-29c6-42df-98bf-5388a51fb268", "metadata": {}, "outputs": [], "source": [ "! bcftools index CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz" ] }, { "cell_type": "markdown", "id": "2dd599a0-64f9-4c8b-b78f-8eabf49c052e", "metadata": {}, "source": [ "#### Introduce the variants into the genome\n", "\n", "Note that you may select the variants that are only SNPs but not indels (deletions/insertions) by using the `--include` parameter of `bcftools consensus` (e.g. `--include '(STRLEN(REF)=1) & (STRLEN(ALT[0])=1)'`).\n", "This will make sure that the genomic coordinates correspond between the haplotypes. \n", "Correspondence of coordinates is not a requirement, but might be important for downstream analysis. " ] }, { "cell_type": "code", "execution_count": null, "id": "848c9fe5-a632-4139-ba56-60871d8d1eb4", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "bcftools consensus --fasta-ref GRCm38_68.fa.gz \\\n", " --haplotype 1 CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz |sed '/^>/ s/$/_hap1/' | bgzip -c > GRCm38_EiJ_snpsonly_hap1.fa.gz\n", "\n", "bcftools consensus --fasta-ref GRCm38_68.fa.gz \\\n", " --haplotype 2 CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz |sed '/^>/ s/$/_hap2/' | bgzip -c > GRCm38_EiJ_snpsonly_hap2.fa.gz\n" ] }, { "cell_type": "markdown", "id": "dfd7c4cb-31dd-43df-8510-95fd0ff9f78f", "metadata": {}, "source": [ "#### Create the index of concatenated haplotypes" ] }, { "cell_type": "markdown", "id": "99d28f6f-b754-4a95-95d5-9e5e51d14571", "metadata": {}, "source": [ "Concatenate the genomes and index them together.
Note that [bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) produces [very similar results to bwa mem](https://github.com/open2c/pairtools/discussions/118), while being [x2-3 times faster](https://github.com/bwa-mem2/bwa-mem2#performance). We highly recommend to use it instead of bwa!" ] }, { "cell_type": "code", "execution_count": null, "id": "92ff8a4f-2115-4131-8c4a-cbd040dcdffb", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "cat GRCm38_EiJ_snpsonly_hap1.fa.gz GRCm38_EiJ_snpsonly_hap2.fa.gz > GRCm38_EiJ_snpsonly.fa.gz\n", "bwa index GRCm38_EiJ_snpsonly.fa.gz" ] }, { "cell_type": "markdown", "id": "22017c7e-71af-4ef3-8237-364402e896fb", "metadata": {}, "source": [ "Generate chromosome sizes file: " ] }, { "cell_type": "code", "execution_count": null, "id": "69489018-edde-4aa0-b7ac-7c7b4351764c", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "faidx GRCm38_EiJ_snpsonly.fa.gz -i chromsizes > GRCm38_EiJ_snpsonly.chromsizes" ] }, { "cell_type": "markdown", "id": "bd264406-be74-4060-9798-e18040c44889", "metadata": { "tags": [] }, "source": [ "## Download data\n", "\n", "Uncomment the `--minSpotId` and `--maxSpotId` if you want to run the small test instead of full run." ] }, { "cell_type": "code", "execution_count": null, "id": "f4e310c0-2d16-4e7d-87d7-44feec8e6256", "metadata": {}, "outputs": [], "source": [ "! fastq-dump SRR8811373 --gzip --split-spot --split-3 # --minSpotId 0 --maxSpotId 1000000" ] }, { "cell_type": "code", "execution_count": null, "id": "571e94fb-3dec-4042-9e21-6c39802ed8df", "metadata": {}, "outputs": [], "source": [ "! ls SRR8811373*.fastq.gz" ] }, { "cell_type": "markdown", "id": "2ce00436-bbc7-4241-a41b-12c99c708180", "metadata": { "tags": [] }, "source": [ "## Map data with bwa mem to diploid genome\n", "\n", "Note that you may use [bwa mem2](https://github.com/bwa-mem2/bwa-mem2), which is x2 times faster. 
\n", "It [proved to produce](https://github.com/open2c/pairtools/discussions/118) results very similar to bwa mem.\n", "\n", "There are two modes to work with phasing. \n", "\n", "1. GitHub mode with XB bwa tag. This is the most precise algorithm that operates based on alignment scores of optimal alignment (best hit), and two suboptimal ones.\n", "\n", " Download and install [bwa](https://github.com/lh3/bwa) from GitHub.\n", " Map with:\n", " ```bash\n", "./bwa/bwa mem -SPu -t 5 mm10_EiJ_snpsonly.fa.gz test.1.fastq.gz test.2.fastq.gz | samtools view -@ 8 -b > mapped.XB.bam\n", " ```\n", "\n", "\n", "2. Regular mode with XA bwa tag. \n", "\n", " This is a simplified version that operates on the number of mismatches for the suboptimal alignments.\n", "\n", " ```bash\n", "bwa mem -SP -t 5 mm10_EiJ_snpsonly.fa.gz test.1.fastq.gz test.2.fastq.gz | samtools view -@ 8 -b > mapped.XA.bam\n", " ```\n", "\n", "\n", "We will try the second option for simplicity: " ] }, { "cell_type": "code", "execution_count": null, "id": "12f8a13d-fba6-45f7-8112-291fb883d7d0", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "bwa mem -SP -t 5 GRCm38_EiJ_snpsonly.fa.gz SRR8811373_1.fastq.gz SRR8811373_2.fastq.gz \\\n", " | samtools view -@ 8 -b > mapped.XA.bam" ] }, { "cell_type": "markdown", "id": "3bce4691-6268-4885-b8e0-1933a561d4b5", "metadata": {}, "source": [ "## pairtools parse\n", "\n", "For phasing, we need additional tags and no filtering by mapq.\n", "\n", "`--min-mapq` is 1 by default, which removes all multiply mapped sequences.
However, we need this information for phasing to distinguish true multiply mapped pairs from pairs mapped to both haplotypes:" ] }, { "cell_type": "code", "execution_count": null, "id": "efc63459-aa2f-44f5-804e-a2346d2b7820", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools parse --add-columns XA,NM,AS,XS --min-mapq 0 --drop-sam --walks-policy all \\\n", " -c GRCm38_EiJ_snpsonly.chromsizes mapped.XA.bam -o unphased.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "c90ff16b-bb5b-4ceb-8fe3-feeae8ada021", "metadata": {}, "source": [ "## pairtools phase\n", "\n", "Phasing will remove the tags \"\\_1\" and \"\\_2\" from chromosome names and add a separate field for the phase:" ] }, { "cell_type": "code", "execution_count": null, "id": "6c8deaee-cb68-4b53-b306-bf223523ab45", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools phase --phase-suffixes _hap1 _hap2 --tag-mode XA --clean-output unphased.XA.pairs.gz -o phased.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "c17443ec-b647-4818-aced-bdc686109396", "metadata": {}, "source": [ "## pairtools dedup\n", "\n", "Sort prior to dedup: " ] }, { "cell_type": "code", "execution_count": null, "id": "6aabbc13-a8d4-43f2-b388-62e7b3b576ab", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools sort phased.XA.pairs.gz --nproc 10 -o phased.sorted.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "84d0442c-ba94-4571-8c89-44067acecb47", "metadata": {}, "source": [ "Deduplication now should take additional columns with phases into account: " ] }, { "cell_type": "code", "execution_count": null, "id": "9fd3b266-4faa-4fc0-974d-b0ca9bbeb961", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools dedup --mark-dups --extra-col-pair phase1 phase2 \\\n", " --output-dups - --output-unmapped - --output-stats phased.XA.dedup.stats \\\n", " -o phased.sorted.XA.nodup.pairs.gz phased.sorted.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "d7ae3575-aef8-4a8b-9707-b37627653ba9", 
"metadata": {}, "source": [ "Dedup might generate a warning that the phase columns now contain mixed data types ('.' alongside 0 and 1). This warning comes from pandas, which is used to read the pairs file." ] }, { "cell_type": "markdown", "id": "89f9d829-3f79-49b4-b74d-8bca732b8a44", "metadata": {}, "source": [ "## Stats\n", "\n", "First, filter different types of reads:" ] }, { "cell_type": "code", "execution_count": null, "id": "727a9d2b-5977-4763-81e5-64589c067688", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools select '(phase1==\"0\") and (phase2==\"0\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.phase0.pairs.gz\n", "pairtools select '(phase1==\"1\") and (phase2==\"1\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.phase1.pairs.gz\n", "pairtools select '(phase1==\".\") or (phase2==\".\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.unphased.pairs.gz\n", "pairtools select '(phase1!=phase2) and (phase1!=\".\") and (phase2!=\".\")' phased.sorted.XA.nodup.pairs.gz \\\n", " -o phased.XA.trans-phase.pairs.gz" ] }, { "cell_type": "markdown", "id": "916a5ca1-e549-4501-82d2-8a6e0645b864", "metadata": {}, "source": [ "Calculate stats for these different types:" ] }, { "cell_type": "code", "execution_count": null, "id": "1172f899-41d6-4ca2-ab21-a283340011f8", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools stats phased.XA.phase0.pairs.gz -o phased.XA.phase0.stats\n", "pairtools stats phased.XA.phase1.pairs.gz -o phased.XA.phase1.stats\n", "pairtools stats phased.XA.unphased.pairs.gz -o phased.XA.unphased.stats\n", "pairtools stats phased.XA.trans-phase.pairs.gz -o phased.XA.trans-phase.stats" ] }, { "cell_type": "markdown", "id": "25fdebb4-24ca-4280-950e-baa9cc92d28e", "metadata": {}, "source": [ "Visualize with multiQC:" ] }, { "cell_type": "code", "execution_count": null, "id": "9039184f-65a1-43bd-9495-85266fc1fed6", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "multiqc phased.XA.*phase*.stats -o
multiqc_report_phasing" ] }, { "cell_type": "code", "execution_count": 1, "id": "ed403d73-7b5f-432b-9e91-e8c70906d31b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import IFrame\n", "\n", "IFrame(src='./multiqc_report_phasing/multiqc_report.html', width=1200, height=700)" ] }, { "cell_type": "code", "execution_count": null, "id": "20e713fe-c962-4d6f-af73-17c21b987a5a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test", "language": "python", "name": "test" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.0.3/doc/examples/pairtools_restrict_walkthrough.ipynb000066400000000000000000010367751452673171500255650ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a26ff7fa-0774-497c-8df8-4686845bf3b6", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "id": "882425fb-e34a-41c7-8103-270da19ecec2", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker \n", "import matplotlib.gridspec\n", "import seaborn as sns\n", "\n", "%matplotlib inline\n", "plt.style.use('seaborn-poster')\n", "\n", "import pandas as pd\n", "import pairtools\n", "import bioframe" ] }, { "cell_type": "markdown", "id": "66194c2b-8c1b-4e21-80ef-1d2bf069199c", "metadata": {}, "source": [ "# Pairtools: restriction walkthrough\n", "\n", "The common approach to analyse 
Hi-C data is based on analysing the contacts of the restriction fragments. It is used in *hiclib*, Juicer, HiC-Pro. \n", "\n", "Throughout this notebook, we will work with one of [Rao et al. 2014 datasets for IMR90 cells](https://data.4dnucleome.org/experiment-set-replicates/4DNES1ZEJNRU/) [1]. \n", "\n", "\n", "[1] Rao, S. S., Huntley, M. H., Durand, N. C., Stamenova, E. K., Bochkov, I. D., Robinson, J. T., Sanborn, A. L., Machol, I., Omer, A. D., Lander, E. S., & Aiden, E. L. (2014). A 3D map of the human genome at kilobase resolution reveals principles of chromatin looping. Cell, 159(7), 1665–1680. https://doi.org/10.1016/j.cell.2014.11.021" ] }, { "cell_type": "markdown", "id": "8a77207f-d444-4d5c-ab6c-1f2a1cf4c7b2", "metadata": {}, "source": [ "### Download the data from 4DN portal\n", "\n", "To download the data from 4DN, you may need to [register, get key and secret and write a specialized curl command for your user](https://data.4dnucleome.org/help/user-guide/downloading-files): " ] }, { "cell_type": "code", "execution_count": 61, "id": "a3d3eafc-5c28-40d4-be2a-8c4ba23e9809", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 330 100 330 0 0 931 0 --:--:-- --:--:-- --:--:-- 932\n", "100 3395M 100 3395M 0 0 29.7M 0 0:01:54 0:01:54 --:--:-- 33.1M 0:01:48 0:00:12 0:01:36 32.8M\n" ] } ], "source": [ "!curl -O -L --user RG6CSRMC:xlii3stnkphfygmu https://data.4dnucleome.org/files-processed/4DNFIW2BKSNF/@@download/4DNFIW2BKSNF.pairs.gz" ] }, { "cell_type": "code", "execution_count": null, "id": "22d0732a-9d6a-4957-8081-5cad5b3abf09", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Get total number of contacts to assess how many reads you can read in the future:\n", "pairtools stats 4DNFIW2BKSNF.pairs.gz | head -n 1\n", "# This will produce around 173 M pairs" ] }, { "cell_type": "code", "execution_count":
null, "id": "ff187814-015c-4f6a-b0e8-082161dfcef7", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Sample the fraction of pairs that will produce ~ 1 M of pairs:\n", "pairtools sample 0.007 4DNFIW2BKSNF.pairs.gz -o 4DNFIW2BKSNF.pairs.sampled.gz" ] }, { "cell_type": "markdown", "id": "e8a51837-c1a9-4c83-a140-8be9f8cbbbed", "metadata": {}, "source": [ "#### Annotate restriction fragments" ] }, { "cell_type": "code", "execution_count": null, "id": "61b32154-a8ec-48d1-9370-eaf6bc357e08", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Digest the genome into restriction fragments:\n", "cooler digest ../tests_chromap/hg38/hg38.fa.sizes ../tests_chromap/hg38/hg38.fa MboI > hg38/hg38.MboI.restricted.bed" ] }, { "cell_type": "code", "execution_count": null, "id": "e0699dee-a95f-4114-82c5-9758c74b5d27", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Annotate restriction fragments in the sampled file: \n", "pairtools restrict -f hg38/hg38.MboI.restricted.bed 4DNFIW2BKSNF.pairs.sampled.gz -o 4DNFIW2BKSNF.pairs.sampled.restricted.gz" ] }, { "cell_type": "markdown", "id": "34c594fe-41df-4f42-a25d-7c050f020fb2", "metadata": {}, "source": [ "#### Read the pairs and analyse them as dataframe" ] }, { "cell_type": "code", "execution_count": 3, "id": "309d3c54-7b2d-4a5e-a750-87eb0b6914d9", "metadata": {}, "outputs": [], "source": [ "from pairtools.lib import headerops, fileio" ] }, { "cell_type": "code", "execution_count": 4, "id": "40daf717-6ffd-4c27-8b68-d553d458a713", "metadata": {}, "outputs": [], "source": [ "pairs_file = '4DNFIW2BKSNF.pairs.sampled.restricted.gz'" ] }, { "cell_type": "code", "execution_count": 5, "id": "6d363f7a-6053-488e-ad59-9df14260a7f6", "metadata": {}, "outputs": [], "source": [ "pairs_stream = fileio.auto_open(pairs_file, 'r')\n", "header, pairs_stream = headerops.get_header(pairs_stream)\n", "columns = headerops.get_colnames(header)" ] }, { "cell_type": "code", "execution_count": 6, "id": 
"deb04397-579b-4305-9dec-4f58e61e7ad4", "metadata": {}, "outputs": [], "source": [ "df = pd.read_table(pairs_stream, comment=\"#\", header=None)\n", "df.columns = columns" ] }, { "cell_type": "code", "execution_count": null, "id": "7688d530-4860-40e9-b865-affb7c35ccf1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "id": "44d60718-dd56-4113-a409-57e5c1b882c0", "metadata": {}, "outputs": [], "source": [ "df.loc[:, 'dist_rfrag1_left'] = df.pos1 - df.rfrag_start1\n", "df.loc[:, 'dist_rfrag1_right'] = df.rfrag_end1 - df.pos1\n", "\n", "df.loc[:, 'dist_rfrag2_left'] = df.pos2 - df.rfrag_start2\n", "df.loc[:, 'dist_rfrag2_right'] = df.rfrag_end2 - df.pos2" ] }, { "cell_type": "markdown", "id": "330e034a-e4f2-4deb-ab2c-9103e9083fa2", "metadata": {}, "source": [ "Many of the 5'-ends of reads are mapped to the restriction sites: " ] }, { "cell_type": "code", "execution_count": 8, "id": "1a7ef073-082b-4aa6-972f-85ada84be4d4", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "xmin = 0\n", "xmax = 2000\n", "step = 20\n", "\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_left, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest upstream rsite, + mapped reads')\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_right, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest downstream rsite, + mapped reads')\n", "\n", "plt.xlim(xmin, xmax)\n", "plt.legend()\n", "plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 9, "id": "8fb2a16b-a921-4451-9250-4c0e381ac516", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "xmin = 0\n", "xmax = 200\n", "step = 1\n", "\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_left, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest upstream rsite, + mapped reads')\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_right, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest downstream rsite, + mapped reads')\n", "\n", "plt.xlim(xmin, xmax)\n", "plt.legend()\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "id": "2cfd8a30-79d4-4926-b0e9-809ac185228c", "metadata": {}, "source": [ "However, if we select only the pairs that map to the restriction sites, there is no significant skew in scaling:" ] }, { "cell_type": "code", "execution_count": 10, "id": "97eac9b5-2b51-4529-9056-48d061c30d6b", "metadata": {}, "outputs": [], "source": [ "hg38_chromsizes = bioframe.fetch_chromsizes('hg38', \n", " as_bed=True)\n", "hg38_cens = bioframe.fetch_centromeres('hg38')\n", "hg38_arms = bioframe.make_chromarms(hg38_chromsizes, \n", " dict(hg38_cens.set_index('chrom').mid), \n", " cols_chroms=('chrom', 'start', 'end') )\n", "\n", "# To fix pandas bug in some versions: \n", "hg38_arms['start'] = hg38_arms['start'].astype(int)\n", "hg38_arms['end'] = hg38_arms['end'].astype(int)" ] }, { "cell_type": "code", "execution_count": 11, "id": "28358a56-a6fe-4ec7-9ca6-52822a6224b9", "metadata": {}, "outputs": [], "source": [ "import pairtools.lib.scaling as scaling" ] }, { "cell_type": "code", "execution_count": 12, "id": "3769c57b-78c7-48e4-85bf-710f7f459e1e", "metadata": {}, "outputs": [], "source": [ "def plot(cis_scalings, n, xlim=(1e1,1e9), label='' ):\n", " strand_gb = cis_scalings.groupby(['strand1', 'strand2'])\n", " for strands in ['+-', '-+', '++', '--']:\n", " sc_strand = strand_gb.get_group(tuple(strands))\n", " sc_agg = (sc_strand\n", " 
.groupby(['min_dist','max_dist'])\n", " .agg({'n_pairs':'sum', 'n_bp2':'sum'})\n", " .reset_index())\n", "\n", " dist_bin_mids = np.sqrt(sc_agg.min_dist * sc_agg.max_dist)\n", " pair_frequencies = sc_agg.n_pairs / sc_agg.n_bp2\n", " pair_frequencies = pair_frequencies/cis_scalings.n_pairs.sum()\n", " mask = pair_frequencies>0\n", " label_long = f'{strands[0]}{strands[1]} {label}'\n", "\n", " if np.sum(mask)>0:\n", " plt.loglog(\n", " dist_bin_mids[mask],\n", " pair_frequencies[mask],\n", " label=label_long,\n", " lw=2\n", " )\n", "\n", " plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", " plt.gca().yaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", " plt.gca().set_aspect(1.0)\n", " plt.xlim(xlim)\n", "\n", " plt.grid(lw=0.5,color='gray')\n", " plt.legend(loc=(1.1,0.4))\n", " plt.ylabel('contact frequency, \\nHi-C molecule per bp pair normalized by total')\n", " plt.xlabel('distance, bp')\n", "\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 13, "id": "bf07b649-d184-4ded-827b-d8ff3f9f4284", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Get the pairs where R1 is far enough from site of restriction, but not too far\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left>5 and dist_rfrag1_left<=250)\")\n", "n_distant = len(df_subset)\n", "cis_scalings_distant, trans_levels_distant = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_distant, n_distant, label=\"pairs, 5' distant from rsite\")\n", "\n", "\n", "# Get the pairs where R1 is too far from the site of restriction\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left>550)\")\n", "n_toodistant = len(df_subset)\n", "cis_scalings_toodistant, trans_levels_toodistant = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_toodistant, n_toodistant, label=\"pairs, 5' too far from rsite\")\n", "\n", "\n", "# Get the pairs where R1 is very close to the site of restriction\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left<5)\")\n", "n_tooclose = len(df_subset)\n", "cis_scalings_tooclose, trans_levels_tooclose = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_tooclose, n_tooclose, label=\"pairs, 5' close to rsite\")\n", "# TODO: try another replicate, maybe the last one " ] }, { "cell_type": "markdown", "id": "60967f0b-7f50-429f-8865-046d3fd0d878", "metadata": {}, "source": [ "#### How many pairs do we take if not strictly filtering by dangling ends and self-circles? 
" ] }, { "cell_type": "code", "execution_count": 14, "id": "bfb77fa0-85ee-4573-b745-57353b74f646", "metadata": {}, "outputs": [], "source": [ "df.loc[:, \"type_rfrag\"] = \"Regular pair\"\n", "\n", "mask_neighboring_rfrags = (np.abs(df.rfrag1-df.rfrag2)<=1)\n", "\n", "mask_DE = (df.strand1==\"+\") & (df.strand2==\"-\") & mask_neighboring_rfrags\n", "df.loc[mask_DE, \"type_rfrag\"] = \"DanglingEnd\"\n", "\n", "mask_SS = (df.strand1==\"-\") & (df.strand2==\"+\") & mask_neighboring_rfrags\n", "df.loc[mask_SS, \"type_rfrag\"] = \"SelfCircle\"\n", "\n", "mask_Err = (df.strand1==df.strand2) & mask_neighboring_rfrags\n", "df.loc[mask_Err, \"type_rfrag\"] = \"Mirror\"" ] }, { "cell_type": "code", "execution_count": 15, "id": "c6913bb4-f861-4098-a193-94a134df4ea5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type_rfrag\n", "DanglingEnd 76902\n", "Mirror 3214\n", "Regular pair 1132002\n", "SelfCircle 3036\n", "Name: readID, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(\"type_rfrag\").groupby(\"type_rfrag\").count()['readID']" ] }, { "cell_type": "code", "execution_count": 16, "id": "c2e77360-e8ee-43c6-8322-b9990aef19bc", "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA3oAAAJyCAYAAACSd7KhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAADLmklEQVR4nOzdd3xUVfrH8c+Z9E4CIRB6DSXSRURFFEEsICrYe8FVV39iX8u6ru7ae1lFbNgLir0gCoqKFOm9hd5CCeltzu+PmYkB0gYmuSnf9+t1X8nce+feZ65hN0/OOc9jrLWIiIiIiIhI/eFyOgAREREREREJLCV6IiIiIiIi9YwSPRERERERkXpGiZ6IiIiIiEg9o0RPRERERESknlGiJyIiIiIiUs8EOx2AHJr4+HjboUMHp8Oo1XJycoiMjHQ6jFqvOp5TVlYW1lqMMURHRwf02k7Qz1Ll9Iwqp2dUOT2jyukZVW7u3Lnp1tpEp+MQcZoSvTrGGDMWGJuUlMScOXOcDqdWmzZtGoMHD3Y6jFqvOp7TE088QVZWFtHR0dxyyy0BvbYT9LNUOT2jyukZVU7PqHJ6RpUzxqx3OgaR2kBTN+sYa+14a22/uLg4p0MREREREZFaSomeiIiIiIhIPaNET0REREREpJ5RoiciIiIiIlLPKNETERERERGpZ1R1U0QC7sQTT6SwsJCQkBCnQxERERFpkJTo1TGl2yuI1Fa9e/d2OgQRERGRBk1TN+sYtVcQEREREZHKKNETERERERGpZzR1U0QCLisrC2stxhiio6OdDkdERESkwVGiJyIB9/LLL5OVlUV0dDS33HKL0+GIiIiINDiauikiIiIiIlLPKNETERERERGpZ5ToiYiIiIiI1DNK9EREREREROoZJXp1jDFmrDFmTkZGhtOhiIiIiIhILaVEr45Rw3QREREREamMEj0REREREZF6RomeiIiIiIhIPaNET0REREREpJ4JdjoAEal/Ro8eTXFxMUFBQU6HIiIiItIgKdETkYBr06aN0yGIiIiINGiauilSwyb8spYxL/3Gt4u3Yq11OhwRERERqYc0oidSg4rdlmenrmJfXhGz0/ZwZNt47jq1K71bxzsdmoiIiIjUIxrRE6lBS7fsY19eEY0iQ2gcFcrstD2c+eJv3PDePLbvy3M6vIBZv349a9euZf369U6HIiIiItIgKdETqUG/rUkH4ORuzfjptsFcN7gDocEuvliwhds+XuhwdIHz8ccf89Zbb/Hxxx87HYqIiIhIg6RET6QG/b52FwADOzYmNjyE24d34bubBgEwe91uit1asyciIiIih0+JnkgNKSx2M2vdbgCObt+4ZH+7JlG0aBRBbmEx69KznApPREREROoRJXp1jDFmrDFmTkZGhtOhiJ8WbtpLTkExHRKjaBobvt+xbsmxACzZss+J0ERERESknlGiV8dYa8dba/vFxcU5HYr46fc13mmbHZocdKy7Ej0RERERCSAleiI15Ddvond0h8YHHeue7Encl2zRSK2IiIiIHD4leiI1IK+wmDnr9wAwoP3BiV5qC8+I3uLN+9REXUREREQOmxI9kRowb8NeCorcdGkWQ0JU6EHHm8WGkxAVSkZuIZv35joQoYiIiIjUJ0r0RGrA797+eWWtzwMwxmidnoiIiIgEjBI9kRpQ0fo8H1XeFBEREZFACXY6AJH6LqegiPkb9+Iy0L9dQrnn+QqyLK0HBVmuueYarLUYY5wORURERKRBUqInUs1mp+2hyG3p2TKOuIiQcs+rT1M3o6OjnQ5BREREpEHT1E2RaubrnzeggmmbAO0aRxEZGsTWjDx2ZeXXRGgiIiIiUk8p0ROpZpUVYvFxuQzdmtefUT0RERERcY4SPZFqtC+vkEWbMwh2Gfq1ia/0/PoyfXPevHnMmjWLefPmOR2KiIiISIOkNXoi1Wj2ut24LfRp3YiosMr/ufkKsiyp4wVZfvzxR7KysoiOjqZ3795OhyMiIiL
S4GhET6QaLd+WCUCPlo2qdL6vxcLSOj6iJyIiIiLOUqInUo3W7swGoH1iVJXO75wUQ0iQYd2ubLLzi6ozNBERERGpx5ToBYAx5i5jzApjjNsYM6qM41ONMQuMMfONMb8YY3rVfJTihHXpWUDVE73QYBedmsZgLSzbqlE9ERERETk0SvQCYypwKvBzOcfPstb2tNb2Ap4E3qihuMRha9M9I3odEqveV85XkGXx5rq9Tk9EREREnFPvEj1jTEtjzHPGmN+NMTnGGGuMaVvOua2MMR8bYzKMMfuMMZ8YY1r7e09r7R/W2jUVHC/9G3usv9eXuml3dgF7cwqJCg2iaUxYld+X2sJXkEUjeiIiIiJyaOpdogd0BM4B9gC/lHeSMSYS+BHoAlwKXAx0An4yxlRtnp0fjDHvGGM2AQ8AFwX6+lL7+KZttkuMwhhT5ffVlxYLIiIiIuKc+the4WdrbRKAMeYqYFg5510NtAdSrLWrvecvBFYB1+CZYokx5k+gvFG+3tbajVUJylp7YamYHgFOq9KnkTprja8QS5OqT9sE6No8FmNg1Y5MCorchAbXx7/HiIiIiEh1qne/QVpr3VU8dSQw05fked+7DvgVOKPUvj7W2iblbFVK8g7wKjDUGNP4EN4rdYi/FTd9osKCadc4isJiy8rtmdURmoiIiIjUc/Uu0fNDd2BxGfuXAN0CdRNjTLwxpnmpXWcDO4Ddh3Ctub4tUPFJ9Vm70zt1s4n/M4G7eqdvrthWNxO9sLCwkk1EREREal59nLpZVQl41vEdaDcQ78+FjDH3AH8DEoFUY8zzQD9r7TbvtT4wxoQDbjxJ3unWWns4wRcWFjJt2rTDuUS9l5aW5ugzWrwhB4A965czbe8qv95rMwsA+G3+Uhpnrq7k7MNTHc8pNTW15Pv68HPq9M9SXaBnVDk9o8rpGVVOz0hEqqohJ3oAZSVbVa+a4buItQ8CD5ZzbC1wpL/XLOdafX3fp6Sk2MGDBwfisvXWtGnTcOoZFbst6VO+BSxjhh9PVJh//9TWBq/j63VLiWyczODBqZW/4TA4+ZzqCj2jyukZVU7PqHJ6RpXTMxKRqmrIUzf34BnVO1A8ZY/0iVTZpj05FBS7SYoN8zvJA2gWFw7Atn15gQ5NRERERBqAhpzoLcGzTu9A3YClNRxLlRljxhpj5mRkqJl2beZrlO5vxU2fpFhPorddiZ6IiIiIHIKGnOh9DgwwxrT37fA2Vj/Ge6xWstaOt9b2i4uLczoUqcChVtz08Y3o1dVEb+rUqXz99ddMnTrV6VBEREREGqR6uUbPGDPa+61vTdspxpidwE5r7XTvvleAvwOfeYupWDzNzDcCL9dkvFL/+Cputk88tBG9pjGeapU7M/MpKnYTHFS3/iYzf/58srKyiI6OZsiQIU6HIyIiItLg1MtED/jogNcver9OBwYDWGuzjTEnAk8Bb+EpwjIVuMlam1VDcUo9ta5k6uahjeiFBLloEh1KelYB6VkFJSN8IiIiIiJVUS8TPWttlSpnWms34OlrV2cYY8YCY5OSkpwORSpwuFM3wbNOLz2rgG378pToiYiIiIhf6tZ8MNEavTogO7+IbfvyCAkytIyPPOTrNPMWZNmWUTfX6YmIiIiIc5ToiQSYb9pmm8ZRBLn8bstYIsk7ircjU4meiIiIiPhHiZ5IgK09zPV5PhrRExEREZFDpUSvjlEfvdrvcCtu+iTFeipvqmm6iIiIiPhLiV4dozV6tV8gCrGAmqaLiIiIyKFToicSYIfbWsHnr6bp+Ycdk4iIiIg0LPWyvYKIU6y1AZu66Vujt70OrtFr3rw5OTk5REYeetVRERERETl0SvREAmhHZj7ZBcU0igwhISr0sK4VFxFCWLCLzPwisvOLiAqrO/9cL7jgAqdDEBEREWnQNHWzjlExltrNtz6v3WFO2wQwxpSs01NBFhERERHxhxK9OkbFWGq3teneaZtNDm/apk9dnr4pIiIiIs5Roic
SQIGquOnja5q+XU3TRURERMQPdWfRj0gdEKiKmz7NfL30MupW5c2PPvqI3NxcIiIiGDNmjNPhiIiIiDQ4SvREAihQFTd96movvQ0bNpCVlUV0dGCeg4iIiIj4R1M3RQKksNjNxj25GANtGgemrUBJMRat0RMRERERPyjRq2NUdbP22rg7h2K3JTkugvCQoIBc09c0XVU3RURERMQfSvTqGFXdrL3SdnnW57VtErgm4b6qmzuU6ImIiIiIH5ToiQTIuvQcANo2DkwhFoCm3mIsOzLzcbttwK4rIiIiIvWbEj2RAElLD1yzdJ+w4CASokIpclvSs+tW5U0RERERcY4SPZEAKZm6GcARPYCmMZ5Rve11rMWCiIiIiDhHiZ5IgPh66LUN4Ige/FWQpa61WBARERER59RoHz1jzNpDeJu11nYIeDAiAZRfVMyWvbm4DLROCFwxFvirIIsqb4qIiIhIVdV0w/QNgCpKHAZjzFhgbFJSktOhSCkbd+fgttAqIYLQ4MAOlNfFpundu3cnPz+fsLAwp0MRERERaZBqNNGz1g6uyfvVR9ba8cD4lJQUJcy1SHVU3PQp6aVXh5qmDx8+3OkQRERERBo0rdETCYD1uwJfcdMnydtiQVM3RURERKSqlOiJBEBJIZZqGNFLKmmarqqbIiIiIlI1jid6xpjjjDFfGWN2GmOKjDHFB2xFTscoUpm0ahzRUzEWEREREfFXTRdj2Y8x5kTgO2A3MBM4DfgRiASOApYAcx0LUKSK0nxr9Koh0UuICiU0yEVGbiF5hcWEhwQF/B6B9vLLL5OVlUV0dDTXXHON0+GIiIiINDhOj+jdi6cSZ1fgcu++/1prBwKDgdbARGdCE6mavMJitmTkEuQytIyPCPj1jTE09a3TqyMFWbKysko2EREREal5Tid6fYEJ1trdgNu7zwVgrf0FeA14wKHYRKpkw+4crIVW8RGEBFXPP6kkTd8UERERET84nei5gJ3e73O9X+NLHV8K9KzRiET8VFKIpRqmbfo0q4O99ERERETEOU4nehuBNgDW2lxgGzCw1PFeQHbNh1V7GWPGGmPmZGRkOB2KeKVVY8VNn7rYNF1EREREnON0ojcdTwEWn4+BvxtjXjXGvA5cg6dYi3hZa8dba/vFxcU5HYp4VWfFTZ9mcb41emqxICIiIiKVc7TqJvAUMNgYE26tzQPuBjrgKcxiganArQ7GJ1Kpmpi6qRE9EREREfGHo4metXYFsKLU60zgNGNMHFBsrVXJPqn1fK0V2mnqpoiIiIjUEo5O3TTG/NMYk3rgfmtthrU2yxjT3RjzTydiE6mK3IJitu3LIyTIkNwovNruo6bpIiIiIuIPp9fo/QvoUcHxVOC+mglFxH++9Xmt4iMJrqbWCkBJH70dmflYa6vtPiIiIiJSPzi9Rq8ykUCR00GIlCetBtbnAUSGBhMZGkROQTGZ+UXEhodU6/0O13HHHUdhYSEhIbU7ThEREZH6qsYTPWNMa6BtqV1djDGDyjg1Hk/VzbU1EZfIoVi3q/pbK/g0iQ5jw+4cdmUV1PpEr3///k6HICIiItKgOTGidzme6ZjWu93t3Q5kADdwVc2FJuIf34heuyaR1X6vJtGhbNidQ3pWfrW2chARERGRus+JRG8ykIYnkXsNGA/8fsA5FsgC5lhrN9RkcCL+8FXcrO6pm+AZ0QNIz1QvPRERERGpWI0netbaBcACAGNMG2CStXZxTcchEghpNTl1M8ab6GXV/kQvP99TNMYYQ1hYmNPhiIiIiDQ4TvfRu7/0a2OMy7NbZQWl9svOL2JHZj6hQS6SG0VU+/18I3o7swqq/V6H6/nnnycrK4vo6GhuueUWp8MRERERaXCcbq+AMaaxMeYZY0waUAgUGWPSjDFPG2OaOByeSLl8o3mtG0cS5DLVfr8m0aFA3RjRExERERFnOd0wPRn4E7gByAM+9255wI3AHGNMc+cirH2MMWONMXMyMjKcDqXBK1mfVwP
TNkFr9ERERESk6pwe0XsQSAJGW2u7WGvP9G5dgLOBZt5zxMtaO95a2y8uLs7pUBo834heTVTchL8SvV3ZtX/qpoiIiIg4y+lE7xTgBWvtJwcesNZ+CrwInFrjUYlUwYptmQB0SIyukftp6qaIiIiIVJXTiV48sLqC46u854jUOku37gOge3LNjK6WVN3U1E0RERERqYTTid56YFgFx4cC6qMntU5uQTFrd2YR5DJ0SqqZEb2YsGBCg11kFxSTW1BcI/cUERERkbrJ6UTvHeAMY8wrxpgOvp3GmPbGmP8BZwBvORadSDlWbM/EbaFjYjThIUE1ck9jDE2iNH1TRERERCrnaB894CGgD3AlcIUxxldlIhQwwGfec0RqlSVbPFVPuyfH1uh9m8SEsSUjj51Z+bRKqJkiMCIiIiJS9zjdML0QGGWMOQUYAbTzHloHfG6t/dax4EQqsHSLZ31et5pO9NRiQURERESqwOkRPQCstd8A3zgdh0hV+QqxdGte04meZ+pmbW+xMGrUKIqKiggOrhX/EyMiIiLS4DjdMH2tMWZkBcdPN8asrcmYRCpT7LYs3+pprdC1xhO9ujGi16FDB1JSUujQoUPlJ4uIiIhIwDldjKUtUFHJwiigTc2EIlI1abuyyS0sJjkunHhvcZSaUpLoqRiLiIiIiFTA6USvMq2ALKeDECltiUPr8wAalzRNr91TN0VERETEWTW+gMYYcwaetgk+Y40xJ5VxajxwEjCzRgITqaK/CrHUTKP00hK9I3o7a/mI3pYtW3C73bhcLpKTk50OR0RERKTBcaJSQi/gMu/3Fhjk3Q6UhSfJu75GohKpIqcKsYCnvQLU/qmb7733HllZWURHR3PLLbc4HY6IiIhIg1PjUzettfdba13WWheeXnkX+V4fsMVaa4daa1fWdIwiFfGN6NV0Dz2oO8VYRERERMRZTq/RawdMdjiGw2aMucsYs8IY4zbGjKrgvMuNMbaic6R225GZR3pWPjFhwbSMj6jx+zeKCCHIZdiXV0RBkbvG7y8iIiIidYOjiZ61dr21NsfJGAJkKnAq8HN5Jxhj2gBXozWHdZpvNK9rcizGmBq/v8tlaBzl66WnUT0RERERKZvTI3oBZ4xpaYx5zhjzuzEmxzuC1racc1sZYz42xmQYY/YZYz4xxrT2957W2j+stWsqiMkFvArcAOi38zpsiYPTNn3+mr6pypsiIiIiUrZ6l+gBHYFzgD3AL+WdZIyJBH4EugCXAhcDnYCfjDFRAY7pZuBXa+3cAF9XapiThVh8/mqxoL8ZiIiIiEjZnKi6Wd1+ttYmARhjrgKGlXPe1UB7IMVau9p7/kJgFXAN8KR3359AeaN8va21GysKxhjTHRgNHOfn55BaaJmDPfR86kqLBRERERFxTr1L9Ky1Va1QMRKY6UvyvO9dZ4z5FU+fvye9+/ocZkiDgDbAKu+armbAeGNMc2vt/w7z2lKDsvOLWLcrm5AgQ6emMY7FUVdaLIiIiIiIcxydummMSXDw9t2BxWXsXwJ0C9RNrLX/s9Y2t9a2tda2xVOMZeyhJHnGmLm+LVDxSdUt35aJtdCxaQyhwc7902nim7qpNXoiIiIiUg6nR/S2GmM+A14DvrPW2hq8dwKedXwH2g3E+3MhY8w9wN+ARCDVGPM80M9au+2woyxHYWEh06ZNq67L1wtpaWkBfUY/bigEIMHkOPrsd272xLF07QamTdtx2NcL9HMCKCgoKPlaH35Oq+MZ1Td6RpXTM6qcnlHl9IxEpKqcTvTexrN+7WxgizHmTeCN0tMpq1lZiaXfNfOttQ8CD1bx3MH+Xr/Ue/v6vk9JSbGDBx/ypRqEadOmEchn9N0nC4GNDOnTmcHHtgvYdf3lWrmTVxbNIiiqEYMHDzjs6wX6OQH07NkTay3GGOLj/fq7Sa1UHc+ovtEzqpyeUeX0jCqnZyQiVeV0H70r8axZuxxYDfwDWGGMmW6MudRbGbO67MEzqnegeMo
e6ZMGbmktKMQCpapu1uKpm/Hx8SQkJNSLJE9ERESkLnK8vYK1NtdaO9FaewLQAc/IWGs80zm3GmNeMcYcXQ23XoJnnd6BugFLq+F+AWGMGWuMmZORkeF0KA1KUbGb5dsyAejqYGsF+KvqpoqxiIiIiEh5HE/0SrPWpllr7wN6AO8BMcCVwAxjzBJjzMUBvN3nwABjTHvfDm9j9WO8x2ola+14a22/uLg4p0NpUBZuziC/yE2bxpHERYQ4GktCVCjGwO6cAoqKq1pkVkREREQaEqfX6O3HGHM8cBmedXtRwHzgVaAYuBZ4wxjTzVr7j0quM9r7rW9N2ynGmJ3ATmvtdO++V4C/A595i6lY4AFgI/ByoD6T1A/TVuwEYFCnRIcjgeAgF/GRoezOLmB3TgFNY8KdDukgixcvprCwkJCQEFJTU50OR0RERKTBcTzRM8a0AS71bm2BfcBbwKvW2tJtBF4yxryKp9F5hYke8NEBr1/0fp0ODAaw1mYbY04EnvLezwBTgZustVmH+nmkfpq+wlPdcnCK84keeFos7M4uID2zdiZ63333HVlZWURHRyvRExEREXGAo4meMWYqcDyeKaQzgPuBj6y1ueW85Qc8hVsqZK2tUuVMa+0GPBU/6wxjzFhgbFJSktOhNBi7svJZuDmD0CAXR3do7HQ4ADSJDmPl9ix2ZWudnoiIiIgczOk1et2BJ4Eu1tpB3qIs5SV54En0TqiZ0GonrdGreT+v2om1cFT7BCJDHR8EBzyJHqggi4iIiIiUzenfWltaa4uqerK1diee6ZciNca3Pu/4zrVj2ibUjRYLIiIiIuIcRxM9X5JnjDF4Cqf4ulCvA+Zaa8tqaC5SY4rdlp9XehK9wSlNHY7mLxrRExEREZGKOD11E2PMWUAa8Afwvnf7A0grVT1TvNRHr2Yt3LSXPTmFtIyPoENilNPhlPD10tupRE9EREREyuBooudN5D4CgvA0Sr/Yu/0Hz2jjB0r29qc1ejXLN21zcEoinoHn2qFJjHfqZpamboqIiIjIwZxeo/dPYDkw0Fq73xCVMeYJYCZwH/CxA7GJMN03bbNz7Zm2CaWmbmZqRE9EREREDub01M1OePrlHTQP0bvvVaBjjUclAuzOLmDBpr2EBrkY2LF2tFXw0Ro9EREREamI04neJqCibs+h3nNEatwv3rYK/dvVnrYKPglRnqmbu7MLcLtrX80il8tVsomIiIhIzXP6t9fngFuNMW9aazeXPmCMaQVcCzziSGS1lBqm15zS6/Nqm/CQIGLCg8nMKyIjt5B4b+JXW4wbN87pEEREREQatBpN9Iwx1x2wqwjYBawwxnwErPDu7wKMBlYC7pqLsPaz1o4HxqekpNS+YZx6xL1fW4Xal+iBp/JmZl4R6Vn5tS7RExERERFn1fSI3vMVHLu0jH298Iz6vVgt0YiUY9HmDHZlF3jbKkQ7HU6ZmkSHsTY9m51Z+XRKinE6HBERERGpRWo60Tuhhu8nckimLN0O1L62CqWpxYKIiIiIlKdGEz1r7fSavJ/IoVi6ZR/jf1kLwKmpzR2Opny1ucXCzz//TH5+PmFhYQwaNMjpcEREREQaHKeLsYjUKtn5Rfz9vT8pKHJzfv9WDOzYxOmQytWiUQQAabuyHY7kYLNnzyYrK4vo6GgleiIiIiIOUO3zOsYYM9YYMycj46DWgxIA//xsCWt3ZtM5KZp/nt7d6XAq1D05DoDFm/WzICIiIiL7U6JXx1hrx1tr+8XFxTkdSr3zyZ+bmPTnJsJDXLxwQR8iQoOcDqlC3ZNjAVi2NZPiWthLT0RERESco0RPBFizM4t7Ji8G4P6R3etEFcv4qFBaNIogt7CYdelZTocjIiIiIrWIEj1p8PKLivn7u/PIKShmZM9kzunXyumQqqybd1RvyZZ9DkciIiIiIrWJEj1p8N6ftZFlW/fRpnEk/zkztda2UyhLqtbpiYiIiEgZlOhJg1ZQ5Obl6WsA+McpXYgJD3E
4Iv9014ieiIiIiJShRtsrGGPWHsLbrLW2Q8CDEQEmz9vMlow8OjWNZli3Zk6H47fUFp4RvSVb9mGtrVOjkSIiIiJSfWp6RG8DsP6ArRhoCyQAe4EM7/dtvcc21HCMtZraKwROsdvyP+9o3nUndMDlqntJUlJsGI2jQsnILWTTnlynwxERERGRWqJGEz1r7WBr7Qm+DbgNaAzcACRaa/tYa3sDicD/4Un4bq3JGGs7tVcInK8WbWVdejatEyIZ0SPZ6XAOiTGG7qVG9WqLxMREmjZtSmJiotOhiIiIiDRINTp1swyPA+9Za18ovdNaWwg8Z4zp6j3nBCeCk/rL7ba8+NNqAP52fAeCg+ructXuybH8vHInS7ZkMDy1dkw/veSSS5wOQURERKRBc/q32yOBRRUcX+Q9RySgpi7fwfJtmTSLDefsvi2cDuew+Cpv1qYRPRERERFxltOJXiZwfAXHBwPqBC0BZa3lee9o3tWD2hMWHORwRIcntYWn8qZaLIiIiIiIj9OJ3nvAOcaYZ40xbX07jTFtjTHPAaO954gEzK+rd7Fg414SokI5v3/daY5enlbxkcSEBbMjM58dmXlOhyMiIiIitYDTa/TuAjoCfweuN8YUABYIAwzwDfAP58KT+mbFtkwe+HIpAFce247IUKf/CRw+l8vQLTmWP9btZsmWfTRNCXc6JCZPnkxubi4RERGMGjXK6XBEREREGhxHf8u11uYCI4wxw4Ez8LRUMMBa4HNr7bcOhif1yL68Qp6esoo3f0+j2G1Jjgvn4qPbOB1WwHRPjuOPdbtZumUfJ6Q0dToc1qxZQ1ZWFtHR0U6HIiIiItIg1YrhDG9Cp6ROAq7Ybfls/mb++/Vy0rPycRm45Og23Dy0M7HhIU6HFzBapyciIiIipdWKRM8YEw0cDTQFfrDWbnc4pFrLGDMWGJuUlOR0KLXanuwCvl5bwD1//FTSSLxvm3juH9md1Bb1rwdhd1XeFBEREZFSHE/0jDE3Af8Gory7hgLbjTGJwDrgJmvtBIfCq3WsteOB8SkpKdbpWGqjDbtyePbHVXy+YAsFRW6gkJbxEdx0UmfO6t0Cl8s4HWK16JAYRViwiw27c8jILSQuov6MVoqIiIiI/xxN9IwxFwFPAt8BXwHP+o5Za3caY74DzgaU6Eml3G7Lpa/PYl16NgA9mgTxf6f1ZnBKU4LqaYLnExzkokvzWBZs3MvSLfs4ukNjp0MSEREREQc53V7hZmCqtfYUym6jMBfoXrMhSV01b+Me1qVn0yw2nGm3DubmfuEM6ZpU75M8n9Rkzzq9JVu0Tk9ERESkoXM60esKfFrB8e141u2JVOqLBVsBGNGzOW2bRFVydv2jdXoiIiIi4uN0opcLVNT0qy2wt0YikTqt2G35apEv0Ut2OBpnqPKmiIiIiPg4nejNBMaUdcBbifMy4OeaDEjqpj/W7WJnZj5tGkdyRD2sqlkVnZNiCHYZ1uzMYsHGvU6HIyIiIiIOcjrR+w/Q1xjzOTDEu6+rMeZiYBaQCDzkVHBSd5RM2+yRjDENY03egcJDghjTryVuC5e9PovVO7IciyUlJYXu3buTkpLiWAwiIiIiDZmjVTettb8aY84BxgOneXc/CxhgN3COtXaeU/FJ3VBY7OabxQ172qbPv89IZWtGHtNW7OSSV//g42sHktwoosbjOP3002v8niIiIiLyF6dH9LDWTgZaA6OAO4C7gNFAG2vt585FJnXFjNXp7M0ppHNSNCnNYpwOx1EhQS7+d2Ff+raJZ0tGHhe/+ge7swucDktEREREapjjiR6AtTbPWvuFtfYxa+0j1tpPrLXZTscldcMXC7YAnmmbAhGhQbx26ZGkJMWwZmc2l78+i6z8IqfDEhEREZEa5GiiZ4z50RjzrjEmoZzjJxljfqzpuKTuyCss5vsl2wE4vYFP2ywtLjKEiVf2p2V8BAs2ZXD0Q1O5Z/IiFm7ai7XW6fBEREREpJo5ukYPGOz9eqQ
xZoS1dvkBx5OA42s2JKlLpq3YSVZ+EaktYmnXAHvnlZaRn8F3ad+RX5zP0DZDaRbbjLevPIqbPpjP/I17eXvmBt6euYGUpBhO79Gc1o0jaRYbzo4cN3mFxYSHBAUslldffZXs7GyioqK48sorA3ZdEREREakapxM9gCeBc4DfjTHnWWu/czqg2swYMxYYm5SU5HQotcIXCxv2tM0idxG/bfmNyasnM23jNKL2FRCdC483fpT+LQYwssNI3h07hPXphbwzexlfLV3O6uxlPDOzEHdBE9wFTcCGcvvP3zK8ezMeGd2DuIiQw45r7969ZGVlUVhYePgfUkRERET8VhsSvT+Bx4EvgC+NMTdba59zOKZay1o7HhifkpLS4OffZecXMXWZZ9rmaT2aOxxNzbLW8sXaL3h67tOk5+zgiHWWm/609F1tcVnICYOVyb8yt+VvfNw6lLXNDHtDC6E5RB54saI4ivOb8sOGIxn5/D5euqgvXZvHOvGxRERERCRAakOih7V2mzHmOGAi8LQxphvwd4fDklrujd/SyCt007dNPC3jD0pf6q19Bft44PcH+HXpNwxabDltfhBNdnlHzkJCCE5sQuSWrfRaZ+m1zgJ5AOxs5GJni0j2tW3CnlZxzG+0j0WuLRQFZxAUnEFE1Cq27kjnzBdzeeTsnpzRq4VzH1JEREREDkutSPTAU3kTOMcY8yCeFgudgMmOBiW11uu/ruOx71YAcOWx7RyOpubM2TiT91+7lR5zdnPJGkuwG8BNcPPmxJ97Lo1Gn01wkyYUbt9B7rw/yZ03j31z51C8ag2Je/NJ3JsFSzyN1E8BXI0asa9pAvu6NOb7nLnkhH1LZsFa3nz2WNZ0b0VihIuczFxysnPJy8nF3bQZbY7tz4COTeiQGN1gm9OLiIiI1Ha1JtHzsdbeY4xZBkwAjnU6Hql9Xpuxjn9/uRSAB87ozqlH1P9pm8XuYr695zISv5nDlbnenUEuoo4/lvhzzyX6+OMxQX8VUwlJakrI8OHEDh9OEmCLiihISyNv2TLyli0nb9lS8pcuo3jvXqL37iV65VouK3n3cs/2W9mx7Ho3lk+ap7K4fW9iBxzFKT1bMqRr04AWcxERERGRw+N0ojcd2H7gTmvtO8aYdXhG9BrXdFBSe+2X5I1K5eIBbRyOqPq5rZt3HruCIz+ZA0Bm68a0Pe9yEkaMJDgxsUrXMMHBhHXsSFjHjsSNGAF41vkVbd3KnA8/omNIMMW797AjfT0L1/9BSF4RUUXhhITGExwWRVhELJHh0ZgVS2m8aycj1v3GiHW/kfXri8xu05ir2qaScPRwLh4wkH5tmpTc1wKZeYXEhB9+gRcRERERqTpHEz1r7QkVHPsNaFqD4UgtVzrJe3BUKhc1gCSv2F3Mo1/fydB3ZwGQddtl9L/yjoBc2xhDSHIy+b16kjh4MADNgPA9K7l2yrXsyN0B7Cg5PwRD0XFu2u4Iov8KN0etsLTcZTlhRTonrJhG1vRpzO4YxP9aJ5MUcxzhLtiZmc8R//qe4zo14f6R3WmfGB2Q2EVERESkYo42TBepqjd+bZhJ3r0z7qHFy18RlQ+FA3rS74rbq/emuXvpvGsj7zUexP+5Ejkzp4C+uXkkFhVRiCXIQEh8ITn9LVuuasyOq5qwsV8+uxu7ic6DExYXc9vXG2mUlw1AbEE2Vy/5ks2zFzD86V944vsV5BUWV+9nEBEREZGaHdEzxryGZzbXWGttsfd1Zay1Vh2XG7C3Z67nX180rCSvyF3EXTPuYu/XX3HkKouNDKfrw08HpvjJtsWw4msoyIKCHFI2roUdr8H2pbBrFeAZSr/Kd35Mc2jVn5yWfQlO7k1o0+4QmfDX9TbNgZ/+S/686WzbFMGKPVFYb5jBxcWMXDWNs1ZNY0byEby152Q+nb+Jf53enZO6NTv8zyIiIiIiZarpqZuX4Un0rgWKva8rYwEleg3U+7M2cM/kxQDcP7J7vU3ysj57C1fjFkQ
ccwJu6+auGXfxy5Kvefp7T7vE5rffSUizAxKjld/B1H9DfFtI7gXJvSG5z/5JWGnWwtzX4Zs7oLigZHdzgG3eF0Fh0LwntOgLLftBq/4Q1wqMObj/nk/LfnDxJ4QNnkWb6Y/Qet0vpAXNZ5YrhsjiIqb1gWMXWI7dsoiBWxbxW1cXj5oIHp91Ck8Mv4HuzTRDW0RERCTQajTRs9a6KnotUtpHczbyj08XAXDPaV25dGBbZwOqJllvPMDGh98FILR5ArMHtuD35KVcNc1FbE4xkf370+icMfu/aeMs+PASKMqD7Yth+Zd/HWvaHY66BnqcCyHhnn2FufDVrTD/bc/rHudCYhcIiWT5uo10Se0NCe0hKRWCQw/tg7TqDxdNwriLGZuxkVM2zeQfSyfwYodg3htoGfW7m5PmW45d5mbgsmwWtpvEE8u/wt3nXB4/7XqaRGn9noiIiEigOF11U+Qgbrdl4u9p3P/lUqyFO0/pwlXHtXc6rOqxYSbpr74JhGCC3BRs3U3PSbv5n4EgW4wJD6f5A//GuEr9TSR9Nbx7rifJ63UhtDsetszzbFsXwI4l8MWNntG+/ldD55Phi//zHAuOgBHPQM9zSy63LW8aXY4YHLjP5AqC+La0im/LG91H8+vmX3Hn7qFxp19plPIZ5o88MtdF0msd9FqXy+4f3uC1byYSNaAvV130BGFRVaskKiIiIiLlU6IntcrqHVn845OFzE7bA8Ctwzrzt+M7OBxVNdmznpynLyJ3ZwiuiBB+urc/v837nSEL3By5ygKGxBv+TmibUtNVs3bA22dB7m7oNAxGPAtBwX8lbkUFsHQy/PYcbFsI0x7ybOCZ4nnu29DsiBr7iMGuYI5vdbznRedRMOQ/sHQyxfO/ZO8vi9iyMJuEjCBG/O6G32fz3ceDCOuVxOBL7yKs+1BQQ3YRERGRQ1LTxVh+PIS3WWvtkIAHI7VKQZGbl6ev4bkfV1NQ7KZJdBj3j+zOaT3qaTP0vH3w3nmkzysGQthwWj9eyPyDoE7BXNK5OZ2X/EFBZjARux6CL5dA9zM9a+feGQN713vW441+3ZPklRYcCj3OgSPGQNoM+P0FWPkNdDoZznoZIuJr5OMVF/9VWTOoVCN3X3xBPc6h8SWQUJhH5veT+PP1CcQv30anjcDG7Sz/5v/I7eCi8xX/R/zIqwNThEZERESkAanpEb32eIqriJTYkZnHpa/NZtnWfQCc268Vd53albjIetpk210Mk64ib8VKsrc2xYaH8s8WszC4eOCYBzmp/emw4D2Cf34Mdq+FOa96tqAwKM73jMxd8CGEVbCmzRhod5xny9sHYTE1Ojr29NNPk5WVRXR0NLfcckv5YYaEE3vahQw+7UI2bd7Oe4/eTqclc0jZ5CZ0hZvtdzzF8sdfJOScURxxyY2Ex5VTaEZERERE9lPTxVja1uT9aoox5i7gUqATcJa1dvIBx9OAfCDXu+t5a+2EmoyxtsrMK+Ty1z1JXpvGkTx05hEM7NjE6bCq148PwqrvSF/hqTY5vV84WZE53ND774zoMMJzTq8LoOf5numXSz6FxZ94RvIiEuDCSRDtR6XK8Nhq+BCB17JFErc+/QafzNvAjZ/9j1PSvuGoJQUk7MyHFz5g+fgPWHNSL854bCLBwfX0jwAiIiIiAaI1eoExFfgAeLWCc8611s6vmXDqhoIiN9e+/SdLtuyjbeNIPr52IE2iw5wOq3plboffnyd/XzCZ60NwBxve65VNx0aduDz18v3PNcYzXbN5TxhyH2xfApGNIbaeTmcFjDGc3acNx3e+n4/nXs3P88bTYc1HRKwIpeMGQ7dv5jN+y0k0u+N1RvRsS0iQCveKiIiIlKXe/ZZkjGlpjHnOGPO7MSbHGGONMW3LObeVMeZjY0yGMWafMeYTY0xrf+9prf3DWrvmsINvQNxuy20fL2DG6nSaRIcy8Yqj6n+SB/DHS1BcwK6tXcFapqUa9sQY7jv6PkJcFYxSGQP
NUut1kldak+gw/nZ8B/5x0yOMuPVjhpzkInPYPgqD4IQFO9j08DkMfOQznp26isy8QqfDFREREal1HE/0jDGdjDEvGGNmG2NWG2PWHrD5m0B1BM4B9gC/VHDfSOBHoAueaZcX45l6+ZMxJurQPk2FJhpjFhljJhpjWlTD9euUh79dzmfztxAVGsQbl/endeNy23HXH/mZMPtVCrNdZCzai9vAp0fB6M6j6dW0l9PR1VoRbfoQ+fcZ9D/mBMIG76UwCIYuzOaKRfcwb/qLjHhmOvM37nU6TBEREZFaxdFEzxjTG/gTuBIIxVOsJdv7fVvADWzw87I/W2uTrLWnAh9VcN7V3vuNstZOttZ+BowE2gDXlIrxT2NMejlbqyrGdLy1tgfQG1gNfOznZ6pX3vwtjfE/ryXYZXjp4r6ktohzOqSaMfdNbG4GO9d1gKJifu9iKExuzE19bnI6stovMgHOfZuU+38h4bLeFAXB4Hlu+q75lpODb+KB1x7mxZ9W43ar1pOIiIgIOD+i9wCQBaQCvhYK/2etbQlcAsQB/+fPBa217iqeOhKYaa1dXeq964BfgTNK7etjrW1SzraxijGt934tAp4CjjLGNMhqEht35/Dfr5cB8NiYHhzXqYE0xy4qoOjHF9gwvTEZi7MpNvDpQBe3H3k7cWENJNENhCYdaXnbuzR7/L8UB8GQuXDSW8WMnvk5WRPP4Na7z+C7+d86HaWIiIiI45xO9AYCL3uTLV+C5gKw1r4NfAI8Wk337g4sLmP/EqBboG5ijIkyxjQqtetCYLG11u+FRcaYub4tUPHVtH9/uZT8IjcjeyZzZu+WTodTY7Lfe4y1HxeTsz2M7KhgHjrHRYueAzm13alOh1YnJZ5yJi2fe56ixEZE50GvdZYRv7u5+tNVJFw2jldeu9fpEEVEREQc5XTVzXDANyqW7/0aU+r4HOC8arp3Ap51fAfaDfjVVdoYcw/wNyARSDXGPA/0s9ZuA5KAScaYIMDg+bxjDidwgMLCQqZNm3a4l6lRC3YWMWVpPuFBcEL8nmqPPy0tzfln5HZjPv+QxG+nYwhicRvDsyMthbEx3GKGMn36dGfjo3qeU0FBQcnXavtv4AqCBx7CtWcP4SsXYRZ+TfGG3cTtDKL/kx/z0Lp1HD3kqoDcqlb8LNVyekaV0zOqnJ5R5fSMRKSqnE70NgMtAay12caYPXjWsX3qPd4BqM6SemUt6PG7q7S19kHgwXKOrcXzmQ6btbav7/uUlBQ7ePDgQFy2RuQVFnPf0z8DcMvJXTlzUPtqv+e0adNw+hktnvAkQd9Oxw18fKxh/qmd+FvXczi9/em1ZspmdTynuXPnUlBQQGhoaA39NzgT+CeFa3/n11suJ2mZYcTHc5lelMd1//0Ic5jN4mvDz1Jtp2dUOT2jyukZVU7PSESqyulE7zc8a/Pu977+ErjFGFOIZwrnDcCUarr3HjyjegeKp+yRPjkMr/y8lvW7cujUNJrLjmnrdDg1wl1QQN6rbxMFfD+siBFnnMO9Jz542ElHXXDaaadRVFREcHDN/k9MSPujGfTWL0z92zBazsnjxE+X8OreE7jomR8ID3X6f+5EREREak7A1ugZY4qNMRuMMZf48bYXgBnGmHDv6zuAtXgSv/uA9cC4QMV4gCV41ukdqBuwtJruediMMWONMXMyMjKcDqXKNu7O4fmfPDVv/n1GaoNpcr373YlE7cklrSmMaFlI72PuaBBJHkCXLl1ITU2lS5cuNX5vV3QiQyfOIe0kTxeTY37azrdjerLqq5drPBYRERERpwTyN+6NQATwhjHmz6q8wVo7y1p7l7U2z/t6G9AD6AUcARzhrYRZHT4HBhhjSuYQehurH+M9VitZa8dba/vFxdWOaX+VsdbuV4Dl6A6NnQ6p+hXk4P7pCbY/9zgAMwa46XjMzRAe63BgDYgriFOe/4F15/WjyAUpK9wU3PI000b2YM93E52OTkRERKTaBSzRs9a2tdY
mAj2Btw7jOtZau9Bau8RaW3wo1zDGjDbGjAZ8a9pO8e47vtRprwBpwGfGmDOMMSOBz/AkrPrTfwCs2p7JRa/+wZSl24kKDeLu07o6HVL1W/wJPNuLvRMex5VtSGsKycPPhGNudDqyBunUf71F0YQnmXVEBMVBkLSykG3/9xCzTutNwbrlTocnIiIiUm0CvmjFWrsIWOTPe7wVKVviWTN30Nw2a22VRghLObBR+over9OBwd5rZhtjTsTT1+4t732nAjdZa7P8vJ+Usi+vkGd+WMWbv6VR5LbERYTwyNlHkBQbXvmb67KMTfDJWNyFhexa0RJw8/GxLu7ofYXTkdW4nTt34na7cblcJCY62yux98BT6HrkMP4x4QXazXiVYxcVELMmj4Wjz6TtE4/QZPBIR+MTERERqQ5VTvSMMX8H3rHWBqxQiTEmCngEuBxPq4XyBPlzXWttlRZCWWs3AGf7c22nGWPGAmOTkpKcDqVMy7ft46IJf5CeVYAxcOFRrbl1WArxUaFOh1b9fnse3IXszR5AUdYG0prCzr7t6NSok9OR1biJEyeSlZVFdHQ0t9xyi9PhEB4SxJN/u4HnOw/nlmkvc8PMr+m6EbZefweTT5tK0wvvYEiXpkSFqWCLiIiI1A/+/FbzLPCYMeYz4DVgirW2rPYE/ngJTwPxacCvwN7DvF69Z60dD4xPSUk53GdfLV6fkUZ6VgE9WzXiP6NSSW1RN9YSHrbsdJj7Bu5i2DUrE4CPjnVxUrthDaYAS21njOGGIZ3plnw777boy+5p/+GY+W6O+fx7pq5aypnH3MlH1w0iLjLE6VBFREREDps/id7xwGV4mn2PATYbY94A3vD2ijsUZwBvWWsvPcT3Sy2zbNs+AO46pUvDSfIAZv4PinLZu68vRelb2dAsmNmdLbe3HeZ0ZHKAIV2TGNL1ArIuGsZHdw+j35RchizbRGLOHVwb9TivX30cYcF+TSIQERERqXWqXIzFWvuLtfZKoBlwBZ42CHcDq4wx04wxFxtjIvy8fwGeXnpSDxS7LSu2eUazujRrOBUmbeYu9n30GhumJ7B9yjYAPjjGTavY1qTEpzgcnZQnOroJlz8xk62XtiAzAnqsz2bk1//HP96bweFPVhARERFxlt9VN621OdbaN621g4FOwH+AtsAbwFZjzMvGmCOreLmvgeP8jaEhq8199NJ2ZZNf5KZFo4gGMf2tOCuLHU88waohQ9k8LZzsreEQFMyaIZ2Z3ckwtM1QTdus7YJDGXbbd4T+bQB5IdBzVQG9vriWVz9+z+nIRERERA7L4bZX2IZnZG8jnqqVYcDFwExjzFRjTOtK3n8T0M0Y87Axpq3Rb8WVqs199JZv9Y3mxTgcSc3Ycsed7HplAsX7cgmNLaTpVWfTYfpPPHH8PjCGYZq2WTe4guh1zetE/vMqCoPgyIWWoi//zYy3boLiIqejExERETkkh5ToGWOON8a8jifRexWIA8YBLYDmwO14eti9XtF1rLW7gbeB24A1QJExpviATb9plSEmcw3cn+DZxp8ARQVOh8Ry7/q8Ls3rf6KX+cMPZE2diis8lDYnptP+yhY0vuUBFhWmkZ6bTovoFnRL6OZ0mOKHlDG3EPvQPRQbOP4Pw28zvmXWS2dQmJ/jdGgiIiIifvOnvUIb4FLv1hbIAt4HJlhrZx1w+hPeBO3hSq75T+A+YA8wD1Xd9IMFXz/5LX/CjiWQ3NvRiJZtbRjr84qzstj2wIMAJPYpJLJpAQy6DYxhyvopAJq2WUe1HXkhm/Itmff+hxHTDd8Vryb/8ZPY0PUup0MTERER8Ys/VTfX4pmeORPPurz3rbUV/al7DZ4Rv4pci6e1wqnW2nw/YmnwMmM6wr1L4NOxsHgSbF3oeKLnG9HrWs9H9HY+8yxF27cT3iGZuJZzWJTUmZ/z0pj+xTks270M8CR6Uje1HHMRW3Lz2fPfxzl5hovFO3eRknM3j5p4rj/1KPXaExERkTrBn6m
bzwCp1tqB1trXKknysNZ+aa1tV8k1o4EPleRV3X7FWIKCoVkPz4FtixyNa19eIZv25BIa7KJt4yhHY6lOuYsWsefttyEoiN3DYxnaJpkLIvN4aeHLLNu9jPCgcM7qdBZHNDnC6VDlMCRfciWxT/2XvFBD6goXub/kMeLXq7j8mU/ZuFtTOUVERKT2q/Kfpq21N1fD/WcCnavhuvXWQQ3Tm/sSvYXOBQWs9LZV6JwUTXDQ4db4qZ1sURFb/3kfWEvcJRcyLuYDdppgmofFM6jtMAa1HET/Zv0JDw53OlTHXXLJJbjdblyuuvuz0OqUMylqlsCma68jaQfk/GR59Og7GfdiDvdfMYruybWvIJKIiIiIT5V/CzPGjDDGPF/B8eeNMaf5ef+bgHOMMaP9fJ/4lIzoLQa327EwljWA/nm7J75F/rJlhCQn8/2QeDabYjoWFPD1yW9xz4B7GNRykJI8r8TERJKSkkhMTHQ6lMPSrvfxNH7nVZa1dhGZbciYHsKtu/7FfS+/w2+r050OT0RERKRc/vy5/Ragot/io73n+ON1oAj4wBiz0xgz1xgz64DtDz+v2bBENYGYZCjMhj3rHAtj2VZvxc162lqhaM8edj73HACRd47jpRWegrK3ZuQQHNfKydCkmnXrMIBdN/6dX7sHEVpoCPopnITQ57jvs1t4e858p8MTERERKZM/iV4qMLeC43O95/ijqffrBjxVPBOAxAO2pmW/VUo0864H27rAsRCWb/UVYqmfI3oZkz/D5uYSdcwxTIj5k+yiHI7NyeWYyFZQh6cnStV0iu7K4Jc/YWvf1kTnwRWTwGXm8sjii7n522edDk9ERETkIP78hhoFVDY30K/hHGttW2ttu8o2f67ZIDV3tiCL221Zsa3+Nku31rL3ww8ByB0xiEmrJhGE4bbde6Bxe4ejq52WL1/O4sWLWb58udOhBEzHJp0Z/PoXRB4zkLgc+M87RSRmWKZsf4V//TTB6fBERERE9uNPorcGGFzB8cHA+qpezBgTYYx5zRgzxo8YGrz9qm76+Eb0HCrIsmlPLtkFxSTGhNE4OsyRGKpTzuzZFKxbR3BiIk+G/YLbuhkT04n2hUXQuKPT4dVKX331FZMmTeKrr75yOpSAcoWG0ur554no25eobMOTbxeStMcyaf0zPPXru06HJyIiIlLCn0TvA2CUMeZWY0zJ+4wxLmPMzcAoPA3Uq8RamwucS8Xr/uQA1trx1tp+cXGlKv453GJh2bb6PW1z7wee0byMYUfy246ZxITEcJ3bO3KZ0MHByMQJrogIWr38EuGpqYRlunh6QhFnzHTz1vL/8vrsj50OT0RERATwL9F7FE87hEeBTcaYKcaYKcBG4HFgFvCwn/efj9orHL5GbSAsFrK2Q+b2Gr/98q2eaZtd6+G0zaI9e8j8/nswhmdbLgXgmp7XEL9ng+eExkr0GqKg6GhavzqB2NNPJ6jIcOE0Nw+9Ucw33/+LT+d+5HR4IiIiIlVP9LxNzU8A7gZ2Asd4t53AXcBga22en/e/B7jKGDPYz/dJaS4XJHnr4DgwqrfcO6LXpXn9S/QyPp2MLSxk6xHNmefaRJvYNlyQcj7sWu05QVM3G6yguDhaPP4YrSZMIDg5idY74f63itn4+H28+eNjTocnIiIiDZxf5QKttQXW2oestT2ttZHerZe19mFvIuiva4EdwFRjzFJjzBfGmA8P2D44hOs2PCUFWWq+8ubyetpDr3QRlrc6bSPUFcqjgx4lJD8T8jIgNAai6nafODl80cceQ4evvyX2sosodsHQeZbwR1/nng8vo6C4wOnwREREpIEKdvj+pRuld/FuB7I1FEvdVlKQpWZH9HIKikjblU2wy9AhMbpG713dcmbNpiAtjd0xhj87Gu456k66Ne4GG7ytHRu3B2OcDVJqBVd4OC3uvJvIk04m7epL6ZHmJvb5P7h678k8dME7JEcnOx2iiIiINDB+JXrGGAMMBToCjYEDf8u
11toHqno9a60akAWKQwVZVm7PwlromBRNaHD9+s+Z/r6niuLUHnBqxxGM7uT9u8TuNZ6vmrYpB4jv14/gSV+y8oIzabsjn6smbOPGzNO4/YKX6N/8KKfDExERkQakyomeMaYLMBnoxMEJno8Fqpzoif+MMWOBsUlJSfsfSOwCrhDYtQbysyCsZkbXlnkbpde3/nmFu3aR+f33GANrjmvH/wbci/GN3u3yJnqquClliGnfjm5f/siiMafTZOse7ngrj6dzruKmKydwZLKSPREREakZ/gzBPA+0Bm4D+gLtytgOqXu0MSbUGDPYGHO5MeYy7/ehh3Kt+q7M9goAwaHQtAtgYfuSGotnuS/Rq0etFazbzcyHbiGo2LKwYxD3jXqOyJDIv05QIRapRESTBHp9+QO7O7YlOg9uf7+IV5+7ijlbZjkdmoiIiDQQ/iR6A4EnrbVPWmvnWWvXl7X5G4Ax5jw8LRqmAq8Cr3m/32iMOdff6zVoJdM3a65x+rKSQix1Z0RvZ85O1mWsK/NYUXo6Ky6/iCZfetbhNb38Sto3OuDvFyVTNzWiJ+ULjYpkwKdfsOvIPoQWww2fFvHRw5fzS9rvTocmIiIiDYA/a/SygM2BvLkxZjjwjve69wBLvYe64anI+Y4xZq+19rtA3rfeatYDeKfGEj1rbcmIXl1qln7l91eyYd8Gnhj8BENaDynZn/3772y+7XZsejr7ImDu1QO57qxx+7/ZWti11vN9wiENYDcIN910k9Mh1ApBIcEcM/FtFt45jtDPvuOy7918lnkVu8a9wKgeg50OT0REROoxf0b0PgZOCfD97wZWAEd42zZ85t0eAo4AVuLp0SdV4au8ubVmEr2tGXnsyysiPjKEpjFhNXLPw7U1ayvrMtZRbIu5bfpt/LblN6y17Hz2OTZccSXF6eksaQ1P39iaS69+9uALZG6DwmyISIDIhJr/AHVEUFBQydbQGWPo+cjTRFx3MW5jOeN3N9seuI63ZnzudGgiIiJSj/mT6P0bSDLGvGKMOcIYE2OMiTxw8/P+vYDXrLUZBx7w7nsd6OPnNRuuZt6m6TuWQXFhtd+upFF6s9i/CpXUcvN2zAMgPCicQnch//fj/7Hg89dJf/FFrIGPjjX894Iw7h7xBFEhUQdfQNM25RC1vfEukv51C4XBluMXWVredgefvvGI02GJiIhIPeVPorcF6AdcCcwH9gKZB2z7DiGGiv7kX7/q9Ve38DiIbwvF+bBuerXf7ueV6UDdmrb5544/AbjqiKs4s+OZ5BXnMfvdZwD4alAEHx0XxHV9/05qk9SyL+ArxKKKm3IImpx7NW2ff4Q9jaHZHujy8BtMv/JsivbscTo0ERERqWf8WaM3kcA3L/8TuNoYM95au99vOsaYODxJ5dwA37N+63I6/P48vH8RnDMROg8L+C2stTz+/Qre+C0NgBO7NA34ParL/B3zAeiT1IerjriK3Pwseq34BoCpnfLp3+woLu9+efkX2KUeelXx22+/UVBQQGhoKAMHDnQ6nFoldvAZ9J3UjS/uPINOsy1Nf13KkqEn0O7Bh4kdPtzp8ERERKSeqHKiZ629rBru/yDwDbDMGDMeWIYnmeyGJ8lrClxXDfetv066H/IyYN5b8P75MOp/0OOcgF3e7bb88/PFvD1zA0Euw2Oje3BspyYBu351yirIYtXeVQSbYFKbpBLkCuKe0DPZkvcNmxpDVotG/OfY/xDkqmCQebe3EEtjFWKpyO+//05WVhbR0dFK9MoQ1qwTpz71IxOfPZn43wvpkZbP5pvGkX/jWppce22dmQotIiIitZc/I3oBZ62d4m2h8AyeqpuWv5qxbwHOtdb+4FR8tVG5DdN9goJh5HMQ2Rh+fRo+uRpydsGAaw/73oXFbm75cAGfL9hCaLCLFy7ow9Bu5cRRCy3cuRC3dZPaJJWI4AgAcn6YCoAZPJBXht5Ks6hmFV9EPfQkQCLjm3HejT/xVtRwJq7L4qKplvR
nnyNvfRotH3gQE6pWoiIiUrm5c+eG4ul1XXd6XcnhygQ29O3bt6Cik/xK9IwxjYCbgKF4Rtsusdb+boxpDFwDfGytXenPNa21k4wxk/mrCTvAOmCutbbYn2s1BNba8cD4lJSU8qfRGgND74eoJvD9PfDtnZC9E06813PsEOQWFHPdO3P5acVOokKDeOXSfgzsUDdG8nx86/N6Ne0FgC0uJtOb6B174S2EN+5a8QXcxbDb239PrRUkABrFJzD68u/58LVzee6sjVzzhYXPvmDtxg20/d/LBMXFOR2iiIjUYvPnz09KSEh4LT4+vnlwcHDdKIEuh62oqCh/z549W+fPn39Fr169tpd3XpUTPWNMc+BXoA2wCWgJRABYa3cZYy4HEoFx5V6kHN6EbpZ3k0AZeANENoHProdfnoDsdDj9KahoamIZ9uUVctUbc5iVtpv4yBDevKI/PVo2qp6Yq5FvfV7vpr0ByJ0/n+L0dEJatiSsayVJHkDGJk+hm+gkCNMfzSQwmjduxJU3fsUrE+/l6fMmc80nEP/nAlaeNYKO73xISLNKRplFRKRBmjt3bmhCQsJrbdq0aeZyudxArtMxSc2JjY1ttn79+tfmzp17Znkje/5Utfwv0Bg4Bs/o24FDQ5OBkw4lUABve4ZWxpjWB26Hek0Bep0P570DweHw55vw0aVQmFflt6dn5XP++JnMSttNs9hwPvrb0XUyyStyF7Ew3dNf0Deil/n99wDEDB1atTVRu1WIRapHdFgw465+iFMHPMqL5wexPhHYvJOlF55JcVa20+GJiEjt1Do+Pr65y+UKdLFEqQNcLpdt1KhRc6BVuef4cb1TgeestTMpu/rmuopuVBZjTJAx5m5jzBY8c03TvNc5cJPDkXIKXDwZwuJg2RfwzmjIq7wTxua9uZzz0u8s2bKPto0j+ehvR9Oxad0cyVqxZwW5Rbm0jmlNk4gmWGvZN2UKADHDhlbtIr6Km5q2KdXk3CEjuPPcr3hrdCxb4iF0815+Of94CvKU7ImIyEFiNF2zYQsJCQkHyu1z5s8avTg8iVhF1wrx43oAj+FZ87cI+AjY7ef7paraHA2Xfw1vnw1pv8Cbp8OFkyA6sczT1+zM4uIJf7AlI48uzWKYeGV/msaE13DQgTNvu6dRum80L2/xEoq2bCW4aVMievaETXOgMBdikz1bSMTBF9mlZulS/VJbteF/10zn/pwxnPPhapJWZfPelUcz5LlPaJmg0WQREREpUeForj+J3nqgewXHjwNW+XE9gAuBL621I/18nxyKZqlw5XcwcRRsXQCvDfOM9MW32e+0xZszuOS1WezOLqBvm3heu+xI4iL8zeFrj+KsbObt8CR6vvV5JdM2TzoJs3kuvHrArOOIeIhvB70u8GyhUZq6KTUmNjycf9/8KU9nXseIz36h/9xCXvrnGRx//X0M7Rq4dikiIiJSf/kzdfMD4CpjzFGl9lkAY8xVwFnAO37ePxr40s/3yOGIbwtXfg/NjvD0hHt1GGxfWnL4j7W7OG/8THZnF3B850TeurJ/nUzybEEBYXPmsP6ii1nZrx/9n/6RRlmWPk37YK39K9EbNgxWfO15U2xLaNQaXCGQuwe2/Alf3wpPdoMf7ocdyzznJWhET6pfVFgwN/7zBaaeeBoA5//g5oOJ9/P1rKedDUxERETqBH8SvYeAhcAvwHd4krxHjDFrgJeBGcDTft5/NqDfmmtadFO47CtocyxkbYPXh8OGmfy4fDuXvDaLrPwiTuvRnFcu6UdkqKOtFv2Wvmk1qx99gFUnDqHRhFfJmTMHgB7L83jqFTcJPy8mf+UqCtavJyg+nsh+fWHtT543j3gGbloE9+yAW1fB6Neh5ZGQtxdmPAkZGwEDCe3Kvb94NGrUiPj4eBo1auR0KHVaTHgIVz72ENOPHIYLuOVTN9n3vsyMZy7Gut1OhyciItLgGWP63nzzzclOx1GWKv8Wb63NNcYMxrOm7jwgDzgCz3T
Nu4EnrbWFft7/NuBLY8wX1toZfr5XDkd4HFw0CSZdCcu/pOjNM3g37wbyi3tzfv/WPDgqlSDXofXcc0rB9m2sOvtMGmUUAVCYnEzLq65kVvMc9jz6JL3WWbbefgfB3mbz0UNOxBTsgy3zISgU2gz0XMjl8iTDqWd5to2z4PcXYNnn0OqostfvyX6uvPJKp0OoN2LDQzjvpceYeG0Yg+Z9Sep6C/+bw6JP+9D25nuJOf1MjMufv9mJiIhIoPzwww/L27ZtW2Hjcqf49duBtbbQWvuYtbavtTbKWhthre1hrX3YWnsoH/A2YAcw3Rgz1xgzyRjz4QHbB4dwXamKkHAY8yarWpxJcHEeLwU/wX39CvnvmXUvyXPn5rL86ktplFHE6uZw/0Wh/HTLCBIuuIDZ4Vv577kuVv1tGK7oaIq2e/pKxg4bBmunARZaD4DQyLIv3qo/nPMm3L4WLv60xj6TiE+jqFCunvAQb/zfS3zRP5G9kRCyLZ/Nt9/D9n/43bpURESkzmjRosURtXXEDGDIkCHZHTp0qHCwKzc315FfrJ3+M/BoPAVeDNAbONO778BNqoG1luenr2PomtF8WHQ8wcbN5SFTqtZTrhaxbjdb/nEXISs3sL0RfDy2C0tauXk9/Q1+3vQz83bOA2Noef6ltP/ic2KGDyd6yBCiBgz4a9pm+xMqv1FEvEbzxDERoUE8ecVxBF/0CuMu6MWEYS4KgmDPZ9+z+6G/g1UbJRERkYqsWLEi1BjT9+GHH0686qqrWiYkJPSMiIjofcIJJ3RcsWJFaOlzx48fHz9gwIDO8fHxPSMjI3t37dq123PPPdf4wGseOHXz5ptvTjbG9J09e3b4scce2ykyMrL36aef3h5g0qRJsb179+4SExPTKzIysnfbtm1Tb7311ubV9XmrPHXTGHNJVc6z1k6s6jWttU4nmg2WtZb/fr2MV35ZhzGGiCG3wfTpsPgTOPkhCC+3JUetk/7882R++y05YfDsedFMPPst/rfgf0xcOpGbfrqJIncRIa4QujfpTkhQGC2ffsrzRmthjTfR63Cicx9ApIpcLsPNw1Jo3/Qx7v3lLnLC5nLjF262TvyBYDOa2HFvQli002GKiIjUak8//XTzbt265bz44otp27dvD37wwQdbnHzyyZ1XrFixJCwszAKsXbs2bNSoUXs6d+68zeVy2WnTpsWMGzeuTW5uruv222/fWdk9zjrrrI4XXnhh+u23377N5XKxdOnS0PPPP7/j8OHD99x9991bQkND7YoVK8LWrl1bbb0Q/am08QaeAiwHDvcc+GfkKid64j9jzFhgbJJ3ndmhKCp2c9eni/hwziZCggxPnduL03skQ9oxsP5XWPIJ9L0sYDFXp4wvviT9xf/hNvDUGS6OGjia6NBobu13K2kb0/g582cAujXuRljQAf+Odq3xFFiJbAzNejgQff315ZdfkpeXR3h4OKeffrrT4dQ7o3q1pHXCU4z9/FE+PfoLzvwd1ry3hOZFg2h29duQ1M3pEEVExCFt7/yqr9MxAKQ9fNpcf9/jdrspLi4uc39h4V+zI40xBAcfesHAqKio4ilTpqwOCgoCoGvXrnknn3xylxdffLHxuHHj0gEefvjhbb7zi4uLOe200zK3bdsW8uqrryZWJdEbO3bsjnvvvXeH7/Xrr78eX1hYaN544431CQkJvopqmYf8IarAnxG1E4ATvV9920nA34AFwCxgaKADlP1Za8dba/vFxcUd0vvzi4r5+7vz+HDOJsJDXLxyST9PkgfQxzto++dbAYq2euX8+Sdb774bgLeGBrOoYzAXdr0Q8PwPwOj40YzpPAaAY1sce/AFSqZtDvYUYJGAWbFiBUuWLGHFihVOh1Jv9WndmKlXPMiiE+5kVsdgwvMNy7/KZdaE4bBtkdPhiYiI+O3rr7+OCQ0N7Vt627JlS+gzzzzTvPS+gQMHpvjeU1RURGFhYclWVqJ4oBEjRuzxJXkAw4YNy05KSiq
cOXNmlG/fokWLwkaMGNGuadOmPXz3/eCDD5qkpaWFV+WznHfeeXtLvz7yyCNzgoOD7Zlnntn+9ddfj9+8eXO1l7b3p+rm9HIO/WiMeROYCRwJ/BiIwCTwsvOLuOatucxYnU5MeDCvX3Yk/dom/HVC15Hw9W2weY6nt56DowK5OfvYuHw2HXudgKuMJCx3yRI2jr0GW1DAxpO681Wf5ZzU6kRaxrQsOccYw70D7uX8LufTNq7twTdZ4/1Rrcr6PJFaKC4yhHcvvpjXWnQm7b5raJtewJLpUfwWeS7XXv4dYY1aOR2iiIjUsEMZSastjjnmmOzp06cvK71v9OjRHYcMGZJx7bXXloyixcXFlWRzAwcOTJk9e3bJuoVx48ZtffLJJ7dUdJ+kpKSDiqc0adKkcOvWraEAGRkZruHDh3cODw9333fffZs6d+6cHxYWZp9//vnEjz76qElVPkvr1q33u0dqamr+J598surRRx9tdu2117YrKCgwqamp2Q899NCm0047Lasq1/RXQDJJa22BMeZt4AbgkUBcUwJrb04Bl70+m/kb99IkOoyJV/SnW/IB6/BCI+GIMTDnVZj3Fgx/yJlggSlXnk6neTv55shkjnlqIo2atCg5lr96NRuvvAp3VhZRJw/lwaP+hELDxd0uPug6xhg6xXc6+AbFhbDuF8/3HZToSd1ljOHKIQNY3uR99l5+Ht03FJD0nuX2fady7bVv06VpT6dDFBERqZL4+Hj3oEGDckrvCwkJsc2bNy88cL/PK6+8kpaRkVEyPHdgglWW7du3hxy4Lz09PaR79+45AD/++GP0li1bQr/99tsVJ598ckkS9vTTT1e5YqHL5TqoStqIESMyR4wYkZmbm2umTJkSff/99yePGTOm05o1axY1b968qKrXrnIMAb5etVWNkUO3Y18e5748k/kb99KiUQQf/e3og5M8nz7eZGnBe1CUX3NBljJz1qd0mOf5o0372VtYcvrJLJj6IQAFGzey4YorKd67l6hBxzFr7NHsKcwgtXEqvZv2rvpNNs+Fgkxo0hniWlZ+vkgt16VnV7p9NJltLZrTZB+Mfa+IT+48n5fmvECRO+D/3yEiIlIr9OzZM3/QoEE5vq1t27aVJnpffPFFfOkpnt9//33U9u3bQwYMGJANkJ2d7QJPkuk7Z+fOnUFTpkxpFIiYIyIi7MiRIzNvvvnmbbm5ua6VK1eGVv4u/wVkRM8Y0xtPI3UtyKllNuzK4aJX/2DD7hw6No3m7SuPollcBVOLm/eCZkd41vgs/8rTMLwGua2bJeMfZyCwrWcL3Lt2k7wpF/ff7+PbM76g45xtFO3YQeSRR9LimWd467vzALi428X+tYXQtE2ph2I7tOP4b75lxi3jaDJlKmf+BivTnue2y3/nP+eNJzKknF6RIiIiDUh2dnbQ0KFDO44dO3bnjh07gh944IEWbdq0yb/uuut2AZx44olZ0dHRxTfeeGPre+65Z0tWVpbr0UcfbR4fH1+UlZUVVNn1y/Loo48m/vLLL9GnnHJKRps2bQp27twZ/PjjjzdPTEws7Nu3b25gP6GHP+0V1pZzKB6IBYqB6/y4XgTwAvCNtfajqr5PPNbvc5N633eVnpdfVExhsaVnyzhev7w/CVGV/MHAGOh9CXxzG/w5scYTva8XT6LX7N0A9LnncSI6dmbKPZfT/quFtJk8h0JgbXIQj5+4kpxPBpNTlENSZBJD2/pZB6ikrYISPalfXKGhDHruBVa++iiFL06g85YgWj02l6eWj+Rv93xA44iDWgCJiIg0KDfddNPW1atXh1177bVt8/LyXEcddVTmSy+9tMHXWiE5ObnonXfeWXPHHXe0uuyyyzokJiYWXHPNNTt2794d/NRTTx3SDMY+ffrkfPfdd7H//ve/W+7evTs4Li6uqF+/flnvvPPO2ujo6GpphuvPiN4GDm6lYIF5wCpgvLW2vGTwINbaXGPMucCvfsQgXhbIyq/adKzBKYk
8f0EfosOq+J+7xxj4/h5YOw32rIf4Noccpz/yi/P5880nGZMPOd3bEndELwBOe+IDZg+eQMGDT7M7spj/joEsVzYUgcEwtsdYQlwHTbUuX+5ez9RNVzC0LaMap0g90PnK28lIKmb7c+OJWB/OWZM2M2XpUI597h1atuzqdHgiIiJVsnnz5oCXkg4NDbUTJkzYNGHChE3lnTNy5MjMkSNHLj1w/4GFXqy1cw88XlYxmJNOOin7pJNOWnM4cfvLn6qbg6vh/vOBztVw3XqvdYyLP/81rNLzjDFVT/B8IuKh20hY9BHMfwdOuOsQo/TPe0veZeBvewHocNWN+x07csRVFJ16GTmF2QwuVYUz2BVMRHCEfzdK+wVsMbQaCGExhxu2SK0Vd9qdhDVNZstL/yRjTgQ9l+WyadTZ5Pz7Pjqfeq7T4YmIiEg1crp52D3AVcaYwQ7HUee4DMSEh1S6+Z3k+fh66s17B9yV9yM5XBn5Gfz6+Yu02A3FifHEDT14KmZwUDCx4XHEhMaUbH4neVBq2uaJhxm1SC1nDOH9L6XVY9NxjWrMxhaWuCxL8c3/Yu7T9zodnYiIiFQjf9botT6UG1hrN1Rw+FpgBzDVGLMCWAMcuBjRWmv1p+ea1uZYiG8Le9I8iVGnk6r1dhMWTeD437MBaHbRpZjgauwh6SvEovV51aZDhw7k5uYSEXEIibgEXEjjtvT41898+/o/mfXLR/SfGUTkSx/z/dZ1nPTQxDJ7VYqIiNQ3KSkpBQdOtazP/PltOo2D1+hVRUWVaUaX+r6LdztQtSxOlEq4XND7YvjxAfjzzWpN9Hbl7mLqr2/zxBqLDQ2h0TnnVNu92L0W9qyD8EaQ7Ec7BvHLqFGjnA5BDmBcQZxy5X/4vMWxzI+4iSOmuWj12Vw+2TmU4S9+SnR4OS1XREREpE7yJ9H7NzAS6AFMAZYBBugKnAQsAD735+bWWv0ZuTbrdQH89B9Y8Q1k7YToxGq5zYrdKzhxtqdnX6MRIwiOj6+W+wCweqrna4cTwHVI1XFF6rSRw09hblIiW1wXkDQ9iO6/beHLC06g/8vv0T5RS6ZFRETqC38SvdVAa6C3tXa/6jfGmJ7AVGC1tfadAMYnTopNhk7DYOW3sPB9GHiD/9fIy4BJV0PnYXDkVWWesmXHGk5Y6Bm4TbjoosOJuHIlid6Q6r2PSC3Wt3c/Nt3+NVuCz8D9UzE9l+aw4KKzcY9/i45tejkdnoiIiASAPyNqdwDPHZjkAVhrF+DpifePQAUmtUTviz1f/3wL7CHMop3zOqz6Dr7/pyfpK8u304gsgIyuLQjvWo1l34sKYN3Pnu9ViEUauJZtOtLtvqmEnxxBdqSly/oiNp5/AYtnT3E6NBEREQkAf0b0OgJ7Kji+C+hQ0QWMMT/iWXN3srW2yPu6MtZaq+EXp3Q+GaKaQvoK2DgLWh9V9fe6i2H2q57vC7NhwQdw1Nj9TrFuNy2+XQBA3pnV/J9540xPHE27QVyL6r1XAzdx4kSys7OJiorikksucTocKUd0QjNS/j2NZQkXsPOrNTRLh6yrb+SXq0Zw3PWPgDFOhygiIiKHyJ8RvY3AecaYgzpTe/dd4D2nIu2BdnjW9pV+XdHW3o8YHWGMucsYs8IY4zbGjCrjeKgx5mljzCpjzBJjzNcOhHlogkKg1/me7+dN9O+9K7+FjA0QFOZ5PXvCQaOC2b/+RqPt2aTHQPzQkwMQcAVKpm1qNK+67dy5kx07drBz506nQ5FKuCLi6P6Pr2j20AOsbGeIzoNGL37BnL/1I3vbKqfDExERkUPkT6L3NDAA+M0Yc6kxZoB3uwz4HTjSe065rLVtrbXtrbWFpV63q2w7pE9Ws6YCpwI/l3P8v0AokGKt7Q5cWVOBBURv74jM4k8hP7Pq75s13vP1xLshprlnVHDd/o9o99tvAfB9HxctG7UJRLTl8yV6Hau3VYRIXdT5uDEM/vAXfu0XSbAboqbnkHbVqeycVXf
+LiUiIiJ/qXKiZ619Ebgd6Aa8Bvzq3V7z7vuH9xxHGWNaGmOeM8b8bozJMcZYY0zbcs5tZYz52BiTYYzZZ4z55FD6BVpr/7DWrinnHpHAWOBOa63be/5Wf+9Rxk1xZ2eXuRVnVbRlVbxlZh68hSVRnDSA4uwc7KJJVYtv5wpYOw1CIqHPpdD3cs/+2a+UnFKQlkb29J8pCIJf+0WQEJ5w2I+lXJnbYPsiCI6A1kdX331E6rC4mMac+9o0Ph/ZnMIgcK0OZtu141j/wl1OhyYiIlIrGWP63nzzzclOx1EWv7pSW2sfN8ZMAE4G2uKZgrkWmGKtrWj9Xk3qCJwDzAV+AYaVdZI3AfsRyAcuxbN28EHgJ2NMD2ttdgDj2QPcaYwZChQCj1hrPzuci4Zs2MiKvv0CEZ8fmhO98BlafXlZ5afO8iZ0Pc6FiEbQ91L4+VFY/jVkbIa4Fux+910AZnQ3xDVthanO9UC+JuntjoOQ8Oq7j0gdFxMaw/UPfs5jR4yj32sz6LjVRc5zn7Jixiw6vvQxQXGNnA5RRESk1vjhhx+Wt23btsDpOMriV6IHYK3dC3wQqACMMZ2Am4D+QDwHjzJaa22FRV4O8LO1Nsl77asoJ9EDrsaz/i/FWrvae/5CYBVwDfCkd9+feNpKlKW3tbaydYkh3vevsdbeZYzpAvxijFlc3ihglbgMJjKy3MMVpkwVJVTlHrO4s7LJWp1H0YaVBLeuoN9W3j5Y8J7n+/5Xe77GNIOuI2DJpzD3DYr730TGpE8A+Kafi3bRLSuK+PCprYJIlUWHRvOvi8bzds93ePfFhxnzSzEh8zazfOhxdHr7PUI7pzodooiINGArVqwI7dKlyxFffPHFytNPP92PdUWBN2TIkEoHh3Jzc01ERMQhlK8/PH4nesaYwcBQoCnwhLV2uTEmGk8j9SXW2nJq6Jd5rd541rWFACvwJF5L8CR8yXhGCytLpPbjmx5ZBSOBmb4kz/vedcaYX4Ez8CZ61to+/ty/DOvxjBa+5b3ecmPMfKA34FeiZ4yZ6/u+c+fOdPlzbkWnB9yGU/uRvTab7M/fIO7v/y3/xAXvQUEWtD0Okrr/tf/Iq0sSvYxNLXFnZ7Ova0vWJ23juJhqTPTcxX+N6HVUoidSFcYYLu5xEYsf6M2/J17FpZ/vpf32IlacO4YOzz1B5LGnOh2iiIiIX3wJ4kMPPbRh9erVYZ988knj3Nxc14ABAzJfeumlDSkpKSUjc+PHj49/7bXXElesWBGRn5/vatOmTf511123/YYbbthV+prGmL7jxo3b+uSTT24BuPnmm5Ofeuqp5rNmzVoybty4Vn/++Wf00UcfvW/q1KlrJk2aFPvggw8mr169Ory4uNg0bdq0cPTo0bsef/zxw1/WVYYqJ3reypof4kmQDJ7k5T1gOVAEfAE8gafwSFU9AGQBxwF7gR3A/1lrfzTGXAQ8BZzlx/X80R0oa/rkEmBMoG5irU03xnwHDAc+N8Y0B1KBg/oR+qOwsJBp06YFIMKqa96hFaxdTvqUH5iXWs5AqXXTf9bTRAKLowaSXjpGa+kX1YaorPVsm/wCBpjRKw7YRu623IB/nrS0NKZNm0bMvlX0zd1NbnhT/li0CczmgN6nrvM9p0AqKCgo+VrTP6fVoTqeUV1yee9/8mr0y5zzyUqOWG9Zdd3NuC/8moyjRpec09CfUVXoGVVOz6hyekYih+/pp59u3q1bt5wXX3wxbfv27cEPPvhgi5NPPrnzihUrloSFhVmAtWvXho0aNWpP586dt7lcLjtt2rSYcePGtcnNzXXdfvvtlZYVP+usszpeeOGF6bfffvs2l8vF0qVLQ88///yOw4cP33P33XdvCQ0NtStWrAhbu3ZtWHV9Tn9G9O7Fk+TdDXyHZw0cANbaPGPMJGAE/iV6A4FnrbWrjTG+Shwu7zXfNsYcBzyKJ0kKtATK7gu4G8+IYpUZY+4B/gYkAqnGmOeBftbabd5TrgVeNcb
8B3ADt1prV/gbsLW2r+/7lJQUO3jwYH8vcVjyo4tZO+XvFK/fx/HHHosJLuPHZ/VUmL4FYluQetbtEHTAOdH/R9aEf2B2ZRLctDGr+4TDbhjcvhODux0HrqCAxTtt2jQGDx4M02cBENH9NAafcELArl9flDynAJo7dy4FBQWEhoYG/NpOqI5nVNcMP2E4/2n3DDnPj+eolVD41g+0tHvoeNubEBSsZ1QFekaV0zOqnJ6RVOpfcX0rP6kG/CsjYFPPiouLcbv/mrRXVFRkfPsLCwtL9rtcLoKCKv9dMioqqnjKlCmrfed27do17+STT+7y4osvNh43blw6wMMPP+z7PZ7i4mJOO+20zG3btoW8+uqriVVJ9MaOHbvj3nvv3eF7/frrr8cXFhaaN954Y31CQoLvw1TrtFN/2itcCLxurX0Y2FDG8eX43/MunL+mZuZ7v8aUOj4HqM4SiWXNlfW7Ioi19kFrbUtrbZi1ton3+22ljqdZa4dYa4+w1va01r5zWFE7JLTPCYREQ3GeIe/nyWWf5CvC0u+Kg5M8gB7nsmdNHADxzdaxadscAFp89n/w8eXVEDVqqyASAMYY7hl2E00eGs/PRwQTUmTIe/NPpt8+GArznA5PRETqOLfbTWFh4X6bz2233ZYcGhra17elpqamAowaNapz6f233XZblapfjhgxYk/phHDYsGHZSUlJhTNnzozy7Vu0aFHYiBEj2jVt2rSH7/offPBBk7S0tCpV9TvvvPP2ln595JFH5gQHB9szzzyz/euvvx6/efNmv5fQ+cufG7QEZlZwPAuI8/P+m73XxVqbbYzZg2ft2qfe4x3wVKmsDnvwjOodKJ6yR/oaPONyEZXakr0zN5H97adEnDh6/xN2r/M0SQ8K9bRUKEPh7kyyNoeAyxJ7ZDM2h3im+LUoKoK0GYEPOncvbJoNrmBoNyjw15cyHXnkkeTn5xMWVm2zEcQhp3c/llUvTuHrm0cxbHYGTb/axdsZR5E45l6nQxMREQjoSFpN+vrrr2NGjBixX7U/a+1cgBtvvHHnqFGj9vr2b9q0KeTCCy/s+Oijj64/6qijcnz7W7duXaW8ISkp6aDzmjRpUrh169ZQgIyMDNfw4cM7h4eHu++7775NnTt3zg8LC7PPP/984kcffdSkKvc4MJbU1NT8Tz75ZNWjjz7a7Nprr21XUFBgUlNTsx966KFNp512WlZVrukvfxK9XXgKpJTnCDyJmz9+A4YA93tffwncYowpxDPaeAMwxc9rVtUSPOv0DtQNWFpN9zxsxpixwNikpCRH7h89eAh7Z75J1twlHPRTPudVwELq2RCdWOb79076BKwlZvhw9lx3J4Ufn0RCeAKRwemQswtydkNkAPvprfsZbDG0PgbCYwN3XanQoEFKquuzTonNuPrVX5hw8/kM+WEJfWcUMGPfP/mtXRgDO49wOjwREamDjjnmmOzp06cvK+tY27ZtC9u2bVuSOK1YsSIUoGvXrvmDBg3KKes9Fdm+fXvIgfvS09NDunfvngPw448/Rm/ZsiX022+/XXHyySeXJGFPP/10lWf+uVyug2YOjhgxInPEiBGZubm5ZsqUKdH3339/8pgxYzqtWbNmUfPmzYv8/RyVxuDHud8CVxljGh14wBjTHbgC+MrP+78AzDDG+IZA78BTafN+4D48FSvH+XnNqvocGGCMKZlu6m2sfoz3WK1krR1vre0XF+fv4GlgRJ5+CRhL7pY8irev/+tAQQ78+Zbne19LhQPY4mL2TvI0XI8fM4ZNWZsAaBnTEhp7O2jsXhvYgHd4c/ZW/QN7XZEGLjI0hBue+4gFF59LYRAcu9Cybtwd/OfbWylyB/z/q0REpJ6Lj493Dxo0KKf0Vl33+uKLL+KLi4tLXn///fdR27dvDxkwYEA2QHZ2tgsgJCSkJFnbuXNn0JQpUxoF4v4RERF25MiRmTfffPO23Nxc18qVK0MDcd0D+ZPo3QeEAQuAf+JZ33a+MeZ
VYDaeqpn/8efm1tpZ1tq7rLV53tfb8LRp6IVnhPAIa+06f64JYIwZbYwZDfgWo57i3Xd8qdNeAdKAz4wxZxhjRuKpwrkReNnfezYUQU2SiWwZAdaQ/cXEvw4s+gjy9kKLftCi7DXA2b/+StHWrYS0bEnkgAFszvIMALeIbgGNO3pOSl9V9WC2LYaXjoM1P5V/zj7vIHNcNffpE2mAjDFccPe/2HP3v8kPhz5rLL0e/op737qQQnd1zboXERE5PNnZ2UFDhw7t+P7778c9++yzjS+88MIO3vYJuwBOPPHErOjo6OIbb7yx9fvvvx83YcKE+GOOOSYlPj7+kP+S+eijjyaOGDGi3Ysvvpjw1VdfRb/xxhuNHnrooeTExMTCvn375gbu0/2lyometzH40cBi4Ho8RUuuBC7D0wvvWGvt9sMNyHostNYusdYWV/6OMn3k3f7mff2i97VviijW2mzgRGAlnh537wDrgBOttdUyTzYQjDFjjTFzMjKq3K4w4KKOPAKArOnTPDus/asIS/+x5b5v70cfA9Bo9GiMy8WmTO+IXnRLaNzJc9Ku1eW9/WAznoJtC2Hhh+Wfk+FN9GKV6IlUl+MvGMPa624lP8bQfhuc+dRCHn1gCIVFKtIiIiK1z0033bS1ffv2eddee23bO+64o3X37t1zvv3225W+1grJyclF77zzzpri4mJz2WWXdbj//vtbXHLJJemjR4/efaj37NOnT05OTo7r3//+d8szzzyz82233da6VatW+VOmTFkRHR1dLc3U/ar2Yq1dC5xmjIkDOuNJFNdYa9MPJwhjzFA8TcrbeXetAz6z1h7S+jxrbZXmz1prNwBnH8o9nGKtHQ+MT0lJqZYfiKqIPmU0Oz+ZTfbSLVi3G7NxJmxfBFGJ0H1Ume8pSk8n86efICiIuDPPBPgr0YtpCUHev/5XNdHLz4Tl3pnCGRvLP883ohdbpSJMEiBPPfUUWVlZREdHM25cdc2+ltokoXMHWn7yA8uvPJPGG/Yx6r2dvLXuSM59bDJRiR2cDk9EROqRlJSUAl+hlkMRGhpqJ0yYsGnChAmbyjtn5MiRmSNHjjyoboevMbrPgXE8+eSTWw48B+Ckk07KPumkk9YcasyHokojesaYCGNMsTHmXgBrbYa1dra19o/DSfK8152MZ/3fdXgKswzxfv+tMeYLY0zEoV5fqkfYMacTFGEpyob8P76HWeM9B/peBsFlV1nc++mnUFRE9PHHE5LUFKBk6qZnRM87dXNXFX/+l30BRd5R7r1ldfvw2uf9d6apmzXK7XaXbNJwNGmVzNFf/8a6IX0AOHpmEb9ceBqbp7/vcGQiIiINT5USPWttLp5G4oc1cleGh/A0Yf8f0MFaG26tDcfTVuEl4DTvOVKLGJeL6K7NAMj+dAIs/RxMEPQtuw+etZa9H3unbY75qyWDb0SvRUyLv4qx7FoNVUkOFn7w1/f7NoP74Fm+QUU5kL8PgiMgIr4qH01EDlNQcBCnvvAOm2+/hn0R0GaDYeu4f7Ho4bGead4iIiJSI/wpxvIVMDzA9z8f+MBa+/fSRVesteustdfjWVd3foDvKQEQ5S2fn/XHn572BV1HQFyLMs/N+WMWhes3EJyURPRxxwGQX5zPjtwdBJtgkiKTIKKRZ+pnUS5kHjTavb99W2HtdE+/vvA4cBdB5taDTgvL9/5dIjYZTJWr4YpIAAy74ibMq8+zvJWLqBxD8Bu/8MfooynetcPp0EREpIHyTfm8+eabAz14VSv5W3WzrTHmDWNMf2NMY2NM5IGbn/ePAqZXcPwnwN9r1mu1oRgLQNTIyz1tFnaG4i40lRRh+QiARmefhQn2LAv1TdtsFtWMYJd3qWjJ9M1K1ukt/hiw0GnYX0Vc9h68Tq8k0SsnARWR6tW/zxAGTPqRz4bEkx8MsUsyWDBsMBlff+p0aCIiIvWeP8VY1uFpqXAEcHE551g/rzkHTyuF8vTyniNetaEYC0BwcjsimoW
Su7WQtT+0wMx/yFOHtQwFaevBGBqd/Vfdm82Z3vV5MaXWzjXuABt+97RYaD+4/Jv7pm32OBeWfAKb53gLshy932nheb4RPa3PE3FK89gkbnvuF/750vUcOWk6KZstW26+i91rl9Hu73c5HZ6IiEi95U9SNhFPIhdINwFTjDErgRd9/fS8DdT/DpwFDAvwPSVAYk88htx3plGY4YaMiouoxAw9iZAWf42s+Zqlt4guNdpW0mKhgmttXwrbFnmmbHYa5knyAPauP+hUjeiJ1A5BriD+c91LvNjpPZZ9+CCjfnGT9/xbfJeeydB//heXS1OrRUREAq3cRM8Y0xrY6S3EgrX2smq4/3ggC3gMeNAY4yuf2AoIBzYA483+66ustfaoaohF/BR/9wtEn7MY66qkMKrLRWjr1vvtKntErwpTNxd5e+Z1GwUh4RDXyvO6oqmbaq0gUitcN/R8prVI5fPg8xn5UzGt35/M41vTOfEfj9CvbYLT4YmIiNQrFY3orcMzRfNdAGPMj8B/rLVTA3j/pnhGCX0Jnq82f+nV+okBvJ8EkHG5CE3pcUjv9Y3otYwuK9FbVfab3G5Y6FnvR8/zPF8beRPIMnrpheXv8nyjqZsitcbgbkfQ75EpTLxzGCf8WMSI6TN4Le96vjz7Hu4b0Q2jwkkiIiIBUVExlkIgtNTrwUBSIG9urW1rrW3n7xbIGOqa2lKM5XCV9NArPaKX0A6My9MXryj/4Ddt+A32bYK41tBqgGdfVUb0NHVTpFaJjm3ONY/9yPwTQwC44o/55H5yJ6/8XM4feURERMRvFSV6q4GLjTE9jDFtvPsaG2NaV7TVQMwNmrV2vLW2X1xcnNOhHDJrbUkPvf1G9ILDPCN01g170g5+Y0kRljHg8v7oNvImehkb9+/RZW2pYiyaulnTTj75ZEaOHMnJJ5/sdChSSwVFJXLeIz+wfrBnYsllM1djnz2PSXPmOhyZiIhI/VBRovcInlG8ecBaPFMsn8YzpbOiTaRC+wr2kVWYRVRIFHFhBySs5a3TK8yDJZ95vj/inL/2h8V4mqEX5UH2zr/252UQ5M6DkCgIbxTwzyAVS01NpXfv3qSmpjoditRiJqYpwx/9nuzh4RQEw6AluYTedjFvf/uc06GJiIgcti+//DLGGNP3yy+/jHHi/uWu0bPWvmWMWQCcBDQDbgG+AZbVUGxST5UezTtoPU7jTrD6B0+LhdJWfgv5GdC8JzTtsv+xuFaQu8czfTO6qWffvs3eYy3ULF2kNottTr/H/2Db23ey4YWv6bgV9t79Io8u+5brr/+AqNBopyMUERE5JAMHDsz+4Ycflvfu3TvXiftX2DDdWrvQWvuktfZ2PF3S3rXW3lbRVjNhS11WZmsFn8YdPF8PHNFb6K222ePcg9/jK8hSusVChjfRi9X6PJFaLziUZpc9SbeJE9mVHESjbDjllbU8fffxpGdtdTo6ERFxUIsWLY64+eab/VqH8+yzzzY2xvStrpiqKiEhwT1kyJDshIQEd0Xn5ebmVsuoRIWJXmnWWpe19t3qCEKqrj4UYykZ0YspoxpmydTNUr30cnbDqu89hVpSzz74PWVV3tynRM9Je/bsYffu3ezZs8fpUKQOie7an/Yf/sbirq0JdsNZX+Tw+s3DWLt9odOhiYhIA+BLEL/55pvok046qUNkZGTvRo0a9br44otbZ2Vl7ZeMjRs3Lrlbt25dY2JiesXHx/ccMGBA56lTp0aVPqesqZv9+/dP6du3b8q7774b17Vr126hoaF9Hn300USABx54oGn79u27h4eH94mNje2VmpradeLEiY0O9fP40zBdagFr7XhgfEpKSqCb19cYX8XNMkf0mviappeaurnkU3AXQocTIabZwe8pq/Jm6ambUuNee+01srKyiI6O5pZbbnE6HKlDkpvE0vX5D/nhrps58Y/fOP3nIr656TwG/Otx+qac6nR4IiLSAFxxxRXtRowYsee6665bM3PmzKi
nnnqqeU5OjmvSpElpvnO2bNkScv31129v3bp1YVZWluudd95pPHz48JQZM2YsO+qooyqcqrlu3brw2267rfVtt922pWPHjgWJiYlF//vf/xLuv//+VjfddNOW448/PisnJ8e1YMGCiF27dh1yvqZET2pchSN6MckQHOEprJK7FyIalZq2eV7ZFyxdedNHUzdF6qzUFnG0Hf8yHz3wMP0+fYcT51nm3HoL629dzVnH3+h0eCIitdoRbx7h+JRFgEWXLvK7jLLb7aa4uLjM/YWFhSWvjTEEB/+VxhQVFWFLVV93uz0zJUu/ByAoKAiXq/IJjSeccELG+PHjNwGcddZZ+4wx9vHHH2+xcOHCrT169MgH+OCDD0rWDBUVFTF69OiMTp06pb700ktNjjrqqIP7fpWyd+/e4C+//HLpwIEDSxLCF154IbFz5845jz/+eMmahXPPPfewpvBVeepmdTHGBBljrjbGfGmMWeLdvvTuUyJaD8WFxdEkosn+rRV8XK6/1untXgO718HGmRASCV1OK+eCvhG9DX/t27fJe0yJnkhdFB0WzOUP3sPeux8gLwz6rQLzwP/4/JN7nQ5NRESqyddffx0TGhrat/S2ZcuW0GeeeaZ56X0DBw5MKf2+gQMHppQ+Pm7cuLYAB17r66+/rlL1y/POO2+/tSeXXHLJHrfbzYwZM0qmZk6ePDnmqKOO6tyoUaNeISEhfUNDQ/uuX78+bPXq1eGVXT85ObmgdJIHcOSRR2YvX7488tJLL201efLkmMzMzMPO0xxNpIwxCcB3QB+gAPBlv0OAU4C/GWOGWWt3ORSiVIPHjn+s4hMad4TtiyF99V/JW5fTIayc6nslxVi8vfSMgX1bPPs0oidSpx13wWi2denIxrEX0mWTm/VPf8wve7Zw3OXjwRXkdHgiIrXOoYyk1RbHHHNM9vTp0/er8D969OiOQ4YMybj22mtL+mjFxcXtN+z3yiuvpGVkZJT8n8LkyZMbPfXUU80PvNYRRxyRV5U4kpOT9xsKbNmyZSHA5s2bQwFmzJgROWbMmE7HHXfcvueeey6tRYsWhcHBwXbs2LFt8/PzK03QmjZtWnjgvuuvv35XXl6emThxYuLbb7/dNDg42B5//PEZzz333MaUlJSCqsR9IKdHzJ7Ek+TdAbxgrc0FMMZEAH8HHgaeAC5zKkBxQElBllWwZLLn+7KqbfpExENoNBRkQt5eT988Td0UqTea9elFzCffMO/c02izo4jtE35jyZ7hdL/+I8/0bhERqRfi4+PdgwYNyim9LyQkxDZv3rzwwP2l9ezZM7/06/nz50cAVPSeimzZsiUEKEkKN23aFALQokWLAoAPPvggPjg42H7zzTdrwsLCSuaM7tu3Lyg2NvbguacHMMYcVGvD5XJx2223pd92223pO3fuDJo8eXLsPffc02rMmDHtFy5cuPxQPkeVhwSNMa8ZY46q4Hh/Y8xrft5/JPCKtfZxX5IHYK3NtdY+BkzwniMNiS/RW/KpJ9mLSoT2g8s/35j9C7Lk7oGiXIqCIiE8ttrDFZHqF9W6Nb0++Y7NTUNJ2gMZH2xk43+P3b9Cr4iISAC8//778aVfT5w4Md7lcnHsscdmA+Tk5LhcLhcul6skYfv8889jtm7dGhqI+ycmJhZfffXVe0aMGLF71apVEYd6HX9G9C4DfgD+KOd4O+BS4Ao/rhkEzK/g+DygnAocDZMxZiwwNikpyelQqk9J5U1vL73U0RBUyY9qo1awc5lnqqe3QXp+WGPHh6xFJHCimyfTd9KX/Hre6XTYXMC2r4oIyzuNpvdM1XpcEREJmJ9++inummuuaTl8+PB9M2fOjHzyySeTzzzzzF2+QiynnnpqxmuvvdZ09OjR7a644or05cuXhz/xxBPNy5qSWVXnn39+m+jo6OKBAwdmN2vWrHDZsmXhkyZNanzsscfuO9RrBrIYSyyedXb++Bk4voLjg73niJe1dry1tl9cXJzToVSfhPb
7v+5xTuXviStVedM7bTM/rEmAAxMRp8UntqL3u5+wpF0I0TmGrd/Cun8Oh2wt5RYRkcB47bXX1q1evTrswgsv7PDSSy81O++889LfeOONkiqbZ5999r4HH3xw45w5c6LPPffcTm+99VaT8ePHr2vTpk1+RdetyMCBA7MWLFgQdcstt7Q+44wzOj/xxBPNzzrrrN0ffPDBukO9ZoUDHsaYHkCvUruOK6cSZjxwHeDv/NEbgB+MMc8CT1pr07z3bQvcAvQFTvLzmlLXRSZAZGPI2QWNO0Fy78rfU7ogS1AIAHnhSvRE6qMWSR044o33+PW6Czh6SQHZ3xeyJPckuj07HaPp2iIi9crmzZsX+fueG2+8cdeNN954yH8BbNWqVeHUqVMrXBtw991377j77rt3lN43atSoFaVfn3766ZnW2v2K48yaNWu/c3xuuOGGXTfccENA/2pZ2cy2M4H7vN9b4BrvVpYs4Hw/7z8LCAWuB643xhR47xPmPZ4JzDZmv0b01lpbj+ctCuBZp5ezy1OEZf///mXz9dLbux5CPFVtNaInUn+lJHUn8rUv+PiW0QyfkQk/5zHnihPo+8YMXKGHvJxBRESk3qgs0XsDmAYY4Efgv8CUA86xeJK8pdbaKpUsLWWp9/0i+zv6ek81zX5VXPIZ5x3Ry9joqcCJEj0nnX/++bjd7io1JRU5VK3iWnPFC1N4/p5RnPHlNqL/zGHm2cdy1Ie/EBQR6XR4IiIijqow0bPWrgfWAxhjLgd+ttYe8jzRMq4/OFDXknqm2xmeraoalaq6GeaZuqVEzznJyclOhyANRFxYHLc+/C2PJIzh9PdWEb8qh1kjj6P/pJ8IitU0ThERqbrDnfJZ21T5z+3W2jcDmeSJBFRUUwgKhdzdkL4SUKIn0lCEBYVxz52T+eaaPuyJgkYbc/jz9BMo3LbN6dBEREQc408fvSeMMeUuSjTGrDbGPHIoQRhjOhtjbjXGvODdbjXGpBzKtaSBcrn+qryZtR3wtFcQkYbBZVzcdd3b/HBVLzY1hugdOSw+4xTyVq50OjQRERFH+LOA5lTgowqOfwic7s/NjTEuY8wzeNbqPQpc690eBZYaY54zpiqVOET4a/omQHgcxcEqyOCUNWvWsGLFCtasUTNrqTnGGO7927v8dnEXlreE8Iw8Vp4zmpy5cyt/s4iISD3jT6LXGlhdwfG13nP8cQ+eFgtfA0OAlt7tJO++64B7/bymNFRxpRK92JbOxSFMnjyZ999/n8mTJzsdijQwxhj+OXYSS85szR8phpC8QlZdcxWFW7c6HZqIiEiN8ifRKwAqqrCQDLj9vP9VwHfW2pHW2p+stVu824/W2hHAD95zxMsYM9YYMycjI8PpUGqfRqX+zhDXwrk4RMRRxuXiH9d8SfqQBOa3M4Rm5bHg8gtwFxQ4HZqIiEiN8SfRmwtcaIw5aD6cd98F3nP80QT4ooLjn3nPES9r7Xhrbb+4uDinQ6l9Sid6sar6KNKQBQWHcMNVX7PqpCh2xkJU2jZmXHOJ02GJiIjUGH8SvceBTsBPxphTjDEtvNspwE/eY0/4ef/FQLsKjrf3niNSOU3dFJFSwiJj+cf10/hqRBSFQZD4+wK+uv0Git1q3yoiIvWfP+0VvgX+D+gNfAls8G5fAn2Am621X/l5/38AVxtjzjrwgDFmNJ5pm3f+f3v3HR5Fub5x/PukEHrovYMEAQVEUcECeORYERULioJiQ7H37s+D7diPKIq9oYgFuxwFKQqCICggKiJFOtJDS3t+f+ziiTEkWUgyu5v7c11zJTvzzsy9L0nIk5l53wiPKWVV7sFYdOumiADJ5Stx6/Wf894/UgBo8snnjLr7OnZmZQecTEREYkGXLl3SunTpEpOzARQ4YXpe7v6EmY0BTgNahVf/Arzj7r8Xtr+ZvZXP6uXAaDNbCvwcXpdGaGCX+cDFwPhIckoZVaUBWCJ4dujWzY1BBxKRaFC1fHX63/UO45f35rC
5OaR9+hHv1a7HmZddH3Q0ERGJcsOHD18SdIY9FVGhBxAu6B7Zw/P1LWBb0/CSW1tg3z08l5Q1iUlQvRmsXwjVm8PSRUEnEpEo0bJ6S5bd9xiLB11BszUJtH71Ob5ouA//6NMn6GgiIhLFOnfuvKOwNtu3b7cKFSpE3XMBkTyjt9fcPWEPlsTSzCgx7uRn4JRnoXrevxmISFl35D5Hs/Hey1laCypsTKD6PTfz0zffBh1LRESKUcOGDfc76aSTmj/88MO1mjRp0j4lJeWAtm3b7vvhhx9Wyd1u4sSJFY855pgWdevW3b98+fIHNGvWrP2QIUMapqen/2UO77y3bn700UdVzKzzyy+/XO3MM89sWr169Q516tTpAPDDDz+kHH300S1r1KjRISUl5YD69evvd+yxx7bIzMwsnTefR0RX9MysBjAI6AJU5++Forv7UcWUTSRyjQ8KLSIi+eh32KW8+SBk3PwErVbB1ssGsO6Vd6jZTjePiIjEi2nTplWZM2dOxdtvv315+fLl/eGHH67Xt2/ffaZPnz6vQ4cOOwEWLVpUbv/9998+YMCAdVWrVs2eM2dOhYceeqjB4sWLUz766KPfCjvH9ddf36RHjx6bnn322UXbt29PAOjdu/c+VapUyX7kkUeW1K5dO+v3338v9+mnn6ZmZ2dbcnJyqV/xK3KhZ2YtgK+AeoSefkoF1vO/gm8dsKX4I4qIiBSfMw+9lNFDs5h/13D2XeYs6d+XCq+PpmLbtkFHExHZa/Pb7Ns56AwA+/40P9Jp14rNunXrkiZPnvzTPvvskwFwwgknbG7WrNn+d955Z4MxY8YsAhg4cOBGwiM65OTk0KtXr/SqVatmDxkypPmqVasS69WrV+CoXR06dNg6atSoP5/fW7lyZdKSJUtSXnvttV/PPvvsPye8vuSSS9aXwFsskkiu6N0LVAKOJDRIyhrgDGAKcB2hK309ijugiMSeIUOG4O6YWeGNRQJw2mFX8MZ1G/n+8TfosCiH+eecwX7vf0q5RpqaRUQkCDk5OWRn/7W2Sk5OJjs7m5ycnD/XmRlJSQWXMB06dNi6q8gDqF69ek6PHj02fffdd5V2rVu/fn3CrbfeWv+jjz6qvmrVqnJZWVl//tIyb9688vXq1dta0DlOOumkjblf161bN6tRo0Y777jjjkarVq1K7tWr15b99ttvZ8HvumRFUuj1BJ5x98lmVjO8ztx9BzDUzNoSmmvvtOIOKSKxJSUlJegIIoXqd8wdvLZ+OXNfnkT7JVlMPasPh3w4npTUqkFHExHZY0FeSdsbn3zySZUTTzyxde517j7z+uuvb/Doo4/W37XuoIMOSp8+ffrPfz/C/9SuXftvD8XVqVMnc82aNeV2ve7Xr1/zKVOmVLnhhhtWHHDAAduqVKmSM2XKlEo333xzk123YhakYcOGfzlHQkICY8eO/eXWW29tMHTo0IbXXXddUsOGDTMuv/zyVTfeeOPawo5XEiIp9Kryv+kPdlXIlXNtn0joqp+IiEhM6N/vad7ecDzLX19EwzVb+eisEzn6rbFUrVQ+6GgiImVKt27dtk6cOHF+3vVXXHHF2j59+mzc9To1NbXQiVDXrl2bnHfdmjVrkuvUqZMBsG3bNhs3bly1a665ZsXtt9++ZlebWbNmVShqXjP72zN3bdu2zXjvvfcW5+Tk8M0331R4/PHH69x0001NmjdvvvP000/fXNRjF5dICr1VQF0Ad99iZumEpj54P7y9LqARMkuYmV0EXFS3bt2go4iIxD4z+l70Lh9vOZwtb6bTduEaXrzgNPoOf4uG1Yr8/72IiOyl6tWr5xxxxBHb8q5v1qxZZrNmzSIatvL777+v9Ouvvya3atUqE2DDhg0JX375ZWqPHj02AWzfvj0hOzubvAOkvPbaa7X25j3skpCQQNeuXbe3adPm97feeqvWnDlzKkR7oTcd6Jbr9RfAtWa2nNBgLFcAM4p6MDOrADwJfOruoyPIUaa5+wh
gRFpaWtTN1SGyy/Tp08nMzCQ5OZkuXboEHUekYMnlOf6yD/lsYw8qfAC9Zv3KwzdcyJB7nqZF7cqF7y8iIlGlZs2aWb169Wp98803r9g16ub27dsT7r777hXh7dkdOnTYOnz48Lr169fPrF27dtaLL75Yc/Xq1X+7ElhU06ZNq3DFFVc0PvXUUze0bt16R3Z2tr300ks1ExMTvVevXoEMWBlJofcccK6ZlQ8/l3cjMAl4Obx9LaFBWYrE3beb2RnA1xFkEJEYMHnyZNLT06lcubIKPYkNVepxzOWv8/mGvjSamMJ5X83kvif+xfA77yMpsVSnnBURkb108MEHbznyyCO33H333Q1Xr15drmXLljvefvvtBfvvv/+fg6OMGjXqtwsvvLDpjTfe2CQlJSXnhBNO2DBw4MDf+/Xr12pPztmoUaPMhg0bZjz11FN1V69enVyuXDlv3br19lGjRv16+OGH/+1KZWkocqHn7v8F/pvr9QIz24fQIC3ZwNfuvjHC888GWhfWSEREpMQ1PICjL3+Q8etvoP6ccpw99gMe6NyVW086KehkIiISoWuuueaPa6655o/dbU9LS8uYNGnSgrzrzzzzzL8MZpN34JcTTjhhi7v/bcCbhg0bZr377ruL9yJysSvynynN7Agzq517nbunu/sH7v4xkGRmR0R4/tuAC8yse4T7iYiIFL/2p9L9gnPZUCuHupsg9fU7mL50SeH7iYiIRJlI7kf5Eji6gO1HhdtEYjCh+fjGmdmPZvahmb2VZxkV4TFFRET2WEKvO9jvrAPJSnSO+iGD554ewLaMQKdCEhERiVgkz+gVNvNxMpBTSJu8+ub6vE14yUuDjoiISOkxo8rFL1FlTg+2f7mOAf9dzZDWV/DCwGeCTiYiIoVYvnz5nKAzRItInzDPt+gys1TgGGB1RAdzTyjCoikbRESkdCUm0/Thj8mqn0iNdOj6/mT+M/XZoFOJiIgUWYGFnpndaWbZZpZNqMh7bdfr3AuwHugH6DZLERGJC1YxlbThr5KdBN3mO/PHPMac1fOCjiUiIrJLgXdcFnbr5mzglfBBzgUmA7/laeNAOjANeGOPEpq1IPSMXx3gdXdfbGbJQG1grbtHNEmiiIhIcSjXphO1L7+E9Y8+zQWf5fBY1YE8deNXpCSlBB1NRGRLVlaWHiAuwzIzM3cAu52IvcBCz93fB94HMLOmwFB3H1ecAc3sMWAIoauLDkwFFgMVgJ+B24HHivOcIiIiRVXnoivYMGUKFaf9wOC30rm//Lncea1uYBGRwC3dsGHDyqpVq9ZLSEjQmBZlTE5Ojm3cuHEl8Pvu2kQyj16PYkmVi5ldBVwBjAA+Bd7Ldb7NZjYG6IMKPZGYUrly5b98FIllZkbac68x4/SeVJ7/B8e98gPvNHicU/tdGXQ0ESnDOnfunDF79uzzlyxZ8kK1atXqJycnl0eDGJYFlpmZuWPjxo0rN23adH7nzp0zdtcwklE3Q0c2qwg0A2qSz32h7j4pgsNdBLzr7peYWc18ts8hdEuniMSQiy++OOgIIsXKkpM58M3PGXfKQTRcmEWTB57mp0YH0Obww4OOJiJlWMeOHVfPnDnz5PXr1zcGqgadR0rNZuD3goo8iKDQM7MqwCOEntXLbz8j9FeESEbJbAE8XsD29UCNCI4nIiJSIiylPN1e/ojPzj2GNr/B1ssvZtPI90htmxZ0NBEpw8K/7C8MOodEn0iu6D1BqMj7iNDE6OuL4fxbgGoFbG8NrC2G84iIiOy1CrWass/dtzPrzn/RaaEzf9BZHPT5RBJ1m7KIiESZSAq93sCr7j6gGM8/ARhgZg/n3WBm9YBBhAeDERERiQbtDzyLqeeMZcmT02m6dhvfXHguXUe+g1mBo1yLiIiUqkgmTE8Cvi7m898JNAa+AQaG1/U0s9uB7wndBnpPMZ+z2JnZLWb2s5nlmFmfPNsamNnsXMsCM8syM92SKnHrs88+4/333+ezzz4LOop
Iibjg9Bf56qRqbCsHNWbNZ97jDwUdSURE5C8iKfQmA/sX58nd/UfgaCAFeDC8+hbg/4B1QC93j4V7jscBxwF/G4jG3Ve4e8ddC/A88Im7F8etryJRad68ecyePZt58zS5tMQnS0jg1ks/5v1e4cfSR7zA+m+nBRtKREQkl0gKvSuBk8zs7OIM4O7fuPt+QAfgDKAfcCDQzt2nR3o8M2tkZk+Y2VQz22ZmbmbNdtO2sZm9bWabzGyzmb1rZk324D1Mi6AgPZ9QsSciIjGsQsUaDLjiFb48ABJz4NfLBpG9cWPQsURERIDICr2RQBbwipn9YWYzzWx6nmWP/5zp7nPcfbS7j3L379x9T+cBaQWcDmwgdBUyX+FpIsYDbYABwDnAPsCXZlZpD89dIDM7AqgCfFwSxxcRkdLVsskBNL7gahbWhyqbs5ky4Hg8JyfoWCIiIhEVenXCH5cSGi2zBlA7z1In/10LZmatzew6M3syvFxnZns6XvUkd6/r7scBowtodyGh6R36uPsYd3+f0IAzTYE/JwEzs+/ChW1+S+MIsw0CXnb3rAj3ExGRKHViz4tY0PdQ0stDrZ/XM+tfVwUdSUREpOijbrp7s+I+uZklAI8Cl/H3ovMBM3sKuCKSq3vuXtQ/pfYGvnH3X3Ptu8jMvgZOIjRnIO5+QFHPXRAzqwqcCnTai2PM3PV569atiyOWiIgUgysHP8+/l3TlpA82Uu7Nz1l/9ERqdD0y6FgiIlKGRTK9Qkm4Dbic0Nx8jwI/h9e3Aa4GLiU0j97dJXDuduQ/dcM84LQSOF8/YKa7LyiOg2VmZjJhwoTiOFTcWrx4sfqoCEqinzIyMv78GA//BvpaKpz6CFofeTPjV9xEzxnOwisuZc5dD+C55tdTHxVOfVQ49ZGIFFXEhZ6Z1QWOBZoDDiwGPnX31Xtw/guAse7eO8/6FcB4MxsbblMShV4NQs/x5bUeqB7JgczsNuASQrevtjezYcCB7r4qV7NBhCad32Pu3nnX52lpad69e/e9OVzcmzBhAuqjwpVEP82cOZOMjAzKlSsXF/8G+loqnPoo5L0Kf/DLygdpvTyH7Bf+w8Fvf4YlhG5YUR8VTn1UOPWRiBRVJM/oYWY3AEsIjRp5O3AH8AKwJLwtUrWADwvY/n64TUnJ75bQiGe8dfeh7t7I3VPcvVb481V52nRx91f3OKmIiES9k3uez9zerdhSHlJ//J0Fjz5Y+E4iIiIloMiFnpn1B+4H5gD9CT1r1gk4G/gBuG8Ppl6YS+jK4O60CLcpCRsIXdXLqzr5X+kTEREp1HWD32LMMaEbZjKfe4n0b2cEnEhERMqiSK7oXQVMB7q6+xvu/n14eQPoBswg9FxdJG4GLjSzU/JuMLO+hG7bvCnCYxbVPELP6eXVFvixhM6518zsIjObsWnTpqCjiOxWkyZNaN68OU2aRDwtpUjMSylXgTMGPsbHXSDB4echF5K9ZUvQsUREpIyJ5Bm9tsBN7p6Zd4O7Z5rZSOC+CM9/MbAcGG1mS/nfYCxpQBNgPnCJmV3y19P5GRGeJz8fAA+ZWQt3/w0gPLF6N0quuNxr7j4CGJGWlran8wyKlLjTTiuJ8YxEYkeHNkcx5fjDWbh0Mi1X7eCbKy+Ecy8pfEcREZFiEkmhtxNILWB7arhNJPrm+rxpeMmtbXjJrdACJ3w1EGDX4CXHmtlaYK27TwyvexYYArwfHkzFgX8BvwPPFPkdiIiI5OOS057m3kWH0ui1zdSY8j0/tfwcNIiGiIiUkkhu3ZwKDDGzlnk3mFlTYDDwdSQnd/eEPVgSi3Do0eFl159Pnwq//r9c594K9AR+AV4FXgcWAT3dPT2S9yEiIpKXJSRw3WUf8vVhodeN3n6XlYvnBxtKRETKjEgKvTuAKsBcM3vFzG42s5vM7BVCz7RVBe4siZCRcnfbzdI9T7ul7n6qu1d19yru3sfdFweTumj0jJ6ISOxIqVKHPpc/ya9NncrbYcpl/di
asTXoWCIiUgYUudBz9xnAUYRG2OwP3APcG/58DnCUu88siZDyP+4+wt0PTE0t6C5akWCNHDmS5557jpEjRwYdRSRwDdv1pPHZfdhW3mm7cCfDbzuZrJysoGOJiEici2gePXef6u4HA/WAQ8NLPXc/xN2/KYmAIhJ7Vq5cyfLly1m5cmXQUUSiQsdz7mN719Af6P7xye+88snDAScSEZF4F1Ght4u7r3H3aeFlTXGHEhERiStm5PS5lS0tnZQsqPHYyyzftCzoVCIiEscimTC9t5kNK2D7MDM7vnhiye7oGT0RkdiUU64qzf/vP2yr6KQtc96540zcNVOOiIiUjEiu6F1DaMCV3akMXLt3caQwekZPRCR21T6wF9tO7gHAEePW8ckH9wecSERE4lUkhV57oKDBVmaG24iIiMhuHHbbUyxJq0hKFuT85xW2bFgadCQREYlDkRR6lYCcQtpUiTSAmVUzs7vM7GszW2Bmh4bX1zSzW8ysdaTHFBERiVZmRttHRrO5IrRaDh/c0gdysoOOJSIicSaSQm8h0L2A7d2BJZGc3MzqA98BtwONgBZABQB3XwecR2gidhERkbjRpGULlvY/C4B2k7cz86VLAk4kIiLxJpJCbxTQx8yuM7M/9zOzBDO7BugDvBnh+e8FagLdgM6A5dk+BvhHhMeMaxqMRUQkPvS9+jZm71uDlCxY+/pkdi6fHXQkERGJI5EUev8Gvgl/XGZmn5vZ58DvwEPAdCDSp8qPA54Iz8GX39Bji4DGER4zrmkwFhGR+GBmHPjwa2ysBE2XG5/cfR5oFE4RESkmRS703H0n0AO4FVhL6Cpct/DntwDd3X1HhOdPBRYXsD0JSI7wmCISsI4dO3LQQQfRsWPHoKOIRLV9WjRnQf8zAGjx9Q5mj7oz4EQiIhIvkiJp7O4ZwH3hpTgsAdoVsP1wYEExnUtESslRRx0VdASRmHHOVXfy5lef0nHeZn5/ZjSZx11GctW6QccSEZEYF8mtmyVhFHCBmR2ca50DmNkFwCnA60EEExERKQ1mRpeH32BDZWi8Ej644ZSgI4mISBwIutC7D/gBmAyMJVTkPWBmC4FngK+AxwJLF4U0GIuISPxp2awFS84+AYBWk9cz88NnAk4kIiKxLtBCz923E5qW4VZCI27uAPYDtobXHe3umYEFjEIajEVEJD71u/pBfmhfgXLZsOGhx8nYvi3oSCIiEsMiekavJIQLuQfDi4jEgWHDhpGenk7lypUZMmRI0HFEYsbB/36V5Wf0peFq58PLT+TU58YFHUlERGJU0Lduikgc2rlz55+LiBRdsxbtWHHWkeQAbb5ewaSX/hN0JBERiVGlekXPzM7dk/3c/ZXiziIiIhKN+l41nBdnd+bQadtJGTacZUecRKMWTYOOJSIiMaa0b918idCAKxbBPg6o0BMRkTLBzPjng28y64yTaLkSfrqsL7U/+IaU5MSgo4mISAwp7UKvRymfT0REJOY0rNOaiUP6sO3/xtBwUTqf3XglJz0yLOhYIiISQ0q10HP3iaV5PhERkVh15in3MPTb8Zw2ZjMtxn7BuPfGctTJ/ww6loiIxAgNxhJjNI+eiEjZkGAJnHX9K0zcz0jKNqo+cB1zF64KOpaIiMSIQKdXMLM7itDM3f1fJR4mRrj7CGBEWlqaB51FRERKVquaaXw+uC8rbhtNg/VZLLqqP7Vf/4i6VcsHHU1ERKJc0PPo3VXAtl2DtjigQk9ERMqk84+8latPn8Alz66l+YLlvHPLLZz3yINUKKfBWUREZPeCvnWzeT5LK+CfwKfANGDfwNKJiIgELCUxhZvOf41RRyUDcNikT3j4yTfIydGNHSIisnuBXtFz9yW72fQb8LmZfQlcANxQeqlEZG/17NmTzMxMkpOTg44iEheaVG3C8dcNY/riS+jyC/R6514ea9mWa3ofEHQ0ERGJUkFf0SvMO0D/oEOISGQ6depEly5d6NSpU9BRROJG1yZHkHj9hax
JhSp/OG1eGMyH368IOpaIiESpaC/0UoHqQYcQERGJBmcfdhUzz+9EVgI0/2kzS56+kS07MoOOJSIiUSgqCz0zSzWzk4FrgdkBxxEREYkKZsalg17gy6NSAThk2re8/tZHAacSEZFoFGihZ2Y5ZpaddwHWE7pt04FrgswoIpFLT09ny5YtpKenBx1FJO6UTyrPafe8w09NjErbjNYjb2fByg1BxxIRkSgT9PQKrxAq5nJzYAOwABjp7ptLPVUUM7OLgIvq1q0bdBSR3XrmmWdIT0+ncuXKXHvttUHHEYk79as2pPwtl7P9iv9Qf3E2Pwy9gFbD3sbMgo4mIiJRIuhRNwcGef5YpAnTRUQE4JgjL+Gp3u/R8+3faTF5Hl9/+B6H9T4l6FgiIhIlovIZPRERESmYmdHv1jeZ3SqB8hmGP34b27dtDTqWiIhEiaCf0TvPzN4tYPs7ZnZuaWYSERGJFTUq1KD+nXewqSLUWu5Mu+60oCOJiEiUCPqK3sXA2gK2rwYGl1IWERGRmNP9oDOYekoaADUm/saPLz0ecCIREYkGQRd6acD3BWyfE24jIiIiu3HRja8zsVMyydlG5sPD+e2Lz4OOJCIiAQu60CsHVChge0WgfCllERERiUmVkitx6MMvM2Vfo1ymseXaK1nx3Q9BxxIRkQAFXej9BBxXwPZjCU2zICIiIgXo0KATFW++nBmtjHI7ndWD+rPhl4VBxxIRkYAEXei9BPQwsyfMrOqulWZWxcweB3oALwYVTkREJJb06zKYnwZ1YU5To/z2TH49ux9bf18WdCwREQlA0IXeMOAd4DLgDzNbYGYLgD+Ay4ExgJ4qFxERKaI7ej/DmDOq80sDqLxlC9/3P4+c7duDjiUiIqUs6AnTHTjNzPoB/YBW4U2fAm+6+5uBhRORPda3b1+ys7NJTEwMOopImZOSmMK/T32d8zJO5OaXs6i/ehnfDrmGLs89hZkFHU9EREpJ0Ff0AHD3N9y9t7u3DS99VOSJxK6mTZvSokULmjZtGnQUkTKpUWoz7uh5N4+cnMDOJKj69QR+HT486FgiIlKKoqLQAzCzfcysm5mlBp1FREQk1h3e+mQuO+4mRhwbuoq3c9gTbPzqi4BTiYhIaQm80DOzU8xsEaEROCcBncPra5vZfDM7JdCAUcbMLjKzGZs2bQo6ioiIRLlj25/DMefczmedjcQcWHjV5WQumhN0LBERKQWBFnpmdgwwGtgCPAT8+fCAu68FFgP9AwkXpdx9hLsfmJqqC58SvZYsWcJvv/3GkiVLgo4iUub13r8f1QffxC8NoWI6TLnwdHLWLQ06loiIlLCgr+jdBnwHdAIezGf71PA2EYkhb7/9Nq+++ipvv/120FFEBBh42LnMHzSYTRWhzjL45sIT8J3bgo4lIiIlKOhCrxPwqrtnA57P9uVAvdKNJCIiEn9u7Hc5b518LDuSofqPmSy6+Dg8JyfoWCIiUkKCLvSyC9neANhaGkFERETimZlxy1X3MuzEBmQmws5vVrP2hgFBxxIRkRISdKH3HXBcfhvMLJnQ3HrTSjWRiIhInKpVpTynnD2Mx3snk22w7qMZrPv3LUHHEhGREhB0ofcQ0MvMngD2Ca+rYWaHA58BrcNtREREpBgc325fErpcyFMnhH4FWPPCe2x4SXPsiYjEm0ALPXf/CLgSuBD4Orx6FDABOAy4wt2/DCadiIhIfBp24mBmtEnj2X+Gfg1Y/eB/2Dl3ZsCpRESkOAV9RQ93fwJoAVwNDAdGANcD+7j7U0FmExERiUcpSUk80es+Pu+YwrgOhmfDiiHn4ds0R6uISLxICjoAgLuvAP4TdA4REZGy4uDGaRzXaCCv9BxBx9+yqbkqk3XX9qbWsC8gMTnoeCIispcCv6InIiIiwbj3qMuoV+Mgnjo+9OvA2omr2f7MANC0CyIiMa9Ur+iZ2fg92M3d/ahiDyMiIlLGJSYk8mafJznNBvJ5p7kcPQuWvzadxo1uIuX
EB8As6IgiIrKHSvvWzRbkPzG6iMSRiy++GHfH9EuiSNSrmFyR108Ywbk7zqLDb4uosz6Z9a+OpHKtNlTpen7Q8UREZA+VaqHn7s1K83wiEozKlSsHHUFEIlCtfDVGnPwCd/7el6teXse6eZVJfP0OktP+QfmaTYKOJyIie0DP6ImIiAj1KtXjxsGvMK5LeRJzjNUTKjP/ljNxPa8nIhKToqLQM7O6ZjbQzP7PzO4Kf1436FxFZWa3mNnPZpZjZn3y2X6cmc00s1lmNsfMzg0gpoiISIFaVGtB9wdf4osuKSRlG+UnbmDi4DNw11MXIiKxJvBCz8xuAJYAzwO3A3cALwBLwttiwTjgOGBS3g1mlgCMBM5x907ACcCzZqZ72yRuzZo1i+nTpzNr1qygo4hIhNrX7cDJT37EO/+sTI5B3Ylz+WpgP3IyMoKOJiIiEQi00DOz/sD9wBygP9ApvJwN/ADcZ2ZnR3jMRmb2hJlNNbNtZuZm1mw3bRub2dtmtsnMNpvZu2YW8cMI7j7N3RfuLlL4Y43wx2rAOkD/Y0rcGj9+PJ9++injx+/JQLsiErRGVRpxxb+/YHSfFHYkQ61p3zP9jD7k7NgRdDQRESmioK/oXQVMB7q6+xvu/n14eQPoBswAro7wmK2A04ENwOTdNTKzisB4oA0wADgH2Af40swqRfpGdsfds4HTgDFmtiSc6Vx3V6EnIiJRq2pKKrfcMIYvT8lifWVInb+I//bvQ7au7ImIxISgC722wEh3z8y7IbxuZLhNJCa5e113Pw4YXUC7CwlN99DH3ce4+/tAb6ApcPGuRmb2nZn9sZulcWFhzCwJuBU4xd2bAkcBr5hZrQjfl4iISKkqV70ZQ46/hWXHbiW9PDSdu4QX+/+T39ZsCjqaiIgUIuhCbyeQWsD21HCbInP3og4P1hv4xt1/zbXvIuBr4KRc6w5w91q7WX4vwnk6Ag3cfVL4eN8CywndoioiIhLVyh00kNMPOZ2tR20mIwm6/bCK9677B4+Pm0NWtkbkFBGJVkEXelOBIWbWMu8GM2sKDCZUeJWEdsDcfNbPI/KriAX5HWhgZm0BzKwVodtLf470QOGRO2ea2cxizCciIrJ7ZtjxD9H9qvfIOa4S2QbHTU9nyzv9uPmVkWTnaEROEZFoVKoTpufjDkIjVc41s9HAfMAJFVqnhtvcWULnrkHoOb681gPVIzmQmd0GXALUBtqb2TDgQHdf5e6rzexCYJSZ5RAqri9z96V7Ez4zM5MJEybszSHi3uLFi9VHRVAS/ZQRfoYnIyMjLv4N9LVUOPVR4eKij459gAo5T1H1ox85eXw2H+68l+s3zePEDr0ws8L3L0Rc9FEJUx+JSFEFWui5+wwzOwp4jNCom7lNB65y95K8epXfnyEj/p/K3YcCQwvY/gbwRqTHzec4nXd9npaW5t27d9/bQ8a1CRMmoD4qXEn008yZM8nIyKBcuXJx8W+gr6XCqY8KFzd91OMoljW6ny1Pv8xJXzt/zBnDz6t+ZdC9o7CEvbtRKG76qASpj0SkqIK+dRN3n+ruBwP1gEPDSz13P8TdvynBU2/gf1Me5Fad/K/0iYiICNDoqpto+NzTbKiTTK3N0G3MXCb+ozObvp0WdDQREQkLvNDbxd3XhOejm+bua0rhlPMIPaeXV1vgx1I4/x4xs4vMbMamTRrxTEREglP1sCM5dPx3zD+hGRsqQ90VO1h2zkBmPXIHOTkapEVEJGhRUeiZWaKZNTWzTmZ2QN6lhE77AXCImbXIlaMZofn7Piihc+41dx/h7gemphY0WKlIsFJSUv5cRCR+WVISpzz0KQsu683nXUK/VJQfMZq3zurK1EUTcddALSIiQQn0Gb3wxOQPAOcB5QtomhjhcfuGP931TNuxZrYWWOvuE8PrngWGAO+HB1Nx4F+ERsl8JpLzichfDRkyJOgIIlKKzh14PyOrtmVGtXtp/2UCHWZv4tfzL2HggLZce/yd7F97/6A
jioiUOUGPuvk0cDYwgdA0ChuL6bh5J0p/KvxxItAdwN23mllP4FHgVUKDsIwjNABMejHlEBERiXtmxtmnDmDtkceyqMYg+PgXWq1M4KJhP/Kv5Wdz/Ek3MKBd/2IZmVNERIom6ELvJOBVdx9QnAd19yL9TxKe4uDUQhtGETO7CLiobt26QUcRERH5i9q16lD7rg/Z2OM1/rjjbmqsTuSuV7P4+Of7uObMb7nn6PupmFwx6JgiImVC0M/oZQBTAs4QU/SMnoiIRLtqR/an5fsTqHxoXcycE6c7Jwz9nGsf7cXCjQuDjiciUiYEXeh9AhwecAYRKWbjxo3jk08+Ydy4cUFHEZGAWLUGNH5xArXuvZrsVGiwHoa8sI4PhvTm+f/+H9sytwUdUUQkrgVd6F0FtDWz+82smenmfZG4MHv2bL799ltmz54ddBQRCVidky+m5X+n8EfnZmDOMTNy6HLVm7x9xsF88s79ZGVnBR1RRCQuBVrouft64DXgemAhkGVm2XkW/Q+Qi+bRExGRWFMhtTqHvfYJP111M2ubppDgzkE/ZtH81peZ0LMTM156GM/ODjqmiEhcCbTQM7M7gAeBDcB44N18lvcCCxiF9IyeiIjEIjPj1IsH0P7d6Sy49kqWdkhgSwVouDqLSvc/x9SjDsG+/05z74mIFJOgR90cTGhqhePcfWfAWURERKSE1ahUjj4XDGZb//OZ9s6jLJr8AvW/S6T2qnQY/izzJk+kxS13UfGATkFHFRGJaUE/o1cZeEtFnoiISNlSsXwKPc6+ieMfmMra/vvycfccNleAxLm/sOSss/itf3+2jBunWzpFRPZQ0Ff0vgFaB5xBREREAlI5tSanXvEus776gE9a3ojPN46d4TBjJstmzCSpQV1qnjeIaqecQkKlSkHHFRGJGUFf0bsKON3M+gacI2ZoMBYREYlHnQ7rzUEdHqPhIV147ALnpaMSWJMKWStWs/qee/nliG5sfPM1PCcn6KgiIjEh6ELvRSALGGVma81spplNz7NMCzhjVNFgLCIiEq/KlUvhvIte4qGzplDziN4MHVSBh09O4JcG4Ft3svKue5j1j0P59cuJQUcVEYl6QRd6dcIflwLpQA2gdp6lTv67ioiISDyqW6sGN5xxH58Oms5+vW/m3+dW54kTE9hQCSqs2MzOwZcw+tRD+WLS10FHFRGJWoE+o+fuzYI8v4iUjPr167Nt2zYqVqwYdBQRiWFJiUlceWh/hhzcjynLpzGqw1M0+e939JrhtJ+3kczBFzB2/zoc8a+nqdBq36DjiohElaAHYxGROHTWWWcFHUFE4khiQiKHN+7K4f27suOM7Ux8/x7SX3qPtr/m0GTWGn478WQSOzel+S3/JqVth6DjiohEhaBv3RQREREpsvLJFfhn36H0ef97vrr+KL5uZziGz1jKr33PYM1jd2vSdRERVOjFHI26KSIiAomJSVw4aBjdnhrLA4NaMn5/IyHHWPf0G8zofwxZW7YEHVFEJFAq9GKMRt0UERH5n33rNubFqz/gt7Pu4LkTyrGtHFSeuZSp/zyUH6d9EnQ8EZHAqNATkWI3evRoXnnlFUaPHh10FBEpA5ISE3i4z5mcPeQL3u/bkmW1odb6bDIGXcvz95zF6q2rg44oIlLqVOiJSLFbunQpixYtYunSpUFHEZEy5IBmtbn1lvfZPGggS9OySMmCrq/OYvzpPXnqs/vYnrU96IgiIqVGhZ6IiIjEjZSkRE4deCNpdzxF1mFZ7EhxOi7M4aCbXuH2mw7ltZmvarAWESkTSr3QM7M6ZvaTmd1fSLv7zexHM6tZWtlEREQkPjTrfDT7PT2X1NsvZFtjo/IOGPTRTrL/7z4Gv3g62zK3BR1RRKREBXFF71KgHlBgoQc8ANQHBpd4IhEREYk/SSm06HstB4ydS/IFp5KVDF1+cfo9M5eLHz6aBeuWBJ1QRKTEBFHoHQ+87e4bC2rk7huAt4DepREqVmh6BRERkch
YQgKtrhtKm08+J7NeRepsguteW89TQ3vz9rwvgo4nIlIigij00oAZRWw7K9xewjS9goiIyJ5JbtyI9p99TVKXJpTLgos+zWDFv67gpk8e0XN7IhJ3gij0ygGZRWybGW4vIiIistcSypdnn1fGUqt/N7ITnaNnO10ffJaBzw1hZ2ZRfz0REYl+QRR6qyn6Vbo0YE0JZhEREZEyqPZtz9HyjrPJqpzNPivhghHjGfjQ6SzXoxEiEieCKPS+As4yswoFNTKzisBZwORSSSUiIiJlSoUzbmff/9xJdp1sam2BG0b+xN33nMjk3zRIi4jEviAKvWFAA+BtM6uWX4Pw+ncIjbr5ZKklE5Fi0a5dOzp27Ei7du2CjiIiUqCkrmfT9vW3IM0onwlXfbCWcfeewruz5wQdTURkrySV9gndfaqZ3QPcCiwys3eBH4DNQFWgI9AHSAXud/eppZ1RRPbOMcccE3QEEZEiS2jckTYjJ7P42t5sm7COM77axlsMoP5t73Bos+ZBxxMR2SNBXNHD3W8HLgZ2AucBjwLPhT8OCK8f7O63BJFPREREyharVJPmT06kZv+OAPT9ejvDXzibBX+sCjaYiMgeKvUreru4+7Nm9jLQFWhP6GreFmAe8JW7ZwSVTURERMqgxCTq3foGO5b0YPvk1Vz60QYuq3kOIy8YTa1K1YJOJyISkUCu6O3i7hnuPsHdh7n7ve7+hLuPV5G3e5owXUREpASZ0fTx90lslEjqNhj8wTJOGXUe6Tu3Bp1MRCQigRZ6eZlZVTN7wczaBJ0lWmnCdIkFzzzzDA8//DDPPPNM0FFERCJmFVNp8cyrWKUc2iyH3l/+zKnvXkyO5wQdTUSkyKKq0AMqEHpGr0HQQURkz6Wnp/+5iIjEoqSWnWhy13VgzvHfOk2/n8XNX2ggcBGJHdFW6AFY0AFEREREKp54IXX7HQbAJZ/k8MPsZxn7y+xgQ4mIFFE0FnoedAARERERgOq3jaBqh7pUyIDr3svkgf9eyZotel5PRKJfNBZ6uqInIiIiUcESEqj//Eck1a1Ig/UwcOxarhh5GZnZel5PRKJbVBV67r7a3RPcfXzQWUREREQAEipXpunL75BTPpGDFjhtZk7n4Tf+HXQsEZECRVWhJyIiIhKNyjVrRtNHh+HAaZOctT++zGefjQ46lojIbpXqhOlmdgehZ/Ducfec8OvCuLv/q4SjiYiIiBSoco/u1Lj0EjY89TRnfgIfVLqD6s26cHCbpkFHExH5m1It9IC7CBV6DwAZ4deFcUCFnoiIiASu7pDL2TD7WypPmclBX8DL5QdSr/ZHNK1ZKehoIiJ/Udq3bjYHWrh7Rq7XhS0tSjmjiIiISL4sIYHWjz5Jds0q7LMSGsxbzfWv38im7ZlBRxMR+YtSvaLn7ksKei0i8eHwww8nMzOT5OTkoKOIiBS7xNRUmj86jCUDBnDyVGdO8y857/XneOu8i0lK1PAHIhId9NNIRIpdly5d6NatG126dAk6iohIiajUpQu1LhhEgsNlH+awOnM414z5OOhYIiJ/Ku1n9CjiACy5aTCWXMzsIuCiunXrBh1FRESkTKt9xZVsnTSe2j8v4rz/ZjHs+Lv54qf9+EebZkFHExEp/UKP/AdgcXY/UboGY8nF3UcAI9LS0jzoLCIiImWZJSfT8D/D+e3E4+k2P5tZLbdxd84THJX2EGa7+7VGRKR0BHHr5n55liMIFXkX5LNtP2D/ADKKyF7YuXMnO3bsYOfOnUFHEREpUeWaNqXebbcBMOi/OVTZ9Dmjvvsx4FQiIgFc0XP3eblfm1nN8KeL824Tkdg0bNgw0tPTqVy5Mtdee23QcURESlTqaWew9fMPYPIsrnsnk3sSH+TUji+QrIFZRCRA+gkkIiIishfMjPqPPwd1k6m3ES7+bDrPTJwSdCwRKeNU6ImIiIjspYSKFWn1+ENsr+i0X+rY8JvYujMr6FgiUoap0BMREREpBskde1Hz7H3JSIS
ec9bx7t13Bx1JRMowFXoiIiIixaTp+Y/xQ49MADq9O5oV4ycFnEhEyqog5tG7NM+qSoSmUDjJzNrkt4+7P1XiwURERET2VvWmHHf8KTyf/iEnfgMrr7+G2p9+QnKdOkEnE5EyJoh59IbtZv3lu1nvgAo9ERERiQk1u99ChV/H8MOqJPZfvJU5l15Fp1GvYomJQUcTkTIkiEKvRwDnFBERESkdFapx3gFDODtnOE1ehmpzZ7Ho8Sdpcc0VQScTkTIkiHn0Jpb2OUVERERKU+VDLuX8FZN5oveP3PpmDjueHc6Wrl2ocsghQUcTkTJCg7GIiIiIFLfEJE489U1oU5/3DzXMYdllF5K1ennQyUSkjAji1k0RiXN9+vQhKyuLpCT9iBGRsishIZHbjv4PZ20/g7ZLs0hbnsXv5x5Ls/e/wspXDTqeiMQ5XdETkWLXsmVL0tLSaNmyZdBRREQC1bZmW05u05fHTkpkewrsWJLJH4/fFnQsESkDVOgVAzO7xcx+NrMcM+uTz/ZjzGyGmf1gZt+YWYcAYoqIiEgAruh0BZm1U3n+6NCvXetGfUHO1q0BpxKReKdCr3iMA44D/jYrqplVB14HznH3/YFrwq9FRESkDKhevjpDOg1hcntjcT3wbc6Kh+4JOpaIxLm4K/TMrJGZPWFmU81sm5m5mTXbTdvGZva2mW0ys81m9q6ZNYn0nO4+zd0X7mZzS2Cdu88Pt50CNDGzAyI9j0isWLFiBcuWLWPFihVBRxERiQqntT6N1jXa8Gyv0Fx6m996n4xlywJOJSLxLO4KPaAVcDqwAZi8u0ZmVhEYD7QBBgDnAPsAX5pZpWLMswCoYWbdwuftDVQBmhXjOUSiyhtvvMHzzz/PG2+8EXQUEZGokJSQxJ2H3smvDROY3M6w7ByW33t/0LFEJI7FY6E3yd3ruvtxwOgC2l0ItAD6uPsYd38f6A00BS7e1cjMvjOzP3azNC4sjLtvAk4B7jGzmcBRwI9A5p6/RREREYk1+9Xej3PbnsNrPRLISHJ2jB/H1mnTg44lInEq7go9d88pYtPewDfu/muufRcBXwMn5Vp3gLvX2s3yexEzTXL37u7eGbgBaADML/KbEhERkbhwWachVEitzLtdQ7dwrhh6D56dHXAqEYlHcVfoRaAdMDef9fOAtsV5IjOrn+vl7cD43AVmBMeZuWspvnQiIiJSWiokVWDowbfyYRdjbVXIWvALG0e/HXQsEYlDZXk24xqEnuPLaz1QPZIDmdltwCVAbaC9mQ0DDnT3VeEmd5vZ4YT6eyowaI9Th2VmZjJhwoS9PUxcW7x4sfqoCEqinzIyMv78GA//BvpaKpz6qHDqo8KVnT6qQq/MZF7t6VwzJodljzzC97VrQWJioXuWnT4Skb1Vlgs9AM9nnUV8EPehwNACtl8Y6TF3c5zOuz5PS0vz7t27F8dh49aECRNQHxWuJPpp5syZZGRkUK5cubj4N9DXUuHUR4VTHxWuLPXRgRUHc3LyoyyrCY3WbeaAjAyqHntsofuVpT4Skb1Tlm/d3EDoql5e1cn/Sp+IiIhIsai8/xncvmELYzuHfhVb/PzzAScSkXhTlgu9eYSe08urLaFRMaOSmV1kZjM2bdoUdBQRERHZU+VTOaJ5L5Kbb2dbOUieO4/1c+YFnUpE4khZLvQ+AA4xsxa7VoQnVu8W3haV3H2Eux+YmpoadBQRERHZGx3O4sr0jUxpH3pq5JP77sE9v6dKREQiF5eFnpn1NbO+wK5n2o4NrzsyV7NngcXA+2Z2Ungi8/eB34FnSjWwiIiIlD0te1KvcVdqtkoHoN0Ps3n1i9nBZhKRuBGvg7HknSj9qfDHiUB3AHffamY9gUeBVwkNwjIOuMrd00spp0hcOv/883F3zCIe20hEpOxISIC+L3DaM0fwaRNnn6Xww+v38MO+z7F/o2pBpxORGBeXV/Tc3XazdM/Tbqm7n+ruVd29irv3cffFwaQuGj2jJ7GgevX
q1KhRg+rVI5qpRESk7Klch+TTXqZ+q20AHPvLjwwe+TmbtmcGHExEYl1cFnrxTM/oiYiIxJkmB3Ng/5tJr+w0WO/ss/ZZXp6yOOhUIhLjVOiJiIiIBMwOvYRahzQCoOdPyxgzZ6wGZhGRvaJCT0SK3dy5c5k1axZz584NOoqISGwwo/GtL5KTCJ0XOE3T32LW7xuDTiUiMUyFXozRM3oSC8aOHcsHH3zA2LFjg44iIhIzkuo3JuHQdiQAB8/5g3dmLAk6kojEMBV6MUbP6ImIiMSv5jfeS47BYT84f0x/np1Z2UFHEpEYpUJPREREJEqU36c1Szunkuhw3A+T+PKnNUFHEpEYpUJPREREJIrUvnQwGUnQ+rcMpn/wedBxRCRGqdATERERiSIdDzmbLw4M/Yp26GfDWZe+M+BEIhKLVOjFGA3GIiIiEt+SEpLYfExb0stDozUbmfz6hxEfw93J2batBNKJSKxQoRdjNBiLiIhI/Dv8wHN5r2vo17QarzyJ5+REtP/OXxbwy8GHsPy660sinojEABV6IiIiIlGma9OjGN85gT+qQO11q1gw8p2I9t/61WQ8MxNLTi6hhCIS7VToiYiIiESZiskV6VxnP946IvSr2panhpGzY0eR90+f/BUAlQ8/rETyiUj0U6EnIsUuISHhz0VERPZMz7RTmNje+KOmU3H9GtY++1yR9svZupVtM2dCQgIVDz20hFOKSLRKCjqAiMSfq6++OugIIiIx78jG3SHBGP7PRG4fmcMfI54l8Y7bCt1v6/TpkJlJ+Q77k1S9eskHFZGopD+3xxiNuikiIlI21KpQi/1r7Mucpgmkt8wgITODym+NLnS/rbtu2zzs8JKOKCJRTIVejNGomyIiImVHj2b/BOCzI5yspATKz5nDlgkTCtwn/Ss9nyciKvREREREolbPJj0B+Kx2eWq03wLA6nvuJWdn/pOoZyxZQubSpSSkplJ+v/1KLaeIRB8VeiJS7CZNmsTnn3/OpEmTgo4iIhLTmqc2p0VqCzYnJrKkw062VK1A5u+/s+65/Adm2XU1r1LXQ7HExNKMKiJRRoWeiBS7b7/9lilTpvDtt98GHUVEJOb9M3z75n+rVKRp57UArBvxLBnLlv2trZ7PE5FdVOiJiIiIRLFeTXsBMK5yFWrU3czSxnXxnTtZdedduPuf7XJ27gyNuAlUOqxbIFlFJHqo0BMRERGJYq2qt6JFags2mvNthfLsf8AitqZUZOvXX7Px7bdh9Y9sG9Gd1XcchG/bRkrr1iTXrRt0bBEJmAq9GKPpFURERMqeXs1CV/U+Sq1D/Qob+a7DPgCsuedfbH+0JwNYyciVmQBU0mibIoIKvZij6RVERETKnl23b06oUI5M4JimM8hqnELOjkzmfVuZn8ol025RqG3lw1ToiYgKPREREZGo16paK5qnNmcLGXxbpzlNEteyb+clJKQ4lZYn0/crp+la2JkMpK4H4Nc16cGGFpFAqdATERERiXJm9udVvf827YhjTE7ej1d6dgHg9K9yAJjb1BgzexjzVmzimMcmMWTkd2Tn+G6PKyLxS4WeiIiISAzYNc3CuC0L2XL1Aq4udz1j2v3CtDT7s83sFsZr2xYz9M1xZOU4NSuVIzHBdndIEYljKvREREREYkCraq2om1SXjTs3MnfrAg5o/yMJyVt5tVdTEqtXh8REVrYsx7LkJGrveI0mNSpy47Ftgo4tIgFJCjqAiMSf2rVrU7FiRSpVqhR0FBGRuGFmdKrUic82fca7C97lp+1TAVi8/p/sfPxoWiZl0GX1W/yw5iPW1viJh45uR8Vy+lVPpKzSd7+IFLtzzz036AgiInGpU8VQoTd28VgA6iS3Y+HWVjz1yw4ePaMjH05YRcVqHzK3QiKVNrwHXBxsYBEJjG7dFBEREYkR9ZPr06xqsz9f33bo1ZRLTOSzeau4dvT3/Lomh322NQXglXkvB5RSRKKBCr0YownTRUREyi4z45jmxwB
weMPD6dH8YPoe2Ah3+PiHlSQYDDzidhLd+a9vZtXy6QEnFpGgqNCLMZowXUREpGw7v/35XH/g9Qw9bCgAFx/Rgl0Da154RAv+sf8h/CO5FllmvDH1/gCTikiQ9IyeiBS7MWPGsH37dipUqECfPn2CjiMiElcqJFXg3Hb/exa6ac1K3HLcvsxfuYWr/9EagHM7XMLYmfcwK30xnpODJehv+yJljQo9ESl2CxcuJD09ncqVKwcdRUSkTLjg8BZ/eb1/uzN4OSeTju3PUpEnUkap0BMRERGJN2YcsP85QacQkQDpTzwiIiIiIiJxRoWeiIiIiIhInFGhJyIiIiIiEmdU6ImIiIiIiMQZFXoiIiIiIiJxRoWeiIiIiIhInFGhJyIiIiIiEmc0j56IFLu0tDR27NhB+fLlg44iIiIiUiap0IsxZnYRcFHdunWDjiKyWyeccELQEURERETKNN26GWPcfYS7H5iamhp0FBERERERiVIq9EREREREROKMCj0REREREZE4o2f0RKTYPf/882zdupVKlSoxaNCgoOOIiIiIlDkq9ESk2G3cuJH09HQyMzODjiIiIiJSJunWTRERERERkTijQk9ERERERCTOqNATERERERGJMyr0RERERERE4owKPRERERERkTijQk9ERERERCTOqNATERERERGJMyr0RERERERE4oy5e9AZJEJmNhPYH/i+BA5fC/ijBPYpqM3utuW3Pu+63b3eN/x6fiG59kS89BFEVz8VtX2k/bQn66K1j4q6T3F8LamPirZeP5MKX68+Kny9+qjw9ZH0UTl3Ty4kl0j8c3ctMbYAM4GZJXTsGSWxT0Ftdrctv/V51+3utfqoaK+jqZ+K2j7SftqTddHaR6X5taQ+2rOvL/1MUh+pj+K3j7RoibVFt25KXiNKaJ+C2uxuW37r864r7HVJUB8VTaTnKWr7SPtpT9ZFax8VdZ/i+FpSHxVtfTR8v6mPCqc+Kpz6SCTO6NbNGBS+dRN37xx0lmilPioa9VPh1EeFUx8VTn1UOPVR4dRHhVMfifyPCj0REREREZE4o1s3RURERERE4owKPRERERERkTijQk9ERERERCTOqNATERERERGJMyr0RERERERE4owKPRERERERkTijQk9ERERERCTOqNATERERERGJMyr0RERERERE4owKPRERERERkTijQk9ERERERCTOqNCLU2Z2i5n9bGY5ZtYn6DzRxszKm9kYM5tvZrPNbKyZtQg6V7Qxs3Fm9n24jyabWcegM0UrMzvPzFzfb/kzs8Xhn0mzw8sFQWeKNmZWzsweM7MFZjbPzD4JOlM0MbMGub5+Zof7KcvMagSdLZqY2XFmNtPMZpnZHDM7N+hM0cbMjjGzGWb2g5l9Y2Ydgs4kUhKSgg4gJWYcMAp4PuggUWy4u48FMLMhwHNAz2AjRZ1T3H0TgJmdDLwEdAwyUDQys6bAhcA3QWeJcme4++ygQ0Sxe4FyQJq755hZ/aADRRN3X0Gunz9mdhPQ1d3XBxYqyphZAjCSUL/8GP7Z9IuZvevu6QHHiwpmVh14HTjM3eebWdfw6/bBJhMpfrqiFyXMrJGZPWFmU81sW/jKQLPdtG1sZm+b2SYz22xm75pZk9xt3H2auy8slfClpDj7yN137Crywr4BYv6KXgl8HW3K9bJqSWYvLcXdR+FfrJ4HLgd2lvw7KB3F3U/xqDj7yMwqAhcBN7l7DoC7ryyVN1KCSvjr6Hzi4I+ZxdxHFv646ypnNWAdkFFib6AUFHMftQTWuft8AHefAjQxswNK/p2IlC4VetGjFXA6sAGYvLtG4V8GxgNtgAHAOcA+wJdmVqkUcgapJPvocuD9Yk0bjGLvIzN73cyWAf8C+pdQ7tJU3H10DfC1u88sscTBKInvt1csdCvZK2bWsGRil6ri7KNW4ePcZGbfmtkUMzupJMOXkhL5uW1mRwBVgI9LIHNpK7Y+cvds4DRgjJktCR/vXHeP6UKP4v0
6WgDUMLNu4X16E/paalZS4UUC4+5aomABEnJ9fgHgQLN82l0JZAOtcq1rDmQB1+TTfgLQJ+j3F+V9dDMwFagY9HuM1j7KdbyPg36P0dRHQDtCV4OTw6/1/babryWgafhjEnAHMDXo9xhNfQR0Du8/KPy6DbAWaBn0+4yWPsrT/mXg/qDfX7T1Ufj7awJwRPj1QcAKoFbQ7zNa+ii87ohwP80EHgfmAScG/T61aCnuRVf0ooSHb9Upgt7AN+7+a659FwFfA/Hw19/dKok+MrPrgFOBY919W3FlDUoJfx09DxxtZjX3LmWwirmPjgCaAgvMbDFwCDDCzAYXX+JgFPfXkrsvCX/MAh4FDjaz5OJLXPqKuY+WEPrl9dXw9p+A2UCn4sobhBL6uV2V0M/tmL9tE4q9jzoCDdx9Unj7t8By9HWU9+fRJHfv7u6dgRuABsD8YowsEhVU6MWedsDcfNbPA9qWcpZoVaQ+MrNrgH7A0e6+sXSiRY1C+8jMqttfB4M4FVgDlJWBDwrtI3cf7u713b2ZuzcjdHXvIncfXnoxA1eUr6VKZlYt17azgbnunlny8aJCUb6W/gDGAscAhL/32gNzSilj0CL5v60fMNPdF5R4quhSlD76HWhgZru+91oRuu3x51JJGLyi/v+f+/+224HxuYtDkXihUTdjTw1C96jntR6ovuuFmd0GXALUBtqb2TDgQHdfVSopg1VoH5lZI+Bh4DdC9+4DZLn7gaUVMmBF+TqqDowys/JADqEi7wR399KJGLgifa9JkfqpLvCOmSUSGizid0LPEZUVRf1aGgw8b2b3EPqeu87dy8ov6JF8vw0CnijxRNGn0D5y99VmdiGhn905hP6gf5m7Ly29mIEq6tfR3WZ2OKHfg6cS+poSiTsq9GJTfr9o218auA8FhpZOnKhUYB+5+zLy9FkZVFgf/Ubo+Y6yrNDvtb80du9eclGiWlG+lmL61rFiUJSf24uBo0olTXQq0vebu3cphSzRqihfR28Ab5ROnKhUlD66sJSyiARKt27Gng38b9jk3KqT/1+xyiL1UeHUR4VTHxWN+qlw6qPCqY8Kpz4qnPpIJBcVerFnHqF70PNqC/xYylmilfqocOqjwqmPikb9VDj1UeHUR4VTHxVOfSSSiwq92PMBcIiZ/Tm5d3jS0G7hbaI+Kgr1UeHUR0Wjfiqc+qhw6qPCqY8Kpz4SycXKzrgK0c/M+oY/PYrQQCqXEppHaa27Twy3qQR8D2wHbiN0L/q/CE32ub+7p5d27tKkPiqc+qhw6qOiUT8VTn1UOPVR4dRHhVMfiUROhV4UMbPd/WNMzD3Ig5k1ITQP1dGEHjAeB1wVfpA/rqmPCqc+Kpz6qGjUT4VTHxVOfVQ49VHh1EcikVOhJyIiIiIiEmf0jJ6IiIiIiEicUaEnIiIiIiISZ1ToiYiIiIiIxBkVeiIiIiIiInFGhZ6IiIiIiEicUaEnIiIiIiISZ1ToiYiIiIiIxBkVeiIiUcTM3MxeyvW6WXjdXcGlig1mdle4r9oHnUVERCRoKvREROKQmVULFz7dg84iIiIipS8p6AAiIlKgJUAFICvC/aoBd4Y/n1CMeURERCQGqNATEYli7u7AjqBziIiISGzRrZsiIgEws6Zm9p6ZbTGzDWb2hpnVzafd357RM7NEM7vBzOaZ2VYz22hmc8zs3vD27sCicPM7w/u7mU0Ib69iZveY2QwzW29mO8L7X5rP+Xc993aAmQ01s+Xh9tPNrFs+7RPM7HIz+87MtoWzTTOz8/O0q2lmj5nZEjPLMLNlZvaEmaXuea/+qaKZDTOz1eEM482sQ57zdw+/ryFmdoWZ/ZqrH04rhgwiIiKB0hU9EZFSZmbVgclAXeBJYCFwPPBpEQ9xO6HbMl8B/kPoZ/k+QPfw9vnA1cCjwHvAu+H1q8MfGwLnAW8DLwLJwCnAk2ZWw92H5nPOp4GdwL+BysB1wIdm1szdN4fflwGjgL6Ebhe9E9gOdAB
6Ay/kev9TgVrACOA3oB1wCXCImXVz94wi9kV+RhC6Cnpv+ByXAxPNrLO7L8zT9oJwm6fD+wwCRpmZu/vbe5FBREQkUCr0RERK341AY+B0dx8NYGZPAaOBTkXY/yTgU3cfkN9Gd19tZmMIFXo/uPtreZr8BjRx9z+f+zOzx4EvgOvN7AF3z8yzzybgn+6eE27/E6FC8SxCRRLAmYSKvOHAZeHbTncd33IdayhQDzjA3X/N1eYbYCTQn3BRuIdygCPdfWf4uB8C08Ln7ZenbWugjbsvDbd9DpgHPGpm77l79l7kEBERCYxu3RQRKX0nAUsJFUrAn8/iPVTE/TcCbc1s3z05ubtn7CryzKycmdUAahIq9KoCbfLZbdiuIi/sy/DHVrnW9QOygVtyF3nhc3r4fAacAYwHNppZrV1L+JhZwNF78r7yZN2Z69zTCV1BPcHM8v6/9/auIi/cdiPwPNAI6LyXOURERAKjQk9EpPQ1B37OWwwRuuWyKG4DKgE/mtnPZjbczI7Pc9VstyzkqvBVuR3AOmAtoVsdAarns9vi3C/cfX3405q5Vu8DLAkXS7tTO7zPSeFz5l5WErrTpE5R3kcBfspn3c+EbjmtXcS2EPp3EhERiUm6dVNEJMa4+9dm1gI4FugJ9CL0fNs4Mzsm9y2Zu3EdoWftPgbuJ/TsXiZwHKFn+/L7I+DubmEsUnGZy65jf0Lo1tL8bIjwmCIiIpKHCj0RkdK3CEiz8IgfudYX+VZMd98CvBVeMLMHgBsIFX8fAnmvFuZ2VjjDiXmeo+tZ5HeQv1+A48ws1d037abNWkLP+1V09y/28ny70waYkmddGpAePn/etnmlhT8uymebiIhITNCtmyIipe8DoAmhgUuAP59du7YoO4efZ8trVvjjrlsp08Mf87sNM5tQIfjn/wFmVhM4P5+2kXiT0B8Q/zZq567bSsODm4wCuptZr3zaJYVH5dwbQ8wsJdcxuwCHAx/nec4QoK+ZNcnVNpXQyJvLgZl7mUNERCQwuqInIlL6/k3oqtprZnYo/5teoX4R959vZlOA6YSea2sKXAqsJ3RLJO6+zswWAmea2a+ErmStcffxhKZcGAp8FB6dsw5wEaHi5m9z+UXgTeA0QoVWW0LTRWwD9iM0yubJ4XY3A0cAn5jZq8AMQv8ftQJOJXRl8jUAMxtIaAqI/3P3u4qYI4HQdApv8L/pFTYTmpYir1+AKWY2nND0EYOABsCZGnFTRERimQo9EZFSFi7CjgAeI1RgZQKfEZrbblURDvEwcAJwFaFRMlcBHwH3uPuaXO3OAR4BHgAqABMJjXb5AFAOGAD0IHSL4gOErgK+uBfvy8OTjV8Zfi9DCQ328jOhKRd2tVtvZocQmmbiVEJF71ZgCfAyoTn4dqkS/rg8gigXAecCt4b3/wa4xt0X5NP2OUKF4ZWERtr8lVCRNyqC84mIiEQd+/ugbyIiItHBzN4B9gfa5jO3394ctzuh6Rwud/dhxXVcERGRaKEreiIiEpXCc971AC4sziJPRESkLFChJyIiUSk8cEqNoHOIiIjEIo26KSIiIiIiEmf0jJ6IiIiIiEic0RU9ERERERGROKNCT0REREREJM6o0BMREREREYkzKvRERERERETijAo9ERERERGROKNCT0REREREJM78P6xy8rfhFnAYAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Full scaling\n", "\n", "n = len(df)\n", "cis_scalings, trans_levels = scaling.compute_scaling(\n", " df,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings, n, label=\"pairs\")\n", "\n", "# The point where the scalings by distance become balanced:\n", "plt.axvline(2e3, ls='--', c='gray', label='Balancing point')\n", "\n", "plt.savefig(\"./oriented_scalings.pdf\")" ] }, { "cell_type": "code", "execution_count": 17, "id": "a506a74c-230f-4219-9273-99b6f04e211d", "metadata": {}, "outputs": [], "source": [ "df.loc[:, \"type_bydist\"] = \"Regular pair\"\n", "\n", "mask_ondiagonal = (np.abs(df.pos2-df.pos1)<=2e3)\n", "\n", "mask_DE = (df.strand1==\"+\") & (df.strand2==\"-\") & mask_ondiagonal\n", "df.loc[mask_DE, \"type_bydist\"] = \"DanglingEnd\"\n", "\n", "mask_SS = (df.strand1==\"-\") & (df.strand2==\"+\") & mask_ondiagonal\n", "df.loc[mask_SS, \"type_bydist\"] = \"SelfCircle\"\n", "\n", "mask_Err = (df.strand1==df.strand2) & mask_ondiagonal\n", "df.loc[mask_Err, \"type_bydist\"] = \"Mirror\"" ] }, { "cell_type": "code", "execution_count": 18, "id": "255bda45-6a64-4795-a964-546e55d67145", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type_bydist\n", "DanglingEnd 135381\n", "Mirror 18383\n", "Regular pair 1053213\n", "SelfCircle 8177\n", "Name: readID, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(\"type_bydist\").groupby(\"type_bydist\").count()['readID']" ] }, { "cell_type": "code", "execution_count": 19, "id": "b628bdfb-abbf-45df-8f33-2056dc96f19f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
readID
type_bydistDanglingEndMirrorRegular pairSelfCircle
type_rfrag
DanglingEnd76898040
Mirror03176380
Regular pair584831520710529945318
SelfCircle001772859
\n", "
" ], "text/plain": [ " readID \n", "type_bydist DanglingEnd Mirror Regular pair SelfCircle\n", "type_rfrag \n", "DanglingEnd 76898 0 4 0\n", "Mirror 0 3176 38 0\n", "Regular pair 58483 15207 1052994 5318\n", "SelfCircle 0 0 177 2859" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values([\"type_rfrag\", \"type_bydist\"])\\\n", " .groupby([\"type_rfrag\", \"type_bydist\"])\\\n", " .count()[['readID']]\\\n", " .reset_index()\\\n", " .pivot(columns=\"type_bydist\", index=\"type_rfrag\")\\\n", " .fillna(0).astype(int)" ] }, { "cell_type": "markdown", "id": "23a56c6f-c2d1-48e4-9b2e-860622af5a3f", "metadata": {}, "source": [ "False Positives are in 3rd row, False Negatives are in 3rd column. Filtering by distance is, thus, nearly as effective as filtering by restriction fragment, but removes additional pairs that can be potential undercut by restriction enzyme.\n", "\n", "Removing all contacts closer than 2 Kb will remove Hi-C artifacts." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10", "language": "python", "name": "python310" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.0.3/doc/examples/pairtools_walkthrough.ipynb000066400000000000000000004632711452673171500236400ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "112fe2d5-aaed-4eb1-b3f5-2f5889a9c89f", "metadata": { "tags": [] }, "source": [ "# Pairtools walkthrough\n", "\n", "Welcome to the pairtools walkthrough. 
\n", "\n", "Pairtools is a tool for extraction of pairwise contacts out of sequencing chromosome conformation capture data, such as Hi-C, Micro-C or MC-3C.\n", "Pairtools is used for obtaining .cool files by [distiller](https://github.com/open2c/distiller-nf/blob/master/distiller.nf), and has many more applications (see single-cell walkthrough or phasing walkthrough). \n", "\n", "Here, we will cover the basic steps from raw reads to .cool file with binned contacts.\n", "\n", "Outline:\n", "\n", "- [Download raw data](#Download-raw-data)\n", "\n", "- [Install reference genome](#Install-reference-genome)\n", "\n", "- [Map data with bwa mem](#Map-data-with-bwa-mem)\n", "\n", "- [Extract contacts](#Contacts-extraction)\n", "\n", "- [MultiQC]( #MultiQC )\n", "\n", "- [Load pairs to cooler](#Load-pairs-to-cooler)\n", "\n", "- [Visualize cooler](#Visualize-cooler)" ] }, { "cell_type": "markdown", "id": "bd264406-be74-4060-9798-e18040c44889", "metadata": {}, "source": [ "### Download raw data\n", "\n", "\"Raw\" data, or .fastq files are generated by sequencing facilities or can be taken from public databases, such as SRA. We will take a sample from Rao et al. 2017, human datasets.\n", "To reduce computation time, take 5 mln reads instead of full sample:" ] }, { "cell_type": "code", "execution_count": null, "id": "f4e310c0-2d16-4e7d-87d7-44feec8e6256", "metadata": {}, "outputs": [], "source": [ "! fastq-dump SRR13849430 --gzip --split-spot --split-3 --minSpotId 0 --maxSpotId 5000000" ] }, { "cell_type": "code", "execution_count": null, "id": "571e94fb-3dec-4042-9e21-6c39802ed8df", "metadata": {}, "outputs": [], "source": [ "! 
ls SRR13849430*.fastq.gz" ] }, { "cell_type": "code", "execution_count": 1, "id": "e9fb044d-1ba0-48c7-b40a-99d033518e43", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "@SRR13849430.1 1 length=150\n", "NTCTCAGCCTTTATAAGATAGAAGAGAGTTGGGACCTTGCTCTAAATTCTGCTTTAGCAAGGGACTTTTGTACCTGCTTTCTTCCTTTATCCAGATCTAAAAATAGTTTATATGCTGACAACTCCCTGATGTTATTCTTTGTAGTATTTG\n", "+SRR13849430.1 1 length=150\n", "#AAFFJJJJJJJJJJJAJAJJJJFJJJAFFFFFFA7A-FJ7JJJ-AJAJJF-<-JJFFJ7FJJF7FJJFJJ test.bam" ] }, { "cell_type": "markdown", "id": "89f9d829-3f79-49b4-b74d-8bca732b8a44", "metadata": {}, "source": [ "After mapping, you have .sam/.bam alignment file, which cannot be interpreted as pairs directly. You need to extract contacts from it:" ] }, { "cell_type": "code", "execution_count": 3, "id": "955bcafa-e521-4627-8c8b-94e05e46e6b8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.1\t121\tchr12\t78795720\t60\t53S97M\t=\t78795720\t0\tCAAATACTACAAAGAATAACATCAGGGAGTTGTCAGCATATAAACTATTTTTAGATCTGGATAAAGGAAGAAAGCAGGTACAAAAGTCCCTTGCTAAAGCAGAATTTAGAGCAAGGTCCCAACTCTCTTCTATCTTATAAAGGCTGAGAN\t-7-7---A------7--77--))))7--F-A)7F( pairtools split \\\n", " --output-pairs test.nodups.pairs.gz \\\n", " --output-sam test.nodups.bam \\\n", " ) \\\n", " --output-unmapped \\\n", " >( pairtools split \\\n", " --output-pairs test.unmapped.pairs.gz \\\n", " --output-sam test.unmapped.bam \\\n", " ) \\\n", " --output-dups \\\n", " >( pairtools split \\\n", " --output-pairs test.dups.pairs.gz \\\n", " --output-sam test.dups.bam \\\n", " ) \\\n", " --output-stats test.dedup.stats \\\n", " test.pairs.gz" ] }, { "cell_type": "code", "execution_count": 7, "id": "d9aaceeb-1a88-4c24-9fc2-3f44069715a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.513\tchr20\t23502312\tchr20\t23063544\t+\t+\tRU\t60\t60\n", "SRR13849430.1442\tchr7\t57224960\tchrX\t82818236\t+\t+\tUU\t60\t30\n", 
"SRR13849430.2378\tchr5\t115925933\tchr21\t24124840\t+\t+\tUU\t60\t50\n", "SRR13849430.2547\tchr1\t52097837\tchr12\t1888807\t-\t-\tUU\t60\t60\n", "SRR13849430.3015\tchr17\t74750879\tchr11\t117356318\t+\t-\tUR\t60\t60\n", "SRR13849430.3027\tchr15\t34977762\tchr15\t31897447\t-\t+\tUR\t11\t60\n", "SRR13849430.3406\tchr11\t1171960\tchr9\t121265592\t+\t-\tUU\t60\t60\n", "SRR13849430.3988\tchr16\t86824176\tchr13\t104521019\t-\t+\tUU\t60\t17\n", "SRR13849430.4030\tchr17\t73189645\tchr4\t49092470\t-\t+\tUU\t60\t31\n", "SRR13849430.4316\tchr8\t124329308\tchr8\t124336541\t-\t-\tUU\t60\t60\n" ] } ], "source": [ "%%bash\n", "# Unique pairs:\n", "gzip -dc test.nodups.pairs.gz | grep -v \"#\" | head -n 10" ] }, { "cell_type": "code", "execution_count": 8, "id": "ca3e27a7-7905-46b2-8ad4-245c28f01102", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.60371\tchr2\t44507613\tchr7\t116276932\t-\t+\tDD\t60\t57\n", "SRR13849430.67567\tchr5\t62425895\tchr5\t62425612\t-\t+\tDD\t60\t60\n", "SRR13849430.97623\tchr3\t162233323\tchr3\t162154449\t-\t+\tDD\t60\t52\n", "SRR13849430.108366\tchr8\t48691403\tchr8\t48872239\t-\t-\tDD\t60\t60\n", "SRR13849430.138622\tchr16\t8435050\tchr16\t6032751\t+\t-\tDD\t60\t60\n", "SRR13849430.146482\tchr14\t86385083\tchr2\t119648648\t+\t+\tDD\t60\t60\n", "SRR13849430.148232\tchrX\t21885792\tchrX\t21887418\t+\t-\tDD\t60\t60\n", "SRR13849430.149771\tchr16\t6646543\tchr16\t6648097\t-\t-\tDD\t60\t60\n", "SRR13849430.156983\tchr4\t55704089\tchr4\t76039070\t+\t+\tDD\t60\t13\n", "SRR13849430.157962\tchr6\t47656758\tchr6\t47748395\t+\t-\tDD\t60\t35\n" ] } ], "source": [ "%%bash\n", "# Only duplicated pairs:\n", "gzip -dc test.dups.pairs.gz | grep -v \"#\" | head -n 10" ] }, { "cell_type": "markdown", "id": "7441b723-5c5d-4502-8330-c8b7b4a24e30", "metadata": {}, "source": [ "#### pairtools select\n", "\n", "Sometimes you may need certain types of pairs based on their properties, such as mapq, pair type, distance or 
orientation. \n", "For all these manipulations, there is `pairtools select` which takes a file and a Pythonic condition as input:" ] }, { "cell_type": "code", "execution_count": null, "id": "3a2de712-b4ef-4ee3-af68-d19f2fa8fb8f", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "pairtools select \"mapq1>0 and mapq2>0\" test.nodups.pairs.gz -o test.nodups.UU.pairs.gz" ] }, { "cell_type": "markdown", "id": "1e6445fa-551b-4583-aa61-587a27370fa4", "metadata": { "tags": [] }, "source": [ "#### pairtools stats\n", "\n", "Describe the types and distance properties of pairs: " ] }, { "cell_type": "code", "execution_count": null, "id": "3aca9ac8-668b-46c4-a1c2-6172303f284a", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools stats test.pairs.gz -o test.stats" ] }, { "cell_type": "markdown", "id": "ca2c1c56-9024-4fa0-abb9-ed1f9ab313f1", "metadata": {}, "source": [ "### MultiQC" ] }, { "cell_type": "code", "execution_count": null, "id": "0967edf9-fdf6-4294-98fc-a2c069917de6", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "multiqc test.stats" ] }, { "cell_type": "code", "execution_count": 9, "id": "d76bd76c-f0f5-4921-b873-9390e715eab9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import IFrame\n", "\n", "IFrame(src='./multiqc_report.html', width=1200, height=700)" ] }, { "cell_type": "markdown", "id": "e0dc157d-a8c6-4319-b83c-d450f2a822f3", "metadata": {}, "source": [ "### Load pairs to cooler\n", "Finally, once you have obtained a list of appropriate pairs, you can create coolers from it: " ] }, { "cell_type": "code", "execution_count": null, "id": "3d9df0e2-f8d3-487b-8369-cddf8bdd54df", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "cooler cload pairs \\\n", " -c1 2 -p1 3 -c2 4 -p2 5 \\\n", " --assembly hg38 \\\n", " 
~/.local/share/genomes/hg38/hg38.fa.sizes:1000000 \\\n", " test.nodups.UU.pairs.gz \\\n", " test.hg38.1000000.cool" ] }, { "cell_type": "code", "execution_count": null, "id": "083da222-8d15-408b-ad8c-7fa35881597f", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "cooler zoomify \\\n", " --nproc 5 \\\n", " --out test.hg38.1000000.mcool \\\n", " --resolutions 1000000,2000000 \\\n", " --balance \\\n", " test.hg38.1000000.cool" ] }, { "cell_type": "markdown", "id": "9a17fb3c-d5f8-472e-b80a-e7708798ea72", "metadata": {}, "source": [ "### Visualize cooler\n", "\n", "Based on [open2c vis example](https://github.com/open2c/open2c_examples/blob/master/viz.ipynb)" ] }, { "cell_type": "code", "execution_count": 36, "id": "1839183d-4d5c-4b29-926c-0d56e00c8b8a", "metadata": {}, "outputs": [], "source": [ "import cooler\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import cooltools.lib.plotting\n", "from matplotlib.colors import LogNorm\n", "import seaborn as sns\n", "import bioframe\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 11, "id": "7bae233c-36f2-483c-8957-766e200739a4", "metadata": {}, "outputs": [], "source": [ "file = \"test.hg38.1000000.mcool::/resolutions/1000000\"" ] }, { "cell_type": "code", "execution_count": 12, "id": "2b4cc40b-5aaf-4db8-b870-ba190fdb5d01", "metadata": {}, "outputs": [], "source": [ "clr = cooler.Cooler(file)" ] }, { "cell_type": "code", "execution_count": 19, "id": "4350d8c1-b50c-43f7-92e5-43802122320b", "metadata": {}, "outputs": [], "source": [ "# Define chromosome starts\n", "chromstarts = []\n", "for i in clr.chromnames:\n", " chromstarts.append(clr.extent(i)[0])" ] }, { "cell_type": "code", "execution_count": 20, "id": "cd823dec-49c8-46e0-96b6-dcb0344f9d9c", "metadata": {}, "outputs": [], "source": [ "from matplotlib.ticker import EngFormatter\n", "bp_formatter = EngFormatter('b')\n", "\n", "def format_ticks(ax, x=True, y=True, 
rotate=True):\n", " if y:\n", " ax.yaxis.set_major_formatter(bp_formatter)\n", " if x:\n", " ax.xaxis.set_major_formatter(bp_formatter)\n", " ax.xaxis.tick_bottom()\n", " if rotate:\n", " ax.tick_params(axis='x',rotation=45)" ] }, { "cell_type": "code", "execution_count": 40, "id": "896235bb-749b-4c2e-95ae-352c91452b24", "metadata": {}, "outputs": [], "source": [ "# Define the bounds of the continuous fragment of whole-genome interaction map\n", "chrom_start, chrom_end = clr.chromnames.index('chr3'), clr.chromnames.index('chr6')\n", "start, end = chromstarts[chrom_start], chromstarts[chrom_end]" ] }, { "cell_type": "code", "execution_count": 43, "id": "a0d99510-d5e6-4de5-861b-8eeddcb6c25b", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "vmax = 15\n", "norm = LogNorm(vmin=1, vmax=vmax)\n", "\n", "f, axs = plt.subplots(\n", " figsize=(13, 10),\n", " nrows=2, \n", " ncols=1,\n", " sharex=False, sharey=False)\n", "\n", "ax = axs[0]\n", "ax.set_title('Interaction maps (chr1)')\n", "im = ax.matshow(clr.matrix(balance=False).fetch('chr1'), vmax=vmax, cmap='fall'); \n", "plt.colorbar(im, ax=ax ,fraction=0.046, pad=0.04, label='chr1');\n", "\n", "ax = axs[1]\n", "ax.set_title('Chromosomes 3-5')\n", "im = ax.matshow(clr.matrix(balance=False)[start:end, start:end], norm=norm, cmap='fall'); \n", "plt.colorbar(im, ax=ax ,fraction=0.046, pad=0.04, label='Whole-genome');\n", "ax.set_xticks(np.array(chromstarts[chrom_start:chrom_end])-start, clr.chromnames[chrom_start:chrom_end], rotation=90);\n", "ax.set_yticks(np.array(chromstarts[chrom_start:chrom_end])-start, clr.chromnames[chrom_start:chrom_end], rotation=90);\n", "\n", "format_ticks(axs[0], rotate=False)\n", "\n", "plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "id": "e07ca165-15ed-459c-af7b-3156de81f935", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test", "language": "python", "name": "test" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.0.3/doc/examples/scalings_example.ipynb000066400000000000000000002373341452673171500225220ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2022-04-12 10:00:02-- https://data.4dnucleome.org/files-processed/4DNFI3PUO824/@@download/4DNFI3PUO824.pairs.gz\n", "Resolving data.4dnucleome.org 
(data.4dnucleome.org)... 34.225.43.243, 34.199.170.160\n", "Connecting to data.4dnucleome.org (data.4dnucleome.org)|34.225.43.243|:443... connected.\n", "HTTP request sent, awaiting response... 403 Forbidden\n", "2022-04-12 10:00:02 ERROR 403: Forbidden.\n", "\n" ] } ], "source": [ "!wget https://data.4dnucleome.org/files-processed/4DNFI3PUO824/@@download/4DNFI3PUO824.pairs.gz -O ./tmp/MicroC.pairs.gz " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker \n", "import matplotlib.gridspec \n", "\n", "%matplotlib inline\n", "plt.style.use('seaborn-poster')\n", "\n", "import pairtools\n", "import pairtools.lib.scaling as scaling\n", "import bioframe" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pairs_path = '../tmp/MicroC.pairs.gz'" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "mm10_chromsizes = bioframe.fetch_chromsizes('mm10', as_bed=True)\n", "mm10_arms = mm10_chromsizes\n", "\n", "# hg38_chromsizes = bioframe.fetch_chromsizes('hg38', as_bed=True)\n", "# hg38_cens = bioframe.fetch_centromeres('hg38')\n", "# hg38_arms = bioframe.split(hg38_chromsizes, hg38_cens, cols_points=['chrom', 'mid'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "cis_scalings, trans_levels = scaling.compute_scaling(\n", " pairs_path,\n", " regions=mm10_arms,\n", " chromsizes=mm10_chromsizes,\n", " dist_range=(10, 1000000000), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " cmd_in=\"gzip -dc \"\n", " )\n", "\n", "# calculate average trans contact frequency _per directionality pair_\n", "# 
convert from int to float64 to avoid overflow\n", "avg_trans = (\n", " trans_levels.n_pairs.astype('float64').sum() \n", " / trans_levels.np_bp2.astype('float64').sum()\n", ")\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize=(6,10))\n", "gs = matplotlib.gridspec.GridSpec(2,1, height_ratios=[8, 1.5])\n", "ax1 = fig.add_subplot(gs[0,0])\n", "ax2 = fig.add_subplot(gs[1,0])\n", "\n", "strand_gb = cis_scalings.groupby(['strand1', 'strand2'])\n", "for strands in ['+-', '-+', '++', '--']:\n", " sc_strand = strand_gb.get_group(tuple(strands))\n", " sc_agg = (sc_strand\n", " .groupby(['min_dist','max_dist'])\n", " .agg({'n_pairs':'sum', 'n_bp2':'sum'})\n", " .reset_index())\n", "\n", " dist_bin_mids = np.sqrt(sc_agg.min_dist * sc_agg.max_dist)\n", " pair_frequencies = sc_agg.n_pairs / sc_agg.n_bp2\n", " mask = pair_frequencies>0\n", " label = f'{strands[0]}{strands[1]}'\n", "\n", " ax1.loglog(\n", " dist_bin_mids[mask],\n", " pair_frequencies[mask],\n", " label=label,\n", " lw=2\n", " )\n", "\n", " ax2.semilogx(\n", " np.sqrt(dist_bin_mids.values[1:]*dist_bin_mids.values[:-1]),\n", " np.diff(np.log10(pair_frequencies.values)) / np.diff(np.log10(dist_bin_mids.values)),\n", " label=label\n", " )\n", " \n", "ax1.axhline(avg_trans, ls='--', c='gray', label='average trans')\n", "\n", "plt.sca(ax1)\n", "plt.gca().set_aspect(1.0)\n", "plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", "plt.gca().yaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", "plt.xlim(1e1,1e9)\n", "# plt.ylim(avg_trans / 3, plt.ylim()[1])\n", "\n", "plt.grid(lw=0.5,color='gray')\n", "plt.legend(loc=(1.1,0.4))\n", "plt.ylabel('contact frequency, \\nHi-C molecule per bp pair')\n", "plt.xlabel('distance, bp')\n", "\n", "plt.sca(ax2)\n", "plt.xlim(1e1,1e9)\n", "plt.ylim(-2,0.5)\n", "plt.gca().set_aspect(1.0)\n", "plt.ylabel('log-log slope') \n", "plt.xlabel('distance, bp')\n", "\n", "plt.yticks(np.arange(-2,0.6,0.5))\n", "plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", 
"plt.grid(lw=0.5,color='gray')\n", "\n", "# fig.tight_layout()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10", "language": "python", "name": "python310" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 4 } pairtools-1.0.3/doc/formats.rst000066400000000000000000000336201452673171500165200ustar00rootroot00000000000000Formats for storing Hi-C pairs ============================== .pairs ------ `.pairs` is a simple tabular format for storing DNA contacts detected in a Hi-C experiment. The detailed `.pairs specification `_ is defined by the 4DN Consortium. The body of a .pairs contains a table with a variable number of fields separated by a "\\t" character (a horizontal tab). The .pairs specification fixes the content and the order of the first seven columns: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 1 read_id the ID of the read as defined in fastq files 2 chrom1 the chromosome of the alignment on side 1 3 pos1 the 1-based genomic position of the outer-most (5') mapped bp on side 1 4 chrom2 the chromosome of the alignment on side 2 5 pos2 the 1-based genomic position of the outer-most (5') mapped bp on side 2 6 strand1 the strand of the alignment on side 1 7 strand2 the strand of the alignment on side 2 ======== =========== =============================================================================== A .pairs file starts with a header, an arbitrary number of lines starting with a "#" character. 
By convention, the header lines have a format of "#field_name: field_value". The `.pairs specification `_ mandates a few standard header lines (e.g., column names, chromosome order, sorting order, etc), all of which are automatically filled in by `pairtools`. The entries of a .pairs file can be flipped and sorted. "Flipping" means that *the sides 1 and 2 do not correspond to side1 and side2 in sequencing data.* Instead, side1 is defined as the side with the alignment with a lower sorting index (using the lexicographic order for chromosome names, followed by the numeric order for positions and the lexicographic order for pair types). This particular order of "flipping" is defined as "upper-triangular flipping", or "triu-flipping". Finally, pairs are *typically* block-sorted: i.e. first lexicographically by chrom1 and chrom2, then numerically by pos1 and pos2. Pairtools' flavor of .pairs --------------------------- .pairs files produced by `pairtools` extend .pairs format in a few ways. 1. `pairtools` store null, unmapped, ambiguous (multiply mapped) and chimeric (if not parsed by `parse2` or `--walks-policy all` of `parse`) alignments as chrom='!', pos=0, strand='-'. #. `pairtools` store the header of the source .sam files in the '#samheader:' fields of the pairs header. When multiple .pairs files are merged, the respective '#samheader:' fields are checked for consistency and merged. #. Each pairtool applied to .pairs leaves a record in the '#samheader' fields (using a @PG sam tag), thus preserving the full history of data processing. #. `pairtools` append an extra column describing the type of a Hi-C pair: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 8 pair_type the type of a Hi-C pair ======== =========== =============================================================================== ..
_section-pair-types: Pair types ---------- `pairtools` use a simple two-character notation to define all possible pair types, according to the quality of alignment of the two sides. The type of a pair can be defined unambiguously using the table below. To use this table, identify which side has an alignment of a "poorer" quality (unmapped < multimapped < unique alignment) and which side has a "better" alignment and find the corresponding row in the table. ======================== ====== =============== ========= ================== ========= ================== =========== . . . Less informative alignment More informative alignment . ------------------------ ------ --------------- ---------------------------- ---------------------------- ----------- Pair type Code >2 alignments Mapped Unique Mapped Unique Sidedness walk-walk WW |check| |cross| |cross| |cross| |cross| 0 [1]_ null NN |cross| |cross| |cross| 0 corrupt XX |cross| |cross| |cross| 0 [2]_ null-multi NM |cross| |cross| |check| |cross| 0 null-rescued NR |check| |cross| |check| |check| 1 [3]_ null-unique NU |cross| |cross| |check| |check| 1 multi-multi MM |cross| |check| |cross| |check| |cross| 0 multi-rescued MR |check| |check| |cross| |check| |check| 1 [3]_ multi-unique MU |cross| |check| |cross| |check| |check| 1 rescued-unique RU |check| |check| |check| |check| |check| 2 [3]_ unique-rescued UR |check| |check| |check| |check| |check| 2 [3]_ unique-unique UU |cross| |check| |check| |check| |check| 2 duplicate DD |cross| |check| |check| |check| |check| 2 [4]_ ======================== ====== =============== ========= ================== ========= ================== =========== .. [1] "walks", or, `C-walks `_ are Hi-C molecules formed via multiple ligation events which cannot be reported as a single pair. .. [2] "corrupt" pairs are those with technical issues - e.g. missing a FASTQ sequence/SAM entry from one side of the molecule. .. 
[3] "rescued" pairs have two non-overlapping alignments on one of the sides (referred to below as the chimeric side/read), but the inner (3'-) one extends the only alignment on the other side (referred to as the non-chimeric side/read). Such pairs form when one of the two ligated DNA fragments is shorter than the read length. In this case, one of the reads contains this short fragment entirely, together with the ligation junction and a chunk of the other DNA fragment (thus, this read ends up having two non-overlapping alignments). Following the procedure introduced in `HiC-Pro `_ and `Juicer `_, `pairtools parse` rescues such Hi-C molecules, reports the position of the 5' alignment on the chimeric side, and tags them as "NU", "MU", "UR" or "RU" pair type, depending on the type of the 5' alignment on the chimeric side. Such molecules can and should be used in downstream analysis. Read more on the rescue procedure in :doc:`the section on parsing `. .. [4] `pairtools dedup` detects molecules that could be formed via PCR duplication and tags them as "DD" pair type. These pairs should be excluded from downstream analyses. .pairsam -------- `pairtools` also define .pairsam, a valid extension of the .pairs format.
On top of the pairtools' flavor of .pairs, .pairsam format adds two extra columns containing the alignments from which the Hi-C pair was extracted: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 9 sam1 the sam alignment(s) on side 1; separate supplemental alignments by NEXT_SAM 10 sam2 the sam alignment(s) on side 2; separate supplemental alignments by NEXT_SAM ======== =========== =============================================================================== Note that, normally, the fields of a sam alignment are separated by a horizontal tab character (\\t), which we already use to separate .pairs columns. To avoid confusion, we replace the tab character in sam entries stored in sam1 and sam2 columns with a UNIT SEPARATOR character (\\031). Finally, sam1 and sam2 can store multiple .sam alignments, separated by a string '\\031NEXT_SAM\\031' .. |check| unicode:: U+2714 .. check .. |cross| unicode:: U+274C .. cross Extra columns ---------------- `pairtools` can operate on `.pairs/.pairsam` with extra columns. Extra columns appear in the order in which they were added by the various tools. Column names can be checked in the header of the `.pairs/.pairsam` file. We provide `pairtools header` utilities for manipulating and verifying compatibility of headers and their columns.
The list of additional columns used throughout `pairtools` modules: =================================== =================== ====================== ================================================== ================= extra column generating module format how to add it description =================================== =================== ====================== ================================================== ================= mapq1, mapq2 `parse/parse2` number from 0 to 255 `pairtools parse --add-columns mapq` `Mapping quality `_, as reported in .sam/.bam, $-10 log_{10}(P_{error})$ pos51, pos52 `parse/parse2` genomic coordinate `pairtools parse --add-columns pos5` 5' position of alignment (closer to read start) pos31, pos32 `parse/parse2` genomic coordinate `pairtools parse --add-columns pos3` 3' position of alignment (further from read start) cigar1, cigar2 `parse/parse2` string `pairtools parse --add-columns cigar` `CIGAR, or Compact Idiosyncratic Gapped Alignment Report `_ of alignment, as reported in .sam/.bam read_len1, read_len2 `parse/parse2` number `pairtools parse --add-columns read_len` read length matched_bp1, matched_bp2 `parse/parse2` number `pairtools parse --add-columns matched_bp` number of matched alignment basepairs to the reference algn_ref_span1, algn_ref_span2 `parse/parse2` number `pairtools parse --add-columns algn_ref_span` basepairs of reference covered by alignment algn_read_span1, algn_read_span2 `parse/parse2` number `pairtools parse --add-columns algn_read_span` basepairs of read covered by alignment dist_to_51, dist_to_52 `parse/parse2` number `pairtools parse --add-columns dist_to_5` distance to 5'-end of read dist_to_31, dist_to_32 `parse/parse2` number `pairtools parse --add-columns dist_to_3` distance to 3'-end of read seq1, seq2 `parse/parse2` string `pairtools parse --add-columns seq` sequence of alignment mismatches1, mismatches2 `parse/parse2` string `pairtools parse --add-columns mismatches` comma-separated list of mismatches 
relative to the reference, "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}" XB1/2,AS1/2,XS1/2 or any sam tag `parse/parse2` `pairtools parse --add-columns XA,XB,NM` format depends on `tag specification `_ walk_pair_type `parse/parse2` string `pairtools parse2 --add-pair-index` Type of the pair relative to R1 and R2 reads of paired-end sequencing, see `parsing docs `_ walk_pair_index `parse/parse2` number `pairtools parse2 --add-pair-index` Order of the pair in the complex walk, starting from 5'-end of left read, see `parsing docs `_ phase `phase` 0, 1 or "." `pairtools phase` Phase of alignment (haplotype 1, 2, or unphased), see `phasing walkthrough `_ rfrag1, rfrag2 `restrict` number `pairtools restrict` Unique index of the restriction fragment after annotating pairs positions, see `restriction walkthrough `_ rfrag_start1, rfrag_start2 `restrict` number `pairtools restrict` Coordinate of the start of restriction fragment rfrag_end1, rfrag_end2 `restrict` number `pairtools restrict` Coordinate of the end of restriction fragment =================================== =================== ====================== ================================================== ================= pairtools-1.0.3/doc/index.rst000066400000000000000000000054331452673171500161550ustar00rootroot00000000000000.. pairtools documentation master file, created by sphinx-quickstart on Wed Dec 6 12:32:49 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. Overview ======== `pairtools` is a simple and fast command-line framework to process sequencing data from a Hi-C experiment. `pairtools` perform various operations on Hi-C pairs and occupy the middle position in a typical Hi-C data processing pipeline: ..
figure:: _static/hic-processing-pipeline.png :width: 100% :alt: The diagram of a typical processing pipeline for Hi-C data :align: center In a typical Hi-C pipeline, DNA sequences (reads) are aligned to the reference genome, converted into ligation junctions and binned, thus producing a Hi-C contact map. `pairtools` aim to be an all-in-one tool for processing Hi-C pairs, and can perform the following operations: - detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end sequences of Hi-C DNA molecules - sort .pairs files for downstream analyses - detect, tag and remove PCR/optical duplicates - generate extensive statistics of Hi-C datasets - select Hi-C pairs given flexibly defined criteria - restore .sam alignments from Hi-C pairs `pairtools` produce .pairs files compliant with the `4DN standard `_. `pairtools` use a two-character notation to define pair types (see table :ref:`section-pair-types`). The full list of available pairtools: ============ ============================================== Pairtool Description ============ ============================================== dedup Find and remove PCR/optical duplicates. filterbycov Remove pairs from regions of high coverage. flip Flip pairs to get an upper-triangular matrix. markasdup Tag pairs as duplicates. merge Merge sorted .pairs/.pairsam files. parse Find ligation junctions in .sam, make .pairs. phase Phase pairs mapped to a diploid genome. restrict Assign restriction fragments to pairs. select Select pairs according to some condition. sort Sort a .pairs/.pairsam file. split Split a .pairsam file into .pairs and .sam. stats Calculate pairs statistics. ============ ============================================== Contents: .. toctree:: :hidden: self .. toctree:: :maxdepth: 3 quickstart installation parsing sorting formats stats technotes cli_tools ..
toctree:: :maxdepth: 3 :caption: Tutorials :titlesonly: ./examples/pairtools_walkthrough.ipynb ./examples/scaling_example.ipynb ./examples/pairtools_restrict_walkthrough.ipynb ./examples/pairtools_phase_walkthrough.ipynb ./examples/benchmark/benchmark.ipynb * :ref:`genindex` pairtools-1.0.3/doc/installation.rst000066400000000000000000000033571452673171500175520ustar00rootroot00000000000000Installation ============ Requirements ------------ - Python 3.x - Python packages `numpy` and `click` - Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`) and `samtools`. If available, `pairtools` can compress outputs with `bgzip`, `pbgzip` and `lz4`. Install using conda ------------------- We highly recommend using the `conda` package manager to install pre-compiled `pairtools` together with all its dependencies. To get it, you can either install the full `Anaconda `_ Python distribution or just the standalone `conda `_ package manager. With `conda`, you can install pre-compiled `pairtools` and all of its dependencies from the `bioconda `_ channel: .. code-block:: bash $ conda install -c conda-forge -c bioconda pairtools Install using pip ----------------- Alternatively, compile and install `pairtools` and its Python dependencies from PyPI using pip: .. code-block:: bash $ pip install pairtools Install the development version ------------------------------- Finally, you can install the latest development version of `pairtools` from github. First, make a local clone of the github repository: .. code-block:: bash $ git clone https://github.com/open2c/pairtools Then, you can compile and install `pairtools` in `the development mode `_, which installs the package without moving it to a system folder and thus allows immediate live-testing any changes in the python code. Please, make sure that you have `cython` installed! .. 
code-block:: bash $ cd pairtools $ pip install -e ./ pairtools-1.0.3/doc/parsing.rst000066400000000000000000000350171452673171500165120ustar00rootroot00000000000000Parsing sequence alignments into Hi-C pairs =========================================== Overview -------- Hi-C experiments aim to measure the frequencies of contacts between all pairs of loci in the genome. In these experiments, the spatial structure of chromosomes is first fixed with formaldehyde crosslinks, after which DNA is partially digested with restriction enzymes and then re-ligated back. Then, DNA is shredded into smaller pieces, released from the nucleus, sequenced and aligned to the reference genome. The resulting sequence alignments reveal if DNA molecules were formed through ligations between DNA from different locations in the genome. These ligation events imply that ligated loci were close to each other when the ligation enzyme was active, i.e. they formed "a contact". ``pairtools parse`` detects ligation events in the aligned sequences of DNA molecules formed in Hi-C experiments and reports them in the .pairs/.pairsam format. Terminology ----------- Throughout this document we will be using the same visual language to describe how DNA sequences (in the .fastq format) are transformed into sequence alignments (.sam/.bam) and into ligation events (.pairs). .. figure:: _static/terminology.png :scale: 50 % :alt: The visual language to describe transformation of Hi-C data :align: center DNA sequences (reads) are aligned to the reference genome and converted into ligation events Short-read sequencing determines the sequences of both ends (or, **sides**) of DNA molecules (typically 50-300 bp), producing **read pairs** in .fastq format (shown in the first row on the figure above).
In such reads, base pairs are reported from the tips inwards, which is also defined as the **5'->3'** direction (in accordance with the 5'->3' direction of the DNA strand sequenced on the corresponding side of the read). Alignment software maps both reads of a pair to the reference genome, producing **alignments**, i.e. segments of the reference genome with matching sequences. Typically, if the read length is not very large (< 150 bp), there will be only two alignments per read pair, one on each side. But, sometimes, the parts of one or both sides may map to different locations on the genome, producing more than two alignments per DNA molecule (see :ref:`section-walks`). ``pairtools parse`` converts alignments into **ligation events** (aka **Hi-C pairs** aka **pairs**). In the simplest case, when each side has only one unique alignment (i.e. the whole side maps to a single unique segment of the genome), for each side, we report the chromosome, the genomic position of the outer-most (5') aligned base pair and the strand of the reference genome that the read aligns to. ``pairtools parse`` assigns to such pairs the type ``UU`` (unique-unique). Unmapped/multimapped reads -------------------------- Sometimes, one side or both sides of a read pair may not align to the reference genome: .. figure:: _static/read_pair_NU_NN.png :scale: 50 % :alt: Read pairs missing an alignment on one or both sides :align: center Read pairs missing an alignment on one or both sides In this case, ``pairtools parse`` fills in the chromosome of the corresponding side of the Hi-C pair with ``!``, the position with ``0`` and the strand with ``-``. Such pairs are reported as type ``NU`` (null-unique, when the other side has a unique alignment) or ``NN`` (null-null, when both sides lack any alignment). Similarly, when one or both sides map to many genome locations equally well (i.e.
have non-unique, or, multi-mapping alignments), ``pairtools parse`` reports the corresponding sides as (chromosome= ``!``, position= ``0``, strand= ``-``) and type ``MU`` (multi-unique) or ``MM`` (multi-multi) or ``NM`` (null-multi), depending on the type of the alignment on the other side. .. figure:: _static/read_pair_MU_MM_NM.png :scale: 50 % :alt: Read pairs with a non-unique alignment on one or both sides :align: center Read pairs with a non-unique (multi-) alignment on one side ``pairtools parse`` considers an alignment to be multi-mapping when its `MAPQ score `_ (which depends on the scoring gap between the two best candidate alignments for a segment) is lower than the value specified with the ``--min-mapq`` flag (by default, 1). .. _section-walks: Multiple ligations (walks) -------------------------- If the read is long enough (e.g. larger than 150 bp), it may contain more than two alignments: .. figure:: _static/read_pair_WW.png :scale: 50 % :alt: A sequenced Hi-C molecule that was formed via multiple ligations :align: center A sequenced Hi-C molecule that was formed via multiple ligations Molecules like these typically form via multiple ligation events and we call them walks [1]_. The mode of walks reporting is controlled by the ``--walks-policy`` parameter of ``pairtools parse``. You can report all the alignments in the reads by using ``pairtools parse2`` (see :ref:`parse2`). A pair of sequential alignments on a single read is a **ligation junction**. Ligation junctions are the Hi-C contacts that have been directly observed in the experiment. However, traditional Hi-C pairs do not have direct evidence of ligation because they arise from read pairs that do not necessarily contain a ligation junction.
To filter out the molecules with complex walks, ``--walks-policy`` can be set to: - ``mask`` to tag these molecules as type ``WW`` (single ligations are rescued, see :ref:`Rescuing single ligations`), - ``5any`` to report the 5'-most alignment on each side, - ``5unique`` to report the 5'-most unique alignment on each side, - ``3any`` to report the 3'-most alignment on each side, - ``3unique`` to report the 3'-most unique alignment on each side, - ``all`` to report all sequential alignments (complex ligations are rescued, see :ref:`Rescuing complex walks`). Parse modes for walks: .. figure:: _static/rescue_modes.svg :width: 60 % :alt: Parse modes for walks :align: center Rescuing single ligations ------------------------- Importantly, some DNA molecules containing only one ligation junction may still end up with three alignments: .. figure:: _static/read_pair_UR.png :scale: 50 % :alt: Not all read pairs with three alignments come from "walks" :align: center Not all read pairs with three alignments come from "walks" A molecule formed via a single ligation gets three alignments when one of the two ligated DNA pieces is shorter than the read length, such that the read on the corresponding side sequences through the ligation junction and into the other piece [2]_. The amount of such molecules depends on the type of the restriction enzyme, the typical size of DNA molecules in the Hi-C library and the read length, and sometimes can be considerable. ``pairtools parse`` detects such molecules and **rescues** them (i.e. changes their type from a *walk* to a single-ligation molecule). It tests walks with three alignments using three criteria: .. figure:: _static/read_pair_UR_criteria.png :scale: 50 % :alt: The three criteria used for "rescue" :align: center The three criteria used to "rescue" three-alignment walks: cis, point towards each other, short distance 1.
On the side with two alignments (the **chimeric** side), the "inner" (or, 3') alignment must be on the same chromosome as the alignment on the non-chimeric side. 2. The "inner" alignment on the chimeric side and the alignment on the non-chimeric side must point toward each other. 3. These two alignments must be within the distance specified with the ``--max-molecule-size`` flag (by default, 2000bp). Sometimes, the "inner" alignment on the chimeric side can be non-unique or "null" (i.e. when the unmapped segment is longer than ``--max-inter-align-gap``, as described in :ref:`Interpreting gaps between alignments`). ``pairtools parse`` ignores such alignments altogether and thus rescues such *walks* as well. .. figure:: _static/read_pair_UR_MorN.png :scale: 50 % :alt: A walk with three alignments gets rescued, when the middle alignment is multi- or null :align: center A walk with three alignments gets rescued, when the middle alignment is multi- or null. Interpreting gaps between alignments ------------------------------------ Reads that are only partially aligned to the genome can be interpreted in two different ways. One possibility is to assume that this molecule was formed via at least two ligations (i.e. it's a *walk*) but the non-aligned part (a **gap**) was missing from the reference genome for one reason or another. Another possibility is to simply ignore this gap (for example, because it could be an insertion or a technical artifact), thus assuming that our molecule was formed via a single ligation and has to be reported: .. figure:: _static/read_pair_gaps_vs_null_alignment.png :scale: 50 % :alt: A gap between alignments can be ignored or interpreted as a "null" alignment :align: center A gap between alignments can be interpreted as a legitimate segment without an alignment or simply ignored Both options have their merits, depending on a dataset, quality of the reference genome and sequencing.
``pairtools parse`` ignores shorter *gaps* and keeps longer ones as "null" alignments. The maximal size of ignored *gaps* is set by the ``--max-inter-align-gap`` flag (by default, 20bp). Rescuing complex walks ------------------------- We call the multi-fragment DNA molecule that is formed during Hi-C (or any other chromosome capture with sequencing) a walk. If the reads are long enough, the right (reverse) read might read through the left (forward) read. Thus, the left read might span multiple ligation junctions of the right read. The pairs of contacts that overlap between left and right reads are intermolecular duplicates that should be removed. If the walk has no more than two different fragments at one side of the read, this can be rescued with simple ``pairtools parse --walks-policy mask``. However, in complex walks (two fragments on both reads or more than two fragments on any side) you need specialized functionality that will report all the deduplicated pairs in the complex walks. This is especially relevant if the read length is > 100 bp, since more than 20% of all restriction fragments in the genome are then shorter than the read length. We put together some statistics about the number of short restriction fragments for the DpnII enzyme: ======== ================= ================== ================== ================== ================== Genome #rfrags <50 bp <100 bp <150 bp <175 bp <200 bp -------- ----------------- ------------------ ------------------ ------------------ ------------------ hg38 828538 (11.5%) 1452918 (20.2%) 2121479 (29.5%) 2587250 (35.9%) 2992757 (41.6%) mm10 863614 (12.9%) 1554461 (23.3%) 2236609 (33.5%) 2526150 (37.9%) 2780769 (41.7%) dm3 65327 (19.6%) 108370 (32.5%) 142662 (42.8%) 156886 (47.1%) 169339 (50.9%) ======== ================= ================== ================== ================== ================== Consider the read with overlapping left and right sides: ..
figure:: _static/rescue_modes_readthrough.svg :width: 60 % :alt: Complex walk with overlap :align: center ``pairtools`` can detect such molecules and parse them. Briefly, we detect all the unique ligation junctions, and do not report the same junction as a pair multiple times. To parse complex walks, you may use ``pairtools parse --walks-policy all`` and ``parse2``, which have slightly different functionalities. ``pairtools parse --walks-policy all`` is used with regular paired-end Hi-C, when you want all pairs in the walk to be reported as if they appeared in the sequencing data independently. ``parse2`` is used with single-end data or when you want to customize your reporting (orientation, position of alignments, or perform combinatorial expansion). For example, ``parse2`` defaults to reporting ligation junctions instead of outer ends of the alignments. The complete guide through the reporting options of ``parse2``, orientation: .. figure:: _static/report-orientation.svg :width: 60 % :alt: parse2 --report-orientation :align: center position: .. figure:: _static/report-positions.svg :width: 60 % :alt: parse2 --report-position :align: center Sometimes it is important to restore the sequence of ligation events (e.g., for MC-3C data). For that, you can add special columns ``walk_pair_index`` and ``walk_pair_type`` by setting the ``--add-pair-index`` option of ``parse2``, which will keep the order and type of pair in the whole walk in the output .pairs file. - ``walk_pair_index`` contains information on the order of the pair in the complex walk, starting from 5'-end of left read - ``walk_pair_type`` describes the type of the pair relative to R1 and R2 reads of paired-end sequencing: - "R1-2" - unconfirmed pair, right and left alignments in the pair originate from different reads (left or right). This might be indirect ligation (mediated by other DNA fragments). - "R1" - pair originates from the left read. This is direct ligation.
- "R2" - pair originates from the right read. Direct ligation. - "R1&2" - pair was sequenced at both left and right read. Direct ligation. With this information, the whole sequence of ligation events can be restored from the .pairs file. Combinatorial expansion is a way to increase the number of contacts in your data, which assumes that all DNA fragments in the same molecule (read) are in contact. Use the ``--expand`` parameter for combinatorial expansion. Note that expanded pairs have a modified pair type, "E{separation}_{pair type}", e.g.: - "E1_R1" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of the next pair in R1 sequence of the same read. - "E2_R1" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of the pair separated by 2 alignments in R1 sequence of the same read. - "E2_R1&2" as above, both source pairs were sequenced on both R1 and R2. - "E4_R1-2" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of some pair in R1 sequence, separated by at least 4 alignments in between. Note that "-" in the pair type means that the pair is separated by an unsequenced gap, which may contain other pairs. .. [1] Following the lead of `C-walks `_ .. [2] This procedure was first introduced in `HiC-Pro `_ and then in `Juicer `_ . pairtools-1.0.3/doc/quickstart.rst000066400000000000000000000024131452673171500172330ustar00rootroot00000000000000Quickstart ========== Install `pairtools` and all of its dependencies using the `conda `_ package manager and the `bioconda `_ channel for bioinformatics software. .. code-block:: bash $ conda install -c conda-forge -c bioconda pairtools Set up a new test folder and download a small Hi-C dataset mapped to the sacCer3 genome: ..
code-block:: bash $ mkdir /tmp/test-pairtools $ cd /tmp/test-pairtools $ wget https://github.com/open2c/distiller-test-data/raw/master/bam/MATalpha_R1.bam Additionally, we will need a .chromsizes file, a TAB-separated plain text table describing the names, sizes and the order of chromosomes in the genome assembly used during mapping: .. code-block:: bash $ wget https://raw.githubusercontent.com/open2c/distiller-test-data/master/genome/sacCer3.reduced.chrom.sizes With `pairtools parse`, we can convert paired-end sequence alignments stored in .sam/.bam format into .pairs, a TAB-separated table of Hi-C ligation junctions: .. code-block:: bash $ pairtools parse -c sacCer3.reduced.chrom.sizes -o MATalpha_R1.pairs.gz --drop-sam MATalpha_R1.bam Inspect the resulting table: .. code-block:: bash $ less MATalpha_R1.pairs.gz pairtools-1.0.3/doc/sorting.rst000066400000000000000000000076131452673171500165350ustar00rootroot00000000000000Sorting pairs ============= In order to enable efficient random access to Hi-C pairs, we **flip** and **sort** pairs. After sorting, interactions become arranged in the order of their genomic position, such that, for any given pair of regions, we easily find and extract all of their interactions. And, after flipping, all artificially duplicated molecules (either during PCR or in optical sequencing) end up in adjacent rows in sorted lists of interactions, such that we can easily identify and remove them. Sorting ------- ``pairtools sort`` arrange pairs in the order of (chrom1, chrom2, pos1, pos2). This order is also known as *block sorting*, because all pairs between any given pair of chromosomes become grouped into one continuous block. Additionally, ``pairtools sort`` also sorts pairs with identical positions by `pair_type`. This does not really do much for mapped reads, but it nicely splits unmapped reads into blocks of null-mapped and multi-mapped reads. 
We note that there is an alternative to block sorting, called *row sorting*, where pairs are sorted by (chrom1, pos1, chrom2, pos2). In `pairtools sort`, we prefer block-sorting since it cleanly separates cis interactions from trans ones and thus is a more optimal solution for typical use cases. Flipping -------- In a typical paired-end experiment, *side1* and *side2* of a DNA molecule are defined by the order in which they got sequenced. Since this order is essentially random, any given Hi-C pair, e.g. (chr1, 1.1Mb; chr2, 2.1Mb), may appear in a reversed orientation, i.e. (chr2, 2.1Mb; chr1, 1.1Mb). If we were to preserve this order of sides, interactions between same loci would appear in two different locations of the sorted pair list, which would complicate finding PCR/optical duplicates. To ensure that Hi-C pairs with similar coordinates end up in the same location of the sorted list, we **flip** pairs, i.e. we choose *side1* as the side with the lowest genomic coordinate. Thus, after flipping, for *trans* pairs (chrom1!=chrom2), order(chrom1)`_), - number of contacts between all chromosome pairs - **Summary statistics** include: - fraction of duplicates - fraction of cis interactions (at different minimal distance cutoffs) out of total - estimation of library complexity Summary statistics can inform you about the quality of the data. For example, more trans interactions can be a sign of problems with the 3C+ procedure and lower signal-to-noise ratio. Substantial mapping to mitochondrial chromosome (chrM) might be a sign of random ligation. - **P(s), or scaling.** The dependence of contact frequency on the genomic distance referred to as the P(s) curve or scaling, which is a rich source of both biologically relevant information and technical quality of 3C+ experiments. The shape of P(s) is often used to characterize mechanisms of genome folding and reveal issues with QC. Interactive visualization of stats with MultiQC --------- Install `multiqc`: .. 
code-block:: bash pip install --upgrade --force-reinstall git+https://github.com/open2c/MultiQC.git Note that (for now) the pairtools module for MultiQC is only available in the open2C fork and not in the main MultiQC repository. Run MultiQC in a folder with one or multiple .stats files: .. code-block:: bash multiqc . This will produce a nice .html file with interactive graphical summaries of the stats. Estimating library complexity ----------------------------- Pairtools assumes that each sequencing read is randomly chosen with replacement from a finite pool of fragments in DNA library [1]_ [2]_. With each new sequenced molecule, the expected number of observed unique molecules increases according to a simple equation: $$ U(N+1) = U(N) + (1 - {U(N) \\over C}), $$ where $N$ is the number of sequenced molecules, $U(N)$ is the expected number of observed unique molecules after sequencing $N$ molecules, and $C$ is the library complexity. Treating $N$ as continuous, this recurrence becomes a differential equation whose solution is [1, 2]: $$ {U(N) \\over C} = 1 - \\exp( - {N \\over C}), $$ which can be solved for $C$ as $$ {N \\over C} = \\Re( W( - { \\exp( - {1 \\over u} ) \\over u} ) ) + {1 \\over u}, $$ where $u = U(N) / N$ is the observed fraction of unique molecules and $W$ is the Lambert W function. Library complexity can guide the choice of sequencing depth of the library and provide an estimate of library quality. Illumina sequencing duplicates ------------------------------ Importantly, you can estimate the complexity of Hi-C libraries using only small QC samples to decide if their quality permits deeper sequencing [3]_. These estimates, however, can be significantly biased by the presence of “optical” or “clustering” duplicates. Such duplicates occur as artefacts of the sequencing procedure. Optical duplicates appear in data generated on sequencers with non-patterned flowcells in cases when the instrument erroneously splits a signal from a single sequenced molecule into two. On the other hand, clustering duplicates appear on patterned flowcells, when during cluster generation a cluster occupies adjacent nanowells [4]_. 
The rate of optical and clustering duplication depends on the technology and the operating conditions (e.g. molarity of the library loaded onto the flowcell), but not on the library complexity or sequencing depth. Thus, in small sequencing samples in particular the clustering duplication on recent Illumina instruments can severely inflate the observed levels of duplication [5]_, resulting in underestimation of the library complexity. While the frequency of PCR duplicates increases with sequencing depth, optical or clustering duplication levels may stay constant for a particular sequencer, provided the library is loaded at the same molarity. This means that the high frequency of clustering duplicates on the NovaSeq leads to severe underestimation of library complexity in the pilot runs. In particular, the recent models of Illumina sequencers with patterned flowcells (such as NovaSeq) suffer from increased clustering duplication rate, which may far exceed the level of PCR duplication. Luckily, optical and clustering duplicates can be distinguished from the PCR ones, as the former are located next to each other on the sequencing flow cell. In case of Illumina sequencers, pairtools dedup can infer the positions of sequencing reads from their IDs and focuses on geometrically distant duplicates to produce unbiased estimates of PCR duplication and library complexity. Although SRA does not store original read IDs from the sequencer, this analysis is possible when pairtools is run on a dataset with original Illumina-generated read IDs. Note that in our experience even when accounting for optical/clustering duplicates, the complexity can be greatly underestimated, but is still a useful measurement to choose the most complex libraries. .. [1] Picard. http://broadinstitute.github.io/picard/ .. [2] Thread: [Samtools-help] Pickard estimate for the size of a library - wrong or non-transparent? 
https://sourceforge.net/p/samtools/mailman/samtools-help/thread/DUB405-EAS154589A1ACEF2BE4C573D4592180@phx.gbl/ .. [3] Rao, S. S. P. et al. A 3D map of the human genome at kilobase resolution reveals principles of chromatin looping. Cell 159, 1665–1680 (2014). .. [4] Duplicates on Illumina. BioStars. https://www.biostars.org/p/229842/ .. [5] Illumina Patterned Flow Cells Generate Duplicated Sequences. https://sequencing.qcfail.com/articles/illumina-patterned-flow-cells-generate-duplicated-sequences/pairtools-1.0.3/doc/technotes.rst000066400000000000000000000110761452673171500170420ustar00rootroot00000000000000Technical notes =============== Designing scientific software and formats requires making a multitude of tantalizing technical decisions and compromises. Often, the reasons behind a certain decision are non-trivial and convoluted, involving many factors. Here, we collect the notes and observations made during the desing stage of `pairtools` and provide a justification for most non-trivial decisions. We hope that this document will elucidate the design of `pairtools` and may prove useful to developers in their future projects. .pairs format ------------- The motivation behind some of the technical decisions in the pairtools' flavor of .pairs/.pairsam: - `pairtools` can store SAM entries together with the Hi-C pair information in .pairsam files. Storing pairs and alignments in the same row enables easy tagging and filtering of paired-end alignments based on their Hi-C information. - `pairtools` use the exclamation mark "!" instead of '.' as 'chrom' of unmapped reads because it has the lowest lexicographic sorting order among all characters. The use of '0' and '-' in the 'pos' and 'strand' fields of unmapped reads allows us to keep the types of these fields as 'unsigned int' and enum{'+','-'}, respectively. - "rescued" pairs have two types "UR" and "RU" instead of just "RU". 
We chose this design because rescued pairs are two-sided and thus are flipped based on (chrom, pos), and not based on the side types. With two pair types "RU" and "UR", `pairtools` can keep track of which side of the pair was rescued. - in "rescued" pairs, the type "R" is assigned to the non-chimeric side. This may seem counter-intuitive at first, since it is the chimeric side that gets rescued, but this way `pairtools` can keep track of the type of the 5' alignment on the chimeric side (the alignment on the non-chimeric side has to be unique for the pair to be rescued). - `pairtools` rely on a text format, .pairs, instead of hdf5/parquet-based tables or custom binaries. We went with a text format for a few reasons: - text tables enable easy access to data from any language and any tool. This is especially important at the level of Hi-C pairs, the "rawest" format of information from a Hi-C experiment. - hdf5 and parquet have a few shortcomings that hinder their immediate use in `pairtools`. Specifically, hdf5 cannot compress variable-length strings (which are, in turn, required to store sam alignments and some optional information on pairs) and parquet cannot append columns to existing files, modify datasets in place or store multiple tables in one file (which is required to keep table indices in the same file with pairs). - text tables have a set of well-developed and highly-optimized tools for sorting (Unix sort), compression (bgzip/lz4) and random access (tabix). - text formats enable easy streaming between individual command-line tools. Having said that, text formats have many downsides - they are bulky when not compressed, compression and parsing requires extra computational resources, they cannot be modified in place and random access requires extra tools. In the future, we plan to develop a binary format based on existing container formats, which would mitigate these downsides. 
CLI --- - many `pairtools` perform multiple actions at once, which contradicts the "do one thing" philosophy of Unix command line. We packed multiple (albeit, related) functions into one tool to improve the performance of `pairtools`. Specifically, given the large size of Hi-C data, a significant fraction of time is spent on compression/decompression, parsing, loading data into memory and sending it over network (for cloud/clusters). Packing multiple functions into one tool cuts down the amount of such time consuming operations. - ``pairtools parse`` requires a .chromsizes file to know the order of chromosomes and perform pair flipping. - `pairtools` use `bgzip `_ compression by default instead of gzip. Using `bgzip` allows us to create an index with `pairix `_ and get random access to data. - `paritools` have an option to compress outputs with `lz4 `_. `Lz4 is much faster and only slighly less efficient than gzip `_. This makes lz4 a better choice for passing data between individual pairtools before producing final result (which, in turn, requires bgzip compression). pairtools-1.0.3/pairtools/000077500000000000000000000000001452673171500155565ustar00rootroot00000000000000pairtools-1.0.3/pairtools/__init__.py000066400000000000000000000002601452673171500176650ustar00rootroot00000000000000""" pairtools ~~~~~~~~~ CLI tools to process mapped Hi-C data :copyright: (c) 2017-2023 Open2C :author: Open2C :license: MIT """ __version__ = "1.0.3" # from . 
def _install_post_mortem_hook():
    """Replace ``sys.excepthook`` with one that opens a post-mortem debugger.

    The installed hook prints the traceback of the uncaught exception and then
    starts ``pdb.pm()``. ``ipdb`` is used when it can be imported; otherwise
    the standard-library ``pdb`` is used.
    """
    import traceback

    try:
        import ipdb as pdb
    except ImportError:
        import pdb

    def _excepthook(exc_type, value, tb):
        traceback.print_exception(exc_type, value, tb)
        print()
        pdb.pm()

    sys.excepthook = _excepthook


@click.version_option(version=__version__)
@click.group(context_settings=CONTEXT_SETTINGS)
@click.option(
    "--post-mortem", help="Post mortem debugging", is_flag=True, default=False
)
@click.option(
    "--output-profile",
    help="Profile performance with Python cProfile and dump the statistics "
    "into a binary file",
    type=str,
    default="",
)
@click.option("-v", "--verbose", help="Verbose logging.", count=True)
@click.option(
    "-d",
    "--debug",
    help="On error, drop into the post-mortem debugger shell.",
    is_flag=True,
    default=False,
)
def cli(post_mortem, output_profile, verbose, debug):
    """Flexible tools for Hi-C data processing.

    All pairtools have a few common options, which should be typed _before_
    the command name.

    """
    # NOTE: the docstring above is the help text click shows to the user,
    # so implementation notes live in comments instead.
    #
    # post_mortem / debug: both flags install the same post-mortem excepthook.
    # output_profile: path for a cProfile dump written at interpreter exit.
    # verbose: count of -v flags; 0 -> INFO, >=1 -> DEBUG, >=2 -> also dump
    #          psutil process information at exit.

    # `--post-mortem` and `--debug` request the same behaviour, so the hook is
    # installed once, before any other setup can fail.
    if post_mortem or debug:
        _install_post_mortem_hook()

    if output_profile:
        import cProfile
        import atexit

        pr = cProfile.Profile()
        pr.enable()

        def _atexit_profile_hook():
            # Stop profiling and persist the statistics when the process ends.
            pr.disable()
            pr.dump_stats(output_profile)

        atexit.register(_atexit_profile_hook)

    # Initialize logging to stderr
    logging.basicConfig(stream=sys.stderr)
    logging.captureWarnings(True)
    root_logger = get_logger()

    # Set verbosity level
    if verbose > 0:
        root_logger.setLevel(logging.DEBUG)
        if verbose > 1:  # pragma: no cover
            try:
                import psutil
                import atexit

                @atexit.register
                def process_dump_at_exit():
                    process_attrs = [
                        "cmdline",
                        # 'connections',
                        "cpu_affinity",
                        "cpu_num",
                        "cpu_percent",
                        "cpu_times",
                        "create_time",
                        "cwd",
                        # 'environ',
                        "exe",
                        # 'gids',
                        "io_counters",
                        "ionice",
                        "memory_full_info",
                        # 'memory_info',
                        # 'memory_maps',
                        "memory_percent",
                        "name",
                        "nice",
                        "num_ctx_switches",
                        "num_fds",
                        "num_threads",
                        "open_files",
                        "pid",
                        "ppid",
                        "status",
                        "terminal",
                        "threads",
                        # 'uids',
                        "username",
                    ]
                    p = psutil.Process()
                    info_ = p.as_dict(process_attrs, ad_value="")
                    for key in process_attrs:
                        # Lazy %-formatting: same message text as before.
                        root_logger.debug("PSINFO:'%s': %s", key, info_[key])

            except ImportError:
                root_logger.warning("Install psutil to see process information.")
    else:
        root_logger.setLevel(logging.INFO)
@cli.command()
@click.argument("pairs_path", type=str, required=False)
### Output files:
@click.option(
    "-o",
    "--output",
    type=str,
    default="",
    help="output file for pairs after duplicate removal."
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed."
    " By default, the output is printed into stdout.",
)
@click.option(
    "--output-dups",
    type=str,
    default="",
    help="output file for duplicated pairs. "
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed."
    " If the path is the same as in --output or -, output duplicates together "
    " with deduped pairs. By default, duplicates are dropped.",
)
@click.option(
    "--output-unmapped",
    type=str,
    default="",
    help="output file for unmapped pairs. "
    "If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed. "
    "If the path is the same as in --output or -, output unmapped pairs together "
    "with deduped pairs. If the path is the same as --output-dups, output "
    "unmapped reads together with dups. By default, unmapped pairs are dropped.",
)
@click.option(
    "--output-stats",
    type=str,
    default="",
    help="output file for duplicate statistics."
    " If file exists, it will be open in the append mode."
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed."
    " By default, statistics are not printed.",
)
@click.option(
    "--output-bytile-stats",
    type=str,
    default="",
    help="output file for duplicate statistics."
    " Note that the readID should be provided and contain tile information for this option. "
    " This analysis is possible when pairtools is run on a dataset with original Illumina-generated read IDs, "
    " because SRA does not store original read IDs from the sequencer. "
    " By default, by-tile duplicate statistics are not printed. "
    " If file exists, it will be open in the append mode. "
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed.",
)
### Set the dedup method:
@click.option(
    "--max-mismatch",
    type=int,
    default=3,
    show_default=True,
    help="Pairs with both sides mapped within this distance (bp) from each "
    "other are considered duplicates. [dedup option]",
)
@click.option(
    "--method",
    type=click.Choice(["max", "sum"]),
    default="max",
    help="define the mismatch as either the max or the sum of the mismatches of "
    "the genomic locations of the both sides of the two compared molecules. [dedup option]",
    show_default=True,
)
@click.option(
    "--backend",
    type=click.Choice(["scipy", "sklearn", "cython"]),
    default="scipy",
    help="What backend to use: scipy and sklearn are based on KD-trees,"
    " cython is online indexed list-based algorithm."
    " With cython backend, duplication is not transitive with non-zero max mismatch "
    " (e.g. pairs A and B are duplicates, and B and C are duplicates, then A and C are "
    " not necessary duplicates of each other), while with scipy and sklearn it's "
    " transitive (i.e. A and C are necessarily duplicates)."
    " Cython is the original version used in pairtools since its beginning."
    " It is available for backwards compatibility and to allow specification of the"
    " column order."
    " Now the default scipy backend is generally the fastest, and with chunksize below"
    " 1 mln has the lowest memory requirements. [dedup option]"
    # " 'cython' is deprecated and provided for backwards compatibility",
)
### Scipy and sklearn-specific options:
@click.option(
    "--chunksize",
    type=int,
    default=10_000,
    show_default=True,
    help="Number of pairs in each chunk. Reduce for lower memory footprint."
    " Below 10,000 performance starts suffering significantly and the algorithm might"
    " miss a few duplicates with non-zero --max-mismatch."
    " Only works with '--backend scipy or sklearn'. [dedup option]",
)
@click.option(
    "--carryover",
    type=int,
    default=100,
    show_default=True,
    help="Number of deduped pairs to carry over from previous chunk to the new chunk"
    " to avoid breaking duplicate clusters."
    " Only works with '--backend scipy or sklearn'. [dedup option]",
)
@click.option(
    "-p",
    "--n-proc",
    type=int,
    default=1,
    help="Number of cores to use. Only applies with sklearn backend. "
    "Still needs testing whether it is ever useful. [dedup option]",
)
### Output options:
@click.option(
    "--mark-dups",
    is_flag=True,
    help='If specified, duplicate pairs are marked as DD in "pair_type" and '
    "as a duplicate in the sam entries. [output format option]",
)
@click.option(
    "--keep-parent-id",
    is_flag=True,
    help="If specified, duplicate pairs are marked with the readID of the retained"
    " deduped read in the 'parent_readID' field. [output format option]",
)
@click.option(
    "--extra-col-pair",
    nargs=2,
    # type=click.Tuple([str, str]),
    multiple=True,
    help="Extra columns that also must match for two pairs to be marked as "
    "duplicates. Can be either provided as 0-based column indices or as column "
    'names (requires the "#columns" header field). The option can be provided '
    "multiple times if multiple column pairs must match. "
    'Example: --extra-col-pair "phase1" "phase2". [output format option]',
)
### Input options:
@click.option(
    "--sep",
    type=str,
    default=pairsam_format.PAIRSAM_SEP_ESCAPE,
    help=r"Separator (\t, \v, etc. characters are "
    "supported, pass them in quotes). [input format option]",
)
@click.option(
    "--send-header-to",
    type=click.Choice(["dups", "dedup", "both", "none"]),
    default="both",
    help="Which of the outputs should receive header and comment lines. [input format option]",
)
@click.option(
    "--c1",
    type=int,
    default=pairsam_format.COL_C1,
    help=f"Chrom 1 column; default {pairsam_format.COL_C1}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--c2",
    type=int,
    default=pairsam_format.COL_C2,
    help=f"Chrom 2 column; default {pairsam_format.COL_C2}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--p1",
    type=int,
    default=pairsam_format.COL_P1,
    help=f"Position 1 column; default {pairsam_format.COL_P1}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--p2",
    type=int,
    default=pairsam_format.COL_P2,
    help=f"Position 2 column; default {pairsam_format.COL_P2}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--s1",
    type=int,
    default=pairsam_format.COL_S1,
    help=f"Strand 1 column; default {pairsam_format.COL_S1}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--s2",
    type=int,
    default=pairsam_format.COL_S2,
    help=f"Strand 2 column; default {pairsam_format.COL_S2}"
    " Only works with '--backend cython'. [input format option]",
)
@click.option(
    "--unmapped-chrom",
    type=str,
    default=pairsam_format.UNMAPPED_CHROM,
    help="Placeholder for a chromosome on an unmapped side; default {}".format(
        pairsam_format.UNMAPPED_CHROM
    ),
)
# Output stats option
@click.option(
    "--yaml/--no-yaml",
    is_flag=True,
    default=False,
    help="Output stats in yaml format instead of table. [output stats format option]",
)
# Filtering options for reporting stats:
@click.option(
    "--filter",
    default=None,
    required=False,
    multiple=True,
    help="Filter stats with condition to apply to the data (similar to `pairtools select` or `pairtools stats`). "
    "For non-YAML output only the first filter will be reported. [output stats filtering option] "
    "Note that this will not change the deduplicated output pairs. "
    """Example: pairtools dedup --yaml --filter 'unique:(pair_type=="UU")' --filter 'close:(pair_type=="UU") and (abs(pos1-pos2)<10)' --output-stats - test.pairs """,
)
@click.option(
    "--engine",
    default="pandas",
    required=False,
    help="Engine for regular expression parsing for stats filtering. "
    "Python will provide you regex functionality, while pandas does not accept "
    "custom functions and works faster. [output stats filtering option]",
)
@click.option(
    "--chrom-subset",
    type=str,
    default=None,
    required=False,
    help="A path to a chromosomes file (tab-separated, 1st column contains "
    "chromosome names) containing a chromosome subset of interest for stats filter. "
    "If provided, additionally filter pairs with both sides originating from "
    "the provided subset of chromosomes. This operation modifies the #chromosomes: "
    "and #chromsize: header fields accordingly. "
    "Note that this will not change the deduplicated output pairs. [output stats filtering option]",
)
@click.option(
    "--startup-code",
    type=str,
    default=None,
    required=False,
    help="An auxiliary code to execute before filtering for stats. "
    "Use to define functions that can be evaluated in the CONDITION statement. [output stats filtering option]",
)
@click.option(
    "-t",
    "--type-cast",
    type=(str, str),
    default=(),
    multiple=True,
    help="Cast a given column to a given type for stats filtering. By default, only pos and mapq "
    "are cast to int, other columns are kept as str. Provide as "
    "-t <column_name> <type>, e.g. -t read_len1 int. Multiple entries are allowed. [output stats filtering option]",
)
@common_io_options
def dedup(
    pairs_path,
    output,
    output_dups,
    output_unmapped,
    output_stats,
    output_bytile_stats,
    chunksize,
    carryover,
    max_mismatch,
    method,
    sep,
    send_header_to,
    c1,
    c2,
    p1,
    p2,
    s1,
    s2,
    unmapped_chrom,
    mark_dups,
    extra_col_pair,
    keep_parent_id,
    backend,
    n_proc,
    **kwargs,
):
    """Find and remove PCR/optical duplicates.

    Find PCR/optical duplicates in an upper-triangular flipped sorted
    pairs/pairsam file. Allow for a +/-N bp mismatch at each side of duplicated
    molecules.

    PAIRS_PATH : input triu-flipped sorted .pairs or .pairsam file. If the
    path ends with .gz/.lz4, the input is decompressed by bgzip/lz4c.
    By default, the input is read from stdin.
    """
    # Thin click wrapper: the docstring above is the user-facing help text;
    # all work is delegated to dedup_py so it can also be called from Python.
    dedup_py(
        pairs_path,
        output,
        output_dups,
        output_unmapped,
        output_stats,
        output_bytile_stats,
        chunksize,
        carryover,
        max_mismatch,
        method,
        sep,
        send_header_to,
        c1,
        c2,
        p1,
        p2,
        s1,
        s2,
        unmapped_chrom,
        mark_dups,
        extra_col_pair,
        keep_parent_id,
        backend,
        n_proc,
        **kwargs,
    )


def dedup_py(
    pairs_path,
    output,
    output_dups,
    output_unmapped,
    output_stats,
    output_bytile_stats,
    chunksize,
    carryover,
    max_mismatch,
    method,
    sep,
    send_header_to,
    c1,
    c2,
    p1,
    p2,
    s1,
    s2,
    unmapped_chrom,
    mark_dups,
    extra_col_pair,
    keep_parent_id,
    backend,
    n_proc,
    **kwargs,
):
    """Open the requested streams, run the selected dedup backend and write outputs.

    Positional parameters mirror the options of the ``dedup`` command.
    ``kwargs`` carries the remaining options that are read here:
    ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out``, ``filter``, ``yaml``,
    ``startup_code``, ``type_cast`` and ``engine``.

    Raises:
        ValueError: if ``backend`` is not "cython", "scipy" or "sklearn".
    """
    # The separator arrives escaped (e.g. the two characters "\" and "t");
    # literal_eval turns it into the real character without executing code.
    sep = ast.literal_eval('"""' + sep + '"""')
    send_header_to_dedup = send_header_to in ["both", "dedup"]
    send_header_to_dup = send_header_to in ["both", "dups"]

    def _open_output(path):
        # Every output shares the same compression settings.
        return fileio.auto_open(
            path,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = _open_output(output)

    # Open the stats stream only when statistics were actually requested.
    out_stats_stream = _open_output(output_stats) if output_stats else None

    bytile_dups = bool(output_bytile_stats)
    out_bytile_stats_stream = None
    if bytile_dups:
        out_bytile_stats_stream = _open_output(output_bytile_stats)
        if not output_stats:
            # The by-tile table is produced by the stats counter, which is only
            # created when --output-stats is given; say so instead of silently
            # producing nothing.
            logger.warning(
                "--output-bytile-stats has no effect without --output-stats."
            )
        if not keep_parent_id:
            logger.warning(
                "Force --keep-parent-id because --output-bytile-stats provided."
            )
            keep_parent_id = True

    # generate empty PairCounter if stats output is requested:
    first_filter_name = "no_filter"  # default filter name for full output
    out_stat = None
    if output_stats:
        filters = kwargs.get("filter", None)
        if filters:
            # Each filter is given as "name:condition".
            first_filter_name = filters[0].split(":", 1)[0]
            if len(filters) > 1 and not kwargs.get("yaml", False):
                logger.warning(
                    f"Output the first filter only in non-YAML output: {first_filter_name}"
                )
            filters = dict(f.split(":", 1) for f in filters)
        else:
            filters = None

        out_stat = PairCounter(
            bytile_dups=bytile_dups,
            filters=filters,
            startup_code=kwargs.get("startup_code", ""),  # for evaluation of filters
            type_cast=kwargs.get("type_cast", ()),  # for evaluation of filters
            engine=kwargs.get("engine", "pandas"),
        )

    # Duplicates may share the main output stream ("-" or the same path).
    if not output_dups:
        outstream_dups = None
    elif output_dups == "-" or (
        pathlib.Path(output_dups).absolute() == pathlib.Path(output).absolute()
    ):
        outstream_dups = outstream
    else:
        outstream_dups = _open_output(output_dups)

    # Unmapped pairs may share the main output or the duplicates output.
    if not output_unmapped:
        outstream_unmapped = None
    elif output_unmapped == "-" or (
        pathlib.Path(output_unmapped).absolute() == pathlib.Path(output).absolute()
    ):
        outstream_unmapped = outstream
    elif (
        pathlib.Path(output_unmapped).absolute()
        == pathlib.Path(output_dups).absolute()
    ):
        outstream_unmapped = outstream_dups
    else:
        outstream_unmapped = _open_output(output_unmapped)

    header, body_stream = headerops.get_header(instream)
    if not any(line.startswith("#sorted") for line in header):
        logger.warning(
            "Pairs file appears not to be sorted, dedup might produce wrong results."
        )
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    if send_header_to_dedup:
        outstream.writelines(line + "\n" for line in header)
    if send_header_to_dup and outstream_dups and (outstream_dups != outstream):
        dups_header = header
        if keep_parent_id and len(dups_header) > 0:
            dups_header = headerops.append_columns(dups_header, ["parent_readID"])
        outstream_dups.writelines(line + "\n" for line in dups_header)
    if (
        outstream_unmapped
        and (outstream_unmapped != outstream)
        and (outstream_unmapped != outstream_dups)
    ):
        outstream_unmapped.writelines(line + "\n" for line in header)

    column_names = headerops.extract_column_names(header)

    # Resolve --extra-col-pair values to column names. Values arrive as
    # strings, so a numeric index must be converted with int() before it can
    # be used to index the list of column names.
    extra_cols1 = []
    extra_cols2 = []
    if extra_col_pair is not None:
        for col1, col2 in extra_col_pair:
            extra_cols1.append(column_names[int(col1)] if col1.isdigit() else col1)
            extra_cols2.append(column_names[int(col2)] if col2.isdigit() else col2)

    if backend == "cython":
        # warnings.warn(
        #    "'cython' backend is deprecated and provided only"
        #    " for backwards compatibility",
        #    DeprecationWarning,
        # )
        # The cython backend addresses columns by position.
        extra_cols1 = [column_names.index(col) for col in extra_cols1]
        extra_cols2 = [column_names.index(col) for col in extra_cols2]
        streaming_dedup_cython(
            method,
            max_mismatch,
            sep,
            c1,
            c2,
            p1,
            p2,
            s1,
            s2,
            extra_cols1,
            extra_cols2,
            unmapped_chrom,
            body_stream,
            outstream,
            outstream_dups,
            outstream_unmapped,
            out_stat,
            mark_dups,
            keep_parent_id,
        )
    elif backend in ("scipy", "sklearn"):
        streaming_dedup(
            in_stream=body_stream,
            colnames=column_names,
            chunksize=chunksize,
            carryover=carryover,
            method=method,
            mark_dups=mark_dups,
            max_mismatch=max_mismatch,
            # Resolved names: identical to the raw option values when names
            # were given, and usable when 0-based indices were given.
            # NOTE(review): assumes streaming_dedup looks columns up by name
            # (it receives `colnames`) - confirm against lib.dedup.
            extra_col_pairs=list(zip(extra_cols1, extra_cols2)),
            keep_parent_id=keep_parent_id,
            unmapped_chrom=unmapped_chrom,
            outstream=outstream,
            outstream_dups=outstream_dups,
            outstream_unmapped=outstream_unmapped,
            out_stat=out_stat,
            backend=backend,
            n_proc=n_proc,
        )
    else:
        raise ValueError("Unknown backend")

    # save statistics to a file if it was requested:
    if out_stat:
        out_stat.save(
            out_stats_stream,
            yaml=kwargs.get("yaml", False),  # format as yaml
            filter=first_filter_name
            if not kwargs.get("yaml", False)
            else None,  # output only the first filter if non-YAML output
        )
        if bytile_dups:
            out_stat.save_bytile_dups(out_bytile_stats_stream)

    # Close everything that was opened here, each stream once, and never the
    # process-wide standard streams.
    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()

    if outstream_dups and (outstream_dups != outstream):
        outstream_dups.close()

    if (
        outstream_unmapped
        and (outstream_unmapped != outstream)
        and (outstream_unmapped != outstream_dups)
    ):
        outstream_unmapped.close()

    for stats_stream in (out_stats_stream, out_bytile_stats_stream):
        if stats_stream and stats_stream != sys.stdout:
            stats_stream.close()


# Kept at the very end so that dedup_py is already defined when the module is
# executed as a script.
if __name__ == "__main__":
    dedup()
" "If the path is the same as in --output or -, output unmapped pairs together " "with deduped pairs. If the path is the same as --output-highcov, " "output unmapped reads together. By default, unmapped pairs are dropped.", ) @click.option( "--output-stats", type=str, default="", help="output file for statistics of multiple interactors. " " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, statistics are not printed.", ) @click.option( "--max-cov", type=int, default=8, help="The maximum allowed coverage per region." ) @click.option( "--max-dist", type=int, default=500, help="The resolution for calculating coverage. For each pair, the local " "coverage around each end is calculated as (1 + the number of neighbouring " "pairs within +/- max_dist bp) ", ) @click.option( "--method", type=click.Choice(["max", "sum"]), default="max", help="calculate the number of neighbouring pairs as either the sum or the max" " of the number of neighbours on the two sides", show_default=True, ) @click.option( "--sep", type=str, default=pairsam_format.PAIRSAM_SEP_ESCAPE, help=r"Separator (\t, \v, etc. 
characters are " "supported, pass them in quotes) ", ) @click.option( "--comment-char", type=str, default="#", help="The first character of comment lines" ) @click.option( "--send-header-to", type=click.Choice(["lowcov", "highcov", "both", "none"]), default="both", help="Which of the outputs should receive header and comment lines", ) @click.option( "--c1", type=int, default=pairsam_format.COL_C1, help="Chrom 1 column; default {}".format(pairsam_format.COL_C1), ) @click.option( "--c2", type=int, default=pairsam_format.COL_C2, help="Chrom 2 column; default {}".format(pairsam_format.COL_C2), ) @click.option( "--p1", type=int, default=pairsam_format.COL_P1, help="Position 1 column; default {}".format(pairsam_format.COL_P1), ) @click.option( "--p2", type=int, default=pairsam_format.COL_P2, help="Position 2 column; default {}".format(pairsam_format.COL_P2), ) @click.option( "--s1", type=int, default=pairsam_format.COL_S1, help="Strand 1 column; default {}".format(pairsam_format.COL_S1), ) @click.option( "--s2", type=int, default=pairsam_format.COL_S2, help="Strand 2 column; default {}".format(pairsam_format.COL_S2), ) @click.option( "--unmapped-chrom", type=str, default=pairsam_format.UNMAPPED_CHROM, help="Placeholder for a chromosome on an unmapped side; default {}".format( pairsam_format.UNMAPPED_CHROM ), ) @click.option( "--mark-multi", is_flag=True, help='If specified, duplicate pairs are marked as FF in "pair_type" and ' "as a duplicate in the sam entries.", ) @common_io_options def filterbycov( pairs_path, output, output_highcov, output_unmapped, output_stats, max_dist, max_cov, method, sep, comment_char, send_header_to, c1, c2, p1, p2, s1, s2, unmapped_chrom, mark_multi, **kwargs ): """Remove pairs from regions of high coverage. Find and remove pairs with >(MAX_COV-1) neighbouring pairs within a +/- MAX_DIST bp window around either side. Useful for single-cell Hi-C experiments, where coverage is naturally limited by the chromosome copy number. 
PAIRS_PATH : input triu-flipped sorted .pairs or .pairsam file. If the path ends with .gz/.lz4, the input is decompressed by bgzip/lz4c. By default, the input is read from stdin. """ filterbycov_py( pairs_path, output, output_highcov, output_unmapped, output_stats, max_dist, max_cov, method, sep, comment_char, send_header_to, c1, c2, p1, p2, s1, s2, unmapped_chrom, mark_multi, **kwargs ) def filterbycov_py( pairs_path, output, output_highcov, output_unmapped, output_stats, max_dist, max_cov, method, sep, comment_char, send_header_to, c1, c2, p1, p2, s1, s2, unmapped_chrom, mark_multi, **kwargs ): ## Prepare input, output streams based on selected outputs ## Default ouput stream is low-frequency interactors sep = ast.literal_eval('"""' + sep + '"""') send_header_to_lowcov = send_header_to in ["both", "lowcov"] send_header_to_highcov = send_header_to in ["both", "highcov"] instream = ( fileio.auto_open( pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) if pairs_path else sys.stdin ) outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) out_stats_stream = ( fileio.auto_open( output_stats, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output_stats else None ) # generate empty PairCounter if stats output is requested: out_stat = PairCounter() if output_stats else None # output the high-frequency interacting pairs if not output_highcov: outstream_high = None elif output_highcov == "-" or ( pathlib.Path(output_highcov).absolute() == pathlib.Path(output).absolute() ): outstream_high = outstream else: outstream_high = fileio.auto_open( output_highcov, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) # output unmapped pairs if not output_unmapped: outstream_unmapped = None elif output_unmapped == "-" or ( pathlib.Path(output_unmapped).absolute() == 
pathlib.Path(output).absolute() ): outstream_unmapped = outstream elif ( pathlib.Path(output_unmapped).absolute() == pathlib.Path(output_highcov).absolute() ): outstream_unmapped = outstream_high else: outstream_unmapped = fileio.auto_open( output_unmapped, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) # prepare file headers header, body_stream = headerops.get_header(instream) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) # header for low-frequency interactors if send_header_to_lowcov: outstream.writelines((l + "\n" for l in header)) # header for high-frequency interactors if send_header_to_highcov and outstream_high and (outstream_high != outstream): outstream_high.writelines((l + "\n" for l in header)) # header for unmapped pairs if ( outstream_unmapped and (outstream_unmapped != outstream) and (outstream_unmapped != outstream_high) ): outstream_unmapped.writelines((l + "\n" for l in header)) # perform filtering of pairs based on low/high-frequency of interaction streaming_filterbycov( method, max_dist, max_cov, sep, c1, c2, p1, p2, s1, s2, unmapped_chrom, body_stream, outstream, outstream_high, outstream_unmapped, out_stat, mark_multi, ) ## FINISHED! # save statistics to a file if it was requested: TO BE TESTED if out_stat: out_stat.save(out_stats_stream) if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() if outstream_high and (outstream_high != outstream): outstream_high.close() if ( outstream_unmapped and (outstream_unmapped != outstream) and (outstream_unmapped != outstream_high) ): outstream_unmapped.close() if out_stats_stream: out_stats_stream.close() if __name__ == "__main__": filterbycov() pairtools-1.0.3/pairtools/cli/flip.py000066400000000000000000000110601452673171500176270ustar00rootroot00000000000000import sys import click from ..lib import fileio, pairsam_format, headerops from . 
import cli, common_io_options import warnings UTIL_NAME = "pairtools_flip" @cli.command() @click.argument("pairs_path", type=str, required=False) @click.option( "-c", "--chroms-path", type=str, required=True, help="Chromosome order used to flip interchromosomal mates: " "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose " "first column lists scaffold names. Any scaffolds not listed will be " "ordered lexicographically following the names provided.", ) @click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @common_io_options def flip(pairs_path, chroms_path, output, **kwargs): """Flip pairs to get an upper-triangular matrix. Change the order of side1 and side2 in pairs, such that (order(chrom1) < order(chrom2) or (order(chrom1) == order(chrom2)) and (pos1 <=pos2)) Equivalent to reflecting the lower triangle of a Hi-C matrix onto its upper triangle, resulting in an upper triangular matrix. The order of chromosomes must be provided via a .chromsizes file. PAIRS_PATH : input .pairs/.pairsam file. If the path ends with .gz or .lz4, the input is decompressed by bgzip/lz4c. By default, the input is read from stdin. 
""" flip_py(pairs_path, chroms_path, output, **kwargs) def flip_py(pairs_path, chroms_path, output, **kwargs): instream = ( fileio.auto_open( pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) if pairs_path else sys.stdin ) outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) chromosomes = headerops.get_chrom_order(chroms_path) chrom_enum = dict( zip( [pairsam_format.UNMAPPED_CHROM] + list(chromosomes), range(len(chromosomes) + 1), ) ) header, body_stream = headerops.get_header(instream) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) column_names = headerops.extract_column_names(header) if len(column_names) == 0: column_names = pairsam_format.COLUMNS chrom1_col = column_names.index("chrom1") chrom2_col = column_names.index("chrom2") pos1_col = column_names.index("pos1") pos2_col = column_names.index("pos2") pair_type_col = ( column_names.index("pair_type") if "pair_type" in column_names else -1 ) col_pairs_to_flip = [ (column_names.index(col), column_names.index(col[:-1] + "2")) for col in column_names if col.endswith("1") and (col[:-1] + "2") in column_names ] for line in body_stream: cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP) is_annotated1 = cols[chrom1_col] in chrom_enum.keys() is_annotated2 = cols[chrom2_col] in chrom_enum.keys() if not is_annotated1 or not is_annotated2: warnings.warn(f"Unannotated chromosomes in the pairs file!") # Flip so that annotated chromosome stands first: if is_annotated1 and not is_annotated2: has_correct_order = True elif is_annotated2 and not is_annotated1: has_correct_order = False elif not is_annotated1 and not is_annotated2: has_correct_order = cols[chrom1_col] < cols[chrom2_col] else: # both are annotated: has_correct_order = (chrom_enum[cols[chrom1_col]], int(cols[pos1_col])) <= ( chrom_enum[cols[chrom2_col]], 
int(cols[pos2_col]), ) # flipping: if not has_correct_order: for col1, col2 in col_pairs_to_flip: if (col1 < len(cols)) and (col2 < len(cols)): cols[col1], cols[col2] = cols[col2], cols[col1] if pair_type_col != -1 and pair_type_col < len(cols): cols[pair_type_col] = cols[pair_type_col][1] + cols[pair_type_col][0] outstream.write(pairsam_format.PAIRSAM_SEP.join(cols)) outstream.write("\n") if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() if __name__ == "__main__": flip() pairtools-1.0.3/pairtools/cli/header.py000066400000000000000000000437421452673171500201410ustar00rootroot00000000000000import sys import click import warnings import subprocess from ..lib import fileio, pairsam_format, headerops from ..lib.parse_pysam import AlignmentFilePairtoolized from . import cli, common_io_options UTIL_NAME = "pairtools_header" @cli.group() def header(): """ Manipulate the .pairs/.pairsam header """ pass # Common options for all header tools: def register_subcommand(func): return header.command()( click.argument("pairs_path", type=str, required=False)( click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", )( click.option( "--nproc-in", type=int, default=1, show_default=True, help="Number of processes used by the auto-guessed input decompressing command.", )( click.option( "--nproc-out", type=int, default=8, show_default=True, help="Number of processes used by the auto-guessed output compressing command.", )( click.option( "--cmd-in", type=str, default=None, help="A command to decompress the input. " "If provided, fully overrides the auto-guessed command. " "Does not work with stdin. " "Must read input from stdin and print output into stdout. " "EXAMPLE: pbgzip -dc -n 3", )( click.option( "--cmd-out", type=str, default=None, help="A command to compress the output. 
" "If provided, fully overrides the auto-guessed command. " "Does not work with stdout. " "Must read input from stdin and print output into stdout. " "EXAMPLE: pbgzip -c -n 8", )(func) ) ) ) ) ) ) def add_arg_help(func): func.__doc__ = func.__doc__.format( """ PAIRS_PATH : input .pairs/.pairsam file. If the path ends with .gz or .lz4, the input is decompressed by bgzip/lz4c. By default, the input is read from stdin. """ ) return func @register_subcommand @add_arg_help @click.option( "--chroms-path", type=str, default=None, required=False, help="Chromosome order used to flip interchromosomal mates: " "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose " "first column lists scaffold names. Any scaffolds not listed will be " "ordered lexicographically following the names provided.", ) @click.option( "--sam-path", type=str, default=None, required=False, help="Input sam file to inherit the header." " Either --sam or --chroms-path should be provided to store the chromosome sizes in the header.", ) @click.option( "--columns", type=click.STRING, default="", help="Report columns describing alignments " "Can take multiple values as a comma-separated list." f"By default, assign standard .pairs columns: {','.join(pairsam_format.COLUMNS)}", ) @click.option( "--extra-columns", type=click.STRING, default="", help="Report extra columns describing alignments " "Can take multiple values as a comma-separated list.", ) @click.option( "--assembly", type=str, default="", help="Name of genome assembly (e.g. 
hg19, mm10) to store in the pairs header.", ) @click.option( "--no-flip", is_flag=True, help="If specified, assume that the pairs are not filpped in genomic order and instead preserve " "the order in which they were sequenced.", ) @click.option( "--pairs/--pairsam", is_flag=True, default=True, help=f"If pairs, then the defult columns will be set to: {','.join(pairsam_format.COLUMNS_PAIRS)}" f"\nif pairsam, then to: {','.join(pairsam_format.COLUMNS_PAIRSAM)}", ) def generate(pairs_path, output, chroms_path, sam_path, columns, assembly, **kwargs): """ Generate the header """ generate_py(pairs_path, output, chroms_path, sam_path, columns, assembly, **kwargs) def generate_py(pairs_path, output, chroms_path, sam_path, columns, assembly, **kwargs): instream = fileio.auto_open( pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) header, body_stream = headerops.get_header(instream, ignore_warning=True) # Parse chromosome sizes present in the input chromosomes: if chroms_path and not sam_path: chromsizes = headerops.get_chromsizes_from_file(chroms_path) # chromosomes = headerops.get_chromsizes_from_file(chroms_path) # Parse chromosome sizes present in sam input: if sam_path: # open input sam file with pysam input_sam = AlignmentFilePairtoolized( sam_path, "r", threads=kwargs.get("nproc_in") ) samheader = input_sam.header chromsizes = headerops.get_chromsizes_from_pysam_header(samheader) # if chroms_path: # chromosomes = headerops.get_chrom_order(chroms_path, list(chromsizes.keys())) # else: # chromosomes = chromsizes.keys() # Read the input columns: if columns: columns = columns.split(",") else: if kwargs.get("pairs", True): columns = pairsam_format.COLUMNS_PAIRS else: columns = pairsam_format.COLUMNS_PAIRSAM extra_columns = kwargs.get("extra_columns", "") if extra_columns: columns += extra_columns.split(",") # Write new header to the pairsam file new_header = headerops.make_standard_pairsheader( assembly=assembly, 
chromsizes=chromsizes, columns=columns, shape="whole matrix" if kwargs["no_flip"] else "upper triangle", ) if sam_path: new_header = headerops.insert_samheader_pysam(new_header, samheader) new_header = headerops.append_new_pg(new_header, ID=UTIL_NAME, PN=UTIL_NAME) # Check that the number of columns in the body corresponds to the header: if not headerops.validate_cols(instream, columns): raise ValueError( f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(columns)}\n\t{body_stream.readline()}" ) ######## # Write the output after successful checks: outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) outstream.writelines((l + "\n" for l in new_header)) outstream.flush() if body_stream == sys.stdin: for line in body_stream: outstream.write(line) else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path, kwargs["cmd_in"] ) elif pairs_path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], pairs_path ) ) elif pairs_path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() @register_subcommand @add_arg_help @click.option( "--reference-file", "-r", help="Header file for transfer", type=str, required=True ) def transfer(pairs_path, output, reference_file, **kwargs): """ Transfer the header from one pairs file to another """ transfer_py(pairs_path, output, reference_file, **kwargs) def transfer_py(pairs_path, output, reference_file, **kwargs): instream = fileio.auto_open( 
pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) header, body_stream = headerops.get_header(instream, ignore_warning=True) # Read the header from reference file instream_header = fileio.auto_open( reference_file, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) reference_header, _ = headerops.get_header(instream_header) # Close the reference stream after extraction of the header: if instream_header != sys.stdin: instream_header.close() reference_columns = headerops.extract_column_names(reference_header) # Check that the number of columns in the body corresponds to the header: if not headerops.validate_cols(instream, reference_columns): raise ValueError( f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(reference_columns)}\n\t{body_stream.readline()}" ) ######## # Write the output after successful checks: outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) reference_header = headerops.append_new_pg( reference_header, ID=UTIL_NAME, PN=UTIL_NAME ) outstream.writelines((l + "\n" for l in reference_header)) outstream.flush() if body_stream == sys.stdin: for line in body_stream: outstream.write(line) else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path, kwargs["cmd_in"] ) elif pairs_path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], pairs_path ) ) elif pairs_path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if instream != sys.stdin: instream.close() if outstream != 
sys.stdout: outstream.close() @register_subcommand @add_arg_help @click.option( "--columns", "-c", help=f"Comma-separated list of columns to be added, e.g.: {','.join(pairsam_format.COLUMNS)}", type=str, required=True, ) def set_columns(pairs_path, output, columns, **kwargs): """ Add the columns to the .pairs/pairsam file """ set_columns_py(pairs_path, output, columns, **kwargs) def set_columns_py(pairs_path, output, columns, **kwargs): instream = ( fileio.auto_open( pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) if pairs_path else sys.stdin ) outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) header, body_stream = headerops.get_header(instream) header = headerops.set_columns(header, columns.split(",")) outstream.writelines((l + "\n" for l in header)) outstream.flush() if body_stream == sys.stdin: for line in body_stream: outstream.write(line) else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path, kwargs["cmd_in"] ) elif pairs_path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], pairs_path ) ) elif pairs_path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() @register_subcommand @add_arg_help @click.option( "--reference-file", "-r", help="Header file for comparison (optional)", type=str, required=False, default="", ) @click.option( "--reference-columns", "-c", help=f"Comma-separated list of columns fro check (optional), e.g.: 
{','.join(pairsam_format.COLUMNS)}", type=str, required=False, default="", ) def validate_columns(pairs_path, output, reference_file, reference_columns, **kwargs): """ Validate the columns of the .pairs/pairsam file against reference or within file. If the checks pass, then returns full pairs file. Otherwise throws an exception. If reference_file is provided, check: 1) columns are the same between pairs and reference_file 2) number of columns in the pairs body is the same as the number of columns If reference_columns are provided, check: 1) pairs columns are the same as provided 2) number of columns in the pairs body is the same as the number of columns If no reference_file or columns, then check only the number of columns in the pairs body. Checks only the first line in the pairs stream! """ validate_columns_py(pairs_path, output, reference_file, reference_columns, **kwargs) def validate_columns_py( pairs_path, output, reference_file, reference_columns, **kwargs ): instream = fileio.auto_open( pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) header, body_stream = headerops.get_header(instream) pairs_columns = headerops.extract_column_names(header) # Convert reference columns string into list, if provided if reference_columns: reference_columns = reference_columns.split(",") # Read the header from reference file if reference_file: instream_header = fileio.auto_open( reference_file, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) reference_header, _ = headerops.get_header(instream_header) # Close the reference stream after extraction of the header: if instream_header != sys.stdin: instream_header.close() if reference_columns: warnings.warn( "--reference-columns are ignored, as --reference-file is provided" ) reference_columns = headerops.extract_column_names(reference_header) if reference_columns: if pairs_columns != reference_columns: raise ValueError( f"Pairs columns differ from reference 
columns:\n\t{pairs_columns}\n\t{reference_columns}" ) # Check that the number of columns in the body corresponds to the header: if not headerops.validate_cols(instream, pairs_columns): raise ValueError( f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(pairs_columns)}\n\t{body_stream.readline()}" ) ######## # Write the output after successful checks: outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) outstream.flush() if body_stream == sys.stdin: for line in body_stream: outstream.write(line) else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path, kwargs["cmd_in"] ) elif pairs_path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], pairs_path ) ) elif pairs_path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() if __name__ == "__main__": header() pairtools-1.0.3/pairtools/cli/markasdup.py000066400000000000000000000037041452673171500206720ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import sys import click from ..lib import fileio, pairsam_format, headerops from . 
import cli, common_io_options from ..lib.dedup import mark_split_pair_as_dup UTIL_NAME = "pairtools_markasdup" @cli.command() @click.argument("pairsam_path", type=str, required=False) @click.option( "-o", "--output", type=str, default="", help="output .pairsam file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @common_io_options def markasdup(pairsam_path, output, **kwargs): """Tag all pairs in the input file as duplicates. Change the type of all pairs inside a .pairs/.pairsam file to DD. If sam entries are present, change the pair type in the Yt SAM tag to 'Yt:Z:DD'. PAIRSAM_PATH : input .pairs/.pairsam file. If the path ends with .gz, the input is gzip-decompressed. By default, the input is read from stdin. """ markasdup_py(pairsam_path, output, **kwargs) def markasdup_py(pairsam_path, output, **kwargs): instream = fileio.auto_open( pairsam_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) outstream = fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) header, body_stream = headerops.get_header(instream) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) for line in body_stream: cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP) mark_split_pair_as_dup(cols) outstream.write(pairsam_format.PAIRSAM_SEP.join(cols)) outstream.write("\n") if instream != sys.stdin: instream.close() if outstream != sys.stdout: outstream.close() if __name__ == "__main__": markasdup() pairtools-1.0.3/pairtools/cli/merge.py000066400000000000000000000166431452673171500200100ustar00rootroot00000000000000#!/usr/bin/env python import sys import glob import math import subprocess import click from ..lib import fileio, pairsam_format, headerops from . 
import cli, common_io_options UTIL_NAME = "pairtools_merge" @cli.command() @click.argument( "pairs_path", nargs=-1, type=str, ) @click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz/.lz4, the output is compressed by bgzip/lz4c." " By default, the output is printed into stdout.", ) @click.option( "--max-nmerge", type=int, default=8, show_default=True, help="The maximal number of inputs merged at once. For more, store " "merged intermediates in temporary files.", ) @click.option( "--tmpdir", type=str, default="", help="Custom temporary folder for merged intermediates.", ) @click.option( "--memory", type=str, default="2G", show_default=True, help="The amount of memory used by default.", ) @click.option( "--compress-program", type=str, default="", show_default=True, help="A binary to compress temporary merged chunks. " "Must decompress input when the flag -d is provided. " "Suggested alternatives: lz4c, gzip, lzop, snzip. " "NOTE: fails silently if the command syntax is wrong. ", ) @click.option( "--nproc", type=int, default=8, help="Number of threads for merging.", show_default=True, ) @click.option( "--nproc-in", type=int, default=1, show_default=True, help="Number of processes used by the auto-guessed input decompressing command.", ) @click.option( "--nproc-out", type=int, default=8, show_default=True, help="Number of processes used by the auto-guessed output compressing command.", ) @click.option( "--cmd-in", type=str, default=None, help="A command to decompress the input. " "If provided, fully overrides the auto-guessed command. " "Does not work with stdin. " "Must read input from stdin and print output into stdout. " "EXAMPLE: pbgzip -dc -n 3", ) @click.option( "--cmd-out", type=str, default=None, help="A command to compress the output. " "If provided, fully overrides the auto-guessed command. " "Does not work with stdout. " "Must read input from stdin and print output into stdout. 
" "EXAMPLE: pbgzip -c -n 8", ) @click.option( "--keep-first-header/--no-keep-first-header", default=False, show_default=True, help="Keep the first header or merge the headers together. Default: merge headers.", ) @click.option( "--concatenate/--no-concatenate", default=False, show_default=True, help="Simple concatenate instead of merging sorted files.", ) # Using custom IO options def merge( pairs_path, output, max_nmerge, tmpdir, memory, compress_program, nproc, **kwargs ): """Merge .pairs/.pairsam files. By default, assumes that the files are sorted and maintains the sorting. Merge triu-flipped sorted pairs/pairsam files. If present, the @SQ records of the SAM header must be identical; the sorting order of these lines is taken from the first file in the list. The ID fields of the @PG records of the SAM header are modified with a numeric suffix to produce unique records. The other unique SAM and non-SAM header lines are copied into the output header. PAIRS_PATH : upper-triangular flipped sorted .pairs/.pairsam files to merge or a group/groups of .pairs/.pairsam files specified by a wildcard. For paths ending in .gz/.lz4, the files are decompressed by bgzip/lz4c. 
""" merge_py( pairs_path, output, max_nmerge, tmpdir, memory, compress_program, nproc, **kwargs, ) def merge_py( pairs_path, output, max_nmerge, tmpdir, memory, compress_program, nproc, **kwargs ): paths = sum([glob.glob(mask) for mask in pairs_path], []) if len(paths) == 0: raise ValueError(f"No input paths: {pairs_path}") outstream = fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) # if there is only one input, bypass merging and do not modify the header if len(paths) == 1: instream = fileio.auto_open( paths[0], mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) for line in instream: outstream.write(line) if outstream != sys.stdout: outstream.close() return headers = [] for path in paths: f = fileio.auto_open( path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) h, _ = headerops.get_header(f) headers.append(h) f.close() # Skip other headers if keep_first_header is True (False by default): if kwargs.get("keep_first_header", False): break if not headerops.all_same_columns(headers): raise ValueError("Input pairs cannot contain different columns") merged_header = headerops.merge_headers(headers) merged_header = headerops.append_new_pg(merged_header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in merged_header)) outstream.flush() # If concatenation requested instead of merging sorted input: if kwargs.get("concatenate", False): command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ # Full merge that keeps the ordered input: else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; sort -k {0},{0} -k {1},{1} -k {2},{2}n -k {3},{3}n -k {4},{4} --merge --field-separator=$'\''{5}'\'' {6} {7} {8} -S {9} {10} """.replace( "\n", " " ).format( pairsam_format.COL_C1 + 1, pairsam_format.COL_C2 + 1, pairsam_format.COL_P1 + 1, pairsam_format.COL_P2 + 1, pairsam_format.COL_PTYPE + 1, pairsam_format.PAIRSAM_SEP_ESCAPE, 
" --parallel={} ".format(nproc) if nproc > 1 else " ", " --batch-size={} ".format(max_nmerge) if max_nmerge else " ", " --temporary-directory={} ".format(tmpdir) if tmpdir else " ", memory, ( " --compress-program={} ".format(compress_program) if compress_program else " " ), ) for path in paths: if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( path, kwargs["cmd_in"] ) elif path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], path ) ) elif path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if outstream != sys.stdout: outstream.close() if __name__ == "__main__": merge() pairtools-1.0.3/pairtools/cli/parse.py000066400000000000000000000222161452673171500200140ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import click import sys from ..lib import fileio, pairsam_format, headerops from . import cli, common_io_options from ..lib.stats import PairCounter from ..lib.parse_pysam import AlignmentFilePairtoolized from ..lib.parse import streaming_classify UTIL_NAME = "pairtools_parse" @cli.command() @click.argument("sam_path", type=str, required=False) @click.option( "-c", "--chroms-path", type=str, required=True, help="Chromosome order used to flip interchromosomal mates: " "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose " "first column lists scaffold names. Any scaffolds not listed will be " "ordered lexicographically following the names provided.", ) @click.option( "-o", "--output", type=str, default="", help="output file. " " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed." "By default, the output is printed into stdout. 
", ) @click.option( "--assembly", type=str, help="Name of genome assembly (e.g. hg19, mm10) to store in the pairs header.", ) @click.option( "--min-mapq", type=int, default=1, show_default=True, help="The minimal MAPQ score to consider a read as uniquely mapped", ) @click.option( "--max-molecule-size", type=int, default=750, show_default=True, help="The maximal size of a Hi-C molecule; used to rescue single ligations" "(from molecules with three alignments) and to rescue complex ligations." "The default is based on oriented P(s) at short ranges of multiple Hi-C." "Not used with walks-policy all.", ) @click.option( "--drop-readid", is_flag=True, help="If specified, do not add read ids to the output", ) @click.option( "--drop-seq", is_flag=True, help="If specified, remove sequences and PHREDs from the sam fields", ) @click.option( "--drop-sam", is_flag=True, help="If specified, do not add sams to the output" ) @click.option( "--add-pair-index", is_flag=True, help="If specified, each pair will have pair index in the molecule", ) @click.option( "--add-columns", type=click.STRING, default="", help="Report extra columns describing alignments " "Possible values (can take multiple values as a comma-separated " "list): a SAM tag (any pair of uppercase letters) or {}.".format( ", ".join(pairsam_format.EXTRA_COLUMNS) ), ) @click.option( "--output-parsed-alignments", type=str, default="", help="output file for all parsed alignments, including walks." " Useful for debugging and rnalysis of walks." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed." " By default, not used.", ) @click.option( "--output-stats", type=str, default="", help="output file for various statistics of pairs file. 
" " By default, statistics is not generated.", ) @click.option( "--report-alignment-end", type=click.Choice(["5", "3"]), default="5", help="specifies whether the 5' or 3' end of the alignment is reported as" " the position of the Hi-C read.", ) @click.option( "--max-inter-align-gap", type=int, default=20, show_default=True, help="read segments that are not covered by any alignment and" ' longer than the specified value are treated as "null" alignments.' " These null alignments convert otherwise linear alignments into walks," " and affect how they get reported as a Hi-C pair (see --walks-policy).", ) @click.option( "--walks-policy", type=click.Choice(["mask", "5any", "5unique", "3any", "3unique", "all"]), default="mask", help="the policy for reporting unrescuable walks (reads containing more" " than one alignment on one or both sides, that can not be explained by a" " single ligation between two mappable DNA fragments)." ' "mask" - mask walks (chrom="!", pos=0, strand="-"); ' ' "5any" - report the 5\'-most alignment on each side;' ' "5unique" - report the 5\'-most unique alignment on each side, if present;' ' "3any" - report the 3\'-most alignment on each side;' ' "3unique" - report the 3\'-most unique alignment on each side, if present;' ' "all" - report all available unique alignments on each side.', show_default=True, ) @click.option( "--readid-transform", type=str, default=None, help="A Python expression to modify read IDs. Useful when read IDs differ " "between the two reads of a pair. Must be a valid Python expression that " "uses variables called readID and/or i (the 0-based index of the read pair " "in the bam file) and returns a new value, e.g. \"readID[:-2]+'_'+str(i)\". 
" "Make sure that transformed readIDs remain unique!", show_default=True, ) @click.option( "--flip/--no-flip", is_flag=True, default=True, help="If specified, do not flip pairs in genomic order and instead preserve " "the order in which they were sequenced.", ) @common_io_options def parse( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): """Find ligation pairs in .sam data, make .pairs. SAM_PATH : an input .sam/.bam file with paired-end sequence alignments of Hi-C molecules. If the path ends with .bam, the input is decompressed from bam with samtools. By default, the input is read from stdin. """ parse_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ) def parse_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): ### Set up input stream if sam_path: # open input sam file with pysam input_sam = AlignmentFilePairtoolized( sam_path, "r", threads=kwargs.get("nproc_in") ) else: # read from stdin input_sam = AlignmentFilePairtoolized("-", "r", threads=kwargs.get("nproc_in")) ### Set up output streams outstream = fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) out_alignments_stream, out_stats_stream = None, None if output_parsed_alignments: out_alignments_stream = fileio.auto_open( output_parsed_alignments, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output_stats: out_stats_stream = fileio.auto_open( output_stats, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if out_alignments_stream: out_alignments_stream.write( "readID\tside\tchrom\tpos\tstrand\tmapq\tcigar\tdist_5_lo\tdist_5_hi\tmatched_bp\n" ) # generate empty PairCounter if stats output is requested: out_stat = PairCounter() if output_stats else None ### Set up output parameters add_columns = kwargs.get("add_columns", []) add_columns = [col for col in add_columns.split(",") if col] for 
col in add_columns: if not ( (col in pairsam_format.EXTRA_COLUMNS) or (len(col) == 2 and col.isupper()) ): raise Exception("{} is not a valid extra column".format(col)) columns = pairsam_format.COLUMNS + ( [c + side for c in add_columns for side in ["1", "2"]] ) if kwargs.get("drop_sam", True): columns.pop(columns.index("sam1")) columns.pop(columns.index("sam2")) if not kwargs.get("add_pair_index", False): columns.pop(columns.index("walk_pair_index")) columns.pop(columns.index("walk_pair_type")) ### Parse header samheader = input_sam.header if not samheader: raise ValueError( "The input sam is missing a header! If reading a bam file, please use `samtools view -h` to include the header." ) ### Parse chromosome files present in the input sam_chromsizes = headerops.get_chromsizes_from_pysam_header(samheader) chromosomes = headerops.get_chrom_order(chroms_path, list(sam_chromsizes.keys())) ### Write new header to the pairsam file header = headerops.make_standard_pairsheader( assembly=kwargs.get("assembly", ""), chromsizes=[(chrom, sam_chromsizes[chrom]) for chrom in chromosomes], columns=columns, shape="whole matrix" if not kwargs["flip"] else "upper triangle", ) header = headerops.insert_samheader_pysam(header, samheader) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) ### Parse input and write to the outputs streaming_classify( input_sam, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs ) # save statistics to a file if it was requested: if out_stat: out_stat.save(out_stats_stream) if outstream != sys.stdout: outstream.close() # close optional output streams if needed: if out_alignments_stream and out_alignments_stream != sys.stdout: out_alignments_stream.close() if out_stats_stream and out_stats_stream != sys.stdout: out_stats_stream.close() if __name__ == "__main__": parse() 
pairtools-1.0.3/pairtools/cli/parse2.py000066400000000000000000000303341452673171500200760ustar00rootroot00000000000000# !/usr/bin/env python # -*- coding: utf-8 -*- import click import sys from ..lib import fileio, pairsam_format, headerops from . import cli, common_io_options from ..lib.stats import PairCounter from ..lib.parse_pysam import AlignmentFilePairtoolized from ..lib.parse import streaming_classify UTIL_NAME = "pairtools_parse2" @cli.command() @click.argument("sam_path", type=str, required=False) # Parsing options: @click.option( "-c", "--chroms-path", type=str, required=True, help="Chromosome order used to flip interchromosomal mates: " "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose " "first column lists scaffold names. Any scaffolds not listed will be " "ordered lexicographically following the names provided.", ) @click.option( "-o", "--output", type=str, default="", help="output file with pairs. " " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed." "By default, the output is printed into stdout. ", ) @click.option( "--report-position", type=click.Choice(["junction", "read", "walk", "outer"]), default="outer", help="""Reported position of alignments in pairs of complex walks (pos columns). Each alignment in .bam/.sam Hi-C-like data has two ends, and you can report one or another depending of the position of alignment on a read or in a pair. "junction" - inner ends of sequential alignments in each pair, aka ligation junctions (complex walks default), "read" - 5'-end of alignments relative to R1 or R2 read coordinate system (as in traditional Hi-C), "walk" - 5'-end of alignments relative to the whole walk coordinate system, "outer" - outer ends of sequential alignments in each pair. """, ) @click.option( "--report-orientation", type=click.Choice(["pair", "read", "walk", "junction"]), default="pair", help="""Reported orientataion of pairs in complex walk (strand columns). 
Each alignment in .bam/.sam Hi-C-like data has orientation, and you can report it relative to the read, pair or whole walk coordinate system. "pair" - orientation as if each pair in complex walk was sequenced independently from the outer ends or molecule (as in traditional Hi-C, also complex walks default), "read" - orientation defined by the read (R1 or R2 read coordinate system), "walk" - orientation defined by the walk coordinate system, "junction" - reversed "pair" orientation, as if pair was sequenced in both directions starting from the junction""", ) @click.option( "--assembly", type=str, help="Name of genome assembly (e.g. hg19, mm10) to store in the pairs header.", ) @click.option( "--min-mapq", type=int, default=1, show_default=True, help="The minimal MAPQ score to consider a read as uniquely mapped.", ) @click.option( "--max-inter-align-gap", type=int, default=20, show_default=True, help="Read segments that are not covered by any alignment and" ' longer than the specified value are treated as "null" alignments.' " These null alignments convert otherwise linear alignments into walks," " and affect how they get reported as a Hi-C pair.", ) @click.option( "--max-insert-size", type=int, default=500, show_default=True, help="When searching for overlapping ends of left and right read (R1 and R2), this sets the minimal distance " "when two alignments on the same strand and chromosome are considered part of the same fragment (and thus reported as the same alignment " "and not a pair). For traditional Hi-C with long restriction fragments and shorter molecules after ligation+sonication, this " "can be the expected molecule size. For complex walks with short restriction fragments, this can be the expected restriction fragment " "size. Note that unsequenced insert is *terra incognita* and might contain unsequenced DNA (including ligations) in it. " "This parameter is ignored in --single-end mode. 
", ) @click.option( "--dedup-max-mismatch", type=int, default=3, show_default=True, help="Allowed mismatch between intramolecular alignments to detect readthrough duplicates. " "Pairs with both sides mapped within this distance (bp) from each " "other are considered duplicates. ", ) @click.option( "--single-end", is_flag=True, help="If specified, the input is single-end. " "Never use this for paired-end data, because R1 read will be omitted. " "If single-end data is provided, but parameter is unset, the pairs will be " "generated, but may contain artificial UN pairs. ", ) @click.option( "--expand/--no-expand", is_flag=True, help="If specified, perform combinatorial expansion on the pairs. " "Combinatorial expansion is a way to increase the number of contacts in you data, assuming that all DNA fragments in the same molecule (read) are in contact. " "Expanded pairs have modified pair type, 'E{separation}_{pair type}'", ) @click.option( "--max-expansion-depth", type=int, default=None, show_default=True, help="Works in combination with --expand. " "Maximum number of segments separating pair. By default, expanding all possible pairs." "Setting the number will limit the expansion depth and enforce contacts from the same " "side of the read. ", ) @click.option( "--add-pair-index", is_flag=True, help="If specified, parse2 will report pair index in the walk as additional columns (R1, R2, R1&R2 or R1-R2). " "See documentation: https://pairtools.readthedocs.io/en/latest/parsing.html#rescuing-complex-walks " "For combinatorial expanded pairs, two numbers will be reported: " "original pair index of the left and right segments. ", ) @click.option( "--flip/--no-flip", is_flag=True, default=False, help="If specified, flip pairs in genomic order and instead preserve " "the order in which they were sequenced. Note that no flip is recommended for analysis of walks because it will " "override the order of alignments in pairs. 
Flip is required for appropriate deduplication of sorted pairs. " "Flip is not required for cooler cload, which runs flipping internally. ", ) @click.option( "--add-columns", type=click.STRING, default="", help="Report extra columns describing alignments " "Possible values (can take multiple values as a comma-separated " "list): a SAM tag (any pair of uppercase letters) or {}.".format( ", ".join(pairsam_format.EXTRA_COLUMNS) ), ) @click.option( "--drop-readid/--keep-readid", is_flag=True, default=False, help="If specified, do not add read ids to the output. By default, keep read ids. Useful for long walks analysis. ", ) @click.option( "--readid-transform", type=str, default=None, help="A Python expression to modify read IDs. Useful when read IDs differ " "between the two reads of a pair. Must be a valid Python expression that " "uses variables called readID and/or i (the 0-based index of the read pair " "in the bam file) and returns a new value, e.g. \"readID[:-2]+'_'+str(i)\". " "Make sure that transformed readIDs remain unique!", show_default=True, ) @click.option( "--drop-seq/--keep-seq", is_flag=True, default=False, help="Remove sequences and PHREDs from the sam fields by default. Kept otherwise. ", ) @click.option( "--drop-sam/--keep-sam", is_flag=True, default=False, help="Do not add sams to the output by default. Kept otherwise. ", ) @click.option( "--output-parsed-alignments", type=str, default="", help="output file with all parsed alignments (one alignment per line)." " Useful for debugging and analysis of walks." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed." " By default, not used.", ) @click.option( "--output-stats", type=str, default="", help="output file for various statistics of pairs file. 
" " By default, statistics is not generated.", ) @common_io_options def parse2( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): """Extracts pairs from .sam/.bam data with complex walks, make .pairs. SAM_PATH : an input .sam/.bam file with paired-end or single-end sequence alignments of Hi-C (or Hi-C-like) molecules. If the path ends with .bam, the input is decompressed from bam with samtools. By default, the input is read from stdin. """ parse2_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ) def parse2_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): ### Set up input stream if sam_path: # open input sam file with pysam input_sam = AlignmentFilePairtoolized( sam_path, "r", threads=kwargs.get("nproc_in") ) else: # read from stdin input_sam = AlignmentFilePairtoolized("-", "r", threads=kwargs.get("nproc_in")) ### Set up output streams outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) out_alignments_stream = ( fileio.auto_open( output_parsed_alignments, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output_parsed_alignments else None ) out_stats_stream = ( fileio.auto_open( output_stats, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output_stats else None ) if out_alignments_stream: out_alignments_stream.write( "readID\tside\tchrom\tpos\tstrand\tmapq\tcigar\tdist_5_lo\tdist_5_hi\tmatched_bp\n" ) # generate empty PairCounter if stats output is requested: out_stat = PairCounter() if output_stats else None ### Set up output parameters add_columns = kwargs.get("add_columns", []) add_columns = [col for col in add_columns.split(",") if col] for col in add_columns: if not ( (col in pairsam_format.EXTRA_COLUMNS) or (len(col) == 2 and col.isupper()) ): raise Exception("{} is not a valid extra 
column".format(col)) columns = pairsam_format.COLUMNS + ( [c + side for c in add_columns for side in ["1", "2"]] ) if kwargs.get("drop_sam", True): columns.pop(columns.index("sam1")) columns.pop(columns.index("sam2")) if not kwargs.get("add_pair_index", False): columns.pop(columns.index("walk_pair_index")) columns.pop(columns.index("walk_pair_type")) ### Parse header samheader = input_sam.header if not samheader: raise ValueError( "The input sam is missing a header! If reading a bam file, please use `samtools view -h` to include the header." ) ### Parse chromosome files present in the input sam_chromsizes = headerops.get_chromsizes_from_pysam_header(samheader) chromosomes = headerops.get_chrom_order(chroms_path, list(sam_chromsizes.keys())) ### Write new header to the pairsam file header = headerops.make_standard_pairsheader( assembly=kwargs.get("assembly", ""), chromsizes=[(chrom, sam_chromsizes[chrom]) for chrom in chromosomes], columns=columns, shape="whole matrix" if not kwargs["flip"] else "upper triangle", ) header = headerops.insert_samheader_pysam(header, samheader) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) ### Parse input and write to the outputs streaming_classify( input_sam, outstream, chromosomes, out_alignments_stream, out_stat, parse2=True, **kwargs ) # save statistics to a file if it was requested: if out_stat: out_stat.save(out_stats_stream) if outstream != sys.stdout: outstream.close() if out_alignments_stream: out_alignments_stream.close() if out_stats_stream: out_stats_stream.close() if __name__ == "__main__": parse2() pairtools-1.0.3/pairtools/cli/phase.py000066400000000000000000000246121452673171500200040ustar00rootroot00000000000000import sys import click import re, fnmatch from ..lib import fileio, pairsam_format, headerops from . 
import cli, common_io_options from ..lib.phase import phase_side_XB, phase_side_XA UTIL_NAME = "pairtools_phase" @cli.command() @click.argument("pairs_path", type=str, required=False) @click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @click.option( "--phase-suffixes", nargs=2, # type=click.Tuple([str, str]), help="Phase suffixes (of the chrom names), always a pair.", ) @click.option( "--clean-output", is_flag=True, help="Drop all columns besides the standard ones and phase1/2", ) @click.option( "--tag-mode", type=click.Choice(["XB", "XA"]), default="XB", help="Specifies the mode of bwa reporting." " XA will parse 'XA', the input should be generated with: --add-columns XA,NM,AS,XS --min-mapq 0" " XB will parse 'XB' tag, the input should be generated with: --add-columns XB,AS,XS --min-mapq 0 " " Note that XB tag is added by running bwa with -u tag, present in github version. " " Both modes report similar results: XB reports 0.002% contacts more for phased data, " " while XA can report ~1-2% more unphased contacts because its definition multiple mappers is more premissive. ", ) @click.option( "--report-scores/--no-report-scores", is_flag=True, default=False, help="Report scores of optional, suboptimal and second suboptimal alignments. " "NM (edit distance) with --tag-mode XA and AS (alfn score) with --tag-mode XB ", ) @common_io_options def phase( pairs_path, output, phase_suffixes, clean_output, tag_mode, report_scores, **kwargs ): """Phase pairs mapped to a diploid genome. Diploid genome is the genome with two set of the chromosome variants, where each chromosome has one of two suffixes (phase-suffixes) corresponding to the genome version (phase-suffixes). By default, phasing adds two additional columns with phase 0, 1 or "." (unpahsed). Phasing is based on detection of chromosome origin of each mapped fragment. 
def phase_py(
    pairs_path, output, phase_suffixes, clean_output, tag_mode, report_scores, **kwargs
):
    """Append phase information to every pair of a .pairs/.pairsam stream.

    Two columns (``phase1``, ``phase2``) are appended to each record. With
    ``report_scores`` six more columns follow, interleaved by side:
    ``<P>1_1, <P>1_2, <P>2_1, <P>2_2, <P>3_1, <P>3_2`` where ``<P>`` is "S" in
    XB mode and "M" in XA mode. Sides whose chromosome is the unmapped
    placeholder keep the defaults "!" (phase) and "-1" (scores).

    Parameters
    ----------
    pairs_path : str
        Input path; ``sys.stdin`` is used when empty.
    output : str
        Output path; ``sys.stdout`` is used when empty.
    phase_suffixes : sequence
        Forwarded unchanged to ``phase_side_XB`` / ``phase_side_XA``.
    clean_output : bool
        Keep only the columns listed in ``pairsam_format.COLUMNS`` plus the
        newly added phase (and score) columns.
    tag_mode : {"XB", "XA"}
        Which aligner tag columns are used for phasing.
    report_scores : bool
        Also output the three scores returned by the phasing helper.
    **kwargs
        ``nproc_in``/``cmd_in`` and ``nproc_out``/``cmd_out`` are forwarded to
        ``fileio.auto_open``.

    Raises
    ------
    ValueError
        If ``tag_mode`` is not supported, or if the input lacks the columns
        the selected mode needs.
    """
    # Reject an unsupported mode before any stream is opened. Previously the
    # header was written first and the run then failed with an unbound local
    # variable on the first mapped pair.
    if tag_mode == "XB":
        required_tags = ("XB", "AS", "XS")
        score_prefix = "S"
    elif tag_mode == "XA":
        required_tags = ("XA", "NM", "AS", "XS")
        score_prefix = "M"
    else:
        raise ValueError(
            "Unsupported tag_mode {!r}: expected 'XB' or 'XA'".format(tag_mode)
        )

    instream = (
        fileio.auto_open(
            pairs_path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        if pairs_path
        else sys.stdin
    )
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    old_column_names = headerops.extract_column_names(header)

    # The new columns are appended to each record, so their positions follow
    # directly from the number of columns declared in the header.
    idx_phase1 = len(old_column_names)
    idx_phase2 = idx_phase1 + 1

    if clean_output:
        new_column_names = [
            col for col in old_column_names if col in pairsam_format.COLUMNS
        ]
        # Positions (in the extended record) that survive the clean-up.
        new_column_idxs = [
            i
            for i, col in enumerate(old_column_names)
            if col in pairsam_format.COLUMNS
        ]
        new_column_idxs += [idx_phase1, idx_phase2]
    else:
        new_column_names = list(old_column_names)
    new_column_names += ["phase1", "phase2"]

    if report_scores:
        new_column_names += [
            "{}{}_{}".format(score_prefix, rank, side)
            for rank in (1, 2, 3)
            for side in (1, 2)
        ]
        if clean_output:
            new_column_idxs += [(idx_phase2 + i + 1) for i in range(6)]

    header = headerops._update_header_entry(
        header, "columns", " ".join(new_column_names)
    )

    # Every required tag must be present for both sides of the pair.
    if any(
        (tag + side) not in old_column_names
        for tag in required_tags
        for side in ("1", "2")
    ):
        raise ValueError(
            "The input pairs file must be parsed with the flag --add-columns {} --min-mapq 0".format(
                ",".join(required_tags)
            )
        )
    # tag -> (column index for side 1, column index for side 2)
    tag_cols = {
        tag: (old_column_names.index(tag + "1"), old_column_names.index(tag + "2"))
        for tag in required_tags
    }

    outstream.writelines((line + "\n" for line in header))

    sep = pairsam_format.PAIRSAM_SEP
    # Default values of the appended columns.
    padding = ["!", "!"] + (["-1"] * 6 if report_scores else [])
    # Per side: chromosome, position and strand columns + its phase column.
    sides = (
        (
            pairsam_format.COL_C1,
            pairsam_format.COL_P1,
            pairsam_format.COL_S1,
            idx_phase1,
        ),
        (
            pairsam_format.COL_C2,
            pairsam_format.COL_P2,
            pairsam_format.COL_S2,
            idx_phase2,
        ),
    )

    for line in body_stream:
        cols = line.rstrip("\n").split(sep)
        cols.extend(padding)
        pair_type = cols[pairsam_format.COL_PTYPE]

        for side, (col_chrom, col_pos, col_strand, idx_phase) in enumerate(sides):
            if cols[col_chrom] == pairsam_format.UNMAPPED_CHROM:
                continue

            if tag_mode == "XB":
                phase, chrom_base, score1, score2, score3 = phase_side_XB(
                    cols[col_chrom],
                    cols[tag_cols["XB"][side]],
                    int(cols[tag_cols["AS"][side]]),
                    int(cols[tag_cols["XS"][side]]),
                    phase_suffixes,
                )
            else:  # tag_mode == "XA", guaranteed by the check at the top
                phase, chrom_base, score1, score2, score3 = phase_side_XA(
                    cols[col_chrom],
                    cols[tag_cols["XA"][side]],
                    int(cols[tag_cols["AS"][side]]),
                    int(cols[tag_cols["XS"][side]]),
                    int(cols[tag_cols["NM"][side]]),
                    phase_suffixes,
                )

            cols[idx_phase] = phase
            if report_scores:
                # Score columns alternate between the sides, hence the stride
                # of 2 starting from this side's phase column.
                cols[idx_phase + 2] = str(score1)
                cols[idx_phase + 4] = str(score2)
                cols[idx_phase + 6] = str(score3)

            cols[col_chrom] = chrom_base
            if chrom_base == "!":
                # The helper returned "!" as chromosome: blank out this side
                # and set its letter of the pair type to "M".
                cols[col_chrom] = pairsam_format.UNMAPPED_CHROM
                cols[col_pos] = str(pairsam_format.UNMAPPED_POS)
                cols[col_strand] = pairsam_format.UNMAPPED_STRAND
                if side == 0:
                    pair_type = "M" + pair_type[1]
                else:
                    pair_type = pair_type[0] + "M"

        cols[pairsam_format.COL_PTYPE] = pair_type

        if clean_output:
            cols = [cols[i] for i in new_column_idxs]

        outstream.write(sep.join(cols))
        outstream.write("\n")

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
def restrict_py(pairs_path, frags, output, **kwargs):
    """Append restriction-fragment columns to every pair of a pairs stream.

    The header gets six extra column names (``rfrag1``, ``rfrag_start1``,
    ``rfrag_end1``, ``rfrag2``, ``rfrag_start2``, ``rfrag_end2``) and each
    body line gets the matching six values returned by ``find_rfrag`` for
    side 1 and side 2.

    Parameters
    ----------
    pairs_path : str or None
        Input path, handed to ``fileio.auto_open`` in read mode.
    frags : str
        Path to a tab-separated file with three columns (chrom, start, end);
        lines starting with ``#`` are treated as comments.
    output : str
        Output path, handed to ``fileio.auto_open`` in write mode.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    header = headerops.append_columns(
        header,
        [
            "rfrag1",
            "rfrag_start1",
            "rfrag_end1",
            "rfrag2",
            "rfrag_start2",
            "rfrag_end2",
        ],
    )
    outstream.writelines((entry + "\n" for entry in header))

    # genfromtxt squeezes a one-row table into a 0-d array, which can be
    # neither sorted along an axis nor indexed with shape[0]; atleast_1d
    # keeps single-fragment files working.
    rfrags = np.atleast_1d(
        np.genfromtxt(
            frags,
            delimiter="\t",
            comments="#",
            dtype=None,
            encoding="ascii",
            names=["chrom", "start", "end"],
        )
    )
    rfrags.sort(order=["chrom", "start", "end"])

    # Indices where the (sorted) chromosome name changes, plus both ends.
    chrom_borders = np.r_[
        0, 1 + np.where(rfrags["chrom"][:-1] != rfrags["chrom"][1:])[0], rfrags.shape[0]
    ]
    # Per-chromosome array of fragment boundaries: a leading 0 followed by end+1
    # of every fragment on that chromosome.
    rfrags = {
        rfrags["chrom"][i]: np.concatenate([[0], rfrags["end"][i:j] + 1])
        for i, j in zip(chrom_borders[:-1], chrom_borders[1:])
    }

    for line in body_stream:
        cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
        chrom1, pos1 = cols[pairsam_format.COL_C1], int(cols[pairsam_format.COL_P1])
        rfrag_idx1, rfrag_start1, rfrag_end1 = find_rfrag(rfrags, chrom1, pos1)
        chrom2, pos2 = cols[pairsam_format.COL_C2], int(cols[pairsam_format.COL_P2])
        rfrag_idx2, rfrag_start2, rfrag_end2 = find_rfrag(rfrags, chrom2, pos2)
        cols += [str(rfrag_idx1), str(rfrag_start1), str(rfrag_end1)]
        cols += [str(rfrag_idx2), str(rfrag_start2), str(rfrag_end2)]
        outstream.write(pairsam_format.PAIRSAM_SEP.join(cols))
        outstream.write("\n")

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()
def sample_py(fraction, pairs_path, output, seed, **kwargs):
    """Copy the header and a random subset of body lines to the output.

    Parameters
    ----------
    fraction : float
        Probability with which each body line is kept.
    pairs_path : str or None
        Input path, handed to ``fileio.auto_open`` in read mode.
    output : str
        Output path, handed to ``fileio.auto_open`` in write mode.
    seed : int or None
        Seed passed to ``random.seed`` before sampling starts.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    outstream.writelines((entry + "\n" for entry in header))

    random.seed(seed)

    # random.random() lies in [0.0, 1.0): a strict comparison keeps nothing
    # for fraction == 0 and everything for fraction == 1, whereas "<=" could
    # let a pair through at fraction == 0 when the draw is exactly 0.0.
    outstream.writelines(
        line for line in body_stream if random.random() < fraction
    )

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()
import cli, common_io_options from ..lib.scaling import compute_scaling UTIL_NAME = "pairtools_scaling" @cli.command() @click.argument("input_path", type=str, nargs=-1, required=False) @click.option( "-o", "--output", type=str, default="", help="output .tsv file with summary." ) @click.option( "--view", "--regions", help="Path to a BED file which defines which regions (viewframe) of the chromosomes to use. " "By default, this is parsed from .pairs header. ", type=str, required=False, default=None, ) @click.option( "--chunksize", type=int, default=100_000, show_default=True, required=False, help="Number of pairs in each chunk. Reduce for lower memory footprint.", ) @click.option( "--dist-range", type=click.Tuple([int, int]), default=(10, 1_000_000_000), show_default=True, required=False, help="Distance range. ", ) @click.option( "--n-dist-bins", type=int, default=128, show_default=True, required=False, help="Number of distance bins to split the distance range. ", ) @common_io_options def scaling(input_path, output, view, chunksize, dist_range, n_dist_bins, **kwargs): """Calculate pairs scalings. INPUT_PATH : by default, a .pairs/.pairsam file to calculate statistics. If not provided, the input is read from stdin. The files with paths ending with .gz/.lz4 are decompressed by bgzip/lz4c. Output is .tsv file with scaling stats (both cis scalings and trans levels). 
def scaling_py(input_path, output, view, chunksize, dist_range, n_dist_bins, **kwargs):
    """Compute scaling statistics for the first input file and write them as TSV.

    Parameters
    ----------
    input_path : sequence of str
        Input paths; only the first element is opened.
    output : str
        Output path, handed to ``fileio.auto_open`` in write mode.
    view : str or None
        Optional path of a table read with ``pandas.read_table`` and passed to
        ``compute_scaling`` as ``regions``.
    chunksize, dist_range, n_dist_bins
        Forwarded unchanged to ``compute_scaling``.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.

    Raises
    ------
    ValueError
        If ``input_path`` is empty.
    """
    if len(input_path) < 1:
        raise ValueError(f"No input paths: {input_path}")

    first_path = input_path[0]
    instream = fileio.auto_open(
        first_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    # Load the optional regions table; None is passed through as-is.
    regions = None if view is None else pd.read_table(view)

    # The raw input stream (header included) is handed to compute_scaling.
    scaling_parts = compute_scaling(
        instream,
        regions=regions,
        chromsizes=None,
        dist_range=dist_range,
        n_dist_bins=n_dist_bins,
        chunksize=chunksize,
    )
    cis_scalings, trans_levels = scaling_parts

    # Stack both tables and dump them tab-separated.
    pd.concat([cis_scalings, trans_levels]).to_csv(outstream, sep="\t")

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
" By default, such pairs are dropped.", ) # Deprecated option to be removed in the future: # @click.option( # "--send-comments-to", # type=click.Choice(['selected', 'rest', 'both', 'none']), # default="both", # help="Which of the outputs should receive header and comment lines", # show_default=True) @click.option( "--chrom-subset", type=str, default=None, help="A path to a chromosomes file (tab-separated, 1st column contains " "chromosome names) containing a chromosome subset of interest. " "If provided, additionally filter pairs with both sides originating from " "the provided subset of chromosomes. This operation modifies the #chromosomes: " "and #chromsize: header fields accordingly.", ) @click.option( "--startup-code", type=str, default=None, help="An auxiliary code to execute before filtering. " "Use to define functions that can be evaluated in the CONDITION statement", ) @click.option( "-t", "--type-cast", type=(str, str), default=(), multiple=True, help="Cast a given column to a given type. By default, only pos and mapq " "are cast to int, other columns are kept as str. Provide as " "-t , e.g. -t read_len1 int. Multiple entries are allowed.", ) @click.option( "--remove-columns", "-r", help=f"Comma-separated list of columns to be removed, e.g.: {','.join(pairsam_format.COLUMNS)}", type=str, default="", required=False, ) @common_io_options def select( condition, pairs_path, output, output_rest, # send_comments_to, chrom_subset, startup_code, type_cast, remove_columns, **kwargs, ): """Select pairs according to some condition. CONDITION : A Python expression; if it returns True, select the read pair. Any column declared in the #columns line of the pairs header can be accessed by its name. If the header lacks the #columns line, the columns are assumed to follow the .pairs/.pairsam standard (readID, chrom1, chrom2, pos1, pos2, strand1, strand2, pair_type). Finally, CONDITION has access to COLS list which contains the string values of columns. 
def select_py(
    condition,
    pairs_path,
    output,
    output_rest,  # send_comments_to,
    chrom_subset,
    startup_code,
    type_cast,
    remove_columns,
    **kwargs,
):
    """Route pairs to ``output`` or ``output_rest`` depending on ``condition``.

    Parameters
    ----------
    condition : str
        Expression evaluated for every pair by ``evaluate_stream``.
    pairs_path : str or None
        Input path, handed to ``fileio.auto_open`` in read mode.
    output : str
        Destination of pairs for which the condition holds.
    output_rest : str
        Destination of the remaining pairs; when empty they are dropped.
    chrom_subset : str or None
        Path to a tab-separated file whose first column lists chromosome
        names. When given, the condition is extended so that both sides must
        belong to this list and the header is subset accordingly.
    startup_code : str or None
        Forwarded to ``evaluate_stream``.
    type_cast : sequence of (str, str)
        Forwarded to ``evaluate_stream``.
    remove_columns : str
        Comma-separated column names to drop from the header and the output lines.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    # Optional output created only if requested:
    outstream_rest = None
    if output_rest:
        outstream_rest = fileio.auto_open(
            output_rest,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    # Parse the input stream:
    header, body_stream = headerops.get_header(instream)

    # Modify the header:
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)

    # Column layout of the *input* body lines. The condition is evaluated on
    # unmodified input lines, so these names (not the reduced output header)
    # are the ones that line up with the column positions.
    input_columns = headerops.extract_column_names(header)

    # Filter out unwanted columns:
    if remove_columns:
        remove_columns = remove_columns.split(",")
        for col in remove_columns:
            if col in pairsam_format.COLUMNS_PAIRS:
                warnings.warn(
                    f"Removing required {col} column for .pairs format. Output is not .pairs anymore"
                )
            elif col in pairsam_format.COLUMNS_PAIRSAM:
                warnings.warn(
                    f"Removing required {col} column for .pairsam format. Output is not .pairsam anymore"
                )
        updated_columns = [x for x in input_columns if x not in remove_columns]

        if len(updated_columns) == len(input_columns):
            warnings.warn(
                f"Some column(s) {','.join(remove_columns)} not in the file, the operation has no effect"
            )
        else:
            header = headerops.set_columns(header, updated_columns)

        # Positions (in an input line) of the columns that are kept:
        column_scheme = [input_columns.index(col) for col in updated_columns]

    # Update the chromosomes:
    new_chroms = None
    if chrom_subset is not None:
        # Context manager so the chromosome file is closed deterministically.
        with open(chrom_subset, "r") as chroms_file:
            new_chroms = [row.strip().split("\t")[0] for row in chroms_file]
        header = headerops.subset_chroms_in_pairsheader(header, new_chroms)

    outstream.writelines((entry + "\n" for entry in header))
    if output_rest:
        outstream_rest.writelines((entry + "\n" for entry in header))

    # Names used to resolve variables in the condition; fall back to the
    # standard layout when the header declares no columns.
    column_names = input_columns if len(input_columns) > 0 else pairsam_format.COLUMNS

    # Format the condition:
    condition = condition.strip()
    if new_chroms is not None:
        condition = (
            f"({condition}) and (chrom1 in {new_chroms}) and (chrom2 in {new_chroms})"
        )

    for filter_passed, line in evaluate_stream(
        body_stream, condition, column_names, type_cast, startup_code
    ):
        if remove_columns:
            # Split only when the line actually has to be rebuilt.
            COLS = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
            COLS = [COLS[idx] for idx in column_scheme]
            line = pairsam_format.PAIRSAM_SEP.join(COLS) + "\n"

        if filter_passed:
            outstream.write(line)
        elif outstream_rest:
            outstream_rest.write(line)

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()

    if output_rest and outstream_rest != sys.stdout:
        outstream_rest.close()
def sort_py(pairs_path, output, nproc, tmpdir, memory, compress_program, **kwargs):
    """Sort the body of a pairs stream with the external ``sort`` program.

    The header is marked as sorted and written first; the body is piped
    through ``sort`` (run via ``/bin/bash -c`` with ``LC_COLLATE=C`` and
    ``LANG=C``), keyed on chrom1, chrom2, pos1 (numeric), pos2 (numeric) and
    pair type, whose stdout is connected directly to the output stream.

    Parameters
    ----------
    pairs_path : str or None
        Input path, handed to ``fileio.auto_open`` in read mode.
    output : str
        Output path, handed to ``fileio.auto_open`` in write mode.
    nproc : int
        Value for ``sort --parallel``; omitted when not positive.
    tmpdir : str
        Value for ``sort --temporary-directory``; omitted when empty.
    memory : str
        Value for ``sort -S``.
    compress_program : str
        Value for ``sort --compress-program``. ``"auto"`` picks ``lz4c`` when
        it is on PATH and ``gzip`` (with a warning) otherwise.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.

    Raises
    ------
    subprocess.CalledProcessError
        If the ``sort`` subprocess exits with a non-zero status.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    header = headerops.mark_header_as_sorted(header)

    outstream.writelines((entry + "\n" for entry in header))

    # The subprocess writes to the same descriptor; flush so the header
    # reaches the output before any sorted line does.
    outstream.flush()

    if compress_program == "auto":
        if shutil.which("lz4c") is not None:
            compress_program = "lz4c"
        else:
            warnings.warn(
                "lz4c is not found. Using gzip for compression of sorted chunks, "
                "which results in a minor decrease in performance. Please install "
                "lz4c for faster sorting."
            )
            compress_program = "gzip"

    command = r"""
        /bin/bash -c 'export LC_COLLATE=C; export LANG=C; sort
        -k {0},{0} -k {1},{1} -k {2},{2}n -k {3},{3}n -k {4},{4}
        --stable
        --field-separator=$'\''{5}'\''
        {6}
        {7}
        -S {8}
        {9}
        """.replace(
        "\n", " "
    ).format(
        # sort(1) keys are 1-based, the COL_* constants are 0-based.
        pairsam_format.COL_C1 + 1,
        pairsam_format.COL_C2 + 1,
        pairsam_format.COL_P1 + 1,
        pairsam_format.COL_P2 + 1,
        pairsam_format.COL_PTYPE + 1,
        pairsam_format.PAIRSAM_SEP_ESCAPE,
        " --parallel={} ".format(nproc) if nproc > 0 else " ",
        " --temporary-directory={} ".format(tmpdir) if tmpdir else " ",
        memory,
        (
            " --compress-program={} ".format(compress_program)
            if compress_program
            else " "
        ),
    )
    # Close the single-quoted argument of `bash -c`.
    command += "'"

    with subprocess.Popen(
        command, stdin=subprocess.PIPE, bufsize=-1, shell=True, stdout=outstream
    ) as process:
        stdin_wrapper = io.TextIOWrapper(process.stdin, "utf-8")
        for line in body_stream:
            stdin_wrapper.write(line)
        stdin_wrapper.flush()
        process.communicate()
    returncode = process.returncode

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()

    # A failed sort would otherwise leave a silently truncated output.
    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, command)
sort() pairtools-1.0.3/pairtools/cli/split.py000066400000000000000000000112151452673171500200320ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import sys import pipes import click from ..lib import fileio, pairsam_format, headerops from . import cli, common_io_options UTIL_NAME = "pairtools_split" @cli.command() @click.argument("pairsam_path", type=str, required=False) @click.option( "--output-pairs", type=str, default="", help="output pairs file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " If -, pairs are printed to stdout." " If not specified, pairs are dropped.", ) @click.option( "--output-sam", type=str, default="", help="output sam file." " If the path ends with .bam, the output is compressed into a bam file." " If -, sam entries are printed to stdout." " If not specified, sam entries are dropped.", ) @common_io_options def split(pairsam_path, output_pairs, output_sam, **kwargs): """Split a .pairsam file into .pairs and .sam. Restore a .sam file from sam1 and sam2 fields of a .pairsam file. Create a .pairs file without sam1/sam2 fields. PAIRSAM_PATH : input .pairsam file. If the path ends with .gz or .lz4, the input is decompressed by bgzip or lz4c. By default, the input is read from stdin. 
def split_py(pairsam_path, output_pairs, output_sam, **kwargs):
    """Split a pairsam stream into a pairs stream and/or a sam stream.

    The sam1/sam2 columns are removed from every line. The remaining columns
    go, tab-joined, to the pairs output; the sam entries stored in the two
    removed columns go to the sam output, preceded by the ``#samheader:``
    lines of the input header.

    Parameters
    ----------
    pairsam_path : str or None
        Input path, handed to ``fileio.auto_open`` in read mode.
    output_pairs : str
        Pairs output path; when empty no pairs are written.
    output_sam : str
        Sam output path; when empty no sam entries are written.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.

    Raises
    ------
    ValueError
        If neither output is given, if both outputs are ``"-"``, or if the
        ``#columns`` header lists only one of sam1/sam2.
    """
    # Validate the arguments before any stream is opened, so that a bad call
    # does not leave an open input behind.
    if (not output_pairs) and (not output_sam):
        raise ValueError("At least one output (pairs and/or sam) must be specified!")
    if (output_pairs == "-") and (output_sam == "-"):
        raise ValueError("Only one output (pairs or sam) can be printed in stdout!")

    instream = fileio.auto_open(
        pairsam_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )

    # Output streams
    outstream_pairs = None
    outstream_sam = None

    if output_pairs:
        outstream_pairs = fileio.auto_open(
            output_pairs,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
    if output_sam:
        outstream_sam = fileio.auto_open(
            output_sam,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    columns = headerops.extract_column_names(header)

    has_sams = False
    if columns:
        # trust the column order specified in the header
        if ("sam1" in columns) and ("sam2" in columns):
            sam1col = columns.index("sam1")
            sam2col = columns.index("sam2")
            # Pop the larger index first so the smaller one stays valid.
            columns.pop(max(sam1col, sam2col))
            columns.pop(min(sam1col, sam2col))
            header = headerops._update_header_entry(
                header, "columns", " ".join(columns)
            )
            has_sams = True
        elif ("sam1" in columns) != ("sam2" in columns):
            raise ValueError(
                "According to the #columns header field only one sam entry is present"
            )
    else:
        # assume that the file has sam columns and follows the pairsam format
        sam1col = pairsam_format.COL_SAM1
        sam2col = pairsam_format.COL_SAM2
        has_sams = True

    if output_pairs:
        outstream_pairs.writelines((entry + "\n" for entry in header))
    if output_sam:
        # 11 == len("#samheader:"); strip the prefix to restore the sam header line.
        outstream_sam.writelines(
            (
                entry[11:].strip() + "\n"
                for entry in header
                if entry.startswith("#samheader:")
            )
        )

    # Split
    sam1 = None
    sam2 = None
    for line in body_stream:
        cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)

        if has_sams:
            # Always pop the right-most of the two columns first, so the
            # index of the other one is not shifted.
            if sam1col < sam2col:
                sam2 = cols.pop(sam2col)
                sam1 = cols.pop(sam1col)
            else:
                sam1 = cols.pop(sam1col)
                sam2 = cols.pop(sam2col)

        if output_pairs:
            # hard-coded tab separator to follow the DCIC pairs standard
            outstream_pairs.write("\t".join(cols))
            outstream_pairs.write("\n")

        if output_sam and has_sams:
            for col in (sam1, sam2):
                if col != ".":
                    for sam_entry in col.split(pairsam_format.INTER_SAM_SEP):
                        outstream_sam.write(
                            sam_entry.replace(pairsam_format.SAM_SEP, "\t")
                        )
                        outstream_sam.write("\n")

    # Close the input like the sibling commands do.
    if instream != sys.stdin:
        instream.close()

    if output_pairs and outstream_pairs != sys.stdout:
        outstream_pairs.close()

    if output_sam and outstream_sam != sys.stdout:
        outstream_sam.close()
", ) @click.option( "--bytile-dups/--no-bytile-dups", default=False, help="If enabled, will analyse by-tile duplication statistics to estimate" " library complexity more accurately." " Requires parent_readID column to be saved by dedup (will be ignored otherwise)" " Saves by-tile stats into --output_bytile-stats stream, or regular output if --output_bytile-stats is not provided.", ) @click.option( "--output-bytile-stats", default="", required=False, help="output file for tile duplicate statistics." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, by-tile duplicate statistics are not printed." " Note that the readID and parent_readID should be provided and contain tile information for this option.", ) # Filtering options: @click.option( "--filter", default=None, required=False, multiple=True, help="Filters with conditions to apply to the data (similar to `pairtools select`). " "For non-YAML output only the first filter will be reported. " """Example: pairtools stats --yaml --filter 'unique:(pair_type=="UU")' --filter 'close:(pair_type=="UU") and (abs(pos1-pos2)<10)' test.pairs """, ) @click.option( "--engine", default="pandas", required=False, help="Engine for regular expression parsing. " "Python will provide you regex functionality, while pandas does not accept custom funtctions and works faster. ", ) @click.option( "--chrom-subset", type=str, default=None, required=False, help="A path to a chromosomes file (tab-separated, 1st column contains " "chromosome names) containing a chromosome subset of interest. " "If provided, additionally filter pairs with both sides originating from " "the provided subset of chromosomes. This operation modifies the #chromosomes: " "and #chromsize: header fields accordingly.", ) @click.option( "--startup-code", type=str, default=None, required=False, help="An auxiliary code to execute before filtering. 
def stats_py(
    input_path, output, merge, bytile_dups, output_bytile_stats, filter, **kwargs
):
    """Collect pair statistics from the first input file, or merge stats files.

    Parameters
    ----------
    input_path : sequence of str
        Input paths. With ``merge`` they are all passed to ``do_merge``;
        otherwise only the first one is opened.
    output : str
        Output path for the statistics.
    merge : bool
        When true, delegate to ``do_merge(output, input_path, **kwargs)`` and
        return.
    bytile_dups : bool
        Request by-tile duplicate statistics. Switched off (with a warning)
        when the input has no ``parent_readID`` column.
    output_bytile_stats : str
        Destination of by-tile statistics; when empty and ``bytile_dups`` is
        set, the regular output stream is used. A non-empty value turns
        ``bytile_dups`` on.
    filter : sequence of str or None
        Entries of the form ``name:condition``.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` go to
        ``fileio.auto_open``; ``yaml``, ``with_chromsizes``, ``startup_code``,
        ``type_cast`` and ``engine`` are read here.

    Raises
    ------
    ValueError
        If ``merge`` is false and ``input_path`` is empty.
    """
    if merge:
        do_merge(output, input_path, **kwargs)
        return

    if len(input_path) == 0:
        raise ValueError(f"No input paths: {input_path}")

    instream = fileio.auto_open(
        input_path[0],
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )
    if bytile_dups and not output_bytile_stats:
        output_bytile_stats = outstream
    if output_bytile_stats:
        bytile_dups = True

    header, body_stream = headerops.get_header(instream)
    cols = headerops.extract_column_names(header)

    # Check necessary columns for reporting by-tile stats:
    if bytile_dups and "parent_readID" not in cols:
        logger.warning(
            "No 'parent_readID' column in the file, not generating duplicate stats."
        )
        bytile_dups = False

    as_yaml = kwargs.get("yaml", False)

    # Define filters and their properties
    first_filter_name = "no_filter"  # default filter name for full output
    if filter is not None and len(filter) > 0:
        first_filter_name = filter[0].split(":", 1)[0]
        if len(filter) > 1 and not as_yaml:
            # Logger.warn is a deprecated alias; use warning() like the
            # parent_readID check in this function.
            logger.warning(
                f"Output the first filter only in non-YAML output: {first_filter_name}"
            )

        # "name:condition" -> {name: condition}; split on the first colon only.
        filter = dict(f.split(":", 1) for f in filter)
    else:
        filter = None

    stats = PairCounter(
        bytile_dups=bytile_dups,
        filters=filter,
        startup_code=kwargs.get("startup_code", ""),  # for evaluation of filters
        type_cast=kwargs.get("type_cast", ()),  # for evaluation of filters
        engine=kwargs.get("engine", "pandas"),
    )

    # Collecting statistics
    for chunk in pd.read_table(body_stream, names=cols, chunksize=100_000):
        stats.add_pairs_from_dataframe(chunk)

    if kwargs.get("with_chromsizes", True):
        chromsizes = headerops.extract_chromsizes(header)
        stats.add_chromsizes(chromsizes)

    if bytile_dups:
        stats.save_bytile_dups(output_bytile_stats)

    # save statistics to file ...
    stats.save(
        outstream,
        yaml=as_yaml,  # format as yaml
        filter=first_filter_name
        if not as_yaml
        else None,  # output only the first filter if non-YAML output
    )

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
import select pairtools-1.0.3/pairtools/lib/dedup.py000066400000000000000000000441461452673171500200100ustar00rootroot00000000000000import numpy as np import pandas as pd import scipy.spatial from scipy.sparse import coo_matrix from scipy.sparse.csgraph import connected_components from . import dedup_cython, pairsam_format from .stats import PairCounter from .._logging import get_logger logger = get_logger() import time # Ignore pandas future warnings: import warnings warnings.simplefilter(action='ignore', category=FutureWarning) # Setting for cython deduplication: # you don't need to load more than 10k lines at a time b/c you get out of the # CPU cache, so this parameter is not adjustable MAX_LEN = 10000 def streaming_dedup( in_stream, colnames, chunksize, carryover, method, mark_dups, max_mismatch, extra_col_pairs, unmapped_chrom, outstream, outstream_dups, outstream_unmapped, keep_parent_id, out_stat, backend, n_proc, ): deduped_chunks = _dedup_stream( in_stream=in_stream, colnames=colnames, method=method, chunksize=chunksize, carryover=carryover, mark_dups=mark_dups, max_mismatch=max_mismatch, extra_col_pairs=extra_col_pairs, keep_parent_id=keep_parent_id, backend=backend, n_proc=n_proc, ) t0 = time.time() N = 0 for df_chunk in deduped_chunks: N += df_chunk.shape[0] # Write the stats if requested: if out_stat is not None: out_stat.add_pairs_from_dataframe(df_chunk, unmapped_chrom=unmapped_chrom) # Define masks of unmapped and duplicated reads: mask_mapped = np.logical_and( (df_chunk["chrom1"] != unmapped_chrom), (df_chunk["chrom2"] != unmapped_chrom), ) mask_duplicates = df_chunk["duplicate"] # Clean up dataframe: df_chunk = df_chunk.drop(columns=["duplicate"]) # Stream the dups: if outstream_dups: df_chunk.loc[mask_mapped & mask_duplicates, :].to_csv( outstream_dups, index=False, header=False, sep="\t" ) # Drop readID if it was created (not needed for nodup and unmapped pairs): if keep_parent_id: df_chunk = df_chunk.drop(columns=["parent_readID"]) # Stream 
unmapped: if outstream_unmapped: df_chunk.loc[~mask_mapped, :].to_csv( outstream_unmapped, index=False, header=False, sep="\t" ) # Stream unique pairs: df_chunk.loc[mask_mapped & (~mask_duplicates), :].to_csv( outstream, index=False, header=False, sep="\t" ) t1 = time.time() t = t1 - t0 logger.debug(f"total time: {t}") if N > 0: logger.debug(f"time per mln pairs: {t/N*1e6}") else: logger.debug(f"Processed {N} pairs") def _dedup_stream( in_stream, colnames, method, chunksize, carryover, mark_dups, max_mismatch, extra_col_pairs, keep_parent_id, backend, n_proc, ): # Stream the input dataframe: dfs = pd.read_table( in_stream, comment=None, names=colnames, chunksize=chunksize ) # Set up the carryover dataframe: df_prev_nodups = pd.DataFrame([]) prev_i = 0 # Iterate over chunks: for df in dfs: df_marked = _dedup_chunk( pd.concat([df_prev_nodups, df], axis=0, ignore_index=True).reset_index( drop=True ), r=max_mismatch, method=method, keep_parent_id=keep_parent_id, extra_col_pairs=extra_col_pairs, backend=backend, n_proc=n_proc, ) df_marked = df_marked.loc[prev_i:, :].reset_index(drop=True) mask_duplicated = df_marked["duplicate"] if mark_dups: df_marked.loc[mask_duplicated, "pair_type"] = "DD" # Filter out duplicates and store specific columns: df_nodups = df_marked.loc[~mask_duplicated, colnames] # Re-define carryover pairs: df_prev_nodups = df_nodups.tail(carryover).reset_index(drop=True) prev_i = len(df_prev_nodups) yield df_marked def _dedup_chunk( df, r, method, keep_parent_id, extra_col_pairs, backend, unmapped_chrom="!", n_proc=1, ): """Mark duplicates in a dataframe of pairs Parameters ---------- df : pd.DataFrame Dataframe with pairs, has to contain columns 'chrom1', 'pos1', 'chrom2', 'pos2' 'strand1', 'strand2' r : int Allowed distance between two pairs to call them duplicates method : str 'sum' or 'max' - whether 'r' uses sum of distances on two ends of pairs, or the maximal distance keep_parent_id : bool If True, the read ID of the read that was not labelled 
as a duplicate from a group of duplicates is recorded for each read marked as duplicate. Only possible with non-cython backends extra_col_pairs : list of tuples List of extra column pairs that need to match between two reads for them be considered duplicates (e.g. useful if alleles are annotated) backend : str 'scipy', 'sklearn', 'cython' unmapped_chrom : str, optional Which character denotes unmapped reads in the chrom1/chrom2 fields, by default "!" n_proc : int, optional How many cores to use, by default 1 Only works for 'sklearn' backend Returns ------- pd.DataFrame Dataframe with marked duplicates (extra boolean field 'duplicate'), and optionally recorded 'parent_readID' """ if method not in ("max", "sum"): raise ValueError('Unknown method, only "sum" or "max" allowed') if backend == "sklearn": from sklearn import neighbors if method == "sum": p = 1 else: p = np.inf # Store the index of the dataframe: index_colname = df.index.name if index_colname is None: index_colname = "index" df = df.reset_index() # Remove the index temporarily # Set up columns to store the dedup info: df.loc[:, "clusterid"] = np.nan df.loc[:, "duplicate"] = False # Split mapped and unmapped reads: mask_unmapped = (df["chrom1"] == unmapped_chrom) | (df["chrom2"] == unmapped_chrom) df_unmapped = df.loc[mask_unmapped, :].copy() df_mapped = df.loc[~mask_unmapped, :].copy() N_mapped = df_mapped.shape[0] # If there are some mapped reads, dedup them: if N_mapped > 0: if backend == "sklearn": a = neighbors.radius_neighbors_graph( df_mapped[["pos1", "pos2"]], radius=r, p=p, n_jobs=n_proc, ) a0, a1 = a.nonzero() elif backend == "scipy": z = scipy.spatial.cKDTree(df_mapped[["pos1", "pos2"]]) a = z.query_pairs(r=r, p=p, output_type="ndarray") a0 = a[:, 0] a1 = a[:, 1] need_to_match = np.array( [ ("chrom1", "chrom1"), ("chrom2", "chrom2"), ("strand1", "strand1"), ("strand2", "strand2"), ] + extra_col_pairs ) nonpos_matches = np.all( [ df_mapped.iloc[a0, df_mapped.columns.get_loc(lc)].values == 
df_mapped.iloc[a1, df_mapped.columns.get_loc(rc)].values for (lc, rc) in need_to_match ], axis=0, ) a0 = a0[nonpos_matches] a1 = a1[nonpos_matches] a_mat = coo_matrix((np.ones_like(a0), (a0, a1)), shape=(N_mapped, N_mapped)) # Set up inferred clusterIDs: df_mapped.loc[:, "clusterid"] = connected_components(a_mat, directed=False)[1] mask_dups = df_mapped["clusterid"].duplicated() df_mapped.loc[mask_dups, "duplicate"] = True # Mark parent IDs if requested: if keep_parent_id: df_mapped.loc[:, "parent_readID"] = df_mapped["clusterid"].map( df_mapped[~mask_dups].set_index("clusterid")["readID"] ) df_unmapped.loc[:, "parent_readID"] = "" # Reconstruct original dataframe with removed duplicated reads: df = pd.concat([df_unmapped, df_mapped]).reset_index(drop=True) df = df.set_index(index_colname) # Set up the original index df = df.drop( ["clusterid"], axis=1 ) # Remove the information that we don't need anymore: return df ### Cython deduplication #### def streaming_dedup_cython( method, max_mismatch, sep, c1ind, c2ind, p1ind, p2ind, s1ind, s2ind, extra_cols1, extra_cols2, unmapped_chrom, instream, outstream, outstream_dups, outstream_unmapped, out_stat, mark_dups, keep_parent_id=False, readid_ind=0, ): """ Cython-powered deduplication with online algorithm based on indexed list. 
Parameters ---------- method: "max" or "sum" max_mismatch: maximum allowed mismatch to count the pairs as duplicates sep: separator of the fields in the input file c1ind: index of the chr1 column c2ind: index of the chr2 column p1ind: index of the pos1 column p2ind: index of the pos2 column s1ind: index of the strand1 column s2ind: index of the strand2 column extra_cols1: extra columns for left alignment in a pair to add extra_cols2: extra columns for right alignment in a pair to add unmapped_chrom: Symbol of the chromosome for the unmapped alignment instream: input stream of pair file outstream: output stram of deduplicated pairs outstream_dups: output stream of duplicates (optionally with added parent_id, see keep_parent_id option) outstream_unmapped: output stram of unmapped pairs out_stat: output statistics mark_dups: if True, will add "DD" as the pair_type keep_parent_id: if True, additional column "parent_readID will be added to the output, can be useful for optical duplicates search readid_ind: index of the readID column in the input file Returns ------- """ maxind = max(c1ind, c2ind, p1ind, p2ind, s1ind, s2ind) if bool(extra_cols1) and bool(extra_cols2): maxind = max(maxind, max(extra_cols1), max(extra_cols2)) all_scols1 = [s1ind] + extra_cols1 all_scols2 = [s2ind] + extra_cols2 # if we do stats in the dedup, we need PAIR_TYPE # i do not see way around this: if out_stat: ptind = pairsam_format.COL_PTYPE maxind = max(maxind, ptind) dd = dedup_cython.OnlineDuplicateDetector( method, max_mismatch, returnData=False, keep_parent_id=keep_parent_id ) c1 = [] c2 = [] p1 = [] p2 = [] s1 = [] s2 = [] idx = [] line_buffer = [] cols_buffer = [] chromDict = {} strandDict = {} curMaxLen = max(MAX_LEN, dd.getLen()) t0 = time.time() N = 0 instream = iter(instream) read_idx = 0 # read index to mark the parent readID while True: rawline = next(instream, None) stripline = rawline.strip('\n') if rawline else None # take care of empty lines not at the end of the file separately 
if rawline and (not stripline): logger.warning("Empty line detected not at the end of the file") continue if stripline: cols = stripline.split(sep) if len(cols) <= maxind: raise ValueError( "Error parsing line {}: ".format(stripline) + " expected {} columns, got {}".format(maxind, len(cols)) ) if (cols[c1ind] == unmapped_chrom) or (cols[c2ind] == unmapped_chrom): if outstream_unmapped: outstream_unmapped.write(stripline) # don't forget terminal newline outstream_unmapped.write("\n") # add a pair to PairCounter if stats output is requested: if out_stat: out_stat.add_pair( cols[c1ind], int(cols[p1ind]), cols[s1ind], cols[c2ind], int(cols[p2ind]), cols[s2ind], cols[ptind], ) else: line_buffer.append(stripline) cols_buffer.append(cols) c1.append(fetchadd(cols[c1ind], chromDict)) c2.append(fetchadd(cols[c2ind], chromDict)) p1.append(int(cols[p1ind])) p2.append(int(cols[p2ind])) idx.append(read_idx) read_idx += 1 if bool(extra_cols1) and bool(extra_cols2): s1.append( fetchadd("".join(cols[i] for i in all_scols1), strandDict) ) s2.append( fetchadd("".join(cols[i] for i in all_scols2), strandDict) ) else: s1.append(fetchadd(cols[s1ind], strandDict)) s2.append(fetchadd(cols[s2ind], strandDict)) N += 1 if (not stripline) or (len(c1) == curMaxLen): if keep_parent_id: res, parents = dd.push( ar(c1, 32), ar(c2, 32), ar(p1, 32), ar(p2, 32), ar(s1, 32), ar(s2, 32), ) else: res = dd.push( ar(c1, 32), ar(c2, 32), ar(p1, 32), ar(p2, 32), ar(s1, 32), ar(s2, 32), ) if not stripline: if keep_parent_id: res_tmp, parents_tmp = dd.finish() parents = np.concatenate([parents, parents_tmp]) else: res_tmp = dd.finish() res = np.concatenate([res, res_tmp]) for i in range(len(res)): # not duplicated pair: if not res[i]: outstream.write(line_buffer[i]) # don't forget terminal newline outstream.write("\n") if out_stat: out_stat.add_pair( cols_buffer[i][c1ind], int(cols_buffer[i][p1ind]), cols_buffer[i][s1ind], cols_buffer[i][c2ind], int(cols_buffer[i][p2ind]), cols_buffer[i][s2ind], 
cols_buffer[i][ptind], ) # duplicated pair: else: if out_stat: out_stat.add_pair( cols_buffer[i][c1ind], int(cols_buffer[i][p1ind]), cols_buffer[i][s1ind], cols_buffer[i][c2ind], int(cols_buffer[i][p2ind]), cols_buffer[i][s2ind], "DD", ) if outstream_dups: if mark_dups: # DD-marked pair: output = sep.join(mark_split_pair_as_dup(cols_buffer[i])) else: # pair as is: output = line_buffer[i] if keep_parent_id: # Add parentID as the last column: parent_readID = line_buffer[parents[i]].split(sep)[ readid_ind ] output = sep.join([output, parent_readID]) outstream_dups.write(output) # don't forget terminal newline outstream_dups.write("\n") # flush buffers and perform necessary checks here: c1 = [] c2 = [] p1 = [] p2 = [] s1 = [] s2 = [] line_buffer = line_buffer[len(res) :] cols_buffer = cols_buffer[len(res) :] if not stripline: if len(line_buffer) != 0: raise ValueError( "{} lines left in the buffer, ".format(len(line_buffer)) + "should be none;" + "something went terribly wrong" ) break # process next line ... # all lines have been processed at this point. # streaming_dedup is over. t1 = time.time() t = t1 - t0 logger.debug(f"total time: {t}") if N > 0: logger.debug(f"time per mln pairs: {t/N*1e6}") else: logger.debug(f"Processed {N} pairs") def fetchadd(key, mydict): key = key.strip() if key not in mydict: mydict[key] = len(mydict) return mydict[key] def ar(mylist, val): return np.array(mylist, dtype={8: np.int8, 16: np.int16, 32: np.int32}[val]) #### Markasdup utilities: #### def mark_split_pair_as_dup(cols): # if the original columns ended with a new line, the marked columns # should as well. 
original_has_newline = cols[-1].endswith("\n") cols[pairsam_format.COL_PTYPE] = "DD" if (len(cols) > pairsam_format.COL_SAM1) and (len(cols) > pairsam_format.COL_SAM2): for i in (pairsam_format.COL_SAM1, pairsam_format.COL_SAM2): # split each sam column into sam entries, tag and assemble back cols[i] = pairsam_format.INTER_SAM_SEP.join( [ mark_sam_as_dup(sam) for sam in cols[i].split(pairsam_format.INTER_SAM_SEP) ] ) if original_has_newline and not cols[-1].endswith("\n"): cols[-1] = cols[-1] + "\n" return cols def mark_sam_as_dup(sam): """Tag the binary flag and the optional pair type field of a sam entry as a PCR duplicate.""" samcols = sam.split(pairsam_format.SAM_SEP) if len(samcols) == 1: return sam samcols[1] = str(int(samcols[1]) | 1024) for j in range(11, len(samcols)): if samcols[j].startswith("Yt:Z:"): samcols[j] = "Yt:Z:DD" return pairsam_format.SAM_SEP.join(samcols) pairtools-1.0.3/pairtools/lib/dedup_cython.pyx000066400000000000000000000135311452673171500215560ustar00rootroot00000000000000""" Legacy code: ``mark_duplicates`` is an offline method that finds duplicates in a given input dataset. For other applications on much larger datasets you may consider an online method ``OnlineDuplicateDetector`` which is implemented as a class. Note that for both methods data types are fixed: * chromosomes are int32 * position is int32 * strand is int32, which is basically the same as C type "char". 
""" import numpy as np import cython cimport numpy as np cimport cython ### Online deduplicator used in pairtools.dedup Cython: cdef class OnlineDuplicateDetector(object): cdef cython.int [:] c1 cdef cython.int [:] c2 cdef cython.int [:] p1 cdef cython.int [:] p2 cdef cython.int [:] s1 cdef cython.int [:] s2 cdef cython.char [:] rm cdef cython.int [:] parent_idxs cdef int methodid cdef int low cdef int high cdef int N cdef int max_mismatch cdef int returnData cdef int keep_parent_id def __init__(self, method, max_mismatch, returnData=False, keep_parent_id=False): if returnData == False: self.returnData = 0 else: self.returnData = 1 if keep_parent_id == False: self.keep_parent_id = 0 else: self.keep_parent_id = 1 self.parent_idxs = np.zeros(0, np.int32) self.N = 0 self.c1 = np.zeros(0, np.int32) self.c2 = np.zeros(0, np.int32) self.p1 = np.zeros(0, np.int32) self.p2 = np.zeros(0, np.int32) self.s1 = np.zeros(0, np.int32) self.s2 = np.zeros(0, np.int32) self.rm = np.zeros(0, np.int8) if method == "max": self.methodid = 0 elif method == "sum": self.methodid = 1 else: raise ValueError('method should be "sum" or "max"') self.max_mismatch = int(max_mismatch) self.low = 0 self.high = 1 def _shrink(self): if self.returnData == 1: firstret = self.rm[:self.low] retainMask = (np.asarray(firstret) == False) del firstret ret = [] for ar in [self.c1, self.c2, self.p1, self.p2, self.s1, self.s2]: ret.append(np.asarray(ar)[:self.low][retainMask]) self.c1 = self.c1[self.low:] self.c2 = self.c2[self.low:] self.p1 = self.p1[self.low:] self.p2 = self.p2[self.low:] self.s1 = self.s1[self.low:] self.s2 = self.s2[self.low:] pastrm = self.rm[:self.low] self.rm = self.rm[self.low:] self.high = self.high-self.low self.N = self.N - self.low if self.returnData == 1: self.low = 0 return ret if self.keep_parent_id == 1: # Return parent readIDs alongside with duplicates mask: pastidx = self.parent_idxs[:self.low] self.low = 0 return pastrm, pastidx self.low = 0 return pastrm def _run(self, 
finish=False): cdef int finishing = 0 cdef int extraCondition if finish: finishing = 1 while True: if self.low == self.N: break if self.high == self.N: if finishing == 1: self.low += 1 self.high = self.low + 1 continue else: break if self.rm[self.low] == 1: self.low += 1 self.high = self.low+1 continue # if high already removed, just continue if self.rm[self.high] == 1: self.high += 1 continue # if we jumped too far, continue if ((self.c1[self.high] != self.c1[self.low]) or (self.p1[self.high] - self.p1[self.low] > self.max_mismatch) or (self.p1[self.high] - self.p1[self.low] < 0 )): self.low += 1 self.high = self.low + 1 # restart high continue if self.methodid == 0: extraCondition = max( abs(self.p1[self.low] - self.p1[self.high]), abs(self.p2[self.low] - self.p2[self.high])) <= self.max_mismatch elif self.methodid == 1: # sum of distances <= max_mismatch extraCondition = ( abs(self.p1[self.low] - self.p1[self.high]) + abs(self.p2[self.low] - self.p2[self.high]) <= self.max_mismatch ) else: raise ValueError( "Unknown method id, this should not happen. 
" "Check code of this function.") if ((self.c2[self.low] == self.c2[self.high]) and (self.s1[self.low] == self.s1[self.high]) and (self.s2[self.low] == self.s2[self.high]) and extraCondition): self.rm[self.high] = 1 if self.keep_parent_id == 1: self.parent_idxs[self.high] = self.low self.high += 1 continue self.high += 1 return self._shrink() def push(self, c1, c2, p1, p2, s1, s2): self.c1 = np.concatenate([self.c1, c1]) self.c2 = np.concatenate([self.c2, c2]) self.p1 = np.concatenate([self.p1, p1]) self.p2 = np.concatenate([self.p2, p2]) self.s1 = np.concatenate([self.s1, s1]) self.s2 = np.concatenate([self.s2, s2]) self.rm = np.concatenate([self.rm, np.zeros(len(c1), dtype=np.int8)]) if self.keep_parent_id == 1: self.parent_idxs = np.concatenate([self.parent_idxs, np.zeros(len(c1), dtype=np.int32)]) self.N = self.N + len(c1) return self._run(finish=False) def finish(self): return self._run(finish=True) def getLen(self): return int(self.N)pairtools-1.0.3/pairtools/lib/fileio.py000066400000000000000000000201161452673171500201450ustar00rootroot00000000000000import shutil import pipes import subprocess import sys class ParseError(Exception): pass def auto_open(path, mode, nproc=1, command=None): """Guess the file format from the extension and use the corresponding binary to open it for reading or writing. If the extension is not known, open the file as text. If the binary allows parallel execution, specify the number of threads with `nproc`. If `command` is supplied, use it to open the file instead of auto-guessing. The command must accept the filename as the last argument, accept input through stdin and print output into stdout. 
Supported extensions and binaries (with comments): .bam - samtools view (allows parallel writing) .gz - pbgzip if available, otherwise bgzip .lz4 - lz4c (does not support parallel execution) """ # Empty filepath or False provided if not path or path == "-": if mode == "r": return sys.stdin if mode == "w": return sys.stdout if command: if mode == "w": t = pipes.Template() t.append(command, "--") f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append(command, "--") f = t.open(path, "r") else: raise ValueError("Unknown mode : {}".format(mode)) return f elif path.endswith(".bam"): if shutil.which("samtools") is None: raise ValueError( { "w": "samtools is not found, cannot compress output", "r": "samtools is not found, cannot decompress input", }[mode] ) if mode == "w": t = pipes.Template() t.append( "samtools view -bS {} -".format( "-@ " + str(nproc - 1) if nproc > 1 else "" ), "--", ) f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append("samtools view -h", "--") f = t.open(path, "r") else: raise ValueError("Unknown mode for .bam : {}".format(mode)) return f elif path.endswith(".gz"): if shutil.which("pbgzip") is not None: if mode == "w": t = pipes.Template() t.append("pbgzip -c -n {}".format(nproc), "--") f = t.open(path, "w") elif mode == "a": t = pipes.Template() t.append("pbgzip -c -n {} $IN >> $OUT".format(nproc), "ff") f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append("pbgzip -dc -n {}".format(nproc), "--") f = t.open(path, "r") else: raise ValueError("Unknown mode for .gz : {}".format(mode)) elif shutil.which("bgzip") is not None: if mode == "w": t = pipes.Template() t.append("bgzip -c -@ {}".format(nproc), "--") f = t.open(path, "w") elif mode == "a": t = pipes.Template() t.append("bgzip -c -@ {} $IN >> $OUT".format(nproc), "ff") f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append("bgzip -dc -@ {}".format(nproc), "--") f = t.open(path, "r") else: raise ValueError("Unknown mode for .gz : 
{}".format(mode)) elif shutil.which("gzip") is not None: if mode == "w": t = pipes.Template() t.append("gzip -c", "--") f = t.open(path, "w") elif mode == "a": t = pipes.Template() t.append("gzip -c $IN >> $OUT", "ff") f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append("gzip -dc", "--") f = t.open(path, "r") else: raise ValueError("Unknown mode for .gz : {}".format(mode)) else: raise ValueError( { "w": "pbgzip, bgzip and gzip are not found, cannot compress output", "a": "pbgzip, bgzip and gzip are is not found, cannot compress output", "r": "pbgzip, bgzip and gzip are is not found, cannot decompress input", }[mode] ) return f elif path.endswith(".lz4"): if shutil.which("lz4c") is None: raise ValueError( { "w": "lz4c is not found, cannot compress output", "a": "lz4c is not found, cannot compress output", "r": "lz4c is not found, cannot decompress input", }[mode] ) if mode == "w": t = pipes.Template() t.append("lz4c -cz", "--") f = t.open(path, "w") elif mode == "a": t = pipes.Template() t.append("lz4c -cz $IN >> $OUT", "ff") f = t.open(path, "w") elif mode == "r": t = pipes.Template() t.append("lz4c -cd", "--") f = t.open(path, "r") else: raise ValueError("Unknown mode : {}".format(mode)) return f else: return open(path, mode) class PipedIO: def __init__(self, file_or_path, command, mode="r"): """ An experimental class that reads/writes a file, piping the contents through another process. Parameters ---------- file_or_path : file-like object or str A path to the input/output file or an already opened file-like object. command : str A command to launch a reading/writing process. If mode is 'w', the process must accept input via stdin. If mode is 'r', the process must put output into stdout. If mode is 'r' and file_or_path is str, the path will be appended to the command as the last argument. mode : str The mode for opening, same as in open(mode=). 
Returns ------- file: a file-like object """ if issubclass(type(command), str): command = command.split(" ") self._command = command self._mode = mode if mode.startswith("r"): if issubclass(type(file_or_path), str): self._proc = subprocess.Popen( command + [file_or_path], universal_newlines=True, stdout=subprocess.PIPE, ) else: self._proc = subprocess.Popen( command, universal_newlines=True, stdin=file_or_path, stdout=subprocess.PIPE, ) self._stream = self._proc.stdout self._close_stream = self._proc.stdout.close elif mode.startswith("w") or mode.startswith("a"): f = ( open(file_or_path, mode=mode) if issubclass(type(file_or_path), str) else file_or_path ) self._proc = subprocess.Popen( command, universal_newlines=True, stdin=subprocess.PIPE, stdout=f ) self._stream = self._proc.stdin self.buffer = self._stream.buffer self.closed = self._stream.closed self.flush = self._stream.flush self.fileno = self._stream.fileno self.read = self._stream.read self.readline = self._stream.readline self.readlines = self._stream.readlines self.seek = self._stream.seek self.seekable = self._stream.seekable self.truncate = self._stream.truncate self.tell = self._stream.tell self.writable = self._stream.writable self.write = self._stream.write self.writelines = self._stream.writelines def close(self, timeout=None): self._stream.close() retcode = self._proc.wait(timeout=timeout) return retcode pairtools-1.0.3/pairtools/lib/filterbycov.py000066400000000000000000000173761452673171500212440ustar00rootroot00000000000000import numpy as np import warnings from .dedup import mark_split_pair_as_dup from . 
import pairsam_format def fetchadd(key, mydict): key = key.strip() if key not in mydict: mydict[key] = len(mydict) return mydict[key] def ar(mylist, val): return np.array(mylist, dtype={8: np.int8, 16: np.int16, 32: np.int32}[val]) def _filterbycov(c1_in, p1_in, c2_in, p2_in, max_dist, method): """ This is a slow version of the filtering code used for testing purposes only Use cythonized version in the future!! """ c1 = np.asarray(c1_in, dtype=int) p1 = np.asarray(p1_in, dtype=int) c2 = np.asarray(c2_in, dtype=int) p2 = np.asarray(p2_in, dtype=int) M = np.r_[ np.c_[c1, p1], np.c_[c2, p2] ] # M is a table of (chrom, pos) with 2*N rows assert c1.shape[0] == c2.shape[0] N = 2 * c1.shape[0] ind_sorted = np.lexsort((M[:, 1], M[:, 0])) # sort by chromosomes, then positions # M[ind_sorted] # ind_sorted # M, M[ind_sorted] if method == "sum": proximity_count = np.zeros( N ) # keeps track of how many molecules each framgent end is close to elif method == "max": proximity_count = np.zeros(N) else: raise ValueError("Unknown method: {}".format(method)) low = 0 high = 1 while True: # boundary case finish if low == N: break # boundary case - CHECK if high == N: low += 1 high = low + 1 continue # check if "high" is proximal enough to "low" # first, if chromosomes not equal, we have gone too far, and the positions are not proximal if M[ind_sorted[low], 0] != M[ind_sorted[high], 0]: low += 1 high = low + 1 # restart high continue # next, if positions are not proximal, increase low, and continue elif np.abs(M[ind_sorted[high], 1] - M[ind_sorted[low], 1]) > max_dist: low += 1 high = low + 1 # restart high continue # if on the same chromosome, and the distance is "proximal enough", add to count of both "low" and "high" positions else: proximity_count[low] += 1 proximity_count[high] += 1 high += 1 # unsort proximity count # proximity_count = proximity_count[ind_sorted] proximity_count[ind_sorted] = np.copy(proximity_count) # print(M) # print(proximity_count) # if method is sum of pairs 
if method == "sum": pcounts = proximity_count[0 : N // 2] + proximity_count[N // 2 :] + 1 elif method == "max": pcounts = np.maximum( proximity_count[0 : N // 2] + 1, proximity_count[N // 2 :] + 1 ) else: raise ValueError("Unknown method: {}".format(method)) return pcounts def streaming_filterbycov( method, max_dist, max_cov, sep, c1ind, c2ind, p1ind, p2ind, s1ind, s2ind, unmapped_chrom, instream, outstream, outstream_high, outstream_unmapped, out_stat, mark_multi, ): # doing everything in memory maxind = max(c1ind, c2ind, p1ind, p2ind, s1ind, s2ind) # if we do stats in the dedup, we need PAIR_TYPE # i do not see way around this: if out_stat: ptind = pairsam_format.COL_PTYPE maxind = max(maxind, ptind) c1 = [] c2 = [] p1 = [] p2 = [] s1 = [] s2 = [] line_buffer = [] cols_buffer = [] chromDict = {} strandDict = {} n_unmapped = 0 n_high = 0 n_low = 0 instream = iter(instream) while True: rawline = next(instream, None) stripline = rawline.strip() if rawline else None # take care of empty lines not at the end of the file separately if rawline and (not stripline): warnings.warn("Empty line detected not at the end of the file") continue if stripline: cols = stripline.split(sep) if len(cols) <= maxind: raise ValueError( "Error parsing line {}: ".format(stripline) + " expected {} columns, got {}".format(maxind, len(cols)) ) if (cols[c1ind] == unmapped_chrom) or (cols[c2ind] == unmapped_chrom): if outstream_unmapped: outstream_unmapped.write(stripline) # don't forget terminal newline outstream_unmapped.write("\n") # add a pair to PairCounter if stats output is requested: if out_stat: out_stat.add_pair( cols[c1ind], int(cols[p1ind]), cols[s1ind], cols[c2ind], int(cols[p2ind]), cols[s2ind], cols[ptind], ) else: line_buffer.append(stripline) cols_buffer.append(cols) c1.append(fetchadd(cols[c1ind], chromDict)) c2.append(fetchadd(cols[c2ind], chromDict)) p1.append(int(cols[p1ind])) p2.append(int(cols[p2ind])) s1.append(fetchadd(cols[s1ind], strandDict)) 
s2.append(fetchadd(cols[s2ind], strandDict)) else: # when everything is loaded in memory... res = _filterbycov(c1, p1, c2, p2, max_dist, method) for i in range(len(res)): # not high-frequency interactor pairs: if not res[i] > max_cov: outstream.write(line_buffer[i]) # don't forget terminal newline outstream.write("\n") if out_stat: out_stat.add_pair( cols_buffer[i][c1ind], int(cols_buffer[i][p1ind]), cols_buffer[i][s1ind], cols_buffer[i][c2ind], int(cols_buffer[i][p2ind]), cols_buffer[i][s2ind], cols_buffer[i][ptind], ) # high-frequency interactor pairs: else: if out_stat: out_stat.add_pair( cols_buffer[i][c1ind], int(cols_buffer[i][p1ind]), cols_buffer[i][s1ind], cols_buffer[i][c2ind], int(cols_buffer[i][p2ind]), cols_buffer[i][s2ind], "FF", ) if outstream_high: outstream_high.write( # DD-marked pair: sep.join(mark_split_pair_as_dup(cols_buffer[i])) if mark_multi # pair as is: else line_buffer[i] ) # don't forget terminal newline outstream_high.write("\n") # flush buffers and perform necessary checks here: c1 = [] c2 = [] p1 = [] p2 = [] s1 = [] s2 = [] line_buffer = line_buffer[len(res) :] cols_buffer = cols_buffer[len(res) :] if not stripline: if len(line_buffer) != 0: raise ValueError( "{} lines left in the buffer, ".format(len(line_buffer)) + "should be none;" + "something went terribly wrong" ) break break pairtools-1.0.3/pairtools/lib/headerops.py000066400000000000000000000573201452673171500206570ustar00rootroot00000000000000from collections import defaultdict import sys import copy import itertools import warnings import numpy as np import pandas as pd from .. import __version__ from . 
def get_stream_handlers(instream):
    """Return the ``readline`` and ``peek`` callables of a peekable stream.

    If the stream has a ``buffer`` attribute, the methods of that underlying
    object are used; otherwise the stream itself must provide ``peek``.

    Parameters
    ----------
    instream : file-like object
        Stream to read from.

    Returns
    -------
    readline_f, peek_f : callables
        Bound ``readline`` and ``peek`` methods, in that order.

    Raises
    ------
    ValueError
        If neither ``instream.buffer`` nor ``instream.peek`` exists.
    """
    if hasattr(instream, "buffer"):
        peekable = instream.buffer
    elif hasattr(instream, "peek"):
        peekable = instream
    else:
        raise ValueError("Cannot find the peek() function of the provided stream!")

    peek_f = peekable.peek
    return peekable.readline, peek_f
def validate_cols(stream, columns):
    """
    Validate that the number of columns coincides between stream and columns.
    Checks only the first line in the pairs stream!

    Note that it irreversibly removes the header (and the first body line)
    from the stream.

    Parameters
    ----------
    stream: input stream, body or full .pairs file
    columns: columns to validate against, either a sequence of column names
        or a single string of names joined by SEP_COLS

    Returns
    -------
    True if the number of columns is identical between file and columns
    """
    comment_byte = COMMENT_CHAR.encode()
    readline_f, peek_f = get_stream_handlers(stream)

    # Consume header lines one at a time: after each readline the buffer
    # starts at the beginning of the next line, so peeking is reliable.
    while peek_f(1).startswith(comment_byte):
        readline_f()

    # First non-header line; an exhausted stream yields an empty line,
    # which splits into a single (empty) column.
    line = readline_f()
    if isinstance(line, bytes):
        line = line.decode()
    ncols_body = len(line.split(pairsam_format.PAIRSAM_SEP))

    # Count the reference columns in both supported representations.
    # (Previously the string form was compared as a list against an int,
    # so the check could never succeed.)
    if isinstance(columns, str):
        ncols_reference = len(columns.split(SEP_COLS))
    else:
        ncols_reference = len(columns)

    return ncols_body == ncols_reference


def validate_header_cols(stream, header):
    """Validate that the number of columns corresponds between the stream and header.

    The column names are taken from the "#columns:" entry of ``header`` and
    compared against the first body line of ``stream`` (see ``validate_cols``,
    which consumes the stream's header and first body line).
    """
    columns = extract_column_names(header)
    # Compare against the extracted column names, not the raw header lines.
    return validate_cols(stream, columns)
def get_chrom_order(chroms_file, sam_chroms=None):
    """
    Produce an "enumeration" of chromosomes: a dict of name -> 1-based rank.

    Ranks follow the order of the first tab-separated field of each line of
    ``chroms_file``. When ``sam_chroms`` is given (and non-empty), names from
    the file that are absent from it are skipped, and names present only in
    ``sam_chroms`` are appended afterwards in sorted order.

    Parameters
    ----------
    chroms_file : str
        Path to a text file with one chromosome per line.
    sam_chroms : collection of str, optional
        Chromosome names to restrict and extend the enumeration with.

    Returns
    -------
    dict
        Chromosome name -> rank.
    """
    ranks = {}
    next_rank = 1

    with open(chroms_file, "rt") as handle:
        for raw_line in handle:
            name = raw_line.strip().split("\t")[0]
            if not name:
                continue  # blank line
            if sam_chroms and name not in sam_chroms:
                continue  # not requested
            ranks[name] = next_rank
            next_rank += 1

    if sam_chroms:
        # Names the file did not mention go last, alphabetically.
        leftovers = sorted(name for name in sam_chroms if name not in ranks)
        for rank, name in enumerate(leftovers, start=next_rank):
            ranks[name] = rank

    return ranks
def mark_header_as_sorted(header):
    """Return a copy of ``header`` marked as sorted by chr1-chr2-pos1-pos2.

    A "#sorted: chr1-chr2-pos1-pos2" entry is inserted unless some "#sorted"
    entry is already present, and the names listed in every "#chromosomes"
    entry are put in sorted order. The input list is not modified.

    Parameters
    ----------
    header : list of str
        Lines of a .pairs header.

    Returns
    -------
    list of str
        The updated copy of the header.

    Raises
    ------
    Exception
        If ``is_empty_header`` reports the header as empty/invalid.
    """
    header = copy.deepcopy(header)
    if is_empty_header(header):
        raise Exception("Input file is not valid .pairs, has no header or is empty.")

    if not any(l.startswith("#sorted") for l in header):
        # Keep the "## pairs format" line first when it is there.
        if header[0].startswith("##"):
            header.insert(1, "#sorted: chr1-chr2-pos1-pos2")
        else:
            header.insert(0, "#sorted: chr1-chr2-pos1-pos2")

    field = "#chromosomes"
    for i, line in enumerate(header):
        if line.startswith(field):
            # Remove the field name *and* its colon before splitting.
            # Slicing off only the 12 characters of "#chromosomes" left the
            # ":" behind, which was then sorted in as if it were a chromosome
            # ("#chromosomes: : chr1 ...").
            names = line[len(field):].lstrip(":").strip()
            chroms = sorted(c for c in names.split(SEP_CHROMS) if c)
            header[i] = "#chromosomes: {}".format(SEP_CHROMS.join(chroms))
    return header
VN=None, CL=None, force=False): """Append a @PG record to an existing sam header. If the header comes from a merged file and thus has multiple chains of @PG, append the provided PG to all of the chains, adding the numerical suffix of the branch to the ID. Parameters ---------- header : list of str ID, PN, VN, CL : std The keys of a new @PG record. If absent, VN is the version of pairtools and CL is taken from sys.argv. force : bool If True, ignore the inconsistencies among @PG records of the existing header. Returns ------- new_header : list of str A list of new headers lines, stripped of newline characters. """ if VN is None: VN = __version__ if CL is None: CL = " ".join(sys.argv) pre_pg_header = [ line.strip() for line in samheader if line.startswith("@HD") or line.startswith("@SQ") or line.startswith("@RG") ] post_pg_header = [ line.strip() for line in samheader if not line.startswith("@HD") and (not line.startswith("@SQ")) and (not line.startswith("@RG")) and (not line.startswith("@PG")) ] pg_chains = _parse_pg_chains(samheader, force=force) for i, br in enumerate(pg_chains): new_pg = {"ID": ID, "PN": PN, "VN": VN, "CL": CL} new_pg["PP"] = br[-1]["ID"] if len(pg_chains) > 1: new_pg["ID"] = new_pg["ID"] + "-" + str(i + 1) + "." 
def _toposort(dag, tie_breaker):
    """
    Topological sort on a directed acyclic graph

    Uses Kahn's algorithm with a custom tie-breaking option. The dictionary
    ``dag`` can be interpreted in two ways:

    1. A dependency graph (i.e. arcs point from values to keys), and the
       generator yields items with no dependences followed by items that
       depend on previous ones.
    2. Arcs point from keys to values, in which case the generator produces
       a **reverse** topological ordering of the nodes.

    Parameters
    ----------
    dag: dict of nodes to sets of nodes
        Directed acyclic graph encoded as a dictionary. It is consumed in
        place: entries are deleted and the dependency sets are emptied while
        the generator runs, so pass a copy if the graph is needed afterwards.
    tie_breaker: callable
        Function that picks a tie breaker from a set of nodes with no
        unprocessed dependences.

    Returns
    -------
    Generator

    Raises
    ------
    ValueError
        When nodes remain that can never become dependency-free, i.e. the
        graph contains a cycle. Raised once the generator is exhausted.

    Notes
    -----
    Based in part on an ActiveState recipe by Sam Denton (MIT licensed).

    """
    # Drop self-edges.
    for node, deps in dag.items():
        deps.discard(node)

    # Nodes that only ever appear as dependencies get an explicit, empty
    # dependency set. ``set().union(...)`` also copes with an empty graph.
    dep_only = set().union(*dag.values()) - set(dag)
    dag.update({node: set() for node in dep_only})

    # Seed with *every* node that has nothing left to wait for, including
    # keys whose dependency set was empty from the start (or became empty
    # once the self-edge was dropped). Seeding with ``dep_only`` alone made
    # such graphs look circular.
    indep_nodes = {node for node, deps in dag.items() if not deps}

    while indep_nodes:
        out = tie_breaker(indep_nodes)
        indep_nodes.discard(out)
        del dag[out]
        yield out
        # Release the nodes that were waiting only for ``out``.
        for node, deps in dag.items():
            deps.discard(out)
            if not deps:
                indep_nodes.add(node)

    if dag:
        raise ValueError("Circular dependencies exist: {} ".format(list(dag.items())))
def _merge_samheaders(samheaders, force=False):
    """Merge several sam headers into one list of header lines.

    The result is assembled in this order:

    1. at most one @HD line (the first one encountered);
    2. the @SQ lines of the first header, in their original order;
    3. all @PG lines of all headers, with "-<k>" appended to their ID and PP
       fields, where k is the 1-based position of the header in
       ``samheaders``;
    4. every other line, de-duplicated within each header and kept in
       first-seen order.

    Parameters
    ----------
    samheaders : sequence of lists of str
        One list of sam header lines per merged file.
    force : bool
        If True, tolerate differing @HD lines and differing @SQ sets instead
        of raising.

    Returns
    -------
    list of str
        The merged header; empty if ``samheaders`` is empty.

    Raises
    ------
    ParseError
        If, without ``force``, more than one distinct @HD line is found or
        the sets of @SQ lines differ between headers.
    """
    if not samheaders:
        return []

    # First, append an HD line if it is present in any files;
    # if different lines are present, raise an error.
    # dict.fromkeys de-duplicates while keeping first-seen order, so the line
    # kept under ``force`` does not depend on set iteration order.
    HDs = list(
        dict.fromkeys(
            line
            for samheader in samheaders
            for line in samheader
            if line.startswith("@HD")
        )
    )
    if len(HDs) > 1 and not force:
        raise ParseError("More than one unique @HD line is found in samheaders!")
    HDs = HDs[:1]

    # Second, confirm that all files had the same SQ lines;
    # add SQs from the first file, keeping its order.
    SQ_sets = [
        set(line for line in samheader if line.startswith("@SQ"))
        for samheader in samheaders
    ]
    common_SQs = set.intersection(*SQ_sets)
    SQs_same = all(len(sq_set) == len(common_SQs) for sq_set in SQ_sets)
    if not SQs_same and not force:
        raise ParseError("The SQ (sequence) lines of the sam headers are not identical")
    SQs = [line for line in samheaders[0] if line.startswith("@SQ")]

    # Third, append _all_ PG chains, adding a unique index according to the
    # provided merging order.
    PGs = []
    for i, samheader in enumerate(samheaders, start=1):
        suffix = "-" + str(i)
        for line in samheader:
            if line.startswith("@PG"):
                fields = [
                    field + suffix if field.startswith(("ID:", "PP:")) else field
                    for field in line.split("\t")
                ]
                PGs.append("\t".join(fields))

    # Finally, add all residual lines, unique within each header, in the
    # order in which they first appear.
    rest = []
    for samheader in samheaders:
        rest.extend(
            dict.fromkeys(
                line
                for line in samheader
                if not line.startswith(("@HD", "@SQ", "@PG"))
            )
        )

    return HDs + SQs + PGs + rest
def _merge_pairheaders(pairheaders, force=False):
    """Merge the non-sam parts of several .pairs headers.

    The merged header consists of, in order: the entries that must agree
    between files (taken from the first header), an optional "#chromosomes:"
    line, the "#chromsize:" lines for the merged chromosome list, and the
    sorted set of all remaining lines. When the merged header ends with the
    "#columns" entry, the remaining lines are placed before it.

    Parameters
    ----------
    pairheaders : sequence of lists of str
        One list of header lines per merged file.
    force : bool
        If True, do not raise when the entries expected to be identical
        differ; the entries of the first header are used.

    Returns
    -------
    list of str
        The merged header; empty if ``pairheaders`` is empty.

    Raises
    ------
    ParseError
        If, without ``force``, one of the entries expected to be identical
        differs between the headers.
    """
    if not pairheaders:
        return []

    new_header = []

    # first, add all keys that are expected to be the same among all headers
    keys_expected_identical = [
        "## pairs format",
        "#sorted:",
        "#shape:",
        "#genome_assembly:",
        "#columns:",
    ]
    for k in keys_expected_identical:
        lines = [[l for l in header if l.startswith(k)] for header in pairheaders]
        same = all(l == lines[0] for l in lines)
        if not (same or force):
            raise ParseError(
                "The following header entries must be the same "
                "in the merged files: {}".format(k)
            )
        new_header += lines[0]

    # second, merge and add the chromsizes fields.
    chromsize_key = "#chromsize:"
    chrom_lists = []
    chromsizes = {}
    for header in pairheaders:
        chromlist = []
        for line in header:
            if line.startswith(chromsize_key):
                # Slice the key off as a prefix. str.strip(chromsize_key)
                # would strip any of its *characters* from both ends and can
                # eat into the chromosome name.
                chrom, length = line[len(chromsize_key):].split()
                chromsizes[chrom] = length
                chromlist.append(chrom)
        chrom_lists.append(chromlist)
    chroms_merged = merge_chrom_lists(*chrom_lists)

    # A "#chromosomes:" line is emitted only if some input had one as the
    # first whitespace-separated token of a line; blank lines are tolerated.
    has_chromosomes = any(
        l.split()[:1] == ["#chromosomes:"] for header in pairheaders for l in header
    )
    if has_chromosomes:
        new_header.append("#chromosomes: {}".format(" ".join(chroms_merged)))

    new_header.extend(
        "#chromsize: {} {}".format(chrom, chromsizes[chrom]) for chrom in chroms_merged
    )

    # finally, add a sorted list of other unique fields
    handled_prefixes = tuple(keys_expected_identical + ["#chromosomes", "#chromsize"])
    other_lines = sorted(
        set(l for h in pairheaders for l in h if not l.startswith(handled_prefixes))
    )
    if other_lines:
        # Guard against an empty merged header before looking at its tail.
        if new_header and new_header[-1].startswith("#columns"):
            new_header = new_header[:-1] + other_lines + [new_header[-1]]
        else:
            new_header = new_header + other_lines

    return new_header
def append_columns(header, columns):
    """
    Appends columns to the header, separated by SEP_COLS

    Every entry starting with "#columns: " is extended; the header is
    modified in place.

    Parameters
    ----------
    header: Previous header
    columns: List of column names to append

    Returns
    -------
    Modified header (appended columns to the field "#columns")
    """
    for idx, entry in enumerate(header):
        if entry.startswith("#columns: "):
            header[idx] = entry + SEP_COLS + SEP_COLS.join(columns)
    return header


def get_colnames(header):
    """
    Get column names of the header, separated by SEP_COLS

    Only the first entry starting with "#columns: " is considered.

    Parameters
    ----------
    header: Previous header

    Returns
    -------
    List of column names (empty if the header has no "#columns: " entry)
    """
    for entry in header:
        if entry.startswith("#columns: "):
            # The first token is the "#columns:" field name itself.
            return entry.split(SEP_COLS)[1:]
    return []


def set_columns(header, columns):
    """
    Set columns of the header, separated by SEP_COLS

    Every entry starting with "#columns:" is replaced; the header is
    modified in place.

    Parameters
    ----------
    header: Previous header
    columns: List of column names to set

    Returns
    -------
    Modified header (replaced columns in the field "#columns")
    """
    for idx, entry in enumerate(header):
        if entry.startswith("#columns:"):
            header[idx] = "#columns:" + SEP_COLS + SEP_COLS.join(columns)
    return header
COLUMNS = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", "sam1", "sam2", "walk_pair_index", "walk_pair_type", ] # Required columns for formats: COLUMNS_PAIRSAM = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", "sam1", "sam2", ] COLUMNS_PAIRS = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", ] UNMAPPED_CHROM = "!" UNMAPPED_POS = 0 UNMAPPED_STRAND = "-" UNANNOTATED_RFRAG = -1 EXTRA_COLUMNS = [ "mapq", "pos5", "pos3", "cigar", "read_len", "matched_bp", "algn_ref_span", "algn_read_span", "dist_to_5", "dist_to_3", "seq", "mismatches", # Format: "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}" ] pairtools-1.0.3/pairtools/lib/parse.py000066400000000000000000001560061452673171500200200ustar00rootroot00000000000000""" Set of functions used for pairsam parse, migrated from pairtools/pairtools_parse.py Parse operates with several basic data types: I. pysam-based: 1. **sam entry** is a continuous aligned fragment of the read mapped to certain location in the genome. Because we read sam entries from .sam/.bam files automatically with modified pysam, each sam entry is in fact special AlignedSegmentPairtoolized Cython object that has alignment attributes and can be easily accessed from Python. Sam entries are gathered into reads by `push_pysam` function. 2. **read** is a collection of sam entries corresponding to a single Hi-C molecule. It is represented by three variables: readID, sams1 and sams2, which keep left and right sam entries, correspondingly. Read is populated from the stream of sam entries on a fly, the process happenning in `streaming_classify` function. II. python-based data types are parsed from pysam-based ones: 1. **alignment** is a continuous aligned fragment represented as dictionary with relevant fields, such as "chrom", "pos5", "pos3", "strand", "type", etc. 
`empty_alignment` creates empty alignment, `parse_pysam_entry` create new alignments from pysam entries, `mask_alignment` clears some fields of the alignment to match the default "unmapped" state. `flip_alignment`, `flip_orientation` and `flip_ends` are useful functions that help to orient alignments. 2. **pair** of two alignments is represented by three variables: algn1 (left alignment), algn2 (right alignment) and pair_index. Pairs are obtained by `parse_read` or `parse2_read`. Additionally, these functions also output all alignments for each side. """ from . import pairsam_format from .parse_pysam import get_mismatches_c def streaming_classify( instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs ): """ Parse input sam file into individual reads, pairs, walks, then write to the outstream(s). Additional kwargs: min_mapq, drop_readid, drop_seq, drop_sam, add_pair_index, add_columns, # comma-separated list report_alignment_end, max_inter_align_gap parse: max_molecule_size walks_policy parse2: single_end: indicator whether single-end data is provided report_position, one of: "outer", "junction", "read", "walk" report_orientation, one of: "pair", "junction", "read", "walk" dedup_max_mismatch: For intramolecular deduplication max_insert_size: maximum insert size when searching for overlapping ends of R1 and R2 expand: perform combinatorial expansion or not max_expansion_depth: maximum expansion depth, works in combination with expand=True """ parse2 = kwargs.get("parse2", False) ### Store output parameters in a usable form: chrom_enum = dict( zip( [pairsam_format.UNMAPPED_CHROM] + list(chromosomes), range(len(chromosomes) + 1), ) ) add_columns = kwargs.get("add_columns", "") if isinstance(add_columns, str) and len(add_columns) > 0: add_columns = add_columns.split(",") elif len(add_columns) == 0: add_columns = [] elif not isinstance(add_columns, list): raise ValueError(f"Unknown type of add_columns: {type(add_columns)}") sam_tags = [col for col 
in add_columns if len(col) == 2 and col.isupper()] store_seq = "seq" in add_columns ### Compile readID transformation: readID_transform = kwargs.get("readid_transform", None) if readID_transform is not None: readID_transform = compile(readID_transform, "", "eval") ### Prepare for iterative parsing of the input stream # Each read is represented by readID, sams1 (left alignments) and sams2 (right alignments) readID = "" # Read id of the current read sams1 = [] # Placeholder for the left alignments sams2 = [] # Placeholder for the right alignments # Each read is comprised of multiple alignments, or sam entries: sam_entry = "" # Placeholder for each aligned segment # Keep the id of the previous sam entry to detect when the read is completely populated: prev_readID = "" # Placeholder for the read id ### Iterate over input pysam: instream = iter(instream) while sam_entry is not None: sam_entry = next(instream, None) readID = sam_entry.query_name if sam_entry else None if readID_transform is not None and readID is not None: readID = eval(readID_transform) # Read is fully populated, then parse and write: if not (sam_entry) or ((readID != prev_readID) and prev_readID): ### Parse if not parse2: # regular parser: pairstream, all_algns1, all_algns2 = parse_read( sams1, sams2, min_mapq=kwargs["min_mapq"], max_molecule_size=kwargs["max_molecule_size"], max_inter_align_gap=kwargs["max_inter_align_gap"], walks_policy=kwargs["walks_policy"], sam_tags=sam_tags, store_seq=store_seq, report_mismatches=True if "mismatches" in add_columns else False, ) else: # parse2 parser: pairstream, all_algns1, all_algns2 = parse2_read( sams1, sams2, min_mapq=kwargs["min_mapq"], max_inter_align_gap=kwargs["max_inter_align_gap"], max_insert_size=kwargs.get("max_insert_size", 500), single_end=kwargs["single_end"], report_position=kwargs["report_position"], report_orientation=kwargs["report_orientation"], sam_tags=sam_tags, dedup_max_mismatch=kwargs["dedup_max_mismatch"], store_seq=store_seq, 
def push_pysam(sam_entry, sams1, sams2):
    """Append a sam entry to the list of the read side it belongs to.

    Entries with flag bit 0x40 set go to ``sams1`` (left read, or first read
    in a pair); all other entries go to ``sams2`` (right read, or mate pair).
    Both lists are modified in place and nothing is returned.
    """
    side = sams1 if (sam_entry.flag & 0x40) != 0 else sams2
    side.append(sam_entry)
def parse_pysam_entry(
    sam,
    min_mapq,
    sam_tags=None,
    store_seq=False,
    report_3_alignment_end=False,
    report_mismatches=False,
):
    """Parse alignments from pysam AlignedSegment entry

    :param sam: input pysam AlignedSegment entry
    :param min_mapq: minimal MAPQ to consider as a proper alignment
    :param sam_tags: list of sam tags to store
    :param store_seq: if True, the sequence will be parsed and stored in the output
    :param report_3_alignment_end: if True, 3'-end of alignment will be
        reported as position (will be deprecated)
    :param report_mismatches: if True, mismatches will be parsed from MD field
    :return: parsed aligned entry (dictionary)
    """
    flag = sam.flag
    is_mapped = (flag & 0x04) == 0
    mapq = sam.mapq
    is_unique = sam.is_unique(min_mapq)
    is_linear = sam.is_linear
    cigar = sam.cigar_dict

    if is_mapped:
        # Clipping is reported relative to the reference strand; swap it for
        # reverse-strand alignments so that distances are read-oriented.
        if (flag & 0x10) == 0:
            strand = "+"
            dist_to_5 = cigar["clip5_ref"]
            dist_to_3 = cigar["clip3_ref"]
        else:
            strand = "-"
            dist_to_5 = cigar["clip3_ref"]
            dist_to_3 = cigar["clip5_ref"]

        if is_unique:
            chrom = sam.reference_name
            if strand == "+":
                # Note that pysam output is zero-based, thus add +1:
                pos5 = sam.reference_start + 1
                pos3 = sam.reference_start + cigar["algn_ref_span"]
            else:
                pos5 = sam.reference_start + cigar["algn_ref_span"]
                # Note that pysam output is zero-based, thus add +1:
                pos3 = sam.reference_start + 1

            # Mismatches are reported only on request and only when the MD
            # tag is available:
            if not sam.has_tag("MD") or not report_mismatches:
                mismatches = ""
            else:
                seq = sam.query_sequence.upper()
                quals = sam.query_qualities
                aligned_pairs = sam.get_aligned_pairs(with_seq=True, matches_only=True)
                mismatches = get_mismatches_c(seq, quals, aligned_pairs)
                mismatches = ",".join(
                    [
                        f"{original}:{mutated}:{phred}:{ref}:{read}"
                        for original, mutated, phred, ref, read in mismatches
                    ]
                )
        else:
            # Mapped, but not uniquely: coordinates are masked.
            chrom = pairsam_format.UNMAPPED_CHROM
            strand = pairsam_format.UNMAPPED_STRAND
            pos5 = pairsam_format.UNMAPPED_POS
            pos3 = pairsam_format.UNMAPPED_POS
            mismatches = ""
    else:
        chrom = pairsam_format.UNMAPPED_CHROM
        strand = pairsam_format.UNMAPPED_STRAND
        pos5 = pairsam_format.UNMAPPED_POS
        pos3 = pairsam_format.UNMAPPED_POS
        dist_to_5 = 0
        dist_to_3 = 0
        mismatches = ""

    algn = {
        "chrom": chrom,
        "pos5": pos5,
        "pos3": pos3,
        "strand": strand,
        "mapq": mapq,
        "is_mapped": is_mapped,
        "is_unique": is_unique,
        "is_linear": is_linear,
        "dist_to_5": dist_to_5,
        "dist_to_3": dist_to_3,
        "type": ("N" if not is_mapped else ("M" if not is_unique else "U")),
        "mismatches": mismatches,
    }
    algn.update(cigar)

    algn["pos"] = algn["pos3"] if report_3_alignment_end else algn["pos5"]

    ### Add tags to the alignment:
    if sam_tags:
        tags = sam.tags
        # Every requested tag gets a value, empty when the entry lacks it.
        for tag in sam_tags:
            algn[tag] = ""
        # One membership test per tag instead of rescanning sam_tags.
        requested = set(sam_tags)
        for col, value in tags:
            if col in requested:
                algn[col] = value

    if store_seq:
        algn["seq"] = sam.seq

    return algn


def mask_alignment(algn):
    """
    Reset the coordinates of an alignment.

    Chromosome, positions and strand are set to their "unmapped" values.
    The alignment is modified in place and also returned.
    """
    algn["chrom"] = pairsam_format.UNMAPPED_CHROM
    algn["pos5"] = pairsam_format.UNMAPPED_POS
    algn["pos3"] = pairsam_format.UNMAPPED_POS
    algn["pos"] = pairsam_format.UNMAPPED_POS
    algn["strand"] = pairsam_format.UNMAPPED_STRAND

    return algn


def flip_alignment(hic_algn):
    """
    Flip a single alignment as if it was sequenced from the opposite end:
    the 5' and 3' positions are swapped and the strand is inverted.

    :param hic_algn: Alignment to be flipped (left unmodified)
    :return: flipped copy of the alignment
    """
    hic_algn = dict(hic_algn)  # work on a copy, not on the caller's dictionary
    hic_algn["pos5"], hic_algn["pos3"] = hic_algn["pos3"], hic_algn["pos5"]
    hic_algn["strand"] = "+" if (hic_algn["strand"] == "-") else "-"
    return hic_algn


def flip_orientation(hic_algn):
    """
    Flip orientation (strand) of a single alignment

    :param hic_algn: Alignment to be flipped (left unmodified)
    :return: copy of the alignment with the inverted strand
    """
    hic_algn = dict(hic_algn)  # work on a copy, not on the caller's dictionary
    hic_algn["strand"] = "+" if (hic_algn["strand"] == "-") else "-"
    return hic_algn


def flip_position(hic_algn):
    """
    Flip ends of a single alignment: swap its 5' and 3' positions

    :param hic_algn: Alignment to be flipped (left unmodified)
    :return: copy of the alignment with swapped ends
    """
    hic_algn = dict(hic_algn)  # work on a copy, not on the caller's dictionary
    hic_algn["pos5"], hic_algn["pos3"] = hic_algn["pos3"], hic_algn["pos5"]
    return hic_algn
""" # Check if there is at least one sam entry per side: if walks_policy == "all": is_empty = (len(sams1) == 0 and len(sams2) < 2) or ( len(sams2) == 0 and len(sams1) < 2 ) else: is_empty = (len(sams1) == 0) or (len(sams2) == 0) if is_empty: algns1 = [empty_alignment()] algns2 = [empty_alignment()] algns1[0]["type"] = "X" algns2[0]["type"] = "X" pair_index = (1, "R1-2") return iter([(algns1[0], algns2[0], pair_index)]), algns1, algns2 # Generate a sorted, gap-filled list of all alignments algns1 = [ parse_pysam_entry( sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches ) for sam in sams1 ] algns2 = [ parse_pysam_entry( sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches ) for sam in sams2 ] if len(algns1) > 0: algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"]) else: algns1 = [empty_alignment()] # Empty alignment dummy if len(algns2) > 0: algns2 = sorted(algns2, key=lambda algn: algn["dist_to_5"]) else: algns2 = [empty_alignment()] # Empty alignment dummy if max_inter_align_gap is not None: _convert_gaps_into_alignments(algns1, max_inter_align_gap) _convert_gaps_into_alignments(algns2, max_inter_align_gap) # By default, assume each molecule is a single pair with single unconfirmed pair: hic_algn1 = algns1[0] hic_algn2 = algns2[0] pair_index = (1, "R1-2") # Define the type of alignment on each side: is_chimeric_1 = len(algns1) > 1 is_chimeric_2 = len(algns2) > 1 # Parse chimeras if is_chimeric_1 or is_chimeric_2: # Report all the linear alignments in a read pair if walks_policy == "all": # Report linear alignments after deduplication of complex walks with default settings: return ( parse_complex_walk( algns1, algns2, max_molecule_size, report_position="outer", report_orientation="pair", ), algns1, algns2, ) elif walks_policy in ["mask", "5any", "5unique", "3any", "3unique"]: # Report only two alignments for a read pair rescued_linear_side = rescue_walk(algns1, algns2, max_molecule_size) # Walk was rescued as a simple 
walk: if rescued_linear_side is not None: pair_index = (1, "R1" if rescued_linear_side == 1 else "R2") # Walk is unrescuable: else: if walks_policy == "mask": hic_algn1 = mask_alignment(dict(hic_algn1)) hic_algn2 = mask_alignment(dict(hic_algn2)) hic_algn1["type"] = "W" hic_algn2["type"] = "W" elif walks_policy == "5any": hic_algn1 = algns1[0] hic_algn2 = algns2[0] elif walks_policy == "5unique": hic_algn1 = algns1[0] for algn in algns1: if algn["is_mapped"] and algn["is_unique"]: hic_algn1 = algn break hic_algn2 = algns2[0] for algn in algns2: if algn["is_mapped"] and algn["is_unique"]: hic_algn2 = algn break elif walks_policy == "3any": hic_algn1 = algns1[-1] hic_algn2 = algns2[-1] elif walks_policy == "3unique": hic_algn1 = algns1[-1] for algn in algns1[::-1]: if algn["is_mapped"] and algn["is_unique"]: hic_algn1 = algn break hic_algn2 = algns2[-1] for algn in algns2[::-1]: if algn["is_mapped"] and algn["is_unique"]: hic_algn2 = algn break # lower-case reported walks on the chimeric side if walks_policy != "mask": if is_chimeric_1: hic_algn1 = dict(hic_algn1) hic_algn1["type"] = hic_algn1["type"].lower() if is_chimeric_2: hic_algn2 = dict(hic_algn2) hic_algn2["type"] = hic_algn2["type"].lower() else: raise ValueError(f"Walks policy {walks_policy} is not supported.") return iter([(hic_algn1, hic_algn2, pair_index)]), algns1, algns2 def parse2_read( sams1, sams2, min_mapq, max_inter_align_gap, max_insert_size, single_end, report_position="outer", report_orientation="pair", sam_tags=[], dedup_max_mismatch=3, store_seq=False, report_mismatches=False, expand=False, max_expansion_depth=None, ): """ Parse sam entries corresponding to a Hi-C molecule into alignments in parse2 mode for a Hi-C pair. Returns ------- stream: iterator Each element is a triplet: (algn1, aldn2, pair_index) algn1, algn2: dict Two alignments selected for reporting as a Hi-C pair. 
pair_index pair index of a pair in the molecule, a tuple: (1, "R1-2") algns1, algns2: lists All alignments, sorted according to their order in on a read. """ # Single-end mode: if single_end: # Generate a sorted, gap-filled list of all alignments algns1 = [ parse_pysam_entry( sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches ) for sam in sams2 # note sams2, that's how these reads are typically parsed ] algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"]) if max_inter_align_gap is not None: _convert_gaps_into_alignments(algns1, max_inter_align_gap) algns2 = [empty_alignment()] # Empty alignment dummy if len(algns1) > 1: # Look for ligation pair, and report linear alignments after deduplication of complex walks: # (Note that coordinate system for single-end reads does not change the behavior) output = parse_complex_walk( algns1, algns2, max_insert_size, report_position, report_orientation, dedup_max_mismatch, ) output = [x for x in output if x[-1][-1] != "R1-2"] return (output, algns1, algns2) elif len(algns1) == 1: # If no additional information, we assume each molecule is a single ligation with single unconfirmed pair: algn2 = empty_alignment() pair_index = (1, "R1") return iter([(algns1[0], algn2, pair_index)]), algns1, algns2 else: # If no additional information, we assume each molecule is a single ligation with single unconfirmed pair: algn1 = empty_alignment() algn2 = empty_alignment() pair_index = (1, "R1") return iter([(algn1, algn2, pair_index)]), algns1, algns2 # Paired-end mode: else: # Check if there is at least one SAM entry per side: is_empty = (len(sams1) == 0 and len(sams2) < 2) or ( len(sams2) == 0 and len(sams1) < 2 ) if is_empty: algns1 = [empty_alignment()] algns2 = [empty_alignment()] algns1[0]["type"] = "X" algns2[0]["type"] = "X" pair_index = (1, "R1-2") return iter([(algns1[0], algns2[0], pair_index)]), algns1, algns2 # Generate a sorted, gap-filled list of all alignments algns1 = [ parse_pysam_entry( sam, 
min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches ) for sam in sams1 ] algns2 = [ parse_pysam_entry( sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches ) for sam in sams2 ] # Sort alignments by the distance to the 5'-end: if len(algns1) > 0: algns1 = sorted(algns1, key=lambda algn: algn["dist_to_5"]) else: algns1 = [empty_alignment()] # Empty alignment dummy if len(algns2) > 0: algns2 = sorted(algns2, key=lambda algn: algn["dist_to_5"]) else: algns2 = [empty_alignment()] # Empty alignment dummy # Convert alignment gaps to alignments: if max_inter_align_gap is not None: _convert_gaps_into_alignments(algns1, max_inter_align_gap) _convert_gaps_into_alignments(algns2, max_inter_align_gap) is_chimeric_1 = len(algns1) > 1 is_chimeric_2 = len(algns2) > 1 if is_chimeric_1 or is_chimeric_2: # If at least one side is chimera, we must look for ligation pair, and # report linear alignments after deduplication of complex walks: return ( parse_complex_walk( algns1, algns2, max_insert_size, report_position, report_orientation, dedup_max_mismatch, expand, max_expansion_depth, ), algns1, algns2, ) else: # If no additional information, we assume each molecule is a single ligation with single unconfirmed pair: algn2 = algns2[0] if report_orientation == "walk": algn2 = flip_orientation(algn2) if report_position == "walk": algn2 = flip_position(algn2) pair_index = (1, "R1-2") return iter([(algns1[0], algn2, pair_index)]), algns1, algns2 #################### ### Walks utilities: #################### def rescue_walk(algns1, algns2, max_molecule_size): """ Rescue a single ligation that appears as a walk. Checks if a molecule with three alignments could be formed via a single ligation between two fragments, where one fragment was so long that it got sequenced on both sides. Uses three criteria: 1) the 3'-end alignment on one side maps to the same chromosome as the alignment fully covering the other side (i.e. 
the linear alignment) 2) the two alignments point towards each other on the chromosome 3) the distance between the outer ends of the two alignments is below the specified threshold. Alternatively, a single ligation get rescued when the 3' sub-alignment maps to multiple locations or no locations at all. In the case of a successful rescue, tags the 3' sub-alignment with type='X' and the linear alignment on the other side with type='R'. Returns ------- linear_side : int If the case of a successful rescue, returns the index of the side with a linear alignment. """ # If both sides have one alignment or none, no need to rescue! n_algns1 = len(algns1) n_algns2 = len(algns2) if (n_algns1 <= 1) and (n_algns2 <= 1): return None # Can rescue only pairs with one chimeric alignment with two parts. if not ( ((n_algns1 == 1) and (n_algns2 == 2)) or ((n_algns1 == 2) and (n_algns2 == 1)) ): return None first_read_is_chimeric = n_algns1 > 1 chim5_algn = algns1[0] if first_read_is_chimeric else algns2[0] chim3_algn = algns1[1] if first_read_is_chimeric else algns2[1] linear_algn = algns2[0] if first_read_is_chimeric else algns1[0] # the linear alignment must be uniquely mapped if not (linear_algn["is_mapped"] and linear_algn["is_unique"]): return None can_rescue = True # we automatically rescue chimeric alignments with null and non-unique # alignments at the 3' side if chim3_algn["is_mapped"] and chim5_algn["is_unique"]: # 1) in rescued walks, the 3' alignment of the chimeric alignment must be on # the same chromosome as the linear alignment on the opposite side of the # molecule can_rescue &= chim3_algn["chrom"] == linear_algn["chrom"] # 2) in rescued walks, the 3' supplemental alignment of the chimeric # alignment and the linear alignment on the opposite side must point # towards each other can_rescue &= chim3_algn["strand"] != linear_algn["strand"] if linear_algn["strand"] == "+": can_rescue &= linear_algn["pos5"] < chim3_algn["pos5"] else: can_rescue &= linear_algn["pos5"] > 
chim3_algn["pos5"] # 3) in single ligations appearing as walks, we can infer the size of # the molecule and this size must be smaller than the maximal size of # Hi-C molecules after the size selection step of the Hi-C protocol if linear_algn["strand"] == "+": molecule_size = ( chim3_algn["pos5"] - linear_algn["pos5"] + chim3_algn["dist_to_5"] + linear_algn["dist_to_5"] ) else: molecule_size = ( linear_algn["pos5"] - chim3_algn["pos5"] + chim3_algn["dist_to_5"] + linear_algn["dist_to_5"] ) can_rescue &= molecule_size <= max_molecule_size if can_rescue: # changing the type of the 3' alignment on side 1, does not show up in the output: if first_read_is_chimeric: algns1[1]["type"] = "X" algns2[0]["type"] = "R" return 1 # changing the type of the 3' alignment on side 2, does not show up in the output: else: algns1[0]["type"] = "R" algns2[1]["type"] = "X" return 2 else: return None def _convert_gaps_into_alignments(sorted_algns, max_inter_align_gap): """ Inplace conversion of gaps longer than max_inter_align_gap into alignments """ if (len(sorted_algns) == 1) and (not sorted_algns[0]["is_mapped"]): return last_5_pos = 0 for i in range(len(sorted_algns)): algn = sorted_algns[i] if algn["dist_to_5"] - last_5_pos > max_inter_align_gap: new_algn = empty_alignment() new_algn["dist_to_5"] = last_5_pos new_algn["algn_read_span"] = algn["dist_to_5"] - last_5_pos new_algn["read_len"] = algn["read_len"] new_algn["dist_to_3"] = new_algn["read_len"] - algn["dist_to_5"] last_5_pos = algn["dist_to_5"] + algn["algn_read_span"] sorted_algns.insert(i, new_algn) i += 2 else: last_5_pos = max(last_5_pos, algn["dist_to_5"] + algn["algn_read_span"]) i += 1 def parse_complex_walk( algns1, algns2, max_insert_size, report_position, report_orientation, dedup_max_mismatch=3, expand=False, max_expansion_depth=None, ): """ Parse a set of ligations that appear as a complex walk. This procedure is equivalent to intramolecular deduplication that preserved pair order in a walk. 
:param algns1: List of sequential lefts alignments :param algns2: List of sequential right alignments :param max_insert_size: maximum insert size when searching for overlapping ends of R1 and R2 :param report_position: one of "outer", "junction", "read", "walk"; sets pos5 and pos3 :param report_orientation: one of "pair", "junction", "read", "walk"; sets strand :param dedup_max_mismatch: allowed mismatch between intramolecular alignments to detect readthrough duplicates :param expand: perform combinatorial expansion of pairs or not :param max_expansion_depth: maximum depth (number of segments separating pair). All by default. :return: iterator with parsed pairs **Intramolecular deduplication** Forward read (left): right read (right): 5'------------------------->3' 3'<--------------------------5' algns1 algns2 <5---3><5---3><5---3><5---3> <3---5><3---5><3---5><3---5> l0 l1 l2 l3 r3 r2 r1 r0 Alignment - bwa mem reported hit or alignment after gaps conversion. Left and right alignments (algns1: [l0, l1, l2, l3], algns2: [r0, r1, r2, r3]) - alignments on left and right reads reported from 5' to 3' orientation. Intramolecular deduplication consists of two steps: I. iterative search of overlapping alignment pairs (aka overlap), II. if no overlaps or search not possible (less than 2 alignments on either sides), search for overlap of end alignments (aka partial overlap). III. report pairs before the overlap, deduplicated pairs of overlap and pairs after that. Iterative search of overlap is in fact scanning of the right read pairs for the hit with the 3'-most pair of the left read: 1. Initialize. Start from 3' of left and right reads. Set `current_left_pair` and `current_right_pair` pointers 2. Initial compare. Compare pairs l2-l3 and r3-r2 by `pairs_overlap`. If successful, we found the overlap, go to reporting. If unsuccessful, continue search. 3. Increment. Shift `current_right_pair` pointer by one (e.g., take the pair r2-r1). 4. Check. 
Check that this pair can form a potential overlap with left alignments: the number of pairs downstream from l2-l3 on left read should not be less than the number of pairs upstream from r2-r1 on right read. If overlap cannot be formed, no other overlap in this complex walk is possible, safely exit. If the potential overlap can be formed, continue comparison. 5. Compare. Compare the current pair of pairs on left and right reads. If comparison fails, go to step 3. If comparison is successful, go to 6. 6. Verify. Check that downstream pairs on the left read overlap with the upstream pairs on the right read. If yes, exit. If not, we do not have an overlap, go to step 3. """ AVAILABLE_REPORT_POSITION = ["outer", "junction", "read", "walk"] assert report_position in AVAILABLE_REPORT_POSITION, ( f"Cannot report position {report_position}, as it is not implemented" f'Available choices are: {", ".join(AVAILABLE_REPORT_POSITION)}' ) AVAILABLE_REPORT_ORIENTATION = ["pair", "junction", "read", "walk"] assert report_orientation in AVAILABLE_REPORT_ORIENTATION, ( f"Cannot report orientation {report_orientation}, as it is not implemented" f'Available choices are: {", ".join(AVAILABLE_REPORT_ORIENTATION)}' ) output_pairs = [] # Initialize (step 1). n_algns1 = len(algns1) n_algns2 = len(algns2) current_left_pair = current_right_pair = 1 remaining_left_pairs = ( n_algns1 - 1 ) # Number of possible pairs remaining on left read remaining_right_pairs = ( n_algns2 - 1 ) # Number of possible pairs remaining on right read checked_right_pairs = ( 0 # Number of checked pairs on right read (from the end of read) ) is_overlap = False # I. 
Iterative search of overlap, at least two alignments on each side: if (n_algns1 >= 2) and (n_algns2 >= 2): # Iteration includes check (step 4): while (remaining_left_pairs > checked_right_pairs) and ( remaining_right_pairs > 0 ): pair1 = (algns1[-current_left_pair - 1], algns1[-current_left_pair]) pair2 = (algns2[-current_right_pair - 1], algns2[-current_right_pair]) # Compare (initial or not, step 2 or 5): is_overlap = pairs_overlap( pair1, pair2, dedup_max_mismatch=dedup_max_mismatch ) if is_overlap: last_idx_left_temp = current_left_pair last_idx_right_temp = current_right_pair checked_right_temp = checked_right_pairs # Verify (step 6): while is_overlap and (checked_right_temp > 0): last_idx_left_temp += 1 last_idx_right_temp -= 1 pair1 = ( algns1[-last_idx_left_temp - 1], algns1[-last_idx_left_temp], ) pair2 = ( algns2[-last_idx_right_temp - 1], algns2[-last_idx_right_temp], ) is_overlap &= pairs_overlap( pair1, pair2, dedup_max_mismatch=dedup_max_mismatch ) checked_right_temp -= 1 if is_overlap: # exit current_right_pair += 1 break # Increment pointers (step 3) current_right_pair += 1 checked_right_pairs += 1 remaining_right_pairs -= 1 # No overlap found, roll the current_idx_right back to the initial value: if not is_overlap: current_right_pair = 1 # II. 
Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found if current_right_pair == 1: last_reported_alignment_left = last_reported_alignment_right = 1 if partial_overlap( algns1[-1], algns2[-1], max_insert_size=max_insert_size, dedup_max_mismatch=dedup_max_mismatch, ): if ( n_algns1 >= 2 ): # single alignment on right read and multiple alignments on left pair_index = (len(algns1) - 1, "R1") output_pairs.append( format_pair( algns1[-2], algns1[-1], pair_index=pair_index, algn2_pos3=algns2[-1]["pos5"], report_position=report_position, report_orientation=report_orientation, ) ) last_reported_alignment_left = 2 # set the pointer for reporting if ( n_algns2 >= 2 ): # single alignment on left read and multiple alignments on right pair_index = (len(algns1), "R2") output_pairs.append( format_pair( algns2[-1], algns2[-2], pair_index=pair_index, algn1_pos3=algns1[-1]["pos5"], report_position=report_position, report_orientation=report_orientation, ) ) last_reported_alignment_right = 2 # set the pointer for reporting # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check, # it's a non-ligated DNA fragment that we don't report. else: # end alignments do not overlap, report regular pair: pair_index = (len(algns1), "R1-2") output_pairs.append( format_pair( algns1[-1], algns2[-1], pair_index=pair_index, report_position=report_position, report_orientation=report_orientation, ) ) else: # there was an overlap, set some pointers: last_reported_alignment_left = ( last_reported_alignment_right ) = current_right_pair # III. Report all remaining alignments. 
# Report all unique alignments on left read (sequential): for i in range(0, n_algns1 - last_reported_alignment_left): pair_index = (i + 1, "R1") output_pairs.append( format_pair( algns1[i], algns1[i + 1], pair_index=pair_index, report_position=report_position, report_orientation=report_orientation, ) ) # Report the pairs where both left alignments overlap right: for i_overlapping in range(current_right_pair - 1): idx_left = n_algns1 - current_right_pair + i_overlapping idx_right = n_algns2 - 1 - i_overlapping pair_index = (idx_left + 1, "R1&2") output_pairs.append( format_pair( algns1[idx_left], algns1[idx_left + 1], pair_index=pair_index, algn2_pos3=algns2[idx_right - 1]["pos5"], report_position=report_position, report_orientation=report_orientation, ) ) # Report all the sequential chimeric pairs in the right read, but not the overlap: reporting_order = range( 0, min(current_right_pair, n_algns2 - last_reported_alignment_right) ) for i in reporting_order: # Determine the pair index depending on what is the overlap: shift = -1 if current_right_pair > 1 else 0 pair_index = ( ( n_algns1 + min(current_right_pair, n_algns2 - last_reported_alignment_right) - i + shift ), "R2", ) output_pairs.append( format_pair( algns2[i + 1], algns2[i], pair_index=pair_index, report_position=report_position, report_orientation=report_orientation, ) ) # Sort the pairs according to the pair index: output_pairs.sort(key=lambda x: int(x[-1][0])) if expand: output_pairs = expand_pairs(output_pairs, max_expansion_depth) return iter(output_pairs) ### Additional functions for pairs ### def expand_pairs(pairs_list, max_expansion_depth=None): """ Perform combinatorial expansion of the pairs. Parameters ---------- pairs_list: List of formatted pairs (triplets: algn1, algn2, pair_index). max_expansion_depth: maximum depth of expansion; all by default (None), number will enforce only pairs from the same strand. 
Returns ------- list of expanded pairs """ for algn1, _algn1, pair_index1 in pairs_list: for _algn2, algn2, pair_index2 in pairs_list: if pair_index1 > pair_index2: continue elif pair_index1 == pair_index2: # output regular pair with no change yield algn1, _algn1, pair_index1 else: pair_order1, pair_type1 = pair_index1 pair_order2, pair_type2 = pair_index2 separated_by = pair_order2 - pair_order1 if ( pair_type1 == "R1-2" or pair_type2 == "R1-2" or (pair_type1 == "R1" and pair_type2 == "R2") ): pair_type = "R1-2" elif pair_type1 == pair_type2: pair_type = pair_type1 elif pair_type1 == "R1&2": pair_type = pair_type2 elif pair_type2 == "R1&2": pair_type = pair_type1 else: raise ValueError( f"Unexpected error, pair types: {pair_type1}, {pair_type2}" ) same_read = pair_type != "R1-2" if (max_expansion_depth is None) or ( (separated_by <= max_expansion_depth) and same_read ): pair_type = f"E{separated_by}_{pair_type}" yield algn1, algn2, (pair_order1, pair_type) ### Additional functions for complex walks rescue ### def partial_overlap(algn1, algn2, max_insert_size=500, dedup_max_mismatch=5): """ Two ends of alignments overlap if: 1) they are from the same chromosome, 2) map in the opposite directions, 3) the distance between the outer ends of the two alignments is below the specified max_insert_size, 4) the distance between the outer ends of the two alignments is above the maximum alignment size. (4) guarantees that the alignments point towards each other on the chromosomes. Allowed mismatch between intramolecular alignments to detect readthrough duplicates. Return: 1 if the alignments overlap or both have troubles with unique mapping, 0 if they do not overlap or if we do not have enough information (e.g. only one of the alignments have troubles with being mapped). 
""" # Alignments with no match or with multiple matches are counted as overlaps if not (algn1["is_mapped"] and algn1["is_unique"]): if not (algn2["is_mapped"] and algn2["is_unique"]): return 1 # We assume that successful alignment cannot be an overlap with unmapped or multi-mapped region if not (algn1["is_mapped"] and algn1["is_unique"]): return 0 if not (algn2["is_mapped"] and algn2["is_unique"]): return 0 # Both alignments are mapped and unique do_overlap = True do_overlap &= algn1["chrom"] == algn2["chrom"] do_overlap &= algn1["strand"] != algn2["strand"] if algn1["strand"] == "+": min_algn_size = max( algn1["pos3"] - algn1["pos5"], algn2["pos5"] - algn2["pos3"] ) distance_outer_ends = algn2["pos5"] - algn1["pos5"] else: min_algn_size = max( algn1["pos5"] - algn1["pos3"], algn2["pos3"] - algn2["pos5"] ) distance_outer_ends = algn1["pos5"] - algn2["pos5"] do_overlap &= distance_outer_ends <= max_insert_size + dedup_max_mismatch do_overlap &= distance_outer_ends >= min_algn_size - dedup_max_mismatch if do_overlap: return 1 return 0 def pairs_overlap(algns1, algns2, dedup_max_mismatch=3): """ We assume algns1 originate from left read, and algns2 originate from right read: left read: right read: ----------------------------> <---------------------------- algns1 algns2 5------------3_5------------3 3------------5_3------------5' left_5'-algn left_3'-algn right_3'-algn right_5'-algn Two pairs of alignments overlap if: 1) chromosomes/mapping/strand of left_5'-algn and right_3'-algn are the same, 2) chromosomes/mapping/strand of left_3'-algn and right_5'-algn are the same, 3) pos3 of left_5'-algn is close to pos5 of right_3'-algn (with dedup_max_mismatch), and 4) pos5 of left_3'-algn is close to pos3 of right_5'-algn. Return: 1 of the pairs of alignments overlap, 0 otherwise. 
""" left5_algn = algns1[0] left3_algn = algns1[1] right5_algn = algns2[0] right3_algn = algns2[1] # We assume that successful alignment cannot be an overlap with unmapped or multi-mapped region: mapped_left5_algn = left5_algn["is_mapped"] and left5_algn["is_unique"] mapped_left3_algn = left3_algn["is_mapped"] and left3_algn["is_unique"] mapped_right5_algn = right5_algn["is_mapped"] and right5_algn["is_unique"] mapped_right3_algn = right3_algn["is_mapped"] and right3_algn["is_unique"] if not mapped_left5_algn and not mapped_right3_algn: left_overlap = True elif not mapped_left5_algn and mapped_right3_algn: left_overlap = False elif mapped_left5_algn and not mapped_right3_algn: left_overlap = False else: left_overlap = True left_overlap &= left5_algn["chrom"] == right3_algn["chrom"] left_overlap &= left5_algn["strand"] != right3_algn["strand"] if not mapped_left3_algn and not mapped_right5_algn: right_overlap = True elif not mapped_left3_algn and mapped_right5_algn: right_overlap = False elif mapped_left3_algn and not mapped_right5_algn: right_overlap = False else: right_overlap = True right_overlap &= left3_algn["chrom"] == right5_algn["chrom"] right_overlap &= left3_algn["strand"] != right5_algn["strand"] same_pair = True same_pair &= abs(left5_algn["pos3"] - right3_algn["pos5"]) <= dedup_max_mismatch same_pair &= abs(left3_algn["pos5"] - right5_algn["pos3"]) <= dedup_max_mismatch if left_overlap & right_overlap & same_pair: return 1 else: return 0 def format_pair( hic_algn1, hic_algn2, pair_index, report_position="outer", report_orientation="pair", algn1_pos5=None, algn1_pos3=None, algn2_pos5=None, algn2_pos3=None, ): """ Return a triplet: pair of formatted alignments and pair_index in a walk :param hic_algn1: Left alignment forming a pair :param hic_algn2: Right alignment forming a pair :param algns1: All left read alignments for formal reporting :param algns2: All right read alignments for formal reporting :param pair_index: Index of the pair :param algn1_pos5: 
Replace reported 5'-position of the alignment 1 with this value :param algn1_pos3: Replace reported 3'-position of the alignment 1 with this value :param algn2_pos5: Replace reported 5'-position of the alignment 2 with this value :param algn2_pos3: Replace reported 3'-position of the alignment 2 with this value """ # Make sure the original data is not modified: hic_algn1, hic_algn2 = dict(hic_algn1), dict(hic_algn2) # Adjust the 5' and 3'-ends: hic_algn1["pos5"] = algn1_pos5 if not algn1_pos5 is None else hic_algn1["pos5"] hic_algn1["pos3"] = algn1_pos3 if not algn1_pos3 is None else hic_algn1["pos3"] hic_algn2["pos5"] = algn2_pos5 if not algn2_pos5 is None else hic_algn2["pos5"] hic_algn2["pos3"] = algn2_pos3 if not algn2_pos3 is None else hic_algn2["pos3"] hic_algn1["type"] = ( "N" if not hic_algn1["is_mapped"] else "M" if not hic_algn1["is_unique"] else "U" ) hic_algn2["type"] = ( "N" if not hic_algn2["is_mapped"] else "M" if not hic_algn2["is_unique"] else "U" ) # Change orientation and positioning of pair for reporting: # AVAILABLE_REPORT_POSITION = ["outer", "pair", "read", "walk"] # AVAILABLE_REPORT_ORIENTATION = ["pair", "pair", "read", "walk"] pair_type = pair_index[1] if report_orientation == "read": pass elif report_orientation == "walk": if pair_type == "R2": hic_algn1 = flip_orientation(hic_algn1) hic_algn2 = flip_orientation(hic_algn2) elif pair_type == "R1-2": hic_algn2 = flip_orientation(hic_algn2) elif report_orientation == "pair": if pair_type == "R1" or pair_type == "R1&R2": hic_algn2 = flip_orientation(hic_algn2) elif pair_type == "R2": hic_algn1 = flip_orientation(hic_algn1) elif report_orientation == "junction": if pair_type == "R1" or pair_type == "R1&R2": hic_algn1 = flip_orientation(hic_algn1) elif pair_type == "R2": hic_algn2 = flip_orientation(hic_algn2) else: hic_algn1 = flip_orientation(hic_algn1) hic_algn2 = flip_orientation(hic_algn2) if report_position == "read": pass elif report_position == "walk": if pair_type == "R2": hic_algn1 = 
flip_position(hic_algn1) hic_algn2 = flip_position(hic_algn2) elif pair_type == "R1-2": hic_algn2 = flip_position(hic_algn2) elif report_position == "outer": if pair_type == "R1" or pair_type == "R1&R2": hic_algn2 = flip_position(hic_algn2) elif pair_type == "R2": hic_algn1 = flip_position(hic_algn1) elif report_position == "junction": if pair_type == "R1" or pair_type == "R1&R2": hic_algn1 = flip_position(hic_algn1) elif pair_type == "R2": hic_algn2 = flip_position(hic_algn2) else: hic_algn1 = flip_position(hic_algn1) hic_algn2 = flip_position(hic_algn2) return [hic_algn1, hic_algn2, pair_index] def check_pair_order(algn1, algn2, chrom_enum): """ Check if a pair of alignments has the upper-triangular order or has to be flipped. """ # First, the pair is flipped according to the type of mapping on its sides. # Later, we will check it is mapped on both sides and, if so, flip the sides # according to these coordinates. has_correct_order = (algn1["is_mapped"], algn1["is_unique"]) <= ( algn2["is_mapped"], algn2["is_unique"], ) # If a pair has coordinates on both sides, it must be flipped according to # its genomic coordinates. 
if (algn1["chrom"] != pairsam_format.UNMAPPED_CHROM) and ( algn2["chrom"] != pairsam_format.UNMAPPED_CHROM ): has_correct_order = (chrom_enum[algn1["chrom"]], algn1["pos"]) <= ( chrom_enum[algn2["chrom"]], algn2["pos"], ) return has_correct_order ###################### ### Output utilities: ###################### def write_all_algnments(readID, all_algns1, all_algns2, out_file): """ Debug utility that outputs all alignments in .bam file before parsing walks/pairs """ for side_idx, all_algns in enumerate((all_algns1, all_algns2)): out_file.write(readID) out_file.write("\t") out_file.write(str(side_idx + 1)) out_file.write("\t") for algn in sorted(all_algns, key=lambda x: x["dist_to_5"]): out_file.write(algn["chrom"]) out_file.write("\t") out_file.write(str(algn["pos5"])) out_file.write("\t") out_file.write(algn["strand"]) out_file.write("\t") out_file.write(str(algn["mapq"])) out_file.write("\t") out_file.write(str(algn["cigar"])) out_file.write("\t") out_file.write(str(algn["dist_to_5"])) out_file.write("\t") out_file.write(str(algn["dist_to_5"] + algn["algn_read_span"])) out_file.write("\t") out_file.write(str(algn["matched_bp"])) out_file.write("\t") out_file.write("\n") def write_pairsam( algn1, algn2, readID, pair_index, sams1, sams2, out_file, drop_readid, drop_seq, drop_sam, add_pair_index, add_columns, ): """ Write output pairsam. Note: SAM is already tab-separated and any printable character between ! and ~ may appear in the PHRED field! (http://www.ascii-code.com/) Thus, use the vertical tab character to separate fields! """ cols = [ "." 
if drop_readid else readID, algn1["chrom"], str(algn1["pos"]), algn2["chrom"], str(algn2["pos"]), algn1["strand"], algn2["strand"], algn1["type"] + algn2["type"], ] if not drop_sam: for sams in [sams1, sams2]: if drop_seq: for sam in sams: sam.query_qualities = "" sam.query_sequence = "" cols.append( pairsam_format.INTER_SAM_SEP.join( [ sam.to_string().replace( "\t", pairsam_format.SAM_SEP ) # String representation of pysam alignment + pairsam_format.SAM_SEP + "Yt:Z:" + algn1["type"] + algn2["type"] for sam in sams ] ) ) if add_pair_index: cols.append(str(pair_index[0])) cols.append(pair_index[1]) for col in add_columns: # use get b/c empty alignments would not have sam tags (NM, AS, etc) cols.append(str(algn1.get(col, ""))) cols.append(str(algn2.get(col, ""))) out_file.write(pairsam_format.PAIRSAM_SEP.join(cols) + "\n") pairtools-1.0.3/pairtools/lib/parse_pysam.pyx000066400000000000000000000110151452673171500214070ustar00rootroot00000000000000from pysam.libcalignmentfile cimport AlignmentFile from pysam.libcalignedsegment cimport AlignedSegment, AlignmentHeader from pysam.libchtslib cimport * from pysam.libcutils cimport array_to_qualitystring cdef class AlignmentFilePairtoolized(AlignmentFile): """ Modified class that loads each entry as pairtoolozed alignment. 
""" def __next__(self): cdef int ret = self.cnext() if (ret >= 0): # Redefine the constructed object: return makeAlignedSegmentPairtoolized(self.b, self.header) elif ret == -2: raise IOError('truncated file') else: raise StopIteration cdef AlignedSegmentPairtoolized makeAlignedSegmentPairtoolized(bam1_t *src, AlignmentHeader header): '''return an AlignedSegmentPairtoolized object constructed from `src`''' # note that the following does not call __init__ # Redefine the constructed object: cdef AlignedSegmentPairtoolized dest = AlignedSegmentPairtoolized.__new__(AlignedSegmentPairtoolized) dest._delegate = bam_dup1(src) dest.header = header return dest cdef class AlignedSegmentPairtoolized(AlignedSegment): """ In the pairtoolized class we inherit everything and add some useful properties and functions on top of that. """ def is_unique(self, min_mapq): """true if read is unique mapping (by mapq)""" return self.mapq >= min_mapq property is_linear: """true if read is linear (SA is present in tages)""" def __get__(self): if self.has_tag('SA'): return False # for tag in self.tags: # if 'SA'==tag[0]: # return False return True property cigar_dict: """Parsed CIGAR as dictionary with interpretable fields""" def __get__(self): """Parse cigar tuples reported as cigartuples of pysam read entry. Reports alignment span, clipped nucleotides and more. 
See https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.cigartuples """ matched_bp = 0 algn_ref_span = 0 algn_read_span = 0 read_len = 0 clip5_ref = 0 clip3_ref = 0 cigarstring = self.cigarstring cigartuples = self.cigartuples if cigartuples is not None: for operation, length in cigartuples: if operation == 0: # M, match matched_bp += length algn_ref_span += length algn_read_span += length read_len += length elif operation == 1: # I, insertion algn_read_span += length read_len += length elif operation == 2: # D, deletion algn_ref_span += length elif ( operation == 4 or operation == 5 ): # S and H, soft clip and hard clip, respectively read_len += length if matched_bp == 0: clip5_ref = length else: clip3_ref = length return { "clip5_ref": clip5_ref, "clip3_ref": clip3_ref, "cigar": cigarstring, "algn_ref_span": algn_ref_span, "algn_read_span": algn_read_span, "read_len": read_len, "matched_bp": matched_bp } from cpython cimport array import cython cimport cython cpdef list get_mismatches_c(str seq, array.array quals, list aligned_pairs): ''' This function takes a SAM alignment and, for every mismatch between the read and reference sequences, returns a tuple (the reference bp, the read bp, PHRED quality of the bp, reference position, read position). 
def get_chrom_phase(chrom, phase_suffixes):
    """Split a phased chromosome name into a phase label and a base name.

    Parameters
    ----------
    chrom : str
        Chromosome name, possibly ending with one of the phase suffixes.
    phase_suffixes : sequence of str
        ``phase_suffixes[0]`` marks phase "0", ``phase_suffixes[1]`` marks
        phase "1".

    Returns
    -------
    tuple of (str, str)
        ``("0", base)`` or ``("1", base)`` when the matching suffix is found
        (with the suffix removed), otherwise ``("!", chrom)``.
    """
    if chrom.endswith(phase_suffixes[0]):
        # Slice by explicit length: chrom[:-0] would return "" for an empty suffix.
        return "0", chrom[: len(chrom) - len(phase_suffixes[0])]
    if chrom.endswith(phase_suffixes[1]):
        return "1", chrom[: len(chrom) - len(phase_suffixes[1])]
    return "!", chrom


def _is_homologue(phase, chrom_base, alt_chrom, phase_suffixes):
    """Return True if `alt_chrom` is the same base chromosome in the opposite phase."""
    alt_phase, alt_chrom_base = get_chrom_phase(alt_chrom, phase_suffixes)
    return chrom_base == alt_chrom_base and {phase, alt_phase} == {"0", "1"}


def phase_side_XB(chrom, XB, AS, XS, phase_suffixes):
    """Phase one side of a pair using the XB tag (scores of alternative hits).

    Parameters
    ----------
    chrom : str
        Chromosome of the primary hit (with phase suffix, if any).
    XB : str
        Semicolon-separated alternative hits; each hit must consist of exactly
        six comma-separated fields (chrom, pos, CIGAR, NM, AS, mapq in the
        original naming), otherwise ValueError is raised on unpacking.
    AS, XS : int
        Scores of the primary and of the best secondary alignment.
    phase_suffixes : sequence of str
        Suffixes passed to `get_chrom_phase`.

    Returns
    -------
    tuple
        ``(phase, chrom_base, S1, S2, S3)`` where S1 is AS, S2 is XS and S3 is
        the score of the second alternative hit (-1 if it was not reported).
        ``phase`` is "0"/"1"/"!" from the primary hit when AS > XS, "." when
        the first alternative hit is the homologue, and ``("!", "!")``
        replaces phase and chromosome in every other case.
    """
    phase, chrom_base = get_chrom_phase(chrom, phase_suffixes)
    XBs = [hit for hit in XB.split(";") if len(hit) > 0]
    S1, S2, S3 = AS, XS, -1  # -1 if the second hit was not reported

    if AS > XS:  # Primary hit has higher score than the secondary
        return phase, chrom_base, S1, S2, S3

    if XBs:
        if len(XBs) >= 2:
            _, _, _, _, alt2_AS, _ = XBs[1].split(",")
            S3 = int(alt2_AS)
            # Three equally good hits cannot be resolved between two homologues.
            if S3 == XS == AS:
                return "!", "!", S1, S2, S3

        alt_chrom, _, _, _, _, _ = XBs[0].split(",")
        if _is_homologue(phase, chrom_base, alt_chrom, phase_suffixes):
            return ".", chrom_base, S1, S2, S3

    return "!", "!", S1, S2, S3


def phase_side_XA(chrom, XA, AS, XS, NM, phase_suffixes):
    """Phase one side of a pair using the XA tag (mismatches of alternative hits).

    Parameters
    ----------
    chrom : str
        Chromosome of the primary hit (with phase suffix, if any).
    XA : str
        Semicolon-separated alternative hits; each hit must consist of exactly
        four comma-separated fields (chrom, pos, CIGAR, NM in the original
        naming), otherwise ValueError is raised on unpacking.
    AS, XS : int
        Scores of the primary and of the best secondary alignment.
    NM : int
        Number of mismatches of the primary hit.
    phase_suffixes : sequence of str
        Suffixes passed to `get_chrom_phase`.

    Returns
    -------
    tuple
        ``(phase, chrom_base, M1, M2, M3)`` where M1 is NM and M2/M3 are the
        NM values of the first/second alternative hits (-1 if not reported).
        The phase/chromosome part follows the same rules as `phase_side_XB`.
    """
    phase, chrom_base = get_chrom_phase(chrom, phase_suffixes)
    XAs = [hit for hit in XA.split(";") if len(hit.strip()) > 0]

    M1, M2, M3 = NM, -1, -1  # -1 if the alternative hit was not reported
    alt_chrom = None
    if XAs:
        # The first alternative hit is parsed once and reused below.
        alt_chrom, _, _, alt_NM = XAs[0].split(",")
        M2 = int(alt_NM)

    if AS > XS:  # Primary hit has higher score than the secondary
        return phase, chrom_base, M1, M2, M3

    if XAs:
        if len(XAs) >= 2:
            _, _, _, alt2_NM = XAs[1].split(",")
            M3 = int(alt2_NM)
            # Three hits with the same number of mismatches cannot be resolved.
            if M3 == M2 == NM:
                return "!", "!", M1, M2, M3

        if _is_homologue(phase, chrom_base, alt_chrom, phase_suffixes):
            return ".", chrom_base, M1, M2, M3

    return "!", "!", M1, M2, M3
def find_rfrag(rfrags, chrom, pos):
    """Locate the restriction fragment that contains a position.

    Parameters
    ----------
    rfrags : mapping
        Maps a chromosome name to an object supporting ``searchsorted``,
        ``len`` and integer indexing (presumably a sorted numpy array of
        restriction-site positions -- confirm against the caller).
    chrom : str
        Chromosome of the position.
    pos : int
        Position to look up.

    Returns
    -------
    tuple
        ``(idx, rsites[idx], rsites[idx + 1])``: the fragment index and the
        two site positions that bound it. For unmapped chromosomes and for
        chromosomes missing from `rfrags` the placeholder triple
        ``(UNANNOTATED_RFRAG, UNMAPPED_POS, UNMAPPED_POS)`` is returned; the
        latter case also emits a warning.
    """
    unannotated = (
        pairsam_format.UNANNOTATED_RFRAG,
        pairsam_format.UNMAPPED_POS,
        pairsam_format.UNMAPPED_POS,
    )

    # Return empty if chromosome is unmapped:
    if chrom == pairsam_format.UNMAPPED_CHROM:
        return unannotated

    try:
        rsites_chrom = rfrags[chrom]
    except (KeyError, ValueError):
        # A missing key in a mapping raises KeyError; ValueError is still
        # caught so that any caller relying on the old behaviour keeps working.
        warnings.warn(
            f"Chomosome {chrom} does not have annotated restriction fragments, return empty."
        )
        return unannotated

    # Clamp the index to [0, len - 2] so that idx + 1 is always a valid site.
    idx = min(
        max(0, rsites_chrom.searchsorted(pos, "right") - 1), len(rsites_chrom) - 2
    )
    return idx, rsites_chrom[idx], rsites_chrom[idx + 1]
def cartesian_df_product(df1, df2, suffixes=("1", "2")):
    """Return the Cartesian product (cross join) of two dataframes.

    Every row of `df1` is paired with every row of `df2` by merging on a
    temporary constant column, which is dropped from the result.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Input tables; they are not modified.
    suffixes : sequence of two str
        Suffixes appended to column names present in both inputs.
        The default is an immutable tuple (previously a shared list).

    Returns
    -------
    pd.DataFrame
        Table with ``len(df1) * len(df2)`` rows.
    """
    dummy = "cartesian_product_dummy"
    return pd.merge(
        left=df1.assign(**{dummy: 1}),
        right=df2.assign(**{dummy: 1}),
        on=[dummy],
        how="outer",
        suffixes=suffixes,
    ).drop(dummy, axis="columns")
np.iinfo(np.int64).max] if regions is None: if chromsizes is None: chroms = sorted( set.union(set(pairs_df.chrom1.unique()), set(pairs_df.chrom2.unique())) ) regions = pd.DataFrame({"chrom": chroms, "start": 0, "end": -1}) regions = regions[["chrom", "start", "end"]] region_starts1, region_starts2 = 0, 0 region_ends1, region_ends2 = -1, -1 else: region_ends1 = pairs_df.chrom1.map(chromsizes).fillna(-1).astype(np.int64) region_ends2 = pairs_df.chrom2.map(chromsizes).fillna(-1).astype(np.int64) region_starts1 = np.where(region_ends1 > 0, 0, -1) region_starts2 = np.where(region_ends2 > 0, 0, -1) regions = pd.DataFrame( [ {"chrom": chrom, "start": 0, "end": length} for chrom, length in chromsizes.items() ] ) regions = regions[["chrom", "start", "end"]] try: regions = bioframe.from_any(regions) except Exception as e: raise ValueError(f"Invalid viewframe created from pairs file, {e}") else: if not bioframe.is_viewframe(regions): try: regions = bioframe.from_any(regions) except Exception as e: raise ValueError( f"Provided regions cannot be converted to viewframe, {e}" ) regions = regions[["chrom", "start", "end"]] _, region_starts1, region_ends1 = assign_regs( pairs_df.chrom1.values, pairs_df.pos1.values, regions ).T _, region_starts2, region_ends2 = assign_regs( pairs_df.chrom2.values, pairs_df.pos2.values, regions ).T pairs_reduced_df = pd.DataFrame( { "chrom1": pairs_df.chrom1.values, "start1": region_starts1, "end1": region_ends1, "chrom2": pairs_df.chrom2.values, "start2": region_starts2, "end2": region_ends2, "strand1": pairs_df.strand1.values, "strand2": pairs_df.strand2.values, "dist_bin_idx": np.searchsorted( dist_bins, np.abs(pairs_df.pos1 - pairs_df.pos2), side="right" ), "n_pairs": 1, }, copy=False, ) if not keep_unassigned: pairs_reduced_df = (pairs_reduced_df .query('(start1 >= 0) and (end1 > 0) and (start2 >= 0) and (end2 > 0)') .reset_index(drop=True)) pairs_reduced_df["min_dist"] = np.where( pairs_reduced_df["dist_bin_idx"] > 0, 
def contact_areas_same_reg(min_dist, max_dist, region_length):
    """Compute the contact area of a distance bin within a single region.

    For a region of length ``L`` the result is::

        0.5 * (max(L - min_dist, 0) ** 2 - max(L - max_dist, 0) ** 2)

    Parameters
    ----------
    min_dist, max_dist : scalar or array-like
        Lower and upper edges of the distance bin(s); converted to float
        with `_to_float`.
    region_length : scalar, np.ndarray or pd.Series
        Length(s) of the region(s). Used exactly as passed, so the result
        keeps the container type produced by ``region_length - min_dist``.

    Returns
    -------
    float or array-like
        The area for each combination of bin and region length, following
        the usual broadcasting of the inputs.
    """
    min_dist = _to_float(min_dist)
    max_dist = _to_float(max_dist)

    # The previous revision also stored _to_float(region_length) in an unused
    # local; that dead conversion is dropped and region_length is left as is.
    outer_areas = np.maximum(region_length - min_dist, 0) ** 2
    inner_areas = np.maximum(region_length - max_dist, 0) ** 2
    return 0.5 * (outer_areas - inner_areas)
_contact_areas_diff_reg( min_dist, max_dist, region_start1, region_end1, region_start2, region_end2 ): return ( contact_areas_same_reg(min_dist, max_dist, np.abs(region_end2 - region_start1)) + contact_areas_same_reg( min_dist, max_dist, np.abs(region_end1 - region_start2) ) - contact_areas_same_reg( min_dist, max_dist, np.abs(region_start1 - region_start2) ) - contact_areas_same_reg(min_dist, max_dist, np.abs(region_end1 - region_end2)) ) def _contact_areas_trans(min_dist, max_dist, region_length1, region_length2): return ( contact_areas_same_reg(min_dist, max_dist, region_length1 + region_length2) - contact_areas_same_reg(min_dist, max_dist, region_length1) - contact_areas_same_reg(min_dist, max_dist, region_length2) ) def compute_scaling( pairs, regions=None, chromsizes=None, dist_range=(int(1e1), int(1e9)), n_dist_bins=8 * 8, chunksize=int(1e7), ignore_trans=False, keep_unassigned=False, filter_f=None, nproc_in=1, cmd_in=None, ): """ Main function for computing scaling. Parameters ---------- pairs: pd.DataFrame, stream of fiel paht with pairs. regions: bioframe viewframe, anything that can serve as input to bioframe.from_any, or None chromsizes: additional dataframe with chromosome sizes, if different from regions dist_range: (int, int) tuple with distance ranges that will be split into windows n_dist_bins: number of logarithmic bins chunksize: size of chunks for calculations ignore_trans: bool, ignore trans or not keep_unassigned: bool, keep pairs that are not assigned to any region filter_f: filter function that can be applied to each chunk nproc_in cmd_in Returns ------- """ dist_bins = geomspace(dist_range[0], dist_range[1], n_dist_bins) if isinstance(pairs, pd.DataFrame): pairs_df = pairs elif isinstance(pairs, str) or hasattr(pairs, "buffer") or hasattr(pairs, "peek"): from . 
def norm_scaling_factor(bins, cfreqs, anchor=1.0, binwindow=(0, 3)):
    """Return the mean contact frequency in a window of bins around `anchor`.

    The window is located with ``np.searchsorted(bins, anchor)`` and spans
    the positions from ``i + binwindow[0]`` up to (not including)
    ``i + binwindow[1]`` of `cfreqs`.
    """
    anchor_idx = np.searchsorted(bins, anchor)
    window_start = anchor_idx + binwindow[0]
    window_stop = anchor_idx + binwindow[1]
    window = cfreqs[window_start:window_stop]
    return window.mean()


def norm_scaling(bins, cfreqs, anchor=1.0, binwindow=(0, 3)):
    """Divide `cfreqs` by the factor computed by `norm_scaling_factor`."""
    factor = norm_scaling_factor(bins, cfreqs, anchor, binwindow)
    return cfreqs / factor
# Caches of compiled matchers, keyed by the pattern string they were built from.
wildcard_library = {}


def wildcard_match(x, wildcard):
    """Full-match `x` against a shell-style wildcard; the compiled pattern is cached."""
    try:
        pattern = wildcard_library[wildcard]
    except KeyError:
        pattern = re.compile(fnmatch.translate(wildcard))
        wildcard_library[wildcard] = pattern
    return pattern.fullmatch(x)


csv_library = {}


def csv_match(x, csv):
    """Return True if `x` is one of the comma-separated items of `csv` (item set is cached)."""
    try:
        items = csv_library[csv]
    except KeyError:
        items = set(csv.split(","))
        csv_library[csv] = items
    return x in items


regex_library = {}


def regex_match(x, regex):
    """Full-match `x` against a regular expression; the compiled pattern is cached."""
    try:
        pattern = regex_library[regex]
    except KeyError:
        pattern = re.compile(regex)
        regex_library[regex] = pattern
    return pattern.fullmatch(x)
def evaluate_df(df, condition, type_cast=(), startup_code=None, engine="pandas"):
    """
    Evaluate a filtering expression for every row of a dataframe.

    Parameters
    ----------
    df: input dataframe for evaluation. Columns listed in TYPES are cast
        in place to the requested type before the evaluation.
    condition: condition to evaluate
    type_cast: additional types transformations, if different from default;
        merged into the module-level TYPES.
    startup_code: An auxiliary code to execute before filtering.
        Use to define functions that can be evaluated in the CONDITION statement
    engine: "pandas" evaluates the condition with df.eval; any other value
        evaluates it row by row with Python's eval, with column names
        replaced by positional lookups.

    Returns
    -------
    The result of df.eval(condition) for the pandas engine, otherwise
    a list of bool with one entry per row.
    """

    # Define data types:
    TYPES.update(dict(type_cast))

    # Execute startup code:
    # NOTE: exec/eval run arbitrary code; `startup_code` and `condition`
    # must come from a trusted source.
    if startup_code is not None:
        exec(startup_code, globals())

    # Set up the column formats: cast only the columns whose dtype differs
    # from the requested one (the previous check was inverted and skipped them).
    for col in df.columns:
        if col in TYPES and str(df.dtypes[col]) != TYPES[col]:
            df[col] = df[col].astype(TYPES[col])

    if engine == "pandas":
        try:
            return df.eval(condition)
        except ValueError as e:
            raise ValueError(f"Try passing engine python to fix this: {e}") from e

    # Set up the columns indexing
    for i, col in enumerate(df.columns):
        condition = re.sub(r"\b%s\b" % col, "COLS[{}]".format(i), condition)

    match_func = compile(condition, "", "eval")
    filter_passed_output = []
    for _, r in df.iterrows():
        # COLS must be a local of this frame: the compiled expression reads it.
        COLS = r.values
        # Evaluate filtering expression:
        filter_passed_output.append(bool(eval(match_func)))

    return filter_passed_output
""" _SEP = "\t" _KEY_SEP = "/" def __init__( self, min_log10_dist=0, max_log10_dist=9, log10_dist_bin_step=0.25, bytile_dups=False, filters=None, **kwargs, ): # Define filters and parameters for filters evaluation: if filters is not None: self.filters = filters else: self.filters = {"no_filter": ""} self.startup_code = kwargs.get("startup_code", "") self.type_cast = kwargs.get("type_cast", ()) self.engine = kwargs.get("engine", "pandas") # Define default filter: if "no_filter" not in self.filters: self.filters["no_filter"] = "" self._stat = {key: {} for key in self.filters} # some variables used for initialization: # genomic distance bining for the ++/--/-+/+- distribution self._dist_bins = np.r_[ 0, np.round( 10 ** np.arange( min_log10_dist, max_log10_dist + 0.001, log10_dist_bin_step ) ).astype(np.int_), ] # establish structure of an empty _stat: for key in self.filters: self._stat[key]["filter_expression"] = self.filters[key] self._stat[key]["total"] = 0 self._stat[key]["total_unmapped"] = 0 self._stat[key]["total_single_sided_mapped"] = 0 # total_mapped = total_dups + total_nodups self._stat[key]["total_mapped"] = 0 self._stat[key]["total_dups"] = 0 self._stat[key]["total_nodups"] = 0 ######################################## # the rest of stats are based on nodups: ######################################## self._stat[key]["cis"] = 0 self._stat[key]["trans"] = 0 self._stat[key]["pair_types"] = {} # to be removed: self._stat[key]["dedup"] = {} self._stat[key]["cis_1kb+"] = 0 self._stat[key]["cis_2kb+"] = 0 self._stat[key]["cis_4kb+"] = 0 self._stat[key]["cis_10kb+"] = 0 self._stat[key]["cis_20kb+"] = 0 self._stat[key]["cis_40kb+"] = 0 self._stat[key]["summary"] = dict( [ ("frac_cis", 0), ("frac_cis_1kb+", 0), ("frac_cis_2kb+", 0), ("frac_cis_4kb+", 0), ("frac_cis_10kb+", 0), ("frac_cis_20kb+", 0), ("frac_cis_40kb+", 0), ("frac_dups", 0), ("complexity_naive", 0), ] ) self._stat[key]["chrom_freq"] = {} self._stat[key]["dist_freq"] = { "+-": {bin.item(): 0 for bin in 
self._dist_bins}, "-+": {bin.item(): 0 for bin in self._dist_bins}, "--": {bin.item(): 0 for bin in self._dist_bins}, "++": {bin.item(): 0 for bin in self._dist_bins}, } self._stat[key]["chromsizes"] = {} # Summaries are derived from other stats and are recalculated on merge self._save_bytile_dups = bytile_dups if self._save_bytile_dups: self._bytile_dups = pd.DataFrame( index=pd.MultiIndex( levels=[[], []], codes=[[], []], names=["tile", "parent_tile"] ) ) self._summaries_calculated = False def __getitem__(self, key, filter="no_filter"): if isinstance(key, str): # let's strip any unintentional '/' # from either side of the key key = key.strip("/") if self._KEY_SEP in key: # multi-key to access nested elements k_fields = key.split(self._KEY_SEP) else: # single-key access flat part of PairCounter # or to access highest level of hierarchy return self._stat[filter][key] else: # clearly an error: raise ValueError("{} is not a valid key: must be str".format(key)) # K_FIELDS: # process multi-key case: # in this case key must be in ['pair_types','chrom_freq','dist_freq','dedup'] # get the first 'k' and keep the remainders in 'k_fields' k = k_fields.pop(0) if k in ["pair_types", "dedup"]: # assert there is only one element in key_fields left: # 'pair_types' and 'dedup' treated the same if len(k_fields) == 1: return self._stat[filter][k][k_fields[0]] else: raise ValueError( "{} is not a valid key: {} section implies 1 identifier".format( key, k ) ) elif k == "chrom_freq": # assert remaining key_fields == [chr1, chr2]: if len(k_fields) == 2: return self._stat[filter][k][tuple(k_fields)] else: raise ValueError( "{} is not a valid key: {} section implies 2 identifiers".format( key, k ) ) elif k == "dist_freq": # assert that last element of key_fields is the 'directions' # THIS IS DONE FOR CONSISTENCY WITH .stats FILE # SHOULD THAT BE CHANGED IN .stats AND HERE AS WELL? 
if len(k_fields) == 2: # assert 'dirs' in ['++','--','+-','-+'] dirs = k_fields.pop() # there is only genomic distance range of the bin that's left: (bin_range,) = k_fields # extract left border of the bin "1000000+" or "1500-6000": dist_bin_left = ( bin_range.strip("+") if bin_range.endswith("+") else bin_range.split("-")[0] ) # get the index of that bin: bin_idx = ( np.searchsorted(self._dist_bins, int(dist_bin_left), "right") - 1 ) # store corresponding value: return self._stat[filter]["dist_freq"][dirs][bin_idx] else: raise ValueError( "{} is not a valid key: {} section implies 2 identifiers".format( key, k ) ) else: raise ValueError("{} is not a valid key".format(k)) def __iter__(self): return iter(self._stat) def __len__(self): return len(self._stat) def calculate_summaries(self): """calculate summary statistics (fraction of cis pairs at different cutoffs, complexity estimate) based on accumulated counts. Results are saved into self._stat["filter_name"]['summary"] """ for key in self.filters.keys(): self._stat[key]["summary"]["frac_dups"] = ( (self._stat[key]["total_dups"] / self._stat[key]["total_mapped"]) if self._stat[key]["total_mapped"] > 0 else 0 ) for cis_count in ( "cis", "cis_1kb+", "cis_2kb+", "cis_4kb+", "cis_10kb+", "cis_20kb+", "cis_40kb+", ): self._stat[key]["summary"][f"frac_{cis_count}"] = ( (self._stat[key][cis_count] / self._stat[key]["total_nodups"]) if self._stat[key]["total_nodups"] > 0 else 0 ) self._stat[key]["summary"][ "complexity_naive" ] = estimate_library_complexity( self._stat[key]["total_mapped"], self._stat[key]["total_dups"], 0 ) if key == "no_filter" and self._save_bytile_dups: # Estimate library complexity with information by tile, if provided: if self._bytile_dups.shape[0] > 0: self._stat[key]["dups_by_tile_median"] = int( round( self._bytile_dups["dup_count"].median() * self._bytile_dups.shape[0] ) ) if "dups_by_tile_median" in self._stat[key].keys(): self._stat[key]["summary"][ "complexity_dups_by_tile_median" ] = 
estimate_library_complexity( self._stat[key]["total_mapped"], self._stat[key]["total_dups"], self._stat[key]["total_dups"] - self._stat[key]["dups_by_tile_median"], ) self._summaries_calculated = True @classmethod def from_file(cls, file_handle): """create instance of PairCounter from file Parameters ---------- file_handle: file handle Returns ------- PairCounter new PairCounter filled with the contents of the input file """ # fill in from file - file_handle: default_filter = "no_filter" stat_from_file = cls() for l in file_handle: fields = l.strip().split(cls._SEP) if len(fields) == 0: # skip empty lines: continue if len(fields) != 2: # expect two _SEP separated values per line: raise fileio.ParseError( "{} is not a valid stats file".format(file_handle.name) ) # extract key and value, then split the key: putative_key, putative_val = fields[0], fields[1] key_fields = putative_key.split(cls._KEY_SEP) # we should impose a rigid structure of .stats or redo it: if len(key_fields) == 1: key = key_fields[0] if key in stat_from_file._stat[default_filter]: stat_from_file._stat[default_filter][key] = int(fields[1]) else: raise fileio.ParseError( "{} is not a valid stats file: unknown field {} detected".format( file_handle.name, key ) ) else: # in this case key must be in ['pair_types','chrom_freq','dist_freq','dedup', 'summary'] # get the first 'key' and keep the remainders in 'key_fields' key = key_fields.pop(0) if key in ["pair_types", "dedup", "summary", "chromsizes"]: # assert there is only one element in key_fields left: # 'pair_types', 'dedup', 'summary' and 'chromsizes' treated the same if len(key_fields) == 1: try: stat_from_file._stat[default_filter][key][ key_fields[0] ] = int(fields[1]) except ValueError: stat_from_file._stat[default_filter][key][ key_fields[0] ] = float(fields[1]) else: raise fileio.ParseError( "{} is not a valid stats file: {} section implies 1 identifier".format( file_handle.name, key ) ) elif key == "chrom_freq": # assert remaining key_fields 
== [chr1, chr2]: if len(key_fields) == 2: stat_from_file._stat[default_filter][key][ tuple(key_fields) ] = int(fields[1]) else: raise fileio.ParseError( "{} is not a valid stats file: {} section implies 2 identifiers".format( file_handle.name, key ) ) elif key == "dist_freq": # assert that last element of key_fields is the 'directions' if len(key_fields) == 2: # assert 'dirs' in ['++','--','+-','-+'] dirs = key_fields.pop() # there is only genomic distance range of the bin that's left: (bin_range,) = key_fields # extract left border of the bin "1000000+" or "1500-6000": dist_bin_left = ( bin_range.strip("+") if bin_range.endswith("+") else bin_range.split("-")[0] ) # get the index of that bin: bin_idx = ( np.searchsorted( stat_from_file._dist_bins, int(dist_bin_left), "right" ) - 1 ) # store corresponding value: stat_from_file._stat[default_filter][key][dirs][bin_idx] = int( fields[1] ) else: raise fileio.ParseError( "{} is not a valid stats file: {} section implies 2 identifiers".format( file_handle.name, key ) ) else: raise fileio.ParseError( "{} is not a valid stats file: unknown field {} detected".format( file_handle.name, key ) ) # return PairCounter from a non-empty dict: return stat_from_file @classmethod def from_yaml(cls, file_handle): """create instance of PairCounter from file Parameters ---------- file_handle: file handle Returns ------- PairCounter new PairCounter filled with the contents of the input file """ # fill in from file - file_handle: stat_from_file = cls() stat = yaml.safe_load(file_handle) for key, filter in stat.items(): chromdict = {} for chroms in stat[key]["chrom_freq"].keys(): chromdict[tuple(chroms.split(cls._KEY_SEP))] = stat[key]["chrom_freq"][ chroms ] stat[key]["chrom_freq"] = chromdict stat_from_file._stat = stat return stat_from_file def add_pair( self, chrom1, pos1, strand1, chrom2, pos2, strand2, pair_type, filter="no_filter", ): """Gather statistics for a Hi-C pair and add to the PairCounter. 
Parameters ---------- chrom1: str chromosome of the first read pos1: int position of the first read strand1: str strand of the first read chrom2: str chromosome of the first read pos2: int position of the first read strand2: str strand of the first read pair_type: str type of the mapped pair of reads """ self._stat[filter]["total"] += 1 # collect pair type stats including DD: self._stat[filter]["pair_types"][pair_type] = ( self._stat[filter]["pair_types"].get(pair_type, 0) + 1 ) if chrom1 == "!" and chrom2 == "!": self._stat[filter]["total_unmapped"] += 1 elif chrom1 != "!" and chrom2 != "!": self._stat[filter]["total_mapped"] += 1 # only mapped ones can be duplicates: if pair_type == "DD": self._stat[filter]["total_dups"] += 1 else: self._stat[filter]["total_nodups"] += 1 self._stat[filter]["chrom_freq"][(chrom1, chrom2)] = ( self._stat[filter]["chrom_freq"].get((chrom1, chrom2), 0) + 1 ) if chrom1 == chrom2: self._stat[filter]["cis"] += 1 dist = np.abs(pos2 - pos1) bin = self._dist_bins[ np.searchsorted(self._dist_bins, dist, "right") - 1 ] self._stat[filter]["dist_freq"][strand1 + strand2][bin] += 1 if dist >= 1000: self._stat[filter]["cis_1kb+"] += 1 if dist >= 2000: self._stat[filter]["cis_2kb+"] += 1 if dist >= 4000: self._stat[filter]["cis_4kb+"] += 1 if dist >= 10000: self._stat[filter]["cis_10kb+"] += 1 if dist >= 20000: self._stat[filter]["cis_20kb+"] += 1 if dist >= 40000: self._stat[filter]["cis_40kb+"] += 1 else: self._stat[filter]["trans"] += 1 else: self._stat[filter]["total_single_sided_mapped"] += 1 def add_pairs_from_dataframe(self, df, unmapped_chrom="!"): """Gather statistics for Hi-C pairs in a dataframe and add to the PairCounter. Parameters ---------- df: pd.DataFrame DataFrame with pairs. 
Needs to have columns: 'chrom1', 'pos1', 'chrom2', 'pos2', 'strand1', 'strand2', 'pair_type' """ for key in self.filters.keys(): if key == "no_filter": df_filtered = df.copy() else: condition = self.filters[key] filter_passed = evaluate_df( df, condition, type_cast=self.type_cast, startup_code=self.startup_code, engine=self.engine, ) df_filtered = df.loc[filter_passed, :].reset_index(drop=True) total_count = df_filtered.shape[0] self._stat[key]["total"] += total_count # collect pair type stats including DD: for pair_type, type_count in ( df_filtered["pair_type"].value_counts().items() ): self._stat[key]["pair_types"][pair_type] = ( self._stat[key]["pair_types"].get(pair_type, 0) + type_count ) # Count the unmapped by the "unmapped" chromosomes (debatable, as WW are also marked as ! and they might be mapped): unmapped_count = np.logical_and( df_filtered["chrom1"] == unmapped_chrom, df_filtered["chrom2"] == unmapped_chrom, ).sum() self._stat[key]["total_unmapped"] += int(unmapped_count) # Count the mapped: df_mapped = df_filtered.loc[ (df_filtered["chrom1"] != unmapped_chrom) & (df_filtered["chrom2"] != unmapped_chrom), :, ] mapped_count = df_mapped.shape[0] self._stat[key]["total_mapped"] += mapped_count self._stat[key]["total_single_sided_mapped"] += int( total_count - (mapped_count + unmapped_count) ) # Count the duplicates: if "duplicate" in df_mapped.columns: mask_dups = df_mapped["duplicate"] else: mask_dups = df_mapped["pair_type"] == "DD" df_dups = df_mapped[mask_dups] dups_count = df_dups.shape[0] self._stat[key]["total_dups"] += int(dups_count) self._stat[key]["total_nodups"] += int(mapped_count - dups_count) df_nodups = df_mapped.loc[~mask_dups, :] mask_cis = df_nodups["chrom1"] == df_nodups["chrom2"] df_cis = df_nodups.loc[mask_cis, :].copy() # Count pairs per chromosome: for (chrom1, chrom2), chrom_count in ( df_nodups[["chrom1", "chrom2"]].value_counts().items() ): self._stat[key]["chrom_freq"][(chrom1, chrom2)] = ( 
def add_chromsizes(self, chromsizes):
    """Attach chromosome sizes to the stats of every filter.

    Parameters
    ----------
    chromsizes: object exposing ``to_dict()``
        Chromosome sizes (the original docstring describes it as a
        Dataframe read by headerops.chromsizes). The dict returned by
        ``to_dict()`` is stored under the "chromsizes" key.

    Notes
    -----
    The very same dict object is shared between all the filters.
    """
    sizes_by_chrom = chromsizes.to_dict()
    for per_filter_stats in self._stat.values():
        per_filter_stats["chromsizes"] = sizes_by_chrom
"chromsizes" and ( k not in self._stat[filter] or k not in other._stat[filter] ): # Skip any missing fields and warn logger.warning( f"{k} not found in at least one of the input stats, skipping" ) continue # not nested fields are summed trivially: if isinstance(v, int): sum_stat._stat[filter][k] = ( self._stat[filter][k] + other._stat[filter][k] ) # sum nested dicts/arrays in a context dependet manner: else: if k in ["pair_types", "dedup", "summary"]: # handy function for summation of a pair of dicts: # https://stackoverflow.com/questions/10461531/merge-and-sum-of-two-dictionaries sum_dicts = lambda dict_x, dict_y: { key: dict_x.get(key, 0) + dict_y.get(key, 0) for key in set(dict_x) | set(dict_y) } # sum a pair of corresponding dicts: sum_stat._stat[filter][k] = sum_dicts( self._stat[filter][k], other._stat[filter][k] ) elif k == "chrom_freq": # union list of keys (chr1,chr2) with potential duplicates: union_keys_with_dups = list(self._stat[filter][k].keys()) + list( other._stat[filter][k].keys() ) # dict.fromkeys will take care of keys' order and duplicates in a consistent manner: # https://stackoverflow.com/questions/1720421/how-to-concatenate-two-lists-in-python # last comment to the 3rd Answer sum_stat._stat[filter][k] = dict.fromkeys(union_keys_with_dups) # perform a summation: for union_key in sum_stat._stat[filter][k]: sum_stat._stat[filter][k][union_key] = self._stat[filter][ k ].get(union_key, 0) + other._stat[filter][k].get(union_key, 0) elif k == "dist_freq": for dirs in sum_stat[k]: from functools import reduce def reducer(accumulator, element): for key, value in element.items(): accumulator[key] = accumulator.get(key, 0) + value return accumulator sum_stat[k][dirs] = reduce( reducer, [self._stat[filter][k][dirs], other._stat[filter][k][dirs]], {}, ) # sum_stat[k][dirs] = self._stat[filter][k][dirs] + other._stat[filter][k][dirs] elif k == "chromsizes": if k in self._stat[filter] and k in other._stat[filter]: if self._stat[filter][k] == 
def flatten(self, filter="no_filter"):
    """Return a flattened dict (formatted the same way as a .stats file).

    Performed for a single filter.

    Parameters
    ----------
    filter: str
        key of the filter whose statistics are flattened

    Returns
    -------
    dict
        Flat mapping of ``_KEY_SEP``-joined keys to values. Top-level ``int``
        values are copied as is; the nested "dist_freq", "pair_types",
        "dedup", "chromsizes", "chrom_freq" and "summary" sections are
        expanded only when they are non-empty. Any other value is skipped.
    """
    sep = self._KEY_SEP
    # dict for flat store:
    flat_stat = {}

    for k, v in self._stat[filter].items():
        if isinstance(v, int):
            flat_stat[k] = v
        # nested categories are stored only if they are non-trivial:
        elif (k == "dist_freq") and v:
            last_idx = len(self._dist_bins) - 1
            for i, dist in enumerate(self._dist_bins):
                # `dist` (left edge of the i-th bin) is needed for EVERY bin,
                # the last one included: it labels the bin and it is the key
                # of the count inside `freqs`.
                # last bin is labelled differently: "100000+" vs "1200-3000":
                if i < last_idx:
                    bin_label = f"{dist}-{self._dist_bins[i + 1]}"
                else:
                    bin_label = f"{dist}+"
                for dirs, freqs in v.items():
                    flat_stat[f"{k}{sep}{bin_label}{sep}{dirs}"] = freqs[dist]
        elif (k in ("pair_types", "dedup", "chromsizes")) and v:
            # simple one-level dicts, all treated the exact same way:
            for k_item, freq in v.items():
                flat_stat[f"{k}{sep}{k_item}"] = freq
        elif (k == "chrom_freq") and v:
            # keys are (chrom1, chrom2) tuples:
            for (chrom1, chrom2), freq in v.items():
                flat_stat[f"{k}{sep}{chrom1}{sep}{chrom2}"] = freq
        elif (k == "summary") and v:
            for key, frac in v.items():
                flat_stat[f"{k}{sep}{key}"] = frac

    # return flattened dict
    return flat_stat
def save_bytile_dups(self, outstream):
    """Write the by-tile duplication counts as a tab-delimited table.

    The index of the stored table is turned back into regular columns
    before writing. If by-tile duplicates were not requested
    (``_save_bytile_dups`` is falsy), nothing is written and an error
    is logged instead.

    Parameters
    ----------
    outstream: file handle
    """
    if not self._save_bytile_dups:
        logger.error("Bytile dups are not calculated, cannot save.")
        return

    table = self._bytile_dups.reset_index()
    table.to_csv(outstream, sep="\t", index=False)
def extract_tile_info(series, regex=False):
    r"""Extract the name of the tile for each read name in the series

    Parameters
    ----------
    series : pd.Series
        Series containing read IDs
    regex : bool or str, optional
        Regex to extract fields from the read IDs that correspond to tile IDs.
        It has to capture (at least) three groups; the first three are joined
        with ":" to form the tile ID.
        By default False, uses a faster predefined approach for typical Illumina
        read names: fields 3 to 5 of the ":"-separated read ID.
        Example: r"(?:\w+):(?:\w+):(\w+):(\w+):(\w+):(?:\w+):(?:\w+)"

    Returns
    -------
    Series or list
        Tile IDs as strings: a Series when `regex` is used, a list (one item
        per read ID) otherwise.

    Raises
    ------
    ValueError
        If `regex` captures fewer than three fields, or if the read IDs
        can not be split as strings.
    """

    def tile_error():
        # built lazily: formatting series.head() is only needed on failure
        return ValueError(
            f"Unable to convert tile names, does your readID have the tile information?\nHint: SRA removes tile information from readID.\nSample of your readIDs:\n{series.head()}"
        )

    if regex:
        split = series.str.extractall(regex).unstack().droplevel(1, axis=1)
        # Only columns 0, 1 and 2 are used below, so three captured groups
        # are enough (requiring four rejected the documented example regex).
        if split.shape[1] < 3:
            raise tile_error()
        return split[0] + ":" + split[1] + ":" + split[2]

    try:
        return [":".join(name.split(":")[2:5]) for name in series]
    except (AttributeError, TypeError) as err:
        # non-string read IDs (e.g. missing values) can not be split
        raise tile_error() from err
Parameters ---------- yaml_path : str Path to a yaml-formatted file with stats Returns ------- pd.DataFrame Dataframe with filter names in the index and stats in columns """ counter = PairCounter.from_yaml(open(yaml_path, "r")) stats = pd.concat( [ pd.DataFrame(counter.flatten(filter=filter), index=[filter]) for filter in counter.filters ] ) return stats pairtools-1.0.3/pytest.ini000066400000000000000000000003331452673171500155720ustar00rootroot00000000000000[pytest] addopts = --cov pairtools --cov-config .coveragerc --cov-report term-missing --cov-report html --cov-report xml filterwarnings = ignore::PendingDeprecationWarning testpaths = tests pairtools-1.0.3/readthedocs.yml000066400000000000000000000002021452673171500165440ustar00rootroot00000000000000 # .readthedocs.yml build: image: latest python: version: 3.8 pip_install: true requirements_file: requirements_doc.txt pairtools-1.0.3/requirements-dev.txt000066400000000000000000000000641452673171500176020ustar00rootroot00000000000000-r requirements.txt pytest pytest-flake8 pytest-cov pairtools-1.0.3/requirements.txt000066400000000000000000000001351452673171500170250ustar00rootroot00000000000000cython numpy>=1.10 click>=6.6 scipy>=1.7.0 pandas>=1.3.4 pysam>=0.15.0 pyyaml bioframe>=0.3.3pairtools-1.0.3/requirements_doc.txt000066400000000000000000000002331452673171500176510ustar00rootroot00000000000000Cython numpy nose scipy pandas pysam bioframe click>=7.0 git+https://github.com/golobor/sphinx-click ipython nbsphinx Sphinx sphinx_rtd_theme docutils>0.16pairtools-1.0.3/setup.py000066400000000000000000000077271452673171500152710ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import io import os import re import glob from setuptools import find_packages, setup from setuptools.extension import Extension try: from Cython.Distutils import build_ext as _build_ext from Cython.Build import cythonize HAVE_CYTHON = True except ImportError: from setuptools.command.build_ext import build_ext as 
def get_version():
    """Return the version string declared in ``pairtools/__init__.py``.

    The file is scanned for a line of the form ``__version__ = "<version>"``
    (single or double quotes).

    Raises
    ------
    RuntimeError
        If no ``__version__`` assignment is found, instead of failing with
        an opaque ``AttributeError`` on a ``None`` match.
    """
    match = re.search(
        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
        _read("pairtools", "__init__.py"),
        re.MULTILINE,
    )
    if match is None:
        raise RuntimeError("Unable to find __version__ in pairtools/__init__.py")
    return match.group(1)
class build_ext(_build_ext):
    """Extension build command that adds the NumPy headers to the include path.

    NumPy is imported lazily, inside the command methods, so that it is only
    required once extensions are actually being built.
    """

    def _add_numpy_include(self):
        # Append the NumPy header directory once, however many times the
        # command hooks below are invoked.
        import numpy

        numpy_include = numpy.get_include()
        if numpy_include not in self.include_dirs:
            self.include_dirs.append(numpy_include)

    # Extension module build configuration
    def finalize_options(self):
        _build_ext.finalize_options(self)
        # Fix to work with bootstrapped numpy installation
        # http://stackoverflow.com/a/21621689/579416
        # Prevent numpy from thinking it is still in its setup process.
        # The flag is set through the `builtins` module: the `__builtins__`
        # global is a module only in `__main__` and a plain dict when this
        # file is imported or exec'ed, where attribute assignment fails.
        import builtins

        builtins.__NUMPY_SETUP__ = False
        self._add_numpy_include()

    def run(self):
        # Make sure numpy headers are in include_dirs, only when needed
        self._add_numpy_include()
        # Call original build_ext command
        _build_ext.run(self)
)pairtools-1.0.3/tests/000077500000000000000000000000001452673171500147045ustar00rootroot00000000000000pairtools-1.0.3/tests/data/000077500000000000000000000000001452673171500156155ustar00rootroot00000000000000pairtools-1.0.3/tests/data/mock.2.pairsam000066400000000000000000000021301452673171500202600ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr1 1 chr2 25 + + UU readid01129chr1160101Mchr2250CGFFXS:i:0Yt:Z:UU readid0165chr22560101Mchr110ATIIXS:i:0Yt:Z:UU readid02 chr1 1 chr1 40 + + UU readid02129chr1160101Mchr1400CGFFXS:i:0Yt:Z:UU readid0265chr14060101Mchr110ATIIXS:i:0Yt:Z:UU readid03 chr1 1 chr1 3 + + UR readid03129chr1160101Mchr130CGFFXS:i:0Yt:Z:UR readid0365chr1360101Mchr110ATIIXS:i:0Yt:Z:UR readid04 ! 0 chr1 3 - + NU readid04129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0465chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid05 ! 0 ! 0 - - NN readid05129chr1160101Mchr130CGFFXS:i:0Yt:Z:NN readid0565chr1360101Mchr110ATIIXS:i:0Yt:Z:NNpairtools-1.0.3/tests/data/mock.4dedup.pairsam000066400000000000000000000010731452673171500213110ustar00rootroot00000000000000## pairs format v1.0.0 #sorted: chr1-chr2-pos1-pos2 #shape: upper triangle #genome_assembly: unknown #chromosomes: chr1 chr2 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid1 ! 0 chr1 25 - + NU . . readid2 ! 0 chr1 25 - + NU . . readid3 chr1 1 chr1 20 + + UU . . readid4 chr1 1 chr1 20 + + UU . . readid5 chr1 1 chr1 25 + + UU . . readid6 chr1 1 chr1 27 + + UU . . readid7 chr1 1 chr1 28 + - UU . . readid8 chr1 1 chr1 28 + + UU . . readid9 chr1 1 chr1 50 + + UU . . readid10 chr1 2 chr1 21 + + UU . . 
readid11 chr1 1 chr2 25 + + UU . .pairtools-1.0.3/tests/data/mock.4filterbycov.pairs000066400000000000000000000013151452673171500222210ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr2 40 chr3 2 + + UU readid02 chr1 6 chr1 9 + + UR readid03 chr1 1 chr2 20 + + UU readid04 chr1 50 chr1 1 + + UU readid05 chr1 1 chr1 5 + + UU readid06 chr1 20 chr1 30 + + UR readid07 ! 0 chr1 3 - + NU readid08 ! 0 chr1 3 - + MU readid09 ! 0 ! 0 - - WW pairtools-1.0.3/tests/data/mock.4flip.pairs000066400000000000000000000016131452673171500206240ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:10000 #samheader: @SQ SN:chr2 LN:10000 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr1 chr2 #chromsize: chr1 10000 #chromsize: chr2 10000 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr1 1 chr1 2 + + UU readid02 chr1 1 chr2 2 + + UU readid03 chr1 2 chr1 1 + + UU readid04 chr1 21 chr1 2 + + UU readid05 chr2 2 chr1 1 + + UU readid06 chr2 1 chr1 2 + + UU readid07 chr1 2 chr1 1 - + UU readid08 chr1 2 chr1 1 + + RU readid09 ! 0 chr1 3 - + NU readid10 ! 0 chr1 3 - + MU readid11 ! 0 ! 0 + - WW readid12 chr1 1 chrU 1 + + UU readid13 chrU 1 chr1 1 + + UU readid14 chrU 100 chrU 1 + + UU readid15 chrU1 100 chrU 100 + + UU readid16 ! 0 chrU 100 + + NU readid17 chrU 0 ! 
0 + + UN pairtools-1.0.3/tests/data/mock.4stats.pairs000066400000000000000000000013011452673171500210220ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type readid01 chr1 1 chr1 50 + + UU readid02 chr1 1 chr1 50 + + DD readid03 chr1 1 chr1 2 + + UU readid04 chr1 1 chr1 3 + + UR readid05 chr1 1 chr2 20 + + UU readid06 chr2 1 chr3 2 + + UU readid07 ! 0 chr1 3 - + NU readid08 ! 0 chr1 3 - + MU readid09 ! 0 ! 0 - - WW pairtools-1.0.3/tests/data/mock.chrom.sizes000066400000000000000000000000261452673171500207320ustar00rootroot00000000000000chr1 10000 chr2 10000 pairtools-1.0.3/tests/data/mock.pairsam000066400000000000000000000031101452673171500201170ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr1 1 chr2 20 + + UU readid01129chr1160101Mchr2200CGFFXS:i:0Yt:Z:UU readid0165chr22060101Mchr110ATIIXS:i:0Yt:Z:UU readid02 chr1 1 chr1 50 + + UU readid02129chr1160101Mchr1500CGFFXS:i:0Yt:Z:UU readid0265chr15060101Mchr110ATIIXS:i:0Yt:Z:UU readid03 chr1 1 chr1 2 + + UU readid03129chr1160101Mchr120CGFFXS:i:0Yt:Z:UU readid0365chr1260101Mchr110ATIIXS:i:0Yt:Z:UU readid04 chr1 1 chr1 3 + + UR 
readid04129chr1160101Mchr130CGFFXS:i:0Yt:Z:UR readid0465chr1360101Mchr110ATIIXS:i:0Yt:Z:UR readid05 chr2 1 chr3 2 + + UU readid05129chr2160101Mchr320CGFFXS:i:0Yt:Z:UU readid0565chr3260101Mchr210ATIIXS:i:0Yt:Z:UU readid06 ! 0 chr1 3 - + NU readid06129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0665chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid07 ! 0 chr1 3 - + MU readid07129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0765chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid08 ! 0 ! 0 - - WW readid08129chr1160101Mchr130CGFFXS:i:0Yt:Z:WW readid0865chr1360101Mchr110ATIIXS:i:0Yt:Z:WW pairtools-1.0.3/tests/data/mock.parse-all.sam000066400000000000000000000265061452673171500211400ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid04 81 chr1 10 60 
49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid05 145 chr1 200 60 1S49M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 85 * 0 0 * chr1 10 
0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 169 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid10 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid13 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 readid18 65 chr1 10 60 25M25S chr1 200 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid18 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,300,+,+,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid19 81 chr1 300 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid19 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; 
CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX,1,R1-2 readid24 65 chr1 10 60 25M25S chr1 200 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid24 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 pairtools-1.0.3/tests/data/mock.parse2.sam000066400000000000000000000265061452673171500204540ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid04 81 chr1 10 60 49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 
readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid05 145 chr1 200 60 1S49M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 169 chr1 10 
60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid10 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid13 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid18 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid18 
2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid19 81 chr1 300 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid19 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 
SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,-,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,-,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX,1,R1-2 readid24 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid24 2129 chr1 5300 60 25M25H chr1 200 0 
AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 pairtools-1.0.3/tests/data/mock.rsites.bed000066400000000000000000000001011452673171500205220ustar00rootroot00000000000000chr1 0 100 chr1 100 500 chr1 500 10000 chr2 0 200 chr2 200 10000 pairtools-1.0.3/tests/data/mock.sam000066400000000000000000000235141452673171500172550ustar00rootroot00000000000000@SQ SN:chr1 LN:1000 @SQ SN:chr2 LN:1000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid04 81 chr1 10 60 49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid05 145 chr1 200 60 1S49M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid09 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid09 169 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid10 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU readid13 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:!,0,!,0,-,-,MM readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid19 81 chr1 300 60 25M25S chr1 200 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid19 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:!,10,!,0,-,-,WW readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX pairtools-1.0.3/tests/data/mock.test-restr.pairs000066400000000000000000000013161452673171500217220ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:10000 #samheader: @SQ SN:chr2 LN:10000 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr1 chr2 #chromsize: chr1 10000 #chromsize: chr2 10000 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type rfrag_test1 rfrag_test2 readid01 chr1 1 chr2 20 + + UU 0 0 readid02 chr1 100 chr2 20 - + UU 0 0 readid03 chr1 100 chr2 20 + + UU 0 0 readid04 chr1 499 chr2 20 + + UU 1 0 readid05 chr1 600 chr2 20 + + UU 2 0 readid06 chr1 1 chr2 200 + + UU 0 0 readid07 chr1 1 chr2 500 + + UU 0 1 readid08 chr1 10001 chr2 10001 + + UU 2 1 pairtools-1.0.3/tests/test_dedup.py000066400000000000000000000114631452673171500174230ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name mock_pairsam_path_dedup = os.path.join(testdir, "data", "mock.4dedup.pairsam") dedup_path = os.path.join(tmpdir_name, "dedup.pairsam") unmapped_path = os.path.join(tmpdir_name, 
"unmapped.pairsam") dups_path = os.path.join(tmpdir_name, "dups.pairsam") dedup_max_path = os.path.join(tmpdir_name, "dedup_max.pairsam") unmapped_max_path = os.path.join(tmpdir_name, "unmapped_max.pairsam") dups_max_path = os.path.join(tmpdir_name, "dups_max.pairsam") dedup_markdups_path = os.path.join(tmpdir_name, "dedup.markdups.pairsam") unmapped_markdups_path = os.path.join(tmpdir_name, "unmapped.markdups.pairsam") dups_markdups_path = os.path.join(tmpdir_name, "dups.markdups.pairsam") max_mismatch = 1 @pytest.fixture def setup_dedup(): try: subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--output", dedup_path, "--output-dups", dups_path, "--output-unmapped", unmapped_path, "--max-mismatch", str(max_mismatch), ], ) subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--output", dedup_max_path, "--output-dups", dups_max_path, "--output-unmapped", unmapped_max_path, "--max-mismatch", str(max_mismatch), "--method", "max", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--mark-dups", "--output", dedup_markdups_path, "--output-dups", dups_markdups_path, "--output-unmapped", unmapped_markdups_path, "--max-mismatch", str(max_mismatch), ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairsam(setup_dedup): pairsam_pairs = [ l.strip().split("\t") for l in open(mock_pairsam_path_dedup, "r") if not l.startswith("#") and l.strip() ] for (ddp, up, dp) in [ (dedup_path, unmapped_path, dups_path), (dedup_max_path, unmapped_max_path, dups_max_path), (dedup_markdups_path, unmapped_markdups_path, dups_markdups_path), ]: dedup_pairs = [ l.strip().split("\t") for l in open(ddp, "r") if not l.startswith("#") and l.strip() ] unmapped_pairs = [ l.strip().split("\t") for l in open(up, "r") if not l.startswith("#") and l.strip() ] dup_pairs = [ l.strip().split("\t") for l in open(dp, "r") if not 
l.startswith("#") and l.strip() ] # check that at least a few pairs remained in deduped and dup files assert len(dedup_pairs) > 0 assert len(dup_pairs) > 0 assert len(unmapped_pairs) > 0 import pandas as pd # check that all pairsam entries survived deduping: assert len(dedup_pairs) + len(unmapped_pairs) + len(dup_pairs) == len( pairsam_pairs ) def pairs_overlap(pair1, pair2, max_mismatch): overlap = ( (pair1[1] == pair2[1]) and (pair1[3] == pair2[3]) and (pair1[5] == pair2[5]) and (pair1[6] == pair2[6]) and (abs(int(pair1[2]) - int(pair2[2])) <= max_mismatch) and (abs(int(pair1[4]) - int(pair2[4])) <= max_mismatch) ) return overlap # check that deduped pairs do not overlap assert all( [ not pairs_overlap(pair1, pair2, max_mismatch) for i, pair1 in enumerate(dedup_pairs) for j, pair2 in enumerate(dedup_pairs) if i != j ] ) # check that the removed duplicates overlap with at least one of the # deduplicated entries assert all( [ any([pairs_overlap(pair1, pair2, 3) for pair2 in dedup_pairs]) for pair1 in dup_pairs ] ) tmpdir.cleanup() pairtools-1.0.3/tests/test_filterbycov.py000066400000000000000000000102041452673171500206420ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairs_path_filterbycov = os.path.join(testdir, "data", "mock.4filterbycov.pairs") tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name params = [ {"max_dist": 0, "max_cov": 3}, {"max_dist": 0, "max_cov": 2}, {"max_dist": 1, "max_cov": 1}, ] for p in params: p["lowcov_path"] = os.path.join( tmpdir_name, "lowcov.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) p["highcov_path"] = os.path.join( tmpdir_name, "highcov.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) p["unmapped_path"] = os.path.join( tmpdir_name, "unmapped.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) @pytest.fixture def setup_filterbycov(): try: for p in params: subprocess.check_output( 
[ "python", "-m", "pairtools", "filterbycov", mock_pairs_path_filterbycov, "--output", p["lowcov_path"], "--output-highcov", p["highcov_path"], "--output-unmapped", p["unmapped_path"], "--max-dist", str(p["max_dist"]), "--max-cov", str(p["max_cov"]), ] ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairs(setup_filterbycov): all_pairs = [ l.strip().split("\t") for l in open(mock_pairs_path_filterbycov, "r") if not l.startswith("#") and l.strip() ] for p in params: lowcov_pairs = [ l.strip().split("\t") for l in open(p["lowcov_path"], "r") if not l.startswith("#") and l.strip() ] highcov_pairs = [ l.strip().split("\t") for l in open(p["highcov_path"], "r") if not l.startswith("#") and l.strip() ] unmapped_pairs = [ l.strip().split("\t") for l in open(p["unmapped_path"], "r") if not l.startswith("#") and l.strip() ] # check that at least a few pairs remained in deduped and dup files # assert len(lowcov_pairs) > 0 # assert len(highcov_pairs) > 0 # assert len(unmapped_pairs) > 0 # check that all pairs entries survived deduping: assert len(lowcov_pairs) + len(unmapped_pairs) + len(highcov_pairs) == len( all_pairs ) assert all([(pair[1] != "!" and pair[3] != "!") for pair in lowcov_pairs]) assert all([(pair[1] != "!" and pair[3] != "!") for pair in highcov_pairs]) assert all([(pair[1] == "!" 
or pair[3] == "!") for pair in unmapped_pairs]) def update_coverage(coverage, chrom, pos, max_dist): if chrom == "!": return coverage[chrom] = coverage.get(chrom, {}) for i in range(max(0, pos - max_dist), pos + max_dist + 1): coverage[chrom][i] = coverage[chrom].get(i, 0) + 1 coverage = {} for pair in all_pairs: update_coverage(coverage, pair[1], int(pair[2]), p["max_dist"]) update_coverage(coverage, pair[3], int(pair[4]), p["max_dist"]) for pair in lowcov_pairs: # print (p['max_cov'],p['max_dist']) # print (pair, coverage[pair[1]][int(pair[2])]) # print (pair, coverage[pair[3]][int(pair[4])]) assert coverage[pair[1]][int(pair[2])] <= p["max_cov"] assert coverage[pair[3]][int(pair[4])] <= p["max_cov"] for pair in highcov_pairs: # print (p['max_cov'],p['max_dist']) # print (pair, coverage[pair[1]][int(pair[2])]) # print (pair, coverage[pair[3]][int(pair[4])]) assert (coverage[pair[1]][int(pair[2])] > p["max_cov"]) or ( coverage[pair[3]][int(pair[4])] > p["max_cov"] ) tmpdir.cleanup() pairtools-1.0.3/tests/test_flip.py000066400000000000000000000036751452673171500172620ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairs_path = os.path.join(testdir, "data", "mock.4flip.pairs") mock_chromsizes_path = os.path.join(testdir, "data", "mock.chrom.sizes") def test_flip(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "flip", mock_pairs_path, "-c", mock_chromsizes_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e orig_pairs = [ l.strip().split("\t") for l in open(mock_pairs_path, "r") if not l.startswith("#") and l.strip() ] flipped_pairs = [ l.strip().split("\t") for l in result.split("\n") if not l.startswith("#") and l.strip() ] chrom_enum = {"!": 0, "chr1": 1, "chr2": 2, "chrU": 3, "chrU1": 4} # chrU stands for unannotated chromosome, which has less priority than 
annotated ones # chrU1 is another unannotated chromosome, which should go lexigographically after chrU for orig_pair, flipped_pair in zip(orig_pairs, flipped_pairs): has_correct_order = (chrom_enum[orig_pair[1]], int(orig_pair[2])) <= ( chrom_enum[orig_pair[3]], int(orig_pair[4]), ) if has_correct_order: assert all([c1 == c2 for c1, c2 in zip(orig_pair, flipped_pair)]) if not has_correct_order: assert orig_pair[1] == flipped_pair[3] assert orig_pair[2] == flipped_pair[4] assert orig_pair[3] == flipped_pair[1] assert orig_pair[4] == flipped_pair[2] assert orig_pair[5] == flipped_pair[6] assert orig_pair[6] == flipped_pair[5] assert orig_pair[7] == flipped_pair[7][::-1] pairtools-1.0.3/tests/test_header.py000066400000000000000000000026071452673171500175520ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_generate(): """Test generation of the header. Example run: pairtools header generate tests/data/mock.pairsam \ --chroms-path tests/data/mock.chrom.sizes --pairsam \ --sam-path tests/data/mock.sam """ mock_sam_path = os.path.join(testdir, "data", "mock.sam") mock_pairs_path = os.path.join(testdir, "data", "mock.pairsam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "header", "generate", "--chroms-path", mock_chroms_path, "--sam-path", mock_sam_path, "--pairsam", mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) 
pairtools-1.0.3/tests/test_headerops.py000066400000000000000000000105051452673171500202700ustar00rootroot00000000000000# -*- coding: utf-8 -*- from pairtools.lib import headerops import pytest def test_make_standard_header(): header = headerops.make_standard_pairsheader() assert any([l.startswith("## pairs format") for l in header]) assert any([l.startswith("#shape") for l in header]) assert any([l.startswith("#columns") for l in header]) header = headerops.make_standard_pairsheader( chromsizes=[("b", 100), ("c", 100), ("a", 100)] ) assert sum([l.startswith("#chromsize") for l in header]) == 3 def test_samheaderops(): header = headerops.make_standard_pairsheader() samheader = [ "@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", "@SQ\tSN:chr3\tLN:100", "@PG\tID:bwa\tPN:bwa\tCL:bwa", "@PG\tID:bwa-2\tPN:bwa\tCL:bwa\tPP:bwa", ] header_with_sam = headerops.insert_samheader(header, samheader) assert len(header_with_sam) == len(header) + len(samheader) for l in samheader: assert any([l2.startswith("#samheader") and l in l2 for l2 in header_with_sam]) # test adding new programs to the PG chain header_extra_pg = headerops.append_new_pg(header_with_sam, ID="test", PN="test") # test if all lines got transferred assert all([(old_l in header_extra_pg) for old_l in header_with_sam]) # test if one PG got added assert len(header_extra_pg) == len(header_with_sam) + 1 # test if the new PG has PP matching the ID of one of already existing PGs new_l = [l for l in header_extra_pg if l not in header_with_sam][0] pp = [f[3:] for f in new_l.split("\t") if f.startswith("PP:")][0] assert ( len( [ l for l in header_extra_pg if l.startswith("#samheader") and ("\tID:{}\t".format(pp) in l) ] ) == 1 ) def test_merge_pairheaders(): headers = [["## pairs format v1.0"], ["## pairs format v1.0"]] merged_header = headerops._merge_pairheaders(headers) assert merged_header == headers[0] headers = [["## pairs format v1.0", "#a"], ["## pairs format v1.0", "#b"]] merged_header = 
headerops._merge_pairheaders(headers) assert merged_header == ["## pairs format v1.0", "#a", "#b"] headers = [ ["## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200"], ["## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200"], ] merged_header = headerops._merge_pairheaders(headers) assert merged_header == headers[0] def test_merge_different_pairheaders(): with pytest.raises(Exception): headers = [["## pairs format v1.0"], ["## pairs format v1.1"]] merged_header = headerops._merge_pairheaders(headers) def test_force_merge_pairheaders(): headers = [ ["## pairs format v1.0", "#chromsize: chr1 100"], ["## pairs format v1.0", "#chromsize: chr2 200"], ] merged_header = headerops._merge_pairheaders(headers, force=True) assert merged_header == [ "## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200", ] def test_merge_samheaders(): headers = [ ["@HD\tVN:1"], ["@HD\tVN:1"], ] merged_header = headerops._merge_samheaders(headers) assert merged_header == headers[0] headers = [ [ "@HD\tVN:1", "@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", ], [ "@HD\tVN:1", "@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", ], ] merged_header = headerops._merge_samheaders(headers) assert merged_header == headers[0] headers = [ [ "@HD\tVN:1", "@PG\tID:bwa\tPN:bwa\tPP:cat", ], [ "@HD\tVN:1", "@PG\tID:bwa\tPN:bwa\tPP:cat", ], ] merged_header = headerops._merge_samheaders(headers) print(merged_header) assert merged_header == [ "@HD\tVN:1", "@PG\tID:bwa-1\tPN:bwa\tPP:cat-1", "@PG\tID:bwa-2\tPN:bwa\tPP:cat-2", ] def test_merge_headers(): headers = [ [ "## pairs format v1.0", "#samheader: @HD\tVN:1", "#samheader: @SQ\tSN:chr1\tLN:100", "#samheader: @SQ\tSN:chr2\tLN:100", ] ] * 2 merged_header = headerops.merge_headers(headers) assert merged_header == headers[0] pairtools-1.0.3/tests/test_markasdup.py000066400000000000000000000017111452673171500203040ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest 
testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "markasdup", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] # check that all pairsam entries survived sorting: assert len(pairsam_body) == len(output_body) # check that all pairtypes got changed to DD for l in output_body: assert l.split("\t")[7] == "DD" pairtools-1.0.3/tests/test_merge.py000066400000000000000000000072551452673171500174250ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name mock_pairsam_path_1 = os.path.join(testdir, "data", "mock.pairsam") mock_pairsam_path_2 = os.path.join(testdir, "data", "mock.2.pairsam") mock_sorted_pairsam_path_1 = os.path.join(tmpdir_name, "1.pairsam") mock_sorted_pairsam_path_2 = os.path.join(tmpdir_name, "2.pairsam") @pytest.fixture def setup_sort_two(): try: subprocess.check_output( [ "python", "-m", "pairtools", "sort", mock_pairsam_path_1, "--output", mock_sorted_pairsam_path_1, ], ) subprocess.check_output( [ "python", "-m", "pairtools", "sort", mock_pairsam_path_2, "--output", mock_sorted_pairsam_path_2, ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairsam(setup_sort_two): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "merge", mock_sorted_pairsam_path_1, mock_sorted_pairsam_path_2, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) 
print(sys.exc_info()) raise e # check that all pairsam entries survived sorting: pairsam_body_1 = [ l.strip() for l in open(mock_pairsam_path_1, "r") if not l.startswith("#") and l.strip() ] pairsam_body_2 = [ l.strip() for l in open(mock_pairsam_path_2, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert len(pairsam_body_1) + len(pairsam_body_2) == len(output_body) # check the sorting order of the output: prev_pair = None for l in output_body: cur_pair = l.split("\t")[1:8] if prev_pair is not None: assert cur_pair[0] >= prev_pair[0] if cur_pair[0] == prev_pair[0]: assert cur_pair[1] >= prev_pair[1] if cur_pair[1] == prev_pair[1]: assert cur_pair[2] >= prev_pair[2] if cur_pair[2] == prev_pair[2]: assert cur_pair[3] >= prev_pair[3] prev_pair = cur_pair # Check that the header is preserved: try: result = subprocess.check_output( [ "python", "-m", "pairtools", "merge", "--keep-first-header", mock_sorted_pairsam_path_1, mock_sorted_pairsam_path_2, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check the headers: pairsam_header_1 = [ l.strip() for l in open(mock_sorted_pairsam_path_1, "r") if l.startswith("#") and l.strip() ] pairsam_header_2 = [ l.strip() for l in open(mock_sorted_pairsam_path_2, "r") if l.startswith("#") and l.strip() ] output_header = [ l.strip() for l in result.split("\n") if l.startswith("#") and l.strip() ] assert len(pairsam_header_1) + 1 == len(output_header) tmpdir.cleanup() pairtools-1.0.3/tests/test_parse.py000066400000000000000000000065201452673171500174320ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_python_version(): assert sys.version_info[0] == 3, "Use Python 3!" 
def test_mock_pysam(): """Parse non-chimeric alignments with walks-policy mask with pysam backend.""" mock_sam_path = os.path.join(testdir, "data", "mock.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse", "--walks-policy", "mask", "-c", mock_chroms_path, mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly for l in result.split("\n"): if l.startswith("#") or not l: continue print(l) assigned_pair = l.split("\t")[1:8] simulated_pair = l.split("CT:Z:SIMULATED:", 1)[1].split("\031", 1)[0].split(",") print(assigned_pair) print(simulated_pair) print() assert assigned_pair == simulated_pair def test_mock_pysam_parse_all(): """Parse all alignment in each read with walks-policy all and pysam backend.""" mock_sam_path = os.path.join(testdir, "data", "mock.parse-all.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse", "--walks-policy", "all", "-c", mock_chroms_path, "--add-pair-index", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if 
l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] simulated_pair = ( l.split("CT:Z:SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair pairtools-1.0.3/tests/test_parse2.py000066400000000000000000000072661452673171500175240ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pysam_parse2_read(): mock_sam_path = os.path.join(testdir, "data", "mock.parse2.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--add-pair-index", "--flip", "--report-position", "junction", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter) simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair def 
test_mock_pysam_parse2_pair(): mock_sam_path = os.path.join(testdir, "data", "mock.parse-all.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--add-pair-index", "--flip", "--report-position", "outer", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair pairtools-1.0.3/tests/test_restrict.py000066400000000000000000000031251452673171500201550ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_restrict(): """Restrict pairs file""" mock_pairs_path = os.path.join(testdir, "data", "mock.test-restr.pairs") mock_rfrag_path = os.path.join(testdir, "data", "mock.rsites.bed") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "restrict", "-f", mock_rfrag_path, mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly true_header = 
[l.strip() for l in open(mock_pairs_path, "r") if l.startswith("@")] output_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in true_header: assert any([l in l2 for l2 in output_header]) # check that the pairs got assigned properly cols = [x for x in output_header if x.startswith("#columns")][0].split(" ")[1:] COL_RFRAG1_TRUE = cols.index("rfrag_test1") COL_RFRAG2_TRUE = cols.index("rfrag_test2") COL_RFRAG1_OUTPUT = cols.index("rfrag1") COL_RFRAG2_OUTPUT = cols.index("rfrag2") for l in result.split("\n"): if l.startswith("#") or not l: continue line = l.split() assert line[COL_RFRAG1_TRUE] == line[COL_RFRAG1_OUTPUT] assert line[COL_RFRAG2_TRUE] == line[COL_RFRAG2_OUTPUT] pairtools-1.0.3/tests/test_scaling.py000066400000000000000000000011651452673171500177400ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import pandas as pd import io testdir = os.path.dirname(os.path.realpath(__file__)) def test_scaling(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "scaling", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e output = pd.read_csv(io.StringIO(result), sep="\t", header=0) assert output["n_pairs"].sum() == 5 pairtools-1.0.3/tests/test_select.py000066400000000000000000000156001452673171500175760ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest from pairtools.lib import pairsam_format testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") mock_chromsizes_path = os.path.join(testdir, "data", "mock.chrom.sizes") def test_preserve(): try: result = subprocess.check_output( ["python", "-m", "pairtools", "select", "True", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: 
print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l in pairsam_body for l in output_body) def test_equal(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", '(pair_type == "RU") or (pair_type == "UR") or (pair_type == "UU")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["RU", "UR", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["RU", "UR", "UU"] ) def test_csv(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 'csv_match(pair_type, "RU,UR,UU")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["RU", "UR", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["RU", "UR", "UU"] ) def test_wildcard(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 'wildcard_match(pair_type, "*U")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and 
l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["NU", "MU", "RU", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["NU", "MU", "RU", "UU"] ) def test_regex(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 'regex_match(pair_type, "[NM]U")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["NU", "MU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["NU", "MU"] ) def test_chrom_subset(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", "True", "--chrom-subset", mock_chromsizes_path, mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] output_header = [ l.strip() for l in result.split("\n") if l.startswith("#") and l.strip() ] chroms_from_chrom_field = [ l.strip().split()[1:] for l in result.split("\n") if l.startswith("#chromosomes:") ][0] assert set(chroms_from_chrom_field) == set(["chr1", "chr2"]) chroms_from_chrom_sizes = [ l.strip().split()[1] for l in result.split("\n") if l.startswith("#chromsize:") ] assert set(chroms_from_chrom_sizes) == set(["chr1", "chr2"]) def test_remove_columns(): """Test removal of columns from the file Example run: pairtools select True --remove-columns sam1,sam2 tests/data/mock.pairsam """ 
mock_pairs_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", "True", "--remove-columns", "sam1,sam2", mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the columns are removed properly: pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in pairsam_header: if l.startswith("#columns:"): line = l.strip() assert ( line == "#columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type" ) # check that the pairs got assigned properly for l in result.split("\n"): if l.startswith("#") or not l: continue assert len(l.split(pairsam_format.PAIRSAM_SEP)) == 8 pairtools-1.0.3/tests/test_sort.py000066400000000000000000000037371452673171500173160ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "sort", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # Check that the only changes strings are a @PG record of a SAM header, # the "#sorted" entry and chromosomes pairsam_header = [ l.strip() for l in open(mock_pairsam_path, "r") if l.startswith("#") ] output_header = [l.strip() for l in result.split("\n") if l.startswith("#")] print(output_header) print(pairsam_header) for l in output_header: if not any([l in l2 for l2 in pairsam_header]): assert ( l.startswith("#samheader: @PG") or l.startswith("#sorted") or l.startswith("#chromosomes") ) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] # 
check that all pairsam entries survived sorting: assert len(pairsam_body) == len(output_body) # check the sorting order of the output: prev_pair = None for l in output_body: cur_pair = l.split("\t")[1:8] if prev_pair is not None: assert cur_pair[0] >= prev_pair[0] if cur_pair[0] == prev_pair[0]: assert cur_pair[2] >= prev_pair[2] if cur_pair[2] == prev_pair[2]: assert cur_pair[1] >= prev_pair[1] if cur_pair[1] == prev_pair[1]: assert cur_pair[3] >= prev_pair[3] prev_pair = cur_pair pairtools-1.0.3/tests/test_split.py000066400000000000000000000051051452673171500174510ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name pairs_path = os.path.join(tmpdir_name, "out.pairs") sam_path = os.path.join(tmpdir_name, "out.sam") @pytest.fixture def setup_split(): try: subprocess.check_output( [ "python", "-m", "pairtools", "split", mock_pairsam_path, "--output-pairs", pairs_path, "--output-sam", sam_path, ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_split(setup_split): pairsam_lines = [l.strip() for l in open(mock_pairsam_path, "r") if l.strip()] pairs_lines = [l.strip() for l in open(pairs_path, "r") if l.strip()] sam_lines = [l.strip() for l in open(sam_path, "r") if l.strip()] # check that all entries survived splitting: n_pairsam = len([l for l in pairsam_lines if not l.startswith("#")]) n_pairs = len([l for l in pairs_lines if not l.startswith("#")]) n_sam = len([l for l in sam_lines if not l.startswith("@")]) // 2 assert n_pairsam == n_pairs assert n_pairsam == n_sam # check that the header survived splitting: pairsam_header = [ l.strip() for l in open(mock_pairsam_path, "r") if l.strip() and l.startswith("#") ] pairs_header = [ l.strip() for l in open(pairs_path, 
"r") if l.strip() and l.startswith("#") ] sam_header = [ l.strip() for l in open(sam_path, "r") if l.strip() and l.startswith("@") ] assert all( any(l in l2 for l2 in pairsam_header) for l in sam_header if not l.startswith("@PG") ) assert all( l in pairsam_header for l in pairs_header if (not (l.startswith("#columns") or l.startswith("#samheader"))) ) columns_pairsam = [l for l in pairsam_header if l.startswith("#columns")][ 0 ].split()[1:] columns_pairs = [l for l in pairs_header if l.startswith("#columns")][0].split()[1:] assert ( ("sam1" in columns_pairsam) and ("sam2" in columns_pairsam) and ("sam1" not in columns_pairs) and ("sam2" not in columns_pairs) ) assert [c for c in columns_pairsam if c != "sam1" and c != "sam2"] == columns_pairs tmpdir.cleanup() pairtools-1.0.3/tests/test_stats.py000066400000000000000000000103031452673171500174500ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import numpy as np import yaml testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.4stats.pairs") try: result = subprocess.check_output( ["python", "-m", "pairtools", "stats", "--yaml", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e stats = yaml.safe_load(result) # for k in stats["no_filter"]: # try: # stats["no_filter"][k] = int(stats["no_filter"][k]) # except (ValueError, TypeError): # stats["no_filter"][k] = float(stats["no_filter"][k]) assert stats["no_filter"]["total"] == 9 assert stats["no_filter"]["total_single_sided_mapped"] == 2 assert stats["no_filter"]["total_mapped"] == 6 assert stats["no_filter"]["total_dups"] == 1 assert stats["no_filter"]["cis"] == 3 assert stats["no_filter"]["trans"] == 2 assert stats["no_filter"]["pair_types"]["UU"] == 4 assert stats["no_filter"]["pair_types"]["NU"] == 1 assert stats["no_filter"]["pair_types"]["WW"] == 1 assert 
stats["no_filter"]["pair_types"]["UR"] == 1 assert stats["no_filter"]["pair_types"]["MU"] == 1 assert stats["no_filter"]["pair_types"]["DD"] == 1 assert stats["no_filter"]["chrom_freq"]["chr1/chr2"] == 1 assert stats["no_filter"]["chrom_freq"]["chr1/chr1"] == 3 assert stats["no_filter"]["chrom_freq"]["chr2/chr3"] == 1 for orientation in ("++", "+-", "-+", "--"): s = stats["no_filter"]["dist_freq"][orientation] for k, val in s.items(): if orientation == "++" and k in [1, 2, 32]: assert s[k] == 1 else: assert s[k] == 0 assert stats["no_filter"]["summary"]["frac_cis"] == 0.6 assert stats["no_filter"]["summary"]["frac_cis_1kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_2kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_4kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_10kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_20kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_40kb+"] == 0 assert np.isclose(stats["no_filter"]["summary"]["frac_dups"], 1 / 6) def test_merge_stats(): mock_pairsam_path = os.path.join(testdir, "data", "mock.4stats.pairs") try: subprocess.check_output( [ "python", "-m", "pairtools", "stats", "--with-chromsizes", mock_pairsam_path, "--output", "mock.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "--no-chromsizes", mock_pairsam_path, "--output", "mock.no_chromsizes.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "mock.stats", "mock.stats", "--merge", "--output", "mock.merged_chromsizes.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "mock.stats", "mock.no_chromsizes.stats", "--merge", "--output", "mock.merged_mixed.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "mock.no_chromsizes.stats", "mock.no_chromsizes.stats", "--merge", "--output", "mock.merged_no_chromsizes.stats", ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e