pax_global_header00006660000000000000000000000064147471510550014523gustar00rootroot0000000000000052 comment=737a236399977351f769adb559830f1fb0c0e48a pairtools-1.1.3/000077500000000000000000000000001474715105500135415ustar00rootroot00000000000000pairtools-1.1.3/.flake8000066400000000000000000000006711474715105500147200ustar00rootroot00000000000000[flake8] exclude = __init__.py __main__.py max-line-length = 120 ignore = # whitespace before ':' E203 # too many leading '#' for block comment E266 # line too long E501 # line break before binary operator W503 select = # mccabe complexity C # pycodestyle E # pyflakes error F # pyflakes warning W # bugbear B # line exceeds max-line-length + 10% B950 pairtools-1.1.3/.github/000077500000000000000000000000001474715105500151015ustar00rootroot00000000000000pairtools-1.1.3/.github/workflows/000077500000000000000000000000001474715105500171365ustar00rootroot00000000000000pairtools-1.1.3/.github/workflows/python-build-wheels.yml000066400000000000000000000057611474715105500235750ustar00rootroot00000000000000name: Build wheels on: [workflow_dispatch] jobs: make_sdist: name: Make SDist runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 # Optional, use if you use setuptools_scm submodules: true # Optional, use if you have submodules - name: Install dependencies run: python -m pip install cython numpy pysam - name: Build SDist run: pipx run build --sdist - uses: actions/upload-artifact@v4 with: name: cibw-sdist path: dist/*.tar.gz build_wheels: name: Build wheels on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: # macos-13 is an intel runner, macos-14 is apple silicon os: [ubuntu-latest] #, windows-latest, macos-13, macos-14] python-version: [ "3.11" ] # "3.7", "3.8", "3.9", "3.10", steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} # - name: Build wheels # uses: 
pypa/cibuildwheel@v2.21.0 # # uses: pypa/cibuildwheel@v2.17.0 # # env: # # CIBW_SOME_OPTION: value # # ... # # with: # # package-dir: . # # output-dir: wheelhouse # # config-file: "{package}/pyproject.toml" - name: Install cibuildwheel run: python -m pip install cibuildwheel==2.22.0 - name: Build wheels run: python -m cibuildwheel --output-dir dist # to supply options, put them in 'env', like: env: #CIBW_BUILD_FRONTEND: "pip; args: --no-build-isolation" CIBW_BUILD_FRONTEND: "build; args: --no-isolation" CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel -y;" # we have to recompile pysam so that repairwheel can later find various libraries (libssl, libnghttp2, etc) #CIBW_BEFORE_ALL: "yum install bzip2-devel xz-devel openssl-devel openldap-devel krb5-devel libssh-devel libnghttp2-devel -y;" CIBW_BEFORE_BUILD: "python -m pip install setuptools cython numpy pysam --no-binary pysam" # skip building 32-bit wheels (i686) CIBW_ARCHS_LINUX: "auto64" # we could use 2_28 to download pysam's wheel instead of compiling it ; # HOWEVER THAT DIDN'T WORK BECAUSE PYSAM DEPENDS ON LIBSSL, LIBNGHTTP2, ETC, WHICH CANNOT BE FOUND # SO WE ARE BACK TO COMPILING PYSAM'S WHEEL (no-binary pysam) # CIBW_MANYLINUX_X86_64_IMAGE: "manylinux_2_28" ## skip building pypy and musllinux CIBW_SKIP: pp* *musllinux* #CIBW_REPAIR_WHEEL_COMMAND: 'auditwheel -v repair -w {dest_dir} {wheel}' #PIP_NO_CACHE_DIR: "false" #PIP_NO_BUILD_ISOLATION: "false" #PIP_NO_BINARY: "pysam" - uses: actions/upload-artifact@v4 with: name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} path: ./dist/*.whlpairtools-1.1.3/.github/workflows/python-publish-test.yml000066400000000000000000000033221474715105500236230ustar00rootroot00000000000000name: Publish Python Package to Test PyPI on: # release: # types: [published] workflow_dispatch: jobs: publish_all: name: Publish wheels and sdist to Test PyPI # if: github.event_name == 'release' && github.event.action == 'published' environment: testpypi permissions: id-token: write 
runs-on: ubuntu-latest steps: - uses: dawidd6/action-download-artifact@v7 with: # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read` #github_token: ${{secrets.GITHUB_TOKEN}} # Optional, workflow file name or ID # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow workflow: python-build-wheels.yml # Optional, the status or conclusion of a completed workflow to search for # Can be one of a workflow conclusion: # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required" # Or a workflow status: # "completed", "in_progress", "queued" # Use the empty string ("") to ignore status or conclusion in the search workflow_conclusion: success - name: Publish sdist 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: cibw-sdist repository-url: https://test.pypi.org/legacy/ - name: Publish wheels 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: cibw-wheels-ubuntu-latest-0 repository-url: https://test.pypi.org/legacy/ pairtools-1.1.3/.github/workflows/python-publish.yml000066400000000000000000000031201474715105500226420ustar00rootroot00000000000000name: Publish Python Package to PyPI on: # release: # types: [published] workflow_dispatch: jobs: publish_all: name: Publish wheels and sdist to PyPI # if: github.event_name == 'release' && github.event.action == 'published' environment: pypi permissions: id-token: write runs-on: ubuntu-latest steps: - uses: dawidd6/action-download-artifact@v7 with: # Required, if the repo is private a Personal Access Token with `repo` scope is needed or GitHub token in a job where the permissions `action` scope set to `read` #github_token: ${{secrets.GITHUB_TOKEN}} # Optional, workflow file name or ID # If not specified, will be inferred from run_id (if run_id is specified), or will be the current workflow workflow: 
python-build-wheels.yml # Optional, the status or conclusion of a completed workflow to search for # Can be one of a workflow conclusion: # "failure", "success", "neutral", "cancelled", "skipped", "timed_out", "action_required" # Or a workflow status: # "completed", "in_progress", "queued" # Use the empty string ("") to ignore status or conclusion in the search workflow_conclusion: success - name: Publish sdist 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: cibw-sdist - name: Publish wheels 📦 to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: cibw-wheels-ubuntu-latest-0 pairtools-1.1.3/.github/workflows/python-test.yml000066400000000000000000000026071474715105500221640ustar00rootroot00000000000000# This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions name: Test build, lint and test on: push: branches: [ master ] tags: - "v*" # Tag events matching v*, i.e. v1.0, v20.15.10 pull_request: branches: [ master ] jobs: build: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip wheel setuptools build pip install cython pysam numpy pip install -e .[test] --no-build-isolation -v -v - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | pip install pytest pytest pairtools-1.1.3/.gitignore000066400000000000000000000022501474715105500155300ustar00rootroot00000000000000# vim undos *.un~ # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so *.c *.cpp # Distribution / packaging .Python env/ build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib64/ parts/ sdist/ var/ *.egg-info/ .installed.cfg *.egg # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *,cover .hypothesis/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # IPython Notebook .ipynb_checkpoints # pyenv .python-version # celery beat schedule file celerybeat-schedule # dotenv .env # virtualenv venv/ ENV/ # Spyder project settings .spyderproject # Rope project settings .ropeproject # cython compiled C extension _*.c *.DS_Store # VS code settings .vscode/* # Files generated as the examples examples/* pairtools-1.1.3/CHANGES.md000066400000000000000000000162421474715105500151400ustar00rootroot00000000000000### 1.1.3 (2025-01-31) ### Bugfixes of parse2: - Standartiation of complex pair types: names R1&2 and R1-2 are now uniform in the code. This was inconistent before leading to the bugs at the positions assignment for R1&2. - Walk policies are applied not only to complex walks but also to non-chimeric reads in parse2. Bugfizes of pairtools select: - Empty columns at the end of the .pairs file are not truncated anymore. New tests: - parse2 tests for R1&2 and R2 reads (read25/27). 
- parse2 tests for correct flipping (read25/26). ### 1.1.2 (2024-12-11) ### Bugfixes: - Drop Cython-generated .c/.cpp files from the sdist ### 1.1.1 (2024-12-10) ### Bugfixes: - Migrating to pyproject.toml + cibuildwheel. pairtools will now release binary wheels for Linux. --no-build-isolation is a mandatory flag now. - Require Cython during build to avoid the "circular import" bug. - fix API incomplete functionality for read-side detection by @agalitsyna **Full Changelog**: https://github.com/open2c/pairtools/compare/v1.1.0...v1.1.1 ### 1.1.0 (2024-04-23) ### Major bugfixes: - Fix a major bug in sort that previously broke the sorting order. This bug was introduced in recent versions of pairtools #230 - Fix a major bug in dedup that caused pair duplication and broken sorting order in non-Cython backends New features: - stats: calculate the distance of P(s) divergence between pairs of different directionalities #222 - dedup: allow column names in all backends, and allow sorting by arbitrary columns #162 New behavior and default settings: - dedup: turn mark-dups on by default #211 - parse: change the default --walks-policy to 5unique - parse: pair types are now always in upper case. Previously, letters in pair types were converted to lowercase if the corresponding side contained chimeric alignments. 
Minor bugfixes: - dedup: allow inputs with quotes #194 - dedup: allow empty input pairs file #201 - stats: minor bugfixes #200 Documentation: - a new notebook with the statistics of distances between PCR duplicates #233 - clean up phase walkthrough #218 - a new chapter on building workflows with pairtools #219 #226 #231 - a major cleanup Code updates: - make pairsio.py to read (and, in the future, write) .pairs files #195 - minor refactoring of parse #223 New Contributors: - @hkariti made their first contribution in #194 ### 1.0.3 (2023-11-20) ### - [x] `pairtools dedup`: update default chunksize to 10,000 to prevent memory overflow on datasets with high duplication rate ### 1.0.2 (2022-11-XX) ### - [x] `pairtools select` regex update (string substitutions failed when the column name was a substring of another) - [x] Warnings capture in dedup: pairs lines are always split after rstrip newline - [x] Important fixes of splitting schema - [x] Dedup comment removed (failed when the read qualities contained "#") - [x] Remove bdist build out of wheel - [x] pairtools scaling: fixed an issue with scaling maximum range value https://github.com/open2c/pairtools/issues/150#issue-1439106031 ### 1.0.1 (2022-09-XX) ### - [x] Fixed issue with pysam dependencies on pip and conda - [x] pytest test engine instead of nose - [x] Small fixes in the docs and scaling ### 1.0.0 (2022-08-XX) ### This is a major release of pairtools since last release (April 2019!) 
#### Post merge: - [x] sphinx docs update with incorporated walkthroughs #### New tools: - [x] parse2 module with CLI for parsing complex walks - [x] scaling and header modules with CLI #### Fixes by modules: pairtools dedup - [x] finalize detection of optical duplicates https://github.com/open2c/pairtools/issues/106 and https://github.com/open2c/pairtools/issues/59, also related to https://github.com/open2c/pairtools/issues/54 - [x] chunked dedup by @Phlya - [x] improvement of dedup to include reporting of the parent readID by @Phlya and @agalitsyna pairtools stats/scaling - [x] split dedup stats and regular stats - [x] output chromosome size to the stats output https://github.com/open2c/pairtools/issues/83 - [x] pairtools stats: YAML output? https://github.com/open2c/pairtools/issues/111 and https://github.com/open2c/pairtools/issues/79 - [x] pairtools scaling tool which takes into account chromosome sizes: https://github.com/open2c/pairtools/issues/81, https://github.com/open2c/pairtools/issues/56? pairtools parse - [x] parse complex walks engine and tools: https://github.com/open2c/pairtools/pull/109 - [x] stdin and stdout reporting defaults: https://github.com/open2c/pairtools/issues/48 - [x] flipping issue: https://github.com/open2c/pairtools/issues/91 pairtools phase - [x] make work with both pip and github versions of bwa: https://github.com/open2c/pairtools/pull/114 pairtools restrict - [x] Handle empty pairs with "!" chromosomes: https://github.com/open2c/pairtools/issues/76 - [x] Problem with restriction sites header/first rfrag: https://github.com/open2c/pairtools/issues/73 - [x] Suggestions by @golobor: https://github.com/open2c/pairtools/issues/16 pairtools merge - [x] do not require sorting? 
https://github.com/open2c/pairtools/issues/23 - [x] headers handling: https://github.com/open2c/pairtools/issues/18 #### General improvements: Headers maintenance - [x] allow adding a header to a headerless file https://github.com/open2c/pairtools/issues/119 or broader addition of the headed module, draft: https://github.com/open2c/pairtools/pull/121 Code maintenance - [x] transfer pairlib into sandbox of pairtools lib - [x] separate cli and lib - [x] Remove OrderedDict: https://github.com/open2c/pairtools/issues/113 - [x] Clean up deprecation warnings, e.g. https://github.com/open2c/pairtools/issues/71 - [x] Fix input errors without explanations, e.g. https://github.com/open2c/pairtools/issues/61 #### Specific changes: Docs improvements - [x] pairtools walkthrough - [x] phasing walkthrough - [x] parse docs update Tests proposals - [x] add tests for dedup @Phlya : https://github.com/open2c/pairtools/issues/5 - [x] add tests for stats, and merge: https://github.com/open2c/pairtools/issues/5 Enhancements - [x] add summaries: https://github.com/open2c/pairtools/pull/105 - [x] support of [bwa mem2]( https://github.com/bwa-mem2/bwa-mem2), which is 2-3 times faster than usual bwa mem: https://github.com/open2c/pairtools/discussions/118 - [x] I/O single utility instead of repetitive code in each module ### 0.3.1 (2021-02-XX) ### * sample: a new tool to select a random subset of pairs * parse: add --readid-transform to edit readID * parse: add experimental --walk-policy all (note: it will be moved to a separate tool in future!) 
* all tools: use bgzip if pbgzip not available Internal changes: * parse: move most code to a separate _parse module * _headerops: add extract_chromosomes(header) * all tools: drop py3.5 support * switch from travis CI to github actions ### 0.3.0 (2019-04-23) ### * parse: tag pairs with missing FASTQ/SAM on one side as corrupt, pair type "XX" ### 0.2.2 (2019-01-07) ### * sort: enable lz4c compression of sorted chunks by default ### 0.2.1 (2018-12-21) ### * automatically convert mapq1 and mapq2 to int in `select` ### 0.2.0 (2018-09-03) ### * add the `flip` tool ### 0.1.1 (2018-07-19) ### * Bugfix: include _dedup.pyx in the Python package ### 0.1.0 (2018-07-19) ### * First release. pairtools-1.1.3/LICENSE000066400000000000000000000020541474715105500145470ustar00rootroot00000000000000MIT License Copyright (c) 2017-2021 Open2C Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
pairtools-1.1.3/MANIFEST.in000066400000000000000000000006201474715105500152750ustar00rootroot00000000000000include README.md include CHANGES.md include LICENSE include pyproject.toml include pytest.ini graft tests graft doc prune doc/_build prune doc/_templates global-include *.pyx global-include *.pxd global-exclude *.c global-exclude *.cpp global-exclude __pycache__/* global-exclude *.so global-exclude *.pyd global-exclude *.pyc global-exclude .git* global-exclude .deps/* global-exclude .DS_Store pairtools-1.1.3/Makefile000066400000000000000000000012661474715105500152060ustar00rootroot00000000000000.PHONY: init install clean-pyc clean-build build test publish docs-init docs init: conda install --file requirements.txt install: pip install -e . test: nosetests clean-pyc: find . -name '*.pyc' -exec rm --force {} + find . -name '*.pyo' -exec rm --force {} + find . -name '*~' -exec rm --force {} + clean-build: rm -rf build/ rm -rf dist/ clean: clean-pyc clean-build build: clean-build python setup.py sdist # python setup.py bdist_wheel publish: build twine upload dist/* publish-test: twine upload --repository-url https://test.pypi.org/legacy/ dist/* #docs-init: # conda install --file docs/requirements.txt # #docs: # cd docs && python make_cli_rst.py && make html pairtools-1.1.3/README.md000066400000000000000000000225611474715105500150260ustar00rootroot00000000000000# pairtools [![Documentation Status](https://readthedocs.org/projects/pairtools/badge/?version=latest)](http://pairtools.readthedocs.org/en/latest/) [![Build Status](https://travis-ci.org/mirnylab/pairtools.svg?branch=master)](https://travis-ci.org/mirnylab/pairtools) [![Join the chat on Slack](https://img.shields.io/badge/chat-slack-%233F0F3F?logo=slack)](https://bit.ly/2UaOpAe) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1490831.svg)](https://doi.org/10.5281/zenodo.1490831) ## Process Hi-C pairs with pairtools `pairtools` is a simple and fast command-line framework to process sequencing data from a 
Hi-C experiment. `pairtools` process paired-end sequence alignments and perform the following operations: - detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end sequences of Hi-C DNA molecules - sort .pairs files for downstream analyses - detect, tag and remove PCR/optical duplicates - generate extensive statistics of Hi-C datasets - select Hi-C pairs given flexibly defined criteria - restore .sam alignments from Hi-C pairs - annotate restriction digestion sites - get the mutated positions in Hi-C pairs To get started: - Visit [pairtools tutorials](https://pairtools.readthedocs.io/en/latest/examples/pairtools_walkthrough.html), - Take a look at a [quick example](https://github.com/open2c/pairtools#quick-example), - Check out the detailed [documentation](http://pairtools.readthedocs.io). ## Data formats `pairtools` produce and operate on tab-separated files compliant with the [.pairs](https://github.com/4dn-dcic/pairix/blob/master/pairs_format_specification.md) format defined by the [4D Nucleome Consortium](https://www.4dnucleome.org/). All pairtools properly manage file headers and keep track of the data processing history. Additionally, `pairtools` define the [.pairsam format](https://pairtools.readthedocs.io/en/latest/formats.html#pairsam), an extension of .pairs that includes the SAM alignments of a sequenced Hi-C molecule. .pairsam complies with the .pairs format, and can be processed by any tool that operates on .pairs files. `pairtools` produces a set of additional extra columns, which describe properties of alignments, phase, mutations, restriction and complex walks. The full list of possible extra columns is provided in the [`pairtools` format specification](https://pairtools.readthedocs.io/en/latest/formats.html#extra-columns). ## Installation Requirements: - Python 3.x - Python packages `cython`, `pysam`, `bioframe`, `pyyaml`, `numpy`, `scipy`, `pandas` and `click`. 
- Command-line utilities `sort` (the Unix version), `samtools` and `bgzip` (shipped with `samtools`). If available, `pairtools` can compress outputs with `pbgzip` and `lz4`. For the full list of recommended versions, see [the requirements section in the pyproject.toml](https://github.com/open2c/pairtools/blob/main/pyproject.toml). There are three options for installing pairtools: 1. We highly recommend using the `conda` package manager to install `pairtools` together with all its dependencies. To get it, you can either install the full [Anaconda](https://www.continuum.io/downloads) Python distribution or just the standalone [conda](http://conda.pydata.org/miniconda.html) package manager. With `conda`, you can install `pairtools` and all of its dependencies from the [bioconda](https://bioconda.github.io/index.html) channel: ```sh $ conda install -c conda-forge -c bioconda pairtools ``` 2. Alternatively, install non-Python dependencies (`sort`, `samtools`, `bgzip`, `pbgzip` and `lz4`) separately and download `pairtools` with Python dependencies from PyPI using pip: ```sh $ pip install pairtools ``` 3. 
Finally, when the two options above don't work or when you want to modify `pairtools`, build `pairtools` from source via pip's "editable" mode: ```sh $ pip install numpy cython pysam $ git clone https://github.com/open2c/pairtools $ cd pairtools $ pip install -e ./ --no-build-isolation ``` ## Quick example Setup a new test folder and download a small Hi-C dataset mapped to sacCer3 genome: ```bash $ mkdir /tmp/test-pairtools $ cd /tmp/test-pairtools $ wget https://github.com/open2c/distiller-test-data/raw/master/bam/MATalpha_R1.bam ``` Additionally, we will need a .chromsizes file, a TAB-separated plain text table describing the names, sizes and the order of chromosomes in the genome assembly used during mapping: ```bash $ wget https://raw.githubusercontent.com/open2c/distiller-test-data/master/genome/sacCer3.reduced.chrom.sizes ``` With `pairtools parse`, we can convert paired-end sequence alignments stored in .sam/.bam format into .pairs, a TAB-separated table of Hi-C ligation junctions: ```bash $ pairtools parse -c sacCer3.reduced.chrom.sizes -o MATalpha_R1.pairs.gz --drop-sam MATalpha_R1.bam ``` Inspect the resulting table: ```bash $ less MATalpha_R1.pairs.gz ``` ## Pipelines - We provide a simple working example of a mapping bash pipeline in /examples/. - [distiller](https://github.com/open2c/distiller-nf) is a powerful Hi-C data analysis workflow, based on `pairtools` and [nextflow](https://www.nextflow.io/). 
## Tools - `parse`: read .sam/.bam files produced by bwa and form Hi-C pairs - form Hi-C pairs by reporting the outer-most mapped positions and the strand on either side of each molecule; - report unmapped/multimapped (ambiguous alignments)/chimeric alignments as chromosome "!", position 0, strand "-"; - perform upper-triangular flipping of the sides of Hi-C molecules such that the first side has a lower sorting index than the second side; - form hybrid pairsam output, where each line contains all available data for one Hi-C molecule (outer-most mapped positions on either side, read ID, pair type, and .sam entries for each alignment); - report .sam tags or mutations of the alignments; - print the .sam header as #-comment lines at the start of the file. - `parse2`: read .sam/.bam files with long paired-end or single-end reads and form Hi-C pairs from complex walks - identify and rescue chimeric alignments produced by singly-ligated Hi-C molecules with a sequenced ligation junction on one of the sides; - annotate chimeric alignments by restriction fragments and report true junctions and hops (One-Read-Based Interactions Annotation, ORBITA); - perform intra-molecule deduplication of paired-end data when one side reads through the DNA on the other side of the read; - report index of the pair in the complex walk; - make combinatorial expansion of pairs produced from the same walk; - `sort`: sort pairs files (the lexicographic order for chromosomes, the numeric order for the positions, the lexicographic order for pair types). - `merge`: merge sorted .pairs files - merge sort .pairs; - combine the .pairs headers from all input files; - check that each .pairs file was mapped to the same reference genome index (by checking the identity of the @SQ sam header lines). - `select`: select pairs according to specified criteria - select pairs entries according to the provided condition. 
A programmable interface allows for arbitrarily complex queries on specific pair types, chromosomes, positions, strands, read IDs (including matches to a wildcard/regexp/list). - optionally print the non-matching entries into a separate file. - `dedup`: remove PCR duplicates from a sorted triu-flipped .pairs file - remove PCR duplicates by finding pairs of entries with both sides mapped to similar genomic locations (+/- N bp); - optionally output the PCR duplicate entries into a separate file; - detect optical duplicates from the original Illumina read ids; - apply filtering by various properties of pairs (MAPQ; orientation; distance) together with deduplication; - output yaml or convenient tsv deduplication stats into text file. - NOTE: in order to remove all PCR duplicates, the input must contain \*all\* mapped read pairs from a single experimental replicate; - `maskasdup`: mark all pairs in a pairsam as Hi-C duplicates - change the field pair_type to DD; - change the pair_type tag (Yt:Z:) for all sam alignments; - set the PCR duplicate binary flag for all sam alignments (0x400). - `split`: split a .pairsam file into .pairs and .sam. - `flip`: flip pairs to get an upper-triangular matrix - `header`: manipulate the .pairs/.pairsam header - generate new header for headerless .pairs file - transfer header from one .pairs file to another - set column names for the .pairs file - validate that the header corresponds to the information stored in .pairs file - `stats`: calculate various statistics of .pairs files - `restrict`: identify the span of the restriction fragment forming a Hi-C junction - `phase`: phase pairs mapped to a diploid genome ## Contributing [Pull requests](https://akrabat.com/the-beginners-guide-to-contributing-to-a-github-project/) are welcome. For development, clone and install in "editable" (i.e. development) mode with the `-e` option. This way you can also pull changes on the fly. 
```sh $ git clone https://github.com/open2c/pairtools.git $ cd pairtools $ pip install -e . ``` ## Citing `pairtools` Open2C*, Nezar Abdennur, Geoffrey Fudenberg, Ilya M. Flyamer, Aleksandra A. Galitsyna*, Anton Goloborodko*, Maxim Imakaev, Sergey V. Venev. "Pairtools: from sequencing data to chromosome contacts" bioRxiv, February 13, 2023. ; doi: https://doi.org/10.1101/2023.02.13.528389 ## License MIT pairtools-1.1.3/doc/000077500000000000000000000000001474715105500143065ustar00rootroot00000000000000pairtools-1.1.3/doc/Makefile000066400000000000000000000012571474715105500157530ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @cd ..; python setup.py build_ext --inplace; cd docs @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)pairtools-1.1.3/doc/_static/000077500000000000000000000000001474715105500157345ustar00rootroot00000000000000pairtools-1.1.3/doc/_static/hic-processing-pipeline.png000066400000000000000000001514661474715105500231770ustar00rootroot00000000000000PNG  IHDR~3sBIT|d pHYsgRtEXtSoftwarewww.inkscape.org< IDATxwE׽IH#$!B)Jޫt/ "_"_JP;kBzr~|v~3g={MxGNi O+ "l hmuDDzǀG[\l l 涰^",6KG])D`1Bb@`w`LwVJDd LmTf_fX|2[aJuoQDz3m:X0v;ZJ\ Z\NJ,[x8[b7KDdIq8vӮ؁7F~zuAw_ %`zc" <+iFliVmLޛZ\/I,߈Mj""K[X`=X|pHEqx|86dF~YWł<""v\݁wUm6}u- KEDDDz''&f:k'g;u`=c%ʻ1G˾l&QD$;fM\ $H{ץ;^⺈t'Bcs 2;t-"Yݓ̐Jneezs-"RO|A]L/❋kq]DDDDsI+H/Փ'NngUuR""u܉GU콝z, FY"K4@DD6`M`p66)3ؚQZr7l^,ՁLEXP{q:Xv[ xȊȒ?0({< XP``5`%l䍃eym3 XXkzc>3l?YeGewg >,}iƭ[$"Kdn/&,2Q_f(u1vBG+Ў^`,vlX;_#ۿ2ul _r =3{`;`&AP=;&ŽAb ?;^C ۧHp6x$ax<́lMRξ_Ifཛྷ&WD'%pAcTW=yKF$X:Wbñ bj78䍃s~&{ĶΫ׳ǯǽ&eDN0pubX_מ |-OEς/a=S܄|XЭٲe|߃e 駲$eh;+ 7'EV̼D]ATHD&|{t#5낉koVFѲeĆ*jÍ'4X`.>}F+#Û{pXNpS#{2bC@,bw0u]QM4XzHOuvG[..?o\n^@x8x]bƚ2="Y|~> ꇲICu us'#cEˆaજ߂VrA9 Oc#ͩHoˎ6/DͽÂɻaAX<v[NѲ}_~7^)R+9W=;n]PN"f`9e6Ʒ5H,+[5zm;OpmnjSY&a޸"-n,ڀ(;.8G}|-Zvh,o菝2߉f0BD' Dn ~8q=^+׮ %YTOHeSr\Ph'eqrLؙ>R\j,˷H, ?*(75+L0F T| _r'eMKrM~}}Lb=Ûe `YniTbo ǹG/X66g'g|*i2+x3v""""=Կ0un( ;̔=ZV쬜IM%"jc8-;έ~8E2J,,`MP8d8q؃g,$?+`+5u(yhǃeGNnhqcm^|CaOe;ˎ2g:E>y2W?愓`n X'/=eo`'Q}cr6+Xz޶Ԭf8-sC?_ "k{ zqqֵgs=,N]yaRCչsu궘Iw)afU>H? v2s yu {ryLuwC=u 5Ed 6k!Y86#4 qbv JDqD %6xh"<=~`.33d Aq`+u׽)"vDuy=|@Wug-]Ò.BEh#~:BNd&aj8nOR=[wzκòky|y늭$qV-cλiZ4dƚ[GT t #SYDzٿIg'Cu7diq} ύ( XwԾ=LFѸj""=kRX^!x<`}wco&3?Vfm<'?|bq& SMmFlEc-{<  N:F~6έ):oH?q j8E^OXxwk n.:8Trߣ-;qF"iLm\9o\N'[3eePEZ&iE'js9 QD/X:_8o>v:΂mgcP. q22P$x\/+l{1q%~b('AlmƮwA_Svؘ`3;zyX\NOEl¨:Xw띕SXV_!< L|߳epcJنrFOP8ۋp蝑Ī'cݰ=jX ~3}@wsvXnx?\7Ac? 
&uvz]W]MTaǰSI3PT1b2k}:8 ̻C{MHYnM,}cě""""-7NNffa'SfdNz !5>.ƭrjMT쵂w}Vfv2*"Ӹ ޴L 5DխK믁ex{?x.BnbNƏv5+7K(hk`]͝=뇙3u-8s"ngbNjishm2'l9?9uEf=UⱮ6<c%6u+ޘV ;,geNN,?4{G`ߜ튈Pq`Bua3w|^^,m/80,`uVg%%gԄ""ֆ8/VCGS&`cͶ>sa|Tv޺.gak͂OvP(";do.6T.%(Ol`Yv`7+Uo"p.qCSfk|fqvMPt]QdY>RNnfV7^-y ; x)e~:}neƪ+6Bp v|X.{hea=#oځ+1f5 ]WԳ! ߗ1g,<~ +a1gaåT{,ߍkQ>H:n=61Tߊ4(̪u-x^}.}3O~,_ܧf ISf~"' p7Hj5"""""""9&΍QDDZlx`cp $~Jlґ%yW-&""Ӟ!1~SƍG^&i9M("=a1o c>Mf&""=N'X9ֆM6;DNwR=Q,}@`VWBDDDDDDDDI.muEzk@%׺jH/p#pF""""""""K)XVW:h[W5ƷApz`_7֫+gb7XOjq]:$gb$⺈ȒkV1`ץ.Ǝ/_nq]DDDd)zݴd]`>|.?5Rz5b9`嶤&]Y.*߶i~qLMp\WVJDjķ%.]ay%R""ҷ.MpsݾoNNma]DDx7xRl؎#ED: 8,VhuZh߹o[Y.ձazF)WtI4[[ e;*"g`m /Z\fkxU`'Hus} i&bVWDD$SoHAKZXqTED]w<ꊈH""""""""DCjHOxh`S` l¢$6c&a?V˖\:J;uSe/`ԭ &wa38{cCS籬_bcc6bx#XW/W|;QY>}sVvޛyL?Kguүn]: o+%~/̉zÁ}m5X[92d#l\屮|n~h~ k~ ˌ? <}ҹǾ+bk~< < 7_/a`6XJln#9Xs)w#c=9~WS]wۼg_>w6ֵ/Xzl:{>Wq^xÁӲw lJsc;_Z=\dva ޾ `{9`cazf?ґlx3fl`-5c$pvCX uX[^\I͊?p0,{EYbD->@83n};%~Kyv2}7 1,w>KDe"KX pv.=;H;g;?[;=vzD}Wcזع?=y,㿱9kx-#3u.M:^mrFwN*vwud`}( ~J,_[s:o` ֫`OwH.> leO]eYSamcթX r?v,w/9܎'r[`^ NB.IJ>RoeCY;X< O{{9:ytkVTTaDg4v18?agyYݝM Dzb#b2G`'o/?.xm`[vGak2bK6`,,Y N,k2D F_,K9uaٌnkm N>v,ӱ}X,l-{r!6V#N=^Y.@a7t8'|%sTVϏc=*3X`m2>xֆ|ם3|6oeb߉;/c&m 콺 .}~[ÂK'' l ׼ Ѹ -m3/cm }_~ñTM{>}ڱE9^+R\83W[Pp7|7Xcv,y.]=kW~m;/s,s=v,8k{g8Xnw"3L²Ďñ,-عY8w֓ ?zVk+u#0 k/ηǮIq_C/Ŏ ^a ֖;ރms{ ""D37Sv8v`)Eۨ|`, H:/| ?idw'Se:>Fcw Fg~ ;t;& |OljE:vF CXvZ{%|?^[p Ӱح>N#č?߭O`An#۞  }߁B+3.o9awKufwev^ǛX(ԆݰpH0{!n2ĶgNt*X;UbgQb41~V,`Ne891XNjV>%2*mIA}{f8G_\Hwf8N,_Xc۸t}kt ~ؐ<2VLyԶ3a^|cc >3AoJHtg9 (sebH|{4KRHF' ,pg] Lb {L-VFDDd H9 &q=EF/ ,0Y4rѰh$|Yry'a)MܛX^6܆eȜrhY#}`2XP|=^,ghY7\f1%XvQ[` ȖŲS ],%=(c^ re 2.ZvD,/<2Ҙ3gvX3͛ qsP.. 8%ʤziu3te !^xP3azA6K\ƋUHk_e>H%if~|[7Z_,Ǖ$\x^˶+x"" Ȓ?b]S'ېƲ`&/WmϦt7z^GmhWꇿZ' E'wWoHLJ,K˚k܈0H^NaOq p6v2YOlQ*&+> g_Qv'o#+X[< ƿgȟ1wwt8oLGesC|.Jl]qg;M ǹfƂ.. 
›kqKaƺCܟFѱq?q,{[OYV"Α瓟ҕVurhì5TKx]z"!$"h gY꺛'-enuaÏk79R=`l\"vAWt>OYgr.?LXP޾,)1Io,C/eX0 q}h-3؉jXfgC㇂eƠ7:9N2 (~z=(yLヌsL,n|aaO7N)ӎ [}_\3os\6:~̙ݔK-ߑ\`dcn[ L ǵ@݁ݸ^榪9k)IU\WKǎ#VkSbcx\@βXI[rTtLWkpE[ ,v`u{6&ϩ>h0^v]7S`y,;p^!x7]}uT;㸛 >.&?;{CqN-U,&ѽg{бQx5.gkƤuImʖPݩ,lt]u<x#)y]\&SfaXEǵf,90{p<,0-"זopu]WM{5{q&s1v0n_lϾpG8ˈ@z<03-87fkaV| eʝ=*,6iWvFº24;Y6" ]Nz"fu%] 6hpӂaE5K+Y/Y&Q=Ӱq1iw.`.*S .i䷲6T^ wxE,Ua,cCF*z]yhl# Xkܰ7 hMզpZNXkzkʼɁUmblx+v켱pDtQ"w-ո =w?nzV>uؚa`:eϮN`Dahڱ.py_ǞnջA;t1.U z];*ޯaT ,=CXҀ;M$lSѹsȧ==>XފJ0ۑsfH[wv\k z@x)כ3n ;~ݨ骹'D\:g5?TVOkz{ڹcN:iTDzeifom.ÏALdPnX PP[㿗G.ڇ~Ȋcoҍh ?ߦz"oo:mlڕ3ˮqrm7wH=f6:l5ߐ>+؆c]`1/3|lws:n-l/S~0nN!L)(EejpdNY"}\: s ՕW`דf"}|#{\>u)~Xfβ49{,6_w|D ~Xg(.kpj/př>)a'X)AiCǿgQ,vlwa ϒhԹ ώ~Z:{;a˰퇿`ewÂFyu'ҁ69>Hu6eJ72  ;Ul2)Xwj~O;ep 5)ew{  uXf`J8!#B(w#j4;''QHL)"]+uE]l Ǔ=O玏`筱F ;95UfAY6UDri gYL2|.N ~eF݆͞>[;ZbǰA C:`؝>|EAb8$vB4c>9/Xb'`(> 8j?ރ3۰}b'Ckvr>N,|.m2$IVbXݏ²_QX0x(pb]2^a㛱.`߉! pw:˚v~l,R,sPV{XXPfK,~|/^,vnb<ƺxe޵+,,=vq#Zw`C(SX>,?@+}ڄ@m7n2gfy.ڄXI~;zS>ca{C,~/kxʇnw݄ v̜}>uC=ƆZ ^֖cU؍%q7և_0b7DtvǴ`ǎ3c-)v6PnAع;vSFb=aM"3]v"v&vYþamo߆]}\\;W ǎ.x}v8,v>oQ>c,uHS]&*(c_)[HmVi,U{0g%_ȝQb[)eֹ`{өkd[%:6dOm$Y-`uw^b݃凖@_j\ff$9[8)zX&qOi&aSgGXfȭg,~wX_P~'X;u1(҆M9`ϒ?|^A90(2 =˚Kw`IxFcG=s s nO{:۹ ~w7.(j} LmJ9(W44MY{Yކ˼7$c<76"(sqN6h˸+u|^5X6uYc> =:o g`܂u}h46 hKC@ȇ)odɷ*6"lț矣ܤ""""""+&X5Ui_Q7Eu\MCKk$" mL,"""""d}XLMw ,Ƃ`_?MXajdM4v̺LODDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD9/ܝY Di#7&қҰ_3}V Jt~ס"""lnV̞[xX\Ңz4o`ץ'ikuzU[9F-HO2 F/ݙX[6;~ 6iDD:|ik[Xq-XI`d 8;GavB87  k {35 ۵c좴e]o+-Q͡=iFyel{a$""]m7`14l5:~V'k~xLεzCy0ݯD}>x#ڗ NŒ+zu 
&iq]RޡiFv,{O`&l;uE`'4i""yڜ^Kp8߮uXJV)\7_sYXD3vޢ6]N:aT)XvHo5k3*T3;;lnͣ:U6,8Rb7>EDخ-Ks;pOVH# צ2k)\kx`.Y][{cCy.B._nq]lπ| ۜFuVgcvmvEDBm-KW)g4N1?n IDATpsZ x 1gw}ThÏNz~+yzfvbhsDzƺq TzR xK@VGD=AD́oXo@^OV5 ƃXc(ez21 = lR][[mvVMڔ tC764vMH66^f6g=kdO\\l ?u%8vE]cuaѮ}\ZDَ} {uڰwd Mݕ]ðcmG!V.{:s/"""K؉B,p0?"૤/T/nnY]Vlp+ 1Ď%a;? f Jc﹈,' | Lpww/oe)L|jz=NBCuo1ڊ2nNZs OQ|섴(qu<pSq0dF|jXIdyyaF ğXف[9:4Xc7=S/c?=-Ur_hm~Yb3=mq))p~Q4qݯ'2 :2伿8; `ԟPܩ?S&^}(g݋wbWe/IDDDO N)bcq-hf> ʜSO`iXX7ma힝ݎΏbe/C2s}⃸R3k?$==Ӱ7|Q)Q epˮͩgc=yzkq]Dw9e 8 L. kV[: 86$ >lXX%V)#"ۗU ]]6їvs:Ι;߽XOm.g೯c-;`rk}_,?K{?NƮb Y9^"""8?,Lp'lm O6 L}wY"+3ޣy㞅lw'Elj Msؚ@" g|qb̓ݳ-kt\CnGK&|Ip45⺈t) 8&+",ۀ co~O%cԑر}XƯʯEM }u,x/Hٯ&"&6N~ j 8(1'Qc?1R;Iq {x}۸ 7]VxlkA|G4U "3mm}`]6+.b,H-vq{]Xm 6˔~X)(mҋXwオȟ4lGmE,Cl5؉j_,!哰n#~Xv`/!LcITrtm.e AG+BonPj!cNlg.p#ĉZv0_]߉t!Ԏ1X[eYu7xn1v 0 MƸ8#,o@VX(x|gVz}o](|O^jY-D_}'Ǟulq;;1l+rwx.\aTo'Uzn0 4 ò۱@ba]4Xw o 獜ۻktߒ>>k`y[t"=[ Ռq+'FۀO=]`tZ쩬,PqˁR}nUyYDe̯U ,ղ+ IJvC2{#^b>$ <&wb[Ǻ8%7 'o"ݯǞբ_P}.:kC‰V/n\u8[=-+CvN09ƬEm lؑP^F7. l¿``7C?"p'T("&wq/Nk L:< a2ՂuW:aCuxv]D:MleO CXw*> n{idMwibIml㉩\cYX)+R;ՃG`nOcjPțhu!l0Q(;'=lXkYP}AR4SҦYǞ!Nx_EuU:eo0j?pv=[vBӱ WǎաT0QDTmQNw;Y2fL[ack<s \wT`4 0O,o٪e,\X>W@QPE}52`ew sLq* 6 mPuFDJqOC^D6r,!;F\BD֧xnʯ=kbm||3fWE۰Oʼ ?:Wߵm͛Q>nְseL,騇y_*]wn;sb8>"x|<_w{*yˏ&ww;ܓXp]7e+?;\~7HE (|(bGEE"(("]D H=@H#g~ݝSu͙lyg|U\Ъy= MWz܏>7A.!{O+C=*fl_6߃"/?O~s<{{j%Q`3FpΟQۻ  u((][ KTiNdEE`s%EaC:Ffo]ǩJ?<&c؜03k(4u\S6zTOXٙa\>O|e`]ڛ/B#ޓs3^>?_6BT]=n icmqscwϺifffCІ$OPϲpU9{?6"8l\€sca֨.p{yqgOQ;Qj ΠE# ~#;qjcҏqyB< [^`R66z_mGfCJgmLyjgY$8yAZ6H&Yth<^hsA啟6 2yBԎ$l4?Y+ mX߇P[(?K/>=7;>O[ "YV}5$^D?seKff8ue2U9}Ehf!,B +Q"nGoE=O|M(A\d4 5NC'ynv/F5)~td,V~ }3[|BF8I9ˑ,u%ًm[z}|=4T%(ʥ~D~.ږgI^oWhgzd,Mq(dMgR?|o(Y4 6]HTa0fYM." 
7vB:h('{Ѣ;|g%_ʊ'P~v7wtjB]FkHujf5zl:Pʺ%k BZ}CwHؒdOWw]7-HnzWqZl ܃?Uf[Q+GW333X_vEC'%g>t l n* kh[6ͬ6Gk،:!CF7N5`c```"0XoYSMuت.'fS[N<ʉyMufX-wqTVLFx')3wt3fff6، l X矇7ס P9&TV,nBeu(`ffQ.OZ^󛙙Yz&uq75L_N|+ܡl x/{Gi13\!6j?p V_=z<*k(0?:ǒ1+!իqy磊^~؁iOGy4T>XQ/PyQTN<8x+23NvF{дy^D7/D@S2FSMV] K,Ϩ]qlfff1;+`!p+p2;L`T|5'аRx03z p0v(N#́O礣8Meffݵj34ppx4@knB $t` jAvl6[㬛F i^Es`03֍F7n%;nV,崍v~Y{/̆-_Y[Qn[ 8ƒcpiffff-*{ϑݓ!hU w <6umf6\C=!8 M1:=lh33k]YAGњ,TzS|ZJ z|+VӁSQl [84} lffY*]`RW=݇pp7&F+zQҜx޶ԙ p#EP|BekM/Zez54ՆW63+0Z/>ЋU^+-+[gkV єD,s4/Q gYնM}CB9꩖!03+c {,]]fE4&x T]̪p>b<|zjvB W^ ffi+gS_N<ojnXt d#єvө_8{ uTVDϧC +p͟FU럋S]4Eki4P/^tLff7wQ_.B[V0]ݲGoT^:̬ik7PhǙ _D=/s\a̪4 F'g#v0]ff{ l_a3QkfffPܫ9h 5I&_1433+#՜ry8[ k045fGK~9-_YcЈtAx333¦R߫y6=zjg?fZ IDAT}v2Qff]*exC^U&̬FҽSd-ԏ*̆}_h1$pWb̬S^~zU&j|r"T U7R|D pE`*efff֪qϨ½ڞE37=ͬ?DSWs~Qљ!15m_\v-\H"'v4nifffj+5UAh< P϶v-Zp6̬;_r*5CeBj8 t>3f><8D RS?f4F|0333k3 55~(ܿu~tp.32̠6O; 65S?(_g,hfMS9ygYgО 5p>3333z 4, \Bm5W9<,u-F3"=hnwp`BΣ~mz@|3ffemG}_ᛓЃʆt'+/:433n 2j+,W!P+g|2s^ifVc"jh0^{}/^G9e '(Nz#mf/P_^ikJlxچg+M1)YhFN'R|5akGj(Z|  Q%.:X3n6LFS_H^./ffffA*MU9]iՁ[bc~PtzѕTPj|X o6K<,ˉOu%f6lAm38 o#oR[Ej?;d333F?/`*e*'R[9 _I}! 
Z̬Y=SVrb.I9 \Er1T'03]]u`JSdQ$c>Fغ 733e6zl0Zڛ\.')6}lD٠w?e*M5F3Sd333&(,4E֌4wj3d3dn'74E֬h85h:&4j̆{H* +M53^)CIy/3㛒_3>e=9̊?6O4L^e]O (S q`g)fmA*}p3 `!-8 7Aepmp>oy崛Dsv㡎fVos`:'f{TkHW;ʉ̲%3NA&=JrlxضXΣ}>4Gf?/'=ͬˋg9Y_fl͋0333Y A+}N]33v,xG:Δgw:̬wЙJa]u(f:WN\ٽK13+/+tR̬U?s>4wZ]33korba׮:e`>-3ڵ1333 >ӝrݺ 33%|y0KdffffMZxW Q%It23t5Su̬IJSkО`gѰl33\S}+p33XT6dS|{P̆̆%UJd`192  ^/\2̬"S[N68sjKUm4*e v%"̆&W@33Xj dsyU%^t._`ߪhfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffv7K*NK@rKU33kl7[@oi=еꄘ B ꄘY \ Я'0,j,~_qZ:+WN;Z8-ff6>p<0ⴔuI=C׹ꄘ BW[v-E]sҊI)׀J#cӀ+NY7=(y̦1;nO2^=x}R^k0V: L m$;ⴔsc7@o/BgS (pQ`&^kMgfC$y'*NKY8YEQ(X%w1]p9ɵ|ⴴ$?TpŬ&_[|>m0+ؾg?ӷ;`Qpμ,`~`p0xx}f8-O7T бpI^~z`߽/Fϧ'8x_Pp7GKgfC(3FKh6htcÅ yy*KEEhܛ>p0~fU8M>lJ𷻁ŀ5mQ5x7XF̮:!9Baa{`R{87OpJץ9 |F(/wi\{Ͻ+Ma4Wm9p(p>p7N({spo>cPi6F/Σ`p+W}ػQ2,rd_kAھڿ-"2;gQõԮ>Fv;^g^tmx_s\DE Y'r~pLVN3CsSjA>H: ̺Iuwyjp+LmEN/8: Wrn9uߟ)5>B~0 Uвr_"QzZ_{J 5R$9G79c^V>vp 9'-Pc7/yS"MSܦs`plI9~^0jcʵ-n 7\h[?}'? 
Gi8^:lT畏_.w α m{`VSj\lBV-_F/uDGp:u\ϽʱSIzBqC}^(o=Iuf)5Eӏo_ D-}k$_CDՕ x"'kVy]π3.?:Wk2{gy + 㴓}Gߑkx\QPfl[yHuk5 d7Jߋ[~ܫy&A3g/ՙ7@nF^蚮EՀ,D-~2?q&kWeA>4DOZ#;V1glOGk5_3c}ۖ3k0}M7*ḑm5<?dxa`tyjgȒu!63U]OX[k׌G\OvՀ ԾWjC2 +PϴP~}_ xJZvS[Q3kN[ظ wY.݀z4:{vp}&7 8/Ame_a~_εWW:k#OΛōd-E=Bm9.E碀A, 8Axhi~ڃR[s|ޘʭksq_s2oAFOǴ;F&q9 Y#.Ү9{P|(~e`ChMHj{{s0Tj[/py>#P^7'2gl_gS$2 -*6I%c֟eyo 8_m$n }Hp=.8 QG#Jy ȋ/|f^esє"0\1Or#֑( w:< 3hf6T6܃zM18%#8ߒmIH*FR?;cEjհ|u*M=yqmI*U𺰢=H {%pޒܟF} $tTW٬g kLۑscM8倳YpHM{h&{4߻HovElD;C {2AsyƯqs4p>zSp_MUmf lp~wXR>g#lol}sz4 έ<p^%8&-k Io\zPG8Z=Qj?E'`sej[I3McWFΠr!.˯9NJ$u}\aԶFgxE߇R -HE$M7G 8_uzHe.eibԶwt|;3Z0U%=H~9N^p'$_ "UAڃYp!dCN7OPopi7ۖ.ƺpYS}6 o{.NXG fY$n?V&lޟ_Ґg!q6U >xA KZBEGSbɄ㏐_KgYOҕy_CЁ"\бl%zhf=Qߋ=f}$Y=-2 95=ɨ3,DP{St<ܡOPɳeT\6/A2ٍ48'xJuɂU2j1iG7̬ "k̟?@f;M4DHN%頴qgJ'm@YrGFTɿZ/{r=}hW[<3|HxܽjmM2ZQ/ٰlI],A{o*n(6'ͼh{z*Vާ*dh$,ط' 0AtO3ݮipkQt`2Ci.?L#ٍK`2;Fx;뇫dN!|'ɪ8^ӱcP>Ita,٫y Dtp;+nw[xuv}ReWR7R$#P6ni3ΚG^x۠FQU|->dv ˨ÞVa޿Ifh֥/^+k+\,'e-De]rZV3ѨG,him8窨.*I/0/TtC0vE~o@=y:x)9M\dM4Wos̚17KT>g<m;Dt"i]KM0 yӚҟc^B^?cQohh$ӂ^Ӭ#?:-wjFFVȜE™H0.nM?#(oY#jZyP#O>hx"4+13nMƋC7EtƢi8F#|fX訁7+A/ao/ HpZ4ͤ;(;w,3)It%^^ `Xd~ IDAT(c,Cǜw/v8 $a-gǠKaѼjBKv`ߴV'wj}v ?eHFX6"QpԴ!/PY4T1&v 5w[eѼyGR܊}vVs3=, %~.g/ūuC2'd-th~x\=WA?Nxx z5f7FϧP;J.$$ rihEҫz:ZѪi4Y$"θ; U6x~%ioA% ^ܗ^ :%g?oPMezOھ©L^ݫSyٙDZ(^DvyHVm] uq9…~Bk=6ʈ3^|3uZx?w/Ds Gm"$C~G)<"@CIͬ9K]!ql2jZ)*7FypK|ߋH;P06>_$E?ݑ)(z-Vv-cFӑ~{ *.|˛sFӭ~Mw{)J'$qєۋ:PnL=)$km:6# L$pgtcXwH*>ߏ?<Pt:ur;7'R}>_^ot#laCwz_l_CIOzB#m{goQײ" Xe.;/ǧ5@Y|etieA(C<'s({l/qhآ5 ˺+xv}6nj?> e^_燨0z)اS4`ߝ 19u~l۾b? bǙY^&=Qpeo^s?*oA= oTlKWUZ֥6O I(Pp={0PE](~;p8r{x{FU.M=8hߣ}KUs%е>m/oc=Wa I~ ;u/ _ͪCN8|d2?-gpfַ9087]oPK8>N.Ho +oP,=vn>I3t(#}K`A?PownDž퍢!W5$9 /o$eޣ w:p+ysMmйѣBA?NIcНjut|AwGc€ҨgA t73f5y5' 8=&C\ 8nȾX"y.kzZi ffێ3LS4ဳY+Wo)a( A\Ʊ=λY_mpe5j"Zecى34~I=7\ Qts|1K6x>>&jH.8f ::t>lL=A9nhs(_|{S}~W+WԱpBjw@tԑnnby L8*|2cQ>(}fY yFD7پ6 ߈:¼ާuL}o0EPMdKPhFtWg72gXϠJ(\4'Ok{Ep;+';fh[^"b +^/cGi_Q< ̢YhĵWpא=Z$YVI~^P"zUm濹@2Yh;R~);(K6! TXǯfV^p z|)˷h({ ʇsYvo=^:&i)4tTZPyq{뙏ˀ/ѿbPs3ql/mHޗ>,اZ=Ĭ5ϣg~hh*Qox;5gdzC=dAuMsy,?ScD:zFCmR<-B}fC=]j? 
5/Cut{ ]#׃=Q} j_կO ] Q#졹rE='Fi}<ϡ@|a{c?"`! 6B2 _a(nf@w@hơ\#НNfSU=k53Zp]SeK[ D#&DiJq<(Tfz|YcPC**'xߺuf6o, tQnaq4+lFSmo(Ԏ[jIʎ2C#b}\ vj$I5ff8Yg33 +lff_8G!O8ME < GC>܎;ffffffɅhܔϣtif?"ز4ޮASՙ0瀳 %K9`h: 4vh! bfffOaff03xVG Х=/ܪ3}?!lfff XN"YxHz12GIC+皙 B*NՊ^-:M0t vBw'i̺mD 0~ 2333맛Izs[qZ '<I򯬀xm|8ꄘ _fp^=)y̑1Jm[=ڐ5:wXa>233KqgkM*K uBe xOi1|#IcBcnjIm{_5?LߧQ,#K#33fY.fT33nG ⴘ G7wп] f,}MױV}RpfWW33ffVJֲrcaZ5e\zcXF/lW^yQ=cI`C}eSwyZdYocf 鏥u{֢#CCʍ'6؇gB@yJV>w^ M=\zR݀{ ο3Ivd~oF pJL`׎ڛqG3|,ڞ.;1[: Q(!;d:{e7D=ċ lu[=nCyhZQyUǟu{4y=8_m{ɴĮ ߩEGL9G<2^0lߞ:$wx74x/̆hn zi|:")pGwǡ _&u +n׃ 8l*1Ps9؅2w@PðsҸű=mz0uLـ=MtR{tmt}G=_S軶NwPơ" ?CvO^kxM+$7\0؁dAoO:`]hDVE90f7_*'IB_".ދPOHn'{:ߨ7[Po]Aթ$+(?\ : ϠU߾}Snm"<[_-g:SDž?Q~0 8'jmM6c{pΛkq^_ _xc"^Eـe9oE͚bOU9od, ^Nv;>oO dj[/cjO:43fEƒv#LcqtS1m?ۯ@ld=$yfUIYA0Z#Q]>Wۏ.$md4 TEE. ΛZd/KQ՜/E2%<5 baO9<'G[zL =Iw/ 8qHjG)Co T䝷fl 8O$3Dzќ67y>iwyTz:%8o&Ͽ Ie΍ϟ`_33-oJN > ǐl C}@toaTFO24TkMA>NA nNK_ǂǢUoP@8koJ!dD/I~j'4h΋rQha3,#H]ltPϓػ8t*E=.No!}}.ف't@j7 ta=ԛ4gi':^yY_\,z$nMgQ/ePpyelCplmH{P<%KF߽f3uý><›:R$#AӶ5zM ϳP햙hd_4?iR8I:=/]vq 4*^* TkFt=coR?gViѿ)n-Qya; 4z9Mjy xSO8;"4#iڴy;˳PXw?M bff6(A٬h3Q0ۨ7ND46~~[x4wynX3x~9VE.B< {K¹#LWy5?@ӕHz/L-b XY& lܧoߑ{e IG=u@om6|} FzJ IDAT(B45FU˒L}1S?eS, 7Ni%Cގ%{Xx,Ѩ1]74>9V^)t8ܯ]€h˰ϤWyApfh,58{-+2d4g{T[u`yڛDEu&H|OF=WF[;/ȧpf(2_784z>PVy;܎^zB(ދbÊ*UkZEk/zQQE@ EtI%̝v>'~g?gΞf֞Ϛ߬1x F[C1'9|d$c 8Ct1؊H;kb|廒 :;"ߌ5^~ a?E^b51vJԧcW7Zξok7 P/ӈ.U3rKқ{kS41Tbl}<szNZ{:PA<ͭ,_Lwv.7:b,iפA'{c"w;ѩ|#MznJgu4Vc׭:c&16p0'ƥĽHyxusҹR%iu}5;ꂸcU?Z\5ǰX6D~$,% 1d"}q?9b~CI:π3XXsCH9xńy阓_pMyޢst·A2U+)&hIi'YE] 6aқ/R݊/!.$&R [ :$ $g{zqˉbRܲk qt?-3nJӖC?\6D:ӪgDnN徙Y;ƇOHlxYhՍH/:9G54|z s]C%WusnGDyZb櫉F A#@<lJ[׾^uDzNݞ豕?x5p*pؔgg8X!D{SQ4; ѥk駈]Ҡz8?NiNS3Bdb F7\g91D7׹rM!oC:|O=C2~?Ѫ8bcn{%ZAKjeSL6qÞׯLWdK?YF-. 
#\ow!ךDsgeCw͌L9}(,ڪ*]J&m zm"͹({}hl;-<*wP+d۩Yy1n725y_D^G~S&d?p,u("s-!/@!^U%I35i`+֖8[RYenV:=Ímt C~AmwVs2#8M1~x6hIyV҉^eV7@Rf{0n_[1"ɑf\4OzשJX2!ZzOJ֮3C\Z T୕)zfqW5cנha۸ x3E5mRr:sD|u눇eiKFvӱ[~r@SӿɏG([J×&y.IZ"ұNLD7'Q>H)Dn<)kp:wuiG)u4NOp߷I̛vӛþg˯foʯWoCIsug\o ~<12 toIq߀ 1sH}V>GLZEj'ԯ??6Ǣ(ZIN?=74lN[~0q>TF-i / +(ߣNǁ_a=$ƺN '[D}cs|w*5ZC!`z׺S{-|W77O+u U4 ߉'Od]_YĤi;J7(nˎ/{0-r XiD'dZ{1W--#Yn  5x?/)moBH4G}$)o7 :k5h*?֊|siˎ-mIsUɺ_~9{ %9ĘyAimvtq㼔;7#l̫&__BSb^_Pz=JQo/,;"$bgY@Eo'뷬)6cx虖ቬ RWvfZD%7Sҝy+q^98ߖDEˈ`xn&rU3U]^{b]nd/''~K4'֤4IShlu߽UV!$I tS(΍w!y2%ݏ- <gK֗J~"Y{}RGܛypCW9*sx (ΙC-(M?r8=[I;q}i U&K2߅i]ʟ= $ZA w}q+f;$^A,n7uͶ&foOZn@\U[IPΓ+U6* MG"P_J/F+ >q]G{/\ۀ3*{$ U[gʿ{ym,ETewZ 7Dk*7R+Z7qiD]TWK[?}ϱbDhZ6gףp3}"ߖ Z_ .88=f ΛXFt>^g,_cvg]!ZR_bj ,Bdc O,[RthݭNL1ǟiCfG[ǨQoi "zx0|l+2̣8:+ry%ZZG3w#6ԪmL5[ݓMmb:w>cfINÔap!!x8Ue,YDຉ$-|+r;gU +l]ImJ|?='ͱ\K$]:Ed]z޻b$ ~@ѕ҂6!fي87=D.8N#ken'zG܀4v O'O#;(ZGu:r~iFlbED0)ƆIv!u2a9ڙAԝuts/"/7mĹ@⼲A9!Z=\AL9q]B7YT~[v#O#SD|9iw}"Iư|ȏ-?; 5$uVS|/sY$I##oP$ik@cbu~ybIa}"\5bHwgN|$I$4&$Mt)\t`D8ICG..s!6ҭއݐ%I$iTpX 1&D`4ˈ?2Z$93=W=dNV$I$iugC3;WMs1IDk (&';xIr)KYSf狵VηChMwJƟN˓I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$k}0i]I$I$IZUxa1ZVz``>Q/$I$I$[}.hű8d$I$I$IZי}- 8 ܕ >Œ$I$I-zsYFS]%Q,$I$I$[S}.hkpQ*$I$I$[h>e 8ؼu$I$I$IֵD@`R2'wgi6Je0H$I$I!lkkSP:1y"$&0 +("} 8#[ rۓ$I$IU{)xs,nԼs( 6݃_n DK*Z8C)Ơխپ$I$I/J/qGe+S Z0 T2"[YgAD8X+da[z|qAۑ$I$IU߉o ,ɶ Oyf<ˀC*< XY<"͟)Z(M4U餅3Iy/mN$I$IV[:$ϱҺg$.`p9$; 8_M:I$I$IZ-Mƞ6h<l68KبӀYL>R$I$IZ3Y~V\N܉28z2[U n(9@L2؋L gI$I$i䥭u!L"&XA{ v۩F7c+qƀ$I$I4+Yh$:̳2I0Mfvэ,cY$I$Iy O0$ߜo,/Ynt ;g1Ȁ$I$I4:n~nBO܍=v]"ݺؿ~.묈M&QL6xk 8K$I$I `a$hH N.;bhA*/vΖ>E$I$IƥCc.HM $I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IVIkcw> KƷ9--ʐ ؿ߅4j>lBa_"ӁIm^< /ixm ױi}،~D$)E Èr`s\41ٻe1D,Fۉ=\h]`)q~a<,W?C}.UODraxR`Qt:X?$Iji?"8~ED: 2/nWo1ǭ 8e46"߯8c;({ plK[IwSԛsY:5(tx=i$I838Y"(.l6OM"Z=sne' -c>e~@YsY$RR٧eVwY@~U8H#4=lUD w<؂8OD$١Eo2eAm\CuEw>eĀ* XFe6V'Y#>J`>Eq:Y}.K7&%}+Q oI5\I\ L2ޚa.p - r |,DV-ߡ߮e>HqCYOKfMbpQҋIӉ1&Q w"WI/>˼Sr p~>1H16h$Ҫe##B:ﲼ:!Z|7Ė}-=k\E}.$I,ǮjMݘX"xفZn\>&cE/$( uʶxE9D ޵m?FLw[}^l0Oj-n׷($`kbi{w3W?RJ>ؐ{ 
1~(.>e8͈%zΚĹa 19D=+{M#&mJ!&Eԉw\`lW npE $)q&S;ȿqgax=TJ9},e >ee|=#Id!lg bȿ{(:1k7&8ԙIԁ ެ%QoAViw!S{;H#B3[A$iM"nMn<)FI٪t+S *kOLU6[WPM]zxvMxil %ZލJy$5y<^v>K[&i:^La 8弊>U'$lj*(Z|I2mcgS\Wn!Ljz?kԟ'=Vn{ߢ|'#˝k.8 %I*bOۀu0FZɥt.Ҁ;n^4728y뷇dEˑv$eM{$hn<9&o$nȃDwDk4|Hă˃)&Nj7眵%43jip>$Ή{gSݲ{q،uU 8ϮXo}<7IsF 弙[3]c$ӀDAI@k4wXvI8CNnFt7&yzup~ST.K6&c؜d@&" [m3B'EK- ~en[&l@1ɘkdsB^wM &݇av|g DWoqu>o( pOű;^PL@[wf5i$J~..K]۩N浩$IR 8KJGy*V7#),pLdy"ݍ?S|OgtkRxVM;$dTbȆn8M$ꢼpݹSe?7jQ;?ڟ"nG ߤMI jcSb+)u+T Iu:cM~G@a3L!CG8?&z<`=,I҈0\/9]A< ZILq?1Dghu t88[vkBlwDݸ?"լL_cx[Q #j~{2Ug^C~*a ^ ܘ.1 D7J#"h$-dO 9aEn?&{:1ޟiI?x[њkg.ږhu#Chm0򭛡!p =D;sQDPyݫNW;g8JZ}Ylahn쒶,D 36%E 2 Γ[nU%IR{zWmҞ[b_ҴL)+1ks^SKOKٮ?,Y> pqJIb;{$~Ou 4jK_^L0m3H  Q?64 +}?n]Ϗ%iZ̝Rl݊igd?k6W-rP?EY;*4)CC4uEqzVvMM[tO\/޿ac]Y +֝dTbHe牝*K=G$a z7';ִp7T乥d "p;hez=Eknޟ~@ۙ,K}50ևƆmi aҺ:?7C?^-O1)HxF|v^cMg,Sؓޟl]jR2=2b HσĤe齝$ kՅ DCjZ 6Smy,t loqmek0QbE}츴[]ML(OOZ{HãW4]y}453lHE*7ne*j;?|>?^[0'ʯ)gA$;Iݹh:bhVnԍC?V'$K)oʶ3ܙ] 9a dݷ*>_iDA\ۃ*=SN[n)k]Շ1.ɶ8:Y`}I_ҡ2F x *n@L Lhm: As.Rl BŃ'|8~ 14sjKRusPC9TSyHyfӒ472=3YUP#9]lGܫy~E|{AsI &JUk;1 8ܝo]n^%K=F| m¾o".dyˍR'K1۵N6 b܇;+i4-D eɺ_u U9uGC^d bxtLDFE[8Ϣ|^7uoP{7Qݕ|ˊQ 8c&$1kMz# 8(ܭb{CW ('u#pBq8B %~s cԮ$I҈ؓ?3D˵7Ui@"1gp97"8Z~If?9N~OAR\.y"S@z@Зupx7yU-K'`hNH+AequuDQ"mΛC:ѽĹj DV!E=z;yWI#[>8<  ĹbIE9i#\q8T婲9rND~"Oz,:`ŘHmBs8 ԽDùDx;$]g_ z=.msަ5BRu^ؔD=A1znqKg[C ]$iHx }.XJH[8އrm@sYIZ5G14\~ڌ\x-Ź$um֯"_#P.E$ ΥIܡMYUy"\ab%(i|KǤc/X/sYƃR/ 4TS(XH0@esya/sY$I("%-ӱL?ߢHakW,n;$ͤdxǠN=b:̳s(`ٵő$iWWnޟ-ϧR~ѷRxX.RSXk8n`ldxZ XҒ4T%Fi c7[U},$I' ?c]GLtq(>klK4:'zt2 dw_YK.B4o͖/!&]K$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IrNIENDB`pairtools-1.1.3/doc/_static/hic-processing-pipeline.svg000066400000000000000000000563741474715105500232140ustar00rootroot00000000000000 image/svg+xml paired-end DNA sequencing reads(.fastq/.sra) ligation junctionsaka Hi-C pairs(.pairs) paired-endsequence alignments(.sam/.bam) Hi-C contact maps(.cool) align reads to reference genome(bwa, 
bowtie2, ...) identifyligation junctions(pairtools) bin and count junctions(cooler) pairtools-1.1.3/doc/_static/read-vs-alignment-vs-pairs.svg000066400000000000000000011223441474715105500235430ustar00rootroot00000000000000 image/svg+xml side 1 side 2 5' 3' 5' 3' reads (.fastq) alignments (.sam/.bam) ligations (.pairs) --report-alignment-end 3 In cis pairs, we reporte the side with a lower coordinate first In trans pairs, we report the side with a "lower" chromosome first Pair reporting order Reporting chimeric reads Other options Unmapped reads Reads can have no alignments on either side ! Reads can have multiplealignments on either side When reads have multiple alignments at either side, we need a rule to select the one to report By default, we report the 5'-most unique alignment at each side Chimeric rescue --max-inter-align-gap ! ! Some reads with three alignments mayin fact represent Hi-C molecules formedthrough a single ligation event UU ! NU ? MU ! MM ! ! NM ! ! ! WW ! ! WW ! NN ? ? ! ? ? ! ! WW UU UR ? 
UR UR pairtools-1.1.3/doc/_static/read_pair_MU.png000066400000000000000000000410371474715105500207760ustar00rootroot00000000000000PNG  IHDR%sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxux?(A!$+BԨS[گWzNFP$!.+?Xlv"z<]֟[e|N5^c'bK%~2CqcKR yQ%i3}ڦwo mL=ۼAqq\l<v2h 8G gܟsY[O[ y8fhBS̢l汃Ŗ$U.p3?;lIL9lh xs#_Ht`z>1-߰}{p,/pVnZW۲d]8*\ŚWoډهu8TmYn% ?)lxk_S%nhr>#dC:i7(gGNb7_k).⍵& OGj_M Gi%yqJ[lIc;Jxnr[lIaeYNS_U]ǥoQ}17TdųWiP^> ˣtĮ2VL^y"{2'l\ջY9m F[u哶⮭bŴ)k+HJ HG 9e` L/Yoۊ{S8=}~u*~.j-Ǧ`Zuki[lҏs3hwÖB[l9o glXW lEx.V=ue^CiFU/\O] @ &gsFmj/@ư-3,#[^D=eaI6ekhŖBPnq%/zw| *iƍi.w/Wbu9#K% h-Ss׻~pB.Ŗ+g3O_?[I{p02j'6x5k DɖM銟KGZ-fiGRœXny"6}?[l81HTW =wjݮ`rqϣt[B3h@w*o/:PF^mǨQ{ExlczKk<Go--?N.sao(Ezܕ v.M3iG_FCβǖ?v#.gb?>32ƌ8yܷhvHj""n˦ ɖp{ªJ[Q\x8x2/Tȱaa<8 "skQDww*7dݛwkH;:[l*\7m:l-yJƓqRS5U%.sμ[[Q EŖ=aK񈤞\=Bk\a 9Iܐ)ֺ5bR\dvl~v^^[2,ܱW~vrT]$;GD$՟6 \x 0YWs\Hj* {}38ltU٫qU^dMy[öe215Mk?r9 vlŖqRZ~qJmf-S8-=Ckv5$t;.ugedj7֮fN=Bc0Y6BWV66)Fk_7ב{O|Sovf@I|E$5Q$uEj ' O?-k_GH(YW?MDB1ؚ޽nE%%Kp'e~=)\^2y%+mr}7kOC&GLw/kqͷ-?FY-6YoSLpxM ǎE_k'6cf(CJ|rk--a[g\PYcKhɽ|x-l?KkI=&[ *ypB5\EH#ym9<(MLj\.3 $EDGi`Rv3uZXSbD/O8Q暹Y].p^p$)px|Zo5=|6fCj`>;Hg{÷<8FNq6V?=ocCȉdOy֖$i Gzp,9S-גG&ksIE/ؖkgisه{ëeZC[qb˶?f+m#pggKG\PCN7^ۖ,:bkU%ZGppm]D}W{a^Bqu+|4V빃8e"sݬ@IxUvz[$z(Oډݖi9 3(]#_<?{CLjC/~/-1GY5RW6~۔{"LYe$ݺd% x^[YE0"ɞL,o[Ï[lWƤ$潼u4d.g?-)I!/7\cIJ"tOy1}I.Iʄ{9Zۚ"9t~ֶ&&SHY뱦|,9߽l-F2=%߱mGZC._el{l ڻVΥ'mHwo]mʔtˈMkM"rIg. xMF$pc_nWg&4#\>,TIa-a^Z akl[lw&qƏkJTL? z\gOPG[l}i;b髧ٱp-hSqth(! 
b\^/5."SiTjE[BN-JXMBˬ@tWwmrt8aK-dKHdbKomÆin0/x<Mc7b\ǖܸOCzmBw6IAhyd\\ de|~vXAhqaJϜn=>'4>~:;Fri5V.]$SPbd/LB7m4;b(4=wrQ_fXvM6vgPwX"v\|b^+QAKչzGN_V`9M.Bw$uIso%ʳ㋍m씠[*m|eC^Ah4T7\{FZ,İ]QV%拦Uhka@V&ND oR B{UnCz[/汌 (,:З:0֣n=3u{(7~D=8 B A#fu,}Pt {7no'c'ty2Q+o\[~ºBp~j.!UI`~Wf>eme-'{ם,g9C' x kJ"!`JCMqM>^}*hn(WFDr V5n?ڞxEu^`AT Poi}X4V=~zB0pŽC+^YwVơ)AojN`pӁ肉Wh: ^ FM9>ϭWGj(b3Qڊ =`|c, e:$OGƗ^՗2u2}ٸ{ `tF {E[SOw"h:\ N" g(o܄fc{s9%UFL**'1Pf] @%xAT jT'o< 8Mv'&r[Ò&h}`-}ӨnL4 %gwB'%U2o6>^T,{v#,I#5hC%507gZ3da>AmQKÒ5X^5HJ7p)j`7~5*znCڀ8esF4R=GӋпDoB\]'wލ aM{Q1sQ)$~ctlߍ؁Uo"LP?FW]aI2n)B2e759.zs1f[š4=6E==|Bu~4kdV#ݓpW A0ZCi.Gl]XlKpNQn֎yEF)7_618o-=4Yc;NT{(/XGu/P't1](U;):Ul}1z?F`?ǾD4v8x566s9וKf\2[hznJTG{O#',芺Gm%8Q;o:C,pԅVJ KDl`?I(ϐU+0u/[|d ~B!:l%$*HG=Mduw@˝-tOVaxΟZvŨd$1OyͶEO?QtQ l|!QnnhcqͶ09atn{Pi߀CM]T׽6ґ\;(XO JH$竰'{|/8QaEjl,YK=nC: w.MDspc<|]#,)PRnCdtRwtj>%c}(MЍ n-lԨ_A{?b5%mޛEg§nCOOT֤\IIP٩|'ȝ6ґ CeU8|SQO֋3(;Q] :5>oC7݉J{+P'< $$ iPE֚$۝󀂰7 C Q㽽(Z_#]5Z w Qњ/,G;vf,{:1cԵx:cwU-B GG]0_{| 7mЩlպLQ( *B?6^2p+ @y23 X_QDx)hj^PixzA@a r-t3{zՠ7 BPٝ![(!_k75w(1IT;+žY؍5 . 
s=݆t@bQ#&HA=a B؂ YH*T4ROEHmC:@؊n D ]|xEZAA[6Ġt7GmHnon:hAԨA2i t^Qޯuv tM G *='^*$d$, Ay6߷ې=rb!D j=Ĩhj U(ynneAWZTeeBCITlx?SPYAMɈ8S&z%$Wx\YIIM ^ot4105VVz %;u'Vir vю|V.AEO8W;ha26:(FiFTzbg7ƌc&xtm q }W*+xp8%I#$~'x\l9 OCQ{٦o=ɀG3_Υ?hL&O)A6;GD&4EuzDHƨq[, ,/>JjPn1_~ȶكΰfN+vrt"C3A|x إUU OHd@uu:MY{[XMs5wq 7Uʖ8_ÛOO\ yE/.QHNK4=kw=LzXGtG54f IDATבDm`#^jg%$ÖBG+vr Cϟ|W_ԍ|ou+iTs"L1$ Gwz9 aŏ?d,sعGwsCMm nݟKM>8ꕦ|zl,7_mmt˱~ec=Hk[u z^_M|>e^}X~& \igبБH;Z"R੯qGkht* ƐO;ӚMlU>2'ߍ3T^>Op1vOP:TrPsQIMS]W s1u1g!7`#rFTnDcK5|>i`:+kVQZǫ̌Lpvim z^^coO-_U ߱hX՛oLJP{;ùt~#˯(&#&o3+!(Q;J(Μpy^L&6]hӚk W.[;Sg]k,3#s P z 0_E@qzPN@{juSgӚԅ[KKwskRjEwS+ɽ84I&ˇf|oetr/{;>-HÍc}c~{;22dpghqgq=ys 7͜F].HJ!+vy"JBBN2ɝYܜvfwi/aI5Qk~ʂZύtᖏQZZP9d`t3q~\Frj vqjz6ƊrM9|7%9R=9Ą65"hh &4Iz謄$>-؈ lχ%e+w2i`~?"j*2'TYi>M{;9)-P53nmn*ۃ7+^CA},޳ ʌˡ 2Gmp#5^n $;Ǹ̑lc5Sq1)5"R.@VXYÉHT34Иv݄DDٱe^೴NyBe ; :g}a *-~vmg/=4LAQTΓ/; BPnj#dm0ДukV?$!Z{YN~Z}7x;EKgz4/ޣǑNń((ZX?|֔i`Mƙw޽zx *g>GN} -"/iN?7,b/8jnήf?wv.8̛~IP&t 2 rb/#.:ZV;Ӌ O^M˨u7I=9wVѡ\eZ]}Xe:Աֽ[xc?,;k{B%<.Oڕk18dp>OC-Ǵ_MdOݙÇp͸A=vۇTlSkDDq_v]V e߿ЙANT)9+k;>.5Mk[zYdHWe]oBym-qe __wv?lюfeB9a@Xa9)1kjԂxaYfoaM{Z{G]H˥3{| $۵vݮb }Ѵ4ɝ3s]lYL4ΉyxSpQ\KL HהM\ns/<w+Svi$ICfn.`.A:r8p\]zPdYԴYwR@ -=lEP&toGM4) Z;<.ڕVP1_ѿMx=)IqlۼOL2Shxˉl:oR mϯZAKL'0p3 숐dYoOn^I|TkM.VLզԱ6w{o_KI.bixﺜ&w wCϬ\frg ݛsQu5ჇMsywFK#-K3,3x]Y1~δ3nΖs` +$&; wF2;jk5h>znF?h}t^:ͷwTi~zR(N 5.XԼBCIջal` SqXd-bjR\ljg'&m+6MoĄ129Ykob!нNbsɷ]k+w>7Pigz_ț^O4%)%Ju՝]Y|JDsp'9abx|9_k ~M|^ojO[Xj 415M75f#֔$DCK u:9a@Bm[sqF zEN&tf\` 0ˆذ:=ѕóM >.'8~ >pETsxtLSn,9!$ M$1lUyEKix_M|^ϺݺҡM-UUQަtV"k FhZ\4-bpf opy<,VN=J"ºb6ϜICz0ͭ 4A #AgiulS{B& 2,dcr5UZ;5(l%J?~AJ?~q\2܁ネyl֮CXQnrҏ{Eó+I\4D>xn * F?l)dN}JCLSBP }M`\9w*ĩD$ImJlyE1';zƐO㏅.O}lX0&vd;n 0gYhhUCgiͮV蕂œN1Z@ֱ %3b8y`i'/J 9L5}^: ϫr.cf-g/O倞psμWd1 ݝ߼` E8vܯ5A2sYC&6|YRh׽yBB'~0&"VU򑡂Ϙ^֔i6ZXV9+#45:^\m2byVf1Kx~?ap?+3gZv`jb yAk Iq+XtXK0B$@,[K0kT-o-(t /p^V1(<~~ֶֶ=oмmbDlZǺݻb>b=_F&3 ϭEYIMFϭ\NUcH/]ip?[KjkxҖcye9nF pk0Fcەߚ20Ԭ-uÈ5=6S,'/3<"ֳRwy^5B$Q>oQޘR":i+-f3+goa-rlͨцdZϨU\jq?tam& ƓD @: ]D-%1\mu9#REC9aH5/u6iM_=cLYecovaGm~=~G^dG6~:MSo&GЦQkgm<Fk|C6df `X_/6k#R3.nh)n 
>ԁgZ~77*6]4d},gcZI=9^W~ *Kf5kŦrF AyxGN^kOŴ/B?3k/.Ik;"b-Wn6}֎NLʄ񛵉\J`q0 p#u<Ѧ'z Pwsgp1pDc`!V 1ƈ9JKZ-)nSܚX-u BX  4UbqL7\f-T*C-K\xtr/&C|xm6L#<91F7yLi7#h̳#xG/k{`!PNߨAjG13 h9g5v:4-,6T9&--.Ϧ ڮmfMY^^TSw:20^S:Blo\N9|bR Wn 辙;b66LݟשڢG z a1μhE?{p,9񛵉GЫ[Y7||^4˨v xhX j\߀Tn}:8KZ£z@n &4d:P\u !iQR߸PID-.Z׋ HڇKڰEt@1tƂ tibRӍ.FdXv4={ }w9PpAv pCk9Bxo(!%0joX.ODB_R̸hJ|T4@/Hg,B`S K6.z(T3C8pNuhWXnAHd i']o\Eۄ?Bu~ӀE{tƂ tR?E16#9{XƿvHg,B ![┅΋x}pub~66!qKCY3;\&B du8=hX>n &3<춴](% m$nHr0.UI輼s<%xzߔθ`팭mAZICE)ml2G e/5,7.jθ3Qfib tfOC.JC * 1\g7.[ n]B:΃u$, ek~gOfdX^38Hhlމ^={ZQtM-uo^ZlIB`5 >=4SBFƝ >RSl/0.b9B '7K q-3LX-k)Ey 6#o24 jK[Qԝρc v"uoߍ׭UsWi)~@ϸUƾ'O ͭ,q硸%B:]/4.7Mn%;}6;FKS_^BWIDAT1.JU==k<#x5b#22K{pUWU6#tN7J{>jx!PD #bm-@:cA,gcL7QZAh+/PDTkO\JH,wriZs)j.B ֿcP &s#9 WKZoQ8Ԉzg`j,t1P1tƗ}U x#z'; z\jBDӴcn522QU?ī?Ӂ/JUݒZBXĬC}I 0N%dd,Bgת_` 3+xn#%dd,Bgg-05^J9QSȿ:'IENDB`pairtools-1.1.3/doc/_static/read_pair_MU_MM_NM.png000066400000000000000000001250171474715105500217620ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxexWր_;8 NN1-oq˘eN)M))lymگRJI@ӆf;fq,lHw$y$#y=3xIEWLK$Ci E12FQEQE?]T hKx)`\X 7-(Q`pmcvV뭁9ÀMK$SFEI&F%G* hNqKS`ik4x)P,#RKqE]yFAgCj~@]“L.yQ޿(JD5((({MK(Jh`R?N&18FKx%ہ}((ԉ ((((Vgҁ+UۅӘYfU. \&@4Af9{-Ⱦ˶9YosiI22{DY@mel%4ex2Ǽ5(0f^kQI9eJ>e;w4={KO(zs/./m 5ŭ%~q¥&( wDc[wmeҧcE䭻ZIKҧ_]=l3Y\R?GvqD' !ZSP:WdWy99iisW2SSX+^/_YŎ2uťIZG''VT[1G?Gk,^vCi!.@C. 
lh9 u|y\V>ͫTKDnD≷BB:1o .Gņ]Gty3u90Nv=i=FgҙlVC'w.ː#wAmrN>܉ݺsRTQAQEw}GjwnݹewEvtq:[.?Fp\s_vC0y4zߍui9\ie<{v^ <طº\.~~O l3$ \VgW AѸyڄ˙juQ\OM{J|2II^vnbKQc.'\̟p%ފj~FŔK8bRl݄{EJhv=+2.mp S7;v,ޱG (gۼX3:v#O7:v#Py?rSˁgvgs9iEi]srQ36os%)w t0o.A.37obQpQKy66{F:ˋ1o~=ae(e./{H'|E qDKiEIݶK |rA;mr"!Eq OHkҜWHjV'O;U.Y@jV1\r(5^]\R1]o݆KTof[2k\.rps|^L(x p5>o%z wPe ^6T=̻Tl\vL3S9}ٻz%zc.gv8˶⭥msy{b>_ʐK{.y+?JJw%5 F?OZfF\xm&.Jb %sbZiٶFrym.c\00m)uQƒfǒIOݦ!{رjIf\޾Ѥ}/#.KÎ, .5)'ݗtX޲9 ՜ӻ/tTO.gZqs~wo\QYy㯠bh֊W.qQŲb©iҡw˶y?t5'XX`oUow;ZZ3.S?dݬWow=2&xԏ7-cP3.~ȟbХ6b:(n^D?Cfvf\^E]°՛(\5ri2̸,~ WIOJQR<:UJ*+mT věs@VVrSa{1Ǽ6Nگ^|~;\ܘ_U.\#52 LzzsQD!pj=obH!pͥ\3r7}^p\iEt]m-+]̸Ǣ7-Ws.uii<8brZZN덵 ii ge;Ws.{3yqKkdE1Oњ,~]Ң~t;3.k{:ae,|F˾t?F#.JXv7l5re_1C. '9yzLaVݽ>'/˂),j_:<3. \:r%:nޜ ޞi,Z.5ACݴw˿ l֜]⢘b捿dwߥOIZX]d@ch'sʒB8H?4&B]A:M0x}5!n~U |˕Ѥ}>"ynrp2qZ q"c47\knᵆp;C;Nf%~f086dڀ¹]Ϲ9\etك>tǥtv yfQuA%٩ sm咐E ׼koe޶m|iK-iNm+]Zq!۷e3rxs"H0$7t[jx6(E M2/ ^Ml:%#ϓk QlKz&sy:77@'޴ \20ʐ$B\Yۣʄ*9y A\"m3G ĥM#j}l `Z'U[:qX-۾:.*6-.CKt3cSKw5t^vl F\"ؠ@zJB!aB]-%wKk" +?'۪{z)N3e@n%%=K]&][-SosD{+z|hAz+\8{W7pn|-.vZl}>VmIZFNjo^ETovOn~j%!.voeڐɠBl]yԟCHqt-]yfϋ[]LHsc2[vEO0p{κ.c O2eU9%/{9O][I᪹rYzpc˵O֝v3҂yw\oȋ.u9sjfkrB7w)6w2e;7r{d4jU`]gۜ\5L'\?&SsSrl;}\xՃii6d+\RӸ}pc.̚Q="35[SsyU.]Ğ]sC,u\:{H]sXt,Ɋ=Ac7?Ug9/.X',xGbߑ>μGǮsQc֠0G.qy2B`^o*O%UH` aˢZ^X~嵱8;$|W}4~]5Z!.]<%.߽).#O?yʉerլp9niuڍ<#.`dP 8ۀ\ *!.y-nO3.ws]1xMiނS1v3NTQۏ8;nU /CeMf^; FM`g(h|~ ."e _˟iexf?VR$ЄKF./@wQ˄se@:x(=&m. 
F浥TˍsyahL~5<x"6WlGk<֌\'h};ƟHU=GnZ kZkye<9 h=h3.gWXr 0rVfW/ϋcbjt:bLzɸhEߐ1]m߇mpC]m;u^i>w6^%">z':p-Ks9Ҵt=v߿&S?3.^Clych6ܼKytGfwk#Tvɀ]LwHS.tms_L/$Zh>zĂuiyhf=[Y,8m"M a9yzҷe_櫗tc(=O%ocsĐ,ynqk&D#._'o-&7ݫt-?|\ՇC;qaҳk\H)qYǿ/˧HNcK`GH4yŬ>Fǩ=O5 $4WoI^4;.=N5]z9{q96g©]N,n&C#.O?m]3x޶ /]x$ $d .&ϋbO /ǬObxK7a4Co89f<)d:Tb x{c^4cқpye \l祢yϏ6R1},ڱ ߛ6ٱUz>rJg[ɂdj#.V\vݳeK&]ƸEqT*b6 AP]R nfQ_\ᢸf(9R$Q,|9ȱ.Loπ+_$%=rf\r(5rٸyIib("IOHW lW"8`@b优ryRoEqMVݾ` ?xĘK\Naev=w.;LgfK%3XfDfnyVh#:u=cuZ]ԙS u C]غ%ֱ3eEq')\1<ʋ1oh%p?J]3)5zcEvq)+Njx}>1dfFJ*؛N%?.syp ViEqMToo>{Π˃.g%z YN5+?}ƌKSEG#.+aHuQ Y\ f.0䢸VC E'F\A#.\&Ħ_'s9r˿0L:eW/s GL6siTVͺլ97JZgJ;Bkecq1:ڥ K$Vi<)cp\q;\VS e_Tˢ7M< qh?oX_}kA\g:+ %.tyu|[.;i)?{|gdx?q92Μ>'sWN|:ɢ 6ElۏtXyԠI ǀw %.'7Eq1=N^MU @|c23ry% y%QRed^Ll+5RrSRR ;[iz:py\b[Zۏn'Zu S?dOuƩworٗ'mrm*NuhA{rLp9{O͈=L۴!n=8֠+.qQMмpyY(޸E{\7r{b(d'xx {5 HrRKC d ؀ˏ}^oW1 ~R}lg!t<4[2n.$UrLfZ xaRJQ;2'͇T*!pQ#yHz0PX+x_ٶ5%|?zNj ֞VbK Sh"rʪ5/Izxn+s95%O7/242[.5;!b$퀛H  [e $=5A*%d&ۄ-w!1T LaEq_~ `&ݜZB x5Կ(N+Vx@:/7LBOU۵uĮAꂭB(ьTz3xl;&8 O)?ߊc 68Vq? (ܬHOg*(Bm:ʪZ:"bp!i[V &A?TҝUFkx4ٿL % #緶lYHPC3dPj3'4zcj_y?{3nX,*d~ۊVgSm#z @y-?6=NCWǵ'IQS!: Xwxȗjj瞉@L 9T{d IC:Y*gLaEm9YWK$v|q%co>p!'+wQ~CG'k)XB‡ go]iWl.6$}O@' 2?x 7 !/%i\Ykw"9q:)VlƩ)I?> d-3>>9jJ=ؗ끣)FRFL$p&Xz|305C*P`|HH%U]5%yw a R( Ϊ'6r}ggc;75\MCǑՠb>*rQ#CTϵǣ%absr:=?s?JM |_֗HXNj!%RZ 3|?C*!hVMzajrQ6y7;يwO9!D? \M; 3%HI>,'۬XܯH~<JL+8œېk9΁}Cc߃-Jv2s_sq~:-0-p:#>$GcѤBbF7 wfk H!h 6_@^FTB+fLف'ؗ}?dT}¿(I p,>q{U M$ȊdA&]Q )"2_!a4H]џ? jr3RNR uJ)C3i>q#nژO>RR'J䴍Aݐ.sѤMJd|LX9i+\kk6:PINRB+Éؗ9v7R$x y+nD'MyHJgj_W?6"a1LL+.kډEu.Q:[ґ$Bːf x(ߘ|ⓋTʂlBZ:|V'd m=ߡ2=+>2$u`Œl#B*5)iE+~&_ hMҁb 2YJ| LQ*>dIEt yO2$V{8p'9n Z(!);Zg͌Q 3>Fsx]Cfu&sI8Cbt|UڥV41q1 iXKU5`DXO0 b$Lfu)3~D?+J,W8T̑tnAʇE36|624Gq?atθ|6HGpS%pySXH LcZcWǎ wNM%>IC&' ƙ KB5~UrXz#a҃s 4 vt'|?f%x6#'V`Ӱ2(Ѡ VLXuB6uW #&VxݾKi5U ?Һy2l1?ԏJ9L`n }a!m~Hy\D wk'G ̩% h[pMHɍrNDM(>,EJ+;R& LCJM nljؑ!`Hv͐DY\0dؗY`o(5 j.j ͐R/{poCB,nfU{ aO`%x-!E8Q|ACuR{$ EmX27vV< `6IW/Y CA(\dZD*ÐY-ng:#/?:ч ރ4v. 
I|3]"%x5fuvHY` RƸ}XN*6܅uqN3B'+b(HZI_#c (2YRqڙթBODxrB;gykN7> 蒕=F8!*6"D+" W ϿC[DX_:XFMgٴHL)EffV''YcV'UnE- ׽45U$1 s}< SbK}ƴH"dŭ[o6<$4y,;BB#63]dVK"N;$⠠n샶Xy5b.p$Xg"Ed"1&m'e-L$j#þ]n#+ώ&{Ek,Ss"DBw~' (l q#HsWFvX_譐NBa2Hw6j -r,6b rB%CDADp=0!eRH$g~`7vIa.ϴA/?eZ4trE'+)MB:CIn@M9yӬN %qWT7k `-"aX,ƇyݍK" Ҝd ԤJ0 \B[|W2LJ9+xmNƙy{WT3-#C mýzC:X,A&d3gvI"E\@s׿mVVڸca]a Q|iP㑉Y]c IHA:)bGB*\\=MT6 l_sI; h#^H:y8$o ?&2$w#`% N |s-658-&^1 )G81"73-$@f5\E nQ,X.cЉU& %6x1J'~š$v62hop&%ƿƼ}fr'+ݰ΅&T) 0 NQIEB >{2HYJ*:_ANX!F<jZ$Eʊc p_5/RNF^ĽU4.8M+BdUV?.D?B'K>Q}5EC?^di$ :dٸvZ &~4ɤH(0R] M D_pOlo?mO|2Ei,o# n/+ZF~UE\L$:=fUavo@wǾ2F?Ā1 !,yʜ⾯a%97(Q#Ei*wS #͐y?"u{"A,C;Npu!r h[p_ckT |HԌY}3.KΡ %W ?'6%V G4E(͑v}D$OcB #eYcdOI2= W?j_~lpR(8\S+%~AEQC R?xkUY uz'"kq:4ni "KR|yF?C欲(6. 8n?%p 2WԶq>_\Khd3:($B̹U N,! GWZu%d#sV%+^ː%y (A/Yu|2%s+q6(xﰾwEQ:ETbh) FGw}g!H}󺊢a+NAOEViJ.EQEINr6c=(IP2fHp#IQbCQf_ <p g=pt4}(J l*uH:s*Yz$+ÑDė e׾)'n@c+(p>F6,;Va5ϩgJ +] xץ4MWQXO/T.8WW$ȯ~|c(q;i+??"t#1VI"7C:Jh(Ӆ`dAyE?=" 7(X[{vW#~(&Pƿ?P?z$d+\L$U^BW l J9ߛQӐXzñVH~Ua"}Y,i,nBˢ2m`?sjqXR%u7nZE ӐKppW<+us]tK =ҨDӰqo_?F"Kk>34}gN:^"XG.\F*7%k *B+ꋿii~ IDAT%a52$7P3wgj1rRQUd}v}ejr9K s+ꇿa<8~j(X_\?uP q EZ7Hل_Y|!/;dɵuñ$O6v2/||gґ/L 1) li%e1dA/EWzc}n0#HC.$νȸLrֹj%H!4DSقhPXx.n0rxg%_MarYWL\92R>ك~QJ)e\2-%yH`z ?nXHE Z9j_]8:g $:eӭ H=ٟSd!+ˇ$&mƍX+8'DP{cG`yzV/d}0(3މ:?j". 
[TzQ"ն}jy9O[L(qC3\d nD B͐їc%( ʳHy]V6XX \Gc|g aUb]ε{%hj(]}]܂u2 (Ɂ (=%2N=)$h/6Ch3m(+J5ާ,oGhrȹrbf~ z4 M DBp&!(}hx8m-E:I>`)Rrν7_"s25h%5]g C:Δ p0@;hCOjQ͑B;SG$;+%>}H${>d&n }D5b^HIXI:nNˤĜ,d .BTv w^G:A(u dr)#zvH"`NA 1㍣Ers<]E\@>2cl >n\zX}@(s<MdP[Qљ@ 0WugE 0rS ϐi :vQ̓5ЁmX]msm_Fo"AM?"咉J6HXFv ` Ϳ]& yi%tFG{'L#[+(" 1' Y%8~cͩ՛ 4ds2Y!Le௴|1?Q LϤx{c<+=)`UJp6!8|18n2;>pWDGaXs4eS%7d"2'295# :vIt L"a%2!/"m-\xGr"( uiFdB>̜48,!t}\NºHGv5M8 ÿ(8:'&Tu/F##,?u(J(+Wv5J7uNm5gɶ6,.yôHĥ[N`",Ʊ@'sj1!NEQ9)_26fZPsh2:֡cE\bݔOvHӴN;1DI$lswa^ +NCbf{qӉ(t߉H:p*k#15݊5O$jNX%]ϑ<մHL($n̢CGb$oD(2kZ"r6R{Nb׽A:OBB2n_Luh, DX"nj5,$|5~D c-eD S (au*YWD 1CFE^ErrBlŲA~g "1)H!w$1x xH[-;.8԰K48 z~yY՘d#m"M2k(8RBD:효?Eq72?"$(Hh2:> /  p2H S9lH\}KJu@Wy8~4AC Li1!!#3ˑq<tBL|gȽۗІYb!^0 [ | l,YHRxxiBYRH(Ne? `>RY u>9D,D 9.D$HrDYI^dkXk!/t? Y]4-*YK|@$dN*( g;XvE&d+c7Qd~5(IdpC((ed۹PGf~^Ƚx.+ZQ%l@LDF~lFbFVv6YHGP2%VEW$Xi8oxj\R ӀO_QE c=&$ ["  $PҐk.4D(VQUO$E׹fuFtdպ( :_DVb2"s[x!uj NA_"3Ϭa!us;?e+CC耄"sbM2PEQC ZiZ7H Ɛx,̀I^JΫ5&EKt,Sܳ5:g2)ٴ~ ZU^{|׋ǓBv.v(/TSM"a>'vnZdRҭyśVyߦejQd@l}W &R #$_=rD液+HnJf~ǐ|[w1oy}%n=SgSXST-Hx8C'Y ezvWT0$5ss_|Lݸ%teh;h6|@vZtD˿7RlߑuAπxSGl9`@͸o|ss`}:W cpeή~}xacte6_/gQR)xyݫOثtMO=ZwkV|*vd&v|<ZCNI9Ľ]$97!ԟ w.JSRhT8{DFՌ4SRR|z{x<rBfKyUIZZ`mۗ|㟣 T6JTkv.*Ys'%xd^/wdGYssi$k ٰ{7>yFx}[i)+ wQž-#JK"U犳-/l\H!-'RO|^/)除\?II%-EH@e.G<Ҳ=f+?|nGjfr[II[5G0+{elD'!>x6to[ԗ uO,);lޚ^?`3j{[ط߼V= sxۻ-aͤN3U3ޣSHnֹV h"eҷs,2i- SA lmf4`չRRzu~dƘ'RRxHϱ,TbRzq7y߾dsWT͈~5'|}#3=CnO虖Se%{\Y< MeW^N6uH 7o $L7dd\mx.@:+^ndS*z9ۯ]!]+ w2v}IFe5WڢBnEٌwϟ?OvbsVTj3‰B?hjké#FZmAZ/?*YӤ>_~֧jnUɣqЧ~~;aСX: x.$Yl1TAC0vѿ`d}NX-ZbA18~xWdry效n5cvp8]/maףmcnBbVuP'h]>mAez[Tm|S7X-meY+O_1M3  C1#.AoQR L&>ޘQԄ3Jq焉n XoIazCjv=Gyg"q jTٽv`6J1/8y\·'5--x>i7A::c7Z-FYQ هPPەTԟoY+O7^>7ڂ8}U&䬒 75B[S=zH5*詝;ou*Àh? 
10]` m-CpE|خOka oe iz)} cbG}kRcl`<.B7S\w)ǶbmvMK ^/OyҧHSGFJ}UYȩJ6[CsQ,|Ҁ@աq'c'Ҁ@[c4hVVm0+],/B]q>07 73JA|a}GUNg@ดl[+KPM=lҀ҆F¡98q3Y$ZJ'G~/9悛82a=K  }Hhl 8 @D=<_,wc!k_9h(gkYGL*uMgAKQo;g^Wf6&3w 6Ǟ:ֺJozVoEo$/=^+z$4SvĔ:ژ giiDΚ'h$b.;/ghܡ&A& 췍 U-Kߍ6Q8IZW@?mćF{'G4}:Lץ?wQC3޴V|@=0tn'Fl} MBUfnM m`q$ۧX,xvo_"'w1 E%z$%_+mAڂE}-gݐTD9_o܌m_Ao7UʺoC[/~= w1B-Sm/\aТphlJUuNnU>ŹQј1Bo>dק^Sl)0}ܓ6ISs{䘮2IRE1 )H}SGlO>/G[y5X+}6u59z#(=$ ?٘E;o FHa_5]`vԴT >Ru}Ö9Z̩7EW+E|;< yḱ#5:ij䋤ʠLJU1l13ڂێv {vኈGEz\^i$%_']TcVAn Gerl'; wd ̦yp=-;\}u"NENƣFz/ǡ /WGģ!ӝBcc ✚jl˕O[,O9!:y4bߛVc8&NoZX_YҌwH@iqB)?+? gCs,I<[V,KEz|nB /߫@-T~]ZˎgMVꏢtu_=A?'8VG|~8kţҌ)30tiz㗤7{k|>vh/TCz5"ljG}}F :ːApҧؠ /I3ֻiXlܧX)eES\+) ҧc,ol;~6jbX8΋6&l)k͸USaҧX)vKNHgק`)=\ا蓲W=KK1!eR{ɶ3g#U7I"#qՠQ9B7{-û{aDڀև1y,ׂK:{@Ez;<ʘQZW'w"[7> $}<ɶ}Y0o±_ ovX58UL@i hS IDATdlq?-LD;բQ/l҄ĥbT`6Hz:_xz']-j+;Sf<(g?iS *=@` \. ydݩF^gus3/m,fhZ()xbH5f\3"0 TbXQ$3tSG,!`uJ( \"|QWۨ+2.%P33fBb'HjPW hy=eE'gC._̂Wh|5na}fqPSKUbH}sqj%RYQf!fp33C])X>+KbۍJ3{,b!qc^m (1C{i'^߀`_qMX*ȏ{mJo+crg̩wЃٻ.: |8X 5 7Pj]']@OؾjpQ58hp^*k vV5Mec.w1W=$Qwaሻ\YwF[XMu²]kh\hqB=VX_@"UgBَvԹxk]h :Aټ=;b[*; 3^gz&Cˇϩy-L^6 mV3ؔǯNy`{qnT4`Ø$&N?j?rÂ|}3{0ud^gZ!lz-f_":-ZI0Z,=fhTs>'! 
rWEb A4p%-|n[.kQ{`A"bL}mʸ-Jnm [V?Ym3$^Hsܬw V<X𤋮@^h u;MJ;ݪyZuh=D 3?#F`Hl?\ X [̐-J&=fhNBK&/>s{S_<> 20-j^ cS~.ೢBIH‰Stu)c9?OSX&)f'b]1X}An)ݶuƌRG@%_"lq*d![\X?rg_O@z8*l 6 Zj*ICT6 ?yYLV W }CW\wărmVw!cW;ÅeXH^r{7_ X/ۡ]r;c(%m d kţWAH}__h3Dl'3CW5fv3H/K#Ԙ; k|}qx˳Le5fh!5`Tp0T" zC C7qc][LmSʁ}*z@B{aGQfȅ:ݖY>f(a\!OMYd] 1>n#]hwu0Q$ ӗ1C:0fȘR fzjn }ՠ1-&T * z.xO dLPs ?h7f}jp1O ޭ(MB6R!,[BDOw[j@ W@g`?caYrQwXmHLUGDžX XkPcLn :p;XGЪzQLZa̐ pT $@Z fHcuc01,\oy@ZQE /R" - Ubh`Dz^~aBǝ.%_c3'T ֗Bܙp9f i:ڰJem8BQLp X^{׫n[FSy0b5bЧxA*}S>3TM>>g z 1CrqH'o29BoGOnaYjp4uR58I߻caYr (kcxcsVӨP5fbG4ǧ 'o ,YD@;6*haق_CNˆS+#WcߕX7g̓bښꑻΈè nW#O7AǛǎt1##q|uNrj hC m[$hI$hrш MG]kK7_޴:]c12#yv4'q^F *  9Euux?BzrfFo7ץm<_@Ph%W8oSh~Gi!mڡ-fq@ N=х%o;NZfY()+)KHOt4 ҆QFbKA> } [̐hqD 2ɳwj(sLb`p(f.#wݓJrOIU1, 7WAr9} ˒v@ѣ,h TKh(3s{zm~:Vo2CGOvU3hQ6wa/4Z ܩ²FY1tiz҈5N]O/\XK@*%t AsUN0y4xA4ϓre1irR5f(HC0SAНHG1C䚱hT+mG1CNieJ$J̐ H9 1C4Ey%36=-F!Ezr8˧z[xHok1COIH}-QZ@{P)@vڧua4F'f?j|a]-VUݣ -#)&&kKJt klk?|VT_ۮ B̐m-@:)嘡/KjYXۋz|LHZxop $,,jPXzQ e?lj9y;X Xը53.B|}wO܏q&^9˙!FIoLJ1C?[%@(f6'1CDɑjTy"e*O">t.f*'1CK O+բ4|v[crW$Jߔt.]m-fH^=iFP[+V1Cԯ@h>HSt͈KP#Pd\ DO}̐%_R̐8Qz_"9[˜X ,(f%fB9fhI3rqNcZ^Xc6=F#ꜫpOՠ;; R z[:Ko[Z֨4d8WCTaҎ gt9C=ízvi[;w| L.}UI9yXȩsX:{Ro;^8,-uo/bG /s^bnjR"nR#!rMI1NVU#aW̐Qi=1,fkvϥ!ʄn"ye yZ[~cK1CKŘ y>>ųJ&)4l)P!vK1C,-yRPٌ3J"}jiCG98q3Qcԅeժ oKEb퉅ebb w)3sV jЃ*ԧnw8K'_nl0Kn!| t²%Ugby?YZph:H}JiI$UG~HoG}cơ #L?h1,a175fkr:z6C>{Q#_crP]2Z:~_t(f(㘱0 Ҥg3Ԩ}~kj$#L~oV Lc3#$ΘY4W㦁q>F][cEҕm1C3 Ԙ!AtLpk'1-mEfѧ6ᖱFPQ]]ݩ!O1'! >+KK u S_ؗV>f{B__1_(f(M.x:c3qBtᛙBg@uEP ZZqhaܵOt;[ΪUYGv} oc*KQ'zAAk lyMFqZ5XuGIo vs[x>@JڣgۚXC2k&_3Cǫk~CvwdqeRbVc0P^|}hߓ#;7}vN1x10pWB' 12|-V+[[-#bRDdBLQR҂# s']-\SҊZaa 6bdafd`>؄a~DxM C "aE/|Ld•F|Us36A$A]7FpNatƭ#D"^DЇ0`rV =ʚ;*w80í$~ 6&V@X8Moo}S*< Kv]k)W?F6#{1, \Kq3HEs7Q#Η|ԇ_J}R'뤙{鯍Uy&^{3Ae;SЀrڰ0ϢTX927‚z ʏ\ 0ĩ? ^2QB`ycf*@̃(`,`LT_ݟI#(ʸgmmAѧmI:u|f41G]x9hٳe߯ۃOD,uB,Q_pۯíz\#"<@-8;! bCiC=.>ܮxaCl6cz|P<>k³"0QXb" I(⾽RZT*?ūSF#t-.ollc6Ʉaa8'*} ~#M^7i`ɗ`ZD ;vMnR?" 
9k6٫!Ug]zTŗ/ 3HU|@Pd2 +lE];'ikEoIN>BqZ7z, k`[9hn(˳{?O^p1EJDaPhx.9쇤ƺEjD!yVpxDPo!Zo4Oz[m(|ш ZqfaҵlCC wKxo=ONxu!Y53^#XXndqDq0މ'̪*l7%ϋS-mxNV|~oڣ5|)ດ1 QIٳB~70w ~cT ZZEA} R&~WuU#{māJy-1qHfT5*k٨ {$KN%pʸ|qҌ:o/+O5Ϥ/LkDهPXgTX6>!,U,cMmmAD@ mu|ܛ6 |#vm^V*E--M3` *,$> #ONga~J7_J5qTBo|$, ZWHH;)0k\~t)P]<+q}0 NY++(6,]KgАሻz6?5֢Т& J+KدeRӃ&Q=GyQsil< xIXx8:(خiaaubX#M5W.έ\c亥~>;jeesD 5*+IQ]==&ƶ6}S3vKLKxN?c_Q$-(RsT `7 $3ZmX*C{0HȆZW)6҃7cn:܊ȍ|LH^nI^|36<]MP͸[oחd %/|P/X,瘃C)D+m<$|(%JY#@ĥI)O0(Xh:VB%fCċy&aKӍ )/-8mTH9)@C횖r8y93vB@o2,džbzep1G]p.@Ŝ˔A&ج-^Vx1A0l c/M0lЇ$^!|Dbپ6>El[yDž-$~X3Ը§%j\I藤N%?) $$!y.9b ?=V}r\%"_<QACycN&@lmWHާMn9Jۨ3VUa\& ~!cwm^:1CĖ{/\oٻP&|/7#g爽N 5²W=. M, l[<=7 WܫPUMízq1dh#"" {~^mߣPs$/3RaR9W#?1CY߈hcxΘ=v1PF̐s&RPh1C yd=~%|S~w x dLu WZ6 f}R|/OJ[Ԙk 1 ӭ lԼ9m_#&_.]$4JYI( $ՓPNCGAbhf-Tbԙd(36J7o/Ycd˟2<g @mV+V okLQ&׭ʔbnw E`},U,D IDATJ,uJ,sJ, {Gň@F_F'\svg78qBUXm^~v1CBU\jyZ!`_q51CBF+iP{>cĘuB)f(R̐'}Ď6$"r7ظI;Z̐0fhR0뷝 ||61Pj ]r '/ k9K1Cn`gqԷNb;ێc##XV 47W60ݕ&eJƳX 5yi2D[KQ!c8X 2MTr?,1WF}yz ~Ug90q diK4hmn/e_Bp!h tx9CQ*>zB ~ @K@ xQX{bd` .I Oڂ.=>ERP';ڹ.f(i8`9 >|AGcs8ǦR~o>b̐X9)f?, 1ެh3a%f)$AцP /ցccb>yWuőKM8 aivo(B6"r#U8bԘ oq3ȩ v3M4S{WMڍ@f wr_k >.Cxe x;Sh $Vw,^̹;Xx@1ӕXcb,bAͰM^ T@qps:QhS;>X r,@J,PMK ^;` *$ Hr Qji!蚔1R,PYCUb. m3Mmf3n/yGJ_BOPT νVo*2c6/h2՘\бO^FcpiА1C߭+#WELO(@FJ?쟤5Ř!j/;fzo4 I9EZ! U j;ڐ\;b}f?д+ a \# g8$3t%h94M4Hk1d3ݎ^ M#-9!r|yJ;@H1Ci3t,}1&כ"avSE `3th#Ҍ7O?AZh 6$"r7al܂ƹ3/z3ȹ)Pb"}qޯQvQcԘ!%jIJ̐i=hp^k]?|k3Uq(kw].7` .X5bBc9 kTݓqXV<MuGƳ ,HmbF2?Kx@'v m؇nX>(cC>e]k ^*Gc ,:+\_qj노³"p/!fJ@:A,gEj,QWcqU<=h(f_dF^}!K1CuOȳGvnA36/Ĕ@wXx:]~ X7X!C1#.A_*4? wLO5McJ\~شXoq T].9<ECǞ!u  ME/ 0>wtڊbHWw ^̱r|3OIX:QA@^Hxɶ+4*W>a!+2 L4ƛv ?Cr4Ъkmf8g %:..hC^DZ@- %qhB\XKXV_Z$neFjqg ċvɗ6 EP -=3֊UjЍrz9f((z4"Z~ 8gxsLƛ3p6톺5;5ꂛ8ҘՕT <ӘW&T^o(Ti1Cy50J̐8fchdh(F7E@:Xƾ*B; ZqAQ/87 Hae:hу^mfqN}eqZ뫑A[PdxDh1Cߐba[U_ޓ e׭`yP SMAtۙPMK ^SbfM7b t^(Lb-m{?mL 9]q_t ^Xes{Rq'a\@mۡ03eCѦ+1P4f˜ 3@b5CD @dA^zGgQTm9Jzӯ[:n=?!ݿl[)S`;47`op-x轇Qu* ozF `ԑ5DW#A[:ض'U6 +~BX9n~/lTHF7!s\+sA o? 
=6M*X]o돛2Sh,GsXWh `thU8m?\AR'M?7j k dVUJOj}fAC[+^N"lV!X Ʉ#ǷިBʌ ~9,T d5x◟`ZPeQn_:RLhj_~/. -N}, E0}td Pg b.cfʱ@yI, BKzjNp!@ۉ|h5.ЪlW  #:'Wc&G-Ubq 4Q*kj[FCqy\|7_aق/b|0;! _`_^zil<66 Xm  -V?5QlYüddWigĎ|fv)rY/''^kȩǃc7 m5̷@҂?C FQVq[-8?бKy0B)hv'Т-ShGV+Z뫜n6td}Eɗ!0B{5#_Հ{QM3qL I: .ű?Uf[umBƵ߷“Xw""|A9PeW^aj>w:ٷՂ\Бk?E[EPRh @8@>?<0g [:&v꬝q.$V7 nuCG{QЦis`\MӭЦ CX_pӭ79~]k֦[- N܈ XZk(i֊ XN"#^зZRWZVCIf6-VA4Ե͛jV-UxO m``pFȕ}Q8ߗ Z@ O!ee@+Z^kmV+B`Y hlmEk,@˰onf6+'Z,hlkZ)O8Yڏ6 lk!j[Z`LHƭ^v 9 EJuS?㼭 hkx[B 턵wtl9 ?t4ТSS2W6BnUȁ`p&B;w?s]m2^$WpKߕ>CuD \GAHP\" yýI Ϝ8gah~_ua?4 bQGµ^&:YD[mu0IN~BNM n="6|p_ Hp1V;m!FQ6(n^p`+}m}K} Њ;z`hqBwC^{:h1څxVoVr툈Cqn2fh=ۡE-8۰ڬ˓} DD4hqm?:֡xXɕ jWQFO)>7) 4 `1%蟃?DDt zhz@c q T'@uGp h } }~B_S@ :"""Ȁh6h9Ͻbӛ|?<'"rOz \ТuUL6x@wR/ Qo2kPכ `r =(DL±ycV±FHS[ 2VK$ ߆WZ zDD۰#""հ?ثGDԻ^=""}DDv5}DDDDDDDDD370{ep$o߮A:e6{Ȉz Qxm$ 嶇 c^:<"""""""""KAxkEz{t90Uil;5 ^ p<|)}spr^_w+ <X徭2DDDv{`\(A] ^o t徟Ѕ> +yz7:nϴEf{`%ٰ}?1q]I&ǚ#O `qWwy#zDt,X9d{G?Y.7;^=" rt{d .fx&~9RwVNǢ.l;@PWa]8,2 "" _;'D];k'NSĻ~퍔'نۉw+n?"""" `# w}NdzO"""rs>Gc} Q4πa/u;mWFG/Z7_Ukv"L؉>{jg]'^ضPpP' KŻnáZ.U;~ox蘈)0U:~,So^QW}@a,60CDD-@7ZN=^|~{p'۟`|{ !qa25}݋C8)0ń+-CDDDdcS\dۛ|_Wxvlfh'DDD!/<[b€ΈR8A""\ {pk`itk.&O9kM*0 |֍}'cR~{HDDM/H-p` ;vA"0g.9 PIDAT8Tp>/F^%:#DDDԯ4X~{ x .Z\ `n/e?ȻZ3anG5Ovs}-Q8(@D9h(b/]k 3l'f8`~ h\ |_q!a Bćq|{ %u_bށ[pPﰸxub>Jq8_oQt`zQ_>Q7ҿ} `_k LvݰϷU".`["o!QUlo-n' oR4 ?6 JW"?gV+Ɇ#j60NQ_o+8rNO} 7*Fn_-σ-rplD:#l -Fȝ,0f%FHyF_w9(wT9iF6 #s :`ZQ 7ҿ crni@`vqDCod`xP}$7?0f;@ߡM8Ccyx8eQw#3.GO^:ǿt}@[T8v* M0 @Pmc!Q720| c ȑB -fXTx*q߅V6;8(@V X-045ܴz;yڶ0_}D9G'/Hd`oVlhݥ{~{ 7~H9 ۻ8_n$/ =)CwF$VtC0ꥲ CYZVeJtZ/6˼zu{8qִ;a9Μs+1r~`„(^0 I)?E(6 /3*:-J)]Y"5(g$}`@0mo88/ɿ lCR-Y"5( x+IF')@R@Ѿm'QuQ|H 'jGQlY*SH_#=YCqfpIiZSFȭԈ8 F9٦GB8Ap.!I9A^`BmӖ\VYߖ(=Y}qJA} 2'$?{+qIza۔Pk^\[7 r;yAR 6-ϋ^>Uj(=яϞ)} 25m$ _Vkem .C23jK+R`}.>~OգRśBBCRtɯ3'E(vFI~?Z?~'KoCg5 Qއ/^#]Y1x4ʷչ\.3,~,hjJh~g$~Eem)݋[OgMa?⢻ %yG_K}<|n:k<|6IKM3=!q(_ɇ HjjkٰdjUaPH—{ަt5$Iʇ t&/I?[)(ź73@$5ݬq?= E{ 3åXy _+ " $$I$I .'fHԴ:Mkݲh"sF!pBR6I2 NtEsfej}U}ۣQ]{kK}Ӏc3S=> mIQYgwUII:"ӈ51 
zFM]3jꪞ#.n,TCm(R*fӻOqYIek3HWfX.ӈ>r7s>?. |Q8ʿK$I$)4/gc,=G`0!$I$I⠀6kA$I$I!Y IuVAH$I$I•n~#$I$Ip5]AIENDB`pairtools-1.1.3/doc/_static/read_pair_NU.png000066400000000000000000000241271474715105500210000ustar00rootroot00000000000000PNG  IHDR!6msBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxw|׽BXa :qօTgk BUqW[jZZ:ZPS3@$?νq$=x܇s'$sg|NurH5$Zy IX)Z׃Mԁcyz&ǫ| ׄs)Kz8.x=p=B.mùt ¹Z% 7sHH%SxIMޛ}Ns I7ɹjYsn.LNKY3Nqt:h\-Bd{>N.~$?\:s ]=A{6Ɵy_tFox |FrhtN*,-a -2e/ؚ69R٬jg{<~w\پb. ec4!ľ#(> ug >A&E3|xOKq~~z'ׄ$ԷIY kc1sG \GX`'>\~4z/H¨\4se%Tji]24;63o*w\:f[iˋ0w*ˊHʼI(5)1hCVr()bTl( qC/qvN <Lķ }/z3-2_m)70W4Wr?~)[ HHm^&.TV0K(٘orIi^61υP% +&2x£$g婫(^=OOOˆXNrh+lVw?t=|\6<ݎ]+Kcp' Kq'~wB14sc3G߳mMu_=9^3*O|:bi60nئ=ncmDc;,"m1$7 81iޤ I$ ə|#bKMOR39#?B019fя1c`f(/O|'p\Lv e~?{13mx x_YOck-=.{{jΓiPe󟼒 +$K$IO^E3I*'Y$5+܉I\lMzv";, Ƞ&}Z^4uٗKJ0lWzs?o. +A[0kmΥɘ5uU'Q[J JksUwh4̪b\A3&Ҷϰi'n.%=$՟Ȧ?rܑWЮ>VrYd6|λq9گ~;NߓP+fc<sE9A_b{7162 \m'zIui: = wޚ/;kWcA#HkOF1?tIdW{_tZ݉~5[Hek,jb~Jѻ6USe)h%p fSs 0gV[ȥ-|^wɯM' +h#p,XJ7}\2,/`3jyTg>4SGջ%cf7;; rG6;TawpwRpl~\HdzׄǬU3#^GW4LIHyǵR D;v ѕb)EFW$X~dҶp.i6w\/`v*N%gtb"; X[ +tdG"K̸\f1x$If(x2?qS˦ڦc2#LYH9|<rL ʟ,nA LYry {3Ij fOaŝWPe#-SoM.!Ch?F}Ԏ= &eك(IlҡmU?֎>ڴwƯ YYՕvu%MDJv76~+wYy_gz+z̆ k`JoF؜$u9p'wXxބX nۀ" t'cݏ.|9X_\S,y.9܏ݜb!v8z7'껴ij0We3&9N%!K)r*Ҧp6R &Td!BT]VVKBj:+@EfKKJ:'PMq'֜rIHؤ\T,qR4PZ]BT(]5\Jb6HWەq!و4h%nìh- Q$ljt6t?v)fH4V ^$^Yfa&wWDĚp?V˱"`9 L|DDZn$S.yFx+p?f{Yv ۟j9VĆޘ՘Z!"--@\u Pu͍`? 
H uUǵs)SGR^mbH0!1ivӑV$BhggfW-"-@̆8;OZ L""-RI8%k9Fx6fRDDXJl:]@c""V A9˗`ʯHv s[ atH|xYs l?8b^""R'~l9037‹1m&""ܫt˹Hq[T>L̘ġ1WZEv.^H s1+dG&# 3ᮻżDD \a;˹ߛKp( wۼh-jSU 2 | =3}ț|; [}F_jnDVx⻰*&x6p$rXK:=;Q+p/ˀ MHX5͞=pe|'KIy}(/ξטly8a,isp+{̉{ !Up)bŒָ'= Ǭnc8?(Aɛ|sHLxR'3Z9sY;)Ӂƒ+wU1OD/c锻(n@~MpErꘉ\ D`kmƹ0XYOj0c6iQ6|7;q~GZN\VNr^{p,_sR*}F_{>'(٘}\~޾|kgĝ>6]3q+ `8-!gmy e#TSq䌎~^_w~^m.r>N\=}{x򟜉a4~M S9 \m53+ ܕY9&"Ҵ?xu˜W+RT'tH2ĕ;KX:NҾ+=cԿRQjswڙS['uR:tkVdR`0"ĿкO""p+|Cry^&Vj7غxBٖNw RxYl='v6rn'[sdDQie,}ݽbMCϨ% pX ~uʥ+X~n?P:{P,95SlONA`J{+{?aj SewˡgЮ>N\Q79cO{KMf Ho=Wؾܝ;23:5g ѻ8ĥ}qu8] G^Vd:qy6w9g/TIS29DCZ " ;!N)+t˴.}ss?cs@052ʋͿ+IJrW TQ9CvU=Kq`Rdzo bXGtq2""M#jrEvG5#u/yv*˜xͧ/Pj'$w;cbV}ϺɹU;<1#z "Bd? w5'pR&T_} D3&TYa&dyum ;;ڏnGě~Ms>i&-`UA`灁1NFDI;fwV2Q[(zuEc[|?p@0\7;r_wWugKݪkVbviR:tq:qo}~w.bQ)왌;%{xg/Qd{xGA?~?ī'fy4""-FOJ'Ub_mrԶ4%Wߙ7_|ΰsh|\Cܶ?ĩў fKobH &4rV5 J^wgw=|owugvEw_a4'6ٷ8qBEug@梫95ڜs:P0鱈HQd?W;Wlix}+ Rew(@/ug/y.( ]/S??\ρMvvӾE0I'()։2z<9K^;t:Э]]-s$8qU}ԮUV[}WM|`X_N$;7x#`W{Cr;D0)շqVz6S2+;3|:xaF{.n"m;$!xZaHO0iCC "s= &9T:wz*U'+}̷b~[,lw,;SJ3;v)ȮG[]A~?t3<{U\8i'x -qfBDD~5xX&P%[|zѩz3\! 
L UsFnL}e)/ߓst.uuϧÐI @01,grE6id19ǺoPb~6nt^sL÷K`p*06緥HiW#>A;7%c6GqX::)PQ݉{ !x7ou7NLJN/2o+E.DugQJ75ɖwb!_۞ÌcG0=WY˨i$O_*xIDAT`A^5>@X4ΕqBJ*Jؾ|N &=t; 7PV9&!-4(ȃJPy)k(߱$}\/Ce*)^kmYGey%WX1Ĵv}եWQ^yt:ݽ2>3.}!7s(L{Ж_X̫DczyCnUBDĞ1j50o7^(++Ft'6F9^`QTLܡ1+yO='v"Mdfطʅա5$"ҭve'c*?މ*uS, 7йCEDy x֣sm'HaQoH N@NccrF8~} ,""0>M*"+y@(jF"0v1v$m' "g%v""q,Hבfw`!15""u  %+{MD09 5'czEG""uw)/5iEBHݴnm{ ZLGZ5""us y#*CI""G5p{H \""Hsiҁ$O^צe@Q$G~(JB]l_<<6N6乭:7z^l5zkk/ >ŸTrG3VV{I45"" z09ӑ*\ tDC1Zpw$HjMc-#Hó,^1"AS$!5b:e*nx#5}'"u ҺgF5v~'4 cHp fp@V6C7Y̥AǏQqi&"+pu$c(ٿ8b:R9Av4%*UHm2j8T[p/LNSc?4g">di,wXʣAǏ8:s .,~Vʋ!ğqB+j㇮Ebgq㲭X,#X<"ڷJSc,[>?~%YLGjq/1|o긡8~Z$'Tɢo"TQn7#V௞ R.8~LTܮpi0'Pj!>|^6R{}X⹭Hq/C$Xփ_a1A1f2WxAYTV{W5""5+.&\驲E/nFRE9o_Qqz;c~o޲ԠS$b"F&DqZ5""utx$yo|Q6S1GcߧVjQT.UpKŢp' sZrk0k};S2i>nG*4C2nZ2x]?;k2EW@:%qJH|Kj4C03Q9e~ B$Qr8$T˱KU3 S!.hc_ H` 7 c?&2t!-'{d3!^2]K?şA9kIENDB`pairtools-1.1.3/doc/_static/read_pair_NU_NN.png000066400000000000000000000413041474715105500213670ustar00rootroot00000000000000PNG  IHDR,sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxu[ULgJ"nqY-^vq)H.n)@)5&&HnMzy$&77=ET;036"jWu!U@c!DrM`g!$t?\ eX!{c\ A֎ """"""""٤ Zn C-hk.Vq!JY\,:%6NѴE(=(FUD]yzr,Pkv:c~QLz$= <]4.+1,,gʒ,zf鱤cm67gkkoBK}Ynre]lUT\Jo_ux)YU?fm} c*Xe8p,Mz!Qr@,O!eiy Sidqʾ~3q^֟'?o8I^u@N fItxxawQH)O~oc_'[^>F @XQʲ%A(,cRw>$Y/V_'{˳喤,; |,^eYk'BՔ|9ztYF;q,8rpn'|&{TZVp7og7e)pDunv/+fOMCI$f./Ea' B\ twX˿DOSISziY4QaR,9K]:+B˝ۭ_޸p[,{8r;p},Nr,wpE"|2&6KhҾ;OFROVY1DwY}TRdI%vܳ/6eU"el _rQD \(D-H$ p`p 6rDLqV`ۈdR{c c06ͅc;rJ<8"E:mZ\ڄOVfxYf,Lot0˨"4Y:dH<k_d]<k_0|o_R^,8"78_eGYNNr,}1K"YNjNx.vZX k&s22BYw Que,1FGnkcM̲,3QgID)˝Uo. A7[;Imteobבʲβ,Nrw0Hka-e90BY@+Yfڭ2Qq Vqp4n {Yړgy")QD,5@ϛC cr_ ocg8\eW;;,9"; ?Fbt~f9͔s;NXM Z,fy2..%UdqQԘˊ\FE?r ,^Hv{ְ079ҲsVۃDQRYDjk'VteeI, V|xۮo/v?CGvٛzu=M1c+?e)j,Wj,Gc9ΩN! 
l\CEEp/~2u{|Yܳe]+^1ɅG3oeיeޤl5?άw`e h|g"ZmgCej8K|a#M[\yY< ~Dqhй$Bp\;')&1]Gu5r,Y&1]k Ǩc?pwkKa$j |A5?6E|VUIt ey{ila2{2DGԈ)!cSO(M\}L6,ws.P dɋVͭQYwŊ՝x 1!f,g* 0 fd9 2>wYD2Ύz$8eNY%?˭Ul,Wnb۰ygGYnb[)l.'FʵW"傈ey׾+t…IY u%\wX \kwcJgk߁%v.pd)jTQҸm._ĤGQj,'˲n923Y:,_IೈT76Λp,:ˣ+)DDY]fʱz.bmL6B,*.S}Y:D K3l,â/} -t!9'ؐKWYNʒpᗜK`,A[wbn/ݹld&{b[wbQ,gOa}se/6Kǔ,?H$bX{>VOk,GزP(]E~cvň} lĘ,c &gK{,^M3g뵙DHOY=Hs).x(LeMe.VŁDXC?x/^qP0)K6.ˀ$| fe(U7Zʀ.˾z$Hۀ<3vkIJrK,S(d+ ,921MK6Y'g DB:,%Ջ?Gg,+KZE$D\JmeN=y & c\,K&LǂԄ,O].GuD icw٨O'Rg瑘sk4Y^b۰ 8NK''exQ ;aKᎲ$a*$I\.:,&óLlYO:IYFJ!򜃻,'v-X ʊr~ V1YV}6HYKYznD, ?u&+esubbrTR;Yڿ [%ercRqHFbEb @GY+aKv +9_uw+p{wkJ7g)n Z@7eybl~gW꿬d h PڪI*ײzl*+*(.il'2BR %4I=Y2_Ml)l?b kqF@O`\YKW,=HpT'k,>wV+YY|lc%Y:cvT B܈wX4 h"D)$Y;rQUDDDDDDD Si$틱"G}X@"Yw'̗$rl[ؙ1 Ga']"`C3~ >8p$V{ |} Hu[b2wg x`26u`{ՂK&aT^ٷJ` 7 VN~Vc_E\UWA5Z |-Ӽ{KJH|8Hma}Ap24sM"{}FR[^6qDR_?bNU[r%Ӿd2O C59,]}(K4t>6rN]Jg ۊD./|uJMeOX|f$6Î|=ŽvA_j3K~%(\X댘GuXHa(Pw.ƪ)8;[uD6*[2I$v5/eX!UMXlQ9`E2ޑ|dFS%/g-E7}7" N֨^Lza&)|x- l4cr,"> y$UGV`4V$yNH⵶xw¿^ջ)'8>OHއnH K-VhX\>فhQD*a%ؙvNI!~m)m\ +%$l_W]S6~A9M#XgEUHNvmC$jSp|MWҽF$ðdG ^m%X}n#Fi]:+BJ5;!3ĉ=&XJW(,A9VX],ɲ)z "P}o%Yga{$rZcsʿ$8$:SI.~%%(`s^'u?EؼugV\[8X Me b Pg$ #ؼZ\x CqoV,3y I_`l:õm?Ӏ?('Q_gl˰"zҏ*4Ev#x |+ GO?jSh Rpc-v븋bl$U3>Sݰׂ7W<ݛG m$0YUk\"oHbVPm#0ض$͍fk3RP>I5^?ߑ~_hi\JpI2Z!NOnDZB{F C9y%hY߫V'4[&7]M)\(僩ߓ?CrI4iX^S/v?լ0-ȊR6^+~vz#1yHs/Kg5vb#Xxvָ\R6:OK {1U Da(~"{y)6vxo$0e(ҿ)waBc^"DͰؗ]zG:"UwTxh3eh<խ8;{UO]tqFSHzaS|2T=반 ƦtVh5cs~w!VDY.5$] e؋ ia+a5&0fQW;W%t-`!"RM٣r1lH!hx{FﰨFDC[&1a͒ؗIf# Hؼ}dGĨ{2 B~dJx1x;ƹ66Zb7OsۦG;yS#RDQ^M %y!${5F4uj;D}1M?fRa)qXgk%x菆.o8ڍHJj1Jp.'0Ӻཉ'Ct?nU! 0 Ϥ[WD$Wv:wQ! ؔ Y1\ lרh-!Yg8K& $^Q_Dݗk}fO' X*Y:V_R{ľ5q+Ha8ޚEV"شbR \Da-H9M.6)tFTj`)m`5! 
GNqt3󨥳tRc1sBPN^gIaoh+a_$D#ӼCicwoI56WRO*<@atyc'K`l옵PV| /A5ۗ.ZVF~$4\n4bDFLʃEs0x;LyIv-ѝ%n!QK潋44ؗIw;Rh t|)X|5ߑ#8rc,MQ`O(sI}0ϫ0Et]ghhkqۧv.yJ|R׏ԗMH!H>9/ɗx0$Nkz}3V6)1b V6y:9x?x~CEІ\ [-ie5~>룮)V,H>Mۜt&nOG )VO($3p闏jJݗɶ.K%v1۶jP$eYɵIpErvT| D"aju>RQ)l[JBe3zv0|!]--%74=)%R$ؙ\iD˰e0+nNW?H!9e:WMsGV~t'NwH_s޶Qqќe4_hjL⻎Hv5vuDK/lZD`® VDQͱbɣ)ʰ%PH>XD6S݂U$NnYx"{WfEbchO΄YzV$VY st5K"~qblם&h=^Fg }gY||"j6{/aسYKp4E,>H !?ޤ`ddHzclmHy0g;%"uS 샍 {^.&t0ܐ,?T8NGFv;REjG!Nj!?H[ 9\C3D$;.Ap?8RZޝtsW= 89w},S;%""R KXӏQgdSHvՅf """ z !o(*BDDDDDDDb}i~u+rпx0H$#zY Q_b s980],i\h%]0A| r p8708D] Ib`\"p u4YD$?%]7 H^:%sH$^+"^R+I*Mِc&|ӚNI "R mE%=Ӏ&>WBxa!"ElX0)hZ[}L:*vpHDDDDO"O*`nj:%] 4סDDDDD2Q䛻 ``$VyYiYD"""""Py $>qD$< F"J~m33c=#"RkI=FnC*,jsqDDDHT|8KTFT1VHSDDDDDDDr3 4b́ӱ_¤"""""""9untobsd<3yiy:bg+T}@\$MǦ xʁ#G$lhw2XrH\k&Jza{8uXxnNO8â-wݽɞΏn>x^K@Qq1Vݻ~x۰i6**+*h=`(=v96~{eE9SY邒x5Jg {94z7}A>qw',`sG&N(:"lZϮ>8I$""u[~@; }Ί~^F-w(O|ץt{4ND$-{o@R6 0Y}#Cͻ d!:_EJXq4XWwV;<:oot$M88tzqr=+Y[߸Z?g{mw)kQ .~ASlL|9Ί݀n∈H=NcEǺ ӇΊW0 C[$fw|BQaEf37fقxAce`w=vkb͒2~}xEtsLy *+l5; AW}p gX aC"ݰg:'gObK{j)4x`gȊB|Mꇴ:-#,./DHC,99L|zxF;~Ȱxr*.m՞{xOK^HQq`/OힻO}Uip9V}4"B_.N lk} ϯ%v,rGDD,D~sK`IW'd? 
)T/a#݀a;NcY'"k/nwHaTdR0p7cŜivΦybq3&1'펛I;_M56xqcT=n0s]ȁ5KoPwY"tx5GYDD$>#0T958WPJl}/ڊS=^NO~n^eFO-e87VUhO閞HDtojCKbDZ/ lqv`#,#hMHJH7cSz 8ZMXp63^wݲtjrl]#50u^;1$z][Ն""fzCr+p1c#sK;6|s>owlOڮU]'\GJTEE 8(*&nIZSo"J|Gp=wa+ys/N Uz,[|徴y=3CO 9" !XL+DDD?F[ lr"]TDԩ0zIQYϏ]6pjـ(dZ+WADDDw[V&6Hfqop٘qR9Rz1瓧YvᅵLI*d4w'` """ r_OŀyFn󈈄g͒2~}1QEt6aOXOo7iו_Lb`"_oUn%"""zי0""8BKjvRܸY9w|;Vc$ ##9#"~1tԩO^o7nۅ^{x1gS=z9*0ceL~{x^ݤ ]TVZSe#/pT 4]&aDDB1S=(*߈C}:FMfNf'O7ݍەL{j\]ڪ=v?1<TYoq>iA`r;g)$1/pTL4aDDBfh *+Y]O noֹMuLHvQq#ڬfB1 ~" ^seRH&:Fɭˊ qj)=w>f&\gѲtfD=XSӢ)}2V-͌x~#Kʨ`ʣAee.[@~}VV/4x"g)$1R*li㻼ﻢKÈ#k1;\JuD2#  """@lY1 OnC^e@SGS#HJ=NO{m|+/\áJ Hp gHYH9+K6ٕvCou7#ԑ:䵁#Ҡ<XRՆܷ o*Hpjbu>q0ДhK}Neʣ|OzpD|><>ud2Y1g`SG:#>e!tfy>O(.mJ#}=#?@:@[|`tvuDjd5gp8""@[j!9w`a P0` EDZd{lLtb oo?$xE;#fS;,)cƋoy7x )]1ċvv|/~x AmGF^60*WFϻc`AqEjY[a?#""YV q2`9#nAgCD$7Ak-o lS#oܓ#!tV/|ujkFp DZ6^F;"""w<;vkH.]0wX|*MnrF~z >SG<#]o#!ق((i}iq)A.sDje>} ""Z \ȑvujHy{4z$H 8KPYQǯ}#icx{3dͲvn6xFx[92:bgx뺖b" }d.GH]].:DD$]@pU#p{7ԗg4$ Q7߾lVw÷Y8H}SBDD|)@vUW,mіxcnQTTLmT]C ~q*|GaQUѼO~`/_y)miM5iNQq#|>ͣI8)_z_u,p!V<NpB1xDeؙ2gc'wUB`O|AWkTqy.v$k+hKY9v/FqFbND$otoOyr*֮ۮ 7)N:ҼMͦ4ϿʵkdpH Nyԑ,i$:C o>s%{c% @d7q>Ės5^BQQ;w߀PX[a/I - 0DX/e$.EDb Et$|.ťM>4n1~e TVT@QoK޾ŕ ,"ͺ㦻CMz~ @-HKo_1w:|2EJ8t! +yXFW 1](z8k!(tV>_ KL.V8$""6>|p8im^w}Xtw#;wb'v<_&1z.>xRg.ƆDI_ 9YI;Nش(v:}{+/ ! Ǒ,uZtO~%661 jORl>]*muZt]7U V3cN;([DDę/YQu6$ӆyj:özc7wqDDDbJ!zL7Wi;2Um;3 ""Q#ydѷX]+¦4T]WDDl.џҷ>žk llT3:ˣ4NDDrN`5;6$ߕcK~:ĦXE7sED$^:XGy! 
xX:HV:]@V9""""-A^I D:,\Su-4tls(Sn∈4hI ϷsEum[Ie#_ 'vy"p[۝ sp_  8ktZN.b-""""st|8H"W=-csciҾ8"""""84+'PHjJ['9Ӡ~-}kZ6}۴elr)""".]( 9A _MFOY+wi|Gsl~Vam2[lEDDD^`Fr6'XCBx@""iM;aZ:H"zE@,?"`}%6R5a!"R^^ (* cTÕh=݀BxMDžB'liZۄ0$;8e)7J8M+ r1<ϡ *\ Ƒ$|H{"v-qYt709vU 3uX^'"p8t aIN1';""nIp=_tgc-p}>Yɡ$*#"w %sǑ48[NTDc$9O!"٠sv  0x2v-0:(7H8|(:uF0Ur;LYDĝc$qHJ~?WCj|f+H8s- )1$:~6 9,NdAa?4B$7*+*+1TVS|2Uq"i-E7} ǷTZ>+BxbK 3uXyErG`X7HaEc뀋Hác$¡sx.&h^#96"hxHn]|5?}vG28YD$t$R8~t._lmɭUJ˘h5$K]nD8CE$t$R8~$V]Kq:vyl:SE h9m~ǗPիH`sݲUyzm/"czxq \f_'0 ;9| XY6H1۹/a -Q?.azjcrZ7yUi0:,DDW5V//8$:twv}.7T[Gr9a!"^cxH5~nA͇!^ϻr9 """U7aLa_QH X @e?=4C\DѮ-jxvmo-*"""ѵ;},htL'IY)IDAT&~oO]sJ)n ֕o-6fXxqA5N*"ٰ #w9HNEt" )#Fr708ULfj hnz +yf.D" Oc,|%PYܸYWS;Vn~kύ,TI}wʰmKˊjVYڑ S <<2GDkH9M$"u-.[""RsQ؇'W×M$UYJpci }7688wX`F-5;{H#,W`覑nWܯ󹺊@'D\ ¡"yJ"["ݗVuXGQlLϕ5%D$a!R8a!*)"R{  '1!`fu?~uÀ@{<x@:uX_%IU""< 6x)VW"9Ԥ떻""(W/{}@2y{-) E7 z.2Kx<$6BG+9/QRT@Bv47\ >J KkIK;ӓ}M;Wܣ^*;'uND|aFQ>Qc#"x{j8xrYA` +zIzHƫJh{kKQކ4ȚkBj~܂C41Nhґc?La~67hMq\<ԥfcWxuh?}iDqӈJ\ 8:9SFtJI=n'uC7Hf=%msNfU+Xq\2T0 p0a5"F5oǓF"R|d-SFD&ZcoI1kIA>"&NO-a{wW`btd-1^/?77N1 bb^4IP/’b^ÅA<ؽ9,_TReK؟\`*pDS. K\kEM&OtbSecdm\Z7H|6Zr2̥ϑи\~2ь\$Iͻje7oqo\~Zm{iŗ )*{%6B;ʣ˖PVrOdR$FEp K) PCw?r,p{Y@\DGy::Ғ_?gif|-[%;ߌ?ӆhd3nt40-dYĶ݌^N3ђKYBAhS~;)kViɥib"zI2Zr &H[BS.iX_65"BVl9Ɍz>MλEK.Gf?2:ΥEc䒛xS:eh%op/v[0u5'6LۑOkɥ&Nuephk&FqQb6㭯UɷSSF+2*蹔xY3N7FgNs䒹'ĩi8q);M1/6q*gJ&N>g^%ܝItި%2kǷv褥16c;swX㤣wލ1藶.0HS.y[["tG|CkɁ(ڧ%ZmN#i'3>{ԒKRSHjqZ}z$6 ^9tl ېt3ڴ#;hɥ*B;6*6dq4nhAK.AGDAFSq-|ꡃu+=VE`KEsTb+ 8"/bCi?œ~+Uu-m}D^ zs9MYdo^Ί6g;|:9ⅿ2v@;j}ߟ,ҡ]#>߿%m?/VŊ/-С1.zyfcɤ}ku0}Եb 2CfƻO,%u~îCK.ɭ{ f|oɘ\ZvH ZrIhҁv=cƇ9/iɥ"! FN=IIZrys͟,ٿ=h]B^P1m50TS.pV k US."d5.DŽM%g>6xKx(DŽϓz\Nе8-} 2Ne+GDmvlM7Q;rVFZr~>ۼɌiӎs5ђKK5V0((hirv>KJ<r;rq.y26(8;}ɓvž[wrt |ZݹAK.e ĩY}\kSX4M<#EV911EP)䈭4gJt z/Z*'t7}Q P#Q׎u b|@ T.E,˞\OTrNft2EtJZ'<MB*\D9HqU6V}z6yr G%#"&[nZt\ Ռ4a6B0Y N@! 
&ێ*i5ǍL^@q5k>jqz?+e: Vs*hMkMOrMX 739j r+58ύP<Ȃ(rB%犓XTXfwԑ` ?u$Q|$^pn%E}ճnz Hՙ>jJzgչ8؉`֛Ne+Wo}Q]3:uYxK/  dAu0Z ,BUZPC^TM%=5.Pwyya~F>CiUnD4F-yHJFM0 כN؋z`M""PKGm${/Fs޻X?ks~ĺy`&!r}#L\:Xs.j]f=f$j {`yVRKK~؛`_8ǩt*x]ʞ 9h]-D 9.ƭx3ju5ԐcSNu^z [Ea7KY0pjUH4HЛNX7> /MPmGxVr/X#Xof6wgu?jֳqRQ^E,koڲ w~I{4n5&Pk4p {+S,Q/ߍPTa+;gߵ_z5 n0֋J=Y.ԧ΍8_KQk*X9[o*!+'b<$c(= n@ȮC+uo_n/HԛD$BTcR@xK4м|*O>W E9PՔQ9[iGXJPi(KT)\ԥ9_)J&Z)kC[`2Ή_yH)N\>4^SC 56Ucwك& o8hs/2D.(QۃFvm &m&8Z񗂬 \@ԇ ע*muZi B5Xf˺eFFoBX ^?jtBP'ԆԤb$aښg/G˞B4ʞx?|;jXša g!k]ZftMp34bMd úoy 5A j-ew oS%pa\Ե'/=n\U8'k[`]^=d'*5SYԇ.N6M[v"O^@͜FU1yqkm}m1Qu~1̭Xe'8 xuX Sy}d*5X?}s9YMPTvrQ5Ձ܋^Vapvc:Rv\lRCᬧ|l+X;. ;&tzmhQ6QpzjE7ewNYuU5`%(BOsj?qs Q\yj:5\Fk!{V#QOk}@h].| VĢ:֡~ݨՑy DIYsej;'!ol*V%ߛ4^<͹X?X͹y.&z ?ghD܈IUܓwPk+Ʊ U9KF9 e\;@-T:.m:/՜KUQ)R}Gkm*oEm*kIKo2}#4*D-J^j&{'?<5i^]v e}X-vAod![M&ƺn|݂>'4ƹOT|}~*AD.} I-\T%A5R(^Kxq-ؘ$ոޏ=-ciN $QثÐL]'?QUr@bmjH[a8AƁ6~E x%j%TO8yVs.T>\CWT?qX/V 0UX)z)Da}Jo:"QK|E..D1/1j~/c5q|\G['j^Z±Wa՘We'|Gϐa kw/㼧OQ,-;!lVt«W,p-2L}ycMj =P-2u#ڲB^> ]4Rt@Cw"Uڜ`# 렮S'5"꒙qv5K}x%܇UjIQ=m]oAD5PkYE| 5qWE֖ U>W`!CݶƠߥAjpc9\94qwsC-3ze-O1r?@8 Z <ߠl$.] 
!4J,W"PBlB?]!,"0qu8k[87ڲB8fC)4@Bf>܃VGA5Ud7j۶I-B16Ew"*Rd&ΪZȞBQ],NG5Ρh"$E2 '5Or8묳2dRF˗3uT bEJ;%Xv Сw}Y =sNF_쇮iJ"O&c=Fڵ5trJJJꫯ},j1|Rܨ|ڶmOem*Dx5kÆ L9 ݄ cqxGw|>sLZrUo+[w&:qڟCfh<3fش#nxu#{r\Qt3INgǷևWMnlo1ܹFU3Gǎٶmqh;j<}YY?9P*7qfΜ7LVVm 5bkw΂ S%BT#I3P)KPWV\jSpU<讍_~z^h"5㨤4n"U;M1cW<@lj33>|4xIqav/:^:p#(WWY\\&Mk55Cm󴾬L.R7s(>>/φ FӦM4h999ᗁJPkNcQs$Vv1Z@e|-B g<P=V&8}qu4Gva2z=/v'kD[!}sf\f[Toy7Z_5[ >}ݏ811sVЧOf͚m6Z/{ JgSn}iErUT^Xk 64^1[#Aw`dYdMD\f9ȎX{bNf6f5hZ^ɱwgyrx"beZm{&r/ӿ^/yBQui\`̝;/S A1n8^|\ 1qhSQ=ztQ&7K\+7/b m uENCgk ow>e_a79777Ōu!$l вl όC޴KIIa޼yj驧b̙lݺ88j)T1XC7R:vs=9M!BDGnc5t-.kh;H2K޾fΡ1Ejgx HeoSizhsz#*ph}D5}K"kR3nzhbNg%66j:թS׈ &&G}~p=QƁ;֭eQ]jIt IDATQZ+]?;Kcch;)>oYЉ:ӠP9?{žkwo0Ì[޶1.V^f\gϛqLJwId"PZhO?7߄FURs=G+?9r$O<漎D ޷73jԨc@>p6j y%Э46_)_ I|z8{K$k3nzk=gHlޅ>ZOPPt˝ v̛Jfr"bjaܸq7Nw5ZDD(تqEDD0h ) b.ƹ hfbUs3R[>9 }dmZfM/غ͸ s߽cɭzP},_Q#FXtQ+,رcXWFobq^+X}J͕ ͟\5{[bD2j9͌K Hc݀>C[f>ckݥזCmwT!BS۶mi۶PP rGH׮UN - ȶũxW[ogs5U9cwt-.sNd Y3Z {wtgoYeV!U׷o_GA6[h܌e.T/&p AmbO>響`1D>_gn}jh6bXe, &c{fܺ*xKJ{AMe&YTlBhjlq,p->,L*suE/gfΞkqQ? EiH7:c˿̱M>h<+͸;U8!jpƶqbbm.JUw0жc{w\[^- 5VW>ڸ5FRs2VqyɌcSاABTMjj16sNA=56m g|%S~W)qˡ;z%gU QkaPtRnx{WD1ז3Ga~3nqݸ#OW1ܨڿQPR.SHrxx/8uQ-D'!Q;3>{"G.~9sBT]qq"7}1BiL!B7a[< k`3jFC? 
e$Zy1uԶ8{oɸܴں{=: q=նWYB=t &K|f\ !.++a7B'Q<_Pu/|g].#k}I"!>k^oHA7d!Dlܸnlo'6lvNBr[PdB!§j[Fӷj[~g/'ph5Ash۷j@;.;v !dR߁7M`$Dh-u%V)"ѷ%|g?x -g?3}*i}.3cߺXZVBTlɒ%p t}f͚ %DZدn6[ۺ[ou;?ͺvΝ|b ڶ{_BoÆ mbP "8*{e QCvvvA⎌V72@ˡ{FHu6c߽cRX#})!DMfQpGO!k wukdY*GRS3Θ.yqtr= Ìl_͞_f9Uq#(>Yw qe=qs-&LN:!s:ujIW3>~kJV… d-7A+A-s2>n5"鳟s|Œg?OQI#Yg]aKֆf쎊̸0;Mq<DZ=qж6mW^!333dn~;o_MP}lݺ~ L\H.Q3}FC?l%ҠUf|`dYdƮ(Z^q{rc[hy}D%Ϟ[bUBR[Y뼢XgO&藺r]w+N%(rssyꩧVa@f O/;6Xy ZԟZ.jf!pf@KDZWUӽ7x5{N=ڝ sOd,x4|'[f#;ְ,%fOmVd_ϟ^ k…̞4UNTQ 2n8<Ǐ~amܸql߾~QlW^%%%7~( V@Uhz?ެO?߶Z|n?}Dik}Q./^8/Ŷ1)c8b뜗ZurW?擹a\Q{rkuc6Рz⎡'n=$3'|oً4c/YL8GyD_F_s* [gIT7,8 z!^Z'.F>)|Tl%\NOy9uD%Ǽ\x|TQ )xr.{}FnuBCy{S..%zXhk7#ѣz?)B~g41kZp8i08B./XRىCN0scT~qJ\ B$ TaPWpƠ^ܛpn- '6!A]ּ/qLo1TrnYNA t!B J*ը sqB!%FyZ% !"@"yXOB!D6"|!\>اf1 !P f6Yp~ jNB!Suv@=' g~=xA_*BN@wU:x*=5."z`0BIC_ -[w"~1|\5ȭBm򀎔PjSw"p] &I !( ;r Pe8*Ԫ2'C|mxYf|ߟЯ=dݫ5O?6mhAQLf/6 Z/2ߣMw޼DDoI묬,Nʌ3|u/CY rԎcƌg!!!|D6k֌;v0UoҬj+rD4~veM68U{\&.^+Wk׮'!ĉڽ{7ڵȑ#ơ_6 Q=Ɓ~1w\jժ/+?1cFѣ!/p1jISܨ)xҤI<L5>UI3c+ZF8[͸ERtDm OrBaÆ?Gy8Ϡ[TlܹsIJJҗ >:up3j=ynA}r`ܸqaWE˫Vqrt478u*,419c!UsM86{Ք9NYsa޼yժ16 8)ScMo߾=O?t@ 7cmm|KNԊ69|5݌7hHiAQ!NT||<&M3~Fե3ws!.;wp9wp6WZl|Mbcu B>ڰΌ[$%sIs^_'^u%Nr]X^F믿ҥKC=tc7x`f͚U#ڜ^{\PCOj}8=;uĀ.o Eե+.kO{vx5[Vmh B8.W_}D 4ΐ!CjLc Ƙ1c쇆m!B]; Zr:d嶭f|FFN=%%O3um3E!O?I&^Aʣ>JTTT{W^dz gk v~Zjs(sglkvLb {! 
&Nƫ[.?77͙սz"%Bwb}f|U4I3 fܮV C5jB!#F6@kA>8ڧO`椅kVqFuVv5G<wu;Į!O \:Ǎ8ҹs`tnHF5=c" YfU}@&t.;N !ujբCC}@]d@PM?kVRRzm9&"::- !'wo]@} ˞efEE3 8xiӎ#Binl;:aZ{8o3M96]P\츶\?.k۴E!zBS؆m%#73nh\S͎3w`x6F!Bu:nc?RT:[sی ;w8p!_Jw~2C7g?bۯ9aczZ}g[׍uTZ9Z!𗬬,{x ٿP t3kUGc GB!!33vszǎVZ&%sQf[+N2'ߊ]B!?^fbl׭[{jgt$&kZйɄoMk!dٶXfZaY|yP!9:ZRs6'm}w}B!NƎ;عsRA^fYhp􊖭hd[ER24oiƾ" !'j̙Xl<Ï?ȠA^pE݌ܕ@va!mX]]ҡ?0WIzD: !D8x^mbcciԨ46m=hCMjWƯ=Y雹y Z&%jhvxuJdm[ڲDҷo_6mڤ;"##㏹uTK.eٲeC^q`ƌddd/;tfx^5 mٚV 4;V;"\!*D5rH>cݩ#h<1-e3p9_d-> [yKZ''BJqB-KJJ[x"^/wq1c,<]woU.k`oM4\Ҽmkr*8!+Z6=BϾܧ1Ok)z;v,/r 3#GW_Tlrsɀ9n3f iii%4k$$!'SJ;ֵI2_硃vhHjt~Q2ʕ+ڵ~$!8n ,`C)\dj>3L0A_F~vZn,Yb?EuM5.&(:uDjj Enmt}%11"@֬Yî]жPs 01ߟX}\/^LIIpp&Qv(A5 0FE +<0_^B!h#B sN #[nAy'UA64.*7c' !]J4RO5B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B!B`rN@i@ ?X\8/T "'R[m Q97iN sN@HYIqzNBP;qBn:MI5%DOALFt?t%6 D#hrQXR, cF K,j$?FpGDdn+FԢ јAzFнn=5t!Bң@#h9~)+>Fɶ8 ()D )ryO˓@jUo6BmFPt8]c:"Dގwf'}a4#|oـ * r\g4CJmqBb\Z^~tP$Ni(PO&˔Eܡt8naFߨt0؞xm=v@-`j_I.0+[Q|> (/QZ0"&ߥ9!L|TOB`(0IȐusc+-ň6U"$]7&Jtrt۱s9z :1![$DתOA7kLG  ēHb'a|C[Ҙ'VԤDZ\z71ℬ}] 6G5:v&-e"$ a t!BV<^|R=t9u]s7ј'QYհq#*8]u@<4ȡ1lՌFw"Wp,Ҿ/u/!N\g"[}AMU3ߠ!=ɌTvrܴl8~nbŶşdrhj SeO. 
W݈u!0X25\v,l DCM3]gt8!j-)f?[R9%!s]JB54\A^Qt8nJ kLG 2A]{`oQ>ذӅI }ޣ111nQ@3osz* iC;|y#kLGaGٓau\gͷLAgU (7><ǫiC[ p3/bVN7&Bw4#87\tD`-.ob_PCʆy~6e^4G&,ܽ*l!B69/kEy^}}>pW%'1qsz ?A/wŕpјmfeiLGbQA5гPE;lǛ7zgێrzS[0~%K^A~>_1!N$`y;4#BT1p5vl(e6p:q.>ٷ b /uJzQ{\{1íbKJ&w9ՃЪ9|z\5)Qs!=!ԈBTo&~p7-nA-.јyotJuLP!"XYTYr 9|b[,DUIO; !N1'"[w"BƸT!IENDB`pairtools-1.1.3/doc/_static/read_pair_UR_MorN.png000066400000000000000000001005671474715105500217420ustar00rootroot00000000000000PNG  IHDRsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxwx?ZTEXbWk?;zQ!`EQ JB893M#{vy3;̙s(`[B8 T3-"B82-B̟@W"2y8?ۏ(L ?* ж+J0L Ҿ"[O>KB&AuEL  5 zQ^BITAM k G AAAA" P\צ-j/JzL^ڊ8Q }SmaŁ|Vk959~Mhx}r22F}``v8!)ˀ_l@'?c )T3Nۓ dϢκ:mzd_ejY8=~{Ş?g{в+v/i2:mO1e6|q9uڝZm{|[Yi!I-]Ԏm [9ƵZ]S>-߱glhuhٿ'33{ 1Wy _fnȠCҤF"5omTKvAsskO<(˯)7`OZvtShYusD^x墰+EZ}| :$6.)g\e䉡C~l$:1z^*aCkԈj=XԨcV˓: ?]R6'>jԒwkWjׁ #KdFlׁ6-O-]bLKxU 7=\);yӠ!l8s޺ ʛVzg1eFVmӒ֙!Ӳⵛpi9Kђw +_š%iyCkDZ_-Bէu$m"jNݻxk #ZF[b[wGZfŚI\QѴuhi;9#JpZ>zR$ß528/8Q.tF5)([rh (UD#}h‡_9Y,0*)Vڏ|H([AN^I~!KKQ/Ӳ|(2Q1qAR ;_kiR ej-GbD9MmiVU+LasS2e~wJnjDK苳dT 2ՠ!,:-vN|ѭ߿Î_&Fl=v2N:aFT;L#ZϛĎh-NݎgAZqq<ҽ' *n{VW %-A.0}6l>-Bv;ڦzlڍz:e|0@Z޽˩eKidLˑkVs'˴_$>Ɍ5%36[dPYN,JOu/dFxGÇh .jqQx 0JZкEqݰ4 hQ]s_RMiYu'%pCZ~Nw4=zcZM)NCӾ#hEH :@hU !߁y\bkK&ۖQUR>86Ϟ ePu73Zy͡%ݺ-[y ZGzmNz1-BŸ=hP |,#+hiXͮe>yU(.zP",wp]#jUi9oZͿ~0vXZ7JͿ4e+^\KK?eNDZ4ߨ$BfB+OjSi-^۪xw̅ABٿ6}N>y`@̛ÁqZKKH9 CZ~cvÞh|֕f >g_mDˡ1[tMιֈ!rz^yxџC]Z/^hDKh2ףMiGsp|(#_%:z He9UCC 洼v9FH :@h+%Z"{VV~eج4S!{VYcbU}XMK燊 ǦeHmqsmgلQ 򵖫 BQ׃i6 -:l5h 'sq&qjgPR~S5\QΜڏڅ6-P~k'kXZ KPX7eQEkzJ+3Z>mZzJk#ZJLRPaٓMV~>-N?A'r1k=ikt-A#e,(ppm A|Ȳi?q&m<JOgG^4 ,qB?oy,ue[n|ZrZj$)SBSZ&&>@jj?zQqhyթ)-,xy"hɊ+q\:tk0Z..eW.uUŪMtiYjӒ*V9[#y0뢏?؋Ք9)aTT??bOvǟ]}?Z.ǟmF>on~=#Zv>ms\ n~fm?gwgDKYsIr$ôd@K<풒ؽh#l<|I4Qn3_ Lښe@iINy(5*i))׆6L*pUnGj3V(oL˦ϟeWήe9Gn\E:,ڳWCos`b6Rk-b ihRoZʹR}atQ)G:-%eq/^qL^>pٿg6x+8iX+?IڠzִM]76L{!)ִ3,0JiY;?yF(]WFK3(%gwn=GˮZ#zT~*zy`HE@Gm} Wy(i2/JYW)^F@/lMQ/xl ^IcfSݯY~K,ҒŲWiiw,zdٻ3Fp}[#J x|='x/ siܣݵDǛ\7'S{[X-tk^7ii?U<>,l-]KYLzEna.9Ԋ=I+,侅?Ǽ/` 2Z( aMAwԡfw oKx'u;w1--/ϲk>̌J@ KXcZZ\_ךIc~ 
fo`V_mqfolfچuV68+-A*zғʐf09k -B] k4%3Z*a_xWڿg6}9-JX̦ϟ3-iY ?{ƈAgŊ*=h\F |rA-AcynqLې!$qEEnsYbAN'"?@I[Wm?в)-+'Z{2K,fi|)-Gе\g<"f<;%ֈ_:{deHK(^G'K&c(dSJchT :䞗rV{/oF49g]|{64>j1%,ڌk^aϟ_r!MzjoUv9ˈA[#u0FLT/ۮ(]4fRBkF?r#n#TҒKxʖQn8A}t 3[m- ,/)?-|rm_WhA9(j!XAȠCx` :BC5 F$KTLv   DeFޝ_@H.`:!r À+yuO<\ t(jO 56@[5+^uBeyX܄J^@QO{j֠Nc+Ы,`iɫ ~kU@,x{ .. i* QlBq<́4r㼥|{{1:vnչlfRT|>,!T yW0 * B} 8GH{&E)+c8Ө|c~Y9P&5Pu|&[)q{PWIawLLU9xwQÞ? Tr "SP TE?!)-B x/|=lLH^F3rC Bw1eG sU*<꣸Q^ar8} ߴCŹQO.ͤ Gtغ끢 @|>sQa*v p j0Bģ @=QS%:G-{3Šz7*@ 9.GEy& z!@b@_?C='nEna-UY޻}UޅQoI-tg&}Pnñ?ϯyP7P>m4 4!h.I'' Zv<*3c;Št`56mTQ1=Ix9_Р10㉎_caPυ3kY9U(lǢV)B`~P'y}|z-{L >_5ʒ*7 gz%Ȃ7CqFe-%},?5cF H@qz}@t1=pu P^PQz%yPMFme]~nhLq7(rHt,+G2* %!!t@<_(CQO >g2*$y.Gn"&v.Fͪ{W}="7 go |epfX^uQDqvnb&:Gk Ӆ HSនA!xLawp)U,Ti0=\rž7^eP݅L~*2+'"{QU!X G{BT)QaUXаpqVZ { OFy%Bb!3J~}}&ߥS/gzoP9S6\1@B㦅U|ĩju [$N]l2C&e(o,16Tm_B$YQA؇PvJX_zn V1Q717eD_}o_9jTz%Rzϸʔ8 BIDfra=/jGOP R {J IDAT݃J?!ymQ'Pչ |"P9% ByvaƏۋWAÿ y~.Ŀnq=m #evi!BL(1#~!*@=C(CU BIڋ: #4L`*|I BhZ@=tt}_C Dzxx5xsz*jA&vY9Bf3Z\mP8@.E#Us("UI?lO>pQ5BqaP8T1(f D󘇊ǹJۀǰ ϠrԽ<soAf#F& ]BE/PQM ޅ(syucN s6Q(?uYHi2ÝA'Vg3Pj P>Yv\@7j?P9c /TboY#;pc""tJ?*)&BUfܿͰPb!!p.ԀAp.(~ݫh:D=痢2Bp I&AE?C]ɔ(Ϗ32_]|:GpJn4OcSBԍd:o6c"T-R|u q"D`%:rB~kØ+mk(Uk`EjgP8xL M#|GU\<ˆ4OhbNy?l/]~9U| oxKSQNFD;}\ H p=0DzΨB\A &ycqhj<4ad/.k8~&^BFkwdTZwrBE9SQ!hmމpmqjo?R ű9Xf.tc?:IB¼  77 юTX_3}(n6G'&i4GDŽD8ˀѩn|WixP5MQ}f!=B[uL=B`4UÞZ` f05 B]UU=7`k'*AlX)@{;׹: ۷Gz^[P30ڄq{t@Mͣd?xp I۶(=۶S5{ap.WVGpI꣏6G' -exo$T*BX.Ӭ=@?hc#.ůǰH"T1׎//P)2Ryʰ`ɟ'R gؾ7p6 ¾Y* BF}:25  i$Ľ n{:" S#u`ҝ(c`!e|.h[} %GfxNB=xޯը4Ƞ_D~X-o3_#L$pF(yLV"m w<i&*pހנQ &onC>6%XDG@N2,W24_OiAމC NogTD pFT @]lU)P]QpQN?OhAዚ VFgq!x\Koqa`Bߧ@_r'l5 t@y]-,{!;5M B]rTS_o&.8 'pB7^ޗ:F˛qw)m~Yd8Š|S1,!4HDNDpx^ ATŴ `O°1Ay|K#E LBc[ M!55a'oOaTC !0Q lbUEF_ÍxTNY9B9H. 
AbM QkRx_cy!;@x"jyf8|z,is*d%_ٺ51;{4u|$JɅ;GfABZ k)~}ը""\E+J!45૟y!A%zTe8z_N";g>+ h.*~N 5Paj !i,O TE`ف yp5Y)TP7'TAUoE* -LL0#ǙP п+\LsAcѿ kJ?q_# hBpy؄ﴉ`WR9m$H1+RDBeʐ{&Zom=AW^o.0śdT=cL丩FL A& } Ƭ4+G(;@r"(ՙJ!ޯUJz O!}@x;֢&BGvj1×.6湨xȩǠj/Y( ͓*MQ:VU BuPK?2"HA<Mb3f.WT"'JBƒT=PnвQ i Ϭcb.J!CI>jSy仺5Hcv k5i"y jSx aGr"Ih؅g BXQm8X@x'!>nX  k)8<ńvK!' %4#\•(}s0 P6 k(#'jwnTu/P̃|,_G̕>t@<{Sn>G56SMAeg4| C}Ь@99?bZD.̞Gn MzrY9B^'D&i1pa-e:;½B34Pa[^cCLg`&\%2^Y8:nP"eGAГK {(jۍ*bJxX_yhP~N AjVNDs۠;6RvX)8/g聧ヰPa_USfA(a-1 ]ea-1*lOQ k G\(S}lEM @NBs"p'R = ;5ѥĶPBQ_0Zߥ ڏbVuW- w| LA%MEIi%`  ;Fxu kp t5Bw VLu|ݨ6Gj@oy|@nN BT7-Rfhd<_.yh%{PoN cѰpi)5Z4Q~бj?nw(:}#@0{c_+PcCU!y}-jXKAA{,B\5ĠJ?,dWA1?[G߰w`Xr-gʑ)~E ukO0Yi GąYHy^=llZAjzިȡ!*:sȭU-rg!h*MgL@.1\h0N+wofofOj7Ѩh Py}Ҡ^I֡|;H_xP^TdQ$ BXʅ +v 9.$wdVN@h ܈M+1u GָQQ#~ z LՋXA <-yڔ8A67AA5 t].B x֠4C9Pe2 80YDf6ZRTD\X ; 2-: Q!9.T|adPЂDy4o7jCOF}G_>74uU*"ʉ"IUdˠ~QP@ B$*]'9RU"ײO!B֨aHږ B8=8qAgeXڠCc TUr(/P4T]L;uh࿨Hj ~!"J!U"BĻ(ϒ̒V(i T/TB A10Դ8 UW(jU";EB<*\(bk7 39?ٻAQ);;hժk֬)w*ȑ#y |`wO,Y^zo*PzhѢ3g$%%|3}t>vm_+cS29<#rʸP ,-eWT "?e0TZhQsU+xsECYgf ֠CE(~^zYYp9E-7Itghkڦ s05J2y#> ]G?Ԍ>fƏoNhڴ)?ǀ%߰OVI9e,=ђش=ɧ 7O>IVTK4Sɰ؀C֭;wnpиqcfϞM.]o_ \꽮} 6qy/D I@[{=BNȴ`zp]^ E& } j;u;iyV@lz4;8H2q3,)ZV;n6=j'6HrK*q뭷Ҽys[`8o*ъ?~<<9Ew<#)!9J<*.F|w޿ ABS.P@h҆u,V$'@zL7kd̈́SsIjVǬvlz4;`çO8I闎JL7ll=zB*ڶmܹsIMM5*@Ԯ]3fPn]/e yrhذ!/BE !N?TaOOQՇ= i@7`|/N~F7| @G@4D}?OJ :8&!c˷~6Gwn-ܦܼ|lwOR;]0/ q\47ʱ Q%{z]vyޚ.#PС}%T&NȐ!C@6*0NYiAdoi\[,0c'6mOS8Q,m;˷| 堻ia|mڰEn!Ig2p\"zөS'n&ZA(waHM_iHسx#m"uTkj&XfH>ec<Bbh9ș^*VZ5QS^րCN􀃇ӫW/[aK% :Xz)))\y˞tZڳёuR< #8{#@a+./y6a^8+mb#m"6Fw$7ky3?<#.gy*r/r뭷4Zjŵ^k25EW3DEI4 2&LضeMxA饧M^NmӾדPWNǎޥҹsg;4hPUvd#qr_ F\ \ GE0,FؖEU() #;?#f"hl-GDKn/TwHj7v[kS¼l6|K7iDj+ń;Ü9s^9\.FqLzg?ZW/t膚P( <@TD=" FUX,<Ƭ'Q4 Eّ6OK79&;&_.{DiG8CM!OF hӦ ݻw?ٳ+A@c"ߟjL P?E]XĢ_6a'5@bi㜑饛H^]堻tŻ`>ĸ& 5G )%~SP}B\IH]|5 v~#;VVZgvי4A.C !>ѭ9 'MAS a3*Ž' #\i]P4hKrQ.vMiTDYiOwMwMΖ9oYZi9AȲ`7_=i+PY+`JR,mEXw:&{ (6wuN ?LanEeX*@ǎILLճuJcqh۶mi `=vM^Tv6^GġgVѩC9A щm]ҺB8 4!D(-6Ѭ Jhb}Mt]'EWW?Ih DEa% 5DJ {`]GJ'|`؞|\U#0mGߧsx]fD.4l˩q,'ԠHQFf >0`7opt6j9NwDnPu7&I&bO*6O9/# 
A(;IQ(%qt`T @>0k9@C[; #ys iQ QqըR_]?"gZp`oawGT ;i[جcv/\bԩS4#Wz b™:htPGD֎Zj'?xSf1LqRṈ|ߓ{pNy Q%BRRRPu P2PVuQ&v:ZTY l{l8U~.e˷o8;11%͵ gL4CvtB"MzjٺKq [Cc"33<\zB> 8ɧ!f=7q[?] ߑmހԕ0Mr#Gy'8?ôrۼI #GxXCYYY+Bqƙ>0[ ū8zGqӳ`:]0m>'vQebW:5i ahuVDAGA{ĺ0/j';z'k4#[V"B9p,'}AMbդgqvXbTt,O9 j4ic7}yvƭI9Jo[6jbbIr;/_HoxZ;v^A8gApQpo]%2 w[+wHL4{&4?n0LjwA_qxINf;߽G1) Bg{S^vYz $KIl#27}#ݡZf4>}`o]]QѪEYs}Vbuq[zzQÙV{ptzA^}(tim޼{Ap2]\sVg`}>P;8!TTfɶK,jP"@$6HrOPQ ] mm.dq4F fvƬ7JVƞų+!믿Mck.rrrJ[WShuΈϞu"2SӦ=zz[>UV;zm_xھm>p#m轸cp~rY,]jϳg7Eݲe˂I'֡ %g6N8NJޙ]c™RfyRy0Vw0i#m}_#* cCfʙ 41!{ee$N;4^ڛ;) O|=kw61gJr&Є$&>t3 Ps?ڟ?eΛ4:mUc:V}͆ 4:upn闎e.0G~#''ĆiRS/+VZgv]|ߦ+B萑ѣG^9\.^~eFmZJP0a |XUkҤI9`jpR97hvM:iǰw-ljw~lE@4:u}hyBd|䞗X4-_Ntȧ1eW\6D>rH$(gA;f{ Ôav0j*t wHĎѣG̀~wy'SLyϤIAY?P"fōy|wJ$:Rd 0-g>/x{r{Z12WB0$v,\ӧ׼c͛7STTv*Q3qDk58vXg-FMe #iE>Y8%Ipvxo|;0,VS˳s5Ӥ[;>jŎ /b7Z|Mt{B'1cưwsTGʥugɒ%L6\D&II%4ٛAZv[;^?k|pcleW) L5ecwa96 ?}ɟ}>nqW烾g]O?1lذg'm#-(¬Xai B^O&BfL.MxUzyhko7ʳc baeA9!a[j.z0..ޮ"#Gd„ cƼyBguӢW@9[Vq\Zw_XǓҐNoǥvenb+G0{A1c5-:*Ð1k,,:s> 8+˹3j+ txGCYf?!IJrl]˯)0p-e4D>4hJk>?3QaT Ȇa^kcOJà&w5YfJ3ǭ?1KvsѣGc Vk H⟫E]:DDbL+ \Bě?x1xFfhEq,cЬ aa"+--eԨQM[ {8ǥۯAo߾0g.\йy !8z1beeeH=)e\6B^P༽Jl"?e?̄ աW饢8[S/%uyyoMd&Ԧ-iuZ` =ĞEڏ{ˊpQ`ࢬ`p$7 |־1}͎HV~? E[VPQ8?Uf1kØ1cx(-[ƹˮ]Abއ蚋~%yyye$RyKj<4H99i)>`//hFY^Yג֬9;t$2+UV1+d&6g};7=Bξff .sΉU2|p^z)#y阵 |-77}f̘sirk$cɇyfps wر? #FԘC=zu5 mL81ojw˹;ϑ#Gy_zzzohY34n7~x؈$M6~G}ؽ{iӦwzAmB>}v/^0`@1`u&Mxb)UVVz7Ք^RRz ҨQj9x %%%<㍢B**N| S<ktgC!Q |@˖-v\o***شiiDT6mj<ד(** ]l܋M1߫O6dggӴiӪ^/7n ݼ3=A^1'""$(* t;&$+x=weLDDjmrOn'!ˁ/AA0mo4_DD$W8ׅ /0 ״ `p\DDʀ\`ۉԠ1fVn'F0+UT;A@%rԩ29Bmј?08DD"`If{Zc,i13;[ajpq@#̕)7afR4YvG(N$֜M uxk--"""$̚J>`jiìԱs1"E_5`pB].aߚ\=VDDD>0^&LDDD⏵ h>Sz]uyq9\EDDD$\ 0a˹K1kE}Ҩ]cNzss3R񙈈HD9{^znFEDDDB%P>l# |F=DDDֱ@)`G=J1KnD G9@F8bH=Sr9{I'.&0*wt| ^Sњ";*)ƺ$w@s 2]Nz`VlWK#>&LO1x$P.fšD ̠tA`, |Zv5H~v"aw+]ENJ0ED$NomX+^N :xKDD\ D v"RxxD 8DDDή/.동D"d:fj! 
x ɹжm[RScP^^бcG]ADJ&M<4FkCzz:[GرC*~h޼9ii=s޽߿5kFfz,"`ΝY& eTӀ&ֆۓ^FuTPP={B7\a T5e&M4aĉ 8+R0ԟ.u5)c' U8W:^XkWP1ω l0ݞ j>`Fkψ>'}̺%/"RCeԩ?6Y@  dX^&O\uU 8?ŋ[  Uq/z439 ɓ]L'z].$%1<7x`f0{OA駟UW]̙3M7bf;r/+fX IDATw/jٲ%f͢)}u%:'3o {8II |0a  oq"7ZnͨQ`m:XbJAڷoLnN%ƌÇ~ȢEMahV, ?ٸqc׿&զ|g7wlK:vKˌr\ngryN("R&Lwq~唂8&RS)xie$׃+#"X5jĄ N#HΝܹ\U=s8쳝~V.̵Wjذ!wY?zLвB`ǧjͩ-[E5Gp=z4-[t; &McqHIIGun @yũ@o=jɹiul*r0.ݍr"zv9ӈaÆ駟Za71?U?C`ugsWX^VM\R6%TKx< ;wTs ,>Smv*dffr)F :￟;wZ>εfggsgF;;\ĐeS:wM5E$q%%%qI'F7x`5kƾ}MCѠC$Z!C5q-tfHW;w]I!WvBƙī^zѫVqS :t(SL6] a}Nye_W. :ߝ\6m|;p0sWhED$񤥥q饗:7V.IK+$BXuDye%-юqq3 JDDׯkgk :5p  xoclӖ>?|gnD߀AW]J>g9hтvڹKTT61k:6,rK^4NIj""Gzziͩ T)S. *}ಉְ3Wc :K9֝={V[brj&vjs$rS<@k ۷p.;Kfפ)ED$ٓTGb[r\~S^Ys˫/xi2{Ѫ\""F}u<@3͛G5h*C& K)}t0x<nRd[wZH_ | 0S֞ekצv|vv"תUЊ-<@C疆 „20e Mv|nc{Lˆ{45MF#W6qQ},]L4Փ|HyH8x*4h@Z'~7dd 8Hdf-O">IչT6ѿEˠeeo/JƍiED~r=@sKIIIT{`grQt]FU+)(GDD$~kZ!]I[yu7IM嚮ҵ|uKTs!';p&.YY\ұz"""8">Is$>'WJ^^X4hGFe̞_#% DDDp;[mՄh3hݖ4xsڠL"""uzٳgsӾ:.l߾<"&M9[lfOYZr2<ގw;0a7xYGҿJOã_ݝۇwQ^Yc%L9}: /4V.vf@ċrLu;\vQ;^xݭ\R3`0y]M(lӖ-Zp.f[E:3 M”V2 V""u`/_vpuבv*Qz1cst|sq]wxHdf4N]tnn.ܱn0zឯ VpOᆴ,"r^|EFvdM 7v*Qx駝V/:AK_0p@Sspg2Wwƿ6mdK'e٦IĿseԌYf1{l6lv*Qk99gڵknރa%;z7< #/ CD$߿?OnrM71m4Sz :7=V.,KgN2TSfw̱/o݁)wa+-׬BD$ތ7.}.ժVyy9cǎunZi46 Nqq/ {z7l-*bֆuA KG CD$< zo^0oV^x1߹i#PF\Uw=gΜzsqǓ( G ۴g-ZײDDyg\٣ |C ߫z)֭ =C0 ֮]ѣ.CMVcD]FWtbłۣZn=tAC3D>;3SFǏwu65$2, n6]L':RS[;+,ݐw&ٿ)HE$q5R+?]uos墋.ip[ƌܴx B6&Ld~1=}IL[܄w{a<R!"xЃq,4v(30|pƍnV2n887R:`lڴ . 
gBs:efK+_bHNv<vk'""믿fM/av&``O9(H2M6qWR^n/\ _C*[p,-5zhFxKayy,$6m䇟؏{}>~8EEL_:5+*DD4|̙ c?8?#<ef^1c#87܀`,Qw#+9sYf)E^rRwゲ2^Y2h[z Zee""ƺ` [U(o( ֭[رc?ss1p!YuM13MN6s3tP:t@j/=n8&Oj)4gnf禇Xr s̱ߟ9s搑^VuT\\+¸qB''_Bkj[@zس7<Lt;*\Y9ו*.q]\9 DD>{b`l鈙*QxJDDb*`ILga%$DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD\vR'큖<(R."G# $WN`Kv@vkى3<ɽDDDDD_2rb_g.ئ-}9#&J0Yw@"p ~׽'SĕA:1}Uk`'_)-ogOIONJǻH] ڟYZ~]8͕,D[Дv\1ǽlDc@tR%.m”Z. k!}IN'r23]LGRVִ5Ǟ{Jx+Y$8_Wo5ʖx Zt!Y]Or1:YTcm_6F pq=\NGYAΥIUc0W;kJD$AFZͲ9MjvI1YHJn@eװH丟ԠCǔGvZCL`dۃ^V"1FW8BD&hJ{Qě3KYQ :{j$7oؐtfw :]1$OIѤסcVF#)z;pݶ([K4hFْ';xQ :Ħ{]0Y_$̎>L9D#/5fÁ"HbOIop[O5ʖspR7s1xp~ Nj"6}[6\LGY)ڲtDijNìe986q5ĜTq+HkښvtD$k#stMkpW?׫fl)<[Dsʺ]Yc}&P"zHP$A%a}Ohj-\veK u 1_:snt]V0mrH&YAaH SjT#*鸽9;u?7ht$J C1-tY>p9p~X_U܆bL%?T 3Xiz};][mZ;Ar0gw5E(g_i)/,_b:"Ge4 ־19b+y"V5:M5Ӭ?v鶭.#Rkϼ%X71w3X 9xny%M/8?M"ohN ,ڳtDjm/0 w.# ͏f) $9E@.DŽGo$ q1˫]$|iLEaI# 8/wsO$xiabC  +CDbk8m5 /r1SŎ|sgq{pux t}f--/+Q/c1oz!;_ɑ/|V´4sE2kw1vp׮v/s+`Nh|߇y+Hk\,Ͻae=X~cYغРC|xq?ՒW~ҽ{ӑ5qj8⿆5#="0h")e+xtDj-NJsE[WeF^|UN])ߘՠC$Si_^ԇW(XH ZǬy}0ݕ?۶Ʃ~ p7 .^Ll/@ye%O-4Ob_l|o*%7ęyَ0t0Dc#f%0sZ5f]U浇URJ`$aL}+ ocfZXi`A$|1XO{hI\ UTdt7#'b,#1\Ĝ˗xL]_Pe2`_~y .^`mYɶO^u1Aَm/1ŒS]hH8=lg-aYq[W" ՘խLq.f Mq8./AbzJ^&/ Wvyl6==]LGbP>Is@=PLMŜ3k6qscbt? X7w2oI\)Ü\LC\f$jfVALbfhr(fHktbnokc㹷| ?Ӄ?ؼor1Z+Q~WV sfwӧjcI a;,~ #Ӏ41̏/+s1Zl{~ƞ>p1/3'~G˛]W} El{ 0V԰ǹI٠930tsL6ΙY>6dI6fJ5hk1ח Or3Aqp\߂! 
wJV%X3H|Zw x~o_3lRwaj߇)C'4oi5ͨׄ$w dРěk3AŃYR<kgA78ɹ> :.IDATH I=MDD$LXm{)nMhAEL_"YiA%Ma6F=#iߤ)⪼ \DFk Nj#"""""" `UτLS3"""""".ѽ :H|Vb נ[DB IENDB`pairtools-1.1.3/doc/_static/read_pair_UR_criteria.png000066400000000000000000001276171474715105500226760ustar00rootroot00000000000000PNG  IHDRuuXWsBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxyxTdOB6 " "Ƣ"(j[mnhEڪU[Z*(.("" !=3qrΜ̙Lu23'3PHy~CK.@#>0A_ \0p1$"""qw q0A^ EDD`nE ""D==~ .""" ~o߾\yt҅$%rlЁ$k;!$7x`C|s=wӷl7xOrM<;a +VOЯ1hs7pY$&&2eڵkވӧ :2G0VB{Yiӆ)SpAZӣ Kz7FXW_/ۘbSXXx׬Y '`$ume Ne;O[u3(*v|ǚ'=lz\~xՏ=HHaM/[笟W6 ~?{Nj?uS'[usߧl7h_p!#Fh=I~ իC{^` 99_c[Z3fp5װg. IDDDZ8 """Q/pYddd2tP$Y)@F}kOAVbV{rڲb/Uu>c~볎}­V`OǤߓz|'ۄ[InɱtU|Xum󞷍]_b yҌ|>=G6n9E|MƌsoiY."ϟϨQGLDDDݑ,; uZZ 70ueVcҝ~׊fUw:'f.~ƪ N Y7>;g坿QUͪO_bJߜ4٨Q ~1M1ZƬYb*7 8ӧY."""ri/"""W'ŭ]HN׋3up16:s#7ĆoůZu=̪lnu"1slx!UǤw.}߭.@bV]]RȖ^{҅MslLœ9sHyfάYbmYg-ƒ>h L^roT"""ڋ6ݻ7;&`4|v*. CɌ(-N&0v'mmǷ}Vқ3o%y.&IVd[ubt::kj*w*}?)O>iQD؛;LfϞͰa?{6m֭-CDDDĢ%y"""2vILLtq8".ζڃ17̤9NZv7דɪK moһ"%'n꽡7 t&kl~iG7!٪[ɮ3݈:oN=CfKwm۶̛7U s=C-KL{"##I&9w ,6ײz p&6:bkjOlGFcҝEξ>.5h{g[UBC/ {!J׋,O_NN?0/C?Og$d[u-|n^71Mo?A;]VKh'zA޽ZGS9* EDDZ>iiitͱDz𭭎.ե3h\U@n_]7Bmor:fsζ7M_Xu_kՕ6} VTiiEl{[^DDuh~ѥK}طx lI}\mtS0p`ۛKnty_)-7>ѪK^l{s`lU'an}jx}"rTD/"""[;ۻ98Vo78rBWԵ pM ~l{k3Ug7=*?oվ4 .56xǤmoJZĞ[u&qM|g"OAALEHb~8Dh V Tmt>i!ho{ۅqco{s R+>Mz/xѪA#::>5ql;irss88DDD$z)iݬi;Ouζ 9A=fQwU\ GۛMi~Iڮ`ϦYuBz.j%79߹ *vw=M9BhTfhM` :&gX?ݪ;7ZuBzϻo^CWcҝ1>jڟw{=Iٝzk_]Ѵ7& )(iݬ[n=y",QD V էJ\j&ª+v|ˎ^vc6:ZֿNc%9Uox} ueV֥FL]f7KD Zuui![fLYvTiڋn/oNYrvlufqRQ?$C/#ԖXuJ^)^5FO YumYmt9FX7ucFI{Uo4nr%"""""""`UV9*`.oBlT0c]V]W^8%7e dx6Yΰmʢ V&̪o1gj`F޲eC'"""B{[`Y,XDtf< i۾L\ؠujO\<'nյely҂mtDr""""QLe~b VXXD!ζ7iX`'XvytyU|=[lSMo=xP 诳chL6,\omb|W.w&tmxw}$WUUŋ/h?Fp/"""^DDD^iO>aNana8+9`#`ܯHHϵuS#PWcՙ}N%{hޱߔoڪR2(+?57h?88G$n^&3 7㏻= oc7on?MT/K/Ě5k\K^ |aG"0%QpѯrfqNIw:講zg^Ijzӌǩooӓg^i{>l㋣ewXu]y)ёp뭷RUU0CqM7 JؕCMs]Dv( kE>ذVƏ}*p~uOĎ^b&N¦ Kɹ]0lkP<Bmt>6:{Z_k +_?ao$mO8=_,`h$R: ŋ+Le <xG8(**"""rLڋ;.]7̣>D"&=o#wKIwj *gӛա66KߤfNO~gX33 'ij)^.;>/Of_KF߱}t}/3M?9`0o[L2 
0֬d/~:&~SIFEDD0GDDDL/ΟgnfCϰh{oˀYpZu{!wyVc46j_R*]/UW̶xl3mg~ 6Nt.r!{V/hfK/tRp?Xm?gy뮻.~ \pDDDi """,O@{:`恫-=[ee%wq=}C0DDD%Ph/""" Q_~ٳ' J3k֬O $ƍwsǜ9s"4"w9*ر ] ץ] <0|p:uވ({na?p5;D nDDDl)0x= 2' -{͡K!<7$w#""(L=؅9x 4+"""rD4^DDD\ GDD—=n=&a큈} B㐃2W@in@H*8 F nG>8#@i:4 M EDDDDDDDDB{j M uޤ^DDDDDDDD(X`oRp{{#^ґ&uIȑQh/Qh{8ޤ^DDDDDDDDq %k`oRp^؛܋x 1F~ 96iz`8kMCDDDDDDho5ʹf74^DDZ53DZka퉰񈈈N|k ?5E=H w5@b%"A3Ss""r-^ȉV[53c9{`=h Y_iƽHL{qU8{;͸opvq/""-[aƽl"͸h0oXϸ?H`o"QN&܁I w`oRp/""-[ZCpޤ^DD{917؛܋D1H&G'RID#npID# m\pIHRh/ޤD:7)ht4-I`oRp/""n;=1e`oRp/KD؛[IDcnh{{q˱m1&"QFDہI؛܋H4huK,؛܋H5%=G ؛܋D؛;EK`oRp/""nj떖7%7)Hi\-.oޤ^$J(ޤmI9[ZbpI[sl1}3&"Q@UGk`oRp/""떖7g`oRp/""Q܇)7)qB{ hM5ޤ^DD"!--!G`oRp/""-56s`oRp/"-%7ྥ&""Nn>I4H׊>BIKn@bOK <#?Ln--@YDDZH]x#c8_GDDbߗ0!+טi[ G(3Y\$KAj WQ-)7y 1 Ė:5.E{S}p?lGDD$6UA7Gqybb}`G5qRE"O4>Pt{,G+!Τ~PCw """>Fpɷ|8 H XtF5ED$0?}oW1_3KHZhQ8>zܯo/gPXDD$68w{ܻ0~|"""V@J<-RF{"B{ +S`/""Ҹ+X{K{(9r+X{MDS`/""rb5W`/"",ւ{"^"F}(9v+ V{" %܇{^&-=W`/"&q GHi{iZjp^Dڋ+7?"""ͯ ED5ki{iHF}Q`/"">--W`/""r{"r0G{Ӂˀ  JtJߙ&nh@C(+h qCZ| 3n% nK IDATk]4+ ^Jch)w"T*0 Ht{ ǠXP ޥoJ`i%aVH9큈HD{p^DD@+98]Kcn$'m\xmrzboŎo)rU?伮V, Ltǝj{ ;Πv/. N#dؿy1aI֯(k۶MM=?9ZgC~pym۶̝;!C7fRTTC̙c?L<9Z~؋:q2"`%v,Y$ kh1s{ h#G Awr< Xvp^H@#x 큄zKuB;a tO3<mڐ:jT Ք~{/)!;UmhUg<[u%ӭ:>=ܡo}V`xv ImCJZdm! 
ٻ ^`Κ4j(ƍe64_"&x 8нQ5|f͚5\ÿ/pY`{#ka|i0dpŸg 쥉?v{ 1cVzif]1RIcȱH"ܛ?^Fl7vKWqn$L 1`'Clf{ァ^"NY߶h0%!hL'{\*B+]˷Kϭ:ϩ_]ɮOfYuBzYǟx% IZS7Am<}-mʔ)̞=Z0;*1]fg޼yp * ^/?<%%%'?i;4YBNNqqZ/-KEEu3'$$5Cuu5{ zc\J`ߵk%ٵk}=qvn6~ر#MlG馃`F+h0sF͸qh.BL<իWEMSc:$cMx 8/ofztTWxJx}70|< 0;vҲc>-4h{/w^xf8䝋90i(NJ8kaH82_ӛ3'a#W^?K~OgcsuI!7A/xZo7{]f+W$++9s&^xYaսlKq϶;`wyn͛k1nF0 [`߳gOyN?tFu}Ph?`p']C2q&&Ob1_M@F^y1b_| _ PQadcp{aÈ푸I>{ByK_>h3klxa.ZP]UՏN| qmxT|k3JWϵ}0%_-'L@B‘I_s5ިJ&0 zLRwF%\ IAA|AK.SN9~f[&Z= X?@z!To}XhQv'N䡇_1"X̰{ѣGӷoN!}ŔmoBMMmw2S.c[nI -WW9VՎ>F2a5G^xU:t`̙4~r 8k5J;`,F˙hv&c7nnK0ǭo`ck/ekSu\>Nc0,f\:6]Z-lr:sժfd)Xwp'VK :9 znn0?`ɜx+u c{9e3ָ7*No-馛2eʡKOOW^OXT7܃ó>Kn`\ID|O{v!f]i?[4 9qYeoª ϽʝVTeuS'[u\J `ǨʤwM,=L~Drs4%%xZccE~7o^3Xeټu:߂%c5|׵>R[yhk 8Y~fyζ7%E|e-%K?:-6&1BAcsێvy\U`XjhƌCAA6m2]M7b|%33~ሸxVV u2$L%@FS(Ъlg5i\^_yC/b{F˜Uk7"6~N AFKo^'SCq?֘e3:lsbV;:kؾ+ٹ-NIbxMi=dԸ0ªk׮<?4uA;k*9z!I!]_$XZ+olW@v~Gj<^=&x|)Uw7ħfqTǪmP`]-_ oM ~wv/,_nŒMJ yb&q=s=G>}#8gQapD\ZZdCClRo<4ڶju%VcՉ{9v~zG~}_4Ӟ5RSj;Bf(%]Mu:NH#<W^y8 {2nqM$FBc6@:`Ű7B#r\/1t}Vݦ@R;dmtBKېsV]{ KJwQ^/Ǥ;~@m_w|O`ë9nKi׮v8;{W_͐!C~Ѳ(|xިl$4mtfDZ*G}8ö)j{VfqtR;R6Ҿ5M?4 3.9l{^]/tGim3Mݺu뮳D6q۷G$ZJ#?s-i0~FۮOf9 w)۴[ !3Ϫ*)4퍩x|>^^]r% "shr=n$$$p׺<գ\3΂eiꎣ!9Uזy֓VxڍN?tN֮]8iii<#dedygMi})qcH޿g96vmɗ morom FۛүYuZHڣ"Ӷ/9퀶7>yul{c*[[CK"Y8p IIITUUc!1fٛR?Fpoϸm3(]Z)k۴~uՎ>RZlߺN}&|)Mgf;O[:vC]eNfoZ3scXyI7/,99sF͞7iii 7mo=\z7|cCbj.w1~c90XQ_/Fk(b{jf Tl֪^r#7߲×:ijwUjY?=h|lsklx}ucҝx>k$ N:1f̱޺޽65 X {P`^篮`'ȴ͠(jmo7'9WocڏME޴6 ^($$$0d-n 9. 1 &T>Pd/M&FKz6Z| =^/Ƴe3qP\Oo7֯\:o(:'/?`I @Bzϻ qo^C`=&.`@mGK{n9޽{ӻwOꪫ.+ :c{kW˾bu .Dsc4fKKfUB|;!)BK<34>}/ڟ~9M?}w6&kE#UE[uI{XޘW_}=tV?:DR7`75Z1xgۛ)w `u)^5ת;y@{ۛ8s<Ԗ[uv("GhKw5j[qOζ70zM؄Ɠr3i]wl,_ŇTZuÍK6:~tǤ[3B3: 6'nj.h7bUZ1,m($`sF}tm0iMyFufKL U_w/cqt 7q]/t%zAۛ-.?5t F3'm­M{cҢ{x"W-qv: B dmMg Ӿg=^j'.C3~6f1P9^zo?v km{<? 
ݹi:PID]pm뭺[ö7ۿ𣗭:gX?ݪulG6W&5аͮoY:^[lza?4 $URR$|-z}z}/?trпmf6i#;BtqƮo5@+@MR*"rrs9}ΡsYW>}4Xd}muNo ^S$d8>HojKpݕXV]]/iYu8z6\ݰG-_RȾ1L~'5z27Oxi`.c&W`jyf_J^kfdGۛn:go}Th]r@ۛB{ۛZ ƤE6̱n`oi Zr#&4}#S[loQ[؀{`ue{#.-_[hcC%ػw1ϱPޱcG7!⎆mo7x|.V_m*̡`Y&m!JKN% BIҏOb`|,|Z3qx|΃;Ʊ,|PW9ޞH8pv"Ě߾}vww=: ;*1^}Xl|y}r +gSehNg_hXo7yڪӺݩ^0It[f?CU666|h*5Ilazrp;ĹalW`̜Omq)jS{ ߄^v'B}`]@f) >dmd'"GgϞ=ҭq4šf۠H[|cfF(cj8ucf{o|"9CBkv|{}dJo~M&chsҍo6:;HYumY1gޝHsr6nkj3V{pѱĒMI!{EJb~ Y֯(\8ͪR IDAT7͋ՎI yt9gڟWoT{ֆh{ܪ7 Q{veγV}{ JѰc-JKܜ^b5KN%{PJ8.K {?PK}U't"Ӷ3u+(\gյxu_."ruDup֜*rX |&B=š#T| g8mC WΦPYءOȫK.f;B˦;^_i^yѻ>{hGmh#"N eC 2D%Fa1ix`&ͶEU+W ?| uK՜6xq:/\*UXu㳵ܹɱwP~gs5|ڗBsx| ,7a6 [F|@"-V !34f򷩳IjGFSlߺ֪=>y.2Wxܡ%f/1zW'$uSI>{m˜g* a#^SV|Db[gp5`,60fax`85'Kr^hBo"t}W:ue?u<ENծ8'``]-e8w$ڴic/hTF,Ÿk:E8{sZi*.vvW^>XSC(oF9qԻW:Ƶ=LuOV$v&shã߭uPm򥿺/?r<=h.AiMj{^x1wnݺA%?,P_]Aq_yC/s6:])46:idxUWJѺOϩT1NkWʱK+VUnՇ;O$η[qnH p9(0f93]՞B~U6tz)?d=C_xڄes/4kgƽ{7??/  M u+JT3t^7!`MF4_7w7`@V׋HjԪ,1:ctaι(Ÿ P~Ny3>ڎx$}gfH&Ol/SO#Qy"6,qpl4b<ƈۄ*3ԯ:ypUZ 'V1"on;ЦoCvwB2mG=pi#ڎ|"ijéߡg0HϷj;/LAuzn0Vwcb7(vDX`oRp{=ߡVz7Nt`{+ #ى] + nUtq5F߁[ q9m,?XuAwª ?:94XPߝX/`ْ%K6OO%qfa7]E"47)W?N\rQ|c=h[IʡmΡKٳضPO>Fgct61` 5.x>[{&:fp_&lݞCgoRcu{O)Yr9/0;]2v~YcoӊfADMfprgk㓕ш^\)M2Nph,}a8~B_-(۷0BZW!fN^W/ț7Hz۳sޣ@W(+ټybs[T˖-c޼yXK;!=.'{Z̊C{9;oӤHwd>/'zu|V (X7zxϒ)$uˆMgSv7s{[1Ap'߱M}lmR'?w_|13gΤ}N6cƌT#TVVZZի֭=M16Y]pnUWctl+1WwtNn؂Ws@^csތz,`͡K%FlW:ASZ\=Hn:ve?#p`&$7)$xJwWo ϹZֺ+MA|c>E7P^1J͈GXch=h; 8X;Y~kCNߑ~1:M$1nĪF_比kw>mO,ؗ-z*-ctJvLVrigU0mb*l/ {,mf˖-׏VbŊ7~-"w4{XpN6VDt%f:8 v2pbsPkg]Bb9kmNѮ;:KJWm_<*"ÀJ1qJL4(ƠYD H5Gu} -{F:w|"mG8_=1F1:Z!>icNctv/.wՋB1:j< 4Wڗc~ 7E fVܻ\43- >r,B Ni mXbٽ`"-ΰ6;V]ٳu ǑfR@|J}Z_6?Jvm`|m}3X"w tU,⬿5g:#.cc~1sL:uW:vmx{G/:(29|/'jx/Z8p~s|<D( MZqvp`os5kČ^¢ț769Fct&L;߰w?ooϺ$pιؽx4D1`|\p̛7/$_D%&&G1bDj* xC0~܌1_ՋN;-lMK)9Hos57`|2;^ |wB {&ﻏ1:HϼLJ*!w=(7kXwߠWY+Sv7cֻ_1fӠkP?|9gZs}Hڵ3c vzόR=˖-zm"T+V 5U 12| ۸43(B`O,|F8fh)ϠqOWNHݛ99~.%Y9S^¦Y [6fܴ_"Oaa!7pyyy>9L>S+22>I| C? 
뮻n) &aY؛BK1~HޤԵ)WUɦ /^ys?tf62z\Ӫ 9}G7טg4w#O3i;Q־e)/cz؃}O3>$ϩ>䓈 Xvus5w]G-zWOpY۷| 3beTNF~UhߞuyVwT w뱩Ud ͺ`|-uժJs+QsƌXϧ,_ϊR/R`C휨OfoTfή7avC ]^@Wa1?.7ٷbUkܖv_c<NHI!߰գn~׀7$.i>{N^OZ|#6̛7AQPPt+axj.L:o'-ŋ/HQQ6NՕ[肢#IrNMJȹz޻l*|gծL[h3Q[u1:z B|mSPRf-.Azh2k}0랪Jn>c,X~j?wG8 w1gϞkvmRFbL$q>HlL^|Yvzm6}\j}ػ oe9;f793P|J܉x+9eE7o<2\q )99/$#k~/)Y7G<D`rp+K:u*={t(--塇p!pPLW'c|&L૯+p+'w@:@P3Cۖ1BBj&g>WUcIߑ`{Z_OMkx S2zsYM13аeVzU%z5L7oO XqS .K.aڴi4hB_g͚5CY ƿB5Uv=9 }IhX`oҊSvoٻx Ưqc\8vZa&k贼^6M|ɛ; +@_ٺew?wl2 oM쇞VN\'[onB[nžʾx,jbL}Y FNN{94]Yh֭>TR/SxrYKmoSή9ck|oEuW-MoK/$&OLE c}$ؿ?^z)?0G ֭[`]LW'We}deeDb*Z>paUl#}˦SYY8&? kʪYluѐ,ۈ;Kqϒ]Y+ awQͰIhD'x2'/yk%K0`ӧOQ?~'?S;;}8fo#U+խaKB#F{I! dzvK~7Y紿I<qrVK~i7BMBWX+Zu!c,LowB2mG==ct}\PPݻy u p{<~_RYYɝwyϋ:^?J#%"C/]6meR1s%(W ,ZP***˯a#7|ɣ~R*Ӎ<7۶mc̜9S= 1b 2HVl cFO%+C}+zMڒ >{Ө琀wTkl@í wx@…ד¿qxUALzC4wsʂ 7.=~` F,zE]̙3i֬۶m>|8*F⊕@[orM7S#Xr Sͱ7j 5`$8H)<JpsH>l*֯1Sfpگq1~R5p+ys?xNIѨRͽNE`ط|V!E1FݨY,)on?i;.Bxp'Y+sl9_Cmz9t(`p>*  rwSQQpkz;6`j[O>zڄGRfrpd`/*,R_ar~FWmqSƛ-[K5;9 ('L^b,`=5gݧ8fЮlhځ>"6i(+޾x{Pv !P8-p%$QTQ}nGB[Ɂϻ|ҿIk(X1CѶ'%H|>oj^Q>%bl>8+Q7c%_bÇg6clHWUU7̔)Sxi۶ݝo Qn jG.^/s=Oݛ6mHhxl,gÎ\2OY1_$>;.?w$98 NSp'Buh_]0a|_~>5ܿd7c\y6\.zAILL#9s?!t̛Oj^BhЀTXCݻI ߥ͍Qߧyf-Zt!5j(nOV\i\trCکx !CHI \p+..fƌ1` 쇱V %Ee{`/Shc\9,U,X 0Y 1S1FIJ1vpvs7cQBln4 1 t'p}svd?V0.$G8aH@ oNq{Fq0/`N{ =HEJ`Ƹ p' '_(F]S?XE"Z`L[H h{1FE1?q IDAT~O˜ߝZ{J{^DB&¿'R'DR`or~r*G%+n$H10F,0F$Łk4lj,n /8^A h1{v{I5w̳"åǂc^(VC/sAT~Ay|sH(0BFe"4BLcgtkZeC##ٽ|_)v/8&d F 1^F>Ne1$GX}5" "ޤ^DDԭ u^`Ι'.Pfo%|v>EDD&Z{{DB$Bx +kZAʕ.%ʡjl>௄wDߩ tAPL6:݈H,ޤ^DDh M ExB)YmGy1fڻRK w16ta+WyY""-7)(Z{{1+ x0HqR0ED 16ޮx gh M ED.ޤ^DA35fddW6mڄs6mv!{t--%ϡVr^=Co;!S(Y$$$вeK9u<<툈DhM ED.ޤ^DjKޟ^馛o~CCUJLL(zloV`PU]>;ybqO**n%,\IIO\=x|fz˂~GsfUW]`KСCk_m*:;JD$ԅNrDD$եNrDJ}k OT{Gdf- pXqӭ[}`_>SNrꫯgJؔVDDյ^DD>8؃s+Ր=ࢋ.k׮̇1g/VO!p;W_ ,ON>}n%l:w̍7h?tFx/""@tK'T[}V酿RX*X%| (D 3s)}'a,;믿ޙ"q1}L}[u4'`ʙ3gwޮ];ڵktaxbz:ޕۅsT{ƴ) Ǩ3?G΂M?pz:U:о,IsUM>h؟P̸guܙkךg(#17#W`/"":{S{gq%wS i5?#Aąbƽ8뮳H*0;Tb"9ϸW`/""Q1Ch{癡[gP+1pJˠ,`ވ>x7P+"")T}P ED$\GK`o W`/оycǎc\ײV82/BBSp;zE\\ц|I%7;W`/"":{S0{Ƙ}ah޼sĚV@[Xo*]ct(!==&mk;WD. 
Vpm)X{ S 57+,nA-F9JԳ5gk0>64n^6tHw}S ؋zBII؛N6W`/@@rrr-Iۀy5Ω9FgbQ CG}ޡ9 =7lp^DDX M'+\n ~ >>Seר֨30Fv>4'J`o:^D c-7op^$2ZNRs|&pys>̜\bQp<P:ՇH49>{ ED$+t^Hs%EEٴ48#$[Xh:ٷoޝ^caG_ EDӱX M ؋H$-T[p^$:1|Wv\7ccYS;1m"0<< -J[9aR uYF؛j ؋H$וT3W`/=oٲŹnbDc΂>V_4Ok GRRH8Xi^BshU3+fp^DD-/K*dXrCĘ<>Y%a+FJ`oRp/d^+""3{K ]GDfPkgW`/=~y㜲co8|Wc<S&J~,68ԊH]`晐#K=.0lQD'x}V_$ژ<@nn..e>^dt[8'młX M %TƎk/ C~N.N"""""K; }FXSQdr؛ `̸YrD؛x,a.x>2wQXXpgvq58F-Z~~Cz+ zCaYesA/p3hs, GM6e˖-$$$8UJLL:i]ˉ_{\E𚭾x_8pZNmu%FepJJ"&-Çל 1cSO9FX]}L0,9@oU e{-_,Pp/""""rܶ4?صk|DL[}V'CW=DC3f;a~iƌta|r&Nh?o؋#>pWQ9G \+5*GDDDD>:<# <ή>)qcl.k=ka+ͣr~ 6l  wcl ѣsЫ/9ؒHT-7ق\q_[`o#""""r 졽x`ΝՍ~o+}QQ'N$8Xk qK0vHx穬䥗b7~7oП|j M+7)9uSxxGQ4iB)LZn #P`(FL8"a2Hw+fcN|嗹+ckZ ""!r)Fo`oҨSs'̀U@y஻W_%---lEh{-m7}ctՕT̟ǝse]وV}as]v#`eW7@-L6+.qeﭷw*)S M EDDDDNўXZ?LHWUqӍR`a!#uN 0] {`9>B FDDM0{S4M EDDDDNL͙v0L S/R)?1YpyM펠Q9㍐.""""K) """'n0x1~z+p.0?\'""""m4GJ}iTA 8.799*'#qLn w(Q`> EDDNN] ؋D. Oɉ^HdSh/!9 EDDNN EDDDD"B{ )Sp/""rrb-W`/""""N7 +{>{PPt/'Id:݋=.to=;\w<Z*Zi/!큽fB`o+`[+EDDNHW`/""""]^nd a+Wܯt{'W+>Zi/A烸p^NPvѺ ׂ^D0C\ 8hD\8Eq@oHlh|.𺠪}[;O_k? ¹J'< $z8N:D,݋H-KHDap_'{S E"N_mӍ[}0R@WdQ8lzdsXw<#!OÉtJ ?*'6U`/Q\2CQ` /WB(}:3g4K/X\Gzx,իǣ>Nشip90ڙDDD$)NH ؋D0F /O\\{ɓ:tKNcϚz|` Ϫs, t“O>y,7Q9:ӳط|ўs"M]2Ū3;]@z[2v>O%ṁ~w/;p'$Ӭߍ7m[̕HkE[WiKV~yrogAIIyh%o"55?opEr~? g.x_NDDDDq$"tTj8*GH `[շ{.%F*09omN{_{ I4yisҚnixΥ$5hn} InԺ=M7:_`^wx*FFX{+(ٹBJxT؞?&Gֲ%)iѢo.F:NM.t꫘xW7>""""^#‚{5DRp^$$9Cjj*ӦM[nv%z+0Vs-,qU+C>w 94k$NΟ)<&}C=K Zz\A| Ο?ʪӚNZ+~`w&v;5VCm^k; VBGYfC/x\߰a_>_5^xs]`r؋Dfӽ{w KyS;3vňrBۖ}ҺМ $R[ugw iwx;C큷uEOj믿su=z4fbܹ{1\W""" KX9+?'{"Ʌ лwox.1>|3hnձ3{~M=O1V'fd"el:uV"K>qiVv#U%t3Kv뭷eˬ17;ُ]vv6ӦM7xwҥ 8ژ8Nq$891*GHĺ`=n:Hty +sKl oFPùkڪ.NeQU'g}f?.0F}FHYQ)Hd)6~ު ̎}`׷Z+%2n^}U8LF5kVN;~ @ 0W`¹^HD-Z0tP'{qֵu0v%\6! }FWU_Ҭߍ)g*1L$U'F)9mhs6-dv>z [PبG<Pg+;gKݵOr`UrǍCo-t+oرcҥnmTO*0xٮDDDI 1a ؟p E"ZO#l}HynkѢ]v_~yڋi]Q9 OQ(G(xgf1x`[Wc`2Wqa#DwEVm9` idߪK7%o#zκܴ~5FSVIjȯ VqĢn^vNs q! 
I({"Ql󃤤$zd/"!bwa<cੂ%Ka_zX#!~\8a }U^<9kds) iYGeEu{F4Kț;+:.96aVv|'(O$ 28EDDD^"B{A^H԰ wԉ'{1F&)j*|}nHHk`ջN Xܰ;߄;a %y@^C,J}Nј9}GަUoT܍.==Ν;)KRp>D+*ڶm{D7Ʀ`rsj%ܻi**-dߙ2c IDAT4}d8z\aĩ.}nNʡ;ƥ챯wi1:%ly"1rsCDDD^") CT{"QAf͎vH fg;lu 0V&8ת륓}@6,`frOER[YO_ WCrǿCwҪMpӏAz"}2ǩ>DDDy %dp>LN&W`/222C$2] W׸b|> Ӧ:tyؽt@bFC޽sdXΦJ>#/Y;!6#ylV}qI)F;"aÆS}KD:^}Hp^$j%$%%9هHdjklfqÒ/%7hFϵM?֘]BfoXyvø}>61>Jz۳z̮OʡۭƥY2Hj( OjEDDD^"q r<{XUlu:Ъ9k]x\>_zwBRzt^M0) 6eUfI pNѶ5.wO?#R~Q %#W`ﰣ E}]ZzOĮn[=gsmuW{0DBzsuUqhWͶ}hxe>gS->%6W=p[&V'qgX;+fo!iȑSEDD$)WKp>B)W`/8p>D"O!n qQI\Am[YΞ_ZuBFC 8'ThS3K Z_kXuI^.;UgugXu|j&)>2V#?T"""<j #=W`/3l۶h=xu>^ lGR^-ޞS*wI⭺xzd M/۫J?YuJN{%>6~<>O`%;[u=lt͵dzĨc"\KĞA:B0v-`Æ N!Ylu/>h3˨Uvi)e˖C8[NivmδjK|wtZ }0kl *Wشl߰ln6VR _zhzDbŋe~0HQh/Q[}ȑ`ƍd?"o 1n*;̎Z n?M>sy;ہKιC9Jvm`[76:>O6崳}l1&#gymm\ XpH`CHxn>_~T[qE؛9˴ ] CF=~-?GڹVLZtJ>#M0vLHϦU7?{5`x&`%_q`VrНkԯyc6>>Vj߾}L4~h,A\"""R)}$t[ikκO:˴ ]7?6&/̿>.&ܿ[*XK|Ju{Ѷ5Ī3γjoe9'3{$-[ƛouː!C4i\rӭ/Lqux(H8"""R|Hii8d-€{NHݨn>-Zh;qNIH=C0tP+[ ;wo}vZi/"""G&Ơ6lO?+pK"a ,} {SاZ4~P؛yVֺi-:ZaBq>'pE_!g6-'GAV{Ϡ;InCA۾49oxpg}ٳ&8)w1b>`KkDDD$B( \?pW8ەH8I7Sj _2_)>R{3,nkqV]w;@>94՛Ӯoݕ&dݞs؛M_S^b.{\7UTT裏tdc2rH _~iӦѡvDDD$ٴ%P x9rd͐A$67m^`/Cn^AuVݠkco ٷ|U'7lA|g4KѹouGKJCo{8;K$z7Y~S8***7n][oO :n/Cn宇D']ϰ!冻$ImK$I&LlQ~ <xbo1G /۲uLI$i0$I(>^$I? %I.+}$I?K$I)$IR3$IC_ S %Ifh/I$uR_S %Ieh/I$uB)$IRn2$I!{I$)K$I>^$I-$IAz`bp/I$C{I$%O1$Ir$IF S %I^$IJ_{I$o3$IZ>^$I %I$N`bp/I$M$IZ`bp/I$=Ѱ$I”HSG`[؁}K͕Lvg똉 Gex$IR1$IҀD<ݜ: g a)gBB6%I$"H$IS7"9*fTo/9lʉsK!I$:C{I$Eo}9O^$IC{I$)MoاFpo`/I$u$IFOاdpo`/I$C{I$=b`$It %INpˁ}Jw{{I$ka7 I$UgBS%v %Wg`/I$u$Iԁ8%]_#p|7$I8$IR'FDL"M4B ӥ9&3H ꀵ7$I+%IN8" C/ (W3o`/I$X H$I`-LN*#PX>-QFa>.%I$I!IqH@U>3o `"pZ" `}@5K$I:gK$IHӷG q$ZU+qE`+-(8B$IR`h/I$`}G %I1$Iѕ>?] 
S %I3$IN`}w{I$k %I4=اbp}$It %I=اRpߓ}$Itx %I$z'OɅ7{I$ %I4f`җ S %I1$IҀ>/S %IC݀$I895@glPdġƱ$I\$I8#y:f_>0HIluLI$)K$ih$IwK$I ^$I %I=7$I>C{I$)M %I`h/I$߂{{I$)wK$I/$I[ %I^$I=$IRr57$Ir$ItK$I^$I\ %Ifh/IBEu'KIJusx}74jBy~T+U lS=b-qMtR prMa7!I6C{I_.܋U<{W֌'/ˁ5xy}z8'&:#pR؍?$Il!Ir06V8 ?v;T ɣY:fɏ^]0Xv#(H3?o#I"$I5xrRW7W7'lS؍c4ذew_'I^$u$i |/fر֜=W\qEܥ{KK`^ү*ZcɼumEc~[z5~{P"ƛ8&j+(h &4V7+,&˼nDD Rz~!@ZV@$B H$! V0R"< hس+XqFῃzo[o!s0'cRϟϲe(*ۧݻ_?>ҿWKy^"I3鼿d`WWPFF|æN!!O8C&̤R3}s1XuZ VXI?ZAtPm.-vWn |,U:X7|pJK[{S7/~ ֭[#?Кtڰ`~a^W=o%KpuԮ7[N$%)IrSŢEx 5d (U39F2OTzwQ&o~سUvl>#oC[+z65mlxvx$Ġ3fyģɁ@[LW,e>oNT0vښDS]A`#|`nΜ9\GBrҕ$I$I bΜ9,_Q8KH@PXUO&d@ݫ <5^^N݅H1/WؿksPN-'. [%Rv'Hி BXa1:Euӹ… 7}it'>u9bŊO~*6msyy _[Iq$IRd@^^կ 50KUƀS]ןq_eP:y ֹ?u>fkA=l =) z56e7@iZee7f̲ٻط|_2dTϜz7O]w?S_+IK)t^{x,$d~f5`TyBkMm"1&_蓾dv[o(-hg/RfEP[p9C&YGMbY#秞3{lpw6l ݒ%K4P7}]woزeKjӏI񦴒$ `Ǒ$Ie Ho׉si ̴ *U̸ϲO9-sM{coN8aAPSA[O~Nt`"̆.cC|w2z4꩗ Ѽ梛naÆza]r%,[l@ 7ܐi6$Ih^$gu3ghM&||Q j*yC9MḆ7gQcoٽ:m}I?^^Nrh/v[#/n؛6u1fDy;F{|Ͱ[s/\j[$I!X$IRg榊+R?H贺XVO'V Uӳ'zឌ7ڎYMu5A=/9P}~&$O{̂)pLn%clo75Ƙ43Wsrƀ_έޚt -@x]I09G$d qg؊&2oY,j~ZcYihR}oʩ~%o W:Tʵ zAH}A`)*aecL㾪zDjhg^M3z+L/ƹ$Ix %IR{1y0{G2\[yO>Moɱ72JgfKhn 2f~挟/Qv!t؛aDhu^p&8;i`;w.%%w>5^$IR %IR{̚5+>pՓɌ]-x~coFz'VPi GfE38ƻ51e߾Q>'IF7o^&C{I0C{IԞ ћ0aB}Hḗ AZS}u,?gujMJ3痿Ӽ/SHZ_J*~:8㔌zo\_f#IS2ue(_\'eh F9 0ӗvl$fT$s$= a!eW)pFZ>͚%iv*`wQQ׾y -x%N$Ȉ| ģ16OC9cD51oM l{2O5De!@᫭M/@AJ~^$gAA.Ȝes0;~ yWa85k6NǭwǍ}J; Mu{xqf2yM u"Vr#Hă} zs̭΁`x&.^eת|N@V]]^%K^$<`.R.+O?nR ufwqt`yz߉/O"-t/2Ӄ⭕4)HQQ }=ؿo|D"LoeN=L\t-#M4c,ΩH/C$^$gk-[C iCz,ٴz=vz\}y0s@6y)c͘MZ ??cMZOvȤٌQPZ {_B𯃺j8;i`yf)>$IR %IR{6[.>pDiu`59F.3;0>eĬ34ltP~a Wϰ[ $OmX͌%7Ԕџg㨋:cΆ~f >/#cC[zuz`$IRgh/IzAee%7n )Gsgmiu)4*=Db1FŠn޿X3jy Zz`NQ] 2ӂ701e ictet?t?fÆ %I %IR{^%֛O xl2ۑ2s]5:5F疖֧>eR{{$$|i9܎u:<W=)s+(t:o=EPcT{ҙ s?`ϓW7ԔџsE쏺)_"7u'\H4M6;4yyyqD"=y;L/&y lI4@K'w裏%)`6W 8XR? 
|t 3Pۺ}[1dL"(c_GoȘS/ W}4;Hҙ Z,۞c?})@БL\*}[ֲ{z@?k׆J_*~;P /+d|*΃$Iy%$IׯI~D"mr;RH %iu9vbŴ .W>E zKI[o;d§gԱ"s6'g~q]F63O5A3/٧{H477zq?q 7$$I$I:* H~ۑB482~+!9F p8wR5c_L$'|1󆲥Otl-4VMgWug#ϔWP7`#gnưho~jn=\2}-ڑ$I}$IkD"}B6k8x "cN`.mMkzа`N=@Go˺D1Chm5[c{DsɎs6Nꏞ5wnSO~ݻ7]k3̝wɗ%{;vկ~5}S9O!#IC{Iԑ@0bժU\s5mH)vK@z]i j>D3nWX3jb%9bC& ~KG3ourNK'KB+e?Sشizkǁ{I~*݀uٲe,]_mm-\rI7N 8KJ$KC;Ңw\uUԄؒ X2gGq 2<9_>>J[hW 4;=ITn#[~P__*NԆ˗s󪫫9y7ϐZ$I}LK$I< 0i$z.]ʰaBkLꪇz?G+s7k,׼Km^7y m?< , nKk!Ԟ)J`rj… x}3`bv_u@S*&:i $&zSKK$Ia-Ll 1d^da *3"P=^-I$3 %IN꫁}$I %IN}$I %ICȕ>^$I]$IRr-O1$Ir$ItاK$I^$IjG)$IRn1$I/}$I; %I4-O1$Ir$IԢ)$IRgh/I${I$o3$IҀ7P{I$2$IҀ6{I$o݀$I8y'x&>0 fgx$IR.2$IҀrs J`2 v5h"{cI$I8$IfXXo1*'w$IR2$I/)K$Igh/I$> %Ich/I$> %Igh/I$}.)=K$I]ch/I$;}.) %I3$I+}SK$Ich/I$upاNpo`/I$u$It c`ҙ^$I$IR't>^$I9$IR'>^$IY$IaHR`K$I$IBƮNܜ?zFK$I k9 IENDB`pairtools-1.1.3/doc/_static/read_pair_WW.png000066400000000000000000001004401474715105500210040ustar00rootroot00000000000000PNG  IHDR(G%sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxw|SUǿIwK  D*8QQpx{{ (n=q#dSfJKMܓ$'IÓ$\n==&V6M T-ZFc"4e9[ V-0EFCՋB4Fh4Fhh4Fh4Fl|Yfc< 8aR8c޿1.,-%cLJvAk\/|:gxP ?<{Q$6+󏏍ݰ3ix|V}}`CO#Jm?mICFvIj[4ⶃN&iJ_s0'|izUYT Fq<~ELYAh&EHZRՋpZyBs:NA=Eg.Zc<4I3+]w+y?&t:*>w+\tȤөW+RQ==t5Xۄ'Wծ+Mb 񽖢l1 -]H?fi,.`{FMP lDĶM0wF@[0M|?I1}"BBxm*ٵZش L /"`0 -j-=:5 }&IN]Į0Gy`xղafĉOO+Rq k\&J@(؜Úo6G59%-Z$V4B8FLkM`{i)==S02߰(+e7h-*+"偾@>-0 1lzsj-;8yrczem%@ܺr+3es?i &P#E88Uy=E=e vVϼ2CKٓ)ag7P+EKJ&'p8X͔n_'J&'p8X-nZ,!d]B''< = {w. 
OZ; sl,v;/rk!Hr l&!JZ@Ohv[+_]FǓ=e:!GU3`B"c5e:!|^Ūj-2ШX5Z_C͆) \`#İz 5o*LM-׭w#ϯ\Β{hTvbrٽSq:>-*pIM`طWX,d^:EgVj m ˚n s:^=~=sr?])ђ4v2^⻫ɔsG@lX9{CQ+=Oth( 9Peoy9w-JZ|Fp%b'6_h -g{)޸T+=/{d5>6߿u Xȼ StW!qwE^bp|CK%C_%6~8+1F9V?e/qh;;f/LFNNV/tb8Je{^6ⴑnJh4uC'><-k)Q^Us-># DT{W]&ZKdu1IĻ_xJa7AnB#r;ek1wL׳oSlּv8ď7.c ީFˮMzZv:qw+Rw gN5ԋ>Dˡ cR3>CZ6d3 :_U89W ؿa1>xЈSh9u%=תѲ9oaĭ{%>Jf[K-YGh)ݑ+L10}-MCHAaWK)ђuEE# JjDiY|Hﰕ3m2UK᫤ʀiS*)GmRɞ2 K]KX9m 2r,̰Cy,%gdH;<-Y_s-rr]E}^W5,Wݛ(KemQ-j1\SF8\o*[ <_EoFWQ2'a-QySJ(m%zNzVeδ~aDi+-b ړ5IAF$%sYf9/]m QyE>ػkR^ڿa1N3Ar#0ch99qǑzx%ZJeOq^G x(ݾ=n =q(ruk}忾w?z&i^:mjCA)e:^k6ܴtF(ҢQN#!y[%%[VȾ%{P'=%=qv[ֱ5eT-y\7>mCU32ùu!4&^W`ɣPuB7)8X dV5pR 3;:h)m.CW&1r`.zFPClc:gu{;"Lld,|EA|I wZ!dMzN|e| ;-V.=Wrd;-z^{aU aX,dN|(ٺ0/aԅ_MP(+^#>PegY).j-㹹->elVbEx@=^=Zާk{Rļf\l4qz/vϢ㫤ܓ0-mֽ+ZE9{w:kM fk7wi\KLZO__VLM_<Flv;w/h6:4N z{ͮ2!Cib‡T`)ypْy4{;Ji3MP@$i,9+DW} OrA`VV|-}ZB"cȞ2ŏIUlZnxK^Sgb++h93MHHD4S36~UU4lFDW*Uܹo +Ľk\x8 9HZ|\̮gc6d%?\35e?qr̳ɣ&_zƍ=a`%톝E1r+os&*_|?bٳ;5ZCˌxwMŮ,ݻ :α7x~2Tkw D8%Z|f81A-.qp"-#&->br~'giIDUp&_AKZg'*ҕK42Z."Gy$w& lE~-u7O,[b Xx`PhyrRVV{!GВ<8H6i-&<`],;GmbX=ϵC -m|MػC7n6~.g&kd~i4w|l':GOh&-x̤$3cS; sd`jFZ~3zCO7FGv˂[ _D:S̙j\n@S2m?Ɏ?>ޣuQt}2v-/NqyF|mOTSWц\dWefh;aӀ i%􅮚2VRiSW\%$+_4j mEL4ӽvUy 9Ӧ(1t]U^iSJh94`)9&+3hjcmQ!/VGrqu{+*ⱥDvLTg؁8D+^vWa.w]IB"p8X-nS p+{-(\rhX#^WciG:uś)ђ~굴x{)XWg_ߢ|bўLs1<1#(2\^rC :2^[ϰ/!Z~dHZ4ADT-XJ'}F_ɖլ{v5ZȚtG._8Zڒ=%ݶlᓡ@KXLkL.vmb+W\F㎻eV{`>6_Q ZKZL+|->y#fNh KY9m˟!2EقJ˟&ZiS|FC 7?*lEo!C+-e6eJL(7hƍUh48^FqRߌi4pp)"5܎hR|%0ܗ׳3܎zh4 .mp4~C&ruqcY{0?6W?'"{jwυ CFw>_^hZ4|[*=PčxBܧ"!ZDg-0H"&yMz"p=ֵcTYko#ѩ8{<:mf2k8L͕r+ ^Aki  M?s9@m3?Jb-fĠ>f@Cj@dZZ#uE}j4^$ xN)z[SC "+ᣦ&CkĩulkAd:Ǥ?h QF>܋6#1hZ.ϑijCC?{k t l^n /"sk v@bB4-(l͏QipdM j4Ѱ$`+2zS!+J3pb-(XeaEĽBM$p:ic!r_6 }!gu-8D}ԥ 9Fmψ@j'4!Mx} a?b忝 YLŖIx1,54dr$ES iyb-icub-Ay=5Cc{{Ws1"P#~=Ո nU4&"Gk f] jz!.®ο} 3Mll^ x3Q,+p75=ty+Te442> ?{ IDAT74DD$ [!WR%y K"pޫ%b!kY(UǛ'#-k38jx+PMx |j3V 3 j{X@&'9Y`|>X&HC_v*ĽcۯaƎQ_'?K0z#n~E;o0 xa/ ٛƛ<&6}#Ev :4#NmbЇ@?X0~;+0 olLFSk.Z{6ԧ??OT sTߎ<}HRO[m 8z!&[w k}*h$Δ x.m"{qᜨz{Jς~.BuuZ4E";t1Q"?pt~}-E}M7`]tRCqSM@z 
5}%M5C{$1QZK>ۄ!&WV84W< PUM*~]眸~Q4M9)<S}4~IB4~KkĪjpU%PQDWqf"# >@ogގL} 8+>СyvVo%\Y8uQOQSڏy̽7]xEC _oKqz"| 2<MfG]ԜzfoJaFm!wQs8D)^w_/ Muѐֿa%юݩs?0ڗsbj4N40@brl~<&"KNv>C1fɭ1lj55qm_b-NxH{ ?V ̙n_Rp5aX :pl`k":983T xPd]jw^qhT:"S1w"n#ow9=?T玱[{&&b?nP-D5O -C{P@yAji;1*VLeU c\'.U%ك. Fz!~#۩yދk4Ƃ4:[>q#>Q錦ӐSEև.߿x# z@{ Y2IUh7 'nRES;=+1*~PF,Gڇ?`W":TXa}>TDJw; BfF~XKb}ZVZ##q/Z5kbmBP-ٙď%t $0EG(֢ Rq>?K:&{XKc菸tW!##JF+Snlmy6R&ʹa,B/T D!ggSgˁ홊Z4k̩Ɋx;iyE&>uJJS89 f`F{t承p?Ex!"(uHw+ <ܝ &,.S'M P TzvGGcX<63Ư6X"SႡ}N?Dөlr\9^-[hXK 7&xQH}׿F9^CJlWyc0F.ULEN8o-*)fx]i8UBZ(a>#J/B=PG`ϝs)N*ghh?[׻M943!aq{EԞ%4撀9irG[~]AZs$fτg aʨYNF ,LyOU^8"L![ ݁0Do v8 r?quC{Mab-31B.jiM@Utmv]D"LET>GXC9 g"i4`Ie o3 eW@Ǣ=Գ{[^X }0'i04wU!d@j4D' vyBe$-+.3UMp75'%* B&P8~}+=ƸyYO'T簵_9ѷ)X<O˪wQt 2l5Op8pc&F.907B΃Gt\s{pjB\SJ dvGhZ2nFWa6|":9oE6 O@Ll{"2I5_Z]rb-d͈ںth4r4`YD9971ja=VDHiVj @=0h4Krb-,@dyRo47Ka6M#s(:')@r豵x(bĽ.4TAIYc`b-&Qr1obV/h|kHWd)My̵?E<'WÁwOr>7#[B-Eق/Q[i4- uC*P]c* E:nG_.Mi{;sX_?ZJs/vj3TCs"ƻi8Yr" -Dghd{w"b96)&hp`'" .13sDK_b^yB7kU\G [iuqZM?jvq g!&F iMx艸0A\l|Cc b@;+ne1x8*Ԣ+|?5G"@1VZD:e OwB8k b,DK"" }E d (Q~[i7'K!@vGLbrl+@.NPj :Q0_ c>b=z9uc%4mG,VDM0dlv4t|ˀ3 ~F F<z_a,OPw聘l@p)j4M착R!ɼ`1R!́~o&(iy.FlؐJ*0a l 3gp8\_*M#A| ,,/ӱcGBCr5xHf8yוjh \s ~R ?8gRhϨQX~=ְ1hw NXwBGVw\> XGb 3/{iwCO :H>:tG}Dv3`x).6١,w5ɉs=iӦlqqg8&f֟ɛo5:rTRzgV,`atR}bb**HLLkWOo^'z_鋧yU6z\IC]Ē$ >?f<_U^‚I $Eδ)Y}{TI˺w6Osi#/^YWgۥygϞ=·DdKeWDQFgG/2guƍc~cB>`2a:.}ߦ_~~cR-Xqe,_FE@F\۴E{vM 291cY9[6ڈTV~:*Ÿ[\<#9تOJJ }QOxx'M&}z_,y|,P=2x9ީ0;b*?kKWwɜ&YT?oiKvjws,}l,E]ow97W*'dvؑ~n7[os3d·O@Kv}q\'Nd̙XV-=AFP6v;3V46Է?1#5&'n۟mMeRh|l3ӈ24xa'ze:$2g߼HE.#zɉ ƛ&'lFN5^ZCetGLN98OkLj2't}'x"p>*|Wj38׏szd6MRŃxo- 䎝L׮ U,wQ689Fŝ}[1kkV̮ᰳo0msNƹw&' W7&'R>49a+-"eQc8lX!t{oyꩧl Q hD ә;w.Ty Νˈ#XҸHD@@ 59Pf_˖c@ަɉE|%߈OбF <6|HUXw)qQIN:xlw>~zӈz#q+;<`ġ񤻏w|;M%$`<#nt{bbҤIK"bWnݺ믿䄓$fϞZUaAq5ꍈcƌJ'' 3k,Iir%<|)ň&gLFa݋d}qʑ۹W ̆Nt8~=r?zMuY!doe%[VqlY6\hu$mh7SChh(?8G(jaL#惀(%|w %͸qox-: ȤS(/"III8'/1;!!\mukr~uvMxKeWY֯3ޭpKKqDreKj}G%uqxgaۼw(ٺƈOJX+YT}=~yLjAFGqhL駪i?f͢З^b3dܹtPwA ӥK^|E.蠚(+ 8=CǎyWD`ϔÖ|n=HLFIu]u23f*g8:[T >S?8S9r$s%}Ʉ x7l6H<|gN( 
¸T?W8O>Yw'.I;ή2>ȕ7}$r\44RrʫVz39}|9}r ,.M`g/K2k[~2c/ CJȟGv1>4 #;Mo vl0.gИf~3u$%%qwr-8G}V)̝;y ._~~4d'Kǧ4$U9Fy j4F*㘩2AbT1כ>cӗԝʸ'S*c/!}J?3~7 В&'@ tc:٭]Vm6^^-ǒcftlWm{\KUcHc>_m)*Kx'c]ޥ?R#p%DH-;ENqf:u*ݻ׸%MN8yG\6xg IDAT00a]Nྦ)crh4GTZL)k2K7j2^_#q/qbi#v*X0bT`2歷"??}@BB&MRZJ믿{:&8ѵkWNFb\;,#N?ֽ݂VɆe'D:nlISKn_h ΛoɯZ .Lϑ ΰaØ? ~p}0aB7%MIe^2j4NTF7SJT^؀TMCTƮaz=7G\6mpꩧ駟:=A/XNm+-FܷM"Ǻt?;<n7-XURѣ'.Jpܷ&Zk4*͔5VRH/qtjwR6_s?xOh]ϾQsw9?F9##Nt 7Æ cذaex." #7X@wq7TFeҔTNc̙ 2.þrjňS[u&ef~3&\`:Qq#0FYǦ煕ˍV7wĚB#436?fcY!&]ͥϯ\fuzSh4@CL)7~y3.SfÞ߳/Ǩ'jw6ԋvGm~1a 9 ; b8+׏߫񩌝u*F$X2r36~b#wwP2j#FϲT`c, T&=u6#>-3⤏ہJf'Ŵ.ג)ftW_\~o ۍ]Ɉ L9M`S)eu}#n;p2bl=|Rx2湚45%WRo62dqS1y_L9 a1JOOwS NZ(xm* + m ol-)㍹FK]bB[YL\x80n}1[F%?P)9Rgw9VB"[qe3#N22S^Qf.hJGOKRXd,ƻ3ߒoJЭ)qgY)b[TF&ٵ+S*cQL;r)\NdR'{l]!1zo2mn\gTFOhdݞc< {i)n㝁mδK9+Gjjش z2GtcSq1_I#ڥط*J3erutLJ#V$96q@ö;L5Bv/ƈc;"x@k+P*++7:}۴5eC{([EpFvqF[ntN F\bgi'U%lhj5凗M';Y.xSJƄqP0HS+xgJSy |ϟfh|oI˄EWF{,Lۤ8wULXԴȳ{ &oZ$ I0e-p&~#ḍX:6:bb!ݳ_\( S]SQh4$4*gde256]ޅ5T **r?|1Iiy ڍ#.2(X>ψ-!dwWG4PTd*|G͈AޤtpVm{y_L>+`8l޼}Yrf{{i gՇp%[S{htOvMrr͡YlkI !k{wL w1u:{mMvrT]2re-Fln;+x-CEQIL >dZPI9#x/(ٲڈCIweo>#cXB`m]Lt{;TCn@u>r҉;b5qNa|F7#^Vy۶h$ m;⍲OHd|w>[nz܎5L CS>V齍8oTmQRr1LοЂCm)xh׵hX΢ZJ_si-ZbUhDIrSܧM"wn#g!1t>eANƹwT>||K_zbUn*ٷ l525J Jv\h<͂!B+˕eX3e6W0ms[)+Q^k4@9;<#rv_@ELLD.55ٳ;525[j䘩I >hZh<Ŋ+ҽ =A/!ŋ iH뱩sa.} "$+]2rw1kl>DV';[\P_kY ligt>zx'tF v &(6%%%̟αW6&~9e.%*4wQU?IX"B@DEADB}Cj[u[åR֥P @YdAd Ⱦ&3wL2ܙz3<,9sssgr$Gg*TZ1ͳ>n>p } ] ^ nÿQ\?ЮL=8u8W/32k<ņM$>`NS%&L(؊fmNu-yY_iwg6?a2¾p6&nIK4K_{^ 29 kaY雑YW>h!KAU`lڴ P30 1b;w{NiÔM>y HQ4+Yx4'"'OɁfʑ@fEgаln?.2+2kaɻPm&C^gۂ<3q+f臁@ziʰѯѰծ@2駟Xn]'AZ8s#|>Æ Fa!*&79~U\ӜOW,Axz4dV4Y^-[1g}髽Eed2r{$}> x}BpV[ZȗkV&..Z2+D,}QiG4|spY~/}9x3㝃NG;Ʃ&.V Ͼ*Y,}]7?@pkv/!ƶl>1N?|Wuq7d f/=k׎wde-O-%;v0nZ̊4SqTs7/3$^U39?-3_cƾI+ s}:E3 }F>pu yCؽdjr :]CcLU4ſghrײ9l%Yg^f^ -n |>sⳗ8k{raÆOO}b 5j+WF:W<1ŋ4i]v$g]S\xxVv /گ4fQYX4gULXk̊uʩ<5$o9-ٵCZ\lacgD`}l2$Pk͗92I3Ηعl6CHKa =՟V>yg]+>|ۜȬHfs#G:nݺ'PF C|>Kp#vu[h\V=p h]4 ǭs/ Z;Ml ^/hZ=g~|4R%S#/;0ab䳮 eoG{~2zjr'TxD$9p9`Gw=dVa__itՁvOٹlv^_H]m#kǽzshgYn/b'zo}:xwBz2Drxi||cְ;,tw.iڌ_$QQ]:jhs׬vsmK 
h""d.Z9r@x ue?@)8K_ md]Hk+fuwדhhg)..:kzRxb^u# 0@Jx9KۄYuÖ/e=1իsKϘukT-G癁vzL.`&֎q__ 8xón`گ^ԾHw.8?GǾb|u?Пy|A|I< Te˖C]+HByfw]-gYp8_ xcw'-Zc-Y~cVwbz\֬y=ize "YpڎuCҡO;:]C5_ `[0,}hZ>-G_U*!C0{lqDO2nݺs 'B~_s}=A1 4 3< Ksm!3h{^'iHr:GR@{Y[eyuO<;^7MoV'w{V/$'HKㄛ4#V}+w.>zu"Y̚5v7 5@W^y{7%')nʵ^˞=9pʞHTm+Vpuكfk7k)ukƘs) EE8ɽ䕐 "^fkNז7%8)Oz\H_~v E-.[ӧO/fR)qw2w\"#'?N~8xЛ1Gړ7n-vtY R+ȊоJ˨߳v:縫"-{#q=}^O2kլ9mg6ȡao9N <^|fx`&֌~x{uB|ɡ;8oR"{Bw2D8=NǬY ԙh>x wu=(Q,Y/T}yǸˏh ;ޤAf&6j\5EELܰŤGM:u]L/Lї5jЩaײ!^|E? "gÆ 4iލ5+0J1b={M4udTtvm;d$NEEE|GIAf˳,qnׯ29n!оqM/LF5jx,ݹ;]UF+m%{|W3xnk^(.{,kW&k;4zG/f5ФV-r?>X"o*#K6mXhQ'&￟W^y0JX~=[}ΛHGcC5ȧv&La~w}̜9ބYr,H_5t֭(۷od5j ӿ^qqqJ]6UDw^ :5T,N:I=AQXXntLȞWHjh#VٖG"'jժEjɿբ߮](*J^:5k,8 [ tfyQDusdرcGh/%(}-.pFD'""^ S"Q&ewcݐQ,#>fLFc R<S' ܁LFܺNX-]m e,s-{ciͻ9|g@`u+DTm\ " 0\%4,K_K$Wc8 yYYR j`2RaB| &+y") /L1f F$"""uD"""xshT1w 08DDD茹+LR\m8doxHua xm6f`SMDDD$r;^ 8Y*EDDv]Kuc( ?0"""Nј"N """"^kMHHTn]fɇ UW~)2/Uy I(b?O]q|]G ` 1n_<8H9/P1Eg H28 ?V鵫1ۘO IDATg0v{HnxD(>xTj@WB&"Au0fc>C`jS8{D|H0ҋ:1 +0""|:(: 6E#R p5ID$Y}7rρQcƣ(￴@`жsED$u:R< b1Km]DII=05=EDDʯ Xu 39Q@*X20SCHߺ:ɡAHٽL-[Rn<|J7oEEl^Sg~N#336m{ǚ5kزeKhh"dggӰaCVMEƍٰa14lؐOcİ|rvY &d] |Lp8m۶MϚ۷bŊD+ x`q_*U|}k֬Hrss}Nj_ zm(ƬgL4/`Yo^^}U_5Bߣ/xw-&ֿߕW^%~iC> >ܮ`ٿ*رc}͛775WwNq|t!")L=i`{.>L]v^`uxHTO3|p'' x 8ĸ tԮ]/.]D*IT^?tܙ._~y`*0ʻ$ 'b~fTR7xoۨDQ]Fw'q:p-pL$J*ٳ'*ӦMs`V l.*k15 @>}]r%̛7.7x>7=c8F1qDMNWC*+ׅ?cJ9 \m= 'G2~BYvv6}:a {- 7D:u3fLJLNڵkǸqB]($)Ե@FFHye{os~MVj1CHRax饗HK T8MԽ scM Gz/i@w#4eO>9T:c̘30b$׋kBz!|MVz;M[=P?N^z;:Ðb} 8[nuGC?q7{H :K|4}Bwr07v?a6M)>^z1tP뷘+:l4^}U>Cu2bڷoog;? nv.8+RI \ |of4L SBJ֊DFC\"3:twjkV W!gJv qHLdddбuh)?D+` gY8%"1ZWK`ԤI/U ,k4h f?n`t9WcnRI*YYYWqK`LT~XWJaÆv3+23='ZI@ծ{)`jEB |-xH#Z*R$HdvOF)"t=wd殁 j½DDF͚5]M@ULD<o~M\`I W"NtBJ8J3"9;zsUbdcݫ8ĥ9xHCŘGJn\hQr$"I!tEΝ;]M΁C" l>,xt`ݳ~$"~Vv` A9Z Eƍ_eP$M΁C"-3NNu&"oÆ vs[:.gժUqID8јmL>NJCDЮ]XԞGbիCIi*D$Νk79~rz~ᇸ$"K7¥2βJeIjgϦصOwΕZ̟?8D*}.LLG؇%"qA͛gw}LPzg̘נD*=2T:_܎CV"??sE"w2)9wŞ05D$)M6-zLPLuz,X.VDbI"cذav8Cr7-|>ƍ+t0ʪˀV{ 0jD$fj77W7c6I| <};>GG>gy={x 999s=Ub*~-+V&F:Wn0מs뭷zHepe! 
h:3qI6lX,V<u::p#F |* G%###QT&Ѥ2Ɲ2J :'x0\,XIO>u=vsW`'Nd…z$*ُT?a]&qKRڵkݻ7:/qq1xJ 4] 3sf>8&RIEh/S? h$%߿~yF osOhȔ5m4+kPQ8{^ꫯzHedOZbC]<qIӧOBMN̙3K.m۶yJ\ݻ]3\sc0EZ>ZXbΜ9tڕǓaHקO6olwR| bC֯_+`&Wr6n:K=8o 3?RXXSvo߾[yCs@bzw}Wb`վ8jŌqKR vcnn.*clq}0a= +vFIX{. |4}z衘(RD8jC\{10xw3I6jȐ!ՋzUkvE‘i,Yݻk׮R.tfmւ+̶9-q0l0f̘aw=`. ?^&v̙3ի=I>'\U_+M7֭[cHRV*$`վ -Rcv0DkbĈxFY"~ix{ ՞Ht0N>Ν;'Lw5w6S@8%))L-N=ۑ%Kڵk}tޝݻwK*0|p&NȭJ׮]iذaʦ)כGMCVE0`5%!ׄR_pٲe̙3'R<#vXw" }SO>뮻zðիy>}]\Oȇ$bL`Ro;v_W_M&M4HDa5xJw~Œy[aPu?| ~!W?jfG07oXl]twߥv-{n~m bgN x?ҊKt” ADpfׁP&g3r4f.:{丄6I^9""6<;1`WubZ tBh;^I&I^>asr^/n!Aa; fc,'*"zo{DbV&: xxHMץ<njza TVDD0.M*&cBR kgЪ@#Z9oDuSKRW6f%IIdUf^QA֡""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""4H!=!}M"jZ]B~ {H*-P-?@qC)1/yF$:~GkF$"K)3q|U4"J"DDDDDDDD4A!"""""""xN"""""""9{X:DqM#^ٵ,E ( pXu'g ϢFDDD\6f3(K7 iw^`9Ms}>#e ?|e L: :HhfS!ʷ;߭kf[oXc|'Bbx7e]39kDv-⑜rp5 &Q\8{!'E\uC5E@(kZGqM<,Vt N\xR~T-G ]DDD$bQ\sfu9Y㟀M\V;w&H$Xe&3iBDDbi j#?,MEpiڔ}GŎӉf IiS" """gl>=sLp^,cGqMN5n ?u`mF#zׁN vG0EDDbٓ LG3kC.esYE0 "z]1$-MPHY|w *us1Cuw@9bY*"""c/KugXhEu(^csC'sæ:'`;ýfRZu|6p/0\JCGu(v\WEADDDbpPt"XžoJϹwan(c<)}9 J97hBDDbm*աH#Xb:f#6j9'mU n/ Gu2 """gg4D;0"1HlJ>_CP8廀VSg=}y<&gߩGQ1$MPTpp}"rJ yqncv;tr`: ١v{6eO\1\&IœڞȑHli"}m֡w185:Y39ʈ~<9KN'@IwxEbG`?a/!'3whO|k? 
N"Qۡ""MXfY?Pػe?agw ;Vi1֡jo!O&(*R/H)"5L!s::i :N욘mAyQErpglmC_$P~&TeP<؞ȵM uTJ?#=W.,ު V԰u(N!r G:`&:L|L"0uP0bpb?1+Dy(Rq4O yֱ]ޱ+\]"79=w1sm=.q,VP IEG3"S@uz%<8{YwFvYV_&S÷ϼ;DbG`סxJ4"gA87%Nԡh)6c2:n dvM&.?p|N8A&[/0u(/ED΂Me!yCP@]8`֓BɕÉAHgHh\6Kv.fih-QvdgSb7}1 <;Cfb2}Di.pU "): jot$4gfUu(/Sɘ; 1ո}38 fʈ$4UUB72caE doGqȑd-oKȳδkjEc2p\!tiG)XBDD 30>`ub]BwC0snp^wCDDD$1Lz+0`HV]`=#ֵ1KB}u1IԥpÌADDD"@:[}SE+B q]׍)G """[?'Fq{s80ch!QQ\;cpaƐ4TBDDNgt>){a+ϯ,C!fҬV!1Hlm #?K[Z=`D_± w`96?f IC""oŵIE*k7gaR>C^9b !L61|M ןP̊ ҷq!} -e o\~M"Uv(`&>`N9b)av="rdNdm7jČʲ}X!x ̋LQoDŽr$mIDAT |-dDJŁbKcT( }WϘ#(ZdPnhB8&0)ݗywzoUYL\i\-%hRL¼G'pXZi}Ga؃Gz|i+"n;z^8|;䧯dDz$zN}B}bQDz]2[`5 MuSZnZzCi6ZKYa~c M51'uJAC>˿iP 8 8GVXD}1]'-05Fe{a?2Rv 8ۃۗ[ k+㡲B,jMZ|C?TL5[MZu/]C-F{- "CK8YڗfRm]N'_mpk3Z8f۵49 AQ U(qbӠ7a֚/RZ\ZEMgykՊ3#8R@pD4}f%8T+)S%<>3#$-/]M񮭦'}&AG,<*p(0L1{-:!5#{\0-:iAdd-Z޽*;(91!/>~ !*QM<ĞKM-f0o$5R,@=5陁Jug~~8PG?.EMר%M> Zg/Ex1Z}ϟ3/zv >c؉'Gc'jѲgOd|a'?&Ѳg6/zܰGP]Y|U*=_uǢ F.] R.سnFe mKaۏvʘk=l-]Hovh;;@bo5|^s;LWfNLVs__ ISi?,-Z PewWQ.5h)vUqTH Z|C0h^BJ-vF-R*9cKCr Z夳 -17Ѳ}_&@R'ܦEKLֽ8gEyNS,ֽxE>t?O#wTTe|QXIK"sP3A@UZ%0Dz]J;֪*@ BVϝNY 9 tZ҂=Zkڳ\KEi1kA~%4 g%(4OsPrJA:Peu,: xɘNTܡI7({hQZG,UE["ۥǵ\^Z QVjIeLk,zϘۨY#Pt5Psd_vCpVk)ڡ{⏯]o JC?^JGm(߸˞&B޺jL+fAQ@ޣR˦J; /G@Zzy5~{~\] +x=^T)κ^lznǩ/= yx?)|,/,Ds=9BbWT_[Su/Kn׀ɽrwq/#5iyUœњwoX~R{zѫۨZ}U˾~~.e? f';6Z&]ԛOb׊/ ۔i٥-[afnVq/s':6Zr-_e%ic-u P+-v \ ؝1oWI,㹋RMZZco Pú 'Bj=~©ufϛoV2[orڊ?f}rwqkܝݛaV[!mKLɣV['*zE8zk jP]ʄ23գU3{zPQWM4ձm-=km?Lc_?d|a?ǞEkXav OѢ0{>fmzG ђƦr{!ċh꠸5rMZE]:c\-7|j;j"h3m߿e4\Qc 7mK6R|Mm}.|{;b wP_D%vOQVWA8H{XҤe(5i ;;ӌ@iY={qsY"0Zb~#Z:M!' 
2ұ'mzE-cZD%{0޻wm^GKa$?}e-K]I @>{rս L/[Kco1=_`u, URPk;UoRJ;RK|M9%S+R8H5 MTBґ{6GL~w{}?od 8iNZ\]VC|E ;+>>ڷ;o?YƕKꠈ-t8(~v۴e&1@u#NE˶B+euty-]W̮ɣgT4/&BP*Z|5-A;8x!DrEOوUuP*x9TTJ_O8Ы^Rʄ@c8tkzd篇2"[[aWEKQN:+kv8HM|`u ZT%sb79rNj;tNƺK1z~G҉h36ٳ]ՓxyM>jKBqfEOAǑ0 8s/K5f-K%|Bs‰j49*qET|C&jE/`&-YΉJ8 {ϚbK;4Oђԃ2H-cGwЕZ7Iz"wų-OrPX[xoyu s4MZ|C$I*]NդE[sQ|YQκ6}M):jpSa>2Nr[G _ctӖ+up.C{w%S-CnfKjD^1b.ÌըE[ K f6|ƧOkzyI#.2-K:jOIv/:j'٢k~詸D[tmwo*z.7Z*r쀋[ <01t@O]WZhi7ҡEkB#l+K f oWƲ-1('u\IK[zOCPHҲ}AXCԝT)n-&:'\j.Ee R TC38iBLK$m'S3RZZp'W<7L}v-S/ZA"9^nZ֡nMEwWĵ򆯙*r Zhh[.$h1.gh;~_L7j:Mɮ_˵|‹Z$u9/|{ҟ=`jk[/yAD0SXJU`-Ajv*ծ?*6њMIgz&B[ U3E7kLDݙ{q& [CZ{sGUjb WjCDp +fzAPp=A 4h E='U*k;~ޓ T&]ZاHNF ;{ػł=JǓ. ;}UfZ$;N4,|ע3I8~ao^<ӢE*y՛Bzb4=XIo^^1]2vE1Sj^]t@; {__e\-ZZtCW}CƧҩ7X&'ؓDfi!=j/&VftfurRzhMY^ʍ锋 _i)ǽ FѠBu9)/.0=!-Jžş@y~8DKoT1-euGham3Z,a@䶛vMx.-]jt5*qM2{S, FQGP1s *QQvãqxmMWK`߷TZ(3Cj)-Ԣ%,GHX-g{JOڌ6fi%jŸە`şE]}T~\TşMWɛn(Emw/E(Ίr l%:ԫǿ_i9P ]Bt".AAh[ ƢPBb*ZOezOZ"^7w!fn`6*;Q8x\_C51$Ե8F~-Th*Uw<^:*#Q-a5n-^!bx^݂Iګw9 ġjM !Գkwv*@<p :5#BP?븖 vvF9+XE]܄ƱXYKsrsYWA8G5d.<]7P7iA˹h'* ]WǺyѬEBjq+P9^B8PNA Ѩ ;3Q7cW) >bĒjs]j>t{Cݠe6]|pToq6Qbnzmi6 .Զ5 W>.'b4FO#@ u7R@8xnQ #Ujk-4vi!c%`%zIp|lijVE&R]a&㰧U^_xP;=QG+'Η>o5~uMgY{E|ER P/nxf-M0S5k dFcgiw/TTɃCtN wҰjPAqϷ8C=|…} <GhEB%p*UӵA(,DDɸՒ>¼'YXZ]ſ]"E8` ~Ө:@= Np1%ݴx 6/B=*Aݜ >cżvޣYK2 ԬEL:&cKk53wlwÜm:Pa0 ַ$ЄHB5qj:u  :PxAQHdz~G]Cӑ ǼOuլEL^>Bu*9؀fߧbx#w$֠lԸ [ ,\[Tw]¸8[A7P$e~ 6.|ָBcn<4奨B5aAwbprwEV' ؉* oE@yѪHC̎3p0:|Ҭ3|@ՏߨWMʦF^7ƏQBUBQ0ɇ?x6㫚uǁh%PѨHtT=3z^ƼK5[zZa)J.:?P-_9zLL6>[5Z%E%R@(Q?=fqѿPm:"x0TWTHFMujWk<ⴰ5*A7N1|ZZj۸^E#&(dUZ&pl3AaVEM~ԱN+ǯp9QQo^h_{n' Ql 4}3+P|k]z4Iݟ%NAT=!ժ(0).+oHtެ@r/q4ɥ!9{Hx6(cR\ԱU#Lk}WhU8Et\3wiU?܃yYKS*PM}ռT oIZjUYOs5oPmn!Bi LG5gp'5SE" 2gZ.zhҔVX:-D(\I[)/ԊU:CRZ% 'Vz4ϥ^L 3bL\ Ρ%L~xh֖ I^WVEBS#RE+"Tr)ALB19H3R߿HBoGZ B9VEBS"ܑvmi \:T4Z6u fGҶ_^lT-A0m?mu󼩮Z[o%<} B}gU[D`=꼙[H3p 4vk)0! 
;ʻe}}SP3Y?($ϯĠZMT69Ƃ Btu<WOID^uZYKA+੭EhA(Ѩ:JMkTa5dsa)fn'c͹XcKi n'& Xp:$h^7w80͝4U:Z7SuT 'A"{9_tsZA/k#IsTAhJ\9.(3"-+0hU:FE Tx{oᓵ*ob~4k'a1p\NrT  3Ղ T "\YKSd*h  `&A 0Yo(ggcPzi⯄êyo0#GG_ A<``'}i6C} us$twAn-B04kݨ+KGAkF]܁yS VZ{LTDrA/p|&+9:E_*E`f-SY`h U]>kL8O@  |+G 'Q[4k$#~%n'Q[' i:W859~FE5=)j+xCը@#*r`+f-:* a=MOG_lp{?(@ TaΦ@[T@MSu\-u} ?AW('s [Y [(W B#^?8Lt&??]v/8( O[FDDGxxsM`Z'4Z".呛k<R15 8ĵ]vhBFRZZ֭[oCMLU;qVb<ZpI'q-0tP@HWdggdح{ !E:?WqKqVF˔jgYɞPV:xmY)Qa1t#ظQo7y@pD41]ef8{*EDu,g xbFݠ yOn]t%&95q*!J޷z+w>E pW;*r=ʄ5@jj*sƍ#2R 0Ú5kJNtbz|: ?WœtZu _xwdލp`AD|'Zv>(ݿٳg7uѷo_JK EX39Z0|p/^Ltt.w|rMʕ+j\ffwp-駟f#$VչY*:6Z5y;\ CΤөx-qY ;*/ΰ嬸 :xcI(S32G]ElTӊ :Kxg]BF Ts;)L4;SN9իW: G:Ή .g}A$y:㻖N>\f_s iJFs [wIGP=/m,J}3z& 3߳gvkA٪;ߏLd?uгgOM3s4cP էFׂO>>( Gq˖-cԩۮŝPcRW} D,VPP/3f >%?m;0{w8f9Q^\fvD|gF\dFRQ;I=6~|̵r6Dю {Ӱ:ؾ|Kpp0fͲ.j ܮI;Ps 焕$kwvݎt./7E .W^ >EOvd|gG\h['\P}t(\gɣ9' s nXsYQN{O=$5G1_K wum"xgaE7T^kȑ#YhQ@8'\3aZ#(ǻk /[AYAڻwSuV &e̵U|Le"u-y떱kp$;2½d,zܰ#%8<>޻v:_'ȑ#3C9Sű(x(kSM:0|LII { ` K;AK/I-A|Ndz9ӰN0[ ڰCtR.+KOvTB*,WAw}lT@;U߈o5R_?֧1-s̘1̟??D W_eժU_޵TK#pƯս{w^_iA0y21ӯ՞(ڹo^5= mtV6߼~D"SF$@;q?xD'[4%}QK@Y`\y啺xsr 7 D|ZOPWN _&$=#Vblhh0l6qKټIx[mr7eoQVGƧf骨$7q_L#W\qsaƍEOV3oyʖumu2N=Cj!tt@ et箻uֺeM60!s`5Jg27&;xgOvTB7cۆmJ[lg}E?hsP3OMc3vX [?7|YEyVgL(pAUt-:uj_B e<Ѱeڶ:CW}k eL~v2Zgҥl߮Ill,-s9wv-:?J8!CЯ_?ZAh8+IFh{ESZ`IUxm]a' 6)ݷ-Nt^t8l>6sMJ32l[]}@pp0Cmvut¨QXpk9g,:,+PFkFY)i?`ء-c|hvavae4k˜8q"s-& S%T* xH=d:Yvʘkm6y$1,+% ;eR764|N=>j؝Nv)b~DRRIIIe4{;<;p8Gpki۶m%R#1euDՄ21i-*'I`P,6#FeUASVǖO5쨄$?ٶN{"uo^h&2zB"[k٬HZ0;_WCӖNNo79-[Z߿?wT ejPƉ}r(_y״SPXX{֩EfȦf;gflחYawq1vllaԛG#{NPh8]ƻ98|][ Y7ѢLGa]t(EkIϞ=A%߽PƢ}]PF~3Ao]t.-v=A_ 7㻆v)}a2N#|7-|?>evǓ/%"a E,1xmpP$H.  -XR&t'a:("nS4xHmͿy=3)ڱٰy}e59?oG!M)^_x(miDx+̶ x- Z%.اAh2莽 {Q/װ#v" ;o2rζ //)ddzmwy9!ѭ ;mN6Eml[= AyyyVs. 
cCi^EHT+- 7w ػ(/(pY A/`My,:)g\MXT06}'p[d76qan@ozFRgY)iq!մSо}j[O$Ih Yڻw,+5C{0vڢ g4사lR'iϿ~҈lu6-|DA5Av hmmb~|vpD h{öԳ (DtV"QCtRX$œQ Hza] A4999VsOM >m۶MA!Q1$ְvduRϾ6Io$;C {mȣp)mo]a;BB:ތ(ݷ-F AHxAal|nH@qh|ag|<; ;23/0u dURViH>>s>~ev£ IDATrP+WtlxHm$7ɖyNdO0SAUv{*Hʘkj;>C ;(Io7Yba?$B3'<6'^ld% } R^\@Ƣ'lHuabNﳥ9'Qi>D"k 0'J xj*kZW)F~پ}{m x`Ͱˋ^[+yL{{w&vO} jEh32pFvx=f/_n5k(vZ},KN瑗h @at;/tGD2t0B7:V{*[hdӰ3>yVS(BQcY{7~YFdExlO-> @~ v)GRpzp:)/.`G8䂇}tyۗ}hC5sq V%q[NhSfѢE["##>}:aaab}_̟?YfY ͉xg35jxщxgr6O~dwOC*dv%ۖCSHRWyʑJ}N4eVZŒ%K^GL2N:s~mX Ap;(OFff&;v@A9gƂb6-|SM~3ؾJrUǡ pãIHٹr~вK?2d Bׯ>gE9I{ϨTbʔ)b[xٳִ;w`=:>"Y:~HI(~;$u3r~^@HRz-q`:G[T0μ%%?mK͚]c|ETTN B%-`3k ްKTMYڎZSA>pXlY"v-rM7s`ɒ%7W y衇#2XRnz) / $e{˶uw[x{g֞׉[osTÇ[ϸ;pQ<>r-|'Xz rMGPIڻӸ16Ȱ.yYަO԰W2 RRȦf4tyavlE;˗/w k|u =z9Ow2˗s"B3qt;ܰ,kD•jP b킶 RQjqnsmb9?KA9Mp-XGxhKȖ-[:t(כ]W%KpB9XER.b8 / ik Aܿ_/\ uPb6/|ԶNN#".ɰ7} %pLׁ;bao_6 8KFn6_/ɵ^k._70rHC|AK勶}t%Y%43i٥aolm梩`~'GPQs: ~N8 U8쳭T9B`*طoÇgZE ۢϰtsR_KYYYYqnԃBwmEHT { Pi%liަ|gO[ATBWoYMJGJ"%s ŻR2#TdOetkՎ=.Y^FaZxG;~_l|ɓ'ӣS_())aܸqeLjT*9|w?H<ҥK5jw.[lFGݻŋ3p@ 2gvTbG}?i3u "/Xt̽'USeAhz|;hi?࣍/jTG_|"k;@g_H;NKmX |d$gpVA!r[fp)S8,e M;IR$٘v mWR@=![%j\8s$paHځ7{Bs wBZPVT\\LD$~vckp"0|FǝCeر 8S#=oj 'pCM5u3f`Is p .~_VB-k/N:"+V!B D3qD DuunJyGSaȑkғ`ѢE_-S??uK,i?~ѕt)99wuWu7l'?}P2KʶG'ѣG3gvu׮!Hm4٨0l[c{ko5vjuM<ӧꫯ&]Wwmg.Bfg3f ˖-cҤI?Fڶ3Z)spBdAr9%g.a·J7^]lԩ7i(hpbv⮻*Duu5/8Z'/͠O>̘1. 
T8'K,:x+0C7$ګ1|_dڴiL."_^T|h58Q4h7n۶m0 ~ 4*?Ñ 'ec T!dS-*`!7zv6l>W\qEu00 -weƏbH]믿e˖rʤ{ au!{bpTҸK8rX^f)UcIO l%\B(u@UU?O7n\Rqj2rH^cCRcƌnr0'7Io|)#Ӂ S_x*ȱ=˹8Bj._V2KҌ'6`fϞG%lp wqIn+@t!X*SG t˻>,>:7[8ø~p"qNזUjNL^z)UUқAdgD< < 0`SLI;fjjj?~N$&Mħ>ۄM/&~_fw.ZRũ"0 e >Da2.^*/W^y%-b֭iw]QujjjMr4*%_!:S Ls1{BjN}UуKF=z4#GOS%7*Æ cĈŎOL' D@Ȥ";`xԞCT̤IYU>V,jfgR E N}*җy008,=cTjx 2` a*vꩧQd*!C~T9N25v")q HR9 P4Lr!ŌIڵބՉG.3vظهXj::ӌC=dOTRF.dWY$I)pTqFrD5082dHQD4?j/ JԐ],j%ⵤԣGw߸kpK~ӌCLN}*>9hJZMV Jm9 3f.Aiš,U zdvT :TMXܧI>R訽gIxSA$P χ2sdOJ%*MuK;JJɄDn(Ľd/%d+nli?$u^vtLWk~ xÒrχqϦMKy sW[(!YR)Y:e޺7X$gR:h3%{/n=kx;VJ6Bb?}${H dVH*)k׮j4y>$l}LaLGu+$۵q믿^Ԁ$5x+jگE^CRIYreܬki?UICRJ^DOQP83QC()$+q_,vLN|8*g۽kK*IoviŢ,M?χ4#SF`lٲdtSϓ#IEw;Ox=LeJZ·1@!)MS7r]̏ڹKJ*)|۶+d(&=/_TrS?󸩌RYٺu+rKuwZRHԕ}ywVk6H%3f$0ꫯ.ntR%ZF멌a*:͛93Yn]ڡ0tPZv(E5k,y睸kFKrڎ8ҍH ބDnm> M/GI; z5\{v(EUWWwgݣ4$J} ছn.cv*nR%1QE6m4nw,W_};b>o⮗Vv/8@SO0nܸCܭ~2qaʫ=SN흋䩧gk{2qUWiSz 4ް]l^ӧ/:h6Q;w?aa4`>O~C)3g|d {=ДrWT^G} 3jLTt ^{WL7|k6 xgrK.O>Ç+Nrʨ"KYvm+i_Z&,\O7Zb2 Խ\| py1z芛,uQ$``Ԯ-B\*K7o /N6?(z|୷b„ ,XC-_iӦ1k֬7SI:g75/OHQ/=uTFuɓ'K8mpV'K2qD}ݖ*A {ygS Im{FҨ^z)Ő2ԝDߓYW=!uдix! _~t1a„nSij3zL<9z 8?pLҘ?>cǎ' @5QE2g͛wM#{4C8m*sqQWW^Th͚5q\wuqk)ܘ4^}U`=avz! [l-a4`ҤIq0wܕjEDӢ*ܤL2W;۰aկr +6[jB$)-MfJ;.&p"_M;IR\A vځt-?7{G+#6nyAMIRx80bm&L:hʱt{iV W*=$ilN%gZA73c}Lځt p紴C!RY>!URL[JYaЍ N;NPO2PZK·\Lʯ'F̦3{iQ)?D'y?W0$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$I$IRwWvek9}/?0pg H$I=wPHa$paN$u[l|PT?vm_T!$IRo6yHi2a@5Ti@$I%`X?಴*$Iv00< 4R6W=p6ewT$IRO)X75)~PRDTC9?*6 $u6n ]1{G퇁w0pb্gC x'Gōqp27.y)`=U~ H{/P墇%h_`XBXj%[s ֵ5FaKts 1!P_T#R.'3+"xܴ\?̉!:fnHĿif'' n7 :+>qrRUv{4?vwزgLIRW%ܙh19na 1&@ko BnT9g*'Rq% GOÑ*A2z^@&r pX}l7nƴ\;ȉacmO^R 8>HTb:Xi}ǟE=T$ueQ;73!.ۿk8n<ϴ-Մpл1ď'pJc"S ~R)~}wfIT$uvmƃ?u#Έ'd _'i^TK*mf %GUN' L0s=`Ñ*0>P$+n$ŭzxpm0P$g`ih+-$Gz.!LJQoBQdɇ'}HrBՖ{1+? 
aQd5"#v:]bt ՗LEd2P IWճ+a}R ;]HeI] IRW+(2/-OO&b3p}VO\G)Äh;d.+⤱^7cNI1I$8F۵-\$Ok"HJSo{ѣSR8 aaF"2ɬq,PZ!a L|ϵxq{3, p&QRwICgZ[R18@y'/zmա Mu(9.oa+1,$3՗u(=`B{,6Ek1Hċ&K5J bU ;ӍFR(:<}^@I#,h>U"^#h68b3a:FKz4?x =.o%| >s,"n!= =(p$<.Ij?_aa]XXFqaZ?/֞+7%['TIvij;Rwq05P]\%WN<K 4ʐJ!ŀ\o"{=6n֡ho88c"~9ǭ'ScBcA*kz \J,R~EXWnewIEY^E2Β(JQ\'=:R2+fĒ)5_8YX@ ȮCqdvo2'$~{` Uߒm IԝgyP_iLzc:zeΓ{׋'8ţO" -gAlmGjo|%,O_I1l$L3i H RX ܓ,R)ZW(SrHt)q tz`9}7_m8NX%i`"=/ףC2Iԩ{uq 5pR?}{]@ Rwdyupv2crg{}aϞo8vch?R ?[Z |q2J{^c:{eegۻ@=s@%i #r^ԽmnFF}xWFO$F}3 BFk·Z>T 3($ImăB%@u0_:ɛ ޠPH$)sfϢkO?~z̺$\@9&ՈUI5"I*/E2>ZtRqH\fT$ImXvRO;9ţVmJ# 4 0- "w)i"Uc#&qIENDB`pairtools-1.1.3/doc/_static/report-orientation.svg000077500000000000000000000473761474715105500223450ustar00rootroot00000000000000 report-orientation read { walk { pair { default for both parse --walks-policy all and parse2 junction { --report-orientation pairtools-1.1.3/doc/_static/report-positions.svg000077500000000000000000000732751474715105500220360ustar00rootroot00000000000000 report-positions * * * * * * read { * * * * * * walk { * * * * * * outer { default for parse --walks-policy all * * * * * * junction { default for parse2 --report-position pairtools-1.1.3/doc/_static/rescue_modes.svg000066400000000000000000000512541474715105500211410ustar00rootroot00000000000000 rescue_modes 3 UU 2 UU UU all mask 5any, 5unique --walks-policy walk_pair_index 1 R2 R1-2 walk_pair_type R1 UU 3any, 3unique UU WW ! ! { pairtools-1.1.3/doc/_static/rescue_modes_readthrough.svg000066400000000000000000000600761474715105500235370ustar00rootroot00000000000000 Group 2 all mask 5any, 5unique --walks-policy UU UU UU UU 3any, 3unique UU WW ! ! { 3 2 walk_pair_index 1 R2 R1&2 walk_pair_type R1 pairtools-1.1.3/doc/_static/terminology.png000066400000000000000000001007201474715105500210120ustar00rootroot00000000000000PNG  IHDR(sBIT|d pHYs.$.$* tEXtSoftwarewww.inkscape.org< IDATxuUHx qu!aq]` / _'5JB L2q;3]=3ԓNUi:u=DDD$Ӿ|i8LLEjiOg)i_ ˠR%DDDD}@> .\`8pP ~Òuizv? 
6f =E kX5=)zɜ6 l]]0:wk50 uWs)|AXVw;`Y mGy8յ=EHi<,)#)<'<"iMeiql <l>DzlLRH|OhHK)6M$eDZ60`Ez̹w8 (%xtMhl6IUx {1MN1a-&;a _&aQ$y?a _.rd8Eb-| 3w/θiFhL7xNrUUDD$[N1x ;9u3pkϗnB&qXqX2kjG{Udn lݵD$ MbwX՝X@,LU s%"0 Kbb:7\/vݷxZ )os yWR& [E"""֮Rd'_b-.dmWa'y ű=-+ޥ++)ǵ1JjqI8ߗEw=X"vdOްmb*ϰoV+öJISl x`1NX6bT:EL4q|۵] E UqkMX+@#]&_ 9)fVa$3BDD*=#Ǧx.Mpq'.\A '"QUZ>u{607  6FrxBOBǞ^` )QDD$]'no&'_i'N 6c@$pn]]xJ9XAd6 m&ـLnu  ZaHl`c',}mNvƦޘuEJsMX6ԋ]7(D$3UpY|%aZR>NbDJhHK'_Yê_$ I2Ů}q\G̒ʀ( -$iMO2α.l}#q]zEk 1cUڮUUDD$܅aI_Im` ߇XpW"x'M$דj^ZH ,|/ %;̶79u]ؼGĔ`ㄎĺv 8O9 wj}]uUu{MqO 6׾1lV5څ2+30x#X%Ve5S%EDD$T]`8%6a}lV $9g޲G Đ&鏡ve})>Vb qMh%0;)k؂uŝU6 뮚JߕcAD,<Ϳ^d8Ǫ \Z퉧x{.֋W,zk}l9Bō5p6> ?>й&cУX""""""""""""""""""""""""""""""""""""""""""rCDD*u l7<`Sn|¸DcS kMS |p;0 `+`+MM1mccK|Md/nx:Ӱ9/EDD8&\6OG*^8K7s9Di2دy)v]=Hck%w D6N>dc;`u~a'pd_2TDDDDKc'XcXM~`wK^Ijo׽:p8l @m6D-v*"""Uѽ@1 vE> aÁDkChͦ Ķ Kja-L]wdZGGYtw."c_d .8He 聝xŪOTENGw+cc TCld)w(> Ǘ~w;64Xx$NGm%;#=,"""IWU l haZ7LBmWн:{@rƐĊ?l;;^K:EQ={ݓh\}Nsmك8ILBD4MߋNF8Heq :SU[u;o8fǰRb*aZ?Z& 6BmNs޿<$urVNy3PU v_>P]ԘWd&L-֝+j.9p7lD&9h _`Ķ8¾kIs*iHl\⑃ʕ Z$r\y3Z8Hp%Z]bM5>ȴ7`x|JS/{b*cX+)p6 VhϢDJ/aT% ?cJ 5"m[\ە`]K^&/^D,'w vxdb-@gcpzP̥XumE*ee,`2;WJ ͫXOa)kF/M^S|@uާב>WіT^bL8$;J&]k9ʄ;%C'=u6\?T(VT# yx<_?%rL\` p6=Ǫ-5q>7?/N XG[y8\zdp*^Ŧ8̵*Zo`i6_PwOB:@Nu_NTm y |._J%KiwU?__[}CvSXqc1+X?Sr{k׼L}x˼^Ϛcus4yPweW]UE$.Z^NknoO񱫪\GyE7@Kn4XnןXn<;wf<kkEXÛ5}ҽACJ}>2U^һ_B=S&b6Oba &ϺۛXc1'I#'I#@3,i\ix4`˝4G;u/gX aQ6@Gg2-󽰮~qwSaEPKX K~&o™v" *VHg.| \ %5Us`k{^uYK L26㽍7Ggh)l?-sЖp.c_Y;u6qGF3)gBRZ56( tʠ|2իc),yy7hh0bn4!hTϵXְkQ[yn=Ф7Nf*{,zΧ*.Mgه[|6-M,s~d{כ=VHvr6ⷜ4͆@$- h&͑y㐊D,{?`Q@>6.4sd {b l; k8Ʀ!Jbq0~e0%g-%TG9m8-vŵ9}N :]Bgm}`TbӽL,fK}IwUuajn4>UwrY֭?cٮϓX&_5XnӖ'{%5oU?}n"}d}7-3ZACuU,RsMbth7)s">p"wn^5r1e{v?:GFn~kJ7mӶ'nxjh*7vݴ>=,53KqZ;µnݕ~7E^ZqLLXW" \-RٌNssEt5@ҸKJwъUeNB;ax;b'CV@w8ρ@KlLZ88 KBIc1&O_]'D*9[sn~)tX܃}%{t+BHa-bwz9l6a[.b< s,n`xѸ/R+[.NA޷/#LԴt\ฺLؤt\l2YȓX4j̥ݝY}>_1xKFիxmAEUiϮ}Y0i^=F=IҸhG_LkBK$i,-їju=IKK1vz\6ړtw1=*4תO˞LYX^*#pl:Xw˷+)M~翽ku|K0Z;.*X}X+sX5VGb1=qFxYX46u`-Nr$ªf]G!۩Jw^/6c4ɾG=5n.>?ye_¸L,:枼EЇ]; &A`U:qkwnitW?8.۷  
<<׭Iឳte^a!_GfNCz9Sn'=O̚Azσ=t'̢S pd /\ǎ+cToco`yXz RS)Y_mˬ]Nn/yZz˂WogN,&xl]?ɥۅfvZ2i Ǻo#NkaؘA4NǙ  DMNHZ/_Ǧ_KqAh҈qFaAN8 yDJZhtO.+A8-c.ђi 0Pm36 op?㴄v>UJnNw DI\\gԳb9{n]{p_ ak aYU泧B М~;DՒ^{9ftI,+/k~|3<+@WX'\CއxK4jq`am}7a]_Ǿ/b'z7 K4a=, 8v IDAT51y NmܗwА/C&=r7,+hI9 `%9ֿؘ&X@VW2v~54Kw>y",Z=5JJذ <:lxxrwƢ"n48( \W >`je4y;h~5{~@ d])[qh⌫w$Vt.U Ou0U]-+@CCO!x_#9bh74mЛXMe;^I,(qd7S DRg1eXa8Lg1F?_;):I6`2c!s.ڒǝ\1{EsF` 2մp+RyOZ7}v@:uO?b S֯s5/SS?O(32Kundgm3WҢf-no O yy| j^ȵ`B9/a//!zx43"UMBǷDNo~2RVx%y:鷧Iٵ~yH,uWzm\ŜgXɾ>g4Ųy G__vntCዂ`pf?y9P]vÞGLbRb󹟏3@"ݍ؉ oc쏛{|+HqXf[e8.Z:c]zHw\!V&[=+NT 莽O`Xx l?KtnԋU%8G)5mY[tG/xPŋB*͎ޓMI?]w,_е;[yvcxV1HH ꎿqCq+EG_'eĭ# fϟ譿R}w5>y0:d|cgeK?xzz2S(?5?`OP~44<¹Yu0[287HHuwI|z"\c '`s=M{DZ0O)E& Т6e +TG`-c[< [Xf*oKpD?GnJ$5yZt.8M[N)3cFϞ\?>ѩ'ܴfMoъ:I,7miOZT*4O\ܴyT*pw'm_#1?7QgSg~甜7jon˝̴fN,9С75={&=ϥHGwWWI { WWjp\]c~Opm;Obim:t}p}X RS:rSp}XGj`ZbU"M{^> :NK'9qIbj@r=VЛMߣD}OhqIl'DID8f>Z\︼G/*d,]\[z˧˗^9Uy:G+Bgh]rX6"٫?Ө%c>펻& pdObKi:pDObis44"h}l;ޓXZv>-8%qַ1%R\wPuom$I$swLBl+ؕ[YS˱|TGɈzLx/=دkul,kng_ ȶ4 -@sGE_͠RU&i _X4i%)@N..|PZR'/"spO]0pWRɛXpG+/My<[!tl!.GcW"uY Nx'ca%݀3\'pXvNy s*= p;#qG/|`v.:EZ/e躿vx1JIhŒ=hiYw֒,6EZ*#1תO?=K^],ڸ9\4SpOdVNឭUp~ᤢ;x`dĒ؜{_KbU7acjcn-@+,9U%c.] 898Ϳ$n:Ͽ/t_ӱ֮ΡX+xNyGeSQ?(6$t|~a^%уX2U%a-*&9)g-X"wķ@qt*cn%"}֣iu箆>Zh _ܭm$>nwi)͝Ê6Iͼ|.ѓF52?1̝m8]#/Q{Ҵ'0Wl~Z,jf>ReB,\ҽ=4. 
ml~n.wA牔:1"JRvsL<&R%Xg imp&IƲd+>{ډ4?TObپb> >Hհǁ~F˯,X;VM,+/2VWcΛȪ_ xe~ D依1Nľ|݉%p{&F|9Xx1=%.$Qi0N6!t, O##V"ك^ys0zDZT5$pNP\r.]'t.Olr8΅5k$O#bnpޗRlxBS/q fӹ3+"P.(IoXފ]8L*Jh֧D.H6#mZwj#^/BbZ`$`VHf$8ޗ)o۱EHvjMoK$]a'bS]KyEDDLU&;!/y ^ęT$4J wشYSlGj"""iw?6@cA*?S\E26;0&I^-{ZTrmQXahM*@%-MX H3q>{3nhRq&}+<|>o."""ZyIi4RR=JWXB¥H,y5N@]EDDB^ iъTٲԉp8ӻL.8hnE* JشG"闇0Kώݱ[H 6DW,a'ߣ7U-vCnW80ȅm6c]S{dZ=`,e/4"VXPyt2(J_^% iE)3bH$PG;ZQM1A,췥A 9ؼ9 ] 39sHEp $o ӈk9ؼ}[q>NDTZw|Vc]Z%sj#8őƼH-pQiDXqg;TKE.1#NmˀDxgщHuC܃0<Hհ?r1pIeq*TJ*D'QH3؈s-ӈ$SK} hi^:J'ȭˁ'QIX5 ކ#t0pAHs"*&{ǿrDDD$C |jBk9+ƺI_|P8p9lj?bcEDDD<*>4`3VHCKrӈ8xD'JEDD$KnmRB + fﱊ%X^("""Rp]q,߉Z%{ƒEw8ˠҠ8{@%9""""5 STV*;܊ kd~B1T`YA肎TpNx=H"i|>bJKŒB&l\JTcp.3JGH 8 *N4"q;X,*4T$- mMxmj.MMv$NBV>6NE؉ix7^%RNq>;Fda-`߻Rwdhe@U2a-6HePPUBp0ca e}o^'O+,ɜWp^DDƺfJmlDN`]jOE]PEDDD*MR) 9XR_a7H:=O@?Hq1}6J)ɱKUgߛ$u)68_)"""R)MׁTB3D$KIwcا)5Ēj)WDDDD\bW%5r+>`#(P KbKºwA-D H8%jweP"Y3?BJ"ـ񊈈H)XKT`=Ήvq _cSjdB[6b{Kd)'c."ݷDNz$";rqEN4"Q8 x8&_uM}8>$t:VDDDDn$ De௄heP"Y#p% |mk\[CcEDDD*/8'Q|y=c''"UAp 0Na|T(38'%XI,q^Q zbŠ9Y\p8T,"""řG+:<މ*=JR^މnFKtv#8""""RN pN}<(cTf5o.}/21fEDDD$I ӈR5pAdPs-` 6bk`c3ZP.p PHhDNtj$DDjU$m 4V Z!8To_ G$mߪL"]\nL_ƥ8p46Ie0wG 8HEV]$TAD f'`VuIBUb`.""م@%""XL:Nޠr%`==it8NQ x8Mgv5DZHr™sKcfa] w"v_)0XGxehui &R yD~ u_$.;; \ao+؅W!{"]WgMık9(qu^RU)ʗ85;Q۰5&>lDYc3j}Ú,f[ /,d|p|ԩV-j v{f-X9X vur`!MY97P+?mۓ~Ɯ͛~]`u-*/D`AѧX/dĦku%UAyع`o"JXBӿ>^z$NxͻPsQc)𬇱TE$3wyGU5<{_*e!\;*IAM.aS5Q'f, l]6'w .޺k'{|>6q]_RZX}s 7qǀA!֒b>Zfm[~k$_\?[mwS3?<^[8ߝ8Tjm'^cNhsGQ5W)sȯUA~E: ,|V~zQCf\o}t:ָc#do}Ѽ|;j8E. \+FQy `4 x{CЕL녕t%͹@m{ %s~gQTlTx.juLÝҡm ?,;?[v߮^u׫PEE4nc+$|ttJ}ۄ$.qTY-NvK?|µMdž_>ۃwЎ|F\H~W0h@>$}er$ekI|n5؄99X5IyK4WArsSOtKq18 IDATk6^uã3gǕܲ6Y۶ըt +l Wrslq={F9H%k;gv'~]IO7(ޠ4wTp}}Ŝz~;"uڥtot ߸ # {.Ԅ {>ǹuC{8ni3gT-#2R?WDq^bm()8( J] uןuJ}7ึ_ZTuC^4Dzuz^Amڻ]떅t=tVs|K Ϡv֥UDBuzi[Z\F8F\ NձGDӰ74:Z*V|?Wq;:~U_Ըإg뾞㻗;uĺ>Օ@(&/):n@6} I^C;ht?b! ߝ`WUC {Of`䞥lYGDhT>?`Xm8N^`) l?Дka{>x&cɔ80 ptۥsb A:u9Cuk7z^NWrN@¼9!Ǹ[O껺>6k:KKhw,kXFH-|tIH%VQKqٲh k'}lK3\]Zv݇B?Q\8^JwwH߿֥N:;uX ~nIx)6Dlv>;&#7!X2ޥ؉/M7,ok`MN>wc'Ki῿[bs*{(߀}hb3wX&/Ӱ@XmL'XQ? 
?vb'˱X%I’Wl K2/` k"̀< ԟII 8W¾#<!)񨁽w5Y W,ڸIL(u@R|L`BWPA|22 nI-n%8g16pr ]IVĩ~-qVhF>vmk邝ODHD5Zaݨ\ڦX N5G`kcX8Iǰ>Hk/XX~7;)kp+O0N7aD`s? $˝OLb7wҸ{ޓi8[Y5,&Ih?ykaK,ig)N&K^ XK$.9XDj-k'EmDO.IaZ9+ v#O$K1u$NCM( v~5.tX}o-^L$|<֥CZ]]Zm ]]Z(#"j?ѹWi +L6NI$}>+b|I[,m\\Ѩ%mtoY3~v]ɥw,֓fslXbvk8v q,lu{`FSnt$NW W'r$֕~V,g_l,Tf<m閮cƮZac^NǒA؉ms&bކM^]8{/|k}{ͻu<@#B[v&X; 4kY<~ mP]ӕyuZO>dlpv4+X;{oyXQQԵgh0v9<`]eZOo3̿Ok,i @XDjY"={b1}gf%u{'m+u>LD`c{Ӱ߱ag }$vw6dGhcDtm 9Hֵk ~-?qqpU/%ckI ͝]{viڥ5^ lw\_Sڰ"^A \ܲh*k'ۂ_iNwi=2j4r/{Hztqb=R%{g&gujq8@)xBbmh(Phq/?J Vܝ !F{ M̽dٝyy;ye{L 3_'yZ̀9ۖn6>W@` m"lk=w9AC'\n`{lp}>.5o?n  3cVEXh27agUOpĭ1NF?N¾؀9603s\{wUv+s!.?/e[!7sB 0b2ƞ7띰0=T'^SnŒ+YM2\ dt& =%v'[aEKg2UXyNTǎ7Ѷ%x7bw>Gc^k٘%ЉO ycnHFx˹9j).*;RWWsߔwCE:i<=oTJ5YX߭8{Ӝ[=3z6zAgvs>Ҽcf j;l+gX=zqy}vY8W,b^g{-zVdяߋ ȓ ē.4qg2DP}e;Ƅd2=nL6"r_0ѐwM8N2DfOgŘ yrr5V]YU D)&c-Yhp\HbC$`ȧO ᎍC=5J߄"l2*UuA#ۧlxEZ3CG=9|͓(-`:WpȸW$}ߦm|)-[ql!DVeW{}/z *.kN.o\ ^+p@xHQs|}ws++{[;Ef=&Z|]J~rru)=|ae%cA,SM/l{Id +B6MC"ǒ:/݊{|ROq[gvl9>([FԗdHO aZ4, t><1g/ن/ñ e+ yI ?z3xpW9ï[e5|l)".TZUWsAֳBfγy/M!D 7?1Dj]PI·v'6,qARm*y,vD,*:짃6=9Ӭ' x6Vh8b&Bnj o5DZn CD]rۦl 2_-'!ko ?npVƗB7pwa%!Jo9׆'0̛]W*1ᪧ >/ ǰl*U Dp StFsW W=oy甭N_DRB0dPCg>6)[5~r\ntN8|Š弹k+CZ "8}@Hm&ԩ}:9zrJYnTU!RQֺc@ u|'m{B՛t0APRz/T9)Vnח|ĭz˞N ٧/kU\ZNc]ُMk1`Vc~EI0*ac! 
i}@Hت α؀6V%6*Tu u od3xNHCd{W-yB?'&$o8˃}4x܄v)[ՍȽsabP8CE2?x+a6=qϩ װcKBP:,_BPN]W; n.l^BP<1.S\Bl=ŷ\G~J6FQDj.yKg}66^_).sR|]{D Mr㜏 Φ!9Vq[R>ꢁy +caU$xWDzV3e.|-y1阡IZk>.raa¤nj>+$GRO l2baaI]Tehs:pq}a\ NH=gm1d ঘsk"oZ'0Z8m;< &O7\O:q ޘƇ >.DӦfzf=BPwIg<>MѠb A]|;>BۺH KFGjk,1JiN0ib\-UP_?+>ԩѥ….ٰӧׇ=@tm$-'W$ԂBlo ۀ'pO<'ҺzMZ}/d&v(6Kf}]}̘퍍UIPdm598;3Ō\3NKf p2[M !8"JU;rcgufgؤ}qva`K0ћX$P{d_2 0)Rb\1,d8e1y'\k^A=z1sVpWwDsxjqQ!9L^2bN=79!DO`McBP[KǘlZ{K)|ݷW3Do IDAT_͜o:n_>ګY@kȏ WayWenr՘WhoKk37E*ͬm6>pzRDpycaB[y}] m|&2nޱצk1uv wPT3]sO}:1o1qj,4s?–B@oƼD.\id6'e@̳/l5 Ԋݱg +r9*S2/ X4X\lT7dn!v>as;Ha^M')3N">Dž{ٜOKאd]!ݘ<ǮH`iV,͐v#{8ó[@}[536͢we}6 KAi1^ڢ-ޘؼ e;JK`$GasgjqAaH^36/Y 2ǧP J1^'!\ &sMB&/W`F{C%1o|.k3gɔu'{]iܾawɾWce#) ؽ8Flc0Kӭk߂%cсW1G ڏQ9s'}࡜>hp|}Ɇ kT՘}Dܱ>ACm$¹ÔU6lQZ#MsD'ثHӸ}R<'k.v 52x=>~&sՔ 8yWF]E|kybVo=/ҲXeWm?7_~4>Dw8a?K]|gbR\O3@ąe 0Sív(ÓTc 7zyEaQV325l"qϯ01fe\- gK-ui8i|k m?q),l\c\F4yh;`4nm]E NTAuX\T sčF3  ֭.`TʴF:I=G BPUo>/{˖_0I\k EE\4\THǢqn}1a~to|=RSGlGuygr!YTlhesKxwCo{ ,M);g#~I!%f?{SZ1_}Tt_rxk|Etgx| س_ZF. lX2;K)H cLJ]('pzbyu܁m`d8F1#CMXR8T!['15ݽmX ps2Ae@;hiy˖p rѡRm+erI3,<ظ)1JSc.eX>.] b ʻo!"7+4J1A25lD$ٔ G!"bBO@)ats_:RoXw$\oˁ/N!BSw'H G->Ju_4k~5qߥ4{Zz)o~m t~Ǘv*pO<%UXyȂmڲ_׭;K?gE_-!D +uUaЄ(`d8m7qޟܗmkVfypl]8*}YphތyW+ ]G! Vrm 96_%[viG42E6BZ?/`s_*Ǿ46pϒ<%w ey싨'H_u`)oӉo82O-0{ev)ڌt= "yvs_p?ou^׺jMBT`?UtLiyoP\”F̤n0 2`qt=W'Py%݇R u-v/M7CS f7$i c9 Q4lX``J=.":K?|?5J.{·d݂MWc5Aqg,'m mWaFx=l1twt ;cρ1U1#g)*.ﱗ:c>B 2E6Tb~(w +x Y星v#}[%ߍ ۦ8Nr"ݽ+1U6Oc^d3v->vnÌdNaqZc`iڵű7awйU ј)W؀̛,,/xoM;1юd-趮$l@> Ax\MD̎~b`v&(>=Ga3rڗh ^E%&̀6m9{O^y;B }-+9"`_B@(rO=0#6cy? }wYl}<{?ѿ LJ\p*A<kX~a7;tbD|)H $9/xrbɸ 햡_ah,1VNBl"yyo+[Hv}?f%3`qo= phu _ϱdcބymw&Z&}" cܢTԀMB zG6܆&|DPUKi?}Cډo4h?4c|L0XL$cDyaFFdy%,"1˟/~>/c6p  M6c|M0_н Xv*#|L$o9]H(`:WTp\@4h,ZQ=RyG%y}yq?WB3 v3 6COfHZ`s/{˃27ܷD*&˟Hn4/> v+NlXWjکa}6SW42LfH{-(Bj Qp{[$/ %XUMi WOF Ir/Jl;YYCR i8nƔD3񌷜M(n )-/`s•Kep0,\&BR~4pi$&y?U_ h.n@K|CmRVA&fqcRш)6#s\"|Xyr3񍷜LnoGX8XPjAFC_N oyhV敽ˋ\S9?]MW!݇=S`kv'j&Բ1}ңet](XJ[F GK+C'qY@ BC C7ړh[~f ' ˱3b8gOXCkR+lo9u?;&X% U]ݧybLOXX2Vd7_t PZ\C;BtT*:7!s9%! "FF$e a=~ ךk|#5HbR'3rB ! 
Қ>a&V424.[Àq27aQ!s{h{?&s3&?;zHci]Vбs;'k'( JJ(kH]uyD8D. ܺT1"؏$+O[|]KvB}y3*dwÅ7mu7#+f5+[͖5YDZ|tFi؉dA]ј4 ه%hʂT4:57#YB(}537S!q9e<,e ÕKM7HBsvzbt͘2`_CtH][z+aFQ3Wɓ+A\1u,Vt\o96JoyzN{'Oenf ! Mk2M͢MOQ=>(r؏̞R,.~pgBsry[ޓpaze%&BYc~-Xc3Omwbt %hd?aiD2Z2B(3#=F6E6:-_ [AH_1-Խ%QqUm1tAf휲Qa+882CJxo#WB2ul-7t1g8[O<߉3xW ElFc`cM|mNWVGVL|oz˻MD lƛmSN۳%'QZ|11gy;`Y*~NxϘ+Dc\o0ˆ=z4+epb )¼Wؙ0T\!X,6Vң V"WCv{ ΀r7Y^7O"FpAQ@nl/Q?LJW+m$zT#lXüA5B.[oavBSo/g8dLK0ǍZ`c~ 0qE \+GB ޶K;Oǐܓ3}OڣIeb^aU;ac00W R$?-z!&i]gLC <'@Sm{$1;7Vo'baBe7QlKH22g H]n̟܋b 懊,551%=v7`' `}V`63Bʣm L  ?'isV\=baOcP=l&g0cY~N}(BdǼNӰ~o {wos<?>1yq=3x_cֿ Iy 3{kG܏1C+l~mC[u'0Z@0 4*N.oq_E#qۤ )Ag!DnW,x/opSiXLX9xS-n`nr}9ɈF\n3b(lh&]Shc.w 3t9޶>X=1q#{Un]g>#XƥC0f4Fk.{?oҴ˗ʼ@dSX)!D/#R9߈*rO܊Kij9m1gBtYUM02k>vI<"ƅ8mą%*Q&-#Zy|iMVXXXY`p%9 `1!Jx3I5>Xa\q .5C \3K|Ƽ]1v&P i.1-R5L`#VB@/K0#az ֝]nG}v<`c,{BjoK1Ö8ļ3bI0-\[ _~6`ϝ@Yփ3t-|l96u/Q}ayKM1¢VcFH$e_biR 鈭1~r{Hx#&B0Xyc =OQ!$L65=3]^Y BѴ J}Nƅaf=~ˏ͗P'ǩ_.ljXn+l_,'r v documentation" by default. # html_title = 'pairtools v0.0.1' # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. # html_extra_path = [] # If not None, a 'Last updated on:' timestamp is inserted at every page # bottom, using the given strftime format. 
# The empty string is equivalent to '%b %d, %Y'. # html_last_updated_fmt = None # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr', 'zh' # html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # 'ja' uses this config value. # 'zh' user can custom change `jieba` dictionary path. # html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. # html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. 
htmlhelp_basename = "pairtoolsdoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). #'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). #'pointsize': '10pt', # Additional stuff for the LaTeX preamble. #'preamble': '', # Latex figure (float) alignment #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, "pairtools.tex", "pairtools Documentation", "Open2C", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "pairtools", "pairtools Documentation", [author], 1)] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ( master_doc, "pairtools", "pairtools Documentation", author, "pairtools", "One line description of project.", "Miscellaneous", ), ] # Documents to append as an appendix to all manuals. 
# texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. # texinfo_no_detailmenu = False pairtools-1.1.3/doc/designnotes.rst000066400000000000000000000110711474715105500173620ustar00rootroot00000000000000Design notes ============= Designing scientific software and formats requires making a multitude of tantalizing technical decisions and compromises. Often, the reasons behind a certain decision are non-trivial and convoluted, involving many factors. Here, we collect the notes and observations made during the desing stage of `pairtools` and provide a justification for most non-trivial decisions. We hope that this document will elucidate the design of `pairtools` and may prove useful to developers in their future projects. .pairs format ------------- The motivation behind some of the technical decisions in the pairtools' flavor of .pairs/.pairsam: - `pairtools` can store SAM entries together with the Hi-C pair information in .pairsam files. Storing pairs and alignments in the same row enables easy tagging and filtering of paired-end alignments based on their Hi-C information. - `pairtools` use the exclamation mark "!" instead of '.' as 'chrom' of unmapped reads because it has the lowest lexicographic sorting order among all characters. The use of '0' and '-' in the 'pos' and 'strand' fields of unmapped reads allows us to keep the types of these fields as 'unsigned int' and enum{'+','-'}, respectively. - "rescued" pairs have two types "UR" and "RU" instead of just "RU". We chose this design because rescued pairs are two-sided and thus are flipped based on (chrom, pos), and not based on the side types. With two pair types "RU" and "UR", `pairtools` can keep track of which side of the pair was rescued. 
- in "rescued" pairs, the type "R" is assigned to the non-chimeric side. This may seem counter-intuitive at first, since it is the chimeric side that gets rescued, but this way `pairtools` can keep track of the type of the 5' alignment on the chimeric side (the alignment on the non-chimeric side has to be unique for the pair to be rescued). - `pairtools` rely on a text format, .pairs, instead of hdf5/parquet-based tables or custom binaries. We went with a text format for a few reasons: - text tables enable easy access to data from any language and any tool. This is especially important at the level of Hi-C pairs, the "rawest" format of information from a Hi-C experiment. - hdf5 and parquet have a few shortcomings that hinder their immediate use in `pairtools`. Specifically, hdf5 cannot compress variable-length strings (which are, in turn, required to store sam alignments and some optional information on pairs) and parquet cannot append columns to existing files, modify datasets in place or store multiple tables in one file (which is required to keep table indices in the same file with pairs). - text tables have a set of well-developed and highly-optimized tools for sorting (Unix sort), compression (bgzip/lz4) and random access (tabix). - text formats enable easy streaming between individual command-line tools. Having said that, text formats have many downsides - they are bulky when not compressed, compression and parsing requires extra computational resources, they cannot be modified in place and random access requires extra tools. In the future, we plan to develop a binary format based on existing container formats, which would mitigate these downsides. CLI --- - many `pairtools` perform multiple actions at once, which contradicts the "do one thing" philosophy of Unix command line. We packed multiple (albeit, related) functions into one tool to improve the performance of `pairtools`. 
Specifically, given the large size of Hi-C data, a significant fraction of time is spent on compression/decompression, parsing, loading data into memory and sending it over network (for cloud/clusters). Packing multiple functions into one tool cuts down the amount of such time consuming operations. - ``pairtools parse`` requires a .chromsizes file to know the order of chromosomes and perform pair flipping. - `pairtools` use `bgzip `_ compression by default instead of gzip. Using `bgzip` allows us to create an index with `pairix `_ and get random access to data. - `paritools` have an option to compress outputs with `lz4 `_. `Lz4 is much faster and only slighly less efficient than gzip `_. This makes lz4 a better choice for passing data between individual pairtools before producing final result (which, in turn, requires bgzip compression). pairtools-1.1.3/doc/examples/000077500000000000000000000000001474715105500161245ustar00rootroot00000000000000pairtools-1.1.3/doc/examples/benchmark/000077500000000000000000000000001474715105500200565ustar00rootroot00000000000000pairtools-1.1.3/doc/examples/benchmark/Snakefile000066400000000000000000000243371474715105500217130ustar00rootroot00000000000000cores_choices = [1, 2, 4] chromap = expand( "output/result.chromap.{cores}.pairs", cores=cores_choices, ) juicer = expand( "output/result.juicer.{cores}.pairs", cores=cores_choices, ) hicexplorer = expand( "output/result.hicexplorer.{cores}.cool", cores=cores_choices, ) fanc_bwa = expand( "output/result.fanc_bwa.{cores}.pairs", cores=cores_choices, ) fanc_bowtie = expand( "output/result.fanc_bowtie2.{cores}.pairs", cores=cores_choices, ) hicpro = expand( "output/result.hicpro.{cores}.pairs", cores=cores_choices, ) tadbit = expand( "output/result.tadbit.{cores}.reads", cores=cores_choices, ) tadbit_bowtie = expand( "output/result.tadbit_bowtie2.{cores}.reads", cores=cores_choices, ) pairtools = expand( "output/result.pairtools.{cores}.pairs", cores=cores_choices, ) pairtools_bwamem2 
= expand( "output/result.pairtools_bwamem2.{cores}.pairs", cores=cores_choices, ) # mapping only: bowtie = expand( "output/result.bowtie.{cores}.sam", cores=cores_choices, ) bwamem = expand( "output/result.bwamem.{cores}.sam", cores=cores_choices, ) bwamem2 = expand( "output/result.bwamem2.{cores}.sam", cores=cores_choices, ) rule all: input: lambda wildcards: tadbit + tadbit_bowtie + bowtie + bwamem2 + pairtools + pairtools_bwamem2 + chromap + hicpro + fanc_bowtie + fanc_bwa + hicexplorer # + bowtie + bwamem + bwamem2 # + juicer # + pairtools + pairtools_bwamem2 + chromap + hicpro + fanc_bowtie + fanc_bwa + hicexplorer # hicexplorer # heavy because it creates coolers # juicer # run separately with the number of cores equal to tested, b/c multiplw juicers cannot be run with the same path rule test: input: fastq1="data/SRR6107789_1.fastq.gz", fastq2="data/SRR6107789_2.fastq.gz", genomefile="data/hg38/hg38.fa", chromsizes="data/hg38/hg38.fa.sizes", genome_index_bwa="data/hg38/index/bwa/hg38.fa", genome_index_chromap="data/hg38/index/chromap/hg38", genome_index_bwamem2="data/hg38/index/bwa-mem2/hg38", genome_index_bowtie2="data/hg38/index/bowtie2/hg38", genome_index_gem="data/hg38/index/gem/hg38.gem", genome_rsites="data/hg38/hg38.DpnII.bed", threads: lambda wildcards: int(wildcards.cores), output: file="output/result.{mode}.{cores}.{format}", benchmark: repeat( "benchmarks/result.{mode}.{cores}.{format}.benchmark", 5, ) run: if wildcards.mode == "pairtools_bwamem2": shell(""" soft/bwa-mem2/bwa-mem2 mem -t {wildcards.cores} -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} | \ soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \ soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \ soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \ -o {output.file} """) elif wildcards.mode == "pairtools": shell(""" 
soft/pairtools1.0.2/bin/bwa mem -t {wildcards.cores} -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} | \ soft/pairtools1.0.2/bin/pairtools parse --nproc-in {wildcards.cores} --nproc-out {wildcards.cores} --drop-sam --drop-seq -c {input.chromsizes} | \ soft/pairtools1.0.2/bin/pairtools sort --nproc {wildcards.cores} | \ soft/pairtools1.0.2/bin/pairtools dedup -p {wildcards.cores} --chunksize 1000000 \ -o {output.file} """) elif wildcards.mode == "chromap": shell(""" soft/chromap/bin/chromap --preset hic \ -t {wildcards.cores} -x {input.genome_index_chromap} -r {input.genomefile} \ -1 {input.fastq1} -2 {input.fastq2} -o {output.file} """) elif wildcards.mode == "fanc_bwa": shell(""" TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam) TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam) soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bwa} $TMP_FILE1 samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bwa} $TMP_FILE2 samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file} rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam """) elif wildcards.mode == "fanc_bowtie2": shell(""" TMP_FILE1=$(mktemp -u output/tmp.XXXXXXXX.bam) TMP_FILE2=$(mktemp -u output/tmp.XXXXXXXX.bam) soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq1} {input.genome_index_bowtie2} $TMP_FILE1 samtools sort -n -@ {wildcards.cores} $TMP_FILE1 -o $TMP_FILE1.sorted.bam soft/fanc/bin/fanc map -t {wildcards.cores} {input.fastq2} {input.genome_index_bowtie2} $TMP_FILE2 samtools sort -n -@ {wildcards.cores} $TMP_FILE2 -o $TMP_FILE2.sorted.bam soft/fanc/bin/fanc pairs -f -g {input.genome_rsites} $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam {output.file} rm $TMP_FILE1 $TMP_FILE2 $TMP_FILE1.sorted.bam $TMP_FILE2.sorted.bam """) elif 
wildcards.mode == "hicpro": shell(""" cd soft/HiC-Pro_env/HiC-Pro/ mkdir -p output TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX) TMP_CONFIG=$(mktemp -u output/tmp.XXXXXXXX.config) cp config-hicpro.txt $TMP_CONFIG sed -i 's/N_CPU = 4/N_CPU = {wildcards.cores}/' $TMP_CONFIG bin/HiC-Pro -i rawdata/ -o $TMP_DIR -c $TMP_CONFIG # Cleanup: cp $TMP_DIR/hic_results/data/sample1/sample1.allValidPairs ../../../{output.file} rm -r $TMP_DIR; rm $TMP_CONFIG """) elif wildcards.mode == "juicer": # Note that this process is not guaranteed to work well in parallel mode; # recommended to run separately shell(""" soft/juicer-1.6/CPU/juicer.sh -g hg38 -d data/4juicer/ -s DpnII -S early \ -p {input.chromsizes} -y {input.genome_rsites} -z {input.genome_index_bwa} -t {wildcards.cores} -D soft/juicer-1.6/CPU # Cleanup: mv data/4juicer/aligned/merged_nodups.txt {output.file} rm -rf data/4juicer/aligned; rm -rf data/4juicer/splits/[^S]* """) elif wildcards.mode == "hicexplorer": shell(""" TMP_DIR=$(mktemp -d -u output/tmp.XXXXXXXX) soft/hicexplorer/bin/hicBuildMatrix --samFiles \ <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq1} | samtools view -@ {wildcards.cores} -Shb -) \ <(bwa mem -A1 -B4 -E50 -L0 {input.genome_index_bwa} -t {wildcards.cores} {input.fastq2} | samtools view -@ {wildcards.cores} -Shb -) \ --restrictionSequence GATC \ --danglingSequence GATC \ --restrictionCutFile {input.genome_rsites} \ --threads {wildcards.cores} \ --inputBufferSize 1000000 \ --QCfolder $TMP_DIR \ -o {output.file} # Cleanup: rm -r $TMP_DIR """) elif wildcards.mode == "tadbit": shell(""" TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX) soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq1} --read 1 --index {input.genome_index_gem} --renz DpnII || true soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper_binary soft/tadbit/bin/gem-mapper --fastq {input.fastq2} --read 2 --index 
{input.genome_index_gem} --renz DpnII || true soft/tadbit/bin/tadbit parse $TMP_DIR --genome data/hg38/hg38.fa || true soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file} rm -r $TMP_DIR """) elif wildcards.mode == "tadbit_bowtie2": shell(""" TMP_DIR=$(mktemp -d -u tadbit_output/tmp.XXXXXXXX) soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq1} --read 1 --index {input.genome_index_bowtie2} --renz DpnII || true soft/tadbit/bin/tadbit map $TMP_DIR -C {wildcards.cores} --mapper bowtie2 --mapper_binary soft/tadbit/bin/bowtie2 --fastq {input.fastq2} --read 2 --index {input.genome_index_bowtie2} --renz DpnII || true soft/tadbit/bin/tadbit parse $TMP_DIR --genome data/hg38/hg38.fa || true soft/tadbit/bin/tadbit filter $TMP_DIR -C {wildcards.cores} --format mid || true mv $TMP_DIR/03_filtered_reads/valid_r1-r2_intersection_*.tsv {output.file} rm -r $TMP_DIR """) elif wildcards.mode == "bowtie": shell(""" soft/tadbit/bin/bowtie2 -p 4 -x {input.genome_index_bowtie2} -1 {input.fastq1} -2 {input.fastq2} -S {output.file} """) elif wildcards.mode == "bwamem": shell(""" soft/pairtools0.3.0/bin/bwa mem -t 4 -SP {input.genome_index_bwa} {input.fastq1} {input.fastq2} > {output.file} """) elif wildcards.mode == "bwamem2": shell(""" soft/bwa-mem2/bwa-mem2 mem -t 4 -SP {input.genome_index_bwamem2} {input.fastq1} {input.fastq2} > {output.file} """) pairtools-1.1.3/doc/examples/benchmark/benchmark.ipynb000066400000000000000000021104541474715105500230620ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "864d317a-4960-4315-846d-ba2f36014614", "metadata": { "tags": [] }, "source": [ "# Pairtools benchmarking\n", "\n", "Welcome to pairtools benchmarking. 
These are the instructions on how to test performance of different software for mapping Hi-C and Hi-C-like methods.\n", "Mapping usually results in the file with mapped pairs, which is then converted into binned matrix format. Pairs format is the \"rawest\" interpretable type of data after reads.\n", "\n", "Reviewing the literature suggests that there are at least 6 methods to map Hi-C and Hi-C-like data. These include:\n", "\n", "- **pairtools** is a lightweight Python CLI that extracts and manipulates Hi-C contacts post-alignment. Aslignment can be done by:\n", " - bwa mem\n", " - bwa-mem2, ahn optimized version of bwa mem, which [x2-2.5 improves speed over bwa](https://github.com/bwa-mem2/bwa-mem2)\n", "\n", "- **chromap** is a [fast alignment tool for chromatin profiles](https://www.nature.com/articles/s41467-021-26865-w), not specialized for Hi-C but [parameterized for a broad range of sequencing data including Hi-C short reads](https://github.com/haowenz/chromap#map-hi-c-short-reads). \n", "\n", " Does not require separate step of mapping.\n", "\n", "- **HiC-Pro** is a [pipeline for Hi-C and DNase-C mapping](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-015-0831-x), \"optimized and flexible\".\n", "\n", " It calls mapping within. By default, creates the output cooler files with binned data, but the script can be tinkered in order to stop the processing at the step of pairs. \n", "\n", "- **Juicer** is a [platform for analysis of Hi-C data](https://github.com/aidenlab/juicer), which is already adapted to a wide range of cluster types.\n", "\n", " It calls mapping within. Has an option to stop the data processing at the step of pairs, without further construction of binned matrices. \n", "\n", "- **HiCExplorer** is a [broad-range set of tools for processing, normalization, analysis and visualization Hi-C and Hi-C-like methods](https://doi.org/10.1038/s41467-017-02525-w). 
\n", "\n", " It [builds Hi-C binned matrix post-alignment with bwa mem](https://hicexplorer.readthedocs.io/en/latest/content/tools/hicBuildMatrix.html#hicbuildmatrix). \n", "\n", "- **FAN-C** is a [set of CLI tools that runs the mapping (bowtie or bwa mem), extracts and manipulates Hi-C contacts](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02215-9). It also has the [tools for data visualization and downstream analysis](https://github.com/vaquerizaslab/fanc).\n", "\n", "- **TADbit** is [multi-task Python API](https://3dgenomes.github.io/TADbit/index.html) that handles all the steps from the alignment of paired-end reads to the detection of Topologically Associating Domain (TAD) borders, compartments and three-dimensional modeling of chromatin based on interaction matrices.\n", "\n", "\n", "*We benchmark these programs on one million of representative reads.*\n", "These reads are taken from random replicate from Rao SSP et al., [\"Cohesin Loss Eliminates All Loop Domains.\"](https://pubmed.ncbi.nlm.nih.gov/28985562/), Cell, 2017 Oct 5;171(2):305-320.e24\n", "
\n", "Generally, it is useful to assess how much computational time you need per million of reads.\n", "
\n", "As long as you have this assessment, you may multiply the size of your experiment by the whole library size (in mlns of reads), because we expect linear growth of computational complexity of reads mapping with library size.\n", "\n", "\n", "The benchmarking consists of four general steps. If you want to reproduce it, you need to run steps 1 and 2 manually in order to create the working environment, and then use snakemake script to run the benchmarks. \n", "
\n", "You may use the commands form the \"3. Run\" section to get an understanding how each indiviaul framework works and what parameters can be changed. \n", "
\n", "Note that you need separate run of juicer with single value of --ncores, because it does not support parallel launches (because it writes to the default output).\n", "
\n", "Finally, there is a visualization section with a display of all the results that we calcualted on our machines. \n", "\n", "1. [Install software](#1.-Install-software)\n", "\n", "2. [Download data and genome](#2.-Download-data-and-genome). \n", "\n", "3. [Run](#3.-Run)\n", "\n", "4. [Visualize benchmarks](#4.-Visualize-benchmarks)\n" ] }, { "cell_type": "markdown", "id": "8ae7b1ea-f64b-4740-8694-2fdb1d7353c4", "metadata": {}, "source": [ "## 1. Install software\n", "\n", "We will use separate conda environments to install different utilities. Each utility will have its own environment and peth to the binaries." ] }, { "cell_type": "code", "execution_count": null, "id": "0f98ab45-3759-4260-ab9f-79e487410d5f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "mkdir ./soft" ] }, { "cell_type": "markdown", "id": "b9dc2f27-868f-4bfd-bd9f-d88d18d6655f", "metadata": {}, "source": [ "### pairtools" ] }, { "cell_type": "markdown", "id": "cd56a6ab-3836-445c-ab70-73eaa4e80da8", "metadata": {}, "source": [ "#### pairtools v1.0.2" ] }, { "cell_type": "code", "execution_count": null, "id": "f3e0038a-f034-4c40-8e5d-d50f2351679f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/pairtools1.0.2 python=3.9 pip\n", "conda activate soft/pairtools1.0.2\n", "pip install cython numpy pysam\n", "pip install git+https://github.com/open2c/pairtools.git@v1.0.2\n", "\n", "conda install -c conda-forge lz4-c # conda install -c anaconda lz4\n", "\n", "conda install -y -c bioconda \"bwa>=0.7.17\"" ] }, { "cell_type": "markdown", "id": "a7548c59-7cd2-40f8-85da-7a6b2ede143d", "metadata": {}, "source": [ "#### bwa-mem2" ] }, { "cell_type": "code", "execution_count": null, "id": "695cdebb-7a4b-4ca9-b2a5-f0a178874b77", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "conda activate soft/pairtools1.0.2\n", "\n", "# bwa-mem2: compile from source (not recommended for general users)\n", "\n", "# Get the source\n", "git clone --recursive 
https://github.com/bwa-mem2/bwa-mem2 soft/bwa-mem2\n", "cd soft/bwa-mem2\n", "\n", "# Compile\n", "make\n", "\n", "# Exit compilation folder\n", "cd ../../" ] }, { "cell_type": "markdown", "id": "0bc9befa-e4cc-4cf3-84d5-fbae94a2e6fb", "metadata": {}, "source": [ "### chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "2f02a8e1-998e-4383-bc8a-d9d493b425ef", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/chromap\n", "conda activate soft/chromap\n", "conda install -y -c bioconda -c conda-forge chromap" ] }, { "cell_type": "markdown", "id": "37f50ca4-74a2-44a3-8038-83a4d7b43c85", "metadata": {}, "source": [ "### HiC-Pro\n", "\n", "[HiC-Pro](https://github.com/nservant/HiC-Pro) is a popular software for Hi-C mapping, its now part of nf-core Hi-C pipeline, supports both fragment-based analysis of Hi-C and fragement-free analysis of DNase-based Hi-C." ] }, { "cell_type": "code", "execution_count": null, "id": "45c9697c-5f49-4a53-bbf0-18535f05e465", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "git clone https://github.com/nservant/HiC-Pro.git soft/HiC-Pro_env/HiC-Pro\n", "conda env create -f soft/HiC-Pro_env/HiC-Pro/environment.yml -p soft/HiC-Pro_env\n", "### Working environment will be soft/HiC-Pro_env\n", "\n", "conda activate soft/HiC-Pro_env\n", "\n", "# Install dependencies\n", "conda install -y -c bioconda bowtie2 samtools pysam numpy scipy bx-python\n", "conda install -y -c r r r-rcolorbrewer r-ggplot2\n", "\n", "# Copy prepared config:\n", "cp configs/config-hicpro_install.txt soft/HiC-Pro_env/HiC-Pro/config-install.txt\n", "cp configs/config-hicpro.txt soft/HiC-Pro_env/HiC-Pro/config-hicpro.txt\n", "\n", "# Configure and install:\n", "cd soft/HiC-Pro_env/HiC-Pro\n", "make configure\n", "make install\n", "\n", "cd ../../../\n", "\n", "# Patch the code to retain only data processing steps with no creating of maps:\n", "sed -i \"s/all : init mapping proc_hic merge_persample hic_qc build_raw_maps 
ice_norm/all : init mapping proc_hic merge_persample #hic_qc build_raw_maps ice_norm/\" soft/HiC-Pro_env/HiC-Pro/scripts/Makefile\n" ] }, { "cell_type": "code", "execution_count": null, "id": "18e29459-334a-458f-8244-ede873b25258", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Note that the configs should be adjusted for your system:\n", "cp configs/config-hicpro_install.txt soft/HiC-Pro_env/HiC-Pro/config-install.txt\n", "cp configs/config-hicpro.txt soft/HiC-Pro_env/HiC-Pro/config-hicpro.txt" ] }, { "cell_type": "markdown", "id": "d00d4aed-94b4-4de6-83b2-9950c9d7b949", "metadata": {}, "source": [ "### FAN-C" ] }, { "cell_type": "code", "execution_count": null, "id": "0ebd9a60-f0d0-4f0b-9a64-d2ea386a15f9", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "conda create -y --prefix soft/fanc python=3.9 pip hdf5\n", "conda activate soft/fanc\n", "pip install fanc\n", "conda install -y -c bioconda samtools" ] }, { "cell_type": "markdown", "id": "a2b58a8e-b828-47c7-87f2-86337657f5e4", "metadata": {}, "source": [ "### Juicer" ] }, { "cell_type": "code", "execution_count": null, "id": "c2611844-0e32-465f-befa-a8e296bf54d2", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/juicer\n", "conda activate soft/juicer\n", "\n", "conda install -y -c bioconda bwa java-jdk\n", "conda install -y -c conda-forge coreutils\n", "\n", "# Download the recommended stable version:\n", "wget https://github.com/aidenlab/juicer/archive/refs/tags/1.6.zip\n", "unzip 1.6.zip\n", "rm 1.6.zip\n", "mv juicer-1.6 soft/juicer-1.6\n", "\n", "# Download compile jar files of the stable version:\n", "wget http://hicfiles.tc4ga.com.s3.amazonaws.com/public/juicer/juicer_tools.1.6.2_jcuda.0.7.5.jar\n", "mv juicer_tools.1.6.2_jcuda.0.7.5.jar soft/juicer-1.6/CPU/scripts/common/juicer_tools.jar\n", "\n", "# Copy the scripts to some accessible location:\n", "mkdir -p soft/juicer-1.6/CPU/scripts/\n", "cp -r soft/juicer-1.6/CPU/[^s]* 
soft/juicer-1.6/CPU/scripts/" ] }, { "cell_type": "markdown", "id": "3e02b40b-1f5c-4bf8-89fc-36af2f485c55", "metadata": {}, "source": [ "### HiCExplorer" ] }, { "cell_type": "code", "execution_count": null, "id": "8d5e4ffd-7908-44eb-9d22-22cb24170207", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/hicexplorer python=3.9\n", "conda activate soft/hicexplorer\n", "conda install -y -c bioconda hicexplorer bwa" ] }, { "cell_type": "markdown", "id": "380efd65-263d-4b24-9921-8d0be8013c7d", "metadata": {}, "source": [ "### TADbit" ] }, { "cell_type": "code", "execution_count": null, "id": "040231d7-c0c4-425f-85a4-da0cfc62eec1", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "conda create -y --prefix soft/tadbit\n", "conda activate soft/tadbit\n", "\n", "# # Install mappers:\n", "conda install -y -q -c bioconda gem3-mapper bowtie2\n", "\n", "# install tadbit\n", "conda install -y -q -c bioconda tadbit" ] }, { "cell_type": "markdown", "id": "e325db7c-93d8-4e48-9ba6-8867956398cd", "metadata": {}, "source": [ "## 2. Download data and genome" ] }, { "cell_type": "code", "execution_count": null, "id": "aaea1786-7c9b-425c-9aac-de8ac709688c", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "mkdir data" ] }, { "cell_type": "markdown", "id": "d4372383-a702-44f5-89e7-66746700f765", "metadata": {}, "source": [ "### Download raw data\n", "\n", "Test data from Rao et al. 
2017, 1 mln pairs: " ] }, { "cell_type": "code", "execution_count": null, "id": "05b7140b-454d-4bd2-842f-a0d042701a4e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "fastq-dump -O data --gzip --split-files SRR6107789 --minSpotId 0 --maxSpotId 1000000" ] }, { "cell_type": "code", "execution_count": null, "id": "2d2952e1-528a-41dc-8efc-be866e958c89", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Put the data in accessible folder for juicer: \n", "mkdir -p data/4juicer/fastq/\n", "mkdir -p data/4juicer/splits/\n", "cp data/SRR6107789_1.fastq.gz data/4juicer/fastq/SRR6107789_R1.fastq.gz\n", "cp data/SRR6107789_2.fastq.gz data/4juicer/fastq/SRR6107789_R2.fastq.gz\n", "cp data/4juicer/fastq/* data/4juicer/splits/" ] }, { "cell_type": "code", "execution_count": null, "id": "083d9534-ce41-45b1-98f4-2007c64fb5f3", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Put the data in accessible folder for HiC-Pro:\n", "mkdir -p soft/HiC-Pro_env/HiC-Pro/rawdata/sample1\n", "cp data/S*fastq.gz soft/HiC-Pro_env/HiC-Pro/rawdata/sample1/" ] }, { "cell_type": "markdown", "id": "a4683297-4109-4786-8faa-26089fa8d3e4", "metadata": {}, "source": [ "### Install genome" ] }, { "cell_type": "markdown", "id": "1c29a2d6-cdf4-4552-b856-9316b8e332d4", "metadata": {}, "source": [ "#### Genomepy installation\n", "will install fasta, bwa and bowtie2 indexes:" ] }, { "cell_type": "code", "execution_count": null, "id": "3bf9ab65-da9c-41f0-adc6-f7f9d268e55e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Activate bwa plugin for genomepy:\n", "! genomepy plugin enable bwa bowtie2" ] }, { "cell_type": "code", "execution_count": null, "id": "2b7dc978-408d-4b32-8a32-119645b24c9f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Install hg38 genome by genomepy:\n", "! 
genomepy install hg38 -g data/" ] }, { "cell_type": "code", "execution_count": null, "id": "3fe79dca-fc66-4b81-904f-bfedc7cfd5b1", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Restrict the genome:\n", "! cooler digest data/hg38/hg38.fa.sizes data/hg38/hg38.fa DpnII --rel-ids 1 -o data/hg38/hg38.DpnII.bed" ] }, { "cell_type": "markdown", "id": "8db4bf50-7e32-4b01-bb2c-a2f1c02565f7", "metadata": { "tags": [] }, "source": [ "#### Build genome index: bwa-mem2" ] }, { "cell_type": "code", "execution_count": null, "id": "e2691bce-a469-495c-aa3e-2abb6105b1f4", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/bwa-mem2/\n", "soft/bwa-mem2/bwa-mem2 index -p data/hg38/index/bwa-mem2/hg38 data/hg38/hg38.fa" ] }, { "cell_type": "markdown", "id": "3274559c-b130-4d40-93f1-59efc3abb1ed", "metadata": { "tags": [] }, "source": [ "#### Build genome index: chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "993b7093-d896-4726-bfd5-77c86bb5d302", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/chromap\n", "chromap -i -r data/hg38/hg38.fa -o data/hg38/index/chromap/hg38" ] }, { "cell_type": "markdown", "id": "aecf762a-f9e4-4b87-863a-ca92ae88bb2e", "metadata": { "tags": [] }, "source": [ "#### Build genome index: GEM" ] }, { "cell_type": "code", "execution_count": null, "id": "3ccdc3c1-f762-4cac-9821-4479d44c2f78", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "mkdir data/hg38/index/gem\n", "gem-indexer -T 8 -i data/hg38/hg38.fa -o data/hg38/index/gem/hg38" ] }, { "cell_type": "markdown", "id": "5a9bd0a9-9dc0-4942-bb1b-cf7b77363bb6", "metadata": {}, "source": [ "## 3. Run\n", "\n", "Benchmarking is usually cumbersome, but it can be simplified with Snakemake. 
We provide a Snakemake pipeline that will allow you to benchmark different approaches.\n", "\n", "The output of snakemake will consist of resulting Hi-C pairs/maps in `output` folder and benchmarking files in `benchmarks` folder. \n", "The file names have the information on parameters in their names:\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "id": "ad48c488-05f4-4b2d-a18d-2b399e8b03b0", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# Running \n", "snakemake --cores 10" ] }, { "cell_type": "code", "execution_count": null, "id": "9b086bae-ef42-41bb-9254-42af10c9ab1b", "metadata": { "tags": [] }, "outputs": [], "source": [ "%%bash\n", "\n", "# Cleanup (only if you want to erase all the output)\n", "rm output/*; rm benchmarks/*" ] }, { "cell_type": "markdown", "id": "e46dffea-87ac-4157-8938-ae032d50a591", "metadata": {}, "source": [ "## Manual run\n", "\n", "You may also run them to test individual steps of the pipeline." ] }, { "cell_type": "markdown", "id": "6dcbff7b-8caf-4512-9c44-375eac698730", "metadata": {}, "source": [ "### pairtools" ] }, { "cell_type": "code", "execution_count": null, "id": "7da2496b-fd21-4383-a3df-ba9fadb9e505", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/bwa-mem2/bwa-mem2 mem -t 5 -SP data/hg38/index/bwa-mem2/hg38 data/SRR6107789_1.fastq.gz data/SRR6107789_2.fastq.gz | \\\n", " soft/pairtools1.0.2/bin/pairtools parse --nproc-in 5 --nproc-out 5 --drop-sam --drop-seq -c data/hg38/hg38.fa.sizes | \\\n", " soft/pairtools1.0.2/bin/pairtools sort --nproc 5 | \\\n", " soft/pairtools1.0.2/bin/pairtools dedup -p 5 --backend cython \\\n", " -o output/result.pairtools.pairs" ] }, { "cell_type": "markdown", "id": "b0c9a7e3-8e08-42bf-9748-cd94eff6731a", "metadata": {}, "source": [ "### chromap" ] }, { "cell_type": "code", "execution_count": null, "id": "e3012cdb-be1c-46ef-bb7a-20eff2d34fba", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/chromap/bin/chromap --preset hic 
--low-mem \\\n", " -t 5 -x data/hg38/index/chromap/hg38 -r data/hg38/hg38.fa \\\n", " -1 data/SRR6107789_1.fastq.gz -2 data/SRR6107789_2.fastq.gz -o output/result.chromap.pairs" ] }, { "cell_type": "markdown", "id": "32e60c83-1fab-4fcb-ba0b-8c1258e457c6", "metadata": {}, "source": [ "### HiC-Pro" ] }, { "cell_type": "code", "execution_count": null, "id": "5931f3a6-82f1-4fd8-b65c-9ed648b5f986", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "cd soft/HiC-Pro_env/HiC-Pro\n", "bin/HiC-Pro -i rawdata/ -o output -c config-hicpro.txt\n", "\n", "cd ../../../" ] }, { "cell_type": "markdown", "id": "43171a68-8928-418c-9779-268a5d4923d3", "metadata": {}, "source": [ "### FAN-C\n", "Based on [CLI tutorial](https://fan-c.readthedocs.io/en/latest/fanc-executable/fanc-generate-hic/fanc_modular_steps.html):" ] }, { "cell_type": "code", "execution_count": null, "id": "802e66a8-4c93-45d6-a735-4a68a1d9184a", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "fanc map -t 5 data/SRR6107789_1.fastq.gz data/hg38/index/bwa/hg38.fa output/fanc-output_1.bam\n", "fanc map -t 5 data/SRR6107789_2.fastq.gz data/hg38/index/bwa/hg38.fa output/fanc-output_2.bam\n", "samtools sort -@ 5 -n output/fanc-output_1.bam -o output/fanc-output_1.sorted.bam\n", "samtools sort -@ 5 -n output/fanc-output_2.bam -o output/fanc-output_2.sorted.bam\n", "fanc pairs output/fanc-output_1.sorted.bam output/fanc-output_2.sorted.bam output/fanc-output.pairs -g data/hg38/hg38.DpnII.bed" ] }, { "cell_type": "markdown", "id": "46f11121-bff6-4f92-8d80-aa86b01ffcc0", "metadata": {}, "source": [ "### Juicer" ] }, { "cell_type": "code", "execution_count": null, "id": "a1a5571c-b1da-4125-b915-34884be7299e", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "soft/juicer-1.6/CPU/juicer.sh -g hg38 -d data/4juicer/ -s DpnII -S early -p data/hg38/hg38.fa.sizes -y data/hg38/hg38.DpnII.bed -z data/hg38/index/bwa/hg38.fa -t 5 -D soft/juicer-1.6/CPU\n" ] }, { "cell_type": "markdown", "id": 
"871ac7b7-0180-4103-a8b3-bd49b7269d83", "metadata": {}, "source": [ "### HiCExplorer\n", "Based on the example: https://hicexplorer.readthedocs.io/en/latest/content/example_usage.html\n", "\n", "Note that it does not produce the pairs, but binned coolers." ] }, { "cell_type": "code", "execution_count": null, "id": "5f14967d-ed63-4d20-a006-bc038d1f1f6f", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "hicBuildMatrix --samFiles \\\n", " <(bwa mem -t 4 -A1 -B4 -E50 -L0 data/hg38/index/bwa/hg38.fa data/SRR6107789_1.fastq.gz | samtools view -Shb -) \\\n", " <(bwa mem -t 4 -A1 -B4 -E50 -L0 data/hg38/index/bwa/hg38.fa data/SRR6107789_2.fastq.gz | samtools view -Shb -) \\\n", " --restrictionSequence GATC \\\n", " --danglingSequence GATC \\\n", " --restrictionCutFile data/hg38/hg38.DpnII.bed \\\n", " --threads 4 \\\n", " --inputBufferSize 100000 \\\n", " --QCfolder hicexplorer_tmp \\\n", " -o hicexplorer_output.cool" ] }, { "cell_type": "markdown", "id": "3a856970-2770-4d52-b170-2308b9864ea3", "metadata": {}, "source": [ "### TADbit" ] }, { "cell_type": "code", "execution_count": null, "id": "463b8edd-9b9b-405a-ba21-6b76429ece41", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "tadbit map tadbit_output --fastq data/SRR6107789_1.fastq.gz --read 1 --index data/hg38/index/gem/hg38.gem --renz DpnII \n", "tadbit map tadbit_output --fastq data/SRR6107789_2.fastq.gz --read 2 --index data/hg38/index/gem/hg38.gem --renz DpnII \n", "\n", "tadbit parse tadbit_output --genome data/hg38/hg38.fa \n", "\n", "tadbit filter tadbit_output --format short" ] }, { "cell_type": "markdown", "id": "f2b8fa6a-d282-4ae5-9154-4578be8418dd", "metadata": {}, "source": [ "### Read mapping only" ] }, { "cell_type": "code", "execution_count": null, "id": "6b48d56b-122d-4827-a384-0cfeb797e081", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "\n", "# bwa mem\n", "soft/pairtools0.3.0/bin/bwa mem -t 4 -SP data/hg38/index/bwa/hg38.fa data/SRR6107789_1.fastq.gz 
data/SRR6107789_2.fastq.gz > bwa-mem.sam\n", "\n", "# bwa mem2\n", "soft/bwa-mem2/bwa-mem2 mem -t 4 -SP data/hg38/index/bwa-mem2/hg38 data/SRR6107789_1.fastq.gz data/SRR6107789_2.fastq.gz > bwa-mem2.sam\n", "\n", "# bowtie2 only\n", "soft/tadbit/bin/bowtie2 -p 4 -x data/hg38/index/bowtie2/hg38 -1 data/SRR6107789_1.fastq.gz -2 data/SRR6107789_2.fastq.gz -S bowtie2.sam\n" ] }, { "cell_type": "markdown", "id": "9b3b93e5-47b1-408f-a4d5-32a85060fd8a", "metadata": {}, "source": [ "## 4. Visualize benchmarks" ] }, { "cell_type": "code", "execution_count": 2, "id": "8eb57b57-db42-420a-a2e7-631fda0676e4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "36 CPUs at 1211 GHz\n" ] } ], "source": [ "# Check the CPU properties:\n", "import psutil\n", "print(f\"{psutil.cpu_count()} CPUs at {psutil.cpu_freq().current:.0f} GHz\") " ] }, { "cell_type": "code", "execution_count": 3, "id": "5a59f6bc-be2d-442b-b4ac-07237f38c38b", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import numpy as np\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "mpl.rcParams['font.family'] = \"sans-serif\"\n", "figsize_A4 = np.array([11.69, 8.27])\n", "plt.rcParams[\"figure.figsize\"] = figsize_A4.T\n", "plt.rcParams['figure.facecolor']='white'\n", "plt.rcParams['font.size']=16\n", "\n", "import glob" ] }, { "cell_type": "code", "execution_count": 4, "id": "986fae72-ac93-4bff-9749-3b3a70057e17", "metadata": {}, "outputs": [], "source": [ "## If you start from .csv. 
file: \n", "# df = pd.read_csv('benchmarking_1mln.csv')" ] }, { "cell_type": "code", "execution_count": 13, "id": "dd9e829a-f25e-4c66-b22d-01e008143396", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "39\n" ] } ], "source": [ "# If you start from your own benchmarks:\n", "files = glob.glob(\"benchmarks/*\") #+ glob.glob(\"benchmarks_v1_2022/*\")# + \n", "print(len(files))" ] }, { "cell_type": "code", "execution_count": 14, "id": "08707677-e087-44ca-8e8a-9d74ef4482a4", "metadata": {}, "outputs": [], "source": [ "def get_params(filename):\n", " split = filename.split('.')\n", " util= split[1]\n", " ncores = int(split[2])\n", " \n", " return util, ncores\n", "\n", "timings = []\n", "for f in files:\n", " t = pd.read_table(f)\n", " t[['util', 'ncores']] = get_params(f)\n", " timings.append(t)\n", "timings = pd.concat(timings)" ] }, { "cell_type": "code", "execution_count": 15, "id": "d43f8549-4765-441c-b94c-eb76a950ca4d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
sh:m:smax_rssmax_vmsmax_ussmax_pssio_inio_outmean_loadcpu_timeutilncores
0482.10290:08:0217068.9720572.7316931.9116951.7116275.210.02102.18493.82pairtools_bwamem21
1482.52610:08:0217079.4620508.7316941.4616956.3432534.1139.25101.73498.88pairtools_bwamem21
2488.99970:08:0817055.0420508.5916920.9616939.6942104.6178.47100.17502.58pairtools_bwamem21
3484.34600:08:0416981.5920380.6016961.3016962.3745493.36117.7093.0650.54pairtools_bwamem21
4483.31590:08:0316969.0220595.5116944.7516945.7961922.79156.9399.61507.64pairtools_bwamem21
\n", "
" ], "text/plain": [ " s h:m:s max_rss max_vms max_uss max_pss io_in \\\n", "0 482.1029 0:08:02 17068.97 20572.73 16931.91 16951.71 16275.21 \n", "1 482.5261 0:08:02 17079.46 20508.73 16941.46 16956.34 32534.11 \n", "2 488.9997 0:08:08 17055.04 20508.59 16920.96 16939.69 42104.61 \n", "3 484.3460 0:08:04 16981.59 20380.60 16961.30 16962.37 45493.36 \n", "4 483.3159 0:08:03 16969.02 20595.51 16944.75 16945.79 61922.79 \n", "\n", " io_out mean_load cpu_time util ncores \n", "0 0.02 102.18 493.82 pairtools_bwamem2 1 \n", "1 39.25 101.73 498.88 pairtools_bwamem2 1 \n", "2 78.47 100.17 502.58 pairtools_bwamem2 1 \n", "3 117.70 93.06 50.54 pairtools_bwamem2 1 \n", "4 156.93 99.61 507.64 pairtools_bwamem2 1 " ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "timings.head()" ] }, { "cell_type": "code", "execution_count": 87, "id": "ba0c1c4f-ac4c-43f5-8245-e32d1d4cc3cf", "metadata": {}, "outputs": [], "source": [ "df = timings.sort_values(['ncores', 'util'])" ] }, { "cell_type": "code", "execution_count": 88, "id": "06fa2f0b-1c9e-473b-bbdd-3157f1d81a1a", "metadata": {}, "outputs": [], "source": [ "df.to_csv('benchmarking_1mln.csv')" ] }, { "cell_type": "code", "execution_count": 17, "id": "306e3829-ab40-4bfb-804b-775b5bf6a170", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['bowtie', 'bwamem', 'bwamem2', 'chromap', 'fanc_bowtie2',\n", " 'fanc_bwa', 'hicexplorer', 'hicpro', 'juicer', 'pairtools',\n", " 'pairtools_bwamem2', 'tadbit', 'tadbit_bowtie2'], dtype=object)" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.unique(df.util)" ] }, { "cell_type": "code", "execution_count": 21, "id": "8b20d808-78aa-4efc-9c2d-999b4e393968", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'chromap', \n", " 'pairtools_bwamem2', \n", " 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'hicpro', \n", " 'hicexplorer', \n", " 'fanc_bwa', \n", " 
'fanc_bowtie2',\n", " 'bwamem2',\n", " 'bwamem',\n", " 'bowtie',\n", "]\n", "labels_mod = [\n", " 'Chromap', \n", " 'bwa-mem2 + pairtools', \n", " 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'Hi-Pro', \n", " 'HiCExplorer', \n", " 'bwa mem + FANC', \n", " 'bowtie2 + FANC',\n", " 'bwa-mem2',\n", " 'bwa mem',\n", " 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 24, "id": "8f31f3be-cf8f-4976-9a60-28e97c13593d", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 6000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.pdf\")" ] }, { 
"cell_type": "code", "execution_count": 25, "id": "1f41e7db-0f80-45f6-96f6-d1ae81055b83", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xscale('log')\n", "ax.set_xlim([1, 5e3])\n", "# ax.set_xticks(np.arange(0, 5000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln_log.pdf\")" ] }, { 
"cell_type": "code", "execution_count": 89, "id": "475f8c97-3de4-4ff1-b1e9-6a01331a6c52", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'chromap', \n", " 'pairtools_bwamem2', \n", " 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'hicpro', \n", " 'hicexplorer', \n", " 'fanc_bwa', \n", " 'fanc_bowtie2',\n", " # 'bwamem2',\n", " # 'bwamem',\n", " # 'bowtie',\n", "]\n", "labels_mod = [\n", " 'Chromap', \n", " 'bwa-mem2 + pairtools', \n", " 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'Hi-Pro', \n", " 'HiCExplorer', \n", " 'bwa mem + FANC', \n", " 'bowtie2 + FANC',\n", " # 'bwa-mem2',\n", " # 'bwa mem',\n", " # 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 90, "id": "3b2cd5e8-71bf-4039-a6b1-be7565b84759", "metadata": {}, "outputs": [], "source": [ "df = timings.sort_values(['ncores', 'util'])\n", "df.loc[:, \"max_rss_gb\"] = df.loc[:, \"max_rss\"]/1024\n", "df.loc[:, \"min\"] = df.loc[:, \"s\"]" ] }, { "cell_type": "code", "execution_count": 91, "id": "08e08c97-3e00-438e-810e-77eae9c6804b", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 5000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "\n", "# Add text, slowdown over chromap\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_reference = df.query(f'ncores=={ncores} and util==\"chromap\"')\n", " mean_reference = np.mean(df_reference['min'].values)\n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['min'].values)\n", " \n", " slowdown = mean_target / mean_reference\n", " \n", " w = b.patches[0].get_height()\n", " \n", " b.text( s=f\"x {slowdown:.1f}\", \n", " x=mean_target+150, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "ax.set_ylabel('')\n", 
"ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.pdf\")" ] }, { "cell_type": "code", "execution_count": 92, "id": "c7c68d19-7606-4d35-a150-580fd5c78cd6", "metadata": {}, "outputs": [], "source": [ "dct_mapper = {\n", " 'bowtie': 'bowtie', \n", " 'bwamem': 'bwamem', \n", " 'bwamem2': 'bwamem2', \n", " 'chromap': \"\", \n", " 'fanc_bowtie2': 'bowtie',\n", " 'fanc_bwa': 'bwamem', \n", " 'hicexplorer': 'bwamem', \n", " 'hicpro':'bowtie', \n", " 'juicer': 'bwamem', \n", " 'pairtools': 'bwamem',\n", " 'pairtools_bwamem2': 'bwamem2', \n", " 'tadbit': 'GEM', \n", " 'tadbit_bowtie2': 'bowtie'\n", "}\n", "df.loc[:, \"mapper\"] = df.util.replace(dct_mapper)" ] }, { "cell_type": "code", "execution_count": 93, "id": "2996f9c4-0ed4-4d81-80aa-7044eed59648", "metadata": {}, "outputs": [], "source": [ "df = pd.merge(df, df, left_on=['mapper', 'ncores'], right_on=['util', 'ncores'], suffixes=[\"\", \"_mapper\"])" ] }, { "cell_type": "code", "execution_count": 94, "id": "90943478-8a46-486d-b6d7-74a1306dbc70", "metadata": {}, "outputs": [], "source": [ "labels = [\n", " 'pairtools_bwamem2', \n", " # 'tadbit',\n", " 'pairtools', \n", " 'tadbit_bowtie2',\n", " 'juicer', \n", " 'fanc_bwa', \n", " 'hicexplorer', \n", " 'hicpro', \n", " 'fanc_bowtie2',\n", " # 'bwamem2',\n", " # 'bwamem',\n", " # 'bowtie',\n", "]\n", 
"labels_mod = [\n", " 'bwa-mem2 + pairtools', \n", " # 'GEM + TADbit',\n", " 'bwa mem + pairtools', \n", " 'bowtie2 + TADbit',\n", " 'Juicer', \n", " 'bwa mem + FANC', \n", " 'HiCExplorer', \n", " 'Hi-Pro', \n", " 'bowtie2 + FANC',\n", " # 'bwa-mem2',\n", " # 'bwa mem',\n", " # 'bowtie2',\n", "]" ] }, { "cell_type": "code", "execution_count": 113, "id": "7d976913-dc1e-49f3-ac37-5bc2f16a13fd", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, axes = plt.subplots(nrows=1, ncols=2, sharey=True)\n", "\n", "cmap = ['#FD7F69', '#9FC9DD', '#89A76F']\n", "style_dict = dict(\n", " orient='h',\n", " palette=cmap,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07)\n", "\n", "cmap1 = ['#ECECEC', '#BFBFBF', '#868686']\n", "style_dict1 = dict(\n", " orient='h',\n", " palette=cmap1,\n", " edgecolor=\"k\",\n", " linewidth=2.0,\n", " errwidth=2.0,\n", " capsize=0.07, \n", " alpha=0.8)\n", "\n", "\n", "ax = axes[0]\n", "b = sns.barplot(x=\"s\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict\n", ")\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "\n", "b1 = sns.barplot(x=\"s_mapper\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict1\n", ")\n", "plt.setp(b1.patches, linewidth=0.5)\n", "\n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Time (sec)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "#ax.set_xscale('log')\n", "ax.set_xlim([0, 5e3])\n", "ax.set_xticks(np.arange(0, 6000, 100), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "ax.get_legend().remove()\n", "\n", "\n", "# Add text, runtime percentage out of mapper timing\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['min'].values)\n", " mean_mapper = np.mean(df_target['min_mapper'].values)\n", " \n", " prc = 100 * (mean_mapper) / mean_target\n", " \n", " w = b.patches[0].get_height()\n", " \n", " if prc>100:\n", " signature 
= f\"~0 : ~100 %\"\n", " else:\n", " signature = f\"{prc:.0f} : {100-prc:.0f} %\"\n", " \n", " b.text( s=signature, \n", " x=mean_target+150, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax = axes[1]\n", "b = sns.barplot(x=\"max_rss\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict)\n", "plt.setp(b.patches, linewidth=0.5)\n", "\n", "\n", "b1 = sns.barplot(x=\"max_rss_mapper\", \n", " y=\"util\", \n", " data=df.sort_values('util'),\n", " order=labels,\n", " hue='ncores',\n", " hue_order=[4,2,1],\n", " ax=ax,\n", " **style_dict1\n", ")\n", "plt.setp(b1.patches, linewidth=0.5)\n", "ax.get_legend().remove()\n", "\n", "\n", "\n", "# Add text, runtime percentage out of mapper timing\n", "ncores_order = [4, 2, 1]\n", "for icore, ncores in enumerate(ncores_order):\n", " for ilabels, util in enumerate(labels):\n", " if util==\"chromap\":\n", " continue\n", " \n", " df_target = df.query(f'ncores=={ncores} and util==\"{util}\"')\n", " mean_target = np.mean(df_target['max_rss'].values)\n", " mean_mapper = np.mean(df_target['max_rss_mapper'].values)\n", " \n", " prc = 100 * (mean_mapper) / mean_target\n", " \n", " w = b.patches[0].get_height()\n", " \n", " if prc>=100:\n", " signature = f\"\"\n", " else:\n", " signature = f\"{prc:.0f} : {100-prc:.0f} %\"\n", " \n", " b.text( s=signature, \n", " x=mean_target+550, y=ilabels + (icore-1)*w,\n", " ha = 'left', va = 'center', fontsize=8, weight='bold')\n", "\n", " \n", "\n", "ax.set_ylabel('')\n", "ax.set_xlabel('Maximum Resident Set Size (MB)')\n", "ax.set_yticklabels(labels_mod)\n", "ax.set_axisbelow(True)\n", "ax.grid(which='both', axis='x', color='k')\n", "ax.set_xticks(np.arange(0, 30000, 1000), minor=True)\n", "ax.grid(which='minor', axis='x', alpha=0.2, color='k')\n", "\n", "# fig.suptitle('Benchmark of different Hi-C mapping tools for 1 mln reads (5 
iterations)', y=0.99)\n", "\n", "# (x, y, width, height)\n", "bb = (fig.subplotpars.left, fig.subplotpars.top+0.002, fig.subplotpars.right-fig.subplotpars.left, 0.2)\n", "# ax.legend(bbox_to_anchor=bb, title=\"Number of cores\", loc=\"lower right\", ncol=3, borderaxespad=0., bbox_transform=fig.transFigure, frameon=False)\n", "\n", "plt.savefig(\"benchmarking_1mln.mappers.pdf\")" ] }, { "cell_type": "code", "execution_count": null, "id": "a08db361-28ab-4a3d-a69a-4d6869e28a39", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test", "language": "python", "name": "test" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.13" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.1.3/doc/examples/benchmark/benchmarking_1mln.csv000066400000000000000000000431411474715105500241550ustar00rootroot00000000000000,s,h:m:s,max_rss,max_vms,max_uss,max_pss,io_in,io_out,mean_load,cpu_time,util,ncores 0,444.776,0:07:24,3496.51,4314.64,3468.51,3472.45,0.0,780.64,398.88,1775.19,bowtie,1 1,446.6064,0:07:26,3495.83,4314.77,3466.35,3470.77,0.0,1568.73,396.66,1776.07,bowtie,1 2,439.4402,0:07:19,3495.91,4314.39,3468.05,3471.94,0.0,2373.96,403.7,1781.37,bowtie,1 3,449.5572,0:07:29,3497.03,4314.64,3469.36,3473.22,0.0,3148.84,394.6,1784.75,bowtie,1 4,449.9673,0:07:29,3490.93,4314.89,3470.16,3471.92,40.96,3942.04,393.64,1785.49,bowtie,1 0,280.5757,0:04:40,6019.11,6453.95,5994.93,5998.23,7.86,746.23,366.91,1031.28,bwamem,1 1,286.6525,0:04:46,6004.57,6453.95,5986.34,5988.55,1485.98,1679.14,356.05,1027.24,bwamem,1 2,300.468,0:05:00,6009.04,6517.95,6000.2,6000.56,6660.16,2736.5,375.23,1138.7,bwamem,1 3,336.5047,0:05:36,6027.58,6645.95,6018.34,6018.94,8138.28,3669.41,375.65,1280.85,bwamem,1 4,323.7325,0:05:23,6012.26,6453.95,6002.96,6003.52,8932.21,4602.32,391.87,1291.62,bwamem,1 
0,169.2031,0:02:49,17583.51,21017.69,17574.31,17574.8,13309.02,870.68,260.2,441.72,bwamem2,1 1,190.2805,0:03:10,17611.99,20913.7,17601.16,17601.72,29713.3,1554.78,185.67,357.92,bwamem2,1 2,198.262,0:03:18,17553.89,20425.7,17545.7,17546.26,46154.95,2487.69,161.87,328.04,bwamem2,1 3,117.1978,0:01:57,17591.75,20553.7,17583.7,17584.1,46179.89,3669.41,358.44,430.41,bwamem2,1 4,142.6662,0:02:22,17615.75,20688.82,17606.59,17607.45,62596.36,4602.32,301.67,447.83,bwamem2,1 0,155.4555,0:02:35,19062.05,20632.42,19039.34,19042.16,14756.57,0.02,78.1,122.2,chromap,1 1,129.4362,0:02:09,19034.41,20605.15,19011.44,19014.27,14756.57,88.36,86.69,116.07,chromap,1 2,130.3877,0:02:10,19034.32,20605.15,19011.52,19014.34,14756.57,176.69,86.2,121.96,chromap,1 3,133.456,0:02:13,19030.1,20598.45,19007.18,19010.0,14756.57,265.02,84.37,128.14,chromap,1 4,129.5292,0:02:09,19045.01,20615.43,19022.31,19025.15,14756.57,353.35,87.22,134.56,chromap,1 0,4261.7448,1:11:01,7186.76,8679.24,4290.61,5702.39,1719.48,10257.31,36.77,348.34,fanc_bowtie2,1 1,4077.2502,1:07:57,7188.25,8680.68,4290.98,5703.88,1719.5,20183.4,34.11,330.67,fanc_bowtie2,1 2,4131.9376,1:08:51,7189.56,8681.8,4292.67,5709.75,1719.51,30697.16,35.35,351.1,fanc_bowtie2,1 3,4050.4727,1:07:30,9027.23,10823.83,5831.42,7385.27,1719.51,42084.66,35.13,382.96,fanc_bowtie2,1 4,4020.6237,1:07:00,9032.61,10828.28,5837.03,7391.75,2343.53,52412.16,34.58,394.25,fanc_bowtie2,1 0,2731.6263,0:45:31,7185.57,8945.22,5718.36,5838.26,0.38,7029.16,35.71,346.69,fanc_bwa,1 1,2715.4035,0:45:15,9044.39,10839.64,5846.46,7385.01,0.42,14315.23,32.27,353.83,fanc_bwa,1 2,2769.2431,0:46:09,7188.17,9009.55,5735.71,5855.89,0.44,21877.82,37.54,374.56,fanc_bwa,1 3,2706.3695,0:45:06,9043.16,10838.74,5844.58,7393.22,0.44,29434.04,35.55,376.27,fanc_bwa,1 4,2682.8176,0:44:42,6172.31,8945.66,5679.21,5812.1,0.44,37010.45,30.23,380.22,fanc_bwa,1 0,961.0734,0:16:01,21569.48,23347.51,21492.25,21513.06,2618.56,0.02,138.26,532.15,hicexplorer,1 
1,908.2106,0:15:08,22286.37,23347.51,22206.49,22228.31,2618.56,0.54,140.61,522.7,hicexplorer,1 2,896.8171,0:14:56,21604.46,23347.51,21524.48,21546.3,2618.56,1.05,138.93,499.36,hicexplorer,1 3,910.574,0:15:10,22440.63,23347.51,22361.79,22382.79,2618.56,1.56,140.63,542.86,hicexplorer,1 4,895.9424,0:14:55,21516.94,23347.51,21469.93,21475.81,2618.56,2.07,138.9,529.28,hicexplorer,1 0,1111.7203,0:18:31,6730.91,7406.83,6675.2,6682.8,237.25,1090.37,186.34,87.74,hicpro,1 1,1130.0595,0:18:50,6730.21,7406.82,6675.14,6682.36,903.14,2215.98,186.08,80.43,hicpro,1 2,1123.3536,0:18:43,6712.22,7406.82,6674.77,6677.97,5265.45,3341.59,182.57,89.92,hicpro,1 3,1181.2436,0:19:41,6715.32,7406.82,6675.26,6680.12,6316.99,4467.2,177.76,100.38,hicpro,1 4,1116.4777,0:18:36,6715.52,7406.82,6674.92,6679.32,6490.31,5592.8,188.64,114.97,hicpro,1 0,951.7772,0:15:51,5457.95,5857.16,5432.96,5439.95,0.0,2882.08,95.15,7.25,juicer,1 1,946.1429,0:15:46,5458.0,16410.45,5433.02,5439.99,0.0,4613.73,92.49,14.04,juicer,1 2,950.1664,0:15:50,5458.07,5857.16,5433.16,5440.17,0.2,7180.02,95.32,26.17,juicer,1 3,1004.5055,0:16:44,5458.27,16410.45,5433.17,5439.37,0.2,10377.86,93.19,30.43,juicer,1 4,1088.6224,0:18:08,5458.32,5857.16,5433.14,5439.08,0.2,13611.21,94.37,43.31,juicer,1 0,1031.7979,0:17:11,5868.29,8533.19,5732.12,5752.16,0.57,0.02,101.37,1048.77,pairtools,1 1,1057.2271,0:17:37,5866.38,8533.06,5731.94,5746.23,5193.45,39.25,101.33,1078.93,pairtools,1 2,1020.0639,0:17:00,5753.98,8533.06,5731.69,5732.67,10586.02,78.47,102.21,1056.3,pairtools,1 3,1044.6887,0:17:24,5852.79,8533.19,5734.25,5766.39,15784.64,117.71,99.64,29.63,pairtools,1 4,1046.266,0:17:26,5824.86,8533.06,5734.24,5757.25,21050.2,156.94,99.64,38.13,pairtools,1 0,482.1029,0:08:02,17068.97,20572.73,16931.91,16951.71,16275.21,0.02,102.18,493.82,pairtools_bwamem2,1 1,482.5261,0:08:02,17079.46,20508.73,16941.46,16956.34,32534.11,39.25,101.73,498.88,pairtools_bwamem2,1 
2,488.9997,0:08:08,17055.04,20508.59,16920.96,16939.69,42104.61,78.47,100.17,502.58,pairtools_bwamem2,1 3,484.346,0:08:04,16981.59,20380.6,16961.3,16962.37,45493.36,117.7,93.06,50.54,pairtools_bwamem2,1 4,483.3159,0:08:03,16969.02,20595.51,16944.75,16945.79,61922.79,156.93,99.61,507.64,pairtools_bwamem2,1 0,611.4665,0:10:11,13519.86,15003.46,13507.46,13508.44,22937.93,2801.37,50.95,5.28,tadbit,1 1,442.9993,0:07:22,13559.35,15003.46,13509.66,13529.09,40809.44,6184.54,55.05,25.04,tadbit,1 2,385.3129,0:06:25,13561.27,15003.46,13508.5,13522.26,40809.46,9295.42,59.39,22.13,tadbit,1 3,390.4362,0:06:30,13561.43,15003.46,13509.03,13522.27,40809.5,12333.14,58.93,23.74,tadbit,1 4,447.8446,0:07:27,13560.76,15003.46,13509.25,13528.03,51014.88,15465.8,51.07,24.5,tadbit,1 0,862.2741,0:14:22,3639.61,4465.74,3612.88,3616.18,9401.06,3187.84,73.44,23.31,tadbit_bowtie2,1 1,815.1788,0:13:35,3640.85,4464.65,3582.08,3602.21,9401.11,6292.2,82.09,20.52,tadbit_bowtie2,1 2,817.2921,0:13:37,3736.97,4560.27,3712.91,3716.56,13567.1,9504.59,81.15,19.79,tadbit_bowtie2,1 3,819.7936,0:13:39,3640.98,4464.65,3617.2,3619.83,13567.22,12458.8,80.4,19.48,tadbit_bowtie2,1 4,760.4646,0:12:40,3641.41,4464.98,3621.55,3622.75,13567.24,15676.85,81.76,20.99,tadbit_bowtie2,1 0,444.1354,0:07:24,3496.54,4314.39,3479.12,3481.87,27.41,783.47,400.27,1779.49,bowtie,2 1,446.9715,0:07:26,3496.32,4314.39,3478.36,3481.27,27.41,1570.48,397.44,1782.37,bowtie,2 2,484.7029,0:08:04,3511.4,4338.64,3490.33,3495.52,27.41,2346.41,368.72,1797.04,bowtie,2 3,450.6565,0:07:30,3488.59,4313.88,3466.99,3471.75,27.41,3148.2,393.42,1786.37,bowtie,2 4,453.0468,0:07:33,3488.74,4314.02,3466.96,3470.63,27.41,3936.13,392.09,1793.93,bowtie,2 0,330.2381,0:05:30,6045.63,6645.95,6021.65,6027.11,5173.52,870.68,375.32,1240.4,bwamem,2 1,329.5853,0:05:29,6028.34,6645.95,6019.0,6019.59,6654.07,1803.59,382.07,1265.1,bwamem,2 2,297.1758,0:04:57,6003.03,6453.95,5994.1,5994.56,9493.45,2736.5,363.55,1091.44,bwamem,2 
3,263.866,0:04:23,5991.57,6453.95,5982.83,5983.36,10181.14,3669.41,388.27,1039.2,bwamem,2 4,258.637,0:04:18,5992.16,6453.95,5983.26,5983.78,10181.15,4477.87,351.12,927.49,bwamem,2 0,141.9624,0:02:21,17604.44,20094.61,17587.78,17591.4,16300.85,870.68,311.82,444.2,bwamem2,2 1,133.0894,0:02:13,17620.68,20849.7,17603.97,17607.69,26647.44,1554.78,267.76,361.73,bwamem2,2 2,119.1224,0:01:59,17591.03,21040.61,17573.69,17577.37,26647.45,2736.5,354.15,430.73,bwamem2,2 3,119.2461,0:01:59,17618.88,20912.61,17601.95,17605.61,26647.45,3669.41,353.61,437.38,bwamem2,2 4,119.4407,0:01:59,17610.04,20579.05,17592.86,17596.52,26647.46,4602.32,352.7,443.75,bwamem2,2 0,99.0323,0:01:39,19065.62,20682.54,19043.42,19050.13,14756.72,0.02,106.11,105.91,chromap,2 1,93.6757,0:01:33,19075.63,20687.33,19053.32,19060.02,25886.48,88.36,121.88,118.06,chromap,2 2,120.2376,0:02:00,19145.25,20776.39,19130.55,19133.61,37626.33,176.69,104.51,132.92,chromap,2 3,83.0685,0:01:23,18948.87,20569.96,18934.38,18937.44,41077.34,270.34,159.69,144.2,chromap,2 4,98.8891,0:01:38,19049.5,20677.47,19034.7,19037.84,41077.34,353.35,126.08,140.51,chromap,2 0,2463.1096,0:41:03,7184.84,8822.2,4290.22,5704.96,1790.3,9624.3,17.33,337.56,fanc_bowtie2,2 1,2388.2986,0:39:48,7183.59,8820.36,4839.33,5975.55,1790.35,20084.17,17.7,322.7,fanc_bowtie2,2 2,2313.5663,0:38:33,6195.27,8222.25,3496.78,4531.77,1790.41,30597.45,18.65,343.23,fanc_bowtie2,2 3,2351.3693,0:39:11,9008.62,11035.56,5810.7,7349.54,1790.42,41169.94,17.91,356.34,fanc_bowtie2,2 4,2369.8083,0:39:29,7186.87,8822.57,4288.14,5702.9,1790.45,51675.89,19.01,377.66,fanc_bowtie2,2 0,1774.8625,0:29:34,9036.59,11062.79,5886.57,7381.14,180.82,6716.99,24.32,331.93,fanc_bwa,2 1,1740.5138,0:29:00,7185.89,9381.23,5771.89,5896.22,180.85,14316.39,27.45,352.99,fanc_bwa,2 2,1693.8652,0:28:13,7188.38,9315.89,5720.5,5842.73,180.88,21905.75,22.64,337.38,fanc_bwa,2 3,1673.5139,0:27:53,7184.62,9376.55,5740.96,5861.3,180.95,29485.2,22.17,333.76,fanc_bwa,2 
4,1671.1137,0:27:51,7186.76,9379.55,5780.07,5902.28,180.97,37075.34,22.58,346.62,fanc_bwa,2 0,746.2236,0:12:26,22026.01,24493.83,21948.96,21969.79,2997.02,0.53,170.7,512.85,hicexplorer,2 1,710.6231,0:11:50,23024.07,24502.34,22945.46,22966.38,3015.5,0.54,171.76,495.71,hicexplorer,2 2,705.1923,0:11:45,22126.58,24493.83,22047.82,22069.79,3015.5,1.05,176.52,507.18,hicexplorer,2 3,712.7222,0:11:52,23066.12,24493.83,22987.89,23008.64,3015.5,1.56,171.49,518.58,hicexplorer,2 4,715.7957,0:11:55,22876.23,24493.83,22797.87,22819.07,3015.5,2.58,172.73,557.14,hicexplorer,2 0,1073.3043,0:17:53,6730.26,7406.83,6678.91,6685.25,9.88,1089.03,184.85,64.55,hicpro,2 1,1038.1439,0:17:18,6730.3,7406.82,6675.17,6685.6,9.89,2215.98,182.2,73.83,hicpro,2 2,1033.7443,0:17:13,6730.34,7406.82,6675.99,6686.95,221.47,3341.59,185.87,88.03,hicpro,2 3,1022.0066,0:17:02,6731.1,7406.83,6675.88,6688.85,222.14,4467.19,188.24,109.28,hicpro,2 4,1094.391,0:18:14,6730.64,7406.82,6676.12,6689.47,222.14,5592.8,175.63,105.96,hicpro,2 0,502.114,0:08:22,5634.02,34265.69,5609.24,5616.47,70.82,2785.59,175.44,10.71,juicer,2 1,502.401,0:08:22,5634.24,6057.17,5609.44,5616.5,70.89,5661.32,175.44,14.17,juicer,2 2,502.5511,0:08:22,5634.45,6057.17,5609.73,5616.78,70.92,8537.05,175.43,18.41,juicer,2 3,500.9428,0:08:20,5634.69,6057.17,5609.77,5616.84,70.92,11412.78,175.96,24.15,juicer,2 4,500.3197,0:08:20,5634.83,6057.17,5610.01,5617.05,70.92,14367.65,176.2,21.5,juicer,2 0,519.8764,0:08:39,6037.57,9285.21,5901.66,5924.11,5249.05,0.02,194.04,1009.67,pairtools,2 1,483.1521,0:08:03,6037.39,9285.21,5901.68,5924.15,5249.05,39.25,200.27,974.78,pairtools,2 2,480.6768,0:08:00,6037.79,9285.21,5903.83,5941.17,5249.05,78.47,201.33,980.38,pairtools,2 3,465.3507,0:07:45,6037.73,9285.21,5903.88,5941.19,5249.05,117.7,194.76,924.69,pairtools,2 4,466.1686,0:07:46,6038.33,9285.21,5903.87,5941.35,5249.05,156.93,194.4,930.11,pairtools,2 0,228.2886,0:03:48,17383.15,22336.79,17245.43,17268.2,0.0,0.02,186.3,427.67,pairtools_bwamem2,2 
1,221.768,0:03:41,17381.39,22167.71,17243.96,17266.91,0.0,39.25,193.13,437.76,pairtools_bwamem2,2 2,221.3263,0:03:41,17362.04,22295.71,17224.0,17246.68,0.0,78.47,193.49,445.19,pairtools_bwamem2,2 3,217.581,0:03:37,17340.12,22400.79,17203.5,17233.95,0.0,117.7,196.51,452.01,pairtools_bwamem2,2 4,223.3371,0:03:43,17393.16,22144.79,17255.85,17277.7,0.01,156.93,192.59,462.11,pairtools_bwamem2,2 0,610.9462,0:10:10,13529.42,15232.48,13516.34,13517.79,36766.11,2801.28,49.41,96.95,tadbit,2 1,376.1804,0:06:16,13569.33,15232.48,13545.14,13547.36,61864.18,5678.4,62.61,85.0,tadbit,2 2,300.593,0:05:00,13570.71,15232.47,13518.36,13536.02,61954.27,9211.43,31.59,15.96,tadbit,2 3,297.5085,0:04:57,13570.71,15232.48,13518.59,13531.8,61954.29,12339.2,53.0,24.9,tadbit,2 4,296.7615,0:04:56,13570.29,15232.48,13518.88,13531.85,61954.32,15451.43,72.1,28.21,tadbit,2 0,463.5859,0:07:43,3666.75,4713.83,3630.19,3636.92,0.02,2804.57,129.07,84.87,tadbit_bowtie2,2 1,455.2237,0:07:35,3666.76,4713.65,3630.33,3637.05,0.04,6087.06,128.93,11.28,tadbit_bowtie2,2 2,455.0799,0:07:35,3666.91,4713.66,3630.34,3637.1,0.07,9386.64,128.87,14.03,tadbit_bowtie2,2 3,458.9496,0:07:38,3667.79,4714.59,3631.29,3638.12,321.35,12458.88,123.9,12.09,tadbit_bowtie2,2 4,452.953,0:07:32,3667.88,4714.02,3597.93,3621.82,321.36,15940.75,129.9,20.51,tadbit_bowtie2,2 0,420.8718,0:07:00,3496.11,4314.01,3471.04,3476.4,15.42,768.89,393.0,1654.84,bowtie,4 1,422.8224,0:07:02,3496.45,4314.02,3471.73,3477.1,15.42,1557.63,391.27,1658.74,bowtie,4 2,419.9842,0:06:59,3494.73,4314.02,3470.36,3475.6,15.42,2354.96,393.91,1662.14,bowtie,4 3,420.7907,0:07:00,3495.93,4314.02,3471.43,3476.67,15.42,3145.49,393.27,1666.11,bowtie,4 4,412.5256,0:06:52,3497.47,4314.64,3472.82,3478.16,15.42,3952.15,401.55,1671.29,bowtie,4 0,252.8627,0:04:12,6006.18,6453.96,5982.69,5986.96,0.83,746.23,358.77,908.39,bwamem,4 1,261.662,0:04:21,6007.87,6453.96,5984.64,5988.93,5174.14,1803.59,380.15,999.85,bwamem,4 
2,274.3422,0:04:34,6024.01,6581.96,5999.88,6003.05,6023.34,2736.5,373.3,1032.61,bwamem,4 3,298.8224,0:04:58,6014.57,6453.96,5990.63,5993.79,11196.66,3669.41,359.87,1087.63,bwamem,4 4,285.7411,0:04:45,5997.78,6453.96,5988.62,5989.24,12674.78,4477.87,357.24,1036.2,bwamem,4 0,111.3764,0:01:51,17591.11,20784.61,17566.66,17572.18,0.02,932.93,377.1,420.58,bwamem2,4 1,111.2423,0:01:51,17569.3,20537.94,17544.68,17550.22,0.02,1865.84,376.76,425.65,bwamem2,4 2,112.9415,0:01:52,17579.97,20665.95,17555.33,17560.86,0.02,2798.75,371.87,432.32,bwamem2,4 3,111.3421,0:01:51,17573.66,20489.7,17548.98,17554.51,0.02,3731.66,376.83,437.97,bwamem2,4 4,111.3777,0:01:51,17544.64,20889.7,17520.3,17525.84,0.02,4353.51,270.46,325.61,bwamem2,4 0,63.9876,0:01:03,19049.48,20739.12,19027.24,19032.6,14756.71,0.02,122.87,79.43,chromap,4 1,46.2109,0:00:46,17981.29,19696.59,17959.17,17964.53,14756.71,88.36,86.01,43.67,chromap,4 2,46.4987,0:00:46,17964.62,19692.91,17941.91,17947.24,14756.71,176.69,85.48,49.23,chromap,4 3,46.4768,0:00:46,17972.1,19695.5,17949.51,17954.84,14756.71,265.02,85.79,55.0,chromap,4 4,46.1236,0:00:46,17970.75,19693.68,17948.35,17953.68,14756.71,353.35,86.4,60.65,chromap,4 0,1480.6598,0:24:40,6107.95,8576.53,3509.07,4446.13,0.04,9605.84,30.34,315.13,fanc_bowtie2,4 1,1496.7858,0:24:56,9031.23,11498.52,5833.15,7371.95,0.07,20128.0,29.31,332.72,fanc_bowtie2,4 2,1463.532,0:24:23,7184.77,9101.79,4217.2,5669.65,0.07,31165.73,31.89,344.45,fanc_bowtie2,4 3,1450.3658,0:24:10,6120.51,8586.21,3510.02,4468.34,0.07,41105.85,30.36,336.53,fanc_bowtie2,4 4,1452.4516,0:24:12,7187.97,9104.71,4292.93,5709.08,0.07,51577.66,30.16,338.06,fanc_bowtie2,4 0,1150.8569,0:19:10,7186.05,9953.48,5723.31,5859.42,5224.05,6719.89,32.08,312.36,fanc_bwa,4 1,1135.3056,0:18:55,7224.26,9950.27,5660.22,5824.15,5224.08,14390.72,35.73,329.42,fanc_bwa,4 2,1125.9794,0:18:45,9041.66,11507.65,5849.72,7432.53,5224.11,21886.05,34.66,314.55,fanc_bwa,4 
3,1128.931,0:18:48,9044.25,11511.71,5853.64,7435.73,5224.14,29477.74,34.51,319.67,fanc_bwa,4 4,1128.7103,0:18:48,9040.23,11509.34,5851.56,7432.95,5224.14,37075.91,34.49,326.24,fanc_bwa,4 0,596.2727,0:09:56,23080.44,26188.7,23034.84,23042.16,0.0,0.53,199.86,493.7,hicexplorer,4 1,594.7958,0:09:54,23219.5,26188.7,23173.16,23180.65,0.0,1.04,203.29,503.47,hicexplorer,4 2,609.9005,0:10:09,23002.46,26188.7,22956.89,22965.46,0.0,1.05,192.2,508.65,hicexplorer,4 3,597.4554,0:09:57,23259.77,26188.7,23215.77,23224.37,19.97,2.01,204.19,518.44,hicexplorer,4 4,622.1449,0:10:22,23509.05,26188.7,23466.36,23474.47,5281.18,2.58,171.82,538.96,hicexplorer,4 0,619.9254,0:10:19,6759.34,7631.43,6704.49,6717.01,3.34,1088.32,295.46,59.78,hicpro,4 1,558.5864,0:09:18,6759.78,7631.43,6705.0,6715.83,3.34,2216.0,327.68,66.81,hicpro,4 2,550.9692,0:09:10,6760.03,7631.43,6716.98,6725.62,3.34,3341.63,337.46,79.54,hicpro,4 3,549.0615,0:09:09,6759.72,7631.44,6717.38,6725.84,3.34,4467.26,338.57,88.86,hicpro,4 4,548.9341,0:09:08,6760.25,7631.43,6717.1,6725.58,3.34,5592.88,339.01,94.98,hicpro,4 0,297.0198,0:04:57,5996.96,6457.18,5977.55,5979.61,3412.56,2785.59,297.18,6.35,juicer,4 1,319.5797,0:05:19,5989.58,6457.18,5979.42,5981.48,9268.84,5739.07,309.19,7.78,juicer,4 2,327.0335,0:05:27,5988.18,6457.18,5977.84,5978.92,10206.69,8537.05,311.25,12.23,juicer,4 3,331.7378,0:05:31,5988.45,6457.18,5978.38,5979.71,16123.1,11364.07,297.09,18.28,juicer,4 4,334.7347,0:05:34,5989.59,6457.18,5979.36,5980.55,19292.39,14066.95,290.47,19.62,juicer,4 0,241.5991,0:04:01,6399.55,10581.27,6265.49,6303.07,0.17,0.02,385.61,935.31,pairtools,4 1,241.5127,0:04:01,6398.66,10581.27,6265.65,6303.01,0.17,39.25,386.39,940.88,pairtools,4 2,242.2225,0:04:02,6399.6,10581.27,6266.3,6303.79,0.17,78.47,384.74,943.77,pairtools,4 3,241.9401,0:04:01,6398.33,10581.27,6264.49,6301.69,0.17,117.7,385.22,947.96,pairtools,4 4,242.4722,0:04:02,6399.78,10581.27,6265.86,6303.43,0.17,156.93,384.24,951.72,pairtools,4 
0,149.1161,0:02:29,18032.98,24632.93,17895.58,17910.56,6662.34,0.02,321.06,482.55,pairtools_bwamem2,4 1,129.8502,0:02:09,18011.57,24911.85,17874.46,17889.42,7215.82,39.25,336.11,443.2,pairtools_bwamem2,4 2,149.7954,0:02:29,17943.55,24532.52,17806.29,17821.07,13922.12,78.47,316.37,488.94,pairtools_bwamem2,4 3,151.1137,0:02:31,17982.62,24665.18,17847.23,17861.77,30195.88,117.7,311.36,487.72,pairtools_bwamem2,4 4,151.164,0:02:31,18000.05,24454.96,17865.51,17883.09,46478.44,156.93,310.2,489.74,pairtools_bwamem2,4 0,258.4889,0:04:18,13609.79,15690.48,13542.78,13567.37,15901.3,3103.55,57.27,19.48,tadbit,4 1,226.7053,0:03:46,10086.89,15275.6,10018.9,10043.75,15902.2,5596.25,40.62,76.32,tadbit,4 2,228.1718,0:03:48,8939.3,15275.61,8872.51,8896.66,15902.23,9327.17,39.3,23.97,tadbit,4 3,226.9155,0:03:46,9170.13,15275.61,9102.29,9127.05,15902.26,11800.28,40.77,79.04,tadbit,4 4,225.4808,0:03:45,9664.37,15275.61,9635.06,9642.85,15902.27,14928.92,41.44,83.32,tadbit,4 0,320.7662,0:05:20,3689.32,5212.95,3650.62,3659.72,0.02,3171.16,145.76,25.25,tadbit_bowtie2,4 1,348.5605,0:05:48,3687.51,5211.26,3648.52,3653.68,3011.82,6430.47,170.42,20.75,tadbit_bowtie2,4 2,338.4871,0:05:38,3688.18,5211.85,3649.18,3654.37,3012.64,9240.8,172.35,8.57,tadbit_bowtie2,4 3,340.9527,0:05:40,3689.14,5212.57,3649.83,3654.76,3014.55,12098.38,165.3,65.98,tadbit_bowtie2,4 4,346.3,0:05:46,3688.78,5212.19,3649.61,3654.97,3060.71,15516.58,170.85,94.72,tadbit_bowtie2,4 pairtools-1.1.3/doc/examples/duplicate_distance.ipynb000066400000000000000000005130661474715105500230260ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Should one consider pairs with sligthly mismatching coordinates as duplicates?\n", "Is there a significant enrichment of pairs shifted on one or both sides by just a few base pairs? If so, we should allow small offsets in the deduplication procedure.\n", "\n", "Let's investigate it in an example dataset. 
It was generated using the Arima Hi-C kit, and published in Ghurye et al. 2019." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "from pairtools.lib import headerops, fileio\n", "import pandas as pd\n", "from scipy.spatial import KDTree # Change to cKDTree for faster performance with scipy <1.6.0\n", "import matplotlib.pyplot as plt\n", "from matplotlib.colors import LogNorm\n", "import seaborn as sns" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Read in the pairs file and get the header, take columns from the header\n", "def read_pairs(pairs_file):\n", " pairs_stream = fileio.auto_open(pairs_file, 'r')\n", " header, pairs_stream = headerops.get_header(pairs_stream)\n", " columns = headerops.get_colnames(header)\n", " df = pd.read_table(pairs_stream, comment=\"#\", header=None, names=columns)\n", " return df\n", "\n", "def dup_dist(df, r, cols=['pos1', 'pos2'], p=1):\n", " # KDtree is a data structure that allows for fast nearest-neighbor lookup\n", " tree = KDTree(df[cols].to_numpy())\n", " # Find all pairs within a distance r of each other\n", " pairs = tree.query_pairs(r, p=p, output_type='ndarray')\n", " df['dist1'] = pd.NA\n", " df['dist2'] = pd.NA\n", " ids1 = df['readID'].to_numpy()[pairs[:, 0]]\n", " ids2 = df['readID'].to_numpy()[pairs[:, 1]]\n", " # Calculate the distance between the pairs (we know they are within r of each other, but not the exact distance)\n", " dists1 = np.abs(df[cols[0]].to_numpy()[pairs[:, 1]]-df[cols[0]].to_numpy()[pairs[:, 0]].astype(int))\n", " dists2 = np.abs(df[cols[1]].to_numpy()[pairs[:, 1]]-df[cols[1]].to_numpy()[pairs[:, 0]].astype(int))\n", " return pd.DataFrame({'id1':ids1, 'id2':ids2, 'dist1':dists1, 'dist2':dists2}).sort_values(by=['id1', 'id2']).reset_index(drop=True)\n", "\n", "def calculate_dup_dists(df, cols=['pos1', 'pos2'], r=20):\n", " df = 
df.drop(columns='readID').reset_index().rename(columns={'index': 'readID'}) # Add fake read IDs, according to the order of the pairs\n", " dup_dists = df.groupby(['chrom1', 'chrom2', 'strand1', 'strand2']).apply(dup_dist, r=r, p=np.inf, # p specifies the Minkowski distance parameter, inf means the maximum distance on either side is r\n", " cols=cols, include_groups=False).sort_values(by=['id1', 'id2']).reset_index(drop=True)\n", " return dup_dists" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "This is a file created by merging the nodups and dups output from the distiller pipeline using pairtools merge. This way it contains all the original duplicates, but not unmapped reads." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pairs_df = read_pairs('SRR6675327.hg19.pairs.gz')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Remove unnecessary chromosomes and sample 10 mln pairs for faster computation\n", "pairs_df = pairs_df[pairs_df['chrom1'].isin([f'chr{x}' for x in list(range(1, 23))+['X', 'Y']]) & \\\n", " pairs_df['chrom2'].isin([f'chr{x}' for x in list(range(1, 23))+['X', 'Y']])].sample(int(1e7))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we find pairs that are within 50 bp of each other on both sides, and calculate distances between them." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "dup_dists = calculate_dup_dists(pairs_df,\n", " cols=['pos51', 'pos52'], # Use 5-prime positions of alignment (in this case, stored in the pairs file)\n", " r=50, # Consider pairs with distance less than 50 bp\n", " )" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id1id2dist1dist2
03303330200
13719372000
29587958600
3105071050800
4197411974000
\n", "
" ], "text/plain": [ " id1 id2 dist1 dist2\n", "0 3303 3302 0 0\n", "1 3719 3720 0 0\n", "2 9587 9586 0 0\n", "3 10507 10508 0 0\n", "4 19741 19740 0 0" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# id1/2 correspond to the fake readIDs we created earlier \n", "dup_dists.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Convert the distances to a matrix of counts\n", "counts = dup_dists[['dist1', 'dist2']].value_counts().sort_index()\n", "counts_2d = counts.reset_index().pivot(index='dist1', columns='dist2', values='count')" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dist20123456789...41424344454647484950
dist1
054050699774175254622458445417489650754846...3342313727722612264625672879279225942602
118570368439092105259926012270208124072438...1748136111681198124710941434123111971120
211672298017161665174516521529157215831795...15621324112010771025967126310511047998
312716294118781764173118271627159317681814...157313741123114410929951435107710891020
412715307023511997195619692010183517842090...1569142911311256117810831383113211381069
\n", "

5 rows × 51 columns

\n", "
" ], "text/plain": [ "dist2 0 1 2 3 4 5 6 7 8 9 ... 41 \\\n", "dist1 ... \n", "0 54050 6997 7417 5254 6224 5844 5417 4896 5075 4846 ... 3342 \n", "1 18570 3684 3909 2105 2599 2601 2270 2081 2407 2438 ... 1748 \n", "2 11672 2980 1716 1665 1745 1652 1529 1572 1583 1795 ... 1562 \n", "3 12716 2941 1878 1764 1731 1827 1627 1593 1768 1814 ... 1573 \n", "4 12715 3070 2351 1997 1956 1969 2010 1835 1784 2090 ... 1569 \n", "\n", "dist2 42 43 44 45 46 47 48 49 50 \n", "dist1 \n", "0 3137 2772 2612 2646 2567 2879 2792 2594 2602 \n", "1 1361 1168 1198 1247 1094 1434 1231 1197 1120 \n", "2 1324 1120 1077 1025 967 1263 1051 1047 998 \n", "3 1374 1123 1144 1092 995 1435 1077 1089 1020 \n", "4 1429 1131 1256 1178 1083 1383 1132 1138 1069 \n", "\n", "[5 rows x 51 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Rows correspond to the distance between the left ends of the pairs, columns to the distance between the right ends\n", "counts_2d.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now we plot the data as a heatmap. We use log scale since the range of values is huge." ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Now we plot\n", "f, ax = plt.subplots()\n", "im = ax.imshow(counts_2d,\n", " norm=LogNorm(),\n", " cmap='rocket_r',\n", " )\n", "cb = plt.colorbar(im, ax=ax)\n", "ax.set(xticks=np.arange(0, 51, 5), yticks=np.arange(0, 51, 5), ylabel='Distance 1 (bp)', xlabel='Distance 2 (bp)')\n", "cb.set_label('Number of duplicates')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Interestingly, there is a strong enrichment of the first columns / top row, which almost doesn't get weaker with increasing distance.\n", "\n", "Probably it corresponds to pairs where one end of the final sequenced fragment is at an unligated restriction site used in the procedure. When this analysis is performed with micro-C data, where the enzyme doesn't cut at specific sequences, this is not observed." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's zoom into the top-left corner. That's where one might observe enrichment of nor precisely matching duplicates." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, ax = plt.subplots()\n", "im = ax.imshow(counts_2d.iloc[:20, :20],\n", " norm=LogNorm(),\n", " cmap='rocket_r',\n", " )\n", "cb = plt.colorbar(im, ax=ax)\n", "ax.set(xticks=np.arange(20), yticks=np.arange(20), ylabel='Distance 1 (bp)', xlabel='Distance 2 (bp)')\n", "cb.set_label('Number of duplicates')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "normed = counts_2d / np.sqrt(counts_2d.mean(axis=0).to_numpy() * counts_2d.mean(axis=1).to_numpy()[:, np.newaxis])" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, ax = plt.subplots()\n", "im = ax.imshow(normed,\n", " norm=LogNorm(vmin=0.1, vmax=10),\n", " cmap='coolwarm',\n", " )\n", "cb = plt.colorbar(im, ax=ax)\n", "ax.set(xticks=np.arange(0, 51, 5), yticks=np.arange(0, 51, 5), ylabel='Distance 1 (bp)', xlabel='Distance 2 (bp)')\n", "cb.set_label('number of duplicates / expected (product of marginals)')" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "f, ax = plt.subplots()\n", "im = ax.imshow(normed.iloc[:20, :20],\n", " norm=LogNorm(vmin=0.1, vmax=10),\n", " cmap='coolwarm',\n", " )\n", "cb = plt.colorbar(im, ax=ax)\n", "ax.set(xticks=np.arange(20), yticks=np.arange(20), ylabel='Distance 1 (bp)', xlabel='Distance 2 (bp)')\n", "cb.set_label('Number of duplicates / expected (product of marginals)')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" } }, "nbformat": 4, "nbformat_minor": 2 } pairtools-1.1.3/doc/examples/example_pipeline.sh000066400000000000000000000044211474715105500220010ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# -le 3 ] ; then echo "Usage: bash example_pipeline.sh BWA_INDEX FASTQ_1 FASTQ_2 OUTPUT_PREFIX" echo "" echo "A example of a bash pipeline to align the sequencing data from a " echo "single Hi-C experiment." echo "" echo "positional arguments:" echo "" echo "BWA_INDEX The path to a bwa index of the reference genome." echo "CHROM_SIZES The path to a file with chromosome sizes." echo "FASTQ_1 The path to a fastq file with the sequences of " echo " the first side of Hi-C molecules." echo "FASTQ_2 The path to a fastq file with the sequences of " echo " the second side of Hi-C molecules." echo "OUTPUT_PREFIX The prefix to the paths of generated outputs. 
" echo "" echo "" exit 0 fi set -o errexit set -o nounset set -o pipefail INDEX=$1 CHROM_SIZES=$2 FASTQ1=$3 FASTQ2=$4 OUTPREFIX=$5 N_THREADS=8 UNMAPPED_SAM_PATH=${OUTPREFIX}.unmapped.bam UNMAPPED_PAIRS_PATH=${OUTPREFIX}.unmapped.pairs.gz NODUPS_SAM_PATH=${OUTPREFIX}.nodups.bam NODUPS_PAIRS_PATH=${OUTPREFIX}.nodups.pairs.gz DUPS_SAM_PATH=${OUTPREFIX}.dups.bam DUPS_PAIRS_PATH=${OUTPREFIX}.dups.pairs.gz bwa mem -SP -t "${N_THREADS}" "${INDEX}" "${FASTQ1}" "${FASTQ2}" | { # Classify Hi-C molecules as unmapped/single-sided/multimapped/chimeric/etc # and output one line per read, containing the following, separated by \\v: # * triu-flipped pairs # * read id # * type of a Hi-C molecule # * corresponding sam entries pairtools parse --chroms-path "{CHROM_SIZES}" } | { # Block-sort pairs together with SAM entries pairtools sort --nproc 4 } | { # Remove duplicates, separate mapped and unmapped reads pairtools dedup \ --output \ >( pairtools split \ --output-pairs ${NODUPS_PAIRS_PATH} \ --output-sam ${NODUPS_SAM_PATH} ) \ --output-dups \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${DUPS_PAIRS_PATH} \ --output-sam ${DUPS_SAM_PATH} ) \ --output-unmapped >( pairtools split \ --output-pairs ${UNMAPPED_PAIRS_PATH} \ --output-sam ${UNMAPPED_SAM_PATH} ) } pairtools-1.1.3/doc/examples/example_singlecell_pipeline.sh000066400000000000000000000055271474715105500242120ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# -le 3 ] ; then echo "Usage: bash example_pipeline.sh BWA_INDEX FASTQ_1 FASTQ_2 OUTPUT_PREFIX" echo "" echo "A example of a bash pipeline to align the sequencing data from a " echo "single Hi-C experiment." echo "" echo "positional arguments:" echo "" echo "BWA_INDEX The path to a bwa index of the reference genome." echo "CHROM_SIZES The path to a file with chromosome sizes." echo "FASTQ_1 The path to a fastq file with the sequences of " echo " the first side of Hi-C molecules." 
echo "FASTQ_2 The path to a fastq file with the sequences of " echo " the second side of Hi-C molecules." echo "OUTPUT_PREFIX The prefix to the paths of generated outputs. " echo "" echo "" exit 0 fi set -o errexit set -o nounset set -o pipefail INDEX=$1 CHROM_SIZES=$2 FASTQ1=$3 FASTQ2=$4 OUTPREFIX=$5 N_THREADS=8 UNMAPPED_SAM_PATH=${OUTPREFIX}.unmapped.bam UNMAPPED_PAIRS_PATH=${OUTPREFIX}.unmapped.pairs.gz NODUPS_SAM_PATH=${OUTPREFIX}.nodups.bam NODUPS_PAIRS_PATH=${OUTPREFIX}.nodups.pairs.gz DUPS_SAM_PATH=${OUTPREFIX}.dups.bam DUPS_PAIRS_PATH=${OUTPREFIX}.dups.pairs.gz LOWFREQPAIRS_SAM_PATH=${OUTPREFIX}.lowfreq.bam LOWFREQPAIRS_PAIRS_PATH=${OUTPREFIX}.lowfreq.pairs.gz HIGHFREQPAIRS_SAM_PATH=${OUTPREFIX}.highfreq.bam HIGHFREQPAIRS_PAIRS_PATH=${OUTPREFIX}.highfreq.pairs.gz bwa mem -SP -t "${N_THREADS}" "${INDEX}" "${FASTQ1}" "${FASTQ2}" | { # Classify Hi-C molecules as unmapped/single-sided/multimapped/chimeric/etc # and output one line per read, containing the following, separated by \\v: # * triu-flipped pairs # * read id # * type of a Hi-C molecule # * corresponding sam entries pairtools parse "{CHROM_SIZES}" } | { # Block-sort pairs together with SAM entries pairtools sort } | { # Set unmapped and ambiguous reads aside pairtools select '(pair_type == "UU") or (pair_type == "UR") or (pair_type == "RU")' \ --output-rest >( pairtools split \ --output-pairs ${UNMAPPED_PAIRS_PATH} \ --output-sam ${UNMAPPED_SAM_PATH} ) } | { # Remove duplicates pairtools dedup \ --output-dups \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${DUPS_PAIRS_PATH} \ --output-sam ${DUPS_SAM_PATH} ) } | { # Remove high frequency interactors pairtools multifilter \ --output \ >( pairtools split \ --output-pairs ${LOWFREQ_PAIRS_PATH} \ --output-sam ${LOWFREQ_SAM_PATH} ) \ --output-high-frequency-interactors \ >( pairtools markasdup \ | pairtools split \ --output-pairs ${HIGHFREQPAIRS_PAIRS_PATH} \ --output-sam ${HIGHFREQPAIRS_SAM_PATH} ) } 
pairtools-1.1.3/doc/examples/pairtools_phase_walkthrough.ipynb000066400000000000000000003174421474715105500250150ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "112fe2d5-aaed-4eb1-b3f5-2f5889a9c89f", "metadata": {}, "source": [ "# Pairtools phase walkthrough\n", "\n", "Welcome to the pairtools phase walkthrough! This notebook will guide you through the process of resolving contacts between homologous chromosomes using haplotype-resolved Hi-C analysis.\n", "\n", "## What is haplotype-resolved Hi-C?\n", "\n", "Haplotype-resolved Hi-C distinguishes interactions within individual chromosomes (cis-homolog contacts) from those between homologous chromosomes (trans-homolog contacts). This separation is possible because homologous chromosomes carry variations (e.g. single nucleotide variants, or, SNVs) that can be used to tell them apart.\n", "\n", "The experimental challenge of haplotype-resolved Hi-C is to increase the number of SNVs that are essential to distinguish reads from different chromosomes. This can be done by mating highly diverged homozygous strains and studying their F1 progeny. \n", "\n", "Several studies have successfully leveraged haplotype-resolved Hi-C for novel insights:\n", "1. Erceg et al. (2019) explored chromosome pairing in Drosophila embryos [1].\n", "2. Collombet et al. (2020) studied chromosomal organization during early mammalian embryogenesis [2].\n", "3. Tan et al. 2018 uses available heterozygous positions to infer the 3D structures of single chromosomes by single-cell variant of the protocol Dip-C [3]\n", "4. Duan et al. use dikaryonic nuclei of fungi with 0.7% heterozygosity [4]" ] }, { "attachments": { "62e74fba-c1c1-44b5-a3e2-3699c3cac7ce.png": { "image/png": "" } }, "cell_type": "markdown", "id": "c3795661-e308-44e6-9b0f-3f0396541250", "metadata": {}, "source": [ "Several approaches have been developed to process Hi-C data from haplotype-resolved experiments. 
In `pairtools`, we implement the approach that was used in Erceg et al. Here is its brief outline:\n", "\n", "1. Create the haplotype-resolved genome. First, we will create a \"concatenated\" reference genome that contains sequences of both homologs of each chromosome. \n", "\n", " - Incorporate known SNVs (usually in .vcf format) into the reference genome using [bcftools](https://samtools.github.io/bcftools/bcftools.html) to create FASTA files with the sequences of both homologs.\n", " - Add suffixes to the name of each homolog that identify the type (`_hap1` or `_hap2`).\n", "\n", "2. Map the Hi-C data to the concatenated reference and parse resulting alignment into Hi-C pairs. Compared to the standard Hi-C pipeline, this step would contain a couple of modifications:\n", " - Make the aligner report two suboptimal alignments (aka the second and the third hit).\n", " - Parse allowing multimappers (mapq 0). \n", " \n", " Note that, upon mapping to the homolog-resolved genome, Hi-C reads will report the identity of their homologue as the suffix of the chromosome name.\n", " \n", "3. Phase the resulting pairs based on the reported suboptimal alignments. \n", "\n", " By checking the scores of two suboptimal alignments, we will distinguish the true multi-mappers from unresolved pairs (i.e. cases when the read aligns to the location with no distinguishing SNV). Phasing will remove the haplotype suffixes from chromosome names and add extra fields to the .pairs file with:\n", "\n", " - '.' (non-resolved)\n", " - '0' (first haplotype) \n", " - '1' (second haplotype)\n", " \n", " Phasing schema: \n", " \n", " ![image.png](attachment:62e74fba-c1c1-44b5-a3e2-3699c3cac7ce.png)\n", "\n", "\n", "4. Post-processing. Sort and [dedup](#pairtools-dedup) Hi-C pairs and calculate [stats](#Stats), similarly to the standard Hi-C pipeline. 
" ] }, { "cell_type": "markdown", "id": "9dc8a020-7c4b-471d-9dfd-a5e346f10a27", "metadata": {}, "source": [ "[1] Erceg, J., AlHaj Abed, J., Goloborodko, A., Lajoie, B. R., Fudenberg, G., Abdennur, N., Imakaev, M., McCole, R. B., Nguyen, S. C., Saylor, W., Joyce, E. F., Senaratne, T. N., Hannan, M. A., Nir, G., Dekker, J., Mirny, L. A., & Wu, C. T. (2019). The genome-wide multi-layered architecture of chromosome pairing in early Drosophila embryos. Nature communications, 10(1), 4486. https://doi.org/10.1038/s41467-019-12211-8\n", "\n", "[2] Collombet, S., Ranisavljevic, N., Nagano, T., Varnai, C., Shisode, T., Leung, W., Piolot, T., Galupa, R., Borensztein, M., Servant, N., Fraser, P., Ancelin, K., & Heard, E. (2020). Parental-to-embryo switch of chromosome organization in early embryogenesis. Nature, 580(7801), 142–146. https://doi.org/10.1038/s41586-020-2125-z\n", "\n", "[3] Tan, L., Xing, D., Chang, C. H., Li, H., & Xie, X. S. (2018). Three-dimensional genome structures of single diploid human cells. Science (New York, N.Y.), 361(6405), 924–928. https://doi.org/10.1126/science.aat5641\n", "\n", "[4] Duan, H., Jones, A. W., Hewitt, T., Mackenzie, A., Hu, Y., Sharp, A., Lewis, D., Mago, R., Upadhyaya, N. M., Rathjen, J. P., Stone, E. A., Schwessinger, B., Figueroa, M., Dodds, P. N., Periyannan, S., & Sperschneider, J. (2022). Physical separation of haplotypes in dikaryons allows benchmarking of phasing accuracy in Nanopore and HiFi assemblies with Hi-C data. Genome biology, 23(1), 84. https://doi.org/10.1186/s13059-022-02658-2\n" ] }, { "cell_type": "markdown", "id": "a0b4c550-8168-4780-82e0-1e18493135af", "metadata": {}, "source": [ "We will test this pipeline on a sample from Collombet et al. 2019 [2], which is a great example of single-cell Hi-C obtained on mice hybrids of highly heterozygous parents. For the sake of brevity, we will focus on just one cell from the dataset, GSM3691125_2CSE_70. 
\n", "\n", "Note that, because this is a single-cell sample, the properties of this dataset may differ from what you may obtain on bulk data. " ] }, { "cell_type": "markdown", "id": "5ab026af-fe25-4a70-82ef-52af6fb25371", "metadata": {}, "source": [ "## Create the homolog-resolved genome\n", "\n", "To phase input reads, we need to map the data to the concatenated genome with two haplotypes. \n", "Below, we will generate such a genome in several steps. You will need a reference genome, and one or two lists of mutations to introduce into the reference.\n", "\n", "#### Download reference genome" ] }, { "cell_type": "code", "execution_count": 5, "id": "9ec0743f-a299-43f0-b568-7e963ed95df8", "metadata": { "tags": [ "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2024-03-18 13:18:25-- https://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz\n", "Resolving ftp.ensembl.org (ftp.ensembl.org)... 193.62.193.169\n", "Connecting to ftp.ensembl.org (ftp.ensembl.org)|193.62.193.169|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 861993605 (822M) [application/x-gzip]\n", "Saving to: ‘Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz’\n", "\n", "Mus_musculus.GRCm38 100%[===================>] 822.06M 2.43MB/s in 5m 35s \n", "\n", "2024-03-18 13:24:01 (2.45 MB/s) - ‘Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz’ saved [861993605/861993605]\n", "\n" ] } ], "source": [ "! wget https://ftp.ensembl.org/pub/release-68/fasta/mus_musculus/dna/Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz" ] }, { "cell_type": "markdown", "id": "6f31f657", "metadata": {}, "source": [ "The genome must be indexed for subsequent steps. However, indexing requires the genome to be compressed with bgzip." 
] }, { "cell_type": "code", "execution_count": 13, "id": "79e6471d", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "zcat Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz | bgzip -c > Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.bgz\n", "samtools faidx Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.bgz\n", "rm Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.gz" ] }, { "cell_type": "markdown", "id": "7683d63a-bc2f-4c49-8371-fd57f4111072", "metadata": {}, "source": [ "#### Download .vcf file with variants" ] }, { "cell_type": "code", "execution_count": 4, "id": "4a347a3b-2ee7-4824-a209-8377edddf640", "metadata": { "tags": [ "hide-output" ] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2024-03-18 13:18:14-- https://ftp.ebi.ac.uk/pub/databases/mousegenomes/REL-1505-SNPs_Indels/strain_specific_vcfs/CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Resolving ftp.ebi.ac.uk (ftp.ebi.ac.uk)... 193.62.193.165\n", "Connecting to ftp.ebi.ac.uk (ftp.ebi.ac.uk)|193.62.193.165|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 785849127 (749M) [application/x-gzip]\n", "Saving to: ‘CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz.2’\n", "\n", " CAST_EiJ.mgp.v5.sn 0%[ ] 4.82M 2.42MB/s ^C\n" ] } ], "source": [ "! wget https://ftp.ebi.ac.uk/pub/databases/mousegenomes/REL-1505-SNPs_Indels/strain_specific_vcfs/CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz" ] }, { "cell_type": "markdown", "id": "88363fb6-c233-4a07-a208-a5e5a2679038", "metadata": {}, "source": [ "#### Index the variants" ] }, { "cell_type": "code", "execution_count": 6, "id": "84cebce3-29c6-42df-98bf-5388a51fb268", "metadata": {}, "outputs": [], "source": [ "! 
bcftools index CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz" ] }, { "cell_type": "markdown", "id": "2dd599a0-64f9-4c8b-b78f-8eabf49c052e", "metadata": {}, "source": [ "#### Introduce the variants into the genome" ] }, { "cell_type": "code", "execution_count": 16, "id": "848c9fe5-a632-4139-ba56-60871d8d1eb4", "metadata": { "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Warning: Sequence \"JH584295.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584292.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456368.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456396.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456359.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456382.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456392.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456394.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456390.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456387.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456381.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456370.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456372.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456389.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456378.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456360.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456385.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456383.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456213.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456239.1\" not in 
CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456367.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456366.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456393.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456216.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456379.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584304.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456212.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584302.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584303.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456210.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456219.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584300.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584298.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584294.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456354.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584296.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584297.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456221.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584293.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456350.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456211.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584301.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456233.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584299.1\" not in 
CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584295.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584292.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456368.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456396.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456359.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456382.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456392.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456394.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456390.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456387.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456381.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456370.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456372.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456389.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456378.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456360.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456385.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456383.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456213.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456239.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456367.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456366.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456393.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456216.1\" not in 
CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456379.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584304.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456212.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584302.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584303.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456210.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456219.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584300.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584298.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584294.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456354.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584296.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584297.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456221.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584293.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456350.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456211.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584301.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"GL456233.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n", "Warning: Sequence \"JH584299.1\" not in CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz\n" ] } ], "source": [ "%%bash\n", "bcftools consensus --fasta-ref Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.bgz \\\n", " --haplotype 1 CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz | sed -E 's/(>[^[:space:]]+).*/\\1_hap1/g' | bgzip -c > GRCm38_EiJ_snpsonly_hap1.fa.gz\n", "\n", "bcftools consensus --fasta-ref 
Mus_musculus.GRCm38.68.dna_sm.toplevel.fa.bgz \\\n", " --haplotype 2 CAST_EiJ.mgp.v5.snps.dbSNP142.vcf.gz | sed -E 's/(>[^[:space:]]+).*/\\1_hap2/g' | bgzip -c > GRCm38_EiJ_snpsonly_hap2.fa.gz\n" ] }, { "cell_type": "markdown", "id": "af1f6027", "metadata": {}, "source": [ "Note that some of these inserted variants may change the total number of nucleotides. This would introduce differences between the coordinate systems of two homologs and complicate downstream analyses. Thus, to make your analyses simpler, you may want to insert only single-nucleotide variants and exclude all other variants \n", "by using the `--include` parameter of `bcftools consensus` (e.g. `--include '(STRLEN(REF)=1) & (STRLEN(ALT[0])=1)'`).\n", "This will make sure that the genomic coordinates correspond between the haplotypes. \n", "Correspondence of coordinates is not a requirement, but might be important for downstream analysis. " ] }, { "cell_type": "markdown", "id": "dfd7c4cb-31dd-43df-8510-95fd0ff9f78f", "metadata": {}, "source": [ "#### Create the bwa index of homolog-resolved genome" ] }, { "cell_type": "markdown", "id": "99d28f6f-b754-4a95-95d5-9e5e51d14571", "metadata": {}, "source": [ "Concatenate the genomes of two homologs and index them together. " ] }, { "cell_type": "code", "execution_count": 17, "id": "92ff8a4f-2115-4131-8c4a-cbd040dcdffb", "metadata": { "tags": [ "hide-output" ] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[bwa_index] Pack FASTA... 62.34 sec\n", "[bwa_index] Construct BWT for the packed sequence...\n", "[BWTIncCreate] textLength=10923487096, availableWord=780616804\n", "[BWTIncConstructFromPacked] 10 iterations done. 99999992 characters processed.\n", "[BWTIncConstructFromPacked] 20 iterations done. 199999992 characters processed.\n", "[BWTIncConstructFromPacked] 30 iterations done. 299999992 characters processed.\n", "[BWTIncConstructFromPacked] 40 iterations done. 399999992 characters processed.\n", "[BWTIncConstructFromPacked] 50 iterations done. 
499999992 characters processed.\n", "[BWTIncConstructFromPacked] 60 iterations done. 599999992 characters processed.\n", "[BWTIncConstructFromPacked] 70 iterations done. 699999992 characters processed.\n", "[BWTIncConstructFromPacked] 80 iterations done. 799999992 characters processed.\n", "[BWTIncConstructFromPacked] 90 iterations done. 899999992 characters processed.\n", "[BWTIncConstructFromPacked] 100 iterations done. 999999992 characters processed.\n", "[BWTIncConstructFromPacked] 110 iterations done. 1099999992 characters processed.\n", "[BWTIncConstructFromPacked] 120 iterations done. 1199999992 characters processed.\n", "[BWTIncConstructFromPacked] 130 iterations done. 1299999992 characters processed.\n", "[BWTIncConstructFromPacked] 140 iterations done. 1399999992 characters processed.\n", "[BWTIncConstructFromPacked] 150 iterations done. 1499999992 characters processed.\n", "[BWTIncConstructFromPacked] 160 iterations done. 1599999992 characters processed.\n", "[BWTIncConstructFromPacked] 170 iterations done. 1699999992 characters processed.\n", "[BWTIncConstructFromPacked] 180 iterations done. 1799999992 characters processed.\n", "[BWTIncConstructFromPacked] 190 iterations done. 1899999992 characters processed.\n", "[BWTIncConstructFromPacked] 200 iterations done. 1999999992 characters processed.\n", "[BWTIncConstructFromPacked] 210 iterations done. 2099999992 characters processed.\n", "[BWTIncConstructFromPacked] 220 iterations done. 2199999992 characters processed.\n", "[BWTIncConstructFromPacked] 230 iterations done. 2299999992 characters processed.\n", "[BWTIncConstructFromPacked] 240 iterations done. 2399999992 characters processed.\n", "[BWTIncConstructFromPacked] 250 iterations done. 2499999992 characters processed.\n", "[BWTIncConstructFromPacked] 260 iterations done. 2599999992 characters processed.\n", "[BWTIncConstructFromPacked] 270 iterations done. 2699999992 characters processed.\n", "[BWTIncConstructFromPacked] 280 iterations done. 
2799999992 characters processed.\n", "[BWTIncConstructFromPacked] 290 iterations done. 2899999992 characters processed.\n", "[BWTIncConstructFromPacked] 300 iterations done. 2999999992 characters processed.\n", "[BWTIncConstructFromPacked] 310 iterations done. 3099999992 characters processed.\n", "[BWTIncConstructFromPacked] 320 iterations done. 3199999992 characters processed.\n", "[BWTIncConstructFromPacked] 330 iterations done. 3299999992 characters processed.\n", "[BWTIncConstructFromPacked] 340 iterations done. 3399999992 characters processed.\n", "[BWTIncConstructFromPacked] 350 iterations done. 3499999992 characters processed.\n", "[BWTIncConstructFromPacked] 360 iterations done. 3599999992 characters processed.\n", "[BWTIncConstructFromPacked] 370 iterations done. 3699999992 characters processed.\n", "[BWTIncConstructFromPacked] 380 iterations done. 3799999992 characters processed.\n", "[BWTIncConstructFromPacked] 390 iterations done. 3899999992 characters processed.\n", "[BWTIncConstructFromPacked] 400 iterations done. 3999999992 characters processed.\n", "[BWTIncConstructFromPacked] 410 iterations done. 4099999992 characters processed.\n", "[BWTIncConstructFromPacked] 420 iterations done. 4199999992 characters processed.\n", "[BWTIncConstructFromPacked] 430 iterations done. 4299999992 characters processed.\n", "[BWTIncConstructFromPacked] 440 iterations done. 4399999992 characters processed.\n", "[BWTIncConstructFromPacked] 450 iterations done. 4499999992 characters processed.\n", "[BWTIncConstructFromPacked] 460 iterations done. 4599999992 characters processed.\n", "[BWTIncConstructFromPacked] 470 iterations done. 4699999992 characters processed.\n", "[BWTIncConstructFromPacked] 480 iterations done. 4799999992 characters processed.\n", "[BWTIncConstructFromPacked] 490 iterations done. 4899999992 characters processed.\n", "[BWTIncConstructFromPacked] 500 iterations done. 
4999999992 characters processed.\n", "[BWTIncConstructFromPacked] 510 iterations done. 5099999992 characters processed.\n", "[BWTIncConstructFromPacked] 520 iterations done. 5199999992 characters processed.\n", "[BWTIncConstructFromPacked] 530 iterations done. 5299999992 characters processed.\n", "[BWTIncConstructFromPacked] 540 iterations done. 5399999992 characters processed.\n", "[BWTIncConstructFromPacked] 550 iterations done. 5499999992 characters processed.\n", "[BWTIncConstructFromPacked] 560 iterations done. 5599999992 characters processed.\n", "[BWTIncConstructFromPacked] 570 iterations done. 5699999992 characters processed.\n", "[BWTIncConstructFromPacked] 580 iterations done. 5799999992 characters processed.\n", "[BWTIncConstructFromPacked] 590 iterations done. 5899999992 characters processed.\n", "[BWTIncConstructFromPacked] 600 iterations done. 5999999992 characters processed.\n", "[BWTIncConstructFromPacked] 610 iterations done. 6099999992 characters processed.\n", "[BWTIncConstructFromPacked] 620 iterations done. 6199999992 characters processed.\n", "[BWTIncConstructFromPacked] 630 iterations done. 6299999992 characters processed.\n", "[BWTIncConstructFromPacked] 640 iterations done. 6399999992 characters processed.\n", "[BWTIncConstructFromPacked] 650 iterations done. 6499999992 characters processed.\n", "[BWTIncConstructFromPacked] 660 iterations done. 6599999992 characters processed.\n", "[BWTIncConstructFromPacked] 670 iterations done. 6699999992 characters processed.\n", "[BWTIncConstructFromPacked] 680 iterations done. 6799999992 characters processed.\n", "[BWTIncConstructFromPacked] 690 iterations done. 6899999992 characters processed.\n", "[BWTIncConstructFromPacked] 700 iterations done. 6999999992 characters processed.\n", "[BWTIncConstructFromPacked] 710 iterations done. 7099999992 characters processed.\n", "[BWTIncConstructFromPacked] 720 iterations done. 
7199999992 characters processed.\n", "[BWTIncConstructFromPacked] 730 iterations done. 7299999992 characters processed.\n", "[BWTIncConstructFromPacked] 740 iterations done. 7399999992 characters processed.\n", "[BWTIncConstructFromPacked] 750 iterations done. 7499999992 characters processed.\n", "[BWTIncConstructFromPacked] 760 iterations done. 7599999992 characters processed.\n", "[BWTIncConstructFromPacked] 770 iterations done. 7699999992 characters processed.\n", "[BWTIncConstructFromPacked] 780 iterations done. 7799999992 characters processed.\n", "[BWTIncConstructFromPacked] 790 iterations done. 7899999992 characters processed.\n", "[BWTIncConstructFromPacked] 800 iterations done. 7999999992 characters processed.\n", "[BWTIncConstructFromPacked] 810 iterations done. 8099999992 characters processed.\n", "[BWTIncConstructFromPacked] 820 iterations done. 8199999992 characters processed.\n", "[BWTIncConstructFromPacked] 830 iterations done. 8299999992 characters processed.\n", "[BWTIncConstructFromPacked] 840 iterations done. 8399999992 characters processed.\n", "[BWTIncConstructFromPacked] 850 iterations done. 8499999992 characters processed.\n", "[BWTIncConstructFromPacked] 860 iterations done. 8599999992 characters processed.\n", "[BWTIncConstructFromPacked] 870 iterations done. 8699999992 characters processed.\n", "[BWTIncConstructFromPacked] 880 iterations done. 8799999992 characters processed.\n", "[BWTIncConstructFromPacked] 890 iterations done. 8899999992 characters processed.\n", "[BWTIncConstructFromPacked] 900 iterations done. 8999999992 characters processed.\n", "[BWTIncConstructFromPacked] 910 iterations done. 9099999992 characters processed.\n", "[BWTIncConstructFromPacked] 920 iterations done. 9199999992 characters processed.\n", "[BWTIncConstructFromPacked] 930 iterations done. 9299999992 characters processed.\n", "[BWTIncConstructFromPacked] 940 iterations done. 
9399999992 characters processed.\n", "[BWTIncConstructFromPacked] 950 iterations done. 9499999992 characters processed.\n", "[BWTIncConstructFromPacked] 960 iterations done. 9599999992 characters processed.\n", "[BWTIncConstructFromPacked] 970 iterations done. 9699999992 characters processed.\n", "[BWTIncConstructFromPacked] 980 iterations done. 9799999992 characters processed.\n", "[BWTIncConstructFromPacked] 990 iterations done. 9899999992 characters processed.\n", "[BWTIncConstructFromPacked] 1000 iterations done. 9999999992 characters processed.\n", "[BWTIncConstructFromPacked] 1010 iterations done. 10099999992 characters processed.\n", "[BWTIncConstructFromPacked] 1020 iterations done. 10199999992 characters processed.\n", "[BWTIncConstructFromPacked] 1030 iterations done. 10298557624 characters processed.\n", "[BWTIncConstructFromPacked] 1040 iterations done. 10387491112 characters processed.\n", "[BWTIncConstructFromPacked] 1050 iterations done. 10466531432 characters processed.\n", "[BWTIncConstructFromPacked] 1060 iterations done. 10536778632 characters processed.\n", "[BWTIncConstructFromPacked] 1070 iterations done. 10599210536 characters processed.\n", "[BWTIncConstructFromPacked] 1080 iterations done. 10654696152 characters processed.\n", "[BWTIncConstructFromPacked] 1090 iterations done. 10704007912 characters processed.\n", "[BWTIncConstructFromPacked] 1100 iterations done. 10747832296 characters processed.\n", "[BWTIncConstructFromPacked] 1110 iterations done. 10786779480 characters processed.\n", "[BWTIncConstructFromPacked] 1120 iterations done. 10821391832 characters processed.\n", "[BWTIncConstructFromPacked] 1130 iterations done. 10852151336 characters processed.\n", "[BWTIncConstructFromPacked] 1140 iterations done. 10879486440 characters processed.\n", "[BWTIncConstructFromPacked] 1150 iterations done. 10903777896 characters processed.\n", "[BWTIncConstructFromPacked] 1160 iterations done. 
10923487096 characters processed.\n", "[bwt_gen] Finished constructing BWT in 1160 iterations.\n", "[bwa_index] 4810.53 seconds elapse.\n", "[bwa_index] Update BWT... 27.18 sec\n", "[bwa_index] Pack forward-only FASTA... 48.19 sec\n", "[bwa_index] Construct SA from BWT and Occ... 1602.31 sec\n", "[main] Version: 0.7.17-r1188\n", "[main] CMD: bwa index GRCm38_EiJ_snpsonly.fa.gz\n", "[main] Real time: 6563.846 sec; CPU: 6550.547 sec\n" ] } ], "source": [ "%%bash\n", "cat GRCm38_EiJ_snpsonly_hap1.fa.gz GRCm38_EiJ_snpsonly_hap2.fa.gz > GRCm38_EiJ_snpsonly.fa.gz\n", "bwa index GRCm38_EiJ_snpsonly.fa.gz" ] }, { "cell_type": "markdown", "id": "22017c7e-71af-4ef3-8237-364402e896fb", "metadata": {}, "source": [ "Generate chromosome sizes file: " ] }, { "cell_type": "code", "execution_count": 18, "id": "69489018-edde-4aa0-b7ac-7c7b4351764c", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "faidx GRCm38_EiJ_snpsonly.fa.gz -i chromsizes > GRCm38_EiJ_snpsonly.chromsizes" ] }, { "cell_type": "markdown", "id": "bd264406-be74-4060-9798-e18040c44889", "metadata": { "tags": [] }, "source": [ "## Download data\n", "\n", "Uncomment the `--minSpotId` and `--maxSpotId` if you want to run the small test instead of full run." ] }, { "cell_type": "code", "execution_count": 2, "id": "f4e310c0-2d16-4e7d-87d7-44feec8e6256", "metadata": {}, "outputs": [], "source": [ "! fastq-dump SRR8811373 --gzip --split-spot --split-3 # --minSpotId 0 --maxSpotId 1000000" ] }, { "cell_type": "code", "execution_count": 3, "id": "571e94fb-3dec-4042-9e21-6c39802ed8df", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR8811373_1.fastq.gz SRR8811373_2.fastq.gz\n" ] } ], "source": [ "! 
ls SRR8811373*.fastq.gz" ] }, { "cell_type": "markdown", "id": "a8a9fd19", "metadata": {}, "source": [ "## Map data with bwa mem to diploid genome\n", "\n", "In homolog-resolved Hi-C experiments, reads are first aligned against the reference genome and then parsed with pairtools, similar to the standard pairtools-based Hi-C pipeline. However, an additional challenge arises in distinguishing between un-phaseable reads (reads that map equally well to two homologous locations on two homologous chromosomes) and multimappers (reads that map to repeats in the genome).\n", "\n", "To differentiate between these cases, we can examine the top three candidate alignments for each read:\n", "- If the top two alignments map to two homologs of the same chromosome with identical scores, and the third-best alignment has a lower score, the read is considered un-phaseable.\n", "- Conversely, if the third-best alignment has the same score as the first two, the read is classified as a multimapper.\n", "\n", "bwa mem provides information on the top three alignments, but the exact usage depends on the version:\n", "\n", "1. Regular bwa binary (using XA alignment tag):\n", "\n", "The latest official bwa release (0.7.17-r1188, as of March 2024) includes the XA tag, which lists alternative (secondary) alignments, their CIGARs, and the number of mismatches relative to the reference. pairtools parse can parse this tag along with other alignment characteristics (AS, XS, and NM tags) to infer the scores of the top three alignments. In this case, no extra mapping flags are required besides the standard Hi-C flags -SP:\n", "\n", "```bash\n", "bwa mem -SP -t 5 mm10_EiJ_snpsonly.fa.gz test.1.fastq.gz test.2.fastq.gz | samtools view -@ 8 -b > mapped.XA.bam\n", "```\n", "\n", "2. 
Cutting-edge bwa version from GitHub (using XB alignment tag):\n", "\n", "The latest unreleased version of bwa, available on GitHub, can directly report the scores of all secondary alignments using the XB tag, providing more precise results. To use this option, download bwa's source code, compile it manually, and align reads with an additional -u flag:\n", "\n", "```bash\n", "./bwa/bwa mem -SPu -t 5 mm10_EiJ_snpsonly.fa.gz test.1.fastq.gz test.2.fastq.gz | samtools view -@ 8 -b > mapped.XB.bam\n", "```\n", "\n", "In this tutorial, we will use the first, simpler option.\n", "\n", "Note: [bwa-mem2](https://github.com/bwa-mem2/bwa-mem2) produces [very similar results to bwa mem](https://github.com/open2c/pairtools/discussions/118) while being [x2-3 times faster](https://github.com/bwa-mem2/bwa-mem2#performance). pairtools are compatible with bwa-mem2, and its use is highly recommended for improved performance." ] }, { "cell_type": "code", "execution_count": 4, "id": "12f8a13d-fba6-45f7-8112-291fb883d7d0", "metadata": { "tags": [ "hide-output" ] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[M::bwa_idx_load_from_disk] read 0 ALT contigs\n", "[M::process] read 331126 sequences (50000026 bp)...\n", "[M::process] read 127590 sequences (19265939 bp)...\n", "[M::mem_pestat] # candidate unique pairs for (FF, FR, RF, RR): (1, 238, 0, 0)\n", "[M::mem_pestat] skip orientation FF as there are not enough pairs\n", "[M::mem_pestat] analyzing insert size distribution for orientation FR...\n", "[M::mem_pestat] (25, 50, 75) percentile: (70, 98, 141)\n", "[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 283)\n", "[M::mem_pestat] mean and std.dev: (109.44, 53.37)\n", "[M::mem_pestat] low and high boundaries for proper pairs: (1, 354)\n", "[M::mem_pestat] skip orientation RF as there are not enough pairs\n", "[M::mem_pestat] skip orientation RR as there are not enough pairs\n", "[M::mem_process_seqs] Processed 331126 reads in 276.310 
CPU sec, 55.358 real sec\n", "[W::bseq_read] the 1st file has fewer sequences.\n", "[M::mem_pestat] # candidate unique pairs for (FF, FR, RF, RR): (0, 98, 0, 0)\n", "[M::mem_pestat] skip orientation FF as there are not enough pairs\n", "[M::mem_pestat] analyzing insert size distribution for orientation FR...\n", "[M::mem_pestat] (25, 50, 75) percentile: (73, 107, 164)\n", "[M::mem_pestat] low and high boundaries for computing mean and std.dev: (1, 346)\n", "[M::mem_pestat] mean and std.dev: (118.69, 58.74)\n", "[M::mem_pestat] low and high boundaries for proper pairs: (1, 437)\n", "[M::mem_pestat] skip orientation RF as there are not enough pairs\n", "[M::mem_pestat] skip orientation RR as there are not enough pairs\n", "[mem_sam_pe] paired reads have different names: \"SRR8811373.22935\", \"SRR8811373.229358\"\n", "\n" ] } ], "source": [ "%%bash\n", "bwa mem -SP -t 5 GRCm38_EiJ_snpsonly.fa.gz SRR8811373_1.fastq.gz SRR8811373_2.fastq.gz \\\n", " | samtools view -@ 8 -b > mapped.XA.bam" ] }, { "cell_type": "markdown", "id": "3bce4691-6268-4885-b8e0-1933a561d4b5", "metadata": {}, "source": [ "## pairtools parse\n", "\n", "In order to be phased, parsed Hi-C pairs need to (a) be parsed without mapq filtering and (b) contain a few additional tags (XA,NM,AS,XS). \n", "The former modification (a) is needed, because, the default `--min-mapq` value of 1 removes all multiply mapped sequences. This also removes all un-phaseable reads, as they map equally well to both homologs and thus have mapq of 0. Since we would like to keep un-phaseable reads, we need to set --min-mapq 0." 
] }, { "cell_type": "code", "execution_count": 5, "id": "efc63459-aa2f-44f5-804e-a2346d2b7820", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "[E::idx_find_and_load] Could not retrieve index file for 'mapped.XA.bam'\n" ] } ], "source": [ "%%bash\n", "pairtools parse --min-mapq 0 --add-columns XA,NM,AS,XS --drop-sam --walks-policy all \\\n", " -c GRCm38_EiJ_snpsonly.chromsizes mapped.XA.bam -o unphased.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "c39c62f0", "metadata": {}, "source": [ "count the number of pairs in the output file" ] }, { "cell_type": "code", "execution_count": 7, "id": "e1788b32", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "253813\n" ] } ], "source": [ "!zcat unphased.XA.pairs.gz | grep -v ^\\# | wc -l" ] }, { "cell_type": "markdown", "id": "c90ff16b-bb5b-4ceb-8fe3-feeae8ada021", "metadata": {}, "source": [ "## pairtools phase\n", "\n", "Phasing will remove the tags \"\\_hap1\" and \"\\_hap2\" from chromosome names and add a separate field for the phase:" ] }, { "cell_type": "code", "execution_count": 8, "id": "6c8deaee-cb68-4b53-b306-bf223523ab45", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools phase --phase-suffixes _hap1 _hap2 --tag-mode XA --clean-output unphased.XA.pairs.gz -o phased.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "c17443ec-b647-4818-aced-bdc686109396", "metadata": {}, "source": [ "## pairtools dedup\n", "\n", "Sort prior to dedup: " ] }, { "cell_type": "code", "execution_count": 9, "id": "6aabbc13-a8d4-43f2-b388-62e7b3b576ab", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools sort phased.XA.pairs.gz --nproc 10 -o phased.sorted.XA.pairs.gz" ] }, { "cell_type": "markdown", "id": "84d0442c-ba94-4571-8c89-44067acecb47", "metadata": {}, "source": [ "Deduplication now should take additional columns with phases into account: " ] }, { "cell_type": "code", "execution_count": 10, "id": 
"9fd3b266-4faa-4fc0-974d-b0ca9bbeb961", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools dedup --mark-dups --extra-col-pair phase1 phase2 \\\n", " --output-dups - --output-unmapped - --output-stats phased.XA.dedup.stats \\\n", " -o phased.sorted.XA.nodup.pairs.gz phased.sorted.XA.pairs.gz" ] }, { "cell_type": "code", "execution_count": 17, "id": "0e485084", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total\t232761\n", "total_unmapped\t29132\n", "total_single_sided_mapped\t44444\n", "total_mapped\t159185\n", "total_dups\t5219\n", "total_nodups\t153966\n", "cis\t17712\n", "trans\t136254\n", "pair_types/MM\t21281\n", "pair_types/NM\t7241\n", "pair_types/NN\t610\n", "pair_types/NU\t19507\n", "pair_types/MU\t22736\n", "pair_types/UU\t153966\n", "pair_types/UM\t2201\n", "pair_types/DD\t5219\n", "cis_1kb+\t9470\n", "cis_2kb+\t9225\n", "cis_4kb+\t8839\n", "cis_10kb+\t8161\n", "cis_20kb+\t7590\n", "cis_40kb+\t7162\n", "summary/frac_cis\t0.11503838509800865\n", "summary/frac_cis_1kb+\t0.06150708598002156\n", "summary/frac_cis_2kb+\t0.05991582557187951\n", "summary/frac_cis_4kb+\t0.05740877856150059\n", "summary/frac_cis_10kb+\t0.053005208942234\n", "summary/frac_cis_20kb+\t0.049296597950196794\n", "summary/frac_cis_40kb+\t0.046516763441279245\n", "summary/frac_dups\t0.032785752426422086\n", "summary/complexity_naive\t2374298.3333298806\n", "chrom_freq/1/1\t4333\n", "chrom_freq/10/10\t7537\n", "chrom_freq/1/6\t173\n", "chrom_freq/1/7\t121\n" ] } ], "source": [ "!cat phased.XA.dedup.stats | head -35" ] }, { "cell_type": "markdown", "id": "d7ae3575-aef8-4a8b-9707-b37627653ba9", "metadata": {}, "source": [ "Dedup might generate warning that phase columns now contain mixed data types ('.' alongside with 0 and 1). This warning is inherited from reading by reading the pairs file by pandas." 
] }, { "cell_type": "markdown", "id": "89f9d829-3f79-49b4-b74d-8bca732b8a44", "metadata": {}, "source": [ "## Stats\n", "\n", "First, filter different types of reads:" ] }, { "cell_type": "code", "execution_count": 30, "id": "727a9d2b-5977-4763-81e5-64589c067688", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools select '(phase1==\"0\") and (phase2==\"0\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.phase0.pairs.gz\n", "pairtools select '(phase1==\"1\") and (phase2==\"1\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.phase1.pairs.gz\n", "pairtools select '(phase1==\".\") or (phase2==\".\")' phased.sorted.XA.nodup.pairs.gz -o phased.XA.unphased.pairs.gz\n", "pairtools select '(phase1!=phase2) and (phase1!=\".\") and (phase2!=\".\") and (phase1!=\"!\") and (phase2!=\"!\")' phased.sorted.XA.nodup.pairs.gz \\\n", " -o phased.XA.trans-phase.pairs.gz" ] }, { "cell_type": "markdown", "id": "916a5ca1-e549-4501-82d2-8a6e0645b864", "metadata": {}, "source": [ "Calculate stats for these different types:" ] }, { "cell_type": "code", "execution_count": 31, "id": "1172f899-41d6-4ca2-ab21-a283340011f8", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "WARNING:py.warnings:/users/anton.goloborodko/src/pairtools/pairtools/cli/stats.py:192: DtypeWarning: Columns (1) have mixed types. Specify dtype option on import or set low_memory=False.\n", " for chunk in pd.read_table(body_stream, names=cols, chunksize=100_000):\n", "\n", "WARNING:py.warnings:/users/anton.goloborodko/src/pairtools/pairtools/cli/stats.py:192: DtypeWarning: Columns (1) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", " for chunk in pd.read_table(body_stream, names=cols, chunksize=100_000):\n", "\n", "WARNING:py.warnings:/users/anton.goloborodko/src/pairtools/pairtools/lib/stats.py:880: RuntimeWarning: divide by zero encountered in double_scalars\n", " complexity = float(nseq / seq_to_complexity) # clean np.int64 data type\n", "\n" ] } ], "source": [ "%%bash\n", "pairtools stats phased.XA.phase0.pairs.gz -o phased.XA.phase0.stats\n", "pairtools stats phased.XA.phase1.pairs.gz -o phased.XA.phase1.stats\n", "pairtools stats phased.XA.unphased.pairs.gz -o phased.XA.unphased.stats\n", "pairtools stats phased.XA.trans-phase.pairs.gz -o phased.XA.trans-phase.stats" ] }, { "cell_type": "markdown", "id": "71a53db7", "metadata": {}, "source": [ "These stats show that rather few reads end up being phased, and that the vast majority of reads are unphased. \n", "\n", "Furthermore, the number of phased reads is comparable between the two haplotypes. The minor mismatch rate is likely to be caused by imperfect annotation of SNVs." 
] }, { "cell_type": "code", "execution_count": 32, "id": "2567ccea", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total\t574\n", "total_unmapped\t0\n", "total_single_sided_mapped\t0\n", "total_mapped\t574\n", "total_dups\t13\n", "total_nodups\t561\n", "cis\t535\n", "trans\t26\n", "pair_types/UU\t561\n", "pair_types/DD\t13\n", "cis_1kb+\t68\n", "cis_2kb+\t67\n", "cis_4kb+\t60\n", "cis_10kb+\t56\n", "cis_20kb+\t48\n", "cis_40kb+\t42\n", "summary/frac_cis\t0.9536541889483066\n", "summary/frac_cis_1kb+\t0.12121212121212122\n", "summary/frac_cis_2kb+\t0.11942959001782531\n", "summary/frac_cis_4kb+\t0.10695187165775401\n" ] } ], "source": [ "!cat phased.XA.phase0.stats | head -20" ] }, { "cell_type": "code", "execution_count": 1, "id": "8f521224", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total\t485\n", "total_unmapped\t0\n", "total_single_sided_mapped\t0\n", "total_mapped\t485\n", "total_dups\t9\n", "total_nodups\t476\n", "cis\t394\n", "trans\t82\n", "pair_types/UU\t476\n", "pair_types/DD\t9\n", "cis_1kb+\t53\n", "cis_2kb+\t51\n", "cis_4kb+\t50\n", "cis_10kb+\t44\n", "cis_20kb+\t41\n", "cis_40kb+\t38\n", "summary/frac_cis\t0.8277310924369747\n", "summary/frac_cis_1kb+\t0.11134453781512606\n", "summary/frac_cis_2kb+\t0.10714285714285714\n", "summary/frac_cis_4kb+\t0.10504201680672269\n" ] } ], "source": [ "!cat phased.XA.phase1.stats | head -20" ] }, { "cell_type": "code", "execution_count": 33, "id": "3bb8d589", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total\t241\n", "total_unmapped\t0\n", "total_single_sided_mapped\t0\n", "total_mapped\t241\n", "total_dups\t0\n", "total_nodups\t241\n", "cis\t177\n", "trans\t64\n", "pair_types/UU\t241\n", "cis_1kb+\t85\n", "cis_2kb+\t83\n", "cis_4kb+\t81\n", "cis_10kb+\t75\n", "cis_20kb+\t70\n", "cis_40kb+\t64\n", "summary/frac_cis\t0.7344398340248963\n", "summary/frac_cis_1kb+\t0.35269709543568467\n", 
"summary/frac_cis_2kb+\t0.34439834024896265\n", "summary/frac_cis_4kb+\t0.3360995850622407\n", "summary/frac_cis_10kb+\t0.3112033195020747\n" ] } ], "source": [ "!cat phased.XA.trans-phase.stats | head -20" ] }, { "cell_type": "code", "execution_count": 34, "id": "29e85a23", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "total\t200077\n", "total_unmapped\t0\n", "total_single_sided_mapped\t42192\n", "total_mapped\t157885\n", "total_dups\t5197\n", "total_nodups\t152688\n", "cis\t52294\n", "trans\t100394\n", "pair_types/UU\t152688\n", "pair_types/MU\t21277\n", "pair_types/NU\t18827\n", "pair_types/UM\t2088\n", "pair_types/DD\t5197\n", "cis_1kb+\t29483\n", "cis_2kb+\t28885\n", "cis_4kb+\t27905\n", "cis_10kb+\t26158\n", "cis_20kb+\t24601\n", "cis_40kb+\t23101\n", "summary/frac_cis\t0.3424892591428272\n" ] } ], "source": [ "!cat phased.XA.unphased.stats | head -20" ] }, { "cell_type": "code", "execution_count": null, "id": "2369ced7", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "id": "25fdebb4-24ca-4280-950e-baa9cc92d28e", "metadata": {}, "source": [ "Visualize with multiQC:" ] }, { "cell_type": "code", "execution_count": null, "id": "9039184f-65a1-43bd-9495-85266fc1fed6", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "multiqc phased.XA.*phase*.stats -o multiqc_report_phasing" ] }, { "cell_type": "code", "execution_count": null, "id": "ed403d73-7b5f-432b-9e91-e8c70906d31b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import IFrame\n", "\n", "IFrame(src='./multiqc_report_phasing/multiqc_report.html', width=1200, height=700)" ] }, { "cell_type": "code", "execution_count": null, "id": "20e713fe-c962-4d6f-af73-17c21b987a5a", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "main", "language": 
"python", "name": "main" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.1.3/doc/examples/pairtools_restrict_walkthrough.ipynb000066400000000000000000010367611474715105500255570ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "a26ff7fa-0774-497c-8df8-4686845bf3b6", "metadata": {}, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 2, "id": "882425fb-e34a-41c7-8103-270da19ecec2", "metadata": {}, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker \n", "import matplotlib.gridspec\n", "import seaborn as sns\n", "\n", "%matplotlib inline\n", "plt.style.use('seaborn-poster')\n", "\n", "import pandas as pd\n", "import pairtools\n", "import bioframe" ] }, { "cell_type": "markdown", "id": "66194c2b-8c1b-4e21-80ef-1d2bf069199c", "metadata": {}, "source": [ "# Pairtools restrict walkthrough\n", "\n", "The common approach to analyse Hi-C data is based to analyse the contacts of the restriction fragments. It is used in *hiclib*, Juicer, HiC-Pro. \n", "\n", "Throughout this notebook, we will work with one of [Rao et al. 2014 datasets for IMR90 cells](https://data.4dnucleome.org/experiment-set-replicates/4DNES1ZEJNRU/) [1]. \n", "\n", "\n", "[1] Rao, S. S., Huntley, M. H., Durand, N. C., Stamenova, E. K., Bochkov, I. D., Robinson, J. T., Sanborn, A. L., Machol, I., Omer, A. D., Lander, E. S., & Aiden, E. L. (2014). A 3D map of the human genome at kilobase resolution reveals principles of chromatin looping. Cell, 159(7), 1665–1680. 
https://doi.org/10.1016/j.cell.2014.11.021" ] }, { "cell_type": "markdown", "id": "8a77207f-d444-4d5c-ab6c-1f2a1cf4c7b2", "metadata": {}, "source": [ "### Download the data from 4DN portal\n", "\n", "To download the data from 4DN, you may need to [register, get key and secret and write a spceialized curl command for your user](https://data.4dnucleome.org/help/user-guide/downloading-files): " ] }, { "cell_type": "code", "execution_count": 61, "id": "a3d3eafc-5c28-40d4-be2a-8c4ba23e9809", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " % Total % Received % Xferd Average Speed Time Time Time Current\n", " Dload Upload Total Spent Left Speed\n", "100 330 100 330 0 0 931 0 --:--:-- --:--:-- --:--:-- 932\n", "100 3395M 100 3395M 0 0 29.7M 0 0:01:54 0:01:54 --:--:-- 33.1M 0:01:48 0:00:12 0:01:36 32.8M\n" ] } ], "source": [ "!curl -O -L --user RG6CSRMC:xlii3stnkphfygmu https://data.4dnucleome.org/files-processed/4DNFIW2BKSNF/@@download/4DNFIW2BKSNF.pairs.gz" ] }, { "cell_type": "code", "execution_count": null, "id": "22d0732a-9d6a-4957-8081-5cad5b3abf09", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Get total number of contacts to assess how many reads you can read in the future:\n", "pairtools stats 4DNFIW2BKSNF.pairs.gz | head -n 1\n", "# This will produce around 173 M pairs" ] }, { "cell_type": "code", "execution_count": null, "id": "ff187814-015c-4f6a-b0e8-082161dfcef7", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Sample the fraction of pairs that will produce ~ 1 M of pairs:\n", "pairtools sample 0.007 4DNFIW2BKSNF.pairs.gz -o 4DNFIW2BKSNF.pairs.sampled.gz" ] }, { "cell_type": "markdown", "id": "e8a51837-c1a9-4c83-a140-8be9f8cbbbed", "metadata": {}, "source": [ "#### Annotate restriction fragments" ] }, { "cell_type": "code", "execution_count": null, "id": "61b32154-a8ec-48d1-9370-eaf6bc357e08", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Digest the genome into restriction fragments:\n", 
"cooler digest ../tests_chromap/hg38/hg38.fa.sizes ../tests_chromap/hg38/hg38.fa MboI > hg38/hg38.MboI.restricted.bed" ] }, { "cell_type": "code", "execution_count": null, "id": "e0699dee-a95f-4114-82c5-9758c74b5d27", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "# Annotate restriction fragments in the sampled file: \n", "pairtools restrict -f hg38/hg38.MboI.restricted.bed 4DNFIW2BKSNF.pairs.sampled.gz -o 4DNFIW2BKSNF.pairs.sampled.restricted.gz" ] }, { "cell_type": "markdown", "id": "34c594fe-41df-4f42-a25d-7c050f020fb2", "metadata": {}, "source": [ "#### Read the pairs and analyse them as dataframe" ] }, { "cell_type": "code", "execution_count": 3, "id": "309d3c54-7b2d-4a5e-a750-87eb0b6914d9", "metadata": {}, "outputs": [], "source": [ "from pairtools.lib import headerops, fileio" ] }, { "cell_type": "code", "execution_count": 4, "id": "40daf717-6ffd-4c27-8b68-d553d458a713", "metadata": {}, "outputs": [], "source": [ "pairs_file = '4DNFIW2BKSNF.pairs.sampled.restricted.gz'" ] }, { "cell_type": "code", "execution_count": 5, "id": "6d363f7a-6053-488e-ad59-9df14260a7f6", "metadata": {}, "outputs": [], "source": [ "pairs_stream = fileio.auto_open(pairs_file, 'r')\n", "header, pairs_stream = headerops.get_header(pairs_stream)\n", "columns = headerops.get_colnames(header)" ] }, { "cell_type": "code", "execution_count": 6, "id": "deb04397-579b-4305-9dec-4f58e61e7ad4", "metadata": {}, "outputs": [], "source": [ "df = pd.read_table(pairs_stream, comment=\"#\", header=None)\n", "df.columns = columns" ] }, { "cell_type": "code", "execution_count": null, "id": "7688d530-4860-40e9-b865-affb7c35ccf1", "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 7, "id": "44d60718-dd56-4113-a409-57e5c1b882c0", "metadata": {}, "outputs": [], "source": [ "df.loc[:, 'dist_rfrag1_left'] = df.pos1 - df.rfrag_start1\n", "df.loc[:, 'dist_rfrag1_right'] = df.rfrag_end1 - df.pos1\n", "\n", "df.loc[:, 'dist_rfrag2_left'] = df.pos2 - 
df.rfrag_start2\n", "df.loc[:, 'dist_rfrag2_right'] = df.rfrag_end2 - df.pos2" ] }, { "cell_type": "markdown", "id": "330e034a-e4f2-4deb-ab2c-9103e9083fa2", "metadata": {}, "source": [ "Many of the 5'-ends of reads are mapped to the restriction sites: " ] }, { "cell_type": "code", "execution_count": 8, "id": "1a7ef073-082b-4aa6-972f-85ada84be4d4", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "xmin = 0\n", "xmax = 2000\n", "step = 20\n", "\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_left, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest upstream rsite, + mapped reads')\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_right, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest downstream rsite, + mapped reads')\n", "\n", "plt.xlim(xmin, xmax)\n", "plt.legend()\n", "plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 9, "id": "8fb2a16b-a921-4451-9250-4c0e381ac516", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "xmin = 0\n", "xmax = 200\n", "step = 1\n", "\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_left, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest upstream rsite, + mapped reads')\n", "sns.distplot(df.query('strand1==\"+\"').dist_rfrag1_right, bins=np.arange(xmin, xmax, step), label='Distance from the 5\\' read end to the nearest downstream rsite, + mapped reads')\n", "\n", "plt.xlim(xmin, xmax)\n", "plt.legend()\n", "plt.tight_layout()" ] }, { "cell_type": "markdown", "id": "2cfd8a30-79d4-4926-b0e9-809ac185228c", "metadata": {}, "source": [ "However, if we select only the pairs that map to the restriction sites, there is no significant skew in scaling:" ] }, { "cell_type": "code", "execution_count": 10, "id": "97eac9b5-2b51-4529-9056-48d061c30d6b", "metadata": {}, "outputs": [], "source": [ "hg38_chromsizes = bioframe.fetch_chromsizes('hg38', \n", " as_bed=True)\n", "hg38_cens = bioframe.fetch_centromeres('hg38')\n", "hg38_arms = bioframe.make_chromarms(hg38_chromsizes, \n", " dict(hg38_cens.set_index('chrom').mid), \n", " cols_chroms=('chrom', 'start', 'end') )\n", "\n", "# To fix pandas bug in some versions: \n", "hg38_arms['start'] = hg38_arms['start'].astype(int)\n", "hg38_arms['end'] = hg38_arms['end'].astype(int)" ] }, { "cell_type": "code", "execution_count": 11, "id": "28358a56-a6fe-4ec7-9ca6-52822a6224b9", "metadata": {}, "outputs": [], "source": [ "import pairtools.lib.scaling as scaling" ] }, { "cell_type": "code", "execution_count": 12, "id": "3769c57b-78c7-48e4-85bf-710f7f459e1e", "metadata": {}, "outputs": [], "source": [ "def plot(cis_scalings, n, xlim=(1e1,1e9), label='' ):\n", " strand_gb = cis_scalings.groupby(['strand1', 'strand2'])\n", " for strands in ['+-', '-+', '++', '--']:\n", " sc_strand = strand_gb.get_group(tuple(strands))\n", " sc_agg = (sc_strand\n", " 
.groupby(['min_dist','max_dist'])\n", " .agg({'n_pairs':'sum', 'n_bp2':'sum'})\n", " .reset_index())\n", "\n", " dist_bin_mids = np.sqrt(sc_agg.min_dist * sc_agg.max_dist)\n", " pair_frequencies = sc_agg.n_pairs / sc_agg.n_bp2\n", " pair_frequencies = pair_frequencies/cis_scalings.n_pairs.sum()\n", " mask = pair_frequencies>0\n", " label_long = f'{strands[0]}{strands[1]} {label}'\n", "\n", " if np.sum(mask)>0:\n", " plt.loglog(\n", " dist_bin_mids[mask],\n", " pair_frequencies[mask],\n", " label=label_long,\n", " lw=2\n", " )\n", "\n", " plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", " plt.gca().yaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", " plt.gca().set_aspect(1.0)\n", " plt.xlim(xlim)\n", "\n", " plt.grid(lw=0.5,color='gray')\n", " plt.legend(loc=(1.1,0.4))\n", " plt.ylabel('contact frequency, \\nHi-C molecule per bp pair normalized by total')\n", " plt.xlabel('distance, bp')\n", "\n", " plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": 13, "id": "bf07b649-d184-4ded-827b-d8ff3f9f4284", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Get the pairs where R1 is far enough from site of restriction, but not too far\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left>5 and dist_rfrag1_left<=250)\")\n", "n_distant = len(df_subset)\n", "cis_scalings_distant, trans_levels_distant = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_distant, n_distant, label=\"pairs, 5' distant from rsite\")\n", "\n", "\n", "# Get the pairs where R1 is too far enough from site of restriction\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left>550)\")\n", "n_toodistant = len(df_subset)\n", "cis_scalings_toodistant, trans_levels_toodistant = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_toodistant, n_toodistant, label=\"pairs, 5' too far from rsite\")\n", "\n", "\n", "# Get the pairs where R1 is very close to the site of restriction\n", "df_subset = df.query(\"(strand1=='+' and dist_rfrag1_left<5)\")\n", "n_tooclose = len(df_subset)\n", "cis_scalings_tooclose, trans_levels_tooclose = scaling.compute_scaling(\n", " df_subset,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings_tooclose, n_tooclose, label=\"pairs, 5' close to rsite\")\n", "# Try another replicate of replicate, maybe the last one " ] }, { "cell_type": "markdown", "id": "60967f0b-7f50-429f-8865-046d3fd0d878", "metadata": {}, "source": [ "#### How many pairs we take if not strictly filtering by dangling ends and self-circles? 
" ] }, { "cell_type": "code", "execution_count": 14, "id": "bfb77fa0-85ee-4573-b745-57353b74f646", "metadata": {}, "outputs": [], "source": [ "df.loc[:, \"type_rfrag\"] = \"Regular pair\"\n", "\n", "mask_neighboring_rfrags = (np.abs(df.rfrag1-df.rfrag2)<=1)\n", "\n", "mask_DE = (df.strand1==\"+\") & (df.strand2==\"-\") & mask_neighboring_rfrags\n", "df.loc[mask_DE, \"type_rfrag\"] = \"DanglingEnd\"\n", "\n", "mask_SS = (df.strand1==\"-\") & (df.strand2==\"+\") & mask_neighboring_rfrags\n", "df.loc[mask_SS, \"type_rfrag\"] = \"SelfCircle\"\n", "\n", "mask_Err = (df.strand1==df.strand2) & mask_neighboring_rfrags\n", "df.loc[mask_Err, \"type_rfrag\"] = \"Mirror\"" ] }, { "cell_type": "code", "execution_count": 15, "id": "c6913bb4-f861-4098-a193-94a134df4ea5", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type_rfrag\n", "DanglingEnd 76902\n", "Mirror 3214\n", "Regular pair 1132002\n", "SelfCircle 3036\n", "Name: readID, dtype: int64" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(\"type_rfrag\").groupby(\"type_rfrag\").count()['readID']" ] }, { "cell_type": "code", "execution_count": 16, "id": "c2e77360-e8ee-43c6-8322-b9990aef19bc", "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Full scaling\n", "\n", "n = len(df)\n", "cis_scalings, trans_levels = scaling.compute_scaling(\n", " df,\n", " regions=hg38_arms,\n", " chromsizes=hg38_arms,\n", " dist_range=(10, 1e9), \n", " n_dist_bins=128,\n", " chunksize=int(1e7),\n", " )\n", "plot(cis_scalings, n, label=\"pairs\")\n", "\n", "# The point where the scalings by distance become balanced:\n", "plt.axvline(2e3, ls='--', c='gray', label='Balancing point')\n", "\n", "plt.savefig(\"./oriented_scalings.pdf\")" ] }, { "cell_type": "code", "execution_count": 17, "id": "a506a74c-230f-4219-9273-99b6f04e211d", "metadata": {}, "outputs": [], "source": [ "df.loc[:, \"type_bydist\"] = \"Regular pair\"\n", "\n", "mask_ondiagonal = (np.abs(df.pos2-df.pos1)<=2e3)\n", "\n", "mask_DE = (df.strand1==\"+\") & (df.strand2==\"-\") & mask_ondiagonal\n", "df.loc[mask_DE, \"type_bydist\"] = \"DanglingEnd\"\n", "\n", "mask_SS = (df.strand1==\"-\") & (df.strand2==\"+\") & mask_ondiagonal\n", "df.loc[mask_SS, \"type_bydist\"] = \"SelfCircle\"\n", "\n", "mask_Err = (df.strand1==df.strand2) & mask_ondiagonal\n", "df.loc[mask_Err, \"type_bydist\"] = \"Mirror\"" ] }, { "cell_type": "code", "execution_count": 18, "id": "255bda45-6a64-4795-a964-546e55d67145", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "type_bydist\n", "DanglingEnd 135381\n", "Mirror 18383\n", "Regular pair 1053213\n", "SelfCircle 8177\n", "Name: readID, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values(\"type_bydist\").groupby(\"type_bydist\").count()['readID']" ] }, { "cell_type": "code", "execution_count": 19, "id": "b628bdfb-abbf-45df-8f33-2056dc96f19f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
readID
type_bydistDanglingEndMirrorRegular pairSelfCircle
type_rfrag
DanglingEnd76898040
Mirror03176380
Regular pair584831520710529945318
SelfCircle001772859
\n", "
" ], "text/plain": [ " readID \n", "type_bydist DanglingEnd Mirror Regular pair SelfCircle\n", "type_rfrag \n", "DanglingEnd 76898 0 4 0\n", "Mirror 0 3176 38 0\n", "Regular pair 58483 15207 1052994 5318\n", "SelfCircle 0 0 177 2859" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.sort_values([\"type_rfrag\", \"type_bydist\"])\\\n", " .groupby([\"type_rfrag\", \"type_bydist\"])\\\n", " .count()[['readID']]\\\n", " .reset_index()\\\n", " .pivot(columns=\"type_bydist\", index=\"type_rfrag\")\\\n", " .fillna(0).astype(int)" ] }, { "cell_type": "markdown", "id": "23a56c6f-c2d1-48e4-9b2e-860622af5a3f", "metadata": {}, "source": [ "False Positives are in 3rd row, False Negatives are in 3rd column. Filtering by distance is, thus, nearly as effective as filtering by restriction fragment, but removes additional pairs that can be potential undercut by restriction enzyme.\n", "\n", "Removing all contacts closer than 2 Kb will remove Hi-C artifacts." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.10", "language": "python", "name": "python310" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.1.3/doc/examples/pairtools_walkthrough.ipynb000066400000000000000000004632711474715105500236370ustar00rootroot00000000000000{ "cells": [ { "cell_type": "markdown", "id": "112fe2d5-aaed-4eb1-b3f5-2f5889a9c89f", "metadata": { "tags": [] }, "source": [ "# Pairtools walkthrough\n", "\n", "Welcome to the pairtools walkthrough. 
\n", "\n", "Pairtools is a tool for extraction of pairwise contacts out of sequencing chromosomes conformation capture data, such as Hi-C, Micro-C or MC-3C.\n", "Pairtools is used for obtaining .cool files by [distiller](https://github.com/open2c/distiller-nf/blob/master/distiller.nf), and has many more applications (see single-cell walkthrough or phasing walkthrough). \n", "\n", "Here, we will cover the basic steps from raw reads to .cool file with binned contacts.\n", "\n", "Outline:\n", "\n", "- [Download raw data](#Download-raw-data)\n", "\n", "- [Install reference genome](#Install-reference-genome)\n", "\n", "- [Map data with bwa mem](#Map-data-with-bwa-mem)\n", "\n", "- [Extract contacts](#Contacts-extraction)\n", "\n", "- [MultiQC]( #MultiQC )\n", "\n", "- [Load pairs to cooler](#Load-pairs-to-cooler)\n", "\n", "- [Visualize cooler](#Visualize-cooler)" ] }, { "cell_type": "markdown", "id": "bd264406-be74-4060-9798-e18040c44889", "metadata": {}, "source": [ "### Download raw data\n", "\n", "\"Raw\" data, or .fastq files are generated by sequencing facilities or can be taken from public databases, such as SRA. We will take a sample from Rao et at al. 2017, human datasets.\n", "To reduce computateion time, take 5 mln reads instead of full sample:" ] }, { "cell_type": "code", "execution_count": null, "id": "f4e310c0-2d16-4e7d-87d7-44feec8e6256", "metadata": {}, "outputs": [], "source": [ "! fastq-dump SRR13849430 --gzip --split-spot --split-3 --minSpotId 0 --maxSpotId 5000000" ] }, { "cell_type": "code", "execution_count": null, "id": "571e94fb-3dec-4042-9e21-6c39802ed8df", "metadata": {}, "outputs": [], "source": [ "! 
ls SRR13849430*.fastq.gz" ] }, { "cell_type": "code", "execution_count": 1, "id": "e9fb044d-1ba0-48c7-b40a-99d033518e43", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "@SRR13849430.1 1 length=150\n", "NTCTCAGCCTTTATAAGATAGAAGAGAGTTGGGACCTTGCTCTAAATTCTGCTTTAGCAAGGGACTTTTGTACCTGCTTTCTTCCTTTATCCAGATCTAAAAATAGTTTATATGCTGACAACTCCCTGATGTTATTCTTTGTAGTATTTG\n", "+SRR13849430.1 1 length=150\n", "#AAFFJJJJJJJJJJJAJAJJJJFJJJAFFFFFFA7A-FJ7JJJ-AJAJJF-<-JJFFJ7FJJF7FJJFJJ test.bam" ] }, { "cell_type": "markdown", "id": "89f9d829-3f79-49b4-b74d-8bca732b8a44", "metadata": {}, "source": [ "After mapping, you have .sam/.bam alignment file, which cannot be interpreted as pairs directly. You need to extract contacts from it:" ] }, { "cell_type": "code", "execution_count": 3, "id": "955bcafa-e521-4627-8c8b-94e05e46e6b8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.1\t121\tchr12\t78795720\t60\t53S97M\t=\t78795720\t0\tCAAATACTACAAAGAATAACATCAGGGAGTTGTCAGCATATAAACTATTTTTAGATCTGGATAAAGGAAGAAAGCAGGTACAAAAGTCCCTTGCTAAAGCAGAATTTAGAGCAAGGTCCCAACTCTCTTCTATCTTATAAAGGCTGAGAN\t-7-7---A------7--77--))))7--F-A)7F( pairtools split \\\n", " --output-pairs test.nodups.pairs.gz \\\n", " --output-sam test.nodups.bam \\\n", " ) \\\n", " --output-unmapped \\\n", " >( pairtools split \\\n", " --output-pairs test.unmapped.pairs.gz \\\n", " --output-sam test.unmapped.bam \\\n", " ) \\\n", " --output-dups \\\n", " >( pairtools split \\\n", " --output-pairs test.dups.pairs.gz \\\n", " --output-sam test.dups.bam \\\n", " ) \\\n", " --output-stats test.dedup.stats \\\n", " test.pairs.gz" ] }, { "cell_type": "code", "execution_count": 7, "id": "d9aaceeb-1a88-4c24-9fc2-3f44069715a1", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.513\tchr20\t23502312\tchr20\t23063544\t+\t+\tRU\t60\t60\n", "SRR13849430.1442\tchr7\t57224960\tchrX\t82818236\t+\t+\tUU\t60\t30\n", 
"SRR13849430.2378\tchr5\t115925933\tchr21\t24124840\t+\t+\tUU\t60\t50\n", "SRR13849430.2547\tchr1\t52097837\tchr12\t1888807\t-\t-\tUU\t60\t60\n", "SRR13849430.3015\tchr17\t74750879\tchr11\t117356318\t+\t-\tUR\t60\t60\n", "SRR13849430.3027\tchr15\t34977762\tchr15\t31897447\t-\t+\tUR\t11\t60\n", "SRR13849430.3406\tchr11\t1171960\tchr9\t121265592\t+\t-\tUU\t60\t60\n", "SRR13849430.3988\tchr16\t86824176\tchr13\t104521019\t-\t+\tUU\t60\t17\n", "SRR13849430.4030\tchr17\t73189645\tchr4\t49092470\t-\t+\tUU\t60\t31\n", "SRR13849430.4316\tchr8\t124329308\tchr8\t124336541\t-\t-\tUU\t60\t60\n" ] } ], "source": [ "%%bash\n", "# Unique pairs:\n", "gzip -dc test.nodups.pairs.gz | grep -v \"#\" | head -n 10" ] }, { "cell_type": "code", "execution_count": 8, "id": "ca3e27a7-7905-46b2-8ad4-245c28f01102", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "SRR13849430.60371\tchr2\t44507613\tchr7\t116276932\t-\t+\tDD\t60\t57\n", "SRR13849430.67567\tchr5\t62425895\tchr5\t62425612\t-\t+\tDD\t60\t60\n", "SRR13849430.97623\tchr3\t162233323\tchr3\t162154449\t-\t+\tDD\t60\t52\n", "SRR13849430.108366\tchr8\t48691403\tchr8\t48872239\t-\t-\tDD\t60\t60\n", "SRR13849430.138622\tchr16\t8435050\tchr16\t6032751\t+\t-\tDD\t60\t60\n", "SRR13849430.146482\tchr14\t86385083\tchr2\t119648648\t+\t+\tDD\t60\t60\n", "SRR13849430.148232\tchrX\t21885792\tchrX\t21887418\t+\t-\tDD\t60\t60\n", "SRR13849430.149771\tchr16\t6646543\tchr16\t6648097\t-\t-\tDD\t60\t60\n", "SRR13849430.156983\tchr4\t55704089\tchr4\t76039070\t+\t+\tDD\t60\t13\n", "SRR13849430.157962\tchr6\t47656758\tchr6\t47748395\t+\t-\tDD\t60\t35\n" ] } ], "source": [ "%%bash\n", "# Only duplicated pairs:\n", "gzip -dc test.dups.pairs.gz | grep -v \"#\" | head -n 10" ] }, { "cell_type": "markdown", "id": "7441b723-5c5d-4502-8330-c8b7b4a24e30", "metadata": {}, "source": [ "#### pairtools select\n", "\n", "Sometimes you may need certain types of pairs based on their properties, such as mapq, pair type, distance or 
orientation. \n", "For all these manipulations, there is `pairtools select`, which takes a file and a Pythonic condition as input:" ] }, { "cell_type": "code", "execution_count": null, "id": "3a2de712-b4ef-4ee3-af68-d19f2fa8fb8f", "metadata": {}, "outputs": [], "source": [ "%%bash \n", "pairtools select \"mapq1>0 and mapq2>0\" test.nodups.pairs.gz -o test.nodups.UU.pairs.gz" ] }, { "cell_type": "markdown", "id": "1e6445fa-551b-4583-aa61-587a27370fa4", "metadata": { "tags": [] }, "source": [ "#### pairtools stats\n", "\n", "Describe the types and distance properties of pairs: " ] }, { "cell_type": "code", "execution_count": null, "id": "3aca9ac8-668b-46c4-a1c2-6172303f284a", "metadata": {}, "outputs": [], "source": [ "%%bash\n", "pairtools stats test.pairs.gz -o test.stats" ] }, { "cell_type": "markdown", "id": "ca2c1c56-9024-4fa0-abb9-ed1f9ab313f1", "metadata": {}, "source": [ "### MultiQC" ] }, { "cell_type": "code", "execution_count": null, "id": "0967edf9-fdf6-4294-98fc-a2c069917de6", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "multiqc test.stats" ] }, { "cell_type": "code", "execution_count": 9, "id": "d76bd76c-f0f5-4921-b873-9390e715eab9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " " ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import IFrame\n", "\n", "IFrame(src='./multiqc_report.html', width=1200, height=700)" ] }, { "cell_type": "markdown", "id": "e0dc157d-a8c6-4319-b83c-d450f2a822f3", "metadata": {}, "source": [ "### Load pairs to cooler\n", "Finally, once you have obtained a list of appropriate pairs, you can create coolers from it: " ] }, { "cell_type": "code", "execution_count": null, "id": "3d9df0e2-f8d3-487b-8369-cddf8bdd54df", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "cooler cload pairs \\\n", " -c1 2 -p1 3 -c2 4 -p2 5 \\\n", " --assembly hg38 \\\n", " 
~/.local/share/genomes/hg38/hg38.fa.sizes:1000000 \\\n", " test.nodups.UU.pairs.gz \\\n", " test.hg38.1000000.cool" ] }, { "cell_type": "code", "execution_count": null, "id": "083da222-8d15-408b-ad8c-7fa35881597f", "metadata": { "tags": [ "hide-output" ] }, "outputs": [], "source": [ "%%bash\n", "cooler zoomify \\\n", " --nproc 5 \\\n", " --out test.hg38.1000000.mcool \\\n", " --resolutions 1000000,2000000 \\\n", " --balance \\\n", " test.hg38.1000000.cool" ] }, { "cell_type": "markdown", "id": "9a17fb3c-d5f8-472e-b80a-e7708798ea72", "metadata": {}, "source": [ "### Visualize cooler\n", "\n", "Based on [open2c vis example](https://github.com/open2c/open2c_examples/blob/master/viz.ipynb)" ] }, { "cell_type": "code", "execution_count": 36, "id": "1839183d-4d5c-4b29-926c-0d56e00c8b8a", "metadata": {}, "outputs": [], "source": [ "import cooler\n", "import matplotlib as mpl\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import cooltools.lib.plotting\n", "from matplotlib.colors import LogNorm\n", "import seaborn as sns\n", "import bioframe\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 11, "id": "7bae233c-36f2-483c-8957-766e200739a4", "metadata": {}, "outputs": [], "source": [ "file = \"test.hg38.1000000.mcool::/resolutions/1000000\"" ] }, { "cell_type": "code", "execution_count": 12, "id": "2b4cc40b-5aaf-4db8-b870-ba190fdb5d01", "metadata": {}, "outputs": [], "source": [ "clr = cooler.Cooler(file)" ] }, { "cell_type": "code", "execution_count": 19, "id": "4350d8c1-b50c-43f7-92e5-43802122320b", "metadata": {}, "outputs": [], "source": [ "# Define chromosome starts\n", "chromstarts = []\n", "for i in clr.chromnames:\n", " chromstarts.append(clr.extent(i)[0])" ] }, { "cell_type": "code", "execution_count": 20, "id": "cd823dec-49c8-46e0-96b6-dcb0344f9d9c", "metadata": {}, "outputs": [], "source": [ "from matplotlib.ticker import EngFormatter\n", "bp_formatter = EngFormatter('b')\n", "\n", "def format_ticks(ax, x=True, y=True, 
rotate=True):\n", " if y:\n", " ax.yaxis.set_major_formatter(bp_formatter)\n", " if x:\n", " ax.xaxis.set_major_formatter(bp_formatter)\n", " ax.xaxis.tick_bottom()\n", " if rotate:\n", " ax.tick_params(axis='x',rotation=45)" ] }, { "cell_type": "code", "execution_count": 40, "id": "896235bb-749b-4c2e-95ae-352c91452b24", "metadata": {}, "outputs": [], "source": [ "# Define the bounds of the continuous fragment of whole-genome interaction map\n", "chrom_start, chrom_end = clr.chromnames.index('chr3'), clr.chromnames.index('chr6')\n", "start, end = chromstarts[chrom_start], chromstarts[chrom_end]" ] }, { "cell_type": "code", "execution_count": 43, "id": "a0d99510-d5e6-4de5-861b-8eeddcb6c25b", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "vmax = 15\n", "norm = LogNorm(vmin=1, vmax=vmax)\n", "\n", "f, axs = plt.subplots(\n", " figsize=(13, 10),\n", " nrows=2, \n", " ncols=1,\n", " sharex=False, sharey=False)\n", "\n", "ax = axs[0]\n", "ax.set_title('Interaction maps (chr1)')\n", "im = ax.matshow(clr.matrix(balance=False).fetch('chr1'), vmax=vmax, cmap='fall'); \n", "plt.colorbar(im, ax=ax ,fraction=0.046, pad=0.04, label='chr1');\n", "\n", "ax = axs[1]\n", "ax.set_title('Chromosomes 3-5')\n", "im = ax.matshow(clr.matrix(balance=False)[start:end, start:end], norm=norm, cmap='fall'); \n", "plt.colorbar(im, ax=ax ,fraction=0.046, pad=0.04, label='Whole-genome');\n", "ax.set_xticks(np.array(chromstarts[chrom_start:chrom_end])-start, clr.chromnames[chrom_start:chrom_end], rotation=90);\n", "ax.set_yticks(np.array(chromstarts[chrom_start:chrom_end])-start, clr.chromnames[chrom_start:chrom_end], rotation=90);\n", "\n", "format_ticks(axs[0], rotate=False)\n", "\n", "plt.tight_layout()" ] }, { "cell_type": "code", "execution_count": null, "id": "e07ca165-15ed-459c-af7b-3156de81f935", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "test", "language": "python", "name": "test" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 } pairtools-1.1.3/doc/examples/scalings_example.ipynb000066400000000000000000003753671474715105500225320ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2023-12-18 23:04:28-- https://osf.io/download/crgu8/\n", "Resolving osf.io (osf.io)... 
35.190.84.173\n", "Connecting to osf.io (osf.io)|35.190.84.173|:443... connected.\n", "HTTP request sent, awaiting response... 302 FOUND\n", "Location: https://files.de-1.osf.io/v1/resources/638ue/providers/osfstorage/623993788d53ef082867e2b9 [following]\n", "--2023-12-18 23:04:28-- https://files.de-1.osf.io/v1/resources/638ue/providers/osfstorage/623993788d53ef082867e2b9\n", "Resolving files.de-1.osf.io (files.de-1.osf.io)... 35.186.249.111\n", "Connecting to files.de-1.osf.io (files.de-1.osf.io)|35.186.249.111|:443... connected.\n", "HTTP request sent, awaiting response... " ] }, { "name": "stdout", "output_type": "stream", "text": [ "302 Found\n", "Location: https://storage.googleapis.com/cos-osf-prod-files-de-1/b72f21bb7e21d00541fb3cbc50ef77d67b75970f7d255912ca527fb4c8e5cc0a?response-content-disposition=attachment%3B%20filename%3D%22test_pairs.wp-all.pairs%22%3B%20filename%2A%3DUTF-8%27%27test_pairs.wp-all.pairs&GoogleAccessId=files-de-1%40cos-osf-prod.iam.gserviceaccount.com&Expires=1702937129&Signature=0tM4rWv5RPZjFTYCKzHNbOZoInW3EOidpN5eHp1mlQv%2BWxUbUQ%2BqHDqVmJB4afWQ9CeQpovADOp%2BHNDVxr4GhotEENmyAd7GGdoGRkS%2FeSXM3G1hLxR5H3S5kBWfTVk57pRUXu%2FNj%2FZXOTJo5zKizIdmYlpPZ%2BDZZJvfXuExR3G6OWXcM3P7ufpFe6d53ZxiqpyhxIKVOU4uYo7zwFyxxofGrd5DONsz53sEH15S%2Fc84emZrxaFiQW5%2FEaupoEtLJQHcFc5au3fh8wApS4gtTU%2FDkINvnkyEDXyxND7%2FJDfQ0xJRRjelDXuFVHHDxbIqIfVhQnFK0QMEmlyoBLax1g%3D%3D [following]\n", "--2023-12-18 23:04:29-- 
https://storage.googleapis.com/cos-osf-prod-files-de-1/b72f21bb7e21d00541fb3cbc50ef77d67b75970f7d255912ca527fb4c8e5cc0a?response-content-disposition=attachment%3B%20filename%3D%22test_pairs.wp-all.pairs%22%3B%20filename%2A%3DUTF-8%27%27test_pairs.wp-all.pairs&GoogleAccessId=files-de-1%40cos-osf-prod.iam.gserviceaccount.com&Expires=1702937129&Signature=0tM4rWv5RPZjFTYCKzHNbOZoInW3EOidpN5eHp1mlQv%2BWxUbUQ%2BqHDqVmJB4afWQ9CeQpovADOp%2BHNDVxr4GhotEENmyAd7GGdoGRkS%2FeSXM3G1hLxR5H3S5kBWfTVk57pRUXu%2FNj%2FZXOTJo5zKizIdmYlpPZ%2BDZZJvfXuExR3G6OWXcM3P7ufpFe6d53ZxiqpyhxIKVOU4uYo7zwFyxxofGrd5DONsz53sEH15S%2Fc84emZrxaFiQW5%2FEaupoEtLJQHcFc5au3fh8wApS4gtTU%2FDkINvnkyEDXyxND7%2FJDfQ0xJRRjelDXuFVHHDxbIqIfVhQnFK0QMEmlyoBLax1g%3D%3D\n", "Resolving storage.googleapis.com (storage.googleapis.com)... 142.251.37.123, 142.251.36.91, 142.251.36.123, ...\n", "Connecting to storage.googleapis.com (storage.googleapis.com)|142.251.37.123|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 564902846 (539M) [application/octet-stream]\n", "Saving to: ‘./tmp/test.pairs’\n", "\n", "./tmp/test.pairs 100%[===================>] 538.73M 163MB/s in 3.5s \n", "\n", "2023-12-18 23:04:33 (156 MB/s) - ‘./tmp/test.pairs’ saved [564902846/564902846]\n", "\n" ] } ], "source": [ "!wget https://osf.io/download/crgu8/ -O ./tmp/test.pairs" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "%load_ext autoreload\n", "%autoreload 2" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "import warnings\n", "warnings.filterwarnings(\"ignore\")\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import matplotlib.pyplot as plt\n", "import matplotlib.ticker \n", "import matplotlib.gridspec \n", "\n", "%matplotlib inline\n", "plt.style.use('seaborn-poster')\n", "\n", "import pairtools\n", "import pairtools.lib.scaling 
as scaling\n", "import bioframe" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "pairs_path = './tmp/test.pairs'" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "mm10_chromsizes = bioframe.fetch_chromsizes('mm10', as_bed=True)\n", "mm10_arms = mm10_chromsizes\n", "\n", "# hg38_chromsizes = bioframe.fetch_chromsizes('hg38', as_bed=True)\n", "# hg38_cens = bioframe.fetch_centromeres('hg38')\n", "# hg38_arms = bioframe.split(hg38_chromsizes, hg38_cens, cols_points=['chrom', 'mid'])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [ "cis_scalings, trans_levels = scaling.compute_scaling(\n", " pairs_path,\n", " regions=mm10_arms,\n", " chromsizes=mm10_chromsizes,\n", " dist_range=(10, 1000000000), \n", " n_dist_bins_decade=8,\n", " chunksize=int(1e7),\n", " #cmd_in=\"gzip -dc \"\n", " )\n", "\n", "# calculate average trans contact frequency _per directionality pair_\n", "# convert from int to float64 to avoid overflow\n", "avg_trans = (\n", " trans_levels.n_pairs.astype('float64').sum() \n", " / trans_levels.n_bp2.astype('float64').sum()\n", ")\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA4sAAAMmCAYAAABckRRTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd1zV1RvA8c9lXeYFEXEi7pV75RaxnJkjtcxt5ao0RznKXOWorDBH/dRELc3ce+QOce+9QhlOUDZc1vf3x5WrCAhcLlyQ5/168ere7z3fc56vKx7OOc9RKYqiIIQQQgghhBBCPMfM1AEIIYQQQgghhMh7JFkUQgghhBBCCJGKJItCCCGEEEIIIVKRZFEIIYQQQgghRCqSLAohhBBCCCGESEWSRSGEEEIIIYQQqUiyKIQQQgghhBAiFQtTByBePUlJSdy9excHBwdUKpWpwxFCCCHyDUVRiIiIoESJEpiZyc/0hRCmJcmiMLq7d+/i5uZm6jCEEEKIfCsgIIBSpUqZOgwhRAEnyaIwOgcHB0D3PzqNRmPiaHLWmjVr6NGjh6nDyHHynK+egvKs8pyvloLwnOHh4bi5uen/XyqEEKYkyaIwuuSlpxqN5pVPFm1tbV/5ZwR5zldRQXlWec5XS0F5TkC2cQgh8gRZDC+EEEIIIYQQIhVJFoUQQgghhBBCpCLJohBCCCGEEEKIVCRZFEIIIYQQQgiRiiSLQgghhBBCCCFSkWRRCCGEEEIIIUQqcnSGMBqtVotWqyU8PNzUoQghhBBCCCGySWYWhdHMnDkTR0dH3NzcTB2KEEIIIYQQIpskWRRGM2HCBMLCwggICDB1KEIIIYQQQohskmWowmjUajVqtdrUYQghhBBCCCGMQGYWhRBCCCGEEEKkIsmiEEIIIYQQQohUJFkUQgghhBBCCJGKJItCCCGEEEIIIVKRZFEIIYQQQgghRCqSLBYQq1evpmHDhtja2uLs7Ezr1q25d++eqcMSQgghhBBC5FGSLBYAc+bMoW/fvrzxxhts27aN5cuXU69ePWJjY00dmhBCCCGEECKPknMWX3E3b95kwoQJeHl5MWzYMP31t956y4RRCWNITFLYcCYI98K2NCjjbOpwhBBCCCHEK0aSxVfc77//jpWVFR988IGpQxFG9s22yyw9fBu1hRl7x7SkVCFbU4ckhBBCCCFeIbIMNZuuXbvGL7/8woABA6hRowYWFhaoVCq++eabTN2/Zs0aPDw8KFSoEHZ2dtSqVYvvvvuO+Ph4o8Tn6+tLlSpVWLZsGe7u7lhYWFCrVi127NhhlP6FaVy+G84y39sAaBOS8L0VYtqAhBBCCCHEK0eSxWxauHAhI0aMYNmyZVy8eJHExMRM3/vZZ5/Rs2dPDh8+TMOGDWnXrh3+/v6MGzcOT09PYmJish3f/fv3uXbtGpMnT+abb75h+/btlC5dmrfffptLly5lu3+R+xRFYfLmiyQpz66dDQg1WTxCCCGEEOLVJMliNlWvXp2xY8fy559/cuXKFfr27Zup+zZu3IiXlxf29vYcO3aMXbt2sW7dOm7cuEGNGjXw8fFh0qRJKe7x9vZGpVJl+LV27Vr9PUlJSURGRrJ48WL69u1LmzZtWLduHcWKFWP27NlG/bUQuWPDmSBO3H6S4tpZ/1DTBCOEEEIIIV5Zsmcxmz788MMU783MMpd/z5gxA4Dx48dTt25d/XUXFxcWLFhA8+bNmTdvHpMmTcLR0RGArl270qhRowz7LlmypP51oUKFAGjVqpX+mpWVFU2bNpWZxXwoIjaeGduv6t87qC2I0CZw7UEEMXGJ2FiZmzA6IYQQQgjxKpFk0QSCgoI4ceIEAO+//36qz5s1a4abmxsBAQFs376dXr16AeDo6KhPHDPrtdde4/jx46muK4pitKMzAgMDU7yPiIgwSr8itZ/33CA4UgtA29eK4mRjxeqTASQmKVwICqNhWamKKoQQQgghjEOWoZrAmTNnAHB2dqZs2bJ
ptqlfv36KtoZ6++23Adi7d6/+mlarxcfHRz9Gdrm5uaX4qlatmlH6FSldux+B99OiNmoLM77qWI3apZ30n5+TfYtCCCGEEMKIZGbRBPz8/AAoXbp0um3c3NxStDXU22+/TePGjfnwww+ZOXMmxYoV45dffuHJkyd88cUX2eo7I2vWrMHW9tU+ziEoKIhVq1bl+DiKAotu2ZKYpPsr29wlGp9dm7gXYwbYA7D58Hnsg47lyPi59ZymVlCeEwrOs8pzvloKwnNGR0ebOgQhhNCTZNEEkpdp2tnZpdvG3l6XAISHh2drLDMzM7Zu3crYsWMZM2YMMTExNGjQgH379vHaa69lq+9kAQEBgG7GMi4ujsjISBo2bEiPHj3QaDRGGSOvWrVqlX6ZcE7afO4ufud1s8ylnW2Z93E7rC3NSUxSWDxlF9FxiTxWaejVq0uOjJ9bz2lqBeU5oeA8qzznq6UgPGd4eHiqeghCCGEqsgy1AHB2dub333/n8ePHxMTEcOjQoUwVysmsUqVKUapUKVasWEG1atVo2LCh0foWEKlN4Nttl/XvJ3eqhrWlrpCNuZmKGiV1+1iDQmN4GGGcfahCCCGEEEJIsmgCDg4OAERFRaXbJjIyEiBfzcxNmDCBsLAw/UyjMI5f9t7gQbiuqI1nFVdaVy2a4vPn9y3KERpCCCGEEMJYJFk0gTJlygC8NKlK/iy5rSiYbj6MZImPbt+qlbkZkzulLh5Uu5ST/vW5wNBcikwIIYQQQrzqJFk0gTp16gAQEhKSbgGbkydPAqQ4gzGvmzlzJo6OjvriPCL7vt91lYQkBYAhLcvhXjj1PtcUM4tSEVUIIYQQQhiJJIsmUKpUKRo0aADAypUrU33u4+NDQEAAarWaDh065HZ4BpNlqMalKAq+N0MAKGxnxXCPCmm2K+5oQ1GNGoDzAWEkPU0uhRBCCCGEyA5JFk1k4sSJAMyaNYvTp0/rr4eEhDB8+HAAPvnkExwdHU0SnyHUajUajSZf7bPMy4JCY4jQJgBQy80JGyvzdNvWdnMCIEKbwK1HkbkRnhBCCCGEeMXJ0RnZdPr0aX1yB3Dr1i0AfvvtN7Zu3aq/vmHDBooXL65/36VLF0aMGMHcuXNp1KgRrVu3xs7Ojr179xIaGkrTpk2ZPn167j2IEWi1WrRabbaP+xA61+5H6F9XLubw0ra13Qqx69IDAM4EhFKx6MvbCyGEEEIIkRFJFrMpPDycY8dSH4QeGBhIYGCg/r1Wq03VxsvLi6ZNmzJ//nx8fX2Jj4+nfPnyjB8/nlGjRmFlZZWjsRvbzJkzmTp1qqnDeGVcfS5ZrJJBsljL7dkM9LmAUHrWl32jQgghhBAieyRZzCYPDw8UxfA9Yj179qRnz55GjMh0JkyYwOjRowkPD5ciN0aQMll8+dLemqWcUKlAUaTIjRBCCCGEMA7ZsyiMRvYsGte1+7rlvJbmKsoVSV0F9Xn2agsquepmH6/ejyAmLjHH4xNCCCGEEK82SRaF0STvV5Q9i9mnTUjkv0dRAJQvYo+lecZ/VZOL3CQmKVy8G5aT4QkhhBBCiAJAkkVhNHLOovHcehilP18xo/2KyVKct+gfmgNRCSGEEEKIgkSSRWE0cs6i8Vx78Gx2tnIG+xWTJc8sApwNDDVyREIIIYQQoqCRAjfCaNRqNWq12tRhvBKyUgk1WUVXe2wszYmJT5SZRSGEEEIIkW0ysyiMRvYsGs/Ve88li8UzlyxamJtRo5TuCI2g0BgeRaQ+rkUIIYQQQojMkmRRGI3sWTSea09nFjXWFhTTWGf6vjrPL0WVIzSEEEIIIUQ2SLIojEb2LBpHaHQc98NjAd35iiqVKtP3pti3GPDE2KEJIYQQQogCRPYsCqORPYvGkWK/YiaXoCZ7viLquQA5PkMIIYQQQhhOZhaFyGOuPZcsVs5kcZtkxTTWuDroEvZzAaEkPT1+QwghhBBCiKySZFGIPObq/WcFgjJbCTWZSqXSL0WN0Cb
wX3CkMUMTQgghhBAFiCSLwmikGqpxPL8MtVLRrCWLkHIp6hk5QkMIIYQQQhhIkkVhNFINNfuSkhSuP00WSxWywcHaMst91JaKqEIIIYQQwggkWRRGI9VQsy/wSQxRcYlA1pegJqtZyonkAqrnAkONFJkQQgghhChoJFkURqNWq9FoNGg0GlOHkm+l3K9o2K+jvdqCSq66RPPqvQhi4xONEpsQQgghhChYJFkUIg/JTiXU59VycwQgIUnhYpAcoSGEEEIIIbJOkkUh8pAUZyxmI1ms7VZI//pcoCSLQgghhBAi6yRZFCIPSV6GamVuRlkXO4P7qVjUXv866ElMtuMSQgghhBAFj4WpAxCvDq1Wqz8+Q2RdbHwifsFRAFRwtcfC3PCf5RTTWOtfPwiPzXZsQgghhBCi4JGZRWE0cnRG9tx8GEmSonudnSWoAEUc1PrX9yVZFEIIIYQQBpBkURiNHJ2RPSn2KxbPXrJobWlOIVvdGY33wyRZFEIIIYQQWSfLUIXRqNVq1Gp1xg1Fmq49d2xGZQOPzXheUY01T6LjeRgRi6IoqJIPXxRCCCGEECITZGZRiDzCWJVQkxVz1O1bjE9UeBwVl+3+hBBCCCFEwSLJohB5RHKyWMjWEleH7M/QPl/kRvYtCiGEEEKIrJJkUYg8ICRSy6MILQCVizkYZcmoq1REFUIIIYQQ2SDJohB5wLUUS1Czv18RXphZDNMapU8hhBBCCFFwSLIoRB5g7P2KAMUcny1llZlFIYQQQgiRVZIsCpEHPD+zWNlIyWJRWYYqhBBCCCGyQY7OEEaj1WrRarWEh4dn3FikcPW5YzMqFTV+sigFboQQQgghRFbJzKIwmpkzZ+Lo6Iibm5upQ8lXkpIUrj+IBMC9sC12auP8DMfZ1gpLc12hnPthkiwKIYQQQoiskWRRGM2ECRMICwsjICDA1KHkK/6Po4mJTwSgspFmFQHMzFS4OuhmF2UZqhBCCCGEyCpJFoXRqNVqNBoNGo1xqnkWFM8vQTVWcZtkxRx1yeKT6Hi0CYlG7VsIIYQQQrzaJFkUwsRSVEItbtxE+/njMx6Gy/EZQgghhBAi8yRZFMLEcqISajJXzbPjM6TIjRBCCCGEyApJFoUwseSZRbWFGWUK2xm17+dnFqXIjRBCCCGEyApJFoUwoZi4RG6HRAG6IzPMzVRG7T95zyJIkRshhBBCCJE1kiwKYUIXgsJQFN3rakberwgpz1qUZFEIIYQQQmSFJIsFgIeHByqVKs2vWbNmmTq8Au20/xP96zqlnYzef4plqFLgRgghhBBCZIFxTv8WedqCBQsIDw9PcW3FihUsWLCADh06mCgqAXD6zrNksa57IaP3n2JmUfYsCiGEEEKILJBksQCoVq1aqmsjRoygRo0a1KxZ0wQRCQBFUTgTEAqAg7UFFYrYG30MGytzNNYWhMcmSDVUIYQQQgiRJbIMtQC6ceMGJ06coE+fPqYOpUALfBLDowjd0tDabk6YGbm4TbLkIjcPwmNRkjdICiGEEEIIkQFJFo3g2rVr/PLLLwwYMIAaNWpgYWGBSqXim2++ydT9a9aswcPDg0KFCmFnZ0etWrX47rvviI+Pz5F4//jjD8zMzHj//fdzpH+ROSn3Kxp/CWqy5KWo2oQkwmJy5s+UEEIIIYR49eSbZaibN28GoEmTJri4uJg4mpQWLlyIl5eXQfd+9tlneHl5YWFhgaenJ/b29uzbt49x48axZcsWdu/ejY2NjVHj/fPPP2nZsiWlSpUyar8ia874h+pf182B4jbJUha5icXJ1irHxhJCCCGEEK+OfDOz2KVLF7p37461tXXGjXNZ9erVGTt2LH/++SdXrlyhb9++mbpv48aNeHl5YW9vz7Fjx9i1axfr1q3jxo0b1KhRAx8fHyZNmpTiHm9v73Qrmz7/tXbt2jTHPHr0KLdu3ZIlqHnAmednFt1yfmYR4L4UuRFCCCGEEJmUb2YWnZ2dAbC3N34RkOz68MM
PU7w3M8tcDj5jxgwAxo8fT926dfXXXVxcWLBgAc2bN2fevHlMmjQJR0dHALp27UqjRo0y7LtkyZJpXv/jjz+wtrame/fumYpR5IzY+EQu3dVVqK3gao+jrWWOjVXUUc5aFEIIIYQQWZdvksXXXnsNX19fwsPD0WiMf3h5bgsKCuLEiRMAae4dbNasGW5ubgQEBLB9+3Z69eoFgKOjoz5xzKqEhARWr15Np06djPprGBgYmOJ9RESE0fp+VV0ICiMhSVdspo6bU46O9fwy1Ady1qIQQgghhMikfLMMdfDgwSQmJvLLL7+YOhSjOHPmDKCbMS1btmyaberXr5+ibXbt2rWL4OBgoy9BdXNzS/GV1lEdIqWcPl/xeS/uWRRCCCGEECIz8s3MYu/evTl+/DiTJ08mNjaWUaNG6Zem5kd+fn4AlC5dOt02bm5uKdpm1x9//EHhwoVp3769UfrLyJo1a7C1tc2VsUwlKCiIVatWZfm+zbdtAN3S0/sXj7Dq1mEjR/ZMeLwKcADg9JVbrIq9mOU+DH3O/KagPCcUnGeV53y1FITnjI6ONnUIQgihl2+SRU9PTwBsbW2ZMWMGs2fPpkKFChQpUgRzc/M071GpVOzduzc3w8y05KWadnZ26bZJ3p8ZHh6e7fEiIyPZvHkz/fv3x9LSuPvjAgICANBqtcTFxREZGUnDhg3p0aPHK7Fk+GVWrVqlXyKcWYqi8NOMvYAWB7UFIwf0zLEzFgESkxS++2oHiUkK5vaF6dWreZb7MOQ586OC8pxQcJ5VnvPVUhCeMzw8PFUtBCGEMJV8kyweOHAgxfuEhASuXr3K1atX071Hpcq5b8DzG3t7e6KionKk7+QjOKZMmcLUqVNzZIxXSVBoDA8jdHsHa7k55WiiCGBupsLVQc29sFgpcCOEEEIIITIt3ySLkydPNnUIRuXgoFsW+LIELjIyEiDfzM5NmDCB0aNHEx4erl9CK1I7nUvnKz6vqMaae2GxBEfGEZ+YhKV5vtmuLIQQQgghTESSRRMpU6YM8GwJZ1qSP0tuK14Nzxe3qZPDxW2SPV/k5mGElpJONrkyrhBCCCGEyL9kesFE6tSpA0BISEi6BWxOnjwJkOIMxrxs5syZODo6yqxiBs4EhOpf5/SxGcmKatT61/fDZCmqEEIIIYTImCSLJlKqVCkaNGgAwMqVK1N97uPjQ0BAAGq1mg4dOuR2eAaZMGECYWFhL50tLehi4xO5fDcMgHJF7HCytcqVcYs6Pn/WoiSLQgghhBAiY5IsmtDEiRMBmDVrFqdPn9ZfDwkJYfjw4QB88sknODo6miS+rFKr1Wg0mnyzx9IULgaFEZ+oAFC3dO4sQYWUy1AlWRRCCCGEEJmRJ/csJh+FUaVKFS5dupTiWlaoVCoSEhKMGltaTp8+rU/uAG7dugXAb7/9xtatW/XXN2zYQPHixfXvu3TpwogRI5g7dy6NGjWidevW2NnZsXfvXkJDQ2natCnTp0/P8fiNRavVotVqjXLUx6vqtP+z/YqmShbvS7IohBBCCCEyIU8mi4qipPjvi6/zmvDwcI4dO5bqemBgIIGBgfr3Wq02VRsvLy+aNm3K/Pnz8fX1JT4+nvLlyzN+/HhGjRqFlVXuLFM0hpkzZ8rRGRk483wlVHenXBs3xTJU2bMohBBCCCEyIU8mi/v37wfA1tY21bW8yMPDI1vJbM+ePenZs6cRIzINOTrj5RRF0c8s2qstqOjqkGtjF5WZRSGEEEIIkUV5Mlls2bJlpq6JvEWtVqNWqzNuWEDdDYvlQbhudrmWmyPmZqpcG9tebYG92oJIbYI+BiGEEEIIIV5GCtwIo0neryh7FtP2/PmKublfMVny8RkPwmPz9LJuIYQQQgiRN0iyKIxGzll8uRT7FU2QLBZ7um8xOi6RCG3OF34SQgghhBD5W55chpoZ9+/f5+7du0RFRb10lqRFixa5GFXBJnsWX+75Sqi13ZxyffyiDimL3GisLXM9BiG
EEEIIkX/kq2QxKSmJn376iQULFnD79u0M2+fW0RlCR/Yspi82PpFLd8MAKOdiRyG73K9y+3xF1PvhsVQsmnsFdoQQQgghRP6Tb5LFpKQkunTpwrZt2zK930r2ZeUuOWcxfZfuhhGfqPvzWMcES1DhhbMW5fgMIYQQQgiRgXyzZ3HZsmVs3boVBwcH/vzzTx4/fgxAsWLFSEhIICgoiBUrVlC1alVcXFzYvXs3SUlJJo66YJE9i+kz1fmKz3v++IyHEVIRVQghhBBCvFy+SRb/+OMPVCoVs2bNolevXjg5Oek/MzMzo3jx4vTu3ZtTp05RqVIlunbtytWrV00XcAE0YcIEwsLCCAgIMHUoec7z+xXruJloZtFRZhaFEEIIIUTm5Ztk8dy5cwD07t07xfXExMQU762trZk3bx5RUVHMnDkz1+ITuj2LGo0GjUZj6lDynNN3QgGwszKncjHT7BVMPjoDdHsWhRBCCCGEeJl8kyxGRkbi6OiIg8Ozb7StrKyIjIxM1bZ27do4ODiwf//+3AxRiDTdDY3RJ2e13JwwN1OZJI4i9mqSh34gyaIQQgghhMhAvkkWXV1dU80iOjs7Exsby71791JcVxQFrVbLw4cPczNEIdJ0+Gaw/rUpzldMZmFuhou9bnZRlqEKIYQQQoiM5Jtk0c3NjcjISH1hG4BatWoBsGHDhhRtd+zYQVxcHIUKme4b84IouRKqVEN9JilJYdG//+nft6hUxITRPNu3GBypJSFRCkAJIYQQQoj05ZtksXHjxgD8+++/+mvdu3dHURRGjx7NtGnT2L59Oz/++CN9+/ZFpVLRpk0bU4VbIBmzGmpUfBS+d31Zc30Nj2MfZ3xDHrX78n2uP9Atla7nXogGZUz7A4zkiqhJCgRHxpk0FiGEEEIIkbflm3MWu3fvzo8//siyZcvo3LkzAAMGDOD333/nyJEjTJ06Vd9WURSKFCnCtGnTTBVugTRhwgRGjx5NeHh4phJGJSmJ2EuXSYoI54kqhmsxd7gYfZMzEVe5FH2TWPMkUKn4/cLvrOiwAhcbl1x4CuNRFIVf9t3Uv//UswIqlWn2KyZ7scjN8xVShRBCCCGEeF6+SRYbNWqU6txEc3Nzdu/ezbRp01i7di2BgYE4Ojry5ptv8s033+Du7m6iaAsmtVqNWq3OuCEQ/+ABtz8fTcLx0/prJZ5+Jc8HJwGRNvDva3cYlTCYBV2X4WBlmkqihth/7SGX7uqW5NYo6UhLEy9BBSimeeH4DDkSUwghhBBCpCPfJIvpsbOzY/bs2cyePdvUoYhMCt+5i7tfT0IJj3hpOzNAEwMdTyo0v3SF3y++y5BJa7G2ss2dQLNBURTm7n02q/hJHphVhGfLUAEeRkiRGyGEEEIIkb58nyyK/CMxMooH335L2HMFiUIc4HANS0pYulDczAkX7HFMVGOmjScpOprY69dBq0UTA23W+HHS14Na33rh0KixCZ8kY763QjgbEApA5aIOvFm1qGkDeur5ZadSEVUIIYQQQrxMvk4WQ0NDefToEQBFihTBycnJtAGJdEWfPsPdceOIDwjQXztSRcWGrkX53zsrKW5fPM374u/f5/q3X2H2z2EACgdFEDhgEA5t21D0iy+wLFkyV+LPqrl7b+hff+JZATMTna34ohTLUOWsRSGEEEII8RL5phpqsocPHzJ+/HjKly9P4cKFqVKlClWqVKFw4cKUL1+eCRMmyPmKJpLW0RlKQgKP5v7CnT599IlijBXM72jG4p6O/PT2onQTRQDLYsV47ZfFhHuNw6/Ys4QrYtdubnXoyOM//sy5BzLQcb/HHPPTVXAt52JHhxrpP19uc30uWXwgyaIQQgghhHiJfJUs7tmzh6pVq/L999/j5+eHoigpvvz8/Pjuu++oWrUqe/bsMXW4Bc6LR2fE+ftzu3dvghcsgKfFia6VhM8HmXO0jg3z31hAhUIVMtX3620HkLRoFgs7mBH6dMuiotXy4JtviL12LUe
ex1Dz9j/bqzi8VQXM88isIoDG2gIbS3NAlqEKIYQQQoiXyzfJ4pUrV+jUqRNPnjzBxcWFr7/+mr1793L16lWuXr3Kvn37mDx5Mq6urjx58oS3336bK1eumDrsAmXChAmEhYUR8HQGMS7oLrHnzgOgmKlY3dyMyX3MCXY25/sW31PHtU6W+n+r4ts0+HA8I4eYs7/mswTsSR6aXTwbEMqh67ql0W7ONnSuXcLEEaWkUqn0+xYfhmtNHI0QQgghhMjL8s2exSlTpqDVamnQoAE7d+6kUKGUh5tXqlQJDw8PRowYQbt27Thx4gTTpk1j1apVJoq44Hnx6Az7xo0w69eXB/9sY8qbYdwsqUvwpjaeTKvSrQwao2+1voTEhLBUtZjXryZiGwehmzfjOmY05nlgz+q8585VHNayApbmee/nMUU1avyCo4jQJhClTcBOnW/+GRBCCCGEELko730nm479+/ejUqlYsmRJqkTxeYUKFWLx4sUA7Nu3L7fCE+m4/t7rDO4dqU8UR9QZQbeK3bLV58i6I3mz6tscrPF0dlGrJXTd+uyGmm2X74az58oDAIo7WvNOvbxZfKeoFLkRQgghhBCZkG+SxcjISDQaDdWrV8+wbY0aNXB0dCQyMjIXIhPpOf/oPGN8xxNtqduv+H6V9/mwxofZ7lelUjG6/mj2NXyW9IT8+QdKYmK2+86O+QeezSoOaVEOtYW5CaNJ3/MVUR/IvkUhhBBCCJGOfJMslilThtjYWBIzkRAkJCQQGxuLu7t7LkQm0mNpbomtpa4aTdsybRnXcJzRDqZ3sXGhSaPunC2r6y/x7j0iDx40St+GuPkwgu0X7ulis7fivYalTRZLRmRmUQghhBBCZEa+2az07rvvMm3aNP766y969+790rarV69Gq9XSq1evXIpOpKWqc1WWt1/O4guLmdRoEmYq4/5sYuBrA5nY4C9q+8UD8Gj5Mhw8PY06Rmbsu/qA8esuoCi69x81L4e1Zd6cVQT0BW4AHkiRGyGEEEaWmJhIWFgY0dHRpg5FCPECW1tbHB0dMTfP3Peq+SZZHD9+PLt27WLo0KGYm5vz3nvvpdlu9erVDB06lMaNGzNu3LhcjlK8yF3jzvSm03Ok7+L2xSn15tvc372OYqGgPXoc7a1bqMuXz5HxXhQRG8+6AGtOnjupv1bSyYY+jfL2jHZROWtRCCFEDklMTOTu3bsUKlSIQoUKGW1FkRAi+xRFISoqirt371KiRIlMJYz5JlmcPXs2Hh4eXLlyhd69ezNx4kRatmxJyZK6IiJ3797l4MGD3L59G0dHRzw8PJg1a1aafX399de5GbrIQR/U/JBf622g/17dvshHK5ZTasrUHB/38M1gvlh7nqBQK/21lpWKMPudmnm+umhRzbOKtXLWohBCCGMKCwujUKFC2NvbmzoUIcQLVCqV/u9mWFgYzs7OGd6Tt7+rfc6UKVP0P51SFIXbt29z586dFG2Up+sAw8LC0k0UQZLFnKLVatFqtYSHh+famGUcy2D21hvEHtqNdTyEbtxA8TFjMXdwyJHxouMSmLXjKsuPPPuzZ2dlzqS3qvFuA7d88RPUohprrCzMiEtI4kJQGIqi5Iu4hRBC5H3R0dEvrVovhDA9Ozs7QkNDX61ksUWLFvINbR43c+ZMpk7N+Vm9F/V7fRi7qv9DmzMK5rHxPF63liIDBhp1jOBILfuvPmTe/pvcCXm2B6OcfQLLhrfCzdnWqOPlJEtzMxqWccbnZjBBoTH8FxxF+SLyE2AhhBDGId+vCZG3ZeXvaL5JFg8cOGDqEEQGJkyYwOjRowkPD8fNzS3Xxq3iXIU/OzSAM8cBuLtsCS79+qMyM7ygjqIoXL4Xzr4rD9l79SHnAkP1BWwArC3NGN+uCpZ3juarRDFZ84ou+NwMBsDnRrAki0IIIYQQIpV8c3SGyPvUajUajQaNRpPrY7/TdhQX3XU/JbG6F0LEv/8a1M/FoDAmbrhAk1n76DjXhzn
/XOdsQMpEsZ57IXaMbMGApmUxy6c/PG1esYj+9b83HpkwEiGEEEIIkVflSrJYrlw5ypUrx3vvvcfly5dzY0hRwNR2rc3N1hX1728unpvlPk7deULXBYdZecyfey8UfqlSzIHhHuVZO7Qxa4c2pqyLXbZjNqUqxRxwsdcV5zlyK4S4hCQTRySEEEIIIfKaXFmGevv2bf1/165dS7du3Zg0aRI1atTIjeFFAdGy11gerR1MkXBQn7yM9vZt1GXKZOreSG0Co1afJT5RN4VoZWFG0/KF8axaFM8qrpR0ssnByHOfmZmKZhVc2Hj2LlFxiZzxf8Lr5QqbOiwhhBBCCJGH5MrMYv/+/enfvz9vvvkmNjY2rF27ltq1a+fG0KIAaVyqGeeaFQfATIGL//sh0/dO23IJ/8e6wjX13Atx9us3WTqwIX0bub9yiWKylEtRg00YiRBCCCGEyItyZWZx6dKl+tcJCQmcOHGCQ4cO5cbQogBRqVS8NmgkcbvHY5UAZtsPkDgxEvMMznraefEef58MBHTHYPzUsza2Vvmm9pPBmld00b/+92YwY9tWNmE0QgghhBAir8n1AjcWFhY0btyYcePG5fbQogDweK0T52s7AmAdm8ildUte2v5BeCzj11/Qv5/y9muULpz/qpsawlVjTZViuvMozweGEhodZ+KIhBBCCCFEXiLVUAuATZs20ahRIzQaDa6urnTs2JGzZ8+aOqwcYaYyo8i7vfTvQzasS7dtUpLC2DXnCI2OB6B99WJ0r1cqx2PMS5pV0M0uKgocvhli4miEEEKIgmvAgAGoVCo5Lk7kKQYni5999hmXLl0yZiwiB/zzzz907dqVihUrsnbtWn777TcePnxI69atuXfvnqnDyxEt2n3EfWfdH+1iVx8R4nc1zXbevrf1e/WKatTM6FqjwB0k3LySHKEhhBBCCCHSZnCyOHfuXGrWrEnjxo1ZvHgxkZGRxoxLGMnKlStxd3dn+fLltGnThq5du7J69WoeP37Mrl27TB1ejrC1tOWJZy39+zPLf07V5tr9CGbtfJZE/tCjFoXsrHIjvDylYRlnrCx0/wz8eyMY5fkDJYUQQgghRIFmcLLo4eEBwLFjxxgyZAjFixfngw8+wNfX11ixCSOIj4/H3t4+xYyZo6NuT19S0qt7tl7NPp+S/HTq3b4pnlWbkMjIv87ozxYc1LRsisqgBYmNlTkNyzgDEBQaw3/BUSaOSAghhBBC5BUGl3zct28ffn5+LFmyhGXLlhEUFIS3tzfe3t5UrlyZDz/8kL59+1KkiPG/Cd+5cydr1qzh9OnTPHz4EABXV1fq1q1Ljx49aNeundHHTM+1a9fYvXs3p06d4tSpU1y5coXExESmT5/OV199leH9a9asYf78+Zw7d464uDgqVKhA7969GTVqFJaWltmOb9CgQbRt25affvqJAQMGEBkZybhx43Bzc6Nbt27Z7j+vqlClMTsqaihzIxyXkHjO7v2Lum++D8APu65x9X4EAJWLOvBFu4JdBbR5RRd8buqW4/rcCKZ8kZdXjxVCCCEM0ekXHx5FaE0dhkGKOKjZ8mkzU4chRK7L1vkAZcuW5ZtvvmHatGns2rWLxYsXs3XrVq5evcrnn3/OxIkT6dSpE4MGDaJdu3bZ3g8WGBhIr1699LOXzy+Zu3fvHufPn8fb25umTZuycuVKSpXK+WIlCxcuxMvLy6B7P/vsM7y8vLCwsMDT0xN7e3v27dvHuHHj2LJlC7t378bGJntn/Hl6erJ+/Xp69+7N6NGjAd3v2549e3BycspW33md7dsdYc4qAPz/Xk5ixXb8dTyAdad1x2RYmZvx83u1sbY0N2WYJtesogvs0L3+98Yj+jcpY9J4hBBCvJoeRWi5Hx5r6jBMbvHixfj4+KS6nnxt1qxZeHt7p/p8/PjxVKlSJafDEyIFoxwmZ2ZmRvv27Wnfvj3BwcEsW7aMJUuWcPXqVdatW8f69espWbIkAwcOZODAgZQ
pUybLYzx58oTmzZvj7++Poii0aNGCli1bUrJkSQDu3r3LwYMHOXToEIcPH6Zly5acOnUqxxOi6tWrM3bsWOrUqUPdunWZMWMGK1asyPC+jRs34uXlhb29PQcPHqRu3boABAcH4+npiY+PD5MmTeKHH54dLO/t7c3AgQMz7HvNmjV0794dgKNHj9KvXz/ef/99evToQWRkJN999x3t27fH19eXokWLGvjked/r743gyi+rsI4Dt+N3eP/XHWhx0n/+RbvKVC2uMV2AeUTVYhpc7K0IjozjyK0Q4hKS9PsYhRBCCGMp4qA2dQgGM2bsPj4+LFu2LN3P06spMWDAAKpUqcLVq1eZNWtWqs+bNWvGhx9+aLQ4hQAjJYvPc3FxYcyYMYwZM4ajR4/yxRdf4OPjQ1BQEN988w3ffvstb775JqNHj+bNN9/MdL/Tpk3jzp07uLq6snbtWpo1S3spgK+vL++88w63b99m+vTpzJkzx1iPlqYX/1KamWXum+wZM2YAup8SJSeKoPv1W7BgAc2bN2fevHlMmjRJv8ewa9euNGrUKMO+kxNogE8//ZQmTZrw66+/6q+1atUKd3d3fvrppzT/sXkV+N4K5s+j/lQrUwzP6/ex1UKzJ9vZW+h9NNYWDGxalkFNy5o6zDzBzExFswoubDx7l6i4RM74P+H1coVNHZYQQohXjCzj1EnetvWiAQMGsGzZMvbv36+vDZKW+/fvp5tsSrIojM3oySJAZGQkf/31F0uWLOH48eOAbsmoq6srDx8+ZNeuXezevZsOHTqwcuVKHBwcMuxz48aNqFQqlixZkm6iCNCkSRMWL15Mp06dWL9+fY4ni4YICgrixIkTALz//vupPm/WrBlubm4EBASwfft2evXSnRvo6OioTxwz69KlS7Rv3z7FNY1GQ4UKFbhx44aBT5BSYGBgivcRERFG6ddQq0/4M27dBQD8S3jieX0lAG38L9FhSA061ihZ4Jeevqh5xSJsPHsX0FVFlWRRCCGEyJs8PDykernINUZda+bj48PAgQMpXrw4Q4YM4dixY6jVavr27cu///7L/fv3uXjxIsOGDcPS0pLt27fz5ZdfZqrve/fuYWNjQ8eOHTNs26FDB2xsbLh//352HylHnDlzBgBnZ2fKlk17dqt+/fop2hqqTJkynDx5MsW18PBwbt68me7YWeXm5pbiq1q1akbp1xAxcYl8v+ua/v39kg0JcdIdiVH9tpZSlpckUUxD84ou+tf/Pi12I4QQQgghCrZszywmT4UvXbqUGzdu6H/SUb16dT766CP69u2bYt9gtWrVmD9/Pu+++y4eHh5s2LCBuXPnZjiOq6srYWFhmYpJpVJhbm6Oi4tLxo1NwM/PD4DSpUun28bNzS1FW0N98sknfPzxxwwZMoR33nmHyMhI5syZg1ar5aOPPspW3xlZs2YNtra2OTrGi3weWREcaQ1ANU0875cJ526NChT+9zJmChxdMJv/WoQabbygoCBWrVpltP5Mqai1HQ9izTkf8IQlK1Zh+9y/Dq/Sc75MQXlOKDjPKs/5aikIzxkdHW3qEIQQQs/gZHHTpk38/vvv7Nixg8TERBRFwdbWlp49ezJ48OAM99a1aNGCokWLcu/evUyN165dO5YsWcKRI0do3LjxS9v6+voSGRmpX76Z1yQv07Szs0u3jb297viC8PDwbI01bNgwrK2tmTdvHitXrsTGxoa6deuyf/9+Klc2zpERAQEBKd5HRERQrVo1evTogUaTewVkYuMT+em7/YCuLPePgzypUkxDdJPXudNONyNd7fxDas95gyK2xjnSZdWqVXn2z1lW3dp6mcU+fiioKFbLg441i+s/e5We82UKynNCwXlWec5XS0F4zvDwcNl3JoTIMwxehtq1a1e2bt1KQkICNWvWZP78+dy9e5fff/89U0VYANRqdabXXE+ePJnChQszYMCAl8623b59m4EDB+Lq6srkyZMz1ferTKVSMWjQIE6fPk1
ERAQPHz5k586dvP7660Ybo1SpUpQqVYoiRYqg0WgytQc1J6w5GcDDp+c3tX2tKFWK6RJV2zLleFJFl/iUClHYs/PXdPsoyJpXepZA/3vjkQkjEUIIIYQQeYHBM4u2trb06tWLwYMH06BBA4P6uH37dqbb3rp1i1mzZjFmzBiqV69Oz5498fDwSHV0xurVq7GysuKHH37g5s2b3Lx5M1VfLVq0MCheY0lOpqKiotJtExkZCZCrM3PZNXPmTKZOnWqSseMSklh44Jb+/aeeFVN8Xrz7+8R+oyt2FLFpC4ldJmJuJnsXn9ewjDNWFmbEJSTx741gFEXJ9tmoQgghhMic9KqkCmFKBieL9+/f1y+VzA0eHh4pvnFdvnw5y5cvT7NtTExMuvvxVCoVCQkJORJjZiWfM/ni8s3nJX9myJmUpjJhwgRGjx5NeHi4fs9lbll3OpC7YbqDfltXcaV6yZRVY0t3eY9Ls3/CMj6J2uci8L19kOblPHM1xrzOxsqchmWc8bkZTFBoDP8FR1G+SO79HRdCCCGEEHmLwctQczNRTKYoSra/kpKScj3uF9WpUweAkJCQdJfUJlcwff4MRpG2+MQk5u9/NoP8aeuKqdqY29uT0LweAA6xcGrDb7kWX46Ii4ZT3vB7e1g/GIJTz6AbotlzVVF9bkhVVCGEEEKIgszgZNHPz4/Ro0fj5eWVYds5c+YwevRo/P39DR2OpKQko32ZWqlSpfRLd1euXJnqcx8fHwICAlCr1XTo0CG3wzPYzJkzcXR0zPVZxY1nggh8EgNAi0pFqO3mlGa78u8/KxhQ+MAF7kflzaNVXiosEP6ZDD9Vgy0jwd8Xzq+G+Q1178MzVzAqPSmO0JB9i0IIIYQQBZrByeLy5cvx8vLK1J6m2NhYvLy8WLFihaHDvXImTpwIwKxZszh9+rT+ekhICMOHDwd0x144OjqmeX9eNGHCBMLCwl66vNbYEhKTWPDcXsURnhXSbatp3JRYZ92MeJ2bCuuP/Z7j8RmFooD/Ufi7P/xcEw7/DDFPXmiTqJtpnFsH9kyBmFCDhqpaTIOLve5cyr1XH/Lj7mskJJr+ByxCCCGEECL3GZwsbt++HYAuXbpk2LZPnz4oisK2bdsMHS7POn36NI0aNdJ/JT/jb7/9luL6i0eEdOnShREjRhAZGUmjRo1o37493bt3p0KFCly4cIGmTZsyffp0UzxSvrL1/D38gnWFghqXK0z9Ms7ptlWZm+PctSsA5gpErF5DTEJMrsRpsMCT8D8P+L0tXN6oSwoBzCyh5nswaBe0+gqsnlagTYgBn5/AqxYc9oL4rD2fmZmK9xvqzv9UFJi77ybvLzpGWJwUuhFCCCGEKGgMLnBz584d7OzsXnqwfDJ3d3fs7Oy4c+eOocOl4u/vz5kzZ3j48CEArq6u1K1bN9eXQIaHh3Ps2LFU1wMDAwkMDNS/12q1qdp4eXnRtGlT5s+fj6+vL/Hx8ZQvX57x48czatQorKyscjR2Y8vtaqiJSQrznturOCKNvYovKtFnINe9/8AsUcHjeCxbL6+jR80+ORmm4UJuwYpuoA17ds2uCNT/AOoPAoeiumulG+ne/zsHTiyCxDiIDYV/voZDP4CjGzgUA4fiunscij99XwIKlwObQimGHflGJdSW5vz4z3USkxSO337MBXM7Klx+wBvViube8wshhBBCCJMyOFkMDQ196aHyL7KysiIkJMTQ4fT++ecfJk2axIkTJ9L8vGHDhkyfPp033ngj22NlhoeHR6bPikxLz5496dmzpxEjMp3croa64+I9bj7UHTHSoEwhGpVLf1YxmWXx4qhaN4fdh9DEwI2Vi1Bq9M57R0RoI+Gv3s8SRdfXoMmnUL0bWKhTt7crDO1mQKOhsH8GnPsLUEAbDg8v6b7SY1sYCleEwhWgcHnMCldgaOmieHa2ZOme0xD9CBfCubPyT867JlLdMQ4zKztwLguFykChsrrXjm5gkb9+wCGEEEIIIdJ
ncLLo4uLCvXv3CAkJoXDhwi9tGxISQmhoKK6uroYOB8C0adOYOnWqPjmzsLDAxUVXkCM4OJiEhASOHTtG27ZtmTp1Kl999VW2xhNZo1arUavTSGRyQFKSwrx9KWcVM5vwlRs6Er/dhwB4/eBDjgQcpknpZjkSp0EUBTZ/Ao+u6N67VIYPdoHaIeN7nUpD1191ieXB7yDoNETe1802vjhMEsRFmhNyN5qQc1eIiriJEmaOdagZ1loVWkuFNoUhuLDC48IKD5wV/tQmER8JJRMSaOS/l7qxWmySf1iiMgPHUlCkClRsA5Xb694LIYQQQoh8yeBksWHDhmzatInff/+dzz///KVtlyxZgqIo1K9f39Dh2Lp1K1OmTAHgjTfeYOLEiTRp0kS/VDM+Ph5fX19mzpzJ7t27mTx5MnXr1s1X1UTzO61Wi1arJTw8PMfH+ufKA67ejwCgtpsTzSq4ZHDHM9bVqqGtXRn12WuUeAJb1/xMkzF5KFn0nQuXNuheqzXw3p+ZSxSfV/Q16LlM91pRdAVxIu5BxD2i/7vCqSV/UejCY8yf1q6xAF4spaSOV1HyPpS8nzIJj7CGgCJWnHSxY0thsHZMoKxtLPWIoWqoP+ah/nBjN2wfC8VrQ5WOui/XapDXZnCFEEIIIUS6DC5w069fPxRF4euvv2bTpk3pttu4cSNff/01KpWKfv36GTocc+bMQaVS0b9/f3bv3o2Hh0eKPX2Wlpa0bNmSnTt30r9/fxRFYc6cOQaPJ7IuN4/O+PPYs2NYRmZhVjFZuWGj9K8r7LjEf2H/GS22bLm1T1fNNFnX38Al472YL6VSga0zinMlTm05xPWxC3E59yxRfNEjDVwuDfedIK0mDrFQLQDanFHov0fh3XXmNFxhR/QaF7b6lGDtpaJsuu/MkThbHt8/D/u/hYVNdEV3dk6Aazsg+nH2nkkIIYQQQuQ4g2cWu3TpQseOHdm2bRvdunWjRYsWdOzYEXd3d0BXAGfbtm0cOnQIRVFo164dPXr0MDjQ06dPo1Kp+O677zJsO3v2bJYvX86pU6cMHk9kXW7tWQyLjsf3pu7A+JJONnhULpLlPhxatCC6dBFs/R9RJRB2bvFieJ+MzwzNUU/uwNpBuvWhAC2+gCrGmRkP8PmHO5MnUjgoUn8t3AZulbdFW7IwZmXcsK9QBdcqdShbtDJN7ItjYWZBYlQU0TdvcOjPFdRwciL2+g0ir9zAJiw01RgOseAQCASaA+aANTdsIKA4RLkmYFM4nJIRv9Pg6AKsAIpUBffGULqJ7r+yZFUIIYQQIk8xOFkE+Ouvv+jTpw+bNm3i4MGDHDp0KMXnyXsLO3fubJQzFh0dHSlSJOPEwNXVFScnJxITE7M9psi83NqzuPfqAxKSdH+22lUvZlBxGpVKRcmPhvFk0jQA7NfuI6xHGI7qHDjXMi4Kdn+lm02r0BoqtQP7F/bvxsfA6j7Pzk+s2AY8JmR76JjgBxyd9DHF9l8ieWdxEnC2aRHqTfqJ98vUe+n95nZ2ONSqTfjlK5Tu1Ut/PTE8HO3NW2hv3eTRpTMEXz6HxZ0gHMJSVv3VxMBr/wH/WQAWJKls2F8E4osmUL6QPxX9r2Fh8/S8S8fSUO1taPwxaEpk+9mFEEIIIUT2ZCtZtLOzY8OGDezevRtvb2+OHDnCgwcPAChWrBiNGzdmwIABvPnmm9kOtFq1apw8eZLw8HA0Gs1L24aHhxMeHk6DBg2yPa7IvNzas7jj4n3963bVixncj2vnd7g753tsQmOodzWBrf8upvcbY4wRYko7x8Pp5brXlzcCKihZT1cApnIHcK0KWz6D++d1bZzLQbdFYGbwKnEUReHU79/DvGUUi3m2mPROCQtUnw/l/XbDs1UB1lyjwbZuHWzr1qFQjx5Ueno9LvQJd04d4MGxQ2jPXcDxxn3sop/90MZMgdIPgYe6XZI3cCRWk0ghFy1OLo+wf7AAi2O/Qa33oNkoKFze4BiFEEIIIUT
2ZCtZTNamTRvatGljjK7SNXz4cPr378/s2bP59ttvX9p29uzZJCYmMnz48ByNSaSUG+csRmkTOHT9EQBFHNTUK10ogzvSZ2Zlhab3e8TPX4oZELliFfGeI7A0szRStMDV7c8SRT0Fgk7qvvZNB/uiEKn7IQuWdvDun2DjZPCQiqJw+MshFF7/r/5atBpuvvs67Uf9jCYbfWfEyqkQFVt3pWLrrvpYYm/fJvDoXh4eP0zC2Yu43ItMsVnaOtycmHBbYv6zRTFTcHCPoUjIKtRn/kD1Whdd0li8Vo7FLIQQQggh0mb41EUu69u3L6NGjWLWrFkMHTqUO3fupGrj7+/PsGHDmDVrFqNHj6ZPnzx62PorasKECYSFhREQEJBjYxy49ghtgm6mrE21opiZZa+6Ztl+Q4i3Mgfg9VNR7L+4Odsx6kU+gs2fPnvvMQGaj9WdmZii3YNnrzvPg6LVDB5SSUri6OiBKRLFS3ULY7d2Ke9O9M7RRDEtKpUKm7JlqdjrQ5r+tJSW+09Q0mcP/335PvtbFOFqSUh47l8hVZKKSD9b/Ha5cu6AC8G7dqAsbEHM713gjm+uxi6EEEIIUdAZZWbR2Dw9PdP9TKPRsGjRIhYtWkTp0qUpWbIkAHfv3tUnkBqNhtOnT9O6dWv27t2bKzGL3NmzuPPSsyWo7asXz3Z/5o6OJL3lCev/QZ0At5YugJ/eyXa/KApsGQHRukI8VO4ILcfpKpO2ngRPbsO1nXB9B9z2gaQEXUGb6t0MHzI+nhOf9sfpwBlAtzfx4sAm9PxicbaWnBqbk0tJOvadBH0ncSv0FpsureHioU1UvhqG5zkF+1hdO/V9Sx7dd+Y/jUJ8lbPUvNWRBLdGFOowBUq/btJnEEIIIYQoCLKdLCqKwuHDh7l48SJPnjwhPj7+pe2//vrrDPs8cOBApsa+c+dOmjOMYWFhHDhwIE99gyyyLzY+kX1XdLNwjjaWvF7O2Sj9Vh32OTc3/IOZArUP3uV84ElqljL8TFAAzqyAa9t1r+2KQCevlGcMFioDjYbqvmLDdIVtCpUxeLgkrZbTQ3rjcPQSAIkqODOkOX1G/pan/x6UdyrP6KbjiW88hqN3j7L9+g5it+7C81g0pUJ0bRzCVXDchhtnbfCvdoMqAZ0oWbYRDu2+huz+PgkhhBBCiHRlK1ncsmULH3/8MUFBQZm+JzPJ4uTJk7MTlnhFHb4ZTFScrljKm9WKYmlunFXUajc3IpvWQONzAado8Fn2PTW/XG14h4/9dOcJJus0F+xfUsXX2lH3ZaCk6GjOftALuzPXAYg3hxMft2DQsF/zdKL4PEszS5qXak7zUs2J95jKibvHOLj9D5w3+1Ljpu4HUDZxUPmsJfHnC7OtyjUK/deZRpUa4tjmayhRx8RPIIQQQuScKVOmMHXqVJYuXcqAAQNMHY4oQAxOFg8cOEC3bt30x1OUKlWKkiVLYm1tne2gJFnMn3K6GmqKKqivGV4FNS1VPhnHXR/dHtcy289zv+ksijUcDLZZnL1MSoQNQyHu6XmGdfsZ7azEtCSGh3NhwHvYXPYDINYSjo70YMgH8/NNovgiSzNLmpRqRpPBzUj4MIGzJ7Zyb+kSSvvcxCoBLJKg1mUzuKxhZ/mrxFztSstqdSjbZjoUq27q8IUQQgghXhkGJ4szZswgMTGRGjVqsHTpUurWrWvMuEQ+lJPVUOMTk9jzdAmqnZU5zSq6GLV/x6qVuORmQaGABEqFwD+bfqOvz09Q/R1o8IHuqIvMOOwFAUd1rwuVgbYzjBrn8xIeP+Zy33dR3woEIEoNvqNb8Um/eZip8k3tqpeyMLOg/utd4PUuhN8P4OS86Wi2HsYuVlfkqOYt4JYN509cZfe5znSoUx+3ttOgSGWTxi2EEEII8Sow+DvKEydOoFKp+PPPPyVRFEDOVkM99t9jQqN1yxFbVXHF2tLceJ3HhMKKrpQu90h/yeKaNY+T4uDsn7DIE/7nAWf+gLjo9Pu
5dw72P00OVWbQ9TdQOxgvzuckhoZype97WD5NFMNs4cA4z1cqUXyRppgbnt/8jzo+x4gc1otQzbNiSpWCoMU2Ky797zxrprfj7h/94fF/JoxWCCGEECL/M3hmMT4+Hnt7e6pXl2VfQicnq6HuvHRP/9oYVVD1oh/Dii5w7xwli8FpBydsI1TU9IO/WrgwPPahrt3dM7DpY9g5EVwq6pan2hamzsOHcCgIbAvDsV8h6WmBp2ajoHQj48X5nMTISK4N7IvFLV1SHuIABz/3ZHz3ua9sovg8c3t7Goz8GmX4BK6s+oOHi3+l6EPd0mf3h8Bua66cOs4FH0+qN36TEp0noXIqbdqghRBCwG8tIfKhqaMwjL0rDDlo6iiEyHUGJ4uVKlXi8uXLJCQkYGGRJ0/gEK+IpCSFXZd0S1DVFmZ4VH5JsZisiHwIy7vAQ10FUZWdC85vdyL2z62YKRBy25Yng2ZT6MxK3awhgDYMgk7qu6gCsO+F41mK1YSW440T4wuSoqO5+eFAVFduAhBqB/+Macak7l6YmxlxtjUfUFlaUq3fQKr26c/1jesJmPc9Je/qksYSIcABG/47+y8B/zanWMvOlOkxPVuFhIQQQmRT5EOIuGvqKPK0jRs3snHjxlTXz549C8DixYvTPDXgww8/pFmzZjkbnCiQDM7yBg0axIgRI9i0aRPvvGOEc+nEqykxAczMUx4bkUWn/Z/wKEILQItKRbBTG+GHE+F3YXlnCNZVEMW+GPTfTKk4e26s3IpKgeZn41imiuGzwQch6DScWAw3dkN0CKCk3a+FNXT7H1hYZT/GFyTFxuI3dDBJZy/qHsEG1n5cnW+7/4KFWcH9gY3KzIzK3bpTqes7XNm2Bn+v73EP0BUYcglVgY8tQRd388h3O+V6jaNwsw/A7NWfgRVCiDzH3tXUERjOSLGPHTuW4ODgVNe9vb0BXVK4bNmydO8/fPgwhw8fTnXdw8NDkkWRIwz+DvPjjz9m586dDB06lBIlStC4cWNjxiVeFXunwsPL0G42uFQwqAujV0ENDYBlneCJroIomlLQfzMULo8lYNmwPgnHTlI0FM7u+oMnNQZQqFQ9KPW0yE1Som6fY3QI/2xezZtN6kDMY915iWWag2vV7Mf4AiUuDv9PPyXu+ClAV8zG+8PSzOq9GGuL7FcgfhWoVCqqvdWTqh17cGbHCgJ/8aKin26PqXOoCvZbcu7qDzg0/JWaQ+aiLtfUxBELIUQBI8s4Wbt2bZpnhCcni1OmTGHKlCmpPpejM4SpGJwsTp8+nbp163LkyBGaNWtG8+bNadCgAQ4OLy/okZlzFkX+lOrojOCbcHShbh/fgkbQ+GNo8Tmo7TPdp6Io7HyaLFqYqXijatHsBRkWCEs7QJi/7r2TO/TfAoXc9U2KvtuLoGO6paZNTsew/PJyRtYd+awPM3OwKwx2hQm2rQBV38peTBlQEhIIGD2amH99AIixgoX9XPimvzeOallW+SKVSkXdDv2o074v+7cuJOKX36jkHwdA8XtmsCmafZcGUqxVZWp/uBCVYykTRyyEEKKguH37tqlDECJLDE4Wp0yZoj/HTVEUDh06xL///pvhfZIsvrpSHZ0RFQz2RSE8UJcwHv4Zzv8NbabrjqTIxNLUi0HhBIXGANC4fGEcbS2zF+T2z58lioUrQL/N4FgyRRP7N95A5ahBCQvn9WsKn53+k/7V+uNk7ZS9sbNAURSU+HiU2FjuT5tG1B7dvkitBXj1smfiwMUUtzdioZ9XkEqlwrPTcOI6fMjOFTOxW/Q3JUJ0R26UuWlOwn832XzMk6rvdKXSO9PAImeKMwkhhBBC5FcGJ4stWrTI1UO/586dC0D37t0pUaJEro0rMm/ChAmMHj2a8PBw3NzcwL0RfHICfH6Ew3MhUavb2L7uAzi5FDp8B0VfS9lJ5CMIPA4BxyDgBKUf+PGu+VusTmyV/SqoN/bAte261/bFYMB2cEg9U2lmZYVT5848Wb4
Cq0Sodz6K5ZeXM6LuiOyNn4akuDiC584l/J9/UKJjSIqLQ9FqUbRaUFLui4w3hzk9LPlkwDwqO8s5gpllZW7F2wMmE/PeGDb/+AWl1+3HKQoskqDSeXMirm5m246tNPnsB1OHKoQQQgiRpxicLKZViSknjRo1CnNzc4YOHZqr44rMS/PoDCtb8PwKar+vO3bi+g7d9Ts+8GtzaPgRuFSCgKcJYvI+wqccgekWv3NCqcKb1d4wPLiEONj5XIXSNtPTTBSTOb3TnSfLVwDQ6lwS06+upF+1fkadXYx/+JCgESOJeVrh7GUSzODHrmb06juL14u/brQYChIba3venbiA0KH32DrlU17bfwnreLCNg3JHk7j60Sic6hYirn0rrJyMsDdWCCGEECKfyzclFF1cXEhISMDKyvhVJkUucC4H7/8F13fBjnG6pFBJ1J1NmAErVSI/OqykiP0Hho9//H8QckP32u11qNHjpc2tK1fCukYNYi9coNwDcA2INHh2MSEpgc23NnMx+CJh2jDCtGHY3brHe953cApP1LUxg1B7iDOHOEvdLGK8BcRZqIiyht11zejY7XM6lOuQ5fFFSk7Oxekzdy3+Ny/jM2kYtc4+xEwBpwgVTgdDOfGWB+ou7ak3+gdUBew4EiGEEEKI5+WbZLFu3br8888/PHr0iCJFjHTOnsh9ldpC2ZZwZB4c+gESYp59Zq6GknWhVANwex3vW3a0OTGIEqrH1Nae0i0hrdIx62NGPoSDs5++UUH72ZnaL+n0zjvcv3ABAM9zSax0W0n/1/pnqajMk9gnfH7oc47dO6a/1vxCEoN2JGGlyxMJ1sD375jjVyztmOwt7fmgxgf0q9Yv0+OKjJWuUI33Vx3k4L51PJwzleq34gFwDlbB4p347voHl5GTqfzWy3+wIIQQQuS09KqkCpHTjJIsbty4kd27d+Pv709MTAx79z47pDwqKopz586hUqmydbzGiBEj2LVrF9OnT9fvXxT5lKU1tBgLtd6Di+vAzALcGkGxGvrzCRMSk1ix/RDH4/uwwOrp7/fO8VDeEyxtsjbe3qmgfVqhtW4/KFEnU7dpOnbgwaxZKLGxNLuksNxTN7v4aZ1PM3X/lZArfLb/M+5G6Q4gNktS6L0/iU7Hn+1F9Ctjw5ZBVSjrUoSGtkUoaluUIrZFcLV1xdXGFVc7VxwsHXJ1f3BB09LzHRI8OrNi8QSK/rmVsg90150DEkn4fBKHtq6gydw1WFhJARwhhBBCFCzZShb9/Pzo1q0b58+fB3QVHF/8platVtOnTx/u3LmDj4+PwQlj+/bt+eGHHxg/fjxPnjxh7Nix1KpVKzvhC1NzLAVNR6b50dLDt7n1KIpbvM55q9rUjDsLof7g8zO0mpD5MQJPwZk/dK/VjtA689V4zR0c0LRrR9jGjdhp4fVrCittdXsXM5pd3HJrC1OPTEWbqAWglOLEzD2uqE9d1rdxevdd2n85kQ6ytNrkLMwsGDj4e36zrcDlK+tp+I8/RcLBTFFR5MANjnSqR/nvl1CipuwXFUIIIUTBYWbojeHh4bzxxhucO3eOYsWKMXDgQOzs7FK1s7CwYPDgwSiKwvr16w0OtFy5csybNw8LCwtWrlxJ3bp1sbe3x93dnXLlyqX5Vb58eYPHE6YT8DiaH/+5DuiOP7Ds+L1u9hHA5yd47PeSu5+TlAQ7vnj2vtUEsHPJUixO3d/Rv/Y8rxAZH8lXPl9xKPAQMc8voX0qPime2cdnM9FnIglxsVT1V/jsiDM/LVU9SxQtLCg2ZTLFp05BJYlinqIx1zD2213whxc7m1iR9PS6y51EAj7sz/F5Y1NVqRVCCCGEeFUZPLP4888/4+fnR4MGDdi1axdOTk5s27aN6OjoVG07d+7MxIkT8fX1NTjQtA4xjY6OTnO8ZLJ0L/9RFIWvNl4kJl63oa9fI3eq1qoO94fq9jkmamHXROi1KuPOzq+GoJO610WqQIMPsxyPTb16WJUpQ9z
t21S/o1D0icIBDnAg8ABWZlaUVEoSdymOZiWb4aR24qvtn6EcO83IGwq1/BTsYwEekpxemDs7U8rrZ2wbNMhyLCL3tKjUhvq/NWPBT31pvuoymhjQhKuIX7iNfy4cotnMDdg4l8y4IyGEEEKIfMzgZHHDhg2oVCq8vLxwcnJ6adsqVapgaWnJ9evXDR2OpUuXGnyvyD+2nL/HweuPACimsWZs26fnCbYcBxfWQuR9XaGb67uhUpv0O4oNhz2Tn71vNwvMLbMcj0qlwvGdbjya8yMArc/Dypa6z+KS4vDDj4X/fs/Zq9/R/JLCJ4EKZmlNPJmZYdekCcWnTsGypCQZ+YGtpS1jv1jHjsbLeTh5FhXuKlgmQqmDERzs70nFkV9Q/o1sVOgVQgghhMjjDE4Wb926haWlJQ0bNsywrUqlQqPREBYWZuhw9O/f3+B7Re7QarVotVrCw8MNuj80Oo5pWy7p30/t/BoO1k8TPGuN7mzE9R/p3u8cB+VagkU6RUcOfQ+RTyuVVHkLyrcyKCYApy5dePSzFyQm8s5NZxpO+grfQF8eH9xLjZMh1Luh6CubPs/MwQH75s2xb+WBffPmmGfwQxWRN7Vv3o8H61uzbWR3Gh8LBcD9hhmBX3+P/+mteIz+C1V6fw6FEEIIIfIxg/csJiYmYmlpiZlZxl0oikJkZGSaexrFq2PmzJk4Ojri5uZm2P3brxIcGQdA29eK0va1Fw5Gr9EDSjfRvX78H/j+krqTJ3fgxGI4ulD33lwNbb81KJ5kFkWKYN9SN52Y+PARlX/cyrvj9zD4j2AaX02ZKD52tcGm77uUXraMSr6HKfnjHBw7dZJEMZ8r6lSSAd6HOTesLdFPt5m6PlZRaPlVNnz6OtHBAaYNUAghhBAiBxicLJYqVYro6GgePnyYYdvjx4+j1WopV66cocOloigKwcHB+Pv7G61PkT0TJkwgLCyMgICsf+N89L8QVp/U3WevtmDq29VTN1KpoMP3oHr6x/bQDxB8A278AzvGwS/1wasmbBsDSboz82g6AgqVMfCJnnm+0E3EP/+QGBKif2/u7Ixz/36UXb+OJgdPUebLKdi93hCVZdaXvYq8y0xlxnsjf8baewFBRcwBUCdA1f1adn70JteObDBxhEIIIYQQxmVwsujp6QnAkiVLXtpOURQmTZqESqWiXbt2hg6nd+LECbp06YKjoyNFixZNlYCGhoYyZMgQhg4dSkxM6mqVIueo1Wo0Gg0ajSZL98XGJzJxwwX9+y/aVaaYo3XajYtVhwZPl6ImxMC8+vBndzj2K4TcSNnWvSk0G5WlWNJj36IFFsWezXSqLC1xaNuW/3q9R8WDByg6YQLW1apJUaUC4LW6rWi68zCXGhTXX6t6RUXQ2IlsXThcqqUKIYQQ4pVhcLI4ZswYLCwsmDFjBps3b06zjZ+fH127dmXPnj3Y2NjwySefGBwowOLFi2natCmbN28mMjISRVFQXvjGzMnJiYCAABYtWsSGDfKT/vxgwYFb/PcoCoA6pZ3o/br7y29oNRFs0zgCQ2UOpRtDq6/go33QfytYGWfps8rCArcF8yn0fi+KTZlMRZ9/KeX1M+GVKskMYgFkY+dI9xX7uD2sK7FPf/uLh0DJBftZ9llTtDERpg1QCCGEEMIIDE4WK1SowIIFC4iOjqZr165UqlSJ0NBQADp06ED16tWpWLEiW7ZswczMjCVLllC8ePGXd/oSZ8+eZdiwYSQkJDBkyBAOHTqEi0vaZ+YNGDAARVHYsWOHweOJ3HHjQQQLD9wEwMJMxcxuNTA3y2B2zsYJOs8HtSM4ukG9AdBzBXzxHwzaCS0/h5L1IBP7abPCulo1in39NYXeew9zR0ej9i3yp/YjZ+CwbBH3XHTLUq3joeGuJ2zo3Yirl31MHJ0QQgghRPYYXA0V4IMPPqB48eJ8+umn3Lx5U399586d+tfu7u4sWLCA9u3bZ2cofvzxRxITE/nss8/48UfdMQbm5uZ
ptm3VSlf58tSpU9kaU+SspCSFiRsuEJ+omx0e3KIcVYplcglr5XYwQfarCtOrULcZbruOsOOjt6h8WreHu9blJAIGf8TVT3vT5d2vTByhEEIIIYRhspUsgm4WsV27dhw8eBBfX1/u3btHYmIiRYsWpUmTJnh6emJhke1hOHjwICqVii+++CLDtkWKFMHOzo7AwMBsjytyzp/H/Tlx+wkA7oVtGdG6ookjEsIwajsHuqw8yO7vx1Bk+Xas46FUMETO/JMFV48y5KuNmJtn/99BIYQQQojcZJTvXszMzGjVqpV+Ri8nPHjwAHt7e4oVK5ZxY3TFViIiZN8QwJ49e/jqq684e/YsGo2Gbt26MXv2bBxNuJTyxoMIvt12Wf9+RtcaWFumPVMsRH7R5vM53GzajntjRuLyRME+Flr+dYv5QfXpNmMzpVxKmzpEIYQQQohMM+6mrhxkZ2dHTEwMSUlJGbaNjIwkNDQUZ2fnXIgsbzt48CDt2rXD3d2djRs38u2337J27Vq6dOmSqjhQbomNT+TTVWeIjdf9Xr7/emmaVkh7/6kQ+U2FJm9Sf+shAsvbAmCmwJuHtOz7qB27j6wxcXRCCCGEEJmXb5LFqlWrkpiYyLlz5zJsu379epKSkqhbt24uRJa3TZs2jerVq/PXX3/Rrl07PvroI/73v/9x4MABtm3bZpKYZu24ytX7ulnfiq72TOpYzSRxCJFTbAq78MaWE9x94zX9tQZXFBLGf81PS4ab7Ac1QgghhBBZYfAy1ORzFrNCpVKxd+9eg8Z755138PX1Zfr06axfvz7ddv/99x/jxo1DpVLRs2dPg8Z6lRw7doxhw4alOP+vTZs2AGzcuJG33norV+PZc/kB3r63AbCyMOOX9+tgYyXLT8WrR2VmRut5azm/+AeSvJagjoeyD6DwvP18HdCScRN2Yq+2NXWYQgghhBDpMjhZPHDgQKbaJScpiqJk68DyYcOGsXDhQjZt2kT37t0ZPXq0/qfzjx494s6dO2zdupW5c+cSGhpK7dq16dOnj8HjZda1a9fYvXs3p06d4tSpU1y5coXExESmT5/OV19lXAVxzZo1zJ8/n3PnzhEXF0eFChXo3bs3o0aNwtII5/eZm5tjZWWV4pqlpSUqlYpLly5lu/+seBAey+drn80Mf9WxauarnwqRT9X8cCyP6zbjxrAP0IQloYmBd1c/Yv6DxvScuoWyrrKPUQghhO7ot2XLlrF//348PDwM7kelUuHu7s7t27eNFpsouAxOFidPnvzSz8PCwjhx4gSHDx/G2dmZYcOGZasqqrW1Ndu3b6d9+/asX7+eDRs26D97vuiNoihUqlSJTZs2pXu0hjEtXLgQLy8vg+797LPP8PLywsLCAk9PT+zt7dm3bx/jxo1jy5Yt7N69Gxsbm2zFV6lSJY4dO5bi2okTJ1AUhcePH2er76xITFIYtfosT6LjAXijalH6NnLPtfGFMCXnuo2ov9OHY73bU+i/MMwVeHt/HIcetCPo64U0q93S1CEKIYQQQqSSY8lisn///ZeuXbty7tw5Nm/ebOhwAFSoUIEzZ87www8/sHTpUgICAlJ8XqxYMQYNGsS4ceNwcHDI1liZVb16dcaOHUudOnWoW7cuM2bMYMWKFRnet3HjRry8vLC3t+fgwYP6/ZXBwcF4enri4+PDpEmT+OGHH/T3eHt7M3DgwAz7XrNmDd27dwdgxIgR9OvXj++++45BgwYREBDAsGHDMDc3x8zIh9a/zG+HbuF7KwSAoho133Wvma2ZZiHyG/NChWi85TDHR/ZBs+csAA0vK9wZOZQ1n31Cj64fmzZAIYQQQogX5PjBX82bN2fhwoW89957/PLLL3z66afZ6s/e3p4pU6YwZcoUgoKCUpzrWKZMGeMEnQUffvhhiveZTcBmzJgBwPjx41MU4nFxcWHBggU0b96cefPmMWnSJP0RF127dqVRo0YZ9l2yZEn96z59+nDp0iUmTZrEuHHjMDc
35+OPP0atVqPR5M4S0DP+T/hx93UAVCr46d3aONtZZXCXEK8elbk5r89bxYVfvyVx/h+o48H9AYRPm8fiGxf44POF8kMUIYQQQuQZuXJKdNeuXbG0tOT333/PdrL4vJIlS6ZIjPKLoKAgTpw4AcD777+f6vNmzZrh5uZGQEAA27dvp1evXgA4Ojpm+WxElUrFrFmz+PLLL/Hz86NkyZI4OjpSuHBhRowYkf2HAQIDA1O8f/58y/DYeEb8dYaEJN3+0o89KtCkvByTIQq2GkO/5G6NeviNHoVzGGhioNHSg/x+uw19vHagtsyVf5qFECJXvbv1XYJjgk0dhkFcbFxY/dZqU4chRK7Lle9ILCwssLKy4saNG0bvOz5etwfOGMVgcsuZM2cAcHZ2pmzZsmm2qV+/PgEBAZw5c0afLGaHg4MDNWvWBGDRokXExsZmaklrZri5uaV5XVEUvt54kYDHMQDUKe3EyDcqGmVMIfK7Ek3b4bSpKv8OeIvStxMwV6DJvkC29mhA899241q0iKlDFEIIowqOCeZh9ENTh2FyixcvxsfHJ9X15GuzZs3C29s71efjx4+nSpUq+vcDBgxId4zg4OA0P3dxcUmxxUmIjORKsnjx4kUiIyOzPCuWFq1Wy9KlS1mzZg2nT58mPDwcAI1GQ926denZsycDBgxArVZne6yc4ufnB0Dp0ulXQUxOwJLbGur48ePs37+f2rVrk5CQwJ49e5g7dy4//fQT5cqVy1bfGZm8eCObHulmEdVmCm/YBrD2b/8cHTO3BQUFsWrVKlOHkePkOXNQ788J2zmXGqeiAKh2NZbzPVpypesHuBQrkWPDyu/pq0We89URHR1t6hByjItN/l1ZZMzYfXx8WLZsWbqf79q1K83rAwYMSJEsvqyPqKioND93d3eXZFFkjZLDTp8+rdSsWVMxMzNT2rZtm62+zp07p1SoUEExMzNTVCpVml9mZmZKxYoVlfPnzxvpCbKmf//+CqBMnz493TbffvutAihNmzZNt83EiRMVQGnTpk224jlz5ozSuHFjxcHBQbGxsVFef/11Zd26ddnq80UBAQEpvi5fvqwAyp17j5Qhy08q7uO2KpvOBhl1zLxi5cqVpg4hV8hz5rzN095VTlevolyurPs6WreKsnHpghwbT35PXy3ynK+OsLAwBVDCwsJMHYpBAgICTB1CvpX8PeT+/fuz1Q+guLu7GyUm8erK7N9Vg2cWPT09X/p5bGwsgYGBBAUFoSgKFhYWfPnll4YOR2BgIK1ateLJkyeo1WreffddWrZsqd+zePfuXQ4ePMjq1au5efMmnp6enD17Nl/uaTSm2rVr4+vrm6NjlCpVCtDN+mq1Wv11J1srFvapy5H/QmSfohAZ6DTpL/6tPJ3IOStxCQNNFNh8P5dl187R+5sFWJjnXvViIYQQQgjIxjLUAwcOZLptqVKl9BU+DTVlyhSePHlChQoV2LFjB+XLl0/VZsCAAUyaNIn27dtz8+ZNpkyZwqJFiwweM6ckH+sRFRWVbpvIyEiAXKtYagwzZ85k6tSpKa6pVCpJFIXIpOY9J3GzQh2ufvE55QPBMhEabjjI3/5taLdwC86a7J27KoQQQgiRFTl2zqKFhQWFChWiRo0aNG3aNNtn+u3YsQOVSsXSpUvTTBSTlStXjt9//53mzZuzY8eObI2ZU5KP+HjxnMjnJX9miuNADDVhwgRGjx5NeHh4ukVvhBAvV6HuWxRZVYVtn3SlzrkEAOqcCsK3RxPKeG2mehX5uyWEEEKI3JFjyaKxPX78GHt7e5o2bZph26ZNm2Jvb8/jx49zIbKsq1OnDgAhISH4+fmlWRH15MmTACnOYBRCFAyORSrQc8Ux/vzck3q7wzBXoPydWB4NbMv2r+bRoePLtwEIIYQQQhhDvtkEU7JkSRITEzPVVlEUEhMTKVEi5yoJZkepUqVo0KABACtXrkz1uY+PDwEBAajVajp06JDb4Rls5syZODo6yqy
iEEZgYWVL/5+PcPXD6kRY664VeaLg+uXH/OH1I4qimDZAIYQQQrzy8k2y2LlzZ2JiYti+fXuGbXfs2EFMTAxdunTJ+cAMNHHiREB3ls7p06f110NCQhg+fDgAn3zyiVGOG8ktEyZMICws7KXLa4UQWaBS0X3MGrRT3yOosO6SXSzU/m0RK0YPJD4xybTxCSGEMBpvb28URcHDwyNb/SiKwu3bt40SkxAGL0P19zfeeXkvO28w2ddff83mzZv54IMPWL9+PY0bN06z3dGjR/nggw+oWLEikyZNMlqM6Tl9+rQ+uQO4desWAL/99htbt27VX9+wYQPFixfXv+/SpQsjRoxg7ty5NGrUiNatW2NnZ8fevXsJDQ2ladOmTJ8+PcfjF0Lkfc07T+Z62VpcGDeBan5gngQNdhxj0z0P3li0CycHKXwjhBBCCOMzOFlMa5+dIVQqFQkJCRm227RpE8OHD2fatGk0b96c5s2b4+HhkerojIMHD6LRaBg2bBibNm1Ks69+/foZJXaA8PBwjh07lup6YGAggYGB+vfPHymRzMvLi6ZNmzJ//nx8fX2Jj4+nfPnyjB8/nlGjRmFlZWW0OHNDWtVQhRDGUalmF4r8UYPNI7rS8FQ8AK+dfcTR7g2psmATZcqXM3GEQgghhHjVGJwsGmu/TGb7GTBgACqVSn/PoUOHOHToUJr9hYWFMWbMmDT7UalURk0WPTw8svVr0bNnT3r27Gm0eExJqqEKkbMKFS7P+8tO8Me4NtTf8RCLJHC/k4B/v448nDyLhm06mzpEIYQQQrxCDE4W/fz8OHr0KEOHDsXS0pKhQ4fSqlUr/UxfUFAQBw4c4NdffyU+Pp6FCxfSqFEjgwMtXbq0PlkUeZNarUatVps6DCFeaZYWagbOOciash9QZrEv9rFQJAQix41n9/UTtPnkG1OHKIQQQohXhMHJYmxsLIMHD6ZixYrs2rWLwoULp/i8UqVKtGrVipEjR9KmTRuGDBnC8ePHqVSpkkHjyUbdvE+r1aLVagkPDzd1KEK88np8sgSfiguJmDqX4o/BPgZs5q9j04UTdJi3FUtLS1OHKIQQQoh8zuBqqNOnTycyMpIlS5akShSf5+zszOLFiwkPD5eCLa84OTpDiNzVrO0w3Jct45q77p9ycwUqHfRn/9v1eOB308TRCSGEECK/MzhZ3L9/Pw4ODtSqVSvDtrVr10aj0bBv3z5DhxP5gBydIUTuK1+xIa3X/8uRJhr9NTe/eP7r2YnTa343YWRCCCGEyO8MThYfP35MXFwcSUkZn/OVlJREXFwcjx8/NnQ4kQ+o1Wo0Gg0ajSbjxkIIo3Gwc2bgkqMc/6ABEU9P0XCKAMvJ37N7TF+jFSQTQgghRMFicLJYqlQptFot69evz7Dt+vXriY2NleWJr7jk/YqyZ1GI3KdSqej/+XIi53zOTV2dMSySwG3bSfZ1ep3oR8GmDVAIIYQQ+Y7ByWL37t1RFIXBgwezc+fOdNvt2rWLwYMHo1Kp6NGjh6HDiXxA9iwKYXqenoOovXIThxo+K3BT4mYE5zu2wO/QPyaMTAghhBD5jcHVUL/88ks2btzItWvX6NixIzVr1sTDw0N/dMbdu3c5ePAgZ8+eRVEUqlatysSJE40WuMh75JxFIfIGt6KVGPT7SeZPf4ummwKwjwXHcIXw4SM4PKgzTUfPMnWIQgghhMgHDE4W7e3tOXjwIP3792fXrl2cO3eO8+fPp2iTvE+mXbt2eHt7Y2dnl71oRZ4m5ywKkXdYWljx2dTd/PHalxRasJ5y98EqAZz/t4kdF45D60GmDlEIIYQQeZzBySKAq6srO3bs4PDhw6xdu5ZTp07x6NEjAIoUKUK9evXo0aMHTZo0MUqwQgghsqZPz285Xf0Njk/6hIaXdAXJyhy5x53AmYQ0q0PhMq+ZOEIhhBBC5FXZShaTNW3alKZNmxqjKyGEEEZWt1orKqw4zB+ft6f5/lAsksA9IIkr73bHZsJ
w6nX51NQhCiGEECIPMrjAjRAvkmqoQuRdGlsnhs8/wukPWxBuq7tWOAzMv1rAhsndUDJxDJIQQgghChajJIuPHj3i77//5ocffmDatGnG6DJdkZGR/Pzzz3To0IHq1atTvnz5FJ+Hh4ezcuVKVq1alaNxiNSkGqoQeV//0b8R+910AoqqAFAnQJXVV/i7T12iHgeYODohhBBC5CXZWoYaFxfH2LFj+d///kd8fLz++tdff61/HRoaSvny5YmKiuLKlSuULVvW4PFOnjxJly5duHfvnr54jkqlStFGo9Ewa9YsLl26RJEiRXjjjTcMHk9kjVRDFSJ/aPlGd367E0bY1vlUvxIDQM3TWv59pw3uUyZStWVfE0cohBBCiLzA4JnFpKQkOnfuzPz584mPj6ds2bJYWKTOPZ2cnOjXrx9xcXH8/fffBgf64MEDOnTowN27d6lRowbTpk1Do9Gk2Xbw4MEoisKmTZsMHk9knVqtRqPRpPv7IoTIOzS2tnRZc4JTbV8j3lx3zf0eRI6cwc7ZfeHpD+SEEEIIUXAZnCwuX76cXbt2UaxYMXx9fbl58ybOzs5ptu3ZsycA+/btM3Q4fvjhB4KDg2nbti0nT57kq6++wsbGJs22HTp0AODo0aMGjyeEEK86Swtz+nit5c6ETwl5+jMe+1hwX3qSdQPqExd237QBCiGEEMKkDE4Wly1bhkql4qeffuL1119/adu6detiZmbGpUuXDB2O7du3o1Kp+P7779OcwXxeuXLlUKvV3Lp1y+DxhBCioOjUZzhF/tzI1XJW+mvVjkWz751W3D663oSRCSGEEMKUDE4Wz58/j0ql4u23386wrVqtxtHRkeDgYEOH4/bt21hbW1O9evVMtbe3tycyMtLg8YQQoiCpXLEy7Tee4phnORKfbgV3D4QHw7/k37lDZFmqEEIIUQAZXOAmKioKBwcHrK2tM9U+Pj4+wxnBlzE3NychISFTbRMTEwkPD5e9c7lMq9Xqj88QQuQ/1lYWDFiwjTWLZ1Hy12UUigRNNCT8eoiN15rT6cddmKvtTB2mECKf8nunOwnZmDgwJQsXF8quW2vqMITIdQZnb0WKFOHu3btERUVhZ/fybx7+++8/IiMjqVixoqHD4e7uzuXLl/H396d06dIvbXvgwAHi4+OpXLmyweOJrJs5cyZTp041dRhCiGzq8eF4zr3+JtfHDKCSfwIWSVB5bwhbejWk2c9rcSld1dQhCiHyoYTgYBIePDB1GEKILDA4WWzcuDHr1q1j8+bN9OrV66Vtf/75Z1QqFS1atDB0ONq1a8fly5dZsGABs2bNSredVqtl/PjxqFQq3nrrLYPHE1knR2cI8eqoVaMe5TcdY/2wdjQ4+giAypeTON2vG05fjKVhhw9MHKEQIr+xcHExdQgGM1bsY8eOTXNblre3N7NmzeLq1aupPvvhhx9wyce/diJ/MzhZHDJkCGvXruWrr76iSZMmuLu7p9luzpw5zJs3D5VKxfDhww0OdMyYMfz666/MmTOHEiVKMGzYsBSfK4rCwYMH+eKLLzh16hQuLi7ZGk9knVqtRq1WmzoMIYSR2NvY0s/7EH99O5Qqqw6iTgC3+xA26QfWXPSh+9glqMwM3vouhChgZBknrF27ljt37qS67u3tzc6dOzl48GCqz6ZMmSLJojAZg/8v37p1az788EP8/PyoV68egwcPJjo6GoDvvvuOTz75hPLly/PFF18AMGrUKOrUqWNwoMWKFeOvv/7CwsKCUaNGUbhwYR4/fgxAtWrVcHJyonXr1pw8eRIbGxtWr16No6OjweMJIYTQee/LX4n+7kseP90G7hgFVbyPsvyTZoRHRJg2OCGEyEdu376NoiipvkC3jSqtz8qUKWPaoEWBlq0fCS9cuJDPP/+c0NBQFi9eTMTTbxomTJjAwoUL8fPzQ6VSMX78eL7//vtsB9uxY0eOHDlC8+bNiYyMJD4+HkVRuHr1KhERESiKQpMmTTh8+DC
tWrXK9nhCCCF0mnToQ4V1m7ntZgmARRI03PeEbYMacf2G4cciCSGEECLvMrw8KboKpbNnz2bIkCF4e3tz5MgR7t27R2JiIkWLFqVJkyYMGDCASpUqGSteateuzYEDB/Dz88PX1zfVeMYcSwghxDNF3SrSZtsJtgxuR+Wj9wGofSGJ68O6c3/iFFp4vmviCIUQQghhTAYni+fPnwegXLlylCtXjmnTphktqMwoW7YsZcuWzdUxhRCioDO3UtPFez97vx9BEe9/sEyE8oHwaMIU/v7oLD0/nGnqEIUQQghhJAYvQ61duzZ169YlNjbWmPEIIYTIB1p/PhfVdxMIf3pyUpEwqOi1kUWTOuv33wghhBAifzM4WXR0dMTR0VGqMwkhRAFVo2M/yv71F/eK6v5XYh0PzdZcZ8mHDYmICTdxdEIIIYTILoOXoVaqVIkzZ84QGxuLtbW1MWOiXLlyRulHpVJx69Yto/QlhBAiNdeKtWi29Qh7+nlS9koUAE0PR7L93SbU/2Ul5d1rmjhCIYQQQhjK4GSxb9++nDhxguXLlzN48GBjxsTt27eN0o9KpTJKPyJztFotWq2W8HCZURCiILFy0NB+/Qm2f9KJMntvYQbUvJ7IrT7v8nDWVBo37WnqEIUQQghhAIOTxY8//pi9e/fy2WefYW5uzsCBAzEz0uHMS5cuNUo/InfNnDmTqVOnmjoMIYQJqFQqOs7fyr4fx+LkvQ2bOHB7BI9HTmbHRH/adxtr6hCFEEIIkUUGJ4sffPABTk5OWFhYMHjwYCZMmED9+vUpUqQI5ubmad6jUqlYsmRJhn3379/f0LCECU2YMIHRo0cTHh6Om5ubqcMRQpiA5+gfOFuhNk++/ZZCYeAcCdZTl7Dm3m16fDzP1OEJIYQQIgsMTha9vb1RqVT6qnfBwcHs3LkzzbbJ7TKbLIr8Sa1Wo1arTR2GEMLEar/dh//cynPns0EUewC2Wqgyfy9/POhG76nrZIuAEEIIkU8YnCz269dP/ocvhBAiTeXqNMbhj52c+qgj7rcTsUiCen9f4Y9HLen1y14sLCxNHaIQQgghMpCtmcXc5O/vb9B9pUuXNnIkQgghMqOImzvN1hxh7yAPKl2IBqD+/kesf/91Oi87iNrGwcQRCiGEEOJlMpUsDho0CCcnJ3788cecjiddZcuWzfI9KpWKhISEHIhGCCFEZtg7ONB+5TE2DnuTGj73AahxPoZd3RrTcvlWHIuUMW2AQgghhEhXpsqXent789dff6W80cyMkiVL5khQaVEUJctfSUlJuRafEEKItFlZWtBj0T7Odq5J/NP6ZxX9Evm3d0fCQgJMG5wQQggh0pWpZNHMzCzNGbrk4ja5wc/P76VfZ8+eZdGiRbz22ms4OzuzZcsW/Pz8ci2+3Hbz5k2GDh1K3bp1sbS0pEyZMi9t26FDB+zt7XFxcWH48OFERUXlXrBCiAJPpVLRa/Zqrg56i+indbDK+yexr187wkPvmTY4IYQQQqQpU8tQCxcuTEhICA8fPsTV1TWnY0qTu7t7hm1q1qxJ3759ad++PYMGDeL06dO5EJlpXLp0ia1bt9KwYUMUReHJkydptgsLC8PT05MSJUqwZs0aHj9+zOjRo3nw4AHr1q3L5aiFEAVdzzHfs9HeEfd5f2IdD1VuJbFjwJu0X74XjaaoqcMTQgghxHMylSw2btyYLVu20LJlSzp16oS9vT0AkZGRTJs2LUsDfv3111mPMgusrKyYO3cuNWrUYNq0afz22285Op6pdOrUic6dOwMwdOjQdI8t+e2333j06BEnT57UJ/o2Nja88847nDp1inr16uVazEIIAdBlyFdsidPi/utaLBOh5tVENn3Qms5LD6CxdzF1eEIIIYR4KlPJ4uTJk9m7dy/Xrl3j+vXr+utRUVFMnTo1SwPmdLII8Nprr6HRaNixY0eOj2UqZmaZWkHM9u3b8fT0TDEj/Pbbb2Nvb8/WrVslWRRCmESnT6ezLS6
O0ks2Y5EE9S8ksmZIK3r+7yAOds6mDk8IIYQQZDJZrFOnDmfPnuV///sfFy9eJCYmhgMHDmBpaUnjxo1zOsYsi4uLIzo6mtjYWIP7uHbtGrt37+bUqVOcOnWKK1eukJiYyPTp0/nqq68yvH/NmjXMnz+fc+fOERcXR4UKFejduzejRo3C0jL3zhe7fPky/fr1S3HNwsKCSpUqceXKlVyLQwghXtRxzGx2auNwW74TM6DJqQSWf+pB//mHsLdxMnV4QgghRIGX6XMWy5cvz+zZs/XvzczMcHZ2Zv/+/TkSWHasXLmShIQE3NzcDO5j4cKFeHl5GXTvZ599hpeXFxYWFnh6emJvb8++ffsYN24cW7ZsYffu3djY2BgcW1Y8efIEJyenVNcLFSrE48ePcyUGIYRIT7uJP7FbG4fb6n0AePrGs2hUSz766RD2No4mjk4IIYQo2DK3ljENpUuXzlYyllX+/v4v/bp+/Tr79u1j5MiRDB8+HJVKxdtvv23weNWrV2fs2LH8+eefXLlyhb59+2bqvo0bN+Ll5YW9vT3Hjh1j165drFu3jhs3blCjRg18fHyYNGlSinu8vb1RqVQZfq1du9bg5xFCiLyqzdT53O3SVP++3YE4FozzIC7B8NUhQgghhMi+TM8svuj27dtGDCNjZcuWzXRbRVGoWLEiU6ZMMXi8Dz/8MMX7zO4RnDFjBgDjx4+nbt26+usuLi4sWLCA5s2bM2/ePCZNmoSjo+6n5l27dqVRo0YZ9m3IuZaFChUiNDQ01fUnT55QsWLFLPcnhBA5ofWsxRyI7UfRnScA6PhPLHO/fpMx3x5CpVKZODohhBCvGg8PDw4ePMj+/fvx8PAwdTh5lsHJYm7L7JmOZcuW5d1332X8+PFoNJocjiqloKAgTpzQfaPz/vvvp/q8WbNmuLm5ERAQwPbt2+nVqxcAjo6O+sTR2KpWrZpqb2JiYiLXr1/P1szr8wIDA1O8j4iIMEq/QoiCpeVPyzgU3R3XQ5cxU+CNzcHMd+rBJ1/IqgohhBDCFPJNsujn5/fSzy0sLChUqBC2tra5FFFqZ86cAcDZ2TndmdD69esTEBDAmTNn9MliTurQoQNTp07l0aNHFClSBIAtW7YQGRlJx44djTJGbi5HFkK8ulQqFS1+XcP+Hh4Uv/QIdQI0WHWJFYXH0PeDOaYOTwghhChw8k2y6O7ubuoQMpSc0JYuXTrdNsmJVUbJb0aio6PZvn07AP/99x/R0dH6PY0NGjTQ/3oNGTKEX375hc6dOzNp0iSePHnC6NGj6dy5M/Xr189WDBlZs2aNSZP33BAUFMSqVatMHUaOk+d89eTlZ1V1+Yik0B8oGRSHJgbcFm3nx9AEipfIeLn+i/LycxqTPOerIzo62tQhiBx2/Phx1q5dy4EDB/D39+fx48cUKlSIhg0bMnLkSN54440U7SdMmMCsWbMYMmQIv/76a5p9Xrx4kRo1auDq6kpgYGCKyvt3797lhx9+YMeOHdy5cwdzc3OqVKlC//79GTp0KBYWKdOBAQMGsGzZMpYuXUr9+vWZPn06hw4d4uHDh0yaNIkpU6YQHx/P6tWr2bFjB6dOneLu3bvEx8dTunRp2rZty/jx4ylRokSasYaEhDB16lQ2btzIgwcPKFasGF27dmXKlCl89tln+rEHDBiQ6t69e/cyf/58jhw5QkhICE5OTjRt2pQvvvgi0yc0HDhwgFatWunfP/8a0I99+/ZtypYti7u7O7du3cLLy4sVK1Zw48YNoqKi9KseL1++zOrVq9mzZw+3b9/m0aNHODg4UKdOHQYPHkzPnj3TjaFly5b8888//Pjjjyxfvpz//vsPW1tbWrZsybfffkvVqlVT3Xvq1Cm+++47Dh8+zMOHD7G2tsbFxYU6derQr18//TnsxpJvksX8IHn5pZ2dXbpt7O3tAQgPD8/WWA8fPqRHjx4priW/f/4vmJOTE/v27WPEiBF0794da2trevTowQ8//JC
t8Z8XEBCQ4n1ERATVqlWjR48eub4UOLetWrUqV2aITU2e89WT1581rmNHjnVqjktIEkVDoeqW3bj80JEG9dtkqZ+8/pzGIs/56ggPD09VN+FVMnPmTIPua9++PbVr1051/ezZswafqz1hwoQ0ry9fvpygoKAM2xlq4sSJ7N+/n9dee4169ephZ2fHrVu32Lp1K1u3buXnn39m5MiR+vYDBw5k1qxZrF69mp9//hlra+tUfS5duhSAPn36pEgUDx06RJcuXXjy5AllypThzTffRKvVcvz4cT799FO2bNnC1q1b0zzWzdfXl6FDh1K8eHFatGhBTEwMDg4OADx48IC+ffvi6OhI1apVqVmzJlFRUZw9e5ZffvmFv/76C19fXypUqJCiz3v37tG8eXNu3bqFs7Mzb731FklJSSxfvpydO3emmRwlGzt2LHPmzMHMzIz69evTvHlz/P392bRpE1u2bGHRokUMHDgww1//YsWK0b9/f3bu3MmDBw9o27YtxYoV03/+YsyKotCtWzd27txJ8+bNqVq1KpcuXdJ//uOPP7JkyRKqVKlCjRo1cHJywt/fn/3797N3716OHj3Kjz/+mGYs8fHxdOjQAV9fX1q0aEHVqlU5fvw4GzZsYP/+/Zw5c4YyZcro2+/du5f27dsTHx9PrVq1aNy4MYmJiQQFBbFt2zYSExMLbrIYFxfH1atXsbKyokqVKi9te/XqVeLi4qhatWqunmmYm8qUKZPpfZyVKlVi586dORZLqVKlANBqtWi12hwbRwhRcFg5O1NjxQau9uyMYySUuQ9XJo3E5X+bKesmxbmEyK/i4uIMui8pKSnd64b2mZ74+Hij9/m8MWPGsGLFCooXL57i+pEjR2jXrh2ff/453bt31xc2rFSpEk2bNuXw4cNs3LiR9957L8V9CQkJ/PHHHwApkqX79+/TrVs3QkNDWbBgAUOGDNEXbAwJCaFnz57s3r2bmTNn8vXXX6eKc9GiRYwfP55vv/02VaFHR0dHNm3aRLt27bCystJfj4+PZ/LkycycOZORI0eybdu2FPd9/PHH3Lp1Cw8PDzZt2qSfVAgNDaVTp05s3LgxzV+zRYsWMWfOHCpUqMC6deuoWbOm/rNDhw7x1ltvMXToUJo1a5ZhAccqVarg7e2Nh4cHDx48YPz48S8tcOPv709SUhIXLlygUqVKqT7v27cvEydOpFy5cimuX7t2jTfeeIOffvqJ9957j4YNG6a619fXlzp16nDr1i19whobG0uXLl3YtWsXM2fO5LffftO3//bbb4mPj+ePP/6gd+/eKfoKCwvLkTPUDT46I7f99ddf1KlTh7lz52bYdsaMGdSpU4c1a9bkQmTPJP+0JSoqKt02kZGRAK/kjNvMmTNxdHSUPYxCCKNwKleJ4nN/IVqte1/VD45+1oWQsCemDUwIIbKhffv2qRJFgMaNG/Pxxx8THx/Ppk2bUnw2aNAg4NkM4vO2bdvGw4cPqV+/PtWrV9df//nnnwkJCeHjjz9m2LBhKRK+woULs3z5ciwtLZk3b16aExCVKlXim2++SfNEAAcHB95+++0UiSKApaUlM2bMoESJEuzcuTNF0cM7d+6wceNGzMzMWLhwYYrvhZ2cnFi4cGGa1a+TkpL0Jxz89ddfKRJFgBYtWjBp0iTi4uJSJFbGNGPGjDQTRYCWLVumShQBKleurD8uL73j71QqFUuXLk0xs2ltbc3UqVMB2LNnT4r2Dx48AHQ1SV7k6OiYqdMVsirfzCwmJ379+vXLsO1HH33EH3/8wd9//51mVdKckjxN/OKyzOclf/b8lPKrYsKECYwePZrw8HBJGIUQRuHe5A1CJ40kfrIXlolQ+1ISW0e05v0lJ7C0MDd1eEKILHoxucis9I4wMzMzM7jP9FhaWhq9zxeFhISwbds2Ll68yJMnT4iPjwfgxo0bgG5W6nk9e/ZkxIgR7Nmzh8DAQP2qLniWQCYnlMmSZ/XefffdNGMoWbIkFSt
W5PLly9y4cSNVMtSlSxfMzV/+7+y5c+fYu3cvfn5+REVF6WeAExISSEpK4ubNm9SpUweAf//9F0VRqFevXpqrBKtXr07NmjU5d+5ciutnzpzh7t27lC9fnnr16qUZR/LMoK+v70vjNdQ777zz0s8jIyPZsWMHZ86cITg4WD8zfe/ePSD172ey0qVLU6tWrVTXk5fjPr8cGqBhw4ZcvnyZ3r17M3HiRBo1apRqz6mxGdz7oUOHsLKyynQGe/z4cWJjY2nRooVB412+fBkLCwsaNGiQYdvGjRtjaWmZYj1xbkj+yxASEoKfn1+aFVFPnjwJkOIMRiGEEOmr1X0oBwNv4frrVgAaHovhzy86MuDHnFteL4TIGcbe/1e7du009zJmR2YmJrJj0aJFjBo16qUr0V6sbWFvb0+PHj3w9vZm+fLlTJw4EdDVsNi2bRvW1tap9vP+999/ADRv3jzDmB49epQqWXzZxEZUVBR9+/Zlw4YNL+33+edIPmrtZf2WKVMmVbKY/By3bt3K8NzdR48evfRzQ7i6ur60YOOWLVsYOHAgISEh6bZJr1ZJekUxk2ddX9zeNXPmTM6fP8+OHTvYsWMHNjY21K1bFw8PD3r37v3SPZ+GMjhZ9PDwoHjx4qky3vS8++67BAQEkJCQYNB4d+/eRaPRZPgTDtAdo6HRaLh7965BYxmqVKlSNGjQgBMnTrBy5Uq+/PLLFJ/7+PgQEBCAWq1Oc/o4v5s5c6Z+2lwIIYyp5Wffs+3ubcptvghA3V13WOM1hh4j5UgNIUT+cerUKYYMGYK5uTmzZ8+mU6dOlC5dGltbW1QqFf/73/8YMmRImstCBw0ahLe3N8uWLdMni3/88QcJCQl0794dJyenFO2TZ/m6d+/+0uKLoFuW+iIbG5t020+YMIENGzZQpUoVZs2aRYMGDXBxcdHPyDZp0oQjR46k+RwvS/jSW4YKusI0bdu2felzuLi4vPRzQ7zs1yEoKIh3332XmJgYvvjiC3r37k2ZMmWwt7fHzMyM3bt307Zt23TrjKQ3Y56eYsWKcfLkSQ4ePMiePXs4fPgwx44d4/Dhw8yYMYOZM2cybty4LPWZkWzNW2a2wIqh7Z9na2tLeHg4iYmJGSaMCQkJhIeHv/Q3N6dMnDiRrl27MmvWLNq3b6+fQQwJCWH48OEAfPLJJzg6OuZ6bDlNlqEKIXJSh9l/s8m/EZXPhmOZCKWXbcenyus0a5u6LLkQQuRFa9asQVEUPv30U7744otUnycvQ01L8+bNqVChAtevX+fw4cM0bdoUb29vIPUSVNAd13bjxg3GjRtn9OPS/v77bwBWr16dag8hpP0cyQV7bt++nW6/aX2W/D1l4cKF9c+bV2zZsoWYmBi6du3K7NmzU33+st9PQ6lUKjw8PPRLb2NjY/H29ubjjz9m4sSJdO/enfLlyxttvFwrcBMREZGtyqSVKlUiISEh1UbPtOzZs4f4+PhUpW+z4vTp0zRq1Ej/lbzu+7fffktxPXktcrIuXbowYsQIIiMjadSoEe3bt6d79+5UqFCBCxcu0LRpU6ZPn25wXEIIUVCpVCo6Lt2HX2ndDww10ZAwbTL+/103cWRCCJE5jx8/BtI+Pzw2NpZ169a99P7kaqfe3t6cOnWKCxcu4ObmRuvWrVO1bd++PfAssTOmlz3Hrl27CA4OTnW9efPmqFQqTp06xfXrqf/dvnz5cqolqIB+1vLy5ctG3WKWPAtq6KpHePmvg6IorFy50uC+M8va2pqhQ4dSs2ZNkpKSOH/+vFH7z5Vk8fjx4zx+/DjNyk+Z1alTJxRFYcyYMYSFhaXbLiwsjDFjxqBSqejUqZPB44WHh3Ps2DH9V/If+sDAwBTX0zoqwsvLi9WrV9O4cWN8fX3Zvn07pUqVYtasWezbt88kM565QaqhCiFymqWNHfUW/cWDQrr3RUPgwrB3iImOMW1gQgiRCcl7ypYtW5aiUmhsbCzDhw/Hz8/vpff3798fMzM
z/v77b+bPn5/i2os+//xznJyc+PHHH5kzZ06ax4H4+fnpj90w5Dl++eWXFNevXbvG0KFD07ynTJkydOrUiaSkJIYNG5bi+cPCwhg2bFiaqxAtLS2ZPHkyiqLQtWtXfHx8UrVJTExk3759HD16NNPPkFwkKDsJaPKvw9q1a1NMICUmJvL1118bveDODz/8gL+/f6rrV69e1c9ippW4ZouSSd7e3kqrVq30XyqVSlGr1Smuvfjl4eGh1K5dW7GyslLMzMyUQYMGZXa4VMLCwpSiRYsqZmZmStmyZZVly5Ypjx490n/+6NEjxdvbWylbtqyiUqkUV1dX5cmTJwaPJ7IuNjZWCQsLUwICAhRACQsLM3VIOW7lypWmDiFXyHO+evL7s574Z5lyvFYV5XJl3dfa95ooSUlJqdrl9+fMLHnOV0dYWFi+/n9oQECAqUPI0548eaK4u7srgFK4cGGlS5cuyjvvvKO4uroqDg4OysiRIxVA6d+/f7p9tGvXTgEUQFGpVMqtW7fSbXvw4EHFxcVFARRXV1fF09NT6d27t/LWW28p5cuXVwDl9ddfT3FP//79FUBZunRpuv2uW7dOUalUCqDUqFFDee+99xRPT0/F0tJS8fT0VJo0aaIAyv79+1PcFxQUpJQpU0b//N26dVO6du2qODs7KxUrVlTefvttBVD+/PPPVGN+/vnn+ud+7bXXlM6dOyvvvfee4uHhoTg5OSmAsnDhwnRjftHWrVsVQLGyslLeeustZdCgQcoHH3ygHD58WFEURfHz81MAxd3dPd0+4uPjlXr16imAYm9vr3Ts2FHp2bOn4u7urlhaWirjxo1TAKVly5Yp7tu/f3+a15+X/KzPc3R0VAClSpUqSteuXZX3339f8fDwUCwsLBRA6devX6afP7N/VzO9Z/H27dscOHAgxbW4uLhU19JTsWJFpk2bltnhUtFoNGzatIkOHTpw+/Zt/TS8tbU1oPuJDOimfJ2cnNi4cWOqjb4iZ6nVatRqtanDEEIUAPXf6Memj45Tbv5eLJKg2pnHbBzXj67frTB1aEIIkS4nJydOnjzJ5MmT2bVrFzt27KBw4cK0adOGyZMnpzlr9qJBgwaxc6euGnSLFi3SPOMvWYsWLbh06RLz5s1j27ZtnDhxAq1Wi6urK6VLl6ZPnz4ZHguRlm7dunHw4EGmTp3KuXPnuHXrFuXKlWPKlCmMHTuWNm3apHlfiRIlOH78OFOnTmXTpk1s3bqVokWL0qtXL6ZNm0aPHj2AtAvVfPfdd3Tp0oUFCxbg4+PDzp07sbKyonjx4nh4ePDWW2/RrVu3TD9Dx44dWbRoEQsXLmTfvn1ER0cD0KxZM5o0aZKpPiwsLDhw4AAzZ85k3bp17N27F41GQ5MmTVi3bh0RERFp7mU01Pz589m7dy8nTpzg4MGDREVFUaxYsf+zd+dxNpb/H8df9+z7ZuwzjC1lJ1sRomxFqajUt7RJu6gspZJCoiKpfiXVt28qKiqKQkqWiCRblsHM2MeY1ez3748xZ+Y4s545M2dmvJ+Pxzy47/u6r/tzzZntc66Na6+9lpEjR3LDDTc47Fm5jPOZa7HWrl1rlRhOnjwZPz8/xo4dW+g9Li4uBAQE0KpVK3r16lWilUyLc/jwYcaPH89XX31lM8bY3d2dW265hVdeeaVa7mNY2aWlpZGWlmZZ4CY+Pt5qw9XqaOHChTZLVVdHamf1U13a+vHY3nReljP0J8uA0089SK/7RluuV5d2FkftrD4SEhIIDAyssr9DL9wDUKSkzp49S+PGjYmPj+fEiRPlsrKp5Cnp92qJexZ79uxJz549Lce5yeILL7xgX4R2atiwIQsXLiQlJYXNmzdz4sQJIGcp2Y4dOxa5D4qUL22dISIV7T8zVvLJicvpsiUdVxMC5rzH/tZdaNr5CmeHJiIiBfjjjz/o3Lmz1blTp05x//33ExcXx+DBg5UoViJ2b50RGRnpkJ5Ce/n4+Fglr+J82jpDRCq
ai6sbN761jJ/u7EurAybeaRAz5n7Clm/EK8Df2eGJiMgFunTpQlhYGJdddhk1atQgJiaGbdu2kZSURIMGDZg7d66zQ5R87F4NtWHDhhpmIFY8PT0JCAioksNmRKTqCgoOo8XUWcSc31O61ulsfrlvgHODEhGRAj333HOEh4ezfft2vvrqK7Zu3Urjxo2ZOHEiW7duVYdDJWN3svjPP/9w0003MWnSpGLLjh8/nptuuok9e/bY+ziLI0eOMHr0aFq0aIGfnx9ubtado2fPnmXq1KlMmzatTPumiIhI1dGy7QBOP9Cf1PPb+TbcEcvaKU84NygREbExZcoU1q9fz4kTJ0hPTycpKYnt27fzyiuvUKNGDWeHJxewO1n85JNPWLp0KY0aNSq2bN26dVm6dCmffPKJvY8DYNmyZbRu3Zq33nqLPXv2kJKSYrMfS1BQEMuWLeO5557j+++/L9PzpHRyF7dJSEhwdigichG6bcQbrOufN88l+IuVGId2OzEiERGRqs3uZPHnn38G4Prrry+27G233YZpmqxcudLex3HgwAFuvfVWEhMT6devH5988gnBwcEFlr3//vsxTZNly5bZ/TwpvWnTphEYGKjhAyLiNPe//APr2+b8anPPhIZLFpEWe8rJUYmIiFRNdieLR44cwd/fn1q1ahVbtnbt2gQEBBAdHW3v45g5cyYpKSnccccdLF++nDvvvBMPD48Cy1577bUAbN682e7nSelNmDCB+Ph4oqKinB2KiFykvD396DTlPQ7WyTkOioeNIwbajEIRERGR4tmdLCYnJ2MYRonLG4ZBfHy8vY/jp59+wjAMXnrppWLLhoWF4e3tzaFDh+x+npSeFrgRkcqgzSXdOfXQLSR55RzX2pfEpvH3ODcoERGRKsjuZLF27dokJCRw9OjRYsvGxMQQHx9fpj1TYmJi8PHxKdEcScjZWuPcuXN2P09ERKquEbdO4afB9S3Hft9t5Mi3nzoxIpGLg7u7O+np6c4OQ0SKkJ6ejru7e4nK2p0sXnnllQDMmTOn2LK5Za64wv5Nkj09PcnIyChR2bS0NM6ePUtgYKDdz5PS0wI3IlKZPP7sElZ0zlkx2zXb4OTkV0iLPujkqESqN19f3zKNJBOR8hcfH4+fn1+JytqdLI4aNQrTNJk5cyazZ88utNybb77JzJkzMQyDkSNH2vs4mjZtSkZGBnv37i227A8//EBWVhatW7e2+3lSelrgRkQqEz9PPzKvvotdDXKmTPgmw9b7bsLMynJyZCLVl6+vL5mZmcTFxZGdne3scEQkn+zsbOLi4sjMzMTX17dE97gVX6RgPXr04L777mP+/PmMGTOGt956i/79+9OwYUMADh8+zA8//MChQ4cwTZO7776ba665xt7HMXjwYLZu3crMmTN5//33Cy135swZnnnmGQzD4MYbb7T7eVJ6EyZMYMyYMSQkJChhFJFKob5XGCcfvot6Uz4mKBmCDqex5ZnhdJr1hbNDE6mWDMOgTp06JCYmlmiqkohUrICAAOrUqVPi8nYniwDvvvsufn5+zJkzh4MHD/LOO+9YXTdNE8MweOKJJ3jttdfK8iieeOIJ5s2bx4cffoi/vz9jx461un769Gm+//57Jk+ezOHDh4mIiOCBBx4o0zOldDw9PfH09HR2GCIiVkbdMI7n/17L8M8OAeD1w3airv6Y8Ovvdm5gItWUYRha8E6kmihTsujq6sobb7zBqFGj+Pjjj9mwYQMnTpwAoE6dOlxxxRXcddddNG/evMyBBgYG8t133zFgwABmz57N7NmzLaux+vj4kJaWBuQkqDVr1mTJkiV4eXmV+bkiIlK1GYbBuHGL+XD/lVz7Rzpu2QYxU6ZTp1Nv3GtrFISIiEhhypQs5mrevDlTp051RFVF6tSpE9u3b+fZZ5/liy++sCSIqampQM4KXEOHDmX69OmEhYWVezwiIlI1+Hn60m3yxxy493aaHIPAePht1GB6f70VSrENlIiIyMX
E7gVunKV+/fp89NFHxMXF8dtvv/Hll1+ycOFCVq9ezZkzZ/j000+VKIqIiI3LG7Xj5MP3kuKRc1x3dyq/T77DuUGJiIhUYg7pWXQGLy8vunXr5uwwRESkCrl36NPM2vILA5fmbKHh89U2jvT8jAZXD3dyZCIiIpWPQ5LFI0eOsH79eo4ePUpycjKmaRZa9vnnn3fEI6USSktLs+y1KCJSWT308td8ta8zHXel45UB+1+cQt32vXAPqufs0ERERCqVMiWLR48e5cEHH+SHH34oMkGEvJVRlSxWX9OmTWPy5MnODkNEpEg+7p60nvYpJ0YMo3Yc1D0Byx+7nhs++VPzF0VERPKxe85ifHw8PXv2ZPny5bi6utKmTRtM08Td3Z1u3brRtGlTICdJDA4OpmfPnvTo0aNEdbu6ujrkw82tyo6yrZImTJhAfHw8UVFRzg5FRKRI7Zq35uiD95N5/rfgJZvP8fPMO50blIiISCVjd7I4e/ZsDhw4QPPmzdm3bx/btm0DICQkhF9//ZW9e/dy5MgR7r77buLi4rjmmmtYs2ZNieo2TdNhH1JxPD09ta+SiFQZw0eMZcPVjS3H/gu3cvCXD50YkYiISOVid9fb0qVLMQyDWbNm0aBBgwLL1K9fnwULFuDu7s7zzz9Phw4dGDBgQLF1lzSpFBERKYs7Z33Dyhs70fxQOgEpsH/ya9S/tBuedcq+P7CIiEhVZ3eyuH//fgzD4JprrrE6n5GRYVN28uTJfPDBB8yZM6dEyWLPnj3tDUtERKTEfLw8aPL6QmJH3EyNBAg/Bisev5nB/9sC7l7ODk9ERMSp7B6GmpGRQXBwMO7u7pZz3t7eBa6EWbduXYKCgti6dau9jxMRESkXrVu04PgTo0l3zTlu9ncWP0+8zrlBiYiIVAJ2J4v16tUjJSXF6lzdunXJzMzk33//tTqfu51CfHy8vY8rUFZWFqdOneLUqVNkZWU5tG4REbl43HLHg6y/vr3luOYPR9n18QQnRiQiIuJ8dieLjRs3JjU1lSNHjljOde7cGchZ/Ca/OXPmkJ2dTXh4uL2Ps0hKSuK1116jY8eOeHt7U6dOHerUqYO3tzcdO3Zk1qxZJCcnl/k5IiJycXlg6qf80cYfAI9MiJv3DQl/r3ByVCIiIs5jd7J49dVXY5omK1eutJy79957MU2Td999l6uvvppnnnmGwYMHM378eAzDYNiwYWUK9p9//qFNmzaMHz+erVu3kpmZaVn1NDMzk61bt/LMM8/Qpk0bdu7cWaZniYjIxcXN1YU+b3/LoTo5vxpD4g02Pz0aM+G4kyMTERFxDruTxdtuu42ePXuyZ88ey7lrrrmGRx99FNM0Wbt2LbNmzeL777/HNE26dOnCc889Z3egcXFx9O3bl0OHDuHl5cWDDz7IokWL2LBhAxs2bGDRokWMHDkSLy8vIiMj6devH3FxcXY/T0ovd7hxQfNWRUSqgrCadTAmzyDBO+e43mFY+/hAyLJdvE1ERKS6s3s11EaNGhW4xcWcOXMYOHAgixYtIjo6msDAQK699lpGjBhhtRhOac2aNYvjx4/TpEkTVqxYQePGja2ud+nShZtvvpmnn36afv36ERkZyeuvv86UKVPsfqaUzrRp05g8ebKzwxARKZP+Pa/j7bvX0Ou9ZbiYUHPjOf6eNpw2zy1ydmgiIiIVyu6exaL079+f+fPns2LFCr788kseeOCBMiWKAN9++y2GYTB//nybRDG/Jk2aMH/+fEzTZOnSpWV6ppTOhAkTiI+PJyoqytmhiIiUycNPvMbPPesCOb8oMxbt4NjXrzo3KBERkQpmd7J45MgRYmJiSlz+6NGjVovhlFZkZCQ+Pj706NGj2LI9e/bEx8eHyMhIu58npefp6UlAQAABAQHODkVEpEwMw+DuN5ayrVnOG50+aQaHZiwgadsyJ0cmIiJScexOFiMiIiyrn5ZEt27diuwRdDTDMCrsWSIiUv0EefvTdNZHRIXm/D4
JOmuw7ekxZJ/Y6+TIREREKkaZhqGaplmu5fNr1KgRKSkp/P7778WW/e2330hOTiYiIsLu54mIiHS+pANnn59IvE/OcWi0C+sfHgLntICaiIhUf+UyZ7EgKSkpuLnZvZ4O119/PaZpcv/99xMdHV1ouejoaB544AEMw2Dw4MF2P09ERARgWN872fTgIDJcc45r7DT548n+WiFVRESqvQpJFvfv38/p06epWbOm3XU89dRT1KpVi3///ZeWLVvyxBNPsGTJEjZv3szmzZtZsmQJjz32GC1btuTff/+lVq1ajB071oGtEBGRi9XjI19l+eDmlmPftQnsnnYblGHEjIiISGVX4q6+pUuX2qwuGh8fz7333lvoPaZpcvbsWdatWwdA9+7d7QwTQkJC+PHHHxk8eDDR0dHMnTuXuXPnFvjM8PBwli5dSkhIiN3Pqwr279/PzJkz+eOPP9ixYwf169fn0KFDdpcTEZGCGYbBmClf8u6xHly7MR4XE84t2klMo0nUv+NlZ4cnIiJSLkqcLP7111989NFHGIZhmXt47tw5PvrooxLdX6NGDV544QW7gszVrl07/vnnH+bOncuiRYv4559/yMrKAsDV1ZXWrVszbNgwHn744YtiRc6dO3fy/fff07lzZ0zTJC6u4Dk0JS0nIiKF83DzYNhb37FqeB867MvAO80gas5iApq0xr/rrc4OT0RExOFKnCy2a9eOu+++23L88ccf4+3tzbBhwwq9x8XFhYCAAFq1asWQIUMc0tMXEBDAxIkTmThxIhkZGZw5cwbI6Xks616OVc2gQYO44YYbABg1ahQ//vhjmcqJiEjR6vjX5LI3P+bwPXfQ8KRJYLzB3xOep+vHl+DaoL2zwxMREXGoEieLN9xwgyXhgJxkMTAwkAULFpRLYCXh7u5O7dq1nfZ8Z3NxKdmU05KWExGR4nVo0p6vJz/P2acmE5QMIcdc2Pjw7Vzx6Spcguo7OzwRERGHsTuLWLNmDV999ZUjY6l09u7dy1tvvcWIESNo3bo1bm5uGIbByy+XbH7KokWL6NWrF8HBwfj6+tK2bVtmzJhBRoZW0BMRqcpuuvo2/nzkZtLPv+Uast9gwwP9MdOSnBuYiIiIA9m9l0XPnj0dGUepxMTEsGPHDuLi4opNvO666y67n/POO+8we/Zsu+4dPXo0s2fPxs3Njd69e+Pn58fq1asZN24c3333HStXrsTb29vu2ERExLkeuWcKM44f5rr/bsHVhJAdmax/6Bq6ffA7uLg6OzwREZEys3/jQyf4448/GD16NJs2bSpRecMwypQstmrViqeeeor27dvToUMHpk6dyn//+99i71uyZAmzZ8/Gz8+PtWvX0qFDBwBOnz5N7969WbduHZMmTWLmzJmWez766CPuueeeYutetGgRt9xyi91tEhERxzAMg6cnfMyMhEHcsOQgACHr41k/dhBXvrHcydGJiIiUXZmSxaysLObPn8+XX37JP//8Q1xcHJmZmYWWNwyjyOtF2bhxI7179yYtLQ3TNPHy8iI0NBRX1/J79/b++++3Oi7p3L+pU6cCMH78eEuiCBAaGsq8efO46qqrmDt3LpMmTSIwMBCAIUOG0LVr12Lrrl9f82FERCoLF8OFMa98w1sJfRi4+jQAgT9EsqnGvXR57kMnRyciIlI2dieLycnJ9O3bl40bN1q20ihPzz77LKmpqTRv3pz333+fbt26YRhGuT+3tGJiYti8eTMAw4cPt7nevXt3wsPDiYqKYvny5dx+++0ABAYGWhLHqiY6OtrqODEx0UmRiIhUPA9XDx56cyUL7r+K3n8k4wL4LNzAttqTaf9A2baMEhERcSa7k8WXX36ZDRs2YBgG119/PTfeeCP169fHy8vLkfFZ/PHHHxiGwVdffUWLFi3K5RmOsG3bNiBnK49GjRoVWKZjx45ERUWxbds2S7JYlYWHhzs7BBERp/Lx8Oa2d3/m27t60PWfDNyyIHPO5+yp1ZBLbxj
h7PBERETsYneyuHjxYgzDYNq0aTzzzDOOjKlA7u7u+Pv7V+pEESAyMhKABg0aFFomN7nKLWuvlJQUli/PmRdz8OBBUlJSWLx4MQCdOnWiYcOGpSrnaIsWLcLHx6dc6q4sYmJiWLhwobPDKHdqZ/VzsbS1otuZev3D/H3uLdocyMYrAxJffJXP9h3GqH9JuT5Xr2f1kZKS4uwQREQs7E4Wo6KicHFx4bHHHnNkPIVq3bo1GzZs4Ny5c5V6FdHcIZi+vr6FlvHz8wMgISGhTM86efIkQ4cOtTqXe7xgwQJGjBhRqnL2ioqKsjpOTEykRYsWDB06lICAgDLVXdktXLiwWvQOF0ftrH4ulrY6o527el7Bvgduo1k0+J2Dxl98Tp133yPi8h7l9ky9ntVHQkKCzZoJIiLOYvc+iyEhIfj7+1dY4vbEE0+QmZnJ/PnzK+R5VUFERASmaRb4kT8BLGk5e4WFhREWFkbNmjUJCAjA39+/zHWKiFRVLRq1pearb3CkVs5xYCLEPPwgB7f+6tzARERESsnuZLF3797Ex8fb9CqVl5tuuolx48YxduxYXnnllUo7TCM3UUpOTi60TFJSzqbN1a3Xbdq0aQQGBmoOo4hc9K64vD9er7zI0ZCc45B4iHr0Qfb9tc65gYmIiJSC3cNQn332WZYsWcK4ceP47LPPHBlToXKTkYkTJ/Lyyy8TERFB3bp1Cy1vGAarVq2qkNhyRUREALZDM/PLvZZbtrqYMGECY8aMISEhQQmjiFz0el51K+unGZwc9wK1zkKtM3DksQfInPt/XNb2KmeHJyIiUiy7k8XLLruMpUuXMmzYMAYMGMC4cePo1KlTkXP1yuqpp55i9uzZAKSlpbF371727t1baHlnbK3Rvn17AGJjY4mMjCxwRdQtW7YAWO3BKCIi1c+VPYfxx3Q48/QLhCRCvVNw5ImRZL31f7RqrYRRREQqN7uTRVdXV8v/V65cycqVK4u9xzAMMjMz7XrevHnzeP311wFo2rQpvXv3platWlZxVAZhYWF06tSJzZs389lnn/Hss89aXV+3bh1RUVF4enoycOBAJ0VZPqZNm8bkyZOdHYaISKXSudcwtk2HhKdfICAFGhyH/U+OJOP192jfpvwWvRERESkru5NF0zQdGUex5s2bh2EYjBo1irlz5zql17CkJk6cyJAhQ5g+fToDBgyw9CDGxsby8MMPA/Doo48SGBjozDAdTsNQRUQK1r7PMP5+FZKeeQG/c9A0GvY8/SBZr71LxzY9nR2eiIhIgexOFtesWePIOIp18OBBDMPg1VdfrbBEcevWrZbkDuDAgQMAvPfee3z//feW8998843V3Mkbb7yRxx9/nDlz5tC1a1f69OmDr68vq1at4uzZs3Tr1o0pU6ZUSBtERKRyaHPtMHZNy+bcuMl4p8Glh2HX06M48eRoruv/oLPDExERsWF3stizZ8W+ExoSEkJKSoplj8KKkJCQwKZNm2zOR0dHEx0dbTlOS0uzKTN79my6devG22+/zfr168nIyKBJkyaMHz+eJ598Eg8Pj3KN3Rk0DFVEpGgt+t/G3qxszo2fgmcGtDgMJ158k//+u5n/PP6Bs8MTERGxYvfWGRWtT58+xMfHc+TIkQp7Zq9evQrdnzD/R2Grmg4bNoy1a9cSHx9PSkoKO3bsYNy4cdUyUYScYagVuZ2KiEhV1Py64QTMeInk89sU1z4Lrd7/nY/HXkV6auXcFkpERC5ODk0Ws7KyOHXqFKdOnSIrK8uRVTNp0iT8/f15/PHHyc7Odmjd4hienp4EBARUu/0jRUQcrfGAoTRb/A3Ha+cM8PHKgM7LTvPNfZ04cWiXk6MTERHJUeZkMSkpiddee42OHTvi7e1NnTp1qFOnDt7e3nTs2JFZs2YVuUF9SXl4eDB//nx+/fVXWrZsyQcffMCmTZs4cuRIkR9ScdLS0khISCAhIcHZoYiIVHqBTS7lqh83caBNDcu5Nn9m8+eDN7H
75/86MTIREZEcds9ZBPjnn38YPHgwhw8ftlkdNTMzk61bt7Jt2zbmzZvHt99+S8uWLe1+Vv79CuPj43nwweIXAyjLVh1SepqzKCJSOm7ePlz3xW98N+keGn+1CVcTGh02iH32FTbu/I2uj84D1zL9qhYREbGb3T2LcXFx9O3bl0OHDuHl5cWDDz7IokWL2LBhAxs2bGDRokWMHDkSLy8vIiMj6devH3FxcXYHWpK5gxd+aLhqxdKcRRGR0jMMg8Evf8TpyQ+TcH4eY414A+/3f+PHkZeTePhv5wYoIiIXLbvfrpw1axbHjx+nSZMmrFixgsaNG1td79KlCzfffDNPP/00/fr1IzIyktdff93uLSMiIyPtDVUqiKenJ56ens4OQ0SkSuo17DF2X9aOw4+OouGJbDwyoeHv6ay/dxi+t/eh+71zwMXV2WGKiMhFxO5k8dtvv8UwDObPn2+TKObXpEkT5s+fz9VXX83SpUvtThYbNmxob6giIiJVwmWtryL0m5UsHz2Mzn+cAaBBjEHq7NX8d3N7BkycT2jDTk6OUkRELhZ2D0ONjIzEx8eHHj16FFu2Z8+e+Pj4qHewmtMCNyIiZVczpD53f/I7h18cxalAA8hZLbXj2gx+v/8uFs2/CzNL8/FFRKT8Vdg+i4ZhVNSjxEmmTZtGYGAg4eHhzg5FRKTK63/bE3RYuY5/Ouf9TL0kCpq+uZk3HmvPjp1LnBeciIhcFOxOFhs1akRKSgq///57sWV/++03kpOTC928XqoHLXAjIuJYfoEhDP1kJXEvTyIuIOdXtlcGDFidybEHJ/DGxKuIPnnQyVGKiEh1ZXeyeP3112OaJvfffz/R0dGFlouOjuaBBx7IWe1t8GB7HydVgKenJwEBAQQEBDg7FBGRauXKW4bTbuUG9ne8xHIu/DT0//o02267jgN//4+TCWXf01hERCQ/uxe4eeqpp/jwww/5999/admyJSNGjODqq6+mfv36AMTExLBq1So++eQTEhMTqV27NmPHjnVY4CIiIhcTn6AABn26lD+XLifxjWepfTwVgKZHoek3B/j1r04c6DeUO+4eR1iwj5OjFRGR6sDuZDEkJIQff/yRwYMHEx0dzdy5c5k7d65NOdM0CQ8PZ+nSpYSEhJQpWKnc0tLSLIvciIhI+bj8hoGYgwfw14K3Sf7gHWqcydlTuGWkyWXvfsmva5ZyYNAzPH7HUAK93Z0crYiIVGVlWuCmXbt2/PPPP7z88su0adMGFxcXTNPENE1cXFxo27YtU6dOZceOHbRr185BIUtlpQVuREQqhmEYtL/3Ubr9up3EEVdz1j/nvAvQfm8aN7wxha9GXM03a//CNE2nxioiIlVXmVdDDQgIYOLEiWzbto2UlBSOHTvGsWPHSElJYevWrYwfP15z2C4SWuBGRKRiGW5udB4/j6hHxnBsYDhJXjnn3bPgiu2xhD9xOx+NHMreqFjnBioiIlWSQ7fOcHd3p3bt2tSuXRt3dw19udhogRsREefI8vSj9+srueTLD9hzhQ/prjnnfVOh6287OXNLdxa9MI6k1AznBioiIlVKhe2zaI/ly5czZswYXn755RKVN02TKVOmMGbMGFauXFnO0YmIiFQugZd0Y8iCP3Gf+wTbW+T9ig+Kh1ZffMu2fu3Z8Nn/nBihiIhUJXYniytWrCAkJIQ77rij2LI33XQTISEhrF69usT1p6SkcO+99zJ79mzat29fonsMw6Bdu3a8+eab3H///aSlpZX4eSIiItVFi6tHMXTRdvaO783uBoblfOiJLAKmTGHZA7dwLk29jCIiUjS7k8WFCxcSHx/P8OHDiy17++23c/bsWRYuXFji+hctWsTJkye59tprue6660p836BBg7j22muJiYnhq6++KvF9IiIi1Ymrqxs3jnibHl+vYeWdDYiukXPexTRo/NtONgzsxO5t/zg3SBERqdTsThb/+OMPDMOgV69exZYdOHAghmGwYcOGEtf/7bf
fYhgGDz30UKljGzVqFKZp8s0335T6XhERkeok1K82Tzy3gqD/vcOKK93JPn++bkwayfcM5atZ07RiqoiIFMjuZDEqKoqgoCB8fX2LLevr60twcDAxMTElrn/btm0A9O7du9SxXXPNNQBs3bq11PeK/XL3WNQ+iyIilU+niF48+H+bWHP/JcSe32rDNxVavP8JX9x+Ladj9bNbRESs2Z0sGobBuXPnSlz+3LlzpKenl7j8yZMn8ff3x9/fv9Sx5d534sSJUt8r9tM+iyIilZu3mzePPrUUl9nP8HfTvPNt/4ph+w1XsvLH5c4LTkREKh27k8Xw8HBSU1P5+++/iy27fft2zp07R1hYWInrz87OLvOwGA2rqVjaZ1FEpGq48sp7uO6zVWy61s+yzUa901nUfHosX3/0jnODExGRSsPuZLFPnz6Ypslzzz1XZLncMoZh0KdPnxLXX7NmTZKSkoiPjy91bPHx8SQmJhIaGlrqe8V+2mdRRKTq8Auox4g5f3BmTE+OnV/8xisDIl6fw+olnzs3OBERqRTsThZHjx6Nu7s7y5Yt4+abb+bQoUM2ZQ4dOsTNN9/MsmXLcHV1ZfTo0SWuv3Xr1gD8/PPPpY7tp59+AqBVq1alvldEROSiYRhcfd+7tP+/2eyPyDnlnQ4BL0xm6+ofnBqaiIg4n93JYuPGjZk7dy6mabJkyRKaNm1Kq1atuO6667juuuto1aoVTZs2ZenSpQDMmTOHSy65pMT19+3bF9M0mT59OtnZ2cXfcF52djbTp0/HMAz69+9f6naJiIhcbIJb9uWKN97iwPkp575pkDl2DPv/WO/cwERExKnsThYBHnjgAb7++mvCwsLIzs5m165d/PDDD/zwww/s2rWL7OxswsPD+frrrxk1alSp6r733nupUaMGW7duZeTIkWRlZRV7T1ZWFiNHjmTr1q0EBwdzzz332Ns0ERGRi0roZdfQ6pUpRNbLOfY/B7EP3c+J3TucG5iIiDiNW1kruPHGG7n++utZvXo1GzZssKxAWqdOHa644gp69+6Nq6trqev18/PjzTff5D//+Q8LFixg27ZtTJgwgYEDB+Lj42NVNiUlhWXLljF9+nT++usvDMPgzTffxM/Pr6zNExERuWhEdL6FxOdiOfzSmzQ8DgHJJgfvug3vL78loFETZ4cnIiIVrMzJIoCbmxt9+/alb9++jqjO4o477iAmJoaJEyfy119/ceutt+Li4kLTpk0JCQkB4MyZMxw4cICsrCxM08QwDKZOncqdd97p0FhEREQuBq17P0hC0hmiXvuE8FMQlJjNP7fdQPuvf8C7vrZGEhG5mJRpGGpFeOaZZ1ixYgVt27bFNE2ysrLYu3cvGzZsYMOGDezdu5fMzExM06Rdu3asWLGCcePGOTtsERGRKqvb4AlkjbqeoznvyxIcn8XWodeRfvqUcwMTEZEK5ZCexfLWp08ftm7dyqZNm1i9ejW7du0iNjYWgBo1atCiRQuuvvpqunbt6uRIL25paWmkpaWRkJDg7FBERKSMBtzxGl8kxeGy4HfqnIWQMxn8+tBQrln0i7NDExGRClIlksVcXbp0oUuXLs4OQwoxbdo0Jk+e7OwwRETEQW598APmp9yC9393EpgC9XecYP8Pi2k64BZnhyYiIhWg0g9DlapjwoQJxMfHExUV5exQRETEQe594ku2XeFlOT76ymTMzEwnRiQiIhWlyieLTz75JPfdd5+zwxDA09OTgIAAAgICnB2KiIg4iOHiQq9H3yWyTs5xzdOZ/Dr1SecGJSIiFaLKJ4uff/45H330kbPDEBERqbYuu6wL0dc2txz7f/UzKSeOOjEiERGpCFU+WRQREZHy958xn7K1pQGAdxqsHT3cyRGJiEh5U7IoIiIixfL29sN/+D2keOYcN9h2gn2rljo3KBERKVdKFkVERKREBt/8NFu7eAM5f0BEvfwcZna2c4MSEZFyo2SxCtu/fz+jRo2iQ4cOuLu7ExERUWC
5xYsXM2TIEBo0aICPjw8tW7Zk1qxZZGRkVGzAIiJS5fUaN5+jNXL+X/dYJmumj3ZqPCIiUn6qfLJ4xRVX0KNHD2eH4RQ7d+7k+++/JyIiglatWhVabubMmXh6ejJjxgyWLVvG8OHDee6557j//vsrMFoREakOmjVpT2T/Sy3Hfot/4tzpk06MSEREyoubswMoq6+//trZITjNoEGDuOGGGwAYNWoUP/74Y4HlvvvuO2rWrGk5vvrqqzFNk0mTJjFjxgxq165dIfGKiEj1cNe4z1m6qT2t9pv4p8BPY4cx+ONfnB2WiIg4WJXvWbyYubiU7OXLnyjmuvzyywE4elRLn4uISOl4eHjie+8DpJ9/y7nR5hPs+eVb5wYlIiIOp2SxCHv37uWtt95ixIgRtG7dGjc3NwzD4OWXXy7R/YsWLaJXr14EBwfj6+tL27ZtmTFjRqWYK/jrr7/i4eFBkyZNnB2KiIhUQQNvepKtnXwAcMuGQ9OedXJEIiLiaJVyGGrv3r0BaNiwIQsWLLA6VxqGYbBq1Sq743jnnXeYPXu2XfeOHj2a2bNn4+bmRu/evfHz82P16tWMGzeO7777jpUrV+Lt7W13bGWxa9cuZs+ezciRIwkICHBKDCIiUvX1mPQRR28bRmgCNDycyW/vv8RVDzzv7LBERMRBKmWy+MsvvwBw6aWX2pwrDcMwyhRHq1ateOqpp2jfvj0dOnRg6tSp/Pe//y32viVLljB79mz8/PxYu3YtHTp0AOD06dP07t2bdevWMWnSJGbOnGm556OPPuKee+4ptu5FixZxyy232N2m06dPc+ONN9K0aVOmT59udz0iIiJNGrdmbe/GhC45CEDmfz8n+56JuLhVyj8vRESklCrlT/MXXngBgNDQUJtzFenC1UJLOkdw6tSpAIwfP96SKEJOe+bNm8dVV13F3LlzmTRpEoGBgQAMGTKErl27Flt3/fr1Sxq+jcTERAYMGEB6ejq//PILvr6+dtclIiICcPOkT9n0+5WEn4I6J01WT3uYayb9n7PDEhERB6jUyWJx5yqjmJgYNm/eDMDw4cNtrnfv3p3w8HCioqJYvnw5t99+OwCBgYGWxLE8pKWlccMNN3Do0CHWrVtHvXr1HFZ3dHS01XFiYqLD6hYRkcot0DeYo9d3JnzBHwD4fPcbmWMScfP1d3JkIiJSVlrgxsG2bdsGQEhICI0aNSqwTMeOHa3KlresrCxuu+02Nm/ezPLly2nevLlD6w8PD7f6aNGihUPrFxGRyu32Me+xt0HO1I/gBPj5uf84OSIREXGEStmzWJVFRkYC0KBBg0LLhIeHW5W1V0pKCsuXLwfg4MGDpKSksHjxYgA6depEw4YNAXjkkUdYsmQJU6ZMISsri40bN1rqaNGiRbktcrNo0SJ8fHzKpe7KIiYmhoULFzo7jHKndlY/F0tb1c6KE9Xtcpof2QJAjV/28sX/zSPbP9ihz6gM7SxvKSkpzg5BRMRCyaKD5Q7BLGo+oJ+fHwAJCQlletbJkycZOnSo1bnc4wULFjBixAgAfvzxRwAmTZrEpEmTrMqvWbOGXr16lSmOqKgoq+PExERatGjB0KFDq/1qqwsXLrQMJa7O1M7q52Jpq9pZcUzzNhZua037PVn4nYOAjV8w4MO1Dn1GZWhneUtISLBZM0FExFkqbbLYuHHjMtdhGAYHDhxwQDSVU0REBKZpFlvu0KFD5RpHWFgYkDMvMi0trVyfJSIilZNhGPjd9wiZ4+bglg31/jjJyR2bqNW6i7NDExERO1XaZNERCU5Zt86wh79/zoT+5OTkQsskJSUBVLtet2nTpjF58mRnhyEiIk4yeNBDfLjw/7hiayoembDtpUfot2iLs8MSERE7VdpksajVT2fOnElKSgrPP1/5Nv6NiIgAbIdm5pd7LbdsdTFhwgTGjBlDQkKCZV6miIhcXJo8PoWUkU/jkw5h/yRzeNUiGvYZWvyNIiJS6VTJZPHdd98lJSWlUm6
n0b59ewBiY2OJjIwscEXULVty3mXNvwejiIhIddCz6/W80+UVev12FhcT9r3+kpJFEZEqSltnOFhYWBidOnUC4LPPPrO5vm7dOqKiovD09GTgwIEVHV65mjZtGoGBgepVFBG5yF35zFzictZyo/6BTHZ9OtO5AYmIiF2ULJaDiRMnAjB9+nS2bt1qOR8bG8vDDz8MwKOPPkpgYKBT4isvEyZMID4+vsghuCIiUv21bXY5f15V33J8av6HmFmZToxIRETsoWSxCFu3bqVr166Wj2XLlgHw3nvvWZ0/duyY1X033ngjjz/+OElJSXTt2pUBAwZwyy230LRpU3bs2EG3bt2YMmWKM5okIiJSIQZPXEBMSM7/ax0z2fjiCKfGIyIipVdp5yxWBgkJCWzatMnmfHR0NNHR0ZbjgraLmD17Nt26dePtt99m/fr1ZGRk0KRJE8aPH8+TTz6Jh4dHucbuDFoNVUREcjWsGc7X17en/ifbAHD/fgvxI3YQ2KS1kyMTEZGSUs9iEXr16oVpmsV+FLaq6bBhw1i7di3x8fGkpKSwY8cOxo0bVy0TRdAwVBERsfbQ0wvYdokrAL7nDDaOu9vJEYmISGkoWRSH8fT0JCAgoNrtHykiIvbxcvck6MlJpJx/j7TBP+f4c4FGoIiIVBVKFsVh0tLSSEhIICEhwdmhiIhIJTHw6lv5o2c9y3HiB5+TmXjGiRGJiEhJVdo5i7179y702pkzZ4otA2AYBqtWrXJoXFI4zVkUEZGC3PrKV/zx95U0PGFSOxa+f+YmbnznF2eHJSIixai0yeIvv/yCYRiYpllkmaIYhuHgqKQoEyZMYMyYMSQkJGivRRERsQgJCCLh3rvJnv4RLiZE/HaC3WsXclnP250dmoiIFKHSJot33XWXkr0qxtPTE09PT2eHISIildCwu8fxvx8X02FbEp6ZsG/6yzTvNhQXt4r9U+RQ/CHSstJoHtK8Qp8rIlIVVdpk8aOPPnJ2CCIiIuJAV7zyGSeGDSYoCZpFZvP11Nu55flFFfb8IwlHuG/FfaRlp/F/1/4fLWq0qLBni4hURVrgRhxGC9yIiEhRGjduxuHBPSzHdb/9h6gDmyvk2dGJ0dy38j5OnjtJfFo8M7fMLHKqi4iIKFkUB5o2bRqBgYGarygiIoUa+uw7HGyYM7ApJAnWPP9AuSdtx5KOcf/K+zmefByApkFNmdVzlqa7iIgUQ8miOMyECROIj48nKirK2aGIiEgl5erqQt1n3yD9/ESYy7em8cGrN5dbwng8+Tj3rriXmKQYABoHRPBB2GCCvYLL5XkiItWJkkVxGE9PTwICAggICHB2KCIiUol16HEN+7s1AcDFhM7/3c17MxyfMJ5MOcn9K+8nOikagAhXXz74dzs1vhsN0X869FkiItWRkkURERGpcAPfWMyRJj4AeGTBlZ/sZt5rN5NtZjuk/tPnTnP/yvs5nHAYgPCMTD6I/Jea5+JzCqyf7ZDniIhUZ0oWRUREpMJ5+3hx5ee/EdXEGwD3LLjq493Mfe2WMieMZ87F8sD3w4mMjwSgfkYm84+doHZWFrh6Qoe7ofekMrdBRKS6q7RbZ0jVk5aWZlkRVUREpDj+/j50/t9atgzvQf2DqbhnQa+PdzOHm3nsqcW4urgWeX92chIZe7eSumc7p3b+zalDB8k4FUtWchpXhxtE9nWhppnF/OMnqOvuD1feD10eBL9aFdRCEZGqTcmiOMy0adOYPHmys8MQEZEqJCjIn/b/W8v2O3pQ92Aa7lnQ++M9vMktPP7Ul7i7uFvK+pw+xonXn+HM+t/JjDyDW/IFdeX7f51Yk4CMLLpfaVD/2mnQ7g7w9KuQNomIVBdKFsVhJkyYwJgxY0hISND2GSIiUmI1ggNo+fEq9tx9NbUOZuCeBX0/2sOb5wZRxzcU723/Und/EpfEm5w5f09J/oDpshM+z+xF/a59GeHhizbKEBEpHSWL4jCenp54eno6OwwREamCate
sQeaHP3Pw3j6EHszELRuuX3gYOFxg+RRPiK4BJ4INTgQZnAv0xqhVh5rNWtMqLpC6Mz/BBZOb967hnTcD+CNyKK/e0oYAL/cC6xMREVtKFkVERKRSqF+nFtn/t4IjI/sScjDL6lqGKxysB/vDPdjXoA7xzdrTsn4r+jRuxz1hza2GqwKc9m7AqSlTAHhwx7dM9/Jn0LEE3h7egVb1AyusTSIiVZmSRREREak0wsPqkfnOco6PGYTH6RTOhvpwPKIlaV1uI/psKmPuvgk/z+L/fAm9Yzhm7GlOz3sHF0ye/nMhkzx8uWleKs8PasEdXRpgGBqYKiJSFCWLIiIiUqk0atiA0P9t4ejZVNrU9MXdNWenr4ULF5YoUcwV+thjZJw8Sfzir3DPzuL5TR/xdPeHeW5JNtujzvLa0Lbl1QQRkWpB+yyKw+Rum6GtM0REpKz8vdxpXsffkijawzAM6r74In69egHgk5nGlA0fUCv5DB0aBjsoUhGR6kvJojjMtGnTCAwM1EqoIiJSaRhubtR/43W82+b0IoakJfLWtgXc0sTXyZGJiFR+ShbFYSZMmEB8fDxRUVHODkVERMTCxdubsHffwaNxYwACTh/j6FNPYZqmkyMTEanclCyKw3h6ehIQEEBAQICzQxEREbHiFhxMg/f/D7datXANCaHWmLFa4EZEpBha4EZEREQuCu716xP+wfu4eHri0bChs8MREan0lCyKiIjIRcPrkkucHYKISJWhYagiIiIiIiJiQ8miiIiIiIiI2FCyKCIiIiIiIjaULIqIiIiIiIgNLXAjDpOWlkZaWhoJCQnODkVERERERMpIPYviMNOmTSMwMJDw8HBnhyIiIiIiImWkZFEcZsKECcTHxxMVFeXsUEREREREpIw0DFUcxtPTE09PT2eHISIiIiIiDqCeRREREREREbGhZFFERERERERsKFkUERERERERG0oWRURERERExIaSRREREREREbGhZLEK279/P6NGjaJDhw64u7sTERFRYLmvv/6a7t27ExoaiqenJ40bN2bMmDHExcVVbMAiIiIiIlJlaOuMKmznzp18//33dO7cGdM0C03+zpw5Q69evXj66acJDAxkx44dTJ48me3bt7Nq1aoKjlpERERERKoCJYtV2KBBg7jhhhsAGDVqFD/++GOB5e6//36r4169euHl5cXIkSM5cuQIDRo0KPdYRURERESkatEw1CrMxcX+ly8kJASAjIwMR4UjIiIiIiLViJLFIuzdu5e33nqLESNG0Lp1a9zc3DAMg5dffrlE9y9atIhevXoRHByMr68vbdu2ZcaMGU5L0LKyskhNTWXLli1MnjyZgQMH0qRJE6fEIiIiIiIilZuGoRbhnXfeYfbs2XbdO3r0aGbPno2bmxu9e/fGz8+P1atXM27cOL777jtWrlyJt7e3gyMuWo0aNYiPjwegb9++fPnllxX6fBERERERqTrUs1iEVq1a8dRTT/G///2P3bt385///KdE9y1ZsoTZs2fj5+fHpk2bWLFiBV999RX79u2jdevWrFu3jkmTJlnd89FHH2EYRrEfixcvtrs9v/zyC7///jvvvvsuu3btYtCgQWRlZdldn4iIiIiIVF/qWSzChQvDlHSO4NSpUwEYP348HTp0sJwPDQ1l3rx5XHXVVcydO5dJkyYRGBgIwJAhQ+jatWuxddevX7+k4dto164dAFdeeSXt2rWja9eufPPNN9xyyy121ykiIiIiItWTkkUHi4mJYfPmzQAMHz7c5nr37t0JDw8nKiqK5cuXc/vttwMQGBhoSRwrQocOHTAMg/3795e5rujoaKvjxMTEMtcpIiIiIiLOpWTRwbZt2wbkrDbaqFGjAst07NiRqKgotm3bZkkWK9rvv/+OaZo0bty4zHWFh4c7ICIREREREalMlCw6WGRkJECRexfmJle5Ze2VkpLC8uXLATh48CApKSmWOY2dOnWiYcOGAPTr148+ffrQsmVLPD092bZ
tG6+99hpt2rThxhtvLFMMRVm0aBE+Pj7lVn9lEBMTw8KFC50dRrlTO6ufi6Wtamf1cjG0MyUlxdkhiIhYKFl0sNwhmL6+voWW8fPzAyAhIaFMzzp58iRDhw61Opd7vGDBAkaMGAFA586d+fTTTy3JaUREBA8//DBjxozBw8OjTDEAREVFAZCWlkZ6ejpJSUl07tyZoUOHEhAQUOb6K7OFCxc6rXe4Iqmd1c/F0la1s3q5GNqZkJBgs2aCiIizKFmswiIiIjBNs9hyU6ZMYcqUKeUWR1hYGAAvvvgikydPLrfniIiIiIhIxdHWGQ7m7+8PQHJycqFlkpKSAKpdr9uECROIj4+39DSKiIiIiEjVpWTRwSIiIgCKTJhyr+WWFRERERERqWyULDpY+/btAYiNjS10AZstW7YAWO3BWB1MmzaNwMBArY4qIiIiIlINKFl0sLCwMDp16gTAZ599ZnN93bp1REVF4enpycCBAys6vHKlYagiIiIiItWHksVyMHHiRACmT5/O1q1bLedjY2N5+OGHAXj00UcJDAx0SnzlxdPTk4CAgGo3F1NERERE5GKk1VCLsHXrVktyB3DgwAEA3nvvPb7//nvL+W+++Ya6detajm+88UYef/xx5syZQ9euXenTpw++vr6sWrWKs2fP0q1bt3JdndRZ0tLSSEtLK/OWICIiIiIi4nxKFouQkJDApk2bbM5HR0cTHR1tOU5LS7MpM3v2bLp168bbb7/N+vXrycjIoEmTJowfP54nn3zSIfsbVjbTpk3T1hkiIiIiItWEksUi9OrVq0T7GBZm2LBhDBs2zIERVW4TJkxgzJgxJCQkaJEbEREREZEqTnMWRURERERExIaSRXEYbZ0hIiIiIlJ9KFkUh9HWGSIiIiIi1YfmLIrDeHp64unpaTmeNWsWXl5epapjwIABtGvXzub8X3/9xQ8//GBXXBMmTCjw/CeffEJMTEyp62vbtm2Be2SeOnWKDz74oNT1Adx1113Ur1/f5vzPP//M5s2bS11faGgoDzzwQIHXXn/99QIXZSpKZmYmf/zxB507d7a5tnv3bpYsWVLqGAHGjBlj9TWT64svvuDgwYOlru/SSy9lyJAhNucTEhJ4++23i70/MzOTadOmWZ279dZbady4sU3Z3377jXXr1pU6Rn9/fx599NECr82dO5fExMRS19m9e3euuuoqm/MHDx7kiy++KPCegtqa3yOPPFLgNjjffPMNe/bsKXWMjRs35tZbb7U5n5aWxuuvv17q+iBn5enLLrvM5vwff/zBqlWrgOLbmZ+npydjxowp8Nr777/P6dOnSx1jp06duOaaa2zOx8TE8Mknn5S6PoD777+fmjVr2pxfvnw527dvL3V99evX56677irwWkk/dxfSz3JrpflZnpqaatezRUTKg5JFKTfp6em4uJSu8zo7O7vQ8+np6Y4IyyIjI8OuOjMzMws8b5qm3TEW1u6srCy76szIyCj0Wlpaml11ZmVlFXi+Krw2QInru7Cco1+bou5JT0+v0NfG3s+xo78my/v7xhFfn/Z+TZbH901hC69VhdemKvy8cPbPckd/fkREykLJojiM9lkUEREREak+lCyKw1y4z6KHh0ep95MsrCfSxcXF4XtTuru721Wnm1vB3zaGYdgdY2HtdnV1tatOd3f3Qq8VNOyzOJmZmbi6uhZ4rSq8NkCJ6svMzLSpw9GvTVH32PM9kxtLQYp6bQpqa0m4ubk5/GuyPL9vStPOor437P2aLI/vG8MwCjxfFV6bqvDzwtk/ywvrnRQRcQbDLMtGgiL55O9ZDA8PJz4+vsA5T9XJwoULuf32250dRrlTO6ufi6Wtamf1cjG0MyEhgcDAwIvid6iIVH7qWRSHuXCBGxERERERqbq0dYaIiIiIiIjYULIoIiIiIiIiNjQMVRxGq6GKiIiIiFQf6lkUh5k2bRqBgYGEh4c7OxQRERERESkjJYviMBMmTCA+Pp6oqChnhyIiIiI
iImWkYajiMFoNVURERESk+lDPooiIiIiIiNhQsigiIiIiIiI2lCyKiIiIiIiIDc1ZFIfR1hkiIiIiItWHehbFYbR1hoiIiIhI9aFkURxGW2eIiIiIiFQfGoYqDqOtM0REREREqg/1LIqIiIiIiIgNJYsiIiIiIiJiQ8miiIiIiIiI2FCyKCIiIiIiIja0wI04nGmaABfFfospKSlqZzVysbQTLp62qp3Vy8XQztz25f4uFRFxJsPUTyNxoOjoaO2zKCIiUkZ//PEHnTp1cnYYInKRU8+ilJudO3cSFhbm7DDKRUxMDC1atABg165d1K9f38kRlQ+1s/q5WNqqdlYvF2M7a9eu7eRoRESULEo5CggIICAgwNlhlIv8w6D8/f3VziruYmknXDxtVTurl4uxnS4uWlZCRJxPP4lERERERETEhpJFERERERERsaEFbkRERERERMSGehZFRERERETEhpJFERERERERsaFkUURERERERGwoWRQREREREREbShZFRERERETEhpJFERERERERsaFkUURERERERGwoWRQREREREREbShZFRERERETEhpJFERERERERsaFkUURERERERGwoWRQREREREREbShZFRERERETEhpJFERERERERsaFkUURERERERGwoWRSn279/P6NGjaJDhw64u7sTERHh7JDKxeLFixkyZAgNGjTAx8eHli1bMmvWLDIyMpwdmkN9/fXXdO/endDQUDw9PWncuDFjxowhLi7O2aGVm8zMTNq0aYNhGHz++efODsehfvnlFwzDsPlo1aqVs0MrF1988QWdO3fGx8eHkJAQ+vTpw7Fjx5wdlsP06tWrwNfTMAymT5/u7PAcaunSpXTt2pWAgABq1arFddddx19//eXssBzu559/pmvXrnh5eVGrVi1GjRpFfHy8s8MSkWrCzdkBiOzcuZPvv/+ezp07Y5pmtU0qZs6cSUREBDNmzKB27dqsX7+e5557jr///puPP/7Y2eE5zJkzZ+jVqxdPP/00gYGB7Nixg8mTJ7N9+3ZWrVrl7PDKxezZszl16pSzwyhXH3zwAS1btrQc+/j4ODGa8jFr1iwmTJjAU089xauvvkpycjK//vorqampzg7NYebNm0dCQoLVuf/+97/MmzePgQMHOikqx/vpp58YMmQId9xxBy+99BLJyclMnTqVPn368M8//1C3bl1nh+gQa9eupX///tx88828+OKLREVFMWHCBPbu3cvq1asxDMPZIYpIVWeKOFlWVpbl/w8++KDZsGFD5wVTjk6ePGlzbsqUKSZgHj9+3AkRVZz/+7//MwHz8OHDzg7F4aKiokx/f3/z448/NgFz4cKFzg7JodasWWMC5oYNG5wdSrnat2+f6e7ubs6bN8/ZoVS4Tp06ma1bt3Z2GA41YsQIMyIiwszOzracO3DggAmYCxYscF5gDta7d2+zbdu2Vu386quvTMD87rvvnBiZiFQXGoYqTuficnF8GdasWdPm3OWXXw7A0aNHKzqcChUSEgJQ7YbcAowePZrBgwfTo0cPZ4ciZfDhhx/i4eHBfffd5+xQKtS+ffvYvHkzd955p7NDcaiMjAz8/PysetYCAwMByM7OdlZYDrdp0yauvfZaq3b27dsXgCVLljgpKhGpTi6Ov9Kl1Pbu3ctbb73FiBEjaN26NW5ubhiGwcsvv1yi+xctWkSvXr0IDg7G19eXtm3bMmPGjEqXLDi7nb/++iseHh40adKkLM0oljPamZWVRWpqKlu2bGHy5MkMHDiw2rXzxx9/ZOXKlbz22muObEaxnPF63nDDDbi6ulK7dm1GjhzJmTNnHNWcQlVkO9evX8+ll17Kxx9/TMOGDXFzc6Nt27b88MMPjm6WDWf+HPr0009xcXFh+PDhZW1GsSqynffeey979uzhjTfeIC4ujqioKB577DHCw8O56aabHN00KxXZTldXVzw8PKzOubu7YxgGO3fudEh7ROQi5+yuTamcnnjiCRO
w+ZgyZUqJ73VzczP79u1r3nTTTWZQUJAJmN27dzdTUlIKvbeih6E6q52maZo7d+40vb29zUcffdRRzSk21opsZ2BgoOU5ffv2NZOSkhzdrEJjrYh2njt3zmzSpIk5c+ZM0zRNMzIyssKGoVZkO7du3WqOHTvW/O6778w1a9aY06dPNwMDA81WrVqZqamp5dVEq1grop3Nmzc3/fz8zLp165qffPKJuWLFCvP666833dzczH/++ae8mmgVqzN+DjVp0sS8+uqrHdWUEsVaUe389ttvTX9/f8tzGjVqZO7du7c8mlZgrBXRzo4dO5p9+vSxOvfbb7+ZgHnJJZc4tF0icnFSsigFev/9982nnnrK/N///mfu3r3b/M9//lOiX3bffPONCZh+fn7mn3/+aTl/6tQps3Xr1iZgjh07ttD7KzpZdFY7T506ZTZr1sxs3bp1hSRRzmjntm3bzN9//9189913zbCwMPPqq682MzMzHdquC1VkOydNmmRedtllZnp6ummaFZssOuvrNtfKlSsrZO5XRbazWbNmJmAuW7bMci4tLc0MCwsz//Of/zi2YRdw1uu5YcMGEzDnz5/vsLYUpSLbuWHDBjMoKMh88MEHzZ9//tlcsmSJeeWVV5qNGzcu9zniFdnOTz75xATMV1991Tx16pS5detWs1WrVqarq6t56aWXlkv7ROTiomRRSuTuu+8u0S+7Tp06mYD58ssv21zLfbfT09PTPHv2bIH3O3uBm4poZ0JCgtmxY0ezYcOGZkxMjMNiL42Kej1zbdy40QTMRYsWlSnu0iqvdh46dMj09PQ0Fy9ebMbFxZlxcXHm9u3bLX94F/f5cLSKfj1N0zRDQkLMhx9+2O6Y7VGe7ezcubMJ2PTc3HrrrWaHDh0c04ASqqjX85FHHjG9vLzM+Ph4h8RdWuXZzo4dO5oDBw60KhsfH28GBQWZ48aNc0wDSqg825mdnW2OGzfO9PDwMAHT1dXVfPzxx83LL7+8wnqMRaR605xFcZiYmBg2b94MUOD8l+7duxMeHk5aWhrLly+v6PAcpiztTEtL44YbbuDQoUOsWLGCevXqVUjM9nDk69mhQwcMw2D//v3lEmtZ2NPOyMhI0tLSuOWWWwgODiY4OJi2bdsCcN9991G/fv2Ka0AJlcf3Z2Vclt/edubfFiQ/0zQr5dYZZX09MzMz+eKLLxg0aBABAQHlHq+97G3nzp07LQuI5QoICKBp06bs27evfIO2g73tzN0f8/Tp02zfvp0TJ04wa9Ys9u3bR7du3SosfhGpvpQsisNs27YNyFn5slGjRgWW6dixo1XZqsjedmZlZXHbbbexefNmli9fTvPmzcs/2DJw5Ov5+++/Y5omjRs3dmyQDmBPO9u1a8eaNWusPhYuXAjApEmTKmRRlNJy5Ou5YsUKzpw5Q+fOnR0bpAPY287BgwcDWO0FmpaWxrp16yzlK5Oyvp4rVqzg9OnTlX4VVHvbGRERwZYtW6zKJSQksH///kLrcaayvp7+/v60adOGGjVqsGDBAlJTU7nnnnvKL2ARuWi4OTsAqT4iIyMBaNCgQaFlwsPDrcoCpKSkWN4pPXjwICkpKSxevBiATp060bBhw/IK2S72tvORRx5hyZIlTJkyhaysLDZu3Gi51qJFi0r37r697ezXrx99+vShZcuWeHp6sm3bNl577TXatGnDjTfeWK4x28OedgYFBdGrVy+rMocOHQJyXsurrrrK8YGWkb2v55133kmjRo24/PLL8ff3Z9OmTbz66qu0a9eO2267rXyDtoO97Rw8eDBXXHEF999/P9OmTaNOnTq89dZbxMXF8cwzz5Rv0Hawt525Pv30U2rUqMGAAQPKJ0AHsbedjz76KI888ggPPvggN998M0lJScyaNYu0tDQeeOCB8g3aDva2848//mDNmjW0a9eOzMxMfv75Z+bMmcMbb7xRKd+cE5GqR8miOExiYiIAvr6+hZbx8/MDct7hzXXy5EmGDh1
qVS73eMGCBYwYMcLBkZaNve388ccfgZyep0mTJlmVX7NmjU3y4Wz2trNz5858+umnlj9oIiIiePjhhxkzZozNEu+Vgb3trGrsbWfLli357LPPmD17NufOnSMsLIz77ruPF154oVq9ni4uLnz//fc89dRTjB07lnPnztGpUydWr15d6BBVZyrL121SUhLffvstd999N+7u7uUXpAPY286HHnoILy8v5s6dy2effYa3tzcdOnRgzZo1lXJUh73t9PDwYOnSpbzyyitkZmbSpk0bFi1aVO7bg4jIxUPJojhdREQEpmk6O4xyl9vzVN1NmTKFKVOmODuMClddv44nTJjAhAkTnB1GhQgJCeHDDz/kww8/dHYo5crPz4/k5GRnh1GuDMPg3nvv5d5773V2KOWqXbt2rF+/3tlhiEg1pjmL4jD+/v4ARf4RkpSUBFDphlyWhtqZR+2sOtTOPGpn1aF25qkO7RSRqkfJojhMREQEAFFRUYWWyb2WW7YqUjvzqJ1Vh9qZR+2sOtTOPNWhnSJS9ShZFIdp3749ALGxsQUuqABYVqfr0KFDhcXlaGpnHrWz6lA786idVYfamac6tFNEqh4li+IwYWFhdOrUCYDPPvvM5vq6deuIiorC09OTgQMHVnR4DqN25lA7qxa1M4faWbWonTmqSztFpOpRsigONXHiRACmT5/O1q1bLedjY2N5+OGHgZwlzQMDA50Sn6OonWpnVaR2qp1VkdpZvdopIlWLYVbH5fukzLZu3Wr55QRw4MABTp8+TVhYGPXr17ec/+abb6hbt67VvU888QRz5szB3d2dPn364Ovry6pVqzh79izdunXjp59+wtvbu8LaUhS1U+3MpXaqnc6idqqduapSO0XkImGKFGDNmjUmUOxHZGRkgfd/8cUXZo8ePcyAgADT29vbbNWqlTl9+nQzLS2tYhtSDLVT7cxP7VQ7nUHtVDvzqyrtFJGLg3oWRURERERExIbmLIqIiIiIiIgNJYsiIiIiIiJiQ8miiIiIiIiI2FCyKCIiIiIiIjaULIqIiIiIiIgNJYsiIiIiIiJiQ8miiIiIiIiI2FCyKCIiIiIiIjaULIqIiIiIiIgNJYsiIiIiIiJiQ8miiFx0IiIiMAyDjz76yOr8oUOHMAwDwzA4dOiQU2KTwv3yyy+W10dERETKn5uzAxARqa5++eUXfvnlFyIiIhgxYoSzwxEREREpFfUsioic5+7uTvPmzWnevDnu7u5lru+XX35h8uTJNj2YIiIiIlWBehZFRM6rX78+e/bscXYYIiIiIpWCehZFRERERETEhpJFEal2TNPkvffeo2PHjvj6+lKjRg2uueYafvzxxyLvK26Bmx07dnDPPffQuHFjvLy88PX1pVGjRvTp04dXX32V2NhYq3omT54MwNq1ay315n788ssvlnqPHTvGm2++yfXXX0/z5s3x8/PDz8+Pli1b8tRTT3Hs2LFCY86/WE9aWhovv/wyLVq0wNvbm9DQUG688Ub++uuvItudlZXFxx9/zIABA6hduzaenp7Uq1ePq666itdee43jx48XeN++fft46KGHuOSSS/Dx8cHf35927doxefJk4uPji3ymI6xfv55BgwZRs2ZNvL29adOmDW+88QZZWVkFlu/VqxeGYfDiiy+SmprK5MmTueyyy/D29qZmzZoMGzaMXbt2lXvcIiIiVYYpIlKNZGZmmkOHDjUBEzBdXV3NoKAg0zAM0zAMc86cOWbDhg1NwFywYIHVvZGRkZb7IiMjra798MMPpoeHh+W6p6enGRgYaDkGzJ9++sk0TdM8cuSIWbt2bdPX19cETHd3d7N27dpWH7///rul7rvvvttSh4eHhxkSEmK6uLhYztWsWdPcvn17ge3NbcucOXPMjh07WmLz9va23O/j42Nu2rSpwPuPHz9udunSxVLWMAwzODjY9PLyspx74403bO774IMPTHd3d6tn5D9u1KiRuX///pK/cCWwZs0aS/1fffW
V6ebmZgJmUFCQ5f+A2a9fPzMtLc3m/p49e5qAOX78ePOKK66wfL4DAgKsXtcVK1Y4NG4REZGqSj2LIlKtvPbaayxatMjSs3fmzBni4uKIiYnhtttuY+zYsZw6darU9T7yyCOkp6czcOBAdu/eTWpqKmfPniUxMZFNmzbx+OOPExAQAEB4eDjHjx/nqaeeAuDKK6/k+PHjVh9XXnmlpe5mzZrx+uuvs2fPHs6dO0dsbCypqan8+uuvdOnShVOnTjF8+HBM0yw0vhdeeIHTp0/z448/kpycTFJSEr/++ithYWGkpKTw+OOP29yTnp7O4MGD2bRpE4GBgbz77rvExcVx5swZUlJS2L17Ny+99BI1a9a0um/58uU88MADuLu7M2XKFI4dO0ZycjIpKSn8/vvvdOzYkcjISG666Says7NL/bkuiXvvvZdrrrmGgwcPEhcXR3x8PLNmzcLV1ZUVK1bwwgsvFHrvO++8w/bt2/n4449JSkoiPj6e7du307FjR9LS0rj11lsL7U0VERG5qDg7WxURcZTk5GRLL9FTTz1lcz0rK8vs3bu3pReppD2LJ06csJw/duxYieN54YUXTMDs2bOnnS0yzTNnzpg1a9Y0AfOXX36xuZ7bs+jt7W3u27fP5vrixYstsR8+fNjq2rvvvmvp+dy4cWOJ4snMzDQbN25sAubXX39dYJnY2Fizbt26lh5AR8nfs9iyZUszNTXVpsyUKVNMwPTy8jJjY2OtruX2LALmp59+WmDcuZ/rp59+2mFxi4iIVFXqWRSRamPlypUkJCTg5ubGM888Y3PdxcWFiRMnlrpePz8/XFxyflxWdI9TcHCwpRfy999/L7TcLbfcQtOmTW3ODx482LKJ/T///GN1LXdLj9tvv50uXbqUKJ61a9dy8OBBmjRpwpAhQwosExISwoABA4Cc16Q8jB07Fk9PT5vzo0ePxsfHh9TUVL777rsC723YsCHDhw+3OR8SEsJDDz0EwKJFixwbsIiISBWkZFFEqo0///wTgBYtWtgMnczVvXt33NxKt2uQj48PPXv2BKBfv3689NJL/Pnnn4UupGKPP/74g3vvvZdLL70UPz8/q8Vwli5dCsDRo0cLvb9Tp04Fnnd3d6dWrVoAxMXFWc5nZGSwZcsWAK677roSx7l+/XoAoqOjqVOnTqEfX3zxBQBHjhwpcd2l0atXrwLP+/n5cfnllwOwdevWAsv07NnTkkAXdA1yFik6c+ZM2QMVERGpwrTPoohUG7lzEevXr19oGU9PT0JDQ0vdQ/jBBx8waNAgdu3axQsvvMALL7yAj48P3bt355ZbbuGuu+4qsKerJGbOnMkzzzxjmZPo4uJCcHAwHh4eAMTHx5OamkpycnKhdfj7+xd6zcvLC8hJEHOdOXOGzMxMIKenraRyV2ZNS0vjxIkTxZZPSUkpcd2lUdRrnHvt5MmTdt8LOV9PISEhdkYoIiJS9alnUUSkBBo3bszff//Nt99+y0MPPUTr1q05d+4cK1euZOTIkbRq1arInr/C7Ny5k3HjxmGaJkOGDGHr1q2kpaVx5swZy2I4t9xyC0CRC9xUlNze1H79+mGaZrEf+bcIERERkapFyaKIVBu5Q0+LStrS09M5ffq0XfW7uroyaNAg5s2bx99//83JkyeZO3cuAQEB7N+/n9GjR5e6zq+++ors7GyaNGnCl19+Sfv27W2GyZakB6+0atSogbu7OwCHDx8u8X21a9cGym94aUkV9RrnXssdfmvPvUChQ5lFREQuFkoWRaTayJ2rtnPnzkITwnXr1lmGX5ZVaGgojzzyCM8//zyATS9a7qI4RfUIRkdHA9CuXbsC51KmpKSwadMmh8Sbn5ubGx07dgRg2bJlJb4vd7GdPXv2cODAAYfHVVJr164t8HxycrJlLmaHDh1KdW/+axERERqCKiIiFz0liyJSbfTt25fAwEAyMzN57bXXbK6bpsm0adNKXW96enqR1729vYGceXz55e67ePb
s2ULvDQwMBODff/8tMKl87bXXSEhIKE24JTZixAgAFi5cWOKEtHfv3jRs2BDTNHnyySeL3EcxIyODpKQkR4RqY9asWQW+LnPmzCElJQUvLy8GDRpU4L2HDh1i4cKFNufj4uJ49913ARg6dKhjAxYREamClCyKSLXh4+PD+PHjgZwk6+WXXyYxMRHIWZjlrrvuYu3atfj4+JSq3vXr19OuXTveeust9u3bZ0nqMjMzWb58OS+99BKAZbuIXK1atQJg165dhSZj1157LQA7duzgySeftCSGZ86cYdKkSUyePJkaNWqUKt6SGjFiBJ06dSIjI4N+/frxf//3f5bnm6bJ7t27GTduHP/9738t97i7uzNv3jxcXFz47rvv6NevHxs3brQkjdnZ2ezatYvp06dzySWX8Ndff1k989ChQ5ZVXnO37rDHkSNHGDJkCIcOHQLg3LlzvPnmm0yaNAnI2UKjsJ7BwMBAHnjgAT799FPLoj87duygf//+nDhxgqCgIMaMGWN3bCIiItVGRW/sKCJSnjIzM81bbrnFsvm6q6urGRwcbBqGYRqGYc6ZM8eykf2CBQus7o2MjLTcFxkZaTmffzN4wPTw8DBDQkJMFxcXy7lLLrnEjImJsaovIyPDbNasmaVMSEiI2bBhQ7Nhw4bmhg0bLOVuuukmSxnDMMzg4GBL3ffdd5959913m4B5991327S3sLaUtMzRo0fNDh06WJ7v4uJihoSEmN7e3pZzb7zxhs19n332mVUZT09Ps0aNGqa7u7vV52rdunWFfo6Lirkg+V+Hr776ynRzczMBMygoyPJ/wOzXr5+Zmppqc3/Pnj1NwBw/frzZtWtXS9yBgYFW7fjxxx9LFZeIiEh1pZ5FEalWXF1d+fLLL3n33Xfp0KGDZfuJ3r17s2zZMh577LFS19mpUye++OILRo4cSfv27QkODiYhIYGAgACuuOIKZsyYwbZt26hXr57VfW5ubqxatYoRI0bQoEEDEhMTOXz4MIcPHyY1NdVS7osvvmD69OlcdtlluLu7Y5omV155JR9//DEffPBB2T4hxahbty4bN27kvffe4+qrryY4OJikpCRCQkK46qqrmDlzZoEb2N9+++3s27eP8ePH065dOzw9PTl79iz+/v507dqVMWPGsG7dOrp162Z1X+4CMoZhWOaY2uOmm25i7dq1XH/99bi6uuLm5kbr1q15/fXXWbZsWZHbmHh6erJmzRpefPFFIiIiSEtLIzQ0lKFDh/Lnn3/Sr18/u+MSERGpTgzTrARrsYuIyEVh6tSpPPvss9x8880sXry4Qp/dq1cv1q5dywsvvMCLL75Yoc8WERGpitSzKCIiFWbt2rUYhmFZQVZEREQqLyWLIiJSITIzM1m/fj033HADbdq0cXY4IiIiUgzbTb1ERETKgZubm2V1WhEREan81LMoIiIiIiIiNrTAjYiIiIiIiNhQz6KIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNpQsioiIiIiIiA0liyIiIiIiImJDyaKIiIiIiIjYULIoIiIiIiIiNtycHYDY5+TJkxw5coSUlBR69Ojh7HBERERERKSaUc9iFfP111/Trl076tatS5cuXejdu7fV9bNnz9K/f3/69+9PfHy8k6IUEREREZGqTsliFTJ58mSGDh3K33//jWmalo/8goKCCAwM5KeffmLRokV
OilRERERERKo6JYtVxNq1a5k8eTJubm5Mnz6dI0eOULt27QLL3nXXXZimyYoVKyo4ShERERERqS40Z7GKmDNnDoZh8MILL/DMM88UWbZbt24A/PXXXxUQmYiIiIiIVEeGeeE4RqmU6tWrx4kTJ4iNjSUoKAiAunXrcvLkSbKysmzKBwYGkpmZSXJycgVHKiIiIiIi1YGGoVYRsbGxBAQEWBLF4ri6upKdnV2+QYmIiIiISLWlZLGKCAwMJDExkYyMjGLLnjlzhvj4eEJDQysgMhERERERqY6ULFYRbdq0wTRNNm7cWGzZ//3vf5imSefOnSsgMhERERERqY6ULFYRt99+O6ZpMmnSpCJ7Fzdu3Mizzz6LYRj85z//qcAIRURERESkOtECN1VEdnY2V155JZs3b6ZLly48+uijPPHEE5w5c4aNGzdy+PBhvv/+ez7//HPS09Pp06cPP/30k7PDFhERERGRKkrJYhVy6tQpbrzxRjZs2IBhGAWWMU2T7t278+2335Z4MRwREREREZELaRhqFVKzZk1+/fVXPvzwQ7p164aHhwemaWKaJi4uLnTp0oX/+7//Y82aNUoURURERESkTNSzWIVlZ2dz5swZsrKyqFGjBm5ubs4OSUREREREqgn1LFZhLi4uhIaGUrt27UqfKC5atIhevXoRHByMr68vbdu2ZcaMGSXaCiS/jz76CMMwivz48ccfy6kVIiIiIiIXj8qdYUiRzp49y6lTp4CcIaqVdejp6NGjmT17Nm5ubvTu3Rs/Pz9Wr17NuHHj+O6771i5ciXe3t6lqrNJkyZ07969wGv169d3RNgiIiIiIhc1JYtVzMmTJ3n99ddZtGgRhw4dsroWERHBsGHDePLJJ6lVq5ZzArzAkiVLmD17Nn5+fqxdu5YOHToAcPr0aXr37s26deuYNGkSM2fOLFW93bt356OPPiqHiEVEREREBDQMtUr5+eefueyyy3jttdeIjIy0LG6T+xEZGcmMGTO47LLL+Pnnn50dLgBTp04FYPz48ZZEESA0NJR58+YBMHfuXOLj450Sn4iIiIiIFEzJYhWxe/duBg0aRFxcHKGhoTz//POsWrWKPXv2sGfPHlavXs0LL7xArVq1iIuLY/DgwezevdupMcfExLB582YAhg8fbnO9e/fuhIeHk5aWxvLlyys6PBERERERKYKGoVYRL774ImlpaXTq1Ikff/yR4OBgq+uXXHIJvXr14vHHH6d///5s3ryZl156iYULFzopYti2bRsAISEhNGrUqMAyHTt2JCoqim3btnH77beXuO79+/fz3HPPcfLkSfz8/GjVqhWDBw8mNDTUIbHnFx0dbXMuLCzM4c8REREREalMlCxWEWvWrMEwDObPn2+TKOYXHBzMBx98QNu2bVm9enUFRmgrMjISgAYNGhRaJjw83KpsSf3+++/8/vvvVue8vLx48cUXGTduXCkjLVpujPlFRUXh7++PYRgOfZaIiBTNNE0SExOpV68eLi4aICUiUp6ULFYRSUlJBAQE0KpVq2LLtm7dmsDAQJKSkiogssIlJiYC4OvrW2gZPz8/ABISEkpUZ506dXj22WcZPHgwjRs3xtPTk7179/LWW2/x3//+l/Hjx5OVlcXEiRPL3oAiFJRAiohIxYmKitIoDxGRcqZksYqIiIggMjKSrKwsXF1diyybmZlJampqoUM/q7L+/fvTv39/q3MdO3bk448/pm3btowdO5aXXnqJ++67j9q1azvkmVFRUVbHCQkJtGzZkqioKAICAgBI/fdfDt+eNy/z3/rwym05316Lr19MWEAxf9D8uxK+ujfvuN1wum3vR/y5TMup3pfWZM7t5xcJOrYdPrrOcm1tVmseyRhtU22Qtxu/PNQSt3kd+cvDg1F181bJnfy1JxGRyZbjT27M5pXrxnD047UkrttmOV/vjlZ8eXkbuo/5zHIuNSiLttfkbNuyMutyxpmPsm3StYU27/tPX6LZG99ajk9eVpu
rPl1WaPlFixYxdOjQQq9XNlUtXqh6MSve8lVQvN8d+I5XNr1iOb7dvR5P7P8jr0CXh6F3+b4xV5CEhATCw8Px9/ev8GeLiFxslCxWEbfeeisvvfQSn3/+OXfccUeRZb/44gvS0tJKNQewPOT+Ik9OTi60TG7vZ27SVRZPPPEE06ZN4/Tp06xcuZL//Oc/Za4TbOcn5vaCBgQEWOL2a9OGWA8PyMoCoHkcuHq5gGEQSywtAloU/RAjGTzzDWmtGUZoSDCJp/M+d43q1cz3eWpiVb5hdjIuLj421Q68PJwQjwzwNEj0dcfVO+eNBrdMk8uOZVjeeMh0gc3N3PF2SSY0PAQz3xsSXufcyY49hl++cx5BWQScf36r7FOYGV5FvoaXD76T9LnLcMvOOfY8eBo/d3dcCtlf08fHxyFfExWlqsULVS9mxVu+Cor3ZPZJy88MgOZxByzf9wBceTc4sY2aBiAiUv402L+KGD9+PF26dGHUqFF8/vnnhZb74osvGDVqFFdccYXD5+6VVkREBGDbM5df7rXcsmXh6upKs2bNgIIXpSlPLh4eeORrg28a1MgZhcuhhEPFV5B8yvrYJ5QgH3erU/WD8iVWvjWtrtU0Ct56ZEDrupB8GoDjbnl/9DU5Dq4ZWZbjA3Uh2dOFw4lReIR6WtWRdjqdjJgYq3Ne+UYWNzROkG2aZGebBbcNuKR+Ww6Fe1iO3TPh0K8/FFpeRJzvcMJhq+OI1Hxv/NW/HGpdVsERiYhIRVPPYhXx6quv0qtXL3bv3s0dd9zBxIkT6dmzJ/Xr1wfg6NGjrF27lkOHDhEYGEivXr2YPn16gXU9//zzFRJz+/btAYiNjSUyMrLAYbFbtmwBsNqDsSxiY2MBnDI8ybNZM9IPHLAch58yiQ0wbP7gKlBKrPWxbyjBPh5Wp8KC8yWLru7gHQzn4gCoQTxgAnnvtAd6u9M+/jCHZ8zALT6YtE4GhORca3HEOrHbHZ5z3/7kY4QHW/ekpp9MxPBJtDoXWMMfyElCvY10ahJPRnY2ni4FD5E2DIOk9s3g8E7LuahV39G4300FlhcR57vwZ1fDjLxh8bS/s4KjERERZ1CyWEW8+OKLliE3pmly6NAhDh+2/kVumjkJQHx8fKGJIlRcshgWFkanTp3YvHkzn332Gc8++6zV9XXr1hEVFYWnpycDBw4s8/O2bt3Kv//+C0Dnzp3LXF9peTZrSuKPeccNTsFfTezsWfStSbNafqzec9JyqkXdQOsyfrUtyaKHkUUgycTjZ7nc99KanBz3FBlHjwLeXH0E3Ftk8b9eLlx2QbK4q0HO19a/abH086kJhglmzrm0mFi8/dKtyvvUqQ3krWAbbpwkM8vEs4ifKLV6XQtL8pJF1807Ci8sIk6VlZ3FkYQjlmPv7GxqnR9mj5sXtLrZSZGJiEhFUrJYRfTo0aNKzs+YOHEiQ4YMYfr06QwYMMDSgxgbG8vDDz8MwKOPPkpgYF4i9M033zBhwgTq16/PqlWrLOdTUlJYsGABd911l03P4a+//srdd98NQPfu3Z2TLF5yidVx+KmchOxQ/KHib7ZJFkO5t3sjth6JY+/xRB7s2YQGNS6Yk+hbE07tsRzWNM4Sb+Yli9eFuZ9PFPN032XS+d8sjHy5YrYBe8PO9yxmJWKkn8XDL5P0xJxhsNkpqTSzHoWKe4MGcGyj5TjMOEVmVuHDUAE6XHUL+7zexD8157jGsWTOHY3Gu55WMxSpbI6nHCc9O+9NooYZmXnjFi4bDF6BBd4nIiLVi5LFKuKXX35xdgh2ufHGG3n88ceZM2cOXbt2pU+fPvj6+rJq1SrOnj1Lt27dmDJlitU98fHx7N27l9TUVKvz6enpPProo4wdO5b27dvToEEDMjMz+ffff/nnn3+AnG1DvvzyywprX35e5+dL5spNFk+knCAlIwUfd9sFaCzOzyu08K1JbT8vFo26svB7/GpZHdY04tlv5iRe/p5uXF7bm4JmbnpkWh9
H1oZz5xet2GemQcppPALykkWAiJPW97hHXALH8o7DjVNkZGcXHisQ7FuDw5cG0eqvs5Zze1Z8Sft7xhR5n4iUs5it1EneCdlZcH4o+eH4C4egZuQdaAiqiMhFQwvcSLmbPXs2X3zxBVdccQXr169n+fLlhIWFMX36dFavXo13IStiXsjHx4dJkybRu3dvTpw4wQ8//MC3337LiRMnuOaaa3jvvffYsmULdevWLecWFcw9PBzDy8tyHHYaXM4v+hKVWPgiP8AFyaIB3iHFP9DPemuQUPIWubmmRW3c09OKrwOIzNexF+1ikpJyGs+AzELLZ3q64Rre3Opc7jDU4rh0bm91HPfr6hLFKCLl5M+PyXyrD932zIP/DYWsnKTwwuHzlvmKQQ0g4qoKDlJERJxFPYtSIYYNG8awYcNKVHbEiBGMGDHC5ryHhwcvvfSSgyNzHMPVFc8mTUjdmTMvzyMLasfBsRo5f3g1D2le8I2mCSn5kkXvYHAtwbdmESui9m9Vh+xz1mNHI2vnTEW8sJcw4YJRoAfT4wgvIlnMqBWMERxhdS7cOEVGVtE9iwCNrx0C/7fGchyw/RBmdjaGi963EnGGxC/fJea72pjZBoE7/6RujecxBk6zXQk183zPYrs7QN+vIiIXDSWLVVBaWho//fQTf/75JydP5vzlX6tWLS6//HKuvfZaPD09i6lByotns2aWZBGgwSmTYzWMouctpiVCdr7kzKdGyR52wTDUcI8kOAe9mtfk2stqk/L7fqvrh2obvDvAhav/NrlrvTs+8Wn49emDa6NfyVlJNcc+M40mgYUniy51a0NwQ+tnGyfJLGLrjFytWvbi11AX6p3OSSx9U7I4sW0DdS7vVuy9IuJ4sRvPYGbnJH/xkT6Ysz9j2aFQfgr4x6pcTs+iAe2GOyFKERFxFiWLVczcuXOZPHkyZ86cKfB6jRo1ePHFFy2Lx0jFunCRmwanTDZdCocO/wJtHyz4ptSz1sfeQSV72AXDUP/T2oueV/WkUagvhmGQfS7F6nqaG5guBqvbGTS+cwSPRPwHtxo1aDr/SiBva4x9Hu7c4G99b35eYeHgFUiS4Y+fmXNfPSOWyIx0wLfQ+wDcXdw53SaMeqvzVlncv3KxkkURZ8jOIj3e+k2ehMM+9Pv2bebdVAfyve/YMCMTGvXIGYYqIiIXDY0lqUIef/xxnnjiCWJjYzEMgxYtWtC3b1/69u1LixYtMAyD06dP89hjj/H44487O9yLkqfNIjc5/x6LO1j4Tanx1sclXWXwgmGobimnaVzTL2+LlQsWCErLt21jHd+6uNXI6cFs5hFsVW6/hzsu7iZuPgX3LgY0zGnjafc6ec82siHhaIHlL+TfvYfVccaGLSW6T0Qcy0w4QVaa7Z8BKYc8eeDnsxjnt2MKzsoiMDsb2v+nokMUEREnU7JYRaxYsYK5c+dimiYjR47kyJEj7Nixgx9++IEffviBHTt2EBUVxQMPPIBpmrz99tusXLnS2WFfdArqWQSINYtYbMbeZPGCYagkW09GzE45Z/0Y97ytV+r65S0C1NSnjlW5fe45WWVhi9z4NWwEwGk36/tcLlg9sTCtrxlGhmveceiB02QmJRZ+g4iUi8zofy37qV6o998m963IBtPM6VX0DITLrq/gCEVExNmULFYR77zzDoZh8NRTT/Huu+9Sr149mzJ169blvffeY+zYsZimyTvvvOOESC9ubrVq4pJvz8g6ceCeYXKKrMJvclDPIkkXJIvnrJPFtLydMKjjm5fo+fqEUj8jLzE87eZKnIsLHoUki+716wNwxsP6a9A1/khBxW2E12rGoYi8FXDdsmDf6iUluldEHCfpwL4ir/fdZnLPT9lEpGdA65vBvWQrV4uISPWhZLGK2LRpEy4uLkycOLHYshMnTsTFxYUNGzZUQGQls2jRInr16kVwcDC+vr60bduWGTNmkJF/765S+PPPPxk6dCi1a9fGy8uLRo0a8dhjj1kW/HE
WwzDwbNbUcuxiQv1YSDYgJaOQeYD2Jouu7tZbbCSfgnx7HdrMWSwkWcQ7hKYXvA77PdwL7VnMTRbjLkgW3RNKliwCpF1+mdXxv4sWYJrFL5AjIo6TeCTS6nhzM4PDF7wHNeBPk5v2ppPRWgvbiIhcjJQsVhFnzpwhICCAoKCgYssGBwcTGBhIXFxc+QdWAqNHj2bYsGH8/vvvdO7cmf79+3PkyBHGjRtH7969OXdBD1hxFi9eTNeuXVm8eDENGzbkhhtuwMXFhblz59KmTRv2799ffCXlyKuQoainkwtJZO1NFsF6KGp2ptViOea5C4eh5vzr6+6Lv7t/3gXvYJqmp1uVXePjzeq6Hlwo08MV1+CcOY7xntb7WXoUt5dkPhGDrLdRidh2jC+3fpRzcHIPIecirRJfEXG89KPW2+scqQlTbnclKtS6nNe+AI76Wr/BIyIiFwcli1VESEgICQkJxMfHF1v27NmzxMfHExwcXGzZ8rZkyRJmz56Nn58fmzZtYsWKFXz11Vfs27eP1q1bs27dOiZNmlTi+o4ePcrdd99NZmYm7733Hn/88QdffPEF//77L3feeScnTpxg+PDhTu2l8mzc2Oo4/HyyeCqxkJ43m2QxqOQPu3DeYtIJy38vnLOYu8BNXd+6lkVwAPAOplm6dc/ifwMDmNXYnwtl1Aqy3BvvZd2z6JFU8mSxY8fBHG+a9/XpkQmbP32DPWtfhnld6HfkVVg+tsT1iUjpZZ0+ZXUc52eQ7OfG9rvbWZ0/ei6UqDjrBbNEROTioGSxiujUqRPZ2dnMmDGj2LIzZswgOzubLl26VEBkRZs6dSoA48ePp0OHDpbzoaGhzJs3D8jZDqQkSTDAm2++SUpKCtdccw0jR460nHd1deWdd94hMDCQzZs3O3VxH886PlbHuSuink4oJJkqS8+i74XJYl7vpc2cxfMb5dT2td5yA59gmqbbDgdO9DFIuGCKknd4RN51L+ueRa+k6JLFTM5w3UvvtN7e5artGTy973+k5CayWz6EyN9KXKeIlE523Fmr4yQ/N74f8j1P3jLP6nzGOVei4wrfTkdERKovJYtVxKhRozBNk+nTp/Pkk08WODfv5MmTjB49munTp2MYBqNGjXJCpHliYmLYvHkzAMOH28536d69O+Hh4aSlpbF8+fIS1fnNN98UWp+fnx+DBw8G4Ouvv7Y37DLz9Lde+dQyDDUxpqDijhuGCjnzFs8zUy/sWcxJwur6Wid5eAfTKCMDnwKGfcbUsD4OisjbGsRw8+K4mdc76Jl6CjJKPqS41vVDML3zNnK75ChkxLvySo18PeIrJkJ2EYsDiYjdMhOSrY7N4BDC/MNwDQoi2zVvG+aQ1ASilCyKiFyUlCxWEQMHDmTkyJGYpsmcOXMICwujXbt2XHfddVx33XW0a9eOsLAw3nrrLQAefPBBBgwY4NSYt23bBuQMoW3UqFGBZTp27GhVtiiJiYmW+Yi595WlvpKKjo62+oiJKSTpO881NRo377wEJzQRfFJNTiUfL/iGc2etj8trGOr5OYu2yWIIHsDjcWdxPT9895K0dO6MT6BhC+vPs8f5xW0A3FxdiDIvWA3jbMkXuXH18yWo/0Crc1f/nc23/n5853e+d/b437D98xLXKSIll51iPVfZtWbO0HLDMDBD8t4p8s84x9ETJRv9ISIi1Ytb8UWksnj33Xe55JJLePnllzl79ix///03f//9t1WZ4OBgJk2axOjRo50TZD6RkTkr7TVo0KDQMuHh4VZli3Lo0CHL/wurszT1lVRunSV2ei+eQRlknsvbTDD8FJyqf7rg8hU0DDV3gRurlVABvHN68u5ISGJwYk5Pg//5pDG+900cXbklL7Q2bSz/d3M1iDJr0Yl/8+qKOww1m5c4/KCbhhB/vrcYoMc/Jgt7mkypEULr1HQiMjNh1UvQ4gbw9CtxvSJSPJcUE8ibvxxQL+9NPbdatTBP5b35lBBzrCJDExGRSkLJYhU
zZswYHnroIVauXMmff/7JqVM5ww5r1qxJx44dufbaa/H2rhx7YSUm5my07uvrW2gZP7+cBCAhIaHE9RVVZ2nqK6tFixbh4+Njc77foY14BmaSnO9vqwanTA6cjGThwoU25fsfP0T+pYi+/mEVaW6bSxRD3aRd9Mp3fHDHRjadznlGs+go8n+WcnsWd23cRfIf+YafmdncioELpiVJBEh38WF5UjJhHTvid+gQZy+7lL/274cDBwD495gnPhf0LG75+Sv2/Rlbothznm3SMtgf97ic1zYoGdodNPmzmQsTa9bgf8dOYCQdZ8f/jeSf0EElr7eCxcTEFPjaVmZVLWbF61iumam0yvd+UoI3JByPt8TcMDPD6udS/OGoStOelBQNiRURqShKFqsgb29vbrjhBm644QZnh3JRiIqyXpgmMTGRFi1aMHToUAICAqwLZ2fD1Cc5e0HnYPgpk8PeLtx+++22D3hjKuSb5njT7SPAzdO2XEGObYf35loOG9fyo/H5Zxz87LP81VqSxTsG30G4/wW9pTMmQYp1kucRVIfb77wT7ryzwEef/Hkf0Wus9/Ls2CSUjv0KaGMRTv/6Hqd+yXsj4Oq/Tf5sBju8PFnt402flHO0jl9N67teg8D6RdTkPAsXLiz4ta3EqlrMitexMg/8xT4zr1cxzg+u69aP65tcD8DxffuJ27vHct0rOZEhtzyMl7urTV0VLSEhgfvvv9/ZYYiIXBQ0Z1HKjb9/ztYLycnJhZZJSkoCsE26iqivqDpLU19JhYWFWX3Ur19EwhIfBZnn8Ay03tC+6TGT05mFfB7yD0N18y55oghFD0O9YM5iqgcYGNT2uWA1VLAMRbXiE2p7Lp/cYahWzh4uuHBmesH7Jh76ncCAv4G8Hs0O+00CknOO3w4OJBsg81zOcFQRcYikg3usjuP8DKsh6m61rL+3a6QmEB1Xuj1xRUSk6lOyKOUmIiICsO2Zyy/3Wm7ZojRs2NDy/yNHCl5IpTT1lYvT+wDwCsrAcM1Ljhodh+TUVDKyL9iiIjsb0vINmS3NfEUA31DyzzmyThaTLP/PMiDLBWp418DD1cO2Hu8Q23M+NWzP5ePuahCVfcECN3GHbAvu/RFmNoNXasOXd8GB1TntNk1YNRl3n2x86+T1gbplw1U7c5LFfR4e/ORzflj1359zfPf6ImMSkZJJjDxodRznB3X98ha/cqtp/b2tFVFFRC5OGoZaCfXu3dsh9RiGwapVqxxSlz3at28PQGxsLJGRkQWuiLplS87iKfn3YCxMQEAATZs2Zf/+/WzZsoXWrVuXqb5ycXovAIYLeIdmkHIip5fQ1YRLYkxiz8VaLzCTlkD+XjW8g0r3PFd38AnJG0KafConEXNxITslryczzQMwDNuVUC3PLaBn0bfoZNHNxYVj1CDDdMXdOL/6a9wFSbxpwvKnIPVszvGupTkfQQ2hUQ+I2gRAUOMUko97WW67+u9slnUywDCYFxzENSnncAWiPx/N30OX0rdVIe0QkRJJjbH+Xj3rB7V88noTL+xZDFHPoojIRUnJYiX0yy+/OKQewzCKL1SOwsLC6NSpE5s3b+azzz7j2Weftbq+bt06oqKi8PT0ZODAgYXUYm3IkCG89tprfPbZZ9xzzz1W15KSkvjuu+8AuOmmmxzTiNI6tdfyX5+aaZZkEeCyKJPT505bJ4tlWQk1l2+tvGTRzIJzcZjewZhpeUNhC10JNVeBw1CL71nMxoWjZg0aGud7NNPi4VxcXn0n/skZmnuhs4dh238th371U3Hxdif7XE7Pa4NT0OQYxNQwiUt3Y1WGHz2SU+ngt5dnPn8L77uf5KpmNW3rFZESST9x3GpoUbKvG+4u7pZjt1q2PYtHzqhnUUTkYqNksRJ64YUXnB2Cw0ycOJEhQ4Ywffp0BgwYYOnxi42N5eGHHwbg0UcfJTAwL0n65ptvmDBhAvXr17fpGR09ejRvv/02P//8M++//z4PPPAAAFlZWTz
88MOcPXuWTp060bdv3wpq4QVO520j4VPTeg+zy6JMTqWcsi7viGTRrxac2p13nHQC8+huqyJpxSWLPgUNQy1uzmLOn5pHzFo0JG/4K3GH85LFfSuLrCOXi5cvgddfT9yivG00pn2cla9EAJEE4OKWzRONF/LkO3XxevwOOkUUELeIFCs1Lpb8azmnXrCKdkFzFn/XMFQRkYuOksVKqDolizfeeCOPP/44c+bMoWvXrvTp0wdfX19WrVrF2bNn6datG1OmTLG6Jz4+nr1795KammpTX7169fjoo4+4/fbbGTlyJPPnzyciIoLNmzdz8OBBateuzWeffea8XtV8PYveNTLIdjFxyc6JpVkM7Es8bl0+d3hmLnuTxfyST5L95yKrU7nJYqmGoRbTs+jmktOuqAu2z+DsYajXLuf/+362vtbiBjiyEZJOWJ/v+hCBtW+yShYLkp3pQtK/Xry0bx4bd67mp6F1iW3qzcg2I2keUvL9HUUudlkJSVbH6b7W+5i6BgWBmxtk5oxQ0DBUEZGLk5JFKXezZ8+mW7duvP3226xfv56MjAyaNGnC+PHjefLJJ/HwKGDBlSIMHTqUxo0bM3XqVH777Te2bdtG3bp1eeSRR5g0aRK1axew2mdFSI6Fc2cshy41IzhXKwXf4zlLzXtkQfqOXdAy3z2OGoaa39kosnd8D/n6DVLtSRZ9i+5ZdD/fsxh94YqocedXRD131jInEQA3L7jx3Zx5lv/+CH9+DDFboHEv6PE0Xm6eeLVoQequXUU+F8Aw4YrDe2DmHvbXhVVhq/C+6n6CG1+Ke3gDPOrVwcXfcSviilQ7ydYjH9L9rX8GGIaBW82aZB7L2TDWP+Mcx0+erajoRESkklCyKBVi2LBhDBs2rERlR4wYwYgRI4osc/nll/PVV185IDIHOr3X+rhmc7Lrbod8nYluf++F2/KVcdQw1Pz+XICZkkT+ZDHNPacX0JFzFt1ci+hZBDi4JmcOZa6I7uBxPqbLBuV85GMA9V6bwdEJE0neswf3wACOZp8hyT2bVE+DwGST8NO2cTQ9Bk2PpZG8+W3yb07iE+5B/QWLcQtrVmQ7RC5GbsnWW9kY/rY/G9xr1bIkiwAucWdISsvEz1N/OoiIXCy0dUYVERcXx+rVqy2rfeZ3/PhxbrvtNurUqUNwcDB33HEHx48fL6AWKVenLkgWQy/Bq4H1PKCA3dHWZcojWYz5k+xM62G46eWwwI2bS86PD5u9FnN7Fvf9ZH2+WfHzSD2bNKHRl1+wY8J4mv/2G0sn3cfEe9x4abgrY+935aXbXdjapGRDjFOi0jn++IgSlRW5mJhZWXiey1uFOdELAr1sRxIUvCKq5i2KiFxMlCxWEe+//z7XXnst//vf/6zOp6en06NHDxYtWsTJkyeJj4/n888/p3fv3gXO+ZNylG9xGwBqNqdGRDDZ+XKbOgfjMTPzViktl2GoYJMsprqDh4sHIV6FLAhT4AI3xa+GCoX0LGZnw/4L5is2vabI+grim9qD7EzfnAPD4J8IF6YPc+XJB1z5qZ1BejEdHIm7AAMNywAAYelJREFUzpC46INSP1ekOss6fsQylxpy9lis6eJvU67AvRbPaN6iiMjFRMliFbFyZc6qknfccYfV+U8++YT9+/cTEBDAzJkzefvtt6lVqxZ79+5l3rx5zgj14nVhshh6CTUDQjmUbwqlZ1o2qXvy9UCWR88ikJ1lnSymeUBt39q4GIV8y1/Ys+jqAZ62fzzml7saaiwBpJh5W4Rw9ggc3269iE1IE6jRpMj6CrLraBrpsb1snx2YScsOZ2gw5Dgz7oQ3b3BhYQ8XVrcxOB1iWpWNmf4m3/7+L2v2nmTv8bOYpmlTn8jFJOWg9WrJcX4GgS62c3wLWhE1SttniIhcVDTxoIo4ePAgAC1atLA6v3jxYgzDYOrUqTz00EMANGzYkOuvv56vv/6aMWPGVHisF61Ttsmih08NDtSHxvlGBads3ox3q/O
r3Ngki0Glf24ByaJ5Qc9imjuEehexYI33BT2LPjWgmBVl3V1yrxtEmTVpbpwfYpuZCtuse8Bpdm2RdRUkMyubXUcTyMjqipvfbtx8DxLkGcxDbUcxdOsS3FNWgCs84HaGB1vkfQ4Ck1x4571M3NJz4jOTs4iedR9vD8zGcD1HhEcfvhw2GR93n8IeLVKtJUTutzqO9wN/fG3KFdSzqBVRRUQuLupZrCJOnjxJYGAgPj55f+BmZ2ezbt06DMPg1ltvtZzv168fLi4u7N69u6CqpDykJ0P8kbxjv9rgHQTeQRyvb72QRPwfG/IOHNGz6BNKzvIweQoahhroWUTdnv5QK98yrQ2vLPaxuT2LUMBQ1O0LrY/tSBb3nUwiLTMbTHfOHXmA8OSXWTNsNcMvG477jfNyPsfAlamp3JaQaLkv3s9gQW9Xq7p67jpN85PxGC4ZHM78kUFf38LO0ztLHZNIdRB/2DpZPOfrVuB2QwXNWYzSnEURkYuKksUqIisry2YO4l9//UVKSgqtWrUiJCSvZ8jV1ZWgoCCSkpIurKbCJSYmMnHiRJo3b463tzehoaFcd911rF692q76evXqhWEYhX7UqVPIAi7l7fQ+6+PQS3L+9QoisY51spi2dRtm9vlzjkgWXd1s5hdemCymuRsEeQYVXodhwNCPoOVN0P4/0H96sY/NXQ0VCljkJj3f156bNzTsXmx9F9oRnf9zY9ChXmPcXM4PhvANhds+g8Bw8PDnySa3EO5X31L653YGu8Lz7nYBHvwhC9esnCGoJ1OjuGP5nbyz/R0ys/PNIRW5CCQcj7I6TvfzLLCchqGKiIiGoVYR9erV49ChQ+zbt49mzXK2Ali2bBkA3bvb/iGelJREjRpFL1BS3k6ePMlVV13Fv//+S926dRk0aBAnTpzghx9+4IcffmD27Nk89thjdtXdr1+/AhPDwEA7ki1HKGC+IgDewfh5ZhFdw42w2PPX4hNJP3gQz6ZNc/YizM+eYaiQMxQ1JW9fiews6/eB0tyhlkcxn5ual8DQBSV+pLtL3jOiL+xZzK9RD3D3KnG9uf6OOWt13KZ+kHWBsI4wegeY2fi4uDL15F/c/ePdZJvZmIbB+/1dmfFhFu7nd+9ocAoGbTJZcmVOkptlZjLvr3msi17HK91fISIwotQxilRF6XGxVsdZAQXvSepWy3YYakzcOUzTLLAnUkREqh8li1VEjx49iIyMZOzYscyfP59jx47x9ttvYxgGAwYMsCq7f/9+0tPTqVevnpOizTFy5Ej+/fdf+vTpw7fffmsZQrt8+XIGDx7M6NGj6dmzJ23atCl13ePHj6dXr14OjrgMClgJFQDvIGpmZrG7gUFYbN7CKilbtuQkizY9i3ZuJO9r/Ued6VETSLMcp3pAkL2JaCGsexaLSBbtGIIKsCMmweq4dVgBya5hgJEz5LRdrXbc1+o+3t/xPgAxoQbfXOHCsHV5PbtD15lsusTgWGjea/H36b8Z9v0w5vSeQ9e6Xe2KVaQqMROsR524hhT8/esaFATu7pCRAeQki4lpmcSfyyDIx6O8wxQRkUpAw1CriLFjx+Lh4cGyZcuoU6cO7du35+TJk7Ro0YKBAwdalf3xxx8B6NSpkzNCBWDXrl0sXboUV1dX5s+fbzXXcuDAgYwYMYLs7GymTZvmtBgdqoA9FgHwCiI0K4td4dbvwqdsPr9fZv5k0d0XXN3te/4FG2pnB19qdZzuBgEediaihXAvahhqfnZsmZGZDbuP5SWLnm4uNKvlV+x9D7V9iP4R/QHwc/fD59pmGAF5w0zds0xe/yCT196HB5dn0WdbNg1PmKSnpfDE6ifYHZszz3fP8QR+2nWC1IysUscuUtm5JqVbHfvUaVhgOcMwcKuZtzCWf8Y5PLIytH2GiMhFRD2LVUSrVq1YsmQJjzzyCJGRkbi4uNCrVy8++OADm+FA8+fPB6BPnz7OCBWAb775BoBu3brRsKHtHyLDhw9n/vz5fPf
dd2RkZODubmeSVFkU2rMYTGhWFnsuTBa3bMHMzMBIz1uYxa75irla3Ah/f5Hzf786ZGdEANstl1M9KHrOoh3cXIpY4CZXjWYQ0qjUdZ9MdSE9M69HsEW9AKsFdQrj7urOjB4zeLbLs/h5+OGWnkzKxvYc/iHvR52radLwdCYNT0Of7Tk9jOmuEFUzgXXL7uDXetezJL42kYF1aRRek88f6EqgTxX/+pQK80fkGV78difJ8T60jomnVX0nDY0vgmey9ZsgIREtIbrgsu41a5F59Fhe2dQEouNSCu7pFxGRakfJYhXSv39/Dhw4wKlTp/Dz88Pb29umTEZGBm+++Sbg3J7Fbdu2AdCxY8cCr+eeT05OZt++fTZbghTnm2++YcmSJZw7d47atWtz5ZVX0rdvX1xcnNBZnpUJsQfyjj38wb9uzv+9g6iZlUVsgMGJIKh9Nud05okTZBzcg9VArrIki80HwB1fwbFt0OJGzNesN6JPcy+HZDFfz2ISPiQYAQSY1kNHadbXrrqjz1mvZtqmFH9wG4aRN+TWKxCfW54gaN8szu633Rogl0cWNDkOTY6fg62L6AFkGi78Vr8t41NO88ZTg/Fydy30fhGAM8np3PfRZhLTMgE37py/ia8eupImNYvvFa8opmninW+NmiQvaNywDZHRBa+eXeAiN1oRVUTkoqFksQqqWbPw+WHu7u707NmzAqMpWGRkJAANGjQo8HpAQAABAQEkJCQQGRlZ6mRxzpw5NucuueQSPv30U4cnydHR1m+5JyYmWheIi4TsjLzj0GZ5exSeH4YKsCfMoPbZfPMW/9jouGTRMKDZNTkfQHaK9TCxNHej6K0z7OB+QU/fCdfaBGRemCyWfggq/9/efYc3WbUPHP8+aZLuTcsqUED2LkNUlkxBhqAooD9FcOF6EVBBRUQcuF8RFRVFlBdQRAQEGTKVTSlDluxCW6B7j4zn90dpaJq2dCRtU+7PdfW6+qyTO2nT5n7OOfcBogoki21C/MrUDgBdnqBWzy/ReyWTFuNGVoIOs+HGNxW0qpk7L0XQY+FB1kT8Qa93JnHJN5vanrUJ9ihm2K24aX22+dS1RDFXUoaBR77by69P306wd+mLPDmCKTEBbb6OxSRP6BTckHMUkSwW+H/jn5Uiw1CFEOImIsmicIi8hMrTs+jeHC8vL1JSUkhJSSnynIK6d+/OQw89RI8ePQgJCSExMZF9+/bx2muvcfToUfr27cvu3btp0aJFuZ9Dnnr16hV/QlFDUMFS4AbgWH2Fnv/kSxbDD+CXP39z9ytfoPmYM60/zN1wncUyKJgsxig1aUK+JUR0HtDgjjK1HZVRIFksz1A+nTvK4A8IzHiEwObpqCrkpGrJjNeRlaAnM15HerIOF1Ph1R1dUGl5+C+ih/7F9rYKO1rrGNJ0GENuGYru2lIernFxUiHyJncxIYNFuy8QduUk446uwdWUwyXvYCK9a/Llhb280BO8XC6iaT+8TPN47SXz7Amr7WQvBT/XouczF9azeEl6FoUQ4qYhyaKw8dJLL7Fq1apSXzd//vxCl/Gwp1mzZllte3h4ULduXQYOHEj37t3Zt28f06ZN47fffnNoHADLli3Dw8ODFvHraJ9v/8GoTI4vub4o/b3ocTebOV5g3mL8rv3Uuev69rnLiexeUmAx+zK6JfIC+Qe+Zetgw8oN6BT7zb3LMgFc/5B5PseXHvme4iX9Lfy17NdSt2s0Q0ymt2Vbp1HZt/l3DpQzD/MMnUlI2kFqpx8jSPMvfj6Z0DA3qTab4VNNAP+mexB6VeWWaJWW1kvRoTVD34MqfQ/mAMuIYpnlWAtg+7o/iR5zP1TGUOgyiIqKYomdft8qQlWPd+kFd9pEn+WN3d+hU3NvEIWkx9H18jE4BVGbAVS866/h+L1jifNpVmx7juJ2ZDv5y19leCgsXbq0yNc34OxZ8o8PCcxKZtf5y5X6s8jIkGRVCCEqiiSLwkZ0dDQnT5688YkFpKV
dL8fu7Z37YT89Pf2G5/sUscZXabi6uvLqq69yzz33sG7dOrsWzbl40TprSE1NpWXLlowcOTI39hXb4PoSh7TvM5L2LQZf3/HxW9QwmbjoryXRE/yvvSTapDQMmRp07rmFXBo2b0fDQaPtEvPZZcvyLZwBRr2Wh8c8bJe282TmmJj5zzrL9im31pC9xrId0vcpRnco/fM5cikZ85G/Ldvt6gXw0JjBxVxRBjkZrP39F2IOrKWz5gS+pDNazeathgYWtc5doPyWKJX7/zLT/px6g8ZyBZ/5l+CUbFpMGGffWB1kyZIljB5tn9+3ilAV4zWrZs4knWHfxQuY9qzmjX2bLIli4RRSI91pu301NX98vVJuLPx9bqfVdo6nlodHjy7y9U0LqcfFfDcP/bNSSTHrGDVqVKX1pKekpPDYY49VymMLIcTNxjlugYsKtWjRIlRVLfXXXXdd7yYLDQ0FIDIystDHyD/8NO/c8sobepqdnU1cXNwNzi65kJAQq6+6detan1Bw2YygAj0G1yqioigcr2/94SozNt+sxfLMWSyg4DBUjdb+86XyF7gB2EIXaP8QuPlB+wehXdk+2B+OSrLadkjVRb0HfYY+yLqQ5xma8za9jZ+yuNufvDv+IGHBHQA4XVfhnVEuvP6QC//UL9mH4vR5n5N0Odb+8YoqJzYjlmG/DWPEqhEsXjeRN7ZswM1YsqVWEsIzSPuxcpYNSr9iPQfb6GVbKC2/woahZhnMxBVYfkMIIUT1JD2LwiHCwsL49ddf2b9/f6HH8/Z7enrStGlTuzxmfHy85fu8nk2HU1WIyzdPT6MD/wJLRbj7USMjt/T8mVoKtx+/3lNlyD83z47JoqnAMC2NvvgPhGWh1VgnUAYVuOdzUOdeL/BTBkcuJVttl2u+YjFctS4seuxWDl9KpkGAB8E+uQn13D6fMzdiLmeTz9LIsw6N3U/TgY3UilJJjnTnpOrKBZ0W9dpzbBGp4nft5fbMzmDD5DcZuWiOzF+s5j498CnnU85TK0Hl1aUmPPN15Rs18NlQDUmeCiFxKiFxKh3OqNRKun7Oxf8u4pZ+/4euTuFFwBzFmO/vJAC+xY/s0AZbF7gJyMq9yXcxMYMgb1e7xiaEEKLqkWRROMQ999zDa6+9xo4dO4iMjLSpirp48WIAhgwZYrfhokuXLgVyexi9vCqoVH1qDORfKzGwMbgUeFu5+RKUktvjkOphfciUo7E6z17MmdeTxRwtuGk8ijm7bBRFQatRMJpzk1+jSc07UK52DxdIFts6cD03V60LnUMDrPZ5672Zdus0q33/HDvCxqWvMDh4B3WVZCJc9bwaFMhFnY7bjpt54bfra0K2OvAnv/zwMyMfecBhcYvKFZMWw5qzawhIUZm+xGS5WQBgBuYO0bCrRe57O280wZpElfe/M+GR1yGXCdsfG84ti5fTwC+0wmJXUtKstrWBtYo938XPD3Q6MORWfM5LFi8lZhJW398hMQohhKg6ZBiqcIhWrVoxbNgwTCYT48ePJzPfsMg//viD77//Ho1Gw7Rp02yuffjhh2nevDlz58612r9lyxa2bt2KqlrPIcvJyWH27Nl89tlnAEyePNkBz6gIek8Y/hV0mwTNB0Pj3rbnuPsTdG35jLQCo0EdlSyqmVmW77N04I79exbBeiiqwWQu5sySyTKY+PfK9eTbU+9CwxqVv0Zd65ZtaPbU/3i75WqWh86gdnAflsWm8kBKKruaK1bDVDUqeP3wBntWvgZGGapXHX1/9Hvc0w28ttREUIFiznF3ZPBwQBxvxsbzgEsN2vg1QafRc9Vf4Zu7rP/l1jmbwbdTB/PW7rdIy7FO4hxFX2D4qHethkWcmUtRFLRBNa6fb8hEbzJwMUGKzAghxM1AehaFw3z99dccO3aMP//8k8aNG9O9e3euXr3Ktm3bUFWVTz/9lLZt29pcFxkZycmTJ23mHR46dIgXXniBmjVr0r59ewIDA4mNjeXw4cNcuXIFgCl
TpjB+/PgKeX5AboLXblTx57hfX2sxzd26182Ubf9kUTUaUQzX13rL1oG74phkUafRkEVukpjXw1geJy6nWrXTqq4vLpqqMZyzZR0f3njgDuDaciDGHF47/xfdD37DF30O0fL73EQRIDQKvvz7Vy5Fr+GuIV/hGdKlssIWdpaQlcAfh5fx6lITIQVGdAa1TaFF/QxoOJR16S147aGpABhMBrafP8zLmZ+xpc1e7jxy/Xf8/m0mXq+/lFczYvm096cOj9893XpeZVDDdje8RhcUjDE6xrIdIMtnCCHETUN6FoXDBAcHs3//fqZOnYqXlxcrV67k8OHDDBgwgD///JPnn3++VO317NmTCRMm0KBBAw4fPszy5cv5+++/8fLy4uGHH2bHjh188MEHDno25eCWL1ks0LNozsmXCNkpWSxY3Ca7gnoWLcNQy+HIpSSrbUfNV7QLrR5u6UPP+5YyrOYowjtY33t7YIvKu1oDt/85jpHL+vP27rdZe3Ytl9MvV1LAwh4Wb3uHF37KpNEV6/1+zdPxGj0Cnt0P9y8k0a2B5ZjORUefxh1ZM2Ye5zsOItH/+nvFRYX/rDSx5/QWErMSHRq7qqp45+vAzHCFRo3Cbnhd4WstZhZxthBCiOpEehadxA8//FCq893c3PDz86Nly5aEhIQ4KKob8/Hx4d133+Xdd0te+W/r1q2F7u/QoQNffPGFnSKrQPmGoaYXOwzVzy4PZ86w/hCX5cCeRa3L9fgN5vIPQz0SVXHzFe1JcW3K/XO3cLzPnbhl5vbqBqTBiJ1mlvbUEBUXTcb5JRxPW8wSFdKa1qF1w650rtWZzjU7U9urtkPiMiUlgUaDy7XlaSJTItl6cSuRKRdIMFyi/Sk/mmm9UDITuBwTTZzZk6s1u2N0ccdsVjGr4KKBsPr+lgJAN7PUwyuo9ckamlsXFCW2oR/hTy/gof5di72+po8b0559m+jY7ah/pKCYc2+2BCfDw38a2dZ7G/fcco+DooecpAR0+ToWUzyhTXC9G16nDbIucuOflSLDUIUQ4iYhyaKTGDt2bJmrK7Zo0YIXX3yRRx55xM5RiRJx96OGseLmLKpZ1slijg7ccMwHfV2+IaKqCiazWq5ho0ejrSeAVemexQLcAmoQOvU1Ls94w7Jv2G6VIXtMaAvk0Vm6S2xvvZwvOq0gqoaCv74WXWt1Z2rXZwlwty64UxamtHRiP/6YxJ9+QjWbSWgezJYmWfzeOI0Mt+s/nz937KDDJRODjhlpEgkeqkrDWiZ2hrTnG4/BXFBzi594uWr5+uGO3N64RlEPWe2pB5Zw6JXXaXXe+t9mZL1gvr77LRb37lyidvy9XHF7di6ZMfdw5cD13+/u/6gsPLTWocni5bNHrLbTPUGvcyni7OsK61nck5RZ7ve7EEKIqk+SRSdRv359FEUhNjaWjGvLImi1WmrUyP3wFh8fj+FatTpPT08CAwNJTk4mOTmZY8eOMW7cOPbs2eOcPXPOzs0Pf7MZrapi0Clka8H12pRCq2TRtfgS9iVVcBhqlk6pkJ5FyC1y46K58YfPoiSkWxffaBDoWea2KoPfffcRv/QnDMePA7lzGDWFjM51M0D/CJX+ESYONlRY2zmaddk/s/3Un/yn8TRGNGyOOT0d1WDAtVHD3IqU16RmGfDQa60/pCdegKhwyEomOfwol77fhCYxt8iRAgQev8J9x2GoCxy4RSGiscItMSqd/lXxT4e8fwUqkB2vo+PREwTVOs6hZjq2N6rDaV0jHluxjQndOtKj8S3U9KhJkHtQuX7WzkTd+y0Xp88i8IL1++hkPV+W3/c2P4ztiqu25K+Fe8Mu6O65i4RL2zBczV1rVWsGr427yRyWibvWMe/XqJMHyF+/NNujZDNRbJPFZAwmlaupWdT2dUysQgghqgZJFp3E+fPn+eyzz5gyZQoDBgxg2rRp3HbbbZZlJwwGA7t27WL27Nls3ryZl19+mQkTJnDmzBneffddvvvuO77
66iuGDh3KXXfdVcnP5ibj7o8GCDCZuKrVkuYOrtcKfpoNGlQzKG7etktulFHBYajZ+oqZswjlL3KTkXN9jJybTuN0vRaKiwt1pr/GhTEPlvia9udU2p9TMQMaYoFJnC1wjjG4FtHBDYhwq8VeXTBR/rW5u30AT9Y6Ss0La8iJ2s8BxY2ECB8antAWORldb4KuJ1W6nrzxz6n+ZYX6l40M2h5JTEAkiV4KCbt/Y60XJHgrpNXyptvQpxnT+v+q95qScae58vabpF+wXn7mn/oubB71AfMfvq1UiWIebf83qLHxNmKuJYsAvQ4a2HVpB71D+5Y77MIkRJ60ShYNHiX7m2M7DDX3D9jFhExJFoUQopqTZNFJrF27lokTJzJ27Fi+/fZbm+M6nY4ePXrQo0cPxo8fz3PPPUeTJk3o27cv8+fPR6PRMH/+fL755htJFiuaux8ANfKSRTcIzLc0o8mgQevnmDUWwbEFbnQa67TEWM7lMzLzJYseeuf88+QRFkbwiy8SO2cOanY2eHujaDPx0KehdTeTnaolK15vc11xfTzaq5epf/Uy9YFh1/aZ/1C54AkHfOCydx2aXYKG6bbXmm/Q9o24qBASDyHx+RNMFUjm6qp3+Xbw74ye9DWenn7leJSqK+37F0g8aZ0onqwLqx58kAUP3WHTu15ifvVx6TuUnPDN6DNy26iZBH+uW0rvpxyTLGZejrbaNnuVbP3Vgj2Lt2gyeahrffw87LNGrhBCiKpLqqE6iY8//hhFUZg9e/YNz3333XdRVZUPP/zQsu/ll18GYO/evQ6LURThWuGaoGuJVMEiN+Ycxc5rLBZSDdVhw1Cte5QM5aiIajSZycmXbLqXYC5VVRU4fhzNwvfT7PAhWuzbS/O/I6j38n3U6ZpEw35xhPaNxad+Bihlf700qoJ3mkJotELXk1wbTnpdlg7m99fw5tMqET2NJNUsPPlO8KvBv/1uZd1LPVjyVFP2tXcn0zaXLVRwMtzxvyMcvrMbp798D3NW1o0vciLqqY0k/mE9z+98MHz8gB9fjflP2RPFa7x7TMCjkfX71W/dXoxmYxFXlI+aYF1t1cW3ZH93tMHWPYtNtdm8dU8bmtb0tltsQgghqibnvHV/Ezp48CB+fn4EFRgOVJjg4GD8/PzYv3+/ZV/jxo3x8vIiNjbWkWHaWLt2LXv37iU8PJzw8HBiYnLX6rp48WK5qrTm5OTwySefsHjxYk6fPo1er6ddu3Y8++yz3HffffYK3z7cfAHlepEbd4XcnplcphyNXZNF2zmLEOigAjcFPywby1ERNcNgvf6bu955k0UARavFkkq7aFEGfQBNBsDFPbhrtNTV6glOySFxyzFS9pwkOzWZWNfcmwkZeoVMV3AxQ+gVlYBSrtf+TwMtK29rSJ2ADjzdoRextzfiSIaRjItR+O/ZhmdcDMlaFwa+8BTN27e0GUZqysggbvUSkn76AdO/V1CMxQ8z9UsxYfj0e459s4C6QzvhN+0rFFfnmm9qw2Qg7pvJpEVZ32iZO8SFh28bj4euZL1yxardFrfWIaT8k4Dm2m9L+38NHDq+lY6t7N+7qE2xvqOgDyhZFV4XPz/Q6eDavHhjBf8fEUIIUXkkWXQSWVlZ5OTkkJ6ejqdn8R/C0tPTSUlJwdXV1Wq/oii4u1fs/JIxY8aQnJx84xNLISMjg379+rFz5078/Py46667SEtLY/PmzWzbto3Jkydb9apWOo0LuPlYls+wqYiabedkscCcRbOrDq3imLe6rsCcwvKstZh/CCqAh5Mni4Vq0jf36xodEDwIggHMZi4e/h/PHfyYM4p1z5JvmkqjyyqNLkOjyypBySoBqeBTYKm7NL2Wb9r14d+2Q5k6sCUDWtUskAg2hgd7ALBkyRLqdGhVaJguHh7UfGA8NR8Yj2rIwrx3CUnr5+F65RzGTA2paTpiz3ninmn983fJULi8NJycuMHU/HSj3ebhVobLf84lfn86Gq53s+5ooZB
Q15sHmj1gt8cx93yMhB3vUuNi7mulNcPpJQvo+Jb9k0XXNIPVtm+dxiW6TlEUdEFBGKJzh7GaU1IwZ2WhcZPlVIQQorpz3v/kN5lmzZpx8OBBPv/8c1566aViz/38888xmUw0bdrUsi8pKYnU1FSrfRVhxIgRNGnShLCwMMLCwgguMPelLF555RV27txJmzZt2Lx5s6UibHh4OL169eKjjz6iV69eDB48uNyPZTdufgSZ4gFIK5Cv27tn0Zhh3Q2lceANAtthqOXoWSyQLDrzMNQy0Wio1/7/+F+rEby68Wk2xR6wHEr2Uoi4ReF0Ey3N9IE08alP4/o98VZDiNibwoF9lzAaTMTWvYUJA1vzeZf66Mo5RDKPonPD5Y5HCbx9LOE7NxCz/r8MUPZQu2Uqa6MCqHHY1WYI7NUtMWR++hChLywBJyx+k5F4mcPrP6Ne9PUbbmYFlnXX8GTbJ/HW22/4Zb07RnOl8Vtw8fq/46A/D2KeaULjYt/3gGea9XssOLR1ia/V5ksWIbd3UV/vxms0CiGEcG6SLDqJcePG8dxzz/HKK6+QlpbGpEmT8MtXTh8gOTmZjz/+mHfeeQdFURg/frzl2N9//w1A27ZtKzJsvvvuO7u2l5iYyJdffgnAl19+aUkUATp27MjLL7/M9OnTefvtt6tWsujuT43kqwCkuRUyDPVaERx7yEhLstp28XDccMCCCUl5qqFm5Fj3plXLnsUS8NR58vHABaw4tYKDsQep41WHZv7NaB7QnNqetW2GjA5qA6kPGjgTm07zWt64OSrJVhQ63jGAnTU7ct+KXbgnnqBRnRhcGh/HeDmSoTvN+F2rraQ1KZz79SBn/Z7hzkc/d6pqqaqq8sHSh2l9xHpkxvbWCvf2fZ5HWtl3vVqN3p20lv0w79uO77WkOzDJzKkNy2g2cJTdHifDkIFfvvtIWToIrt+sxNcXLHJjvHpVkkUhhLgJSLLoJJ5++mnWr1/P77//zttvv837779Py5YtqVOnDgAxMTEcPXoUg8GAqqrcfffdPP3005brFy5ciIuLi9NXQl27di05OTnUr1+fO+64w+b4mDFjmD59Ort37yY6Otry+lQ6dz9qJOTe1S9Y4MZk5wI3WalJVtuOTBa1Gvv1LNoOQ715/zxpFA33Nr2Xe5veW6Lzvd10tK/n59igrrn9lhr8NmUw0cl9OXUlldNX0zgYc5o5Nd/itcWxlnUla8UpbNq8hSOGF3hy3EeOS2LtyGQ28dxvz5J0JYr7L17fb9RA6H9eYmDbsQ55XNfbHidqz2Z8w6//zl/530K7JotXrp6zrO8KkOGpElizbomvL7h8hvHqVXuFJoQQogqTaqhOQlEUfv31V2bOnImnpyc5OTkcPHiQtWvXsnbtWiIiIsjJycHT05MZM2awYsUKq7v5y5Ytw2AwMG7cuEp8FuUXEREBQKdOnQo93qhRIwICAoDcokBVhpsfQZYCN9aH7D0MNTvdeo6o3tNxFQttCtyUY86izTDUm7Rn0RkoikJdP3d6NQvmse6NmHt/f+ZP/YOtPa0LpvTZD+HRG/jPl89xKTGjiNaqhhxTDk+un8hfKX8xapv1TQ/z3XcysNtYhz12x/YdSG4YQP5HDQg/j8GOCdnVC8ettrM8wc3Tp8TX2/QsSpEbIYS4Kdy8t+6dkFarZfr06UyaNIkNGzYQERFhqW4aFBREhw4d6N+//w0L4Dizc+fOAVC/fv0izwkJCSEhIcFybnldunTJajs1NbWIM4vh7k+NIgrcmO2cLOakp5J/9TPXUnwgLC1dgTmL5aqGejMUuKnGPHWePPnpWnYOuZOgyCTL/ifWqkwZv50JP/yHuQ/9lwaBVe/vU7ohnUfXPs3xpAN0PK3SJOb6MVWnpeXk1x36+G46F5LqjuGfRvNoezZ3n4sKl5Z8T8P/FD9HvaTOn4og/yQEo3vphgYXNgxVCCFE9SfJohPy9PRk+PDhDB8+vLJDqXB5iVpxCbGXlxc
AKSkpdnnMevaYl+Puhw7wM5lIc7fujbN3z6Ipw7raiKuX/douqOCcxfKss5hpsJ6zKD2Lzkfr6kbXr5fw77AhuGTn/jwDU+GJdWb+O2wnLyx4h0/HvUq9ADssO2EnCVkJPPT741xM/xdFVXlgu/UNj8AxY9DVquXwOEI6DiXy1PVkESD5559Qn5uCoin/IKCrpw5ZbWvddUWcWTibYajSsyiEEDcFSRZFoV566SVWrVpV6uvmz59Pt27dHBBR1bNs2TI8PEr2obd5wgU6ADVMJpLdCiSL2Ro27Qzn6sFSLqZXBL+YGPL3JUZGX8FNiWLJkiV2aT+/i5FukG9pgQ1/buKct6noC4qxN14HXB+je+bEMZYkHSxfgBUkKsoxr68jOTLmgP53UX/175bt206onKyrsCNsBc9+5suA0Pr460t3Y8ER8SaryXyfvRRNVhxDDqv0izBTK+n6cbNOy/YaNTCW4XFLG2+qQeFsjRYkeB23rKvpGp/B7++8Q1rjki1xURztqUjrbW8Pq/huFK/blSs0z7d96fARtlXS73xGRtUe0iyEENWJJItOKDs7m40bNxIeHs7Va0OBgoOD6dixI/369bNZX7EsoqOjOXnyZKmvS0uzT8JTFG/v3Pl36enpRZ6TF4OPj32GX168eNFqOzU1lZYtWzJy5MiSP0Z4Dqz+lRomE1EF7uibchT6DBoOtdvZJd6/f/nWartjp9tJuuzF6NGj7dJ+foeWH+bAvuuvT/cevbizedmWR0n/6ywrLl2fV3Vrxw6M7tGo3DFWhCVLljjk9XUkR8asjhrFpcxM0v7cZNk3dpOZ24/D9h4L+CvmJT56ahR1/Eq+rIu94z2VcIpZXz3CuP1JdD2hoivkHkfQ2EcZ+fjjZWq/LPE+9JmObW2nM3zn9US6vZpGvXI+78vpl9n/1ZtW+7waNrSK70bxGhMTOTXvK8t2oEZDl0r6nU9JSeGxxx6rlMcWQoibjSSLTmbu3LnMnDmThISEQo8HBgbyxhtvWFVCLYtFixaxaNGicrXhCKGhoQBERkYWeU7eHMO8c8srJCTEartMw1uvLY0RZDST6QUmJXdOEth/GKqalWW17ekTSNLlbLu1n58911ksWA1VhqE6L0VRqD1rFmcPHcIUG2fZ3zQami6FC7e8x5tpmcx48VFq+zpuHdCiHDi7k6MTn2Lav4Yiz9EGBxM4vmILgt3Wuh1nYlyB6+/hqwf+prwD4Q9FhdMg3xRDg1ZF27Jzqdpw8fMDnQ4Mua+ZDEMVQoibg1RDdSLPP/88//nPf4iPj0dRFFq2bEn//v3p378/LVu2RFEU4uLieO6553j++ecrO1yHCAsLA2D//v2FHj979qwlke7QoUOFxXVD7v5A7jBUFMVq+QxTjgbV1X5FaJSsHKttH5+gIs4sP63GjussGgoki06w1IIomtbfn5A5c3KTjAIanFZ4bsUcfn/seaLOR9te7EBrtywm4f8eo1MRiaKLh4bAxx+j4a/LC43dkfq0COaCewurfcrFxHK3ez5iG9p893GyA03UbNuzVG0oioIu37xFc0oK5szMcscmhBCiapNk0UmsX7+euXPnoqoqTzzxBJGRkRw5coQ//viDP/74gyNHjnDx4kUef/xxVFXl888/Z8OGDZUdtt0NGjQIvV5PZGQkO3bssDm+ePFiALp27Vp11lgEcPMDoKYpt+iH1fIZqoLZVLpiE8VxKZgs+pZtWGhJFKyGat91FiVZdHYeHTrQeN0f+D78f5gK/LfRmBS6HfmbuMF38e/rszBccXx1zW/nziR44izqxtve1PAIzqZurxyabFhD8OTJaGvUcHg8BTWr6U2yezeu5hto4J5mLncvXvLBcKttnb+Zmo1KP+xditwIIcTNR5JFJ/Hll1+iKApTpkxh3rx5hSZCtWvX5quvvmLy5MmoqsqXX35ZCZHaR58+fWjevDkrVqyw2u/v78+ECRMAePrpp4mPj7ccO3DgAO+99x4Ar77
6asUFWxLXhqHWMRaxfIadKrcCliqUAGYFfL0d96HXvussSjXU6sjFz486r7yC7y8LiWhq+zPVGw2Yfl7Mqb59iZk5E0NUlN1jyMgyMO/pkXSduxTPAiOy0+sbaDToKg16x+MzeR5KjVC7P35JKYrC7U3uILLA/Z30fZsKv6AEsk3ZeJ69bLVPGxSE4lL6WSiy1qIQQtx8ZM6ik9izZw8ajYZXXnnlhue+8sorfPLJJ+zatasCIiverFmzWLNmjc3+oUOHotfnVtEMCwvjiy++sDp+5swZLly4QHJyss2177zzDnv37mXXrl00adKE3r17k56ezqZNmzAYDEyaNInBgwc75gmV1bVhqHUMeT2LCnA9sTIlJ6OrW9cuD6XLud67l6UDX1cHLp2hceQ6i/LnqTqp17IL9b74gje+m8DDm8w0umJ9XDEYSFqylKRlv1BjwlPUePppFKV0awEW5uympZz++E16nrG9kZHePoOOzZJQFOCOidCkX7kfr7zubF6bg4E6OHV9mGzUrnX4DhpVpvaOxx+n4WXr96WhWdmKaXn26I42qAba4GC0QUHo7LGskBBCiCpNPo05iYSEBHx8fPArwRwaf39/fH19SUws/1yX8jpz5gx79uyx2R8REWH53s3NzeZ4cTw8PNi6dSsff/wx//vf/1i7di16vZ7bbruNZ599lpEjR5Y7brvTe4HiQh1jbrKYXuApm+zUs6iqKvqc6x+Kc/QKWo3j3uYFexbLtc6iDEOt9rqH9CBi8JNMq/81nU6pjNhhpvHlAicZjcR9NhdTUjI1p00t8xqDhqRoTr89lszNF6iXbt1Glg5cuqXQqea16s31ukLv18r0OPbWOTSA9X7+wPVhuUknTpS5vcNR4XTI1wFo1Kp4dulbprb8q+LfViGEEA4lyaKTCAgIIC4ujuTkZHx9i+8pSkpKIjk5mRqVMOemoO+//57vv/++1NedP3++2ON6vZ6pU6cyderUsgVW0RQF3P3wzojH22S2GYZqSrLtQS2LzIwUq7HlRgcnXAWroRrLMWexYM+iDEOtnp5p/wwx6TH8rvzOviYK7c6q3LvDTPMCo08Tf/wRc2YGtWfORHEpxe+C2UTMmk8wzPsCzrjiXmC2xVU/qNM9gSaeWeARCA17wqAPwMV+84bLw8tVS0LttsCfln2aqLLfTLoY8Red870tTYFG6re/sxwRCiGEuJnInEUn0blzZ8xmM++///4Nz33//fcxm83ceuutFRCZKLFrQ1HrGo3WBW7IHYZqD0lJ1t00JlfH3g/SObAaqvQsVk8uGhfe7f4u3/b/lnZ+TTjUWMPr/+fCG2M0RPtbn5v8y3KiJzyCml6y94cxci+XpnQlbcZXpJ+xXW/2WCMNjV69lyZPLIJJJ+DFMzByAXhW/o21/IJaDLa6oeSdYMacFF/0BUVQVZWsI/9Y7dMG6HDzqVrPVwghRNUlyaKTeOqpp1BVldmzZ/PCCy9w9apt5cCrV68yceJEZs+ejaIoPPXUU5UQqSjStYqodYxG0tyse+TslSymJFsXnDC7Ora3xHadxfIMQ7UucOOhk4EP1VmX2l34cdivzO36JreYXTjWQMOMh1yILLDSS8r2cC4NC6P7hblw9Dcw5laoicuM40TCCTIMGRBziLRvRxD37L2krk3DmGH9u5Oph5+HBNB92RbqDXkrd26iT+3cHv8qqG+z9lwIvh6bBoXknStL3U5Megy1ozKs9plCGpQ7PiGEEDcP+TTmJAYNGsQTTzzB119/zZw5c/j8889p2bIlda8VRYmKiuLYsWOYTLm9M08++SQDBw6szJBFQZaKqEYu2PQsJtnlIdJS4vDMt6262fau2JNtNVQZhipKTlEUejYbTveG/Vm+ZBRfup3mjTEuvPqTyWouY9olPZ5/RJOe9CjbageyIiiEPYYEVFQ0QLuEHB5bCUGXvWwe458GCmseCOXD0T9Qw905etS6hAbyQ6AeIq+Xbj2/ez3+g8aVqp1
DsYdoeNn6Bo7SVkacCCGEKDnpWXQi8+bN48MPP8TX1xej0cjhw4ct6ywePnwYo9GIn58fH3/8sU11UVEFWIahmhy2dEZaqvVQNcW9dMWDSqtgNVRDOYah5i9wo0FFr5U/TzcLjd6TkQ+v4rNWr+KtdeXN0S6cKFAcOP2KG/+uqcX+I3oi0uNQr1UTbhapMmGRhqDLtkVs5vfXsOSppnw0+kenSRQBPF21JATXstqX8O/pUrfzz6Vw6uUbbGDSqQR3v7u84QkhhLiJSM+ik5k0aRITJkxgw4YNhIeHE3ttnaugoCA6depEv379cHd3v0ErolJcG4Zau7BhqHYqcJNukyw69nfBUT2LOskTbz6KQquuD/FDq7u5b/kE3h71Dy8uN9P2/PUbEG4GGPm3Sr8IEz931+CWAw9uMeNS4B7Fv3VgzlAX/Bo1Y37/bwhwC6jgJ1N+5ma3wfoLlm1tTAaoaqmGzkYf2Gn92gSYqNkkzI5RCiGEqO4kWXRC7u7uDBs2jGHDhlV2KDe0du1a9u7dS3h4OOHh4cTExABw8eJFQkJCytRmaGgoFy5cKPL4rbfeyu7du8vUtkNdG4bqyAI3WalJVttaD8/CT7QTXcFqqGXsWVRVlcx8BW70mrL3UArnVsvbnz8f+oHRv77EeyP/5LlVZrqetP598EuHJ9YVfmNifZjCwj4abgluwTf9vsHv2k0aZ9Pu9uHkfL4U/bW3hX+8ihp3DiWoUYmuzzJmoT8dabVPqeGD4iL/9oUQQpSc/NcQDjVmzBiS7ZQIFXTvvffi5WU7R6lx48YOebxyuzYMtY4jk8V06+GsWk/b18eetJqC6yyWrWcxy2B9nSSLNzc3rZ5fR37C5A2f8PHw7+hwRuWhLWbqxRV9jUHrwoFHunCkrcIIn1CeD3seX9filxmqyvq0aMH2GgqhV3LfCzqjQszOX6kzbEqJrj8af5TQy9bvK2PjpnaPUwghRPUmyaJwqBEjRtCkSRPCwsIICwsjODjYbm1/+OGHhIaG2q09h7vWw+FjVtHorD/E2StZzE6zbkfv4W2Xdotiu85i2ZK8jAKVUPUyDPWmpygKHw+YxLKjbZivX8rrQQo9jngxdN8uArOsb4qYatWh6ZdzaduiBWMrJ1y783TVcTXIndAr16uZnty9ucTJ4qHYQzSKsX4/ut/ay54hCiGEuAlIslgF9e7d2y7tKIrCpk2b7NJWWX333XeV+vhVyrVhqAC1zCYy9Bo8cnK3jUlJdnkIQ3qa1barl2N7VmyHoZatZ7FgJVTpWRR5Rrbqx8hW/ViyZAmjvxlNZkoqF7/6FsPiH9FkZuDRtx8hb8/Cxdd5exGLklGvPvxzwrKdfuFiia/9JzKcW/NNYTbrzYT2GW7P8IQQQtwEJFmsgrZu3WqXdpQquobYTcv9+orjtY1G0tz1lmSRrCzMOTlo9PpyPYQxI936IR2cLNoOQy1bkpd/viJIgRtRNHcfb5q+OBH1hWcxJSejDQys7JAcxj+sN/xxPVnUxeaAIQt0xVc5VlWV+MP7yH/PRQnQ4O5nv5EdQgghbg6SLFZBM2bMqOwQnMKCBQtISEjAaDRSp04devbsSY8ePSo7rKLlK7SRt3xGcL5Ro+bkZDRBQbbXlYI5w3oBbndv/yLOtA/bYajSsygqhqLVVutEEaBHv3tIfPsLyxpX/nEaMiP34964W7HXXUq7RM1L1jeOTLUlURRCCFF6kixWQZIslsybb75ps69z584sXryYW265xW6Pc+nSJavt1NTUsjWUbxhqbpEbBbieFJmSk9GWM1lUMzOttj0cnCzqCiydUdZ1Fm3nLEqyKESdWvX411+hZmLu+8E7E47+/RudbpAsHoo9ROMC8xVp0cZRYQohhKjGZLCXcDp33303ixcv5vTp02RmZnLu3Dl++OEH6tevz759++jVqxdXr1612+PVq1fP6qtly5ZlayjfMNS6RiPpBUaSlbfIjaqqkJVttc/F4Utn2GedxUy
bnsUyhyREtRJfy7qicdTRGy8LdOjqIRpdtk4WA3sNtGtcQgghbg7SsygK9dJLL7Fq1apSXzd//ny6dSv+rnd5ff7551bboaGhhIaGcvfdd9OxY0fOnz/PO++8w3//+1+HxrFs2TI8PDxKdc39ihYX1Ugdo5GjBZbP2LJ6NSn//lvmeLLVbHQFlqDYuns3aVevEhUVxZIlS8rcdlGiMzTA9Q+z5yMvsWTJqVK3czhRC1x/LbMzUh0Sr6M46vV1JGeL+WaNNyWwBnB9NIPhcuwN292esoF78hW3UfUq2yIToZjrnOn1zSgw3F4IIYTjSLLo5GrXrk1sbCxGo/HGJ5dCdHQ0J0+eLPV1aWlpNz7JQQICApg4cSITJ05k9erVdksWL160rkCYmppKy5YtGTlyJD4+PqVr7MMZkHaFOtfmLOZ3R7v2+A2/p8xxRqVFsf7n96329Rk4CI+wDrmVJEePLnPbRTl5OZXP/rvdsh1cqzajR99a6nZc9l1kSeRhy3aAjxejR4+wS4wVwVGvryM5W8w3a7z7XNPh748s27oEGNz/dtwCGxR6/smEk6z4PN562FANPaMfeqhC4q0IKSkpPPbYY5UdhhBC3BRksFc1oKr2n9+1aNEiVFUt9dddd91l91hKo0WLFoDtPMPyCAkJsfqqW7du2Ru7NhTVx2wmx9X6kDEpsRxRQnJ2Mq451vs0Hu6Fn2wnjltnUeYsCgHQrGs/q23fOA3nIrYWef7qM6tthqAa69dzRGhCCCFuApIsimolPj537JW3t2MXoy+zaxVRFUCnt56nlxZ/uVxNJ2Un4Waw/pCocXdssqgrsHRGmddZlKUzhCiUd+36pHpcvylTMwnOHl9f6LlGs5E159bQqEBxG9f2nR0ZohBCiGpMPpKJamXp0qUAdOnSpZIjKUK+iqjuugLJYlxMuZpOyU7B1WC9T3FwsliwZ7HM6yzK0hlCFEpRFBJDrNdLTb14tNBzd0XvIiMxlvZnrd8/de5yniHdQgghqhZJFkWV1KdPH5o3b86KFSus9q9cuZLw8HCb81NTU5k4caKlKM+kSZMqJM5Sy1cR1Utr3QuXmRhbrqaTspNskkVNKQvwlJbNMNSy9ixKsihEkfQtW1htKwlpnN2x3Oa81WdWMWy3Ge+sfDt9XPBqJstmCCGEKBspcOPkHDFf0Z5mzZrFmjVrbPYPHToUvV4PQFhYGF988YXV8TNnznDhwgWSCywnsWXLFj799FPq169PmzZt8PPzIzo6moMHD5KYmIhWq+XDDz+kb9++jntS5XFtGCqAn9YIXJ+4aEhKKlfTydnJ1C2YLFb0MNQyz1mUpTOEKErNDl3ht12WbXOiFr9NU1A79EbxyL0BlZqTSsSRDXy4z/o96DbucRTF+qaOEEIIUVKSLDq5OXPmkFlgIfaq5MyZM+zZs8dmf0REhOV7Nzc3m+NFueeee0hLS+PAgQPs37+fhIQE9Ho99evX54EHHuDpp5+mTZsqfBfdI8DybQ2tdWanpqSUq+mk7CT0+ZpUdVoUF5dytXkjtsNQy9azmGWQnkUhitKgUy8i+cSyHXhFwwl9Bg1+mkjdRxcCsPH0aoZvN6DPVyvKUMOD5k88V9HhCiGEqEYkWXRy999/f2WHUKzvv/+e77//vtTXnT9/vtD9vXr1olevXuWKqVI17gNb3gagpot1sqhJLd/aYSk5KbjlTxbd9OVqryR0LgUL3NinGqoUuBHiOo/Qxhj0Luiu9cDXi4NNRwNol70S4/E1aFvczfZ1c3n8iPX7z33ydBSNvJmEEEKUnfwXEaIihXSEB3+Brs9Q54GF5OTr+NOn5xR9XQkkZyZa9So4urgNgFZjr6UzpGdRiKIoLi54DOxvta/PPvgtOpCcFc9z8d81dNqURP63TVK9YJqUY91WIYQQAqRn0WlERkaW6nw3Nzf8/Pws8wJFFdKkHzTph5+qctz9RfRpuZ/w3DLNmI1GNNqyvS3TUxOstl3cHVvcBsB
FY59hqFINVYjiNXxtJociwnGPvGrZ1367nuO9Uzg5/zk6nrFeuNX31fcrOkQhhBDVkCSLTqJhw4Zlvm7AgAE8//zzNGvWzM5RifJQFIUcDx2k5fYoaoC4uEiCazUqU3tZadbFgFw8PMsb4g0pioLORbEsmVH2YahS4EaI4rh4e9Pq+8Ucumcg3im54801Krhs96FOgWVlT7RowPBet1ZClEIIIaob+UjmJFRVLdPX2bNnmTdvHu3bt2fRokWV/TREASZv66Gily+fLnNb2QWSRW0FJIsA2nxzosrcs1igwI1OehaFsOFapy7Bn39KZr4BI64GqJlvUIHBBUKnflDxwQkhhKiWJFl0EufOnWPx4sX4+flRs2ZN3njjDTZv3syJEyc4ceIEW7Zs4Y033qBWrVr4+/uzdOlSDhw4wNdff03Lli3Jzs5m/PjxHDt2rLKfisjPx7pLIO7y+TI1Y1bNGDJSrfY5eo3FPPkropZ9zqIUuBGiJEI738nFFx/AVMRqGDs61KPDrVW4IrQQQginIh/JnERGRgZPPPEEzZo14/jx47z++uv06tWLpk2b0rRpU3r27Mnrr7/O8ePHadq0KU8++SSenp489thjhIeH061bN4xGI59++mmFxXz16lV++OEHxowZQ5MmTXBzc8PDw4PmzZvz/PPPF1nxtCRycnJ47733aNeuHZ6envj7+9OrVy9++eUX+z2BCqDz87PaTowt3dzUPKk5qehyrBM1R6+xmCd/RVSjuWw9i/mHobrpNGhkWTghijTkoddZOSzEZn+aG9R6ZmolRCSEEKK6kmTRSbz11lukp6fzzTff4FcgwcjP19eXb775huTkZGbNmgWAXq/nvffeQ1VVtm7dWjEBA5MmTeKRRx7hp59+wsPDg6FDh3LnnXeSkJDAZ599RuvWrdm4cWOp283IyODOO+9k6tSpREZGctddd9GlSxd27NjByJEjmTJligOejWO4+dew2k6NiylTOynZKbgaCiaLJV+/sjzyV0Q1mHKHP5dW/gI3HnqZSi1EcTSKhgenfcNvXa3fK6u7+jP8tt6VFJUQQojqSJJFJ7F161Z8fHxo3br1Dc9t3bo1Pj4+bN682bKva9euuLq6EhUV5cgwrQQEBDBz5kwiIyM5dOgQP//8M2vWrOHs2bOMGjWK9PR0Ro0aRWJiYqnafeWVV9i5cydt2rTh1KlTLF++nPXr17N79268vLz46KOP+P333x30rOzLM6CW1XZGwtUizixeUnYSrtbLNlbI0hlgu9aiqZRFbnKMZqvCOO46l2LOFkIAhPqGon/yGX7oreF0bfj1dgWPh8ZWdlhCCCGqGUkWnURCQgLZ2dkl6rUxm83k5OQQHx9v2acoCh4VNIctz5w5c3j99depW7eu1X4vLy++/fZbvL29SUhIYM2aNSVuMzExkS+//BKAL7/8kho1rvfMdezYkZdffhmAt99+2w7PwPF8g6xfG0NS6RLnPMk5ybgVWKZRUwFLZ4D1nEUofUXUgstmeOglWRSiJCbe+jguox7k3cdqETNqCFNuH1fZIQkhhKhmJFl0EnXr1iU7O5uVK1fe8NxVq1aRlZVllaRlZmaSmJhIUFCQI8MsMQ8PD8tSHhcvXizxdWvXriUnJ4f69etzxx132BwfM2YMALt37yY6Oto+wTqQd2Btq21zSkqZhnEW1rNYUXMWteVcazHDYF3cRpJFIUrGRePCWz1eYeeDW5jb/z20GhnCLYQQwr4kWXQSw4cPR1VVHn/8cavhpQVt2bKFxx9/HEVRGDFihGX/4cOHAWjSpInDYy0Jg8FgKXBTu3bt4k/OJyIiAoBOnToVerxRo0YEBAQAcPDgwXLFWBFcCsw/dcswkpCVUPjJxUjOTrZNFj0qZxhqaSuiFlxj0V2SRSGEEEKIKkFuQzqJ6dOns2LFCs6ePUu/fv0ICwuje/fu1KlTB4CYmBj++usvwsPDUVWVRo0a8eqrr1quX7hwIQB9+/atlPg
L+vbbb4mLi8Pd3Z2BAweW+Lpz584BUL9+/SLPCQkJISEhwXJueV26dMlqOzU1tYgzS8/F19dq2ysTotOiCXQPLFU7ydnJuFXSnMWCw1ANpayIajsMVf4sCSGEEEJUBfKpzEn4+Pjw119/8dBDD7FlyxbCw8M5cOCA1Tl5wxd79erFjz/+iG++ROSJJ57gwQcfpFWrVhUad2GOHDnCiy++COQmwTVr1izxtXmJmqdn0QvOe3l5AZCSklKOKK+rV69eofvt0X6Oi4Y00/VkSZMG/17+lwauDUrVTnR8NKQbScvXq+epgvZajBkZGXZ7PWzkZGLOzrBsJiYm46bmFHOBtdiERKvrXYyZZJgcGK8DOPT1dRBni1nidSxnijcvzrIM2RdCCFE6iip/bZ3O9u3bWb58OREREcTGxgIQFBREhw4dGDFiBD179iz3Y7z00kusWrWq1NfNnz+fbt26FXn80qVLdOvWjQsXLjB06FB+++03FKXki+r179+fjRs38uqrr/LWW28Ves4dd9zBzp07eeedd5g2bVqpn0NBpYlPCCFExdi7dy+dO3eu7DCEEKJak55FJ9SjRw969Ojh0MeIjo7m5MmTpb4uLS2tyGOXL1+mT58+XLhwgQEDBvDzzz+XOhHz9vYGID09/YYx+Pj4lKrtouQvwBMTE0OXLl0AOHr0KCEhtgtjVyVRUVG0bNkSgGPHjtlUpq1qJF7Hc7aYJV7HcuZ4SzMqRQghRNlIsigKtWjRIhYtWmS39q5evUrv3r35999/6du3L7/99huurq6lbic0NBSAyMjIIs/Jm2OYd255FZUQ+vj42C0hdZT8w8q8vb0lXjtztnjB+WKWeB3LmePVaKRGnxBCOJoki04qJSWFiIgIrl7NXcQ9ODiYDh06VMl/9LGxsfTu3Zvjx4/Tp08fVq1ahZubW5naCgsLA2D//v2FHj979iwJCbnVRDt06FC2gIUQQgghhBCSLDqbiIgIXnvtNdavX28zuV9RFAYOHMisWbNo37595QRYQFxcHL179+bo0aP06dOH1atX416OKp2DBg1Cr9cTGRnJjh07bNZaXLx4MQBdu3a1VIoVQgghhBBClJ4UuHEiS5Ys4dFHH8VgMFgSxbyhnNnZ2Zbz9Ho933//PaNGjaqUOPMkJCRw5513cvjwYfr27cuqVatKnCj26dOHqKgo3n33XYYPH251bOLEiXz66ae0bduWzZs3ExiYu8zEgQMH6NmzJ2lpaaxevZrBgwfb/TkJIYQQQghxs5Bk0UmcPHmStm3bYjAYaNOmDa+88gp9+/a1JErx8fFs3LiRd999lyNHjqDT6Th8+DDNmjWrtJhHjBjBihUrUBSFkSNHFpko3nPPPdxzzz1W+0JDQ7lw4QILFixg7NixVscyMjLo27cvu3btwt/fn969e5Oens6mTZswGAxMmjSJjz76yEHPSgghhBBCiJuDDEN1Eh988AEGg4EBAwawcuVK9Hq91fHAwEBGjRrFiBEjGDJkCBs3buTDDz/km2++qaSIscwdVFWVn3/+ucjzQkNDbZLF4nh4eLB161Y+/vhj/ve//7F27Vr0ej233XYbzz77LCNHjixv6EIIIYQQQtz0pGfRSTRq1IgLFy5w6tQpGjVqVOy5p0+fpmnTpjRo0IBz585VUIRCCCGEEEKI6kSSRSfh7u6Ou7u7pbfuRgICAsjMzCQzM9PBkQkhhBBCCCGqI1mkyEm4u7uTkZGB0Wi84bkGg4GMjIxyVR0VQgghhBBC3NwkWXQSrVq1wmAwsHTp0hueu3TpUnJycmjVqlUFRCaEEEIIIYSojiRZdBKjR49GVVWeeeYZVq5cWeR5v/32G8888wyKojBmzJgKjFAIIYQQQghRncicRSdhNBrp3r07e/bsQVEUWrVqRe/evalbty4AUVFRbN68maNHj6KqKl27duWvv/7CxcWlkiMXQgghhBBCOCNJFp1IYmIijzzyCL///jsAiqJYHc/7UQ4dOpQ
FCxbg7+9f4TEKIYQQQgghqgdJFp3Qrl27+PnnnwkPDyc2NhaAoKAgOnXqxP3330/Xrl0rOUIhhBBCCCGEs5NkUQghhBBCCCGEDSlwI4QQQgghhBDChiSLQgghhBBCCCFsaCs7AGHrzTfftFtbr7/+ut3aEkIIIYQQQtw8ZM5iFaTRaGwqnZaVyWSySztCCCGEEEKIm4sMQ62CevToYbcvUblOnz7NU089RVhYGDqdjtDQ0MoOqVi//PILw4cPp379+nh4eNCqVSs++ugjDAZDZYdWpF9//ZVu3bpRo0YNXF1dadSoEZMmTSIxMbGyQ7sho9FI27ZtURSFpUuXVnY4hdq6dSuKoth8tW7durJDK9ZPP/1Ely5d8PDwICAggD59+hATE1PZYdno1atXoa+voijMnj27ssMr0sqVK+natSs+Pj4EBwdz9913c/DgwcoOq0h//vknXbt2xc3NjeDgYJ566imSk5MrOywhhKjyZBhqFbR169bKDkHYydGjR/n999/p0qULqqpW+QTmww8/JDQ0lPfff5+aNWuyc+dOXnvtNQ4fPszChQsrO7xCJSQk0KtXL1588UV8fX05cuQIM2fO5NChQ2zatKmywyvWp59+aln+pqqbP38+rVq1smx7eHhUYjTF++ijj5g2bRpTpkzhvffeIz09ne3bt5OVlVXZodn44osvSElJsdr3448/8sUXXzBo0KBKiqp4GzduZPjw4Tz44IO8+eabpKen884779CnTx/++ecfateuXdkhWtm2bRt33XUX9957L2+88QYXL15k2rRpnDx5ks2bN9ttJI8QQlRLqhDCYUwmk+X7J598Um3QoEHlBVMCV69etdk3a9YsFVAvX75cCRGVzddff60C6oULFyo7lCJdvHhR9fb2VhcuXKgC6pIlSyo7pEJt2bJFBdRdu3ZVdiglcurUKVWn06lffPFFZYdSZp07d1bbtGlT2WEUaezYsWpoaKhqNpst+86cOaMC6oIFCyovsCL07t1bbdeunVW8y5cvVwF19erVlRiZEEJUfTIMVQgH0mic6y0WFBRks69jx44AREdHV3Q4ZRYQEABQpYfPTpw4kaFDh8pwcTv77rvv0Ov1jB8/vrJDKZNTp06xb98+HnroocoOpUgGgwEvLy+rHjlfX18AzGZzZYVVpD179tCvXz+rePv37w/Ab7/9VklRCSGEc3CuT7JC2MHJkyf57LPPGDt2LG3atEGr1aIoCm+99VaJrl+2bBm9evXC398fT09P2rVrx/vvv++wxKSy492+fTt6vZ7GjRtX6ZhNJhNZWVns37+fmTNnMmjQoBLHXNHxrlu3jg0bNvDBBx+UqP3Kjhdg2LBhuLi4ULNmTZ544gkSEhKqZLw7d+6kefPmLFy4kAYNGqDVamnXrh1//PFHlYy3oEWLFqHRaBgzZkyJ463omMeNG8eJEyf45JNPSExM5OLFizz33HPUq1ePESNGVLl4XVxc0Ov1Vvt0Oh2KonD06NESPZ4QQty0KrtrU4iK9p///EcFbL5mzZpV4mu1Wq3av39/dcSIEaqfn58KqN26dVMzMjKKvLasw1ArK15VVdWjR4+q7u7u6rPPPlvlY/b19bU8Tv/+/dW0tLQqGW9mZqbauHFj9cMPP1RVVVXPnTtX6mGoFRnvgQMH1MmTJ6urV69Wt2zZos6ePVv19fVVW7durWZlZVW5eJs1a6Z6eXmptWvXVn/44Qd1/fr16uDBg1WtVqv+888/VS7egho3bqzeeeedJYqzMmNetWqV6u3tbXmchg0bqidPnqyS8Xbq1Ent06eP1b6//vpLBdSmTZuWOGYhhLgZSbIobjrffPONOmXKFPV///ufevz4cfX//u//SvQhZcWKFSqgenl5qeHh4Zb9sbGxaps2bVRAnTx5cpHXlzVZrKx4Y2Nj1SZNmqht2rQpVeJVWTFHRESoO3bsUOfNm6eGhISod955p2o0GqtcvNOnT1dbtGih5uTkqKpatmS
xsn4n8mzYsKFU89MqMt4mTZqogLpmzRrLvuzsbDUkJET9v//7vyoXb367du1SAfXbb78tUZyVFfOuXbtUPz8/9cknn1T//PNP9bffflNvv/12tVGjRiWe21yR8f7www8qoL733ntqbGyseuDAAbV169aqi4uL2rx58xLFK4QQNytJFsVN75FHHinRh5TOnTurgPrWW2/ZHMu7S+3q6qomJSUVer29CtxURLwpKSlqp06d1AYNGqhRUVFOEXN+u3fvVgF12bJlVSre8+fPq66uruovv/yiJiYmqomJieqhQ4csCcKNnldFx1ucgIAA9emnn65y8Xbp0kUFbHqXHnjgATUsLKzKxZvfM888o7q5uanJycllirOiYu7UqZM6aNAgq3OTk5NVPz8/9eWXX65y8ZrNZvXll19W9Xq9CqguLi7q888/r3bs2LFMvbhCCHEzkTmLQpRAVFQU+/btAyh0LlG3bt2oV68e2dnZrF27tqLDs1GeeLOzsxk2bBjnz59n/fr11KlTp8rHXFBYWBiKonD69GmHxApli/fcuXNkZ2dz33334e/vj7+/P+3atQNg/Pjx1K1bt0rFeyOOXHKgrPHmX94jP1VVHbp0RnlfX6PRyE8//cSQIUPw8fFxWJz5lTXmo0ePWgpf5fHx8eGWW27h1KlTVS7evDUr4+LiOHToEFeuXOGjjz7i1KlT3HHHHQ6LVwghqgNJFoUogYiICCC3ymbDhg0LPadTp05W51amssZrMpkYNWoU+/btY+3atTRr1szxwV5jz9d4x44dqKpKo0aN7BtkPmWJt3379mzZssXqa8mSJQBMnz69VEVYKiLeoqxfv56EhAS6dOli3yDzKWu8Q4cOBbBaYzM7O5u///7bcr4jlPf1Xb9+PXFxcRVaBbWsMYeGhrJ//36r81JSUjh9+nSR7dhDeV9jb29v2rZtS2BgIAsWLCArK4tHH33UYfEKIUR1oK3sAIRwBufOnQOgfv36RZ5Tr149q3MBMjIyLHe4z549S0ZGBr/88gsAnTt3pkGDBlUq3meeeYbffvuNWbNmYTKZ2L17t+VYy5YtHdrjUdaYBwwYQJ8+fWjVqhWurq5ERETwwQcf0LZtW+65554qFa+fnx+9evWyOuf8+fNA7uvbvXt3+wd6TVlf34ceeoiGDRvSsWNHvL292bNnD++99x7t27dn1KhRVS7eoUOHctttt/HYY4/x7rvvUqtWLT777DMSExN56aWXqly8eRYtWkRgYCADBw50TICFKGvMzz77LM888wxPPvkk9957L2lpaXz00UdkZ2fz+OOPV7l49+7dy5YtW2jfvj1Go5E///yTOXPm8Mknnzj0hpIQQlQHkiwKUQKpqakAeHp6FnmOl5cXkHuHPc/Vq1cZOXKk1Xl52wsWLGDs2LF2jjRXWeNdt24dkNvLNX36dKvzt2zZYpPo2FNZY+7SpQuLFi2yfDgMDQ3l6aefZtKkSTbl8qtCvJWlrPG2atWKxYsX8+mnn5KZmUlISAjjx49nxowZVfL11Wg0/P7770yZMoXJkyeTmZlJ586d2bx5c5FDVCszXoC0tDRWrVrFI488gk6nc1iMBZU15gkTJuDm5sbcuXNZvHgx7u7uhIWFsWXLFoeORihrvHq9npUrV/L2229jNBpp27Yty5YtK/EyH0IIcTOTZFEIBwoNDUVV1coOo8TyermcyaxZs5g1a1Zlh1FmVf13ZNq0aUybNq2ywyiVgIAAvvvuO7777rvKDqVEvLy8SE9Pr+wwSkxRFMaNG8e4ceMqO5QSad++PTt37qzsMIQQwinJnEUhSsDb2xug2A90aWlpABVWnKI4zhYvOF/MEq9jSbyO52wxO1u8QghRHUiyKEQJhIaGAnDx4sUiz8k7lnduZXK2eMH5YpZ4HUvidTxni9nZ4hVCiOpAkkUhSqBDhw4AxMfHF1qcArBUBwwLC6uwuIribPGC88Us8TqWxOt4zhazs8UrhBDVgSSLQpRASEg
InTt3BmDx4sU2x//++28uXryIq6srgwYNqujwbDhbvOB8MUu8jiXxOp6zxexs8QohRHUgyaIQJfTKK68AMHv2bA4cOGDZHx8fz9NPPw3klpT39fWtlPgKcrZ4wflilngdS+J1PGeL2dniFUIIZ6eoVbkMnxAOcODAAcuHCoAzZ84QFxdHSEgIdevWtexfsWIFtWvXtrr2P//5D3PmzEGn09GnTx88PT3ZtGkTSUlJ3HHHHWzcuBF3d/ebOl5njFnilXidOV5njNnZ4hVCiJuWKsRNZsuWLSpww69z584Vev1PP/2k9ujRQ/Xx8VHd3d3V1q1bq7Nnz1azs7MlXieNWeKVeJ05XmeM2dniFUKIm5X0LAohhBBCCCGEsCFzFoUQQgghhBBC2JBkUQghhBBCCCGEDUkWhRBCCCGEEELYkGRRCCGEEEIIIYQNSRaFEEIIIYQQQtiQZFEIIYQQQgghhA1JFoUQQgghhBBC2JBkUQghhBBCCCGEDUkWhRBCCCGEEELYkGRRCCGEEEIIIYQNSRaFEDed0NBQFEXh+++/t9p//vx5FEVBURTOnz9fKbGJom3dutXy8xFCCCGE42krOwAhhKiutm7dytatWwkNDWXs2LGVHY4QQgghRKlIz6IQQlyj0+lo1qwZzZo1Q6fTlbu9rVu3MnPmTJseTCGEEEIIZyA9i0IIcU3dunU5ceJEZYchhBBCCFElSM+iEEIIIYQQQggbkiwKIaodVVX56quv6NSpE56engQGBtK3b1/WrVtX7HU3KnBz5MgRHn30URo1aoSbmxuenp40bNiQPn368N577xEfH2/VzsyZMwHYtm2bpd28r61bt1rajYmJ4b///S+DBw+mWbNmeHl54eXlRatWrZgyZQoxMTFFxpy/WE92djZvvfUWLVu2xN3dnRo1anDPPfdw8ODBYp+3yWRi4cKFDBw4kJo1a+Lq6kqdOnXo3r07H3zwAZcvXy70ulOnTjFhwgSaNm2Kh4cH3t7etG/fnpkzZ5KcnFzsY9rDzp07GTJkCEFBQbi7u9O2bVs++eQTTCZToef36tULRVF44403yMrKYubMmbRo0QJ3d3eCgoK4//77OXbsmMPjFkIIIZyGKoQQ1YjRaFRHjhypAiqguri4qH5+fqqiKKqiKOqcOXPUBg0aqIC6YMECq2vPnTtnue7cuXNWx/744w9Vr9dbjru6uqq+vr6WbUDduHGjqqqqGhkZqdasWVP19PRUAVWn06k1a9a0+tqxY4el7UceecTShl6vVwMCAlSNRmPZFxQUpB46dKjQ55v3XObMmaN26tTJEpu7u7vleg8PD3XPnj2FXn/58mX11ltvtZyrKIrq7++vurm5WfZ98sknNtfNnz9f1el0Vo+Rf7thw4bq6dOnS/6DK4EtW7ZY2l++fLmq1WpVQPXz87N8D6gDBgxQs7Ozba7v2bOnCqhTp05Vb7vtNsvr7ePjY/VzXb9+vV3jFkIIIZyV9CwKIaqVDz74gGXLlll69hISEkhMTCQqKopRo0YxefJkYmNjS93uM888Q05ODoMGDeL48eNkZWWRlJREamoqe/bs4fnnn8fHxweAevXqcfnyZaZMmQLA7bffzuXLl62+br/9dkvbTZo04eOPP+bEiRNkZmYSHx9PVlYW27dv59ZbbyU2NpYxY8agqmqR8c2YMYO4uDjWrVtHeno6aWlpbN++nZCQEDIyMnj++edtrsnJyWHo0KHs2bMHX19f5s2bR2JiIgkJCWRkZHD8+HHefPNNgoKCrK5bu3Ytjz/+ODqdjlmzZhETE0N6ejoZGRns2LGDTp06ce7cOUaMGIHZbC71a10S48aNo2/fvpw9e5bExESSk5P56KOPcHFxYf369cyYMaPIa7/88ksOHTrEwoULSUtLIzk5mUOHDtGpUyeys7N54IEHiuxNFUIIIW4qlZ2tCiGEvaSnp1t6iaZMmWJz3GQyqb1797b0IpW0Z/HKlSuW/TExMSWOZ8aMGSq
g9uzZs4zPSFUTEhLUoKAgFVC3bt1qczyvZ9Hd3V09deqUzfFffvnFEvuFCxesjs2bN8/S87l79+4SxWM0GtVGjRqpgPrrr78Wek58fLxau3ZtSw+gveTvWWzVqpWalZVlc86sWbNUQHVzc1Pj4+OtjuX1LALqokWLCo0777V+8cUX7Ra3EEII4aykZ1EIUW1s2LCBlJQUtFotL730ks1xjUbDK6+8Uup2vby80Ghy/1xWdI+Tv7+/pRdyx44dRZ533333ccstt9jsHzp0qGUR+3/++cfqWN6SHqNHj+bWW28tUTzbtm3j7NmzNG7cmOHDhxd6TkBAAAMHDgRyfyaOMHnyZFxdXW32T5w4EQ8PD7Kysli9enWh1zZo0IAxY8bY7A8ICGDChAkALFu2zL4BCyGEEE5IkkUhRLURHh4OQMuWLW2GTubp1q0bWm3pVg3y8PCgZ8+eAAwYMIA333yT8PDwIguplMXevXsZN24czZs3x8vLy6oYzsqVKwGIjo4u8vrOnTsXul+n0xEcHAxAYmKiZb/BYGD//v0A3H333SWOc+fOnQBcunSJWrVqFfn1008/ARAZGVnitkujV69ehe738vKiY8eOABw4cKDQc3r27GlJoAs7BrlFihISEsofqBBCCOHEZJ1FIUS1kTcXsW7dukWe4+rqSo0aNUrdQzh//nyGDBnCsWPHmDFjBjNmzMDDw4Nu3bpx33338fDDDxfa01USH374IS+99JJlTqJGo8Hf3x+9Xg9AcnIyWVlZpKenF9mGt7d3kcfc3NyA3AQxT0JCAkajEcjtaSupvMqs2dnZXLly5YbnZ2RklLjt0ijuZ5x37OrVq2W+FnJ/nwICAsoYoRBCCOH8pGdRCCFKoFGjRhw+fJhVq1YxYcIE2rRpQ2ZmJhs2bOCJJ56gdevWxfb8FeXo0aO8/PLLqKrK8OHDOXDgANnZ2SQkJFiK4dx3330AxRa4qSh5vakDBgxAVdUbfuVfIkQIIYQQzkWSRSFEtZE39LS4pC0nJ4e4uLgyte/i4sKQIUP44osvOHz4MFevXmXu3Ln4+Phw+vRpJk6cWOo2ly9fjtlspnHjxvz888906NDBZphsSXrwSiswMBCdTgfAhQsXSnxdzZo1AccNLy2p4n7Gecfyht+W5VqgyKHMQgghxM1CkkUhRLWRN1ft6NGjRSaEf//9t2X4ZXnVqFGDZ555htdffx3AphctryhOcT2Cly5dAqB9+/aFzqXMyMhgz549dok3P61WS6dOnQBYs2ZNia/LK7Zz4sQJzpw5Y/e4Smrbtm2F7k9PT7fMxQwLCyvVtfmPhYaGyhBUIYQQNz1JFoUQ1Ub//v3x9fXFaDTywQcf2BxXVZV333231O3m5OQUe9zd3R3InceXX966i0lJSUVe6+vrC8C///5baFL5wQcfkJKSUppwS2zs2LEALFmypMQJae/evWnQoAGqqvLCCy8Uu46iwWAgLS3NHqHa+Oijjwr9ucyZM4eMjAzc3NwYMmRIodeeP3+eJUuW2OxPTExk3rx5AIwcOdK+AQshhBBOSJJFIUS14eHhwdSpU4HcJOutt94iNTUVyC3M8vDDD7Nt2zY8PDxK1e7OnTtp3749n332GadOnbIkdUajkbVr1/Lmm28CWJaLyNO6dWsAjh07VmQy1q9fPwCOHDnCCy+8YEkMExISmD59OjNnziQwMLBU8ZbU2LFj6dy5MwaDgQEDBvD1119bHl9VVY4fP87LL7/Mjz/+aLlGp9PxxRdfoNFoWL16NQMGDGD37t2WpNFsNnPs2DFmz55N06ZNOXjwoNVjnj9/3lLlNW/pjrKIjIxk+PDhnD9/HoDMzEz++9//Mn36dCB3CY2iegZ9fX15/PHHWbRokaXoz5EjR7jrrru4cuUKfn5+TJo0qcyxCSGEENVGRS/sKIQQjmQ0GtX77rvPsvi6i4uL6u/vryqKoiqKos6ZM8eykP2CBQusrj137pzlunPnzln2518MHlD
1er0aEBCgajQay76mTZuqUVFRVu0ZDAa1SZMmlnMCAgLUBg0aqA0aNFB37dplOW/EiBGWcxRFUf39/S1tjx8/Xn3kkUdUQH3kkUdsnm9Rz6Wk50RHR6thYWGWx9doNGpAQIDq7u5u2ffJJ5/YXLd48WKrc1xdXdXAwEBVp9NZvVZ///13ka9xcTEXJv/PYfny5apWq1UB1c/Pz/I9oA4YMEDNysqyub5nz54qoE6dOlXt2rWrJW5fX1+r57Fu3bpSxSWEEEJUV9KzKISoVlxcXPj555+ZN28eYWFhluUnevfuzZo1a3juuedK3Wbnzp356aefeOKJJ+jQoQP+/v6kpKTg4+PDbbfdxvvvv09ERAR16tSxuk6r1bJp0ybGjh1L/fr1SU1N5cKFC1y4cIGsrCzLeT/99BOzZ8+mRYsW6HQ6VFXl9ttvZ+HChcyfP798L8gN1K5dm927d/PVV19x55134u/vT1paGgEBAXTv3p0PP/yw0AXsR48ezalTp5g6dSrt27fH1dWVpKQkvL296dq1K5MmTeLvv//mjjvusLour4CMoiiWOaZlMWLECLZt28bgwYNxcXFBq9XSpk0bPv74Y9asWVPsMiaurq5s2bKFN954g9DQULKzs6lRowYjR44kPDycAQMGlDkuIYQQojpRVLUK1GIXQghxU3jnnXd49dVXuffee/nll18q9LF79erFtm3bmDFjBm+88UaFPrYQQgjhjKRnUQghRIXZtm0biqJYKsgKIYQQouqSZFEIIUSFMBqN7Ny5k2HDhtG2bdvKDkcIIYQQN2C7qJcQQgjhAFqt1lKdVgghhBBVn/QsCiGEEEIIIYSwIQVuhBBCCCGEEELYkJ5FIYQQQgghhBA2JFkUQgghhBBCCGFDkkUhhBBCCCGEEDYkWRRCCCGEEEIIYUOSRSGEEEIIIYQQNiRZFEIIIYQQQghhQ5JFIYQQQgghhBA2JFkUQgghhBBCCGHj/wEhkkE817d5lQAAAABJRU5ErkJggg==", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = plt.figure(figsize=(6,10))\n", "gs = matplotlib.gridspec.GridSpec(2,1, height_ratios=[8, 1.5])\n", "ax1 = fig.add_subplot(gs[0,0])\n", "ax2 = fig.add_subplot(gs[1,0])\n", "\n", "strand_gb = cis_scalings.groupby(['strand1', 'strand2'])\n", "for strands in ['+-', '-+', '++', '--']:\n", " sc_strand = strand_gb.get_group(tuple(strands))\n", " sc_agg = (sc_strand\n", " .groupby(['min_dist','max_dist'])\n", " .agg({'n_pairs':'sum', 'n_bp2':'sum'})\n", " .reset_index())\n", "\n", " dist_bin_mids = np.sqrt(sc_agg.min_dist * sc_agg.max_dist)\n", " pair_frequencies = sc_agg.n_pairs / sc_agg.n_bp2\n", " mask = pair_frequencies>0\n", " label = f'{strands[0]}{strands[1]}'\n", "\n", " ax1.loglog(\n", " dist_bin_mids[mask],\n", " pair_frequencies[mask],\n", " label=label,\n", " lw=2\n", " )\n", "\n", " ax2.semilogx(\n", " np.sqrt(dist_bin_mids.values[1:]*dist_bin_mids.values[:-1]),\n", " np.diff(np.log10(pair_frequencies.values)) / np.diff(np.log10(dist_bin_mids.values)),\n", " label=label\n", " )\n", " \n", "ax1.axhline(avg_trans, ls='--', c='gray', label='average trans')\n", "\n", "plt.sca(ax1)\n", "plt.gca().set_aspect(1.0)\n", "plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", "plt.gca().yaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", "plt.xlim(1e1,1e9)\n", "# plt.ylim(avg_trans / 3, plt.ylim()[1])\n", "\n", "plt.grid(lw=0.5,color='gray')\n", "plt.legend(loc=(1.1,0.4))\n", "plt.ylabel('contact frequency, \\nHi-C molecule per bp pair')\n", "plt.xlabel('distance, bp')\n", "\n", "plt.sca(ax2)\n", "plt.xlim(1e1,1e9)\n", "plt.ylim(-2,0.5)\n", "plt.gca().set_aspect(1.0)\n", "plt.ylabel('log-log slope') \n", "plt.xlabel('distance, bp')\n", "\n", "plt.yticks(np.arange(-2,0.6,0.5))\n", "plt.gca().xaxis.set_major_locator(matplotlib.ticker.LogLocator(base=10.0,numticks=20))\n", "plt.grid(lw=0.5,color='gray')\n", "\n", "# 
fig.tight_layout()\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "vscode": { "languageId": "python" } }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "main", "language": "python", "name": "main" }, "language_info": { "name": "", "version": "" } }, "nbformat": 4, "nbformat_minor": 4 } pairtools-1.1.3/doc/formats.rst000066400000000000000000000336201474715105500165170ustar00rootroot00000000000000Formats for storing Hi-C pairs ============================== .pairs ------ `.pairs` is a simple tabular format for storing DNA contacts detected in a Hi-C experiment. The detailed `.pairs specification `_ is defined by the 4DN Consortium. The body of a .pairs contains a table with a variable number of fields separated by a "\\t" character (a horizontal tab). The .pairs specification fixes the content and the order of the first seven columns: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 1 read_id the ID of the read as defined in fastq files 2 chrom1 the chromosome of the alignment on side 1 3 pos1 the 1-based genomic position of the outer-most (5') mapped bp on side 1 4 chrom2 the chromosome of the alignment on side 2 5 pos2 the 1-based genomic position of the outer-most (5') mapped bp on side 2 6 strand1 the strand of the alignment on side 1 7 strand2 the strand of the alignment on side 2 ======== =========== =============================================================================== A .pairs file starts with a header, an arbitrary number of lines starting with a "#" character. By convention, the header lines have a format of "#field_name: field_value". The `.pairs specification `_ mandates a few standard header lines (e.g., column names, chromosome order, sorting order, etc), all of which are automatically filled in by `pairtools`. 
The entries of a .pairs file can be flipped and sorted. "Flipping" means that *the sides 1 and 2 do not correspond to side1 and side2 in sequencing data.* Instead, side1 is defined as the side with the alignment with a lower sorting index (using the lexicographic order for chromosome names, followed by the numeric order for positions and the lexicographic order for pair types). This particular order of "flipping" is defined as "upper-triangular flipping", or "triu-flipping". Finally, pairs are *typically* block-sorted: i.e. first lexicographically by chrom1 and chrom2, then numerically by pos1 and pos2. Pairtools' flavor of .pairs --------------------------- .pairs files produced by `pairtools` extend the .pairs format in a few ways. 1. `pairtools` store null, unmapped, ambiguous (multiply mapped) and chimeric (if not parsed by `parse2` or `--walks-policy all` of `parse`) alignments as chrom='!', pos=0, strand='-'. #. `pairtools` store the header of the source .sam files in the '#samheader:' fields of the pairs header. When multiple .pairs files are merged, the respective '#samheader:' fields are checked for consistency and merged. #. Each pairtool applied to .pairs leaves a record in the '#samheader' fields (using a @PG sam tag), thus preserving the full history of data processing. #. `pairtools` append an extra column describing the type of a Hi-C pair: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 8 pair_type the type of a Hi-C pair ======== =========== =============================================================================== .. _section-pair-types: Pair types ---------- `pairtools` use a simple two-character notation to define all possible pair types, according to the quality of alignment of the two sides. The type of a pair can be defined unambiguously using the table below. 
To use this table, identify which side has an alignment of a "poorer" quality (unmapped < multimapped < unique alignment) and which side has a "better" alignment and find the corresponding row in the table. ======================== ====== =============== ========= ================== ========= ================== =========== . . . Less informative alignment More informative alignment . ------------------------ ------ --------------- ---------------------------- ---------------------------- ----------- Pair type Code >2 alignments Mapped Unique Mapped Unique Sidedness walk-walk WW |check| |cross| |cross| |cross| |cross| 0 [1]_ null NN |cross| |cross| |cross| 0 corrupt XX |cross| |cross| |cross| 0 [2]_ null-multi NM |cross| |cross| |check| |cross| 0 null-rescued NR |check| |cross| |check| |check| 1 [3]_ null-unique NU |cross| |cross| |check| |check| 1 multi-multi MM |cross| |check| |cross| |check| |cross| 0 multi-rescued MR |check| |check| |cross| |check| |check| 1 [3]_ multi-unique MU |cross| |check| |cross| |check| |check| 1 rescued-unique RU |check| |check| |check| |check| |check| 2 [3]_ unique-rescued UR |check| |check| |check| |check| |check| 2 [3]_ unique-unique UU |cross| |check| |check| |check| |check| 2 duplicate DD |cross| |check| |check| |check| |check| 2 [4]_ ======================== ====== =============== ========= ================== ========= ================== =========== .. [1] "walks", or, `C-walks `_ are Hi-C molecules formed via multiple ligation events which cannot be reported as a single pair. .. [2] "corrupt" pairs are those with technical issues - e.g. missing a FASTQ sequence/SAM entry from one side of the molecule. .. [3] "rescued" pairs have two non-overlapping alignments on one of the sides (referred below as the chimeric side/read), but the inner (3'-) one extends the only alignment on the other side (referred as the non-chimeric side/read). Such pairs form when one of the two ligated DNA fragments is shorter than the read length. 
In this case, one of the reads contains this short fragment entirely, together with the ligation junction and a chunk of the other DNA fragment (thus, this read ends up having two non-overlapping alignments). Following the procedure introduced in `HiC-Pro `_ and `Juicer `_, `pairtools parse` rescues such Hi-C molecules, reports the position of the 5' alignment on the chimeric side, and tags them as "NU", "MU", "UR" or "RU" pair type, depending on the type of the 5' alignment on the chimeric side. Such molecules can and should be used in downstream analysis. Read more on the rescue procedure in :doc:`the section on parsing `. .. [4] `pairtools dedup` detects molecules that could be formed via PCR duplication and tags them as "DD" pair type. These pairs should be excluded from downstream analyses. .pairsam -------- `pairtools` also define .pairsam, a valid extension of the .pairs format. On top of the pairtools' flavor of .pairs, .pairsam format adds two extra columns containing the alignments from which the Hi-C pair was extracted: ======== =========== =============================================================================== index name description ======== =========== =============================================================================== 9 sam1 the sam alignment(s) on side 1; separate supplemental alignments by NEXT_SAM 10 sam2 the sam alignment(s) on side 2; separate supplemental alignments by NEXT_SAM ======== =========== =============================================================================== Note that, normally, the fields of a sam alignment are separated by a horizontal tab character (\\t), which we already use to separate .pairs columns. To avoid confusion, we replace the tab character in sam entries stored in sam1 and sam2 columns with a UNIT SEPARATOR character (\\031). Finally, sam1 and sam2 can store multiple .sam alignments, separated by a string '\\031NEXT_SAM\\031' .. |check| unicode:: U+2714 .. check .. |cross| unicode:: U+274C .. 
cross Extra columns ---------------- `pairtools` can operate on `.pairs/.pairsam` with extra columns. Extra columns are specified in the order defined by the order of their addition by various tools. Column names can be checked in the header of `.pairs/.pairsam` file. We provide `pairtools header` utilities for manipulating and verifying compatibility of headers and their columns. The list of additional columns used throughout `pairtools` modules: =================================== =================== ====================== ================================================== ================= extra column generating module format how to add it description =================================== =================== ====================== ================================================== ================= mapq1, mapq2 `parse/parse2` number from 0 to 255 `pairtools parse --add-columns mapq` `Mapping quality `_, as reported in .sam/.bam, $-10 log_{10}(P_{error})$ pos51, pos52 `parse/parse2` genomic coordinate `pairtools parse --add-columns pos5` 5' position of alignment (closer to read start) pos31, pos32 `parse/parse2` genomic coordinate `pairtools parse --add-columns pos3` 3' position of alignment (further from read start) cigar1, cigar2 `parse/parse2` string `pairtools parse --add-columns cigar` `CIGAR, or Compact Idiosyncratic Gapped Alignment Report `_ of alignment, as reported in .sam/.bam read_len1, read_len2 `parse/parse2` number `pairtools parse --add-columns read_len` read length matched_bp1, matched_bp2 `parse/parse2` number `pairtools parse --add-columns matched_bp` number of matched alignment basepairs to the reference algn_ref_span1, algn_ref_span2 `parse/parse2` number `pairtools parse --add-columns algn_ref_span` basepairs of reference covered by alignment algn_read_span1, algn_read_span2 `parse/parse2` number `pairtools parse --add-columns algn_read_span` basepairs of read covered by alignment dist_to_51, dist_to_52 `parse/parse2` number `pairtools parse 
--add-columns dist_to_5` distance to 5'-end of read dist_to_31, dist_to_32 `parse/parse2` number `pairtools parse --add-columns dist_to_3` distance to 3'-end of read seq1, seq2 `parse/parse2` string `pairtools parse --add-columns seq` sequence of alignment mismatches1, mismatches2 `parse/parse2` string `pairtools parse --add-columns mismatches` comma-separated list of mismatches relative to the reference, "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}" XB1/2,AS1/2,XS1/2 or any sam tag `parse/parse2` `pairtools parse --add-columns XA,XB,NM` format depends on `tag specification `_ walk_pair_type `parse/parse2` string `pairtools parse2 --add-pair-index` Type of the pair relative to R1 and R2 reads of paired-end sequencing, see `parsing docs `_ walk_pair_index `parse/parse2` number `pairtools parse2 --add-pair-index` Order of the pair in the complex walk, starting from 5'-end of left read, see `parsing docs `_ phase `phase` 0, 1 or "." `pairtools phase` Phase of alignment (haplotype 1, 2, or unphased), see `phasing walkthrough `_ rfrag1, rfrag2 `restrict` number `pairtools restrict` Unique index of the restriction fragment after annotating pairs positions, see `restriction walkthrough `_ rfrag_start1, rfrag_start2 `restrict` number `pairtools restrict` Coordinate of the start of restriction fragment rfrag_end1, rfrag_end2 `restrict` number `pairtools restrict` Coordinate of the end of restriction fragment =================================== =================== ====================== ================================================== ================= pairtools-1.1.3/doc/index.rst000066400000000000000000000054641474715105500161600ustar00rootroot00000000000000.. pairtools documentation master file, created by sphinx-quickstart on Wed Dec 6 12:32:49 2017. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. 
Overview ======== `pairtools` is a simple and fast command-line framework to process sequencing data from a Hi-C experiment. `pairtools` perform various operations on Hi-C pairs and occupy the middle position in a typical Hi-C data processing pipeline: .. figure:: _static/hic-processing-pipeline.png :width: 100% :alt: The diagram of a typical processing pipeline for Hi-C data :align: center In a typical Hi-C pipeline, DNA sequences (reads) are aligned to the reference genome, converted into ligation junctions and binned, thus producing a Hi-C contact map. `pairtools` aim to be an all-in-one tool for processing Hi-C pairs, and can perform the following operations: - detect ligation junctions (a.k.a. Hi-C pairs) in aligned paired-end sequences of Hi-C DNA molecules - sort .pairs files for downstream analyses - detect, tag and remove PCR/optical duplicates - generate extensive statistics of Hi-C datasets - select Hi-C pairs given flexibly defined criteria - restore .sam alignments from Hi-C pairs `pairtools` produce .pairs files compliant with the `4DN standard `_. `pairtools` uses a two-character notation to define pair types (see table :ref:`section-pair-types`). The full list of available pairtools: ============ ============================================== Pairtool Description ============ ============================================== dedup Find and remove PCR/optical duplicates. filterbycov Remove pairs from regions of high coverage. flip Flip pairs to get an upper-triangular matrix. markasdup Tag pairs as duplicates. merge Merge sorted .pairs/.pairsam files. parse Find ligation junctions in .sam, make .pairs. phase Phase pairs mapped to a diploid genome. restrict Assign restriction fragments to pairs. select Select pairs according to some condition. sort Sort a .pairs/.pairsam file. split Split a .pairsam file into .pairs and .sam. stats Calculate pairs statistics. ============ ============================================== Contents: .. toctree:: :hidden: self .. 
toctree:: :maxdepth: 3 quickstart installation parsing sorting formats stats protocols_pipelines designnotes cli_tools .. toctree:: :maxdepth: 3 :caption: Tutorials :titlesonly: ./examples/pairtools_walkthrough.ipynb ./examples/scaling_example.ipynb ./examples/pairtools_restrict_walkthrough.ipynb ./examples/pairtools_phase_walkthrough.ipynb ./examples/benchmark/benchmark.ipynb * :ref:`genindex` pairtools-1.1.3/doc/installation.rst000066400000000000000000000053111474715105500175410ustar00rootroot00000000000000Installation ============ Requirements ------------ - Python 3.x - Python packages `numpy` and `click` - Command-line utilities `sort` (the Unix version), `bgzip` (shipped with `samtools`) and `samtools`. If available, `pairtools` can compress outputs with `bgzip`, `pbgzip` and `lz4`. Install using conda ------------------- We highly recommend using the `conda` package manager to install pre-compiled `pairtools` together with all its dependencies. To get it, you can either install the full `Anaconda `_ Python distribution or just the standalone `conda `_ package manager. With `conda`, you can install pre-compiled `pairtools` and all of its dependencies from the `bioconda `_ channel: .. code-block:: bash $ conda install -c conda-forge -c bioconda pairtools Install using pip ----------------- Alternatively, compile and install `pairtools` and its Python dependencies from PyPI using pip: .. code-block:: bash $ pip install pairtools Install the development version ------------------------------- Finally, you can install the latest development version of `pairtools` from github. First, make a local clone of the github repository: .. code-block:: bash $ git clone https://github.com/open2c/pairtools Then, you can compile and install `pairtools` in `the development mode `_, which installs the package without moving it to a system folder and thus allows immediate live-testing any changes in the python code. Please, make sure that you have `cython` and `pysam` installed! 
.. code-block:: bash $ pip install cython pysam numpy $ cd pairtools $ pip install -e ./ --no-build-isolation A few notes on the installation: - `pairtools` have to use `--no-build-isolation`, because it extends `pysam` via Cython and re-compiles it during the build process. When build isolation is enabled, these `pysam` objects get lost after the build. - Because of the `--no-build-isolation` flag, build does not install build-requires, so you have to install `cython`, `pysam` and `numpy` manually before the build. Building on MacOS is not fully supported yet, but this tentative solution seems to work: .. code-block:: bash $ pip install build cython pysam numpy delocate $ cd pairtools $ python -m build -n $ DYLD_LIBRARY_PATH=$(python -c 'import pysam, os; print(os.path.dirname(pysam.get_libraries()[0]))') delocate-wheel -v dist/{wheel_name}.whl $ pip install dist/{wheel_name}.whl Note that you need to change the `{wheel_name}` to the actual name of the wheel file. pairtools-1.1.3/doc/parsing.rst000066400000000000000000000367651474715105500165240ustar00rootroot00000000000000Parsing sequence alignments into Hi-C pairs =========================================== Overview -------- Hi-C experiments aim to measure the frequencies of contacts between all pairs of loci in the genome. In these experiments, the spacial structure of chromosomes is first fixed with formaldehyde crosslinks, after which DNA is partially digested with restriction enzymes and then re-ligated back. Then, DNA is shredded into smaller pieces, released from the nucleus, sequenced and aligned to the reference genome. The resulting sequence alignments reveal if DNA molecules were formed through ligations between DNA from different locations in the genome. These ligation events imply that ligated loci were close to each other when the ligation enzyme was active, i.e. they formed "a contact". 
``pairtools parse`` detects ligation events in the aligned sequences of DNA molecules formed in Hi-C experiments and reports them in the .pairs/.pairsam format. Terminology ----------- Throughout this document we will be using the same visual language to describe how DNA sequences (in the .fastq format) are transformed into sequence alignments (.sam/.bam) and into ligation events (.pairs). .. figure:: _static/terminology.png :scale: 50 % :alt: The visual language to describe transformation of Hi-C data :align: center DNA sequences (reads) are aligned to the reference genome and converted into ligation events Short-read sequencing determines the sequences of both ends (or, **sides**) of DNA molecules (typically 50-300 bp), producing **read pairs** in .fastq format (shown in the first row on the figure above). In such reads, base pairs are reported from the tips inwards, which is also defined as the **5'->3'** direction (in accordance with the 5'->3' direction of the DNA strand sequenced on the corresponding side of the read). Alignment software maps both reads of a pair to the reference genome, producing **alignments**, i.e. segments of the reference genome with matching sequences. Typically, if the read length is not very large (< 150 bp), there will be only two alignments per read pair, one on each side. But, sometimes, the parts of one or both sides may map to different locations on the genome, producing more than two alignments per DNA molecule (see :ref:`section-walks`). ``pairtools parse`` converts alignments into **ligation events** (aka **Hi-C pairs** aka **pairs**). In the simplest case, when each side has only one unique alignment (i.e. the whole side maps to a single unique segment of the genome), for each side, we report the chromosome, the genomic position of the outer-most (5') aligned base pair and the strand of the reference genome that the read aligns to. ``pairtools parse`` assigns to such pairs the type ``UU`` (unique-unique). 
Unmapped/multimapped reads -------------------------- Sometimes, one side or both sides of a read pair may not align to the reference genome: .. figure:: _static/read_pair_NU_NN.png :scale: 50 % :alt: Read pairs missing an alignment on one or both sides :align: center Read pairs missing an alignment on one or both sides In this case, ``pairtools parse`` fills in the chromosome of the corresponding side of Hi-C pair with ``!``, the position with ``0`` and the strand with ``-``. Such pairs are reported as type ``NU`` (null-unique, when the other side has a unique alignment) or ``NN`` (null-null, when both sides lack any alignment). Similarly, when one or both sides map to many genome locations equally well (i.e. have non-unique, or, multi-mapping alignments), ``pairtools parse`` reports the corresponding sides as (chromosome= ``!``, position= ``0``, strand= ``-``) and type ``MU`` (multi-unique) or ``MM`` (multi-multi) or ``NM`` (null-multi), depending on the type of the alignment on the other side. .. figure:: _static/read_pair_MU_MM_NM.png :scale: 50 % :alt: Read pairs with a non-unique alignment on one or both sides :align: center Read pairs with a non-unique (multi-) alignment on one side ``pairtools parse`` calls an alignment to be multi-mapping when its `MAPQ score `_ (which depends on the scoring gap between the two best candidate alignments for a segment) is lower than the value specified with the ``--min-mapq`` flag (by default, 1). .. _section-walks: Multiple ligations (walks) -------------------------- If the read is long enough (e.g. larger than 150 bp), it may contain more than two alignments: .. figure:: _static/read_pair_WW.png :scale: 50 % :alt: A sequenced Hi-C molecule that was formed via multiple ligations :align: center A sequenced Hi-C molecule that was formed via multiple ligations Molecules like these typically form via multiple ligation events and we call them walks [1]_. 
The mode of walks reporting is controlled by the ``--walks-policy`` parameter of ``pairtools parse``. You can report all the alignments in the reads by using ``pairtools parse2`` (see :ref:`parse2`). A pair of sequential alignments on a single read is a **ligation junction**. Ligation junctions are the Hi-C contacts that have been directly observed in the experiment. However, traditional Hi-C pairs do not have direct evidence of ligation because they arise from read pairs that do not necessarily contain a ligation junction. To filter out the molecules with complex walks, ``--walks-policy`` can be set to: - ``mask`` to tag these molecules as type ``WW`` (single ligations are rescued, see :ref:`Rescuing single ligations`), - ``5any`` to report the 5'-most alignment on each side, - ``5unique`` to report the 5'-most unique alignment on each side, - ``3any`` to report the 3'-most alignment on each side, - ``3unique`` to report the 3'-most unique alignment on each side, - ``all`` to report all sequential alignments (complex ligations are rescued, see :ref:`Rescuing complex walks`). Parse modes for walks: .. figure:: _static/rescue_modes.svg :width: 60 % :alt: Parse modes for walks :align: center Rescuing single ligations ------------------------- Importantly, some of DNA molecules containing only one ligation junction may still end up with three alignments: .. figure:: _static/read_pair_UR.png :scale: 50 % :alt: Not all read pairs with three alignments come from "walks" :align: center Not all read pairs with three alignments come from "walks" A molecule formed via a single ligation gets three alignments when one of the two ligated DNA pieces is shorter than the read length, such that the read on the corresponding side sequences through the ligation junction and into the other piece [2]_. The amount of such molecules depends on the type of the restriction enzyme, the typical size of DNA molecules in the Hi-C library and the read length, and sometimes can be considerable. 
``pairtools parse`` detects such molecules and **rescues** them (i.e. changes their type from a *walk* to a single-ligation molecule). It tests walks with three alignments using three criteria: .. figure:: _static/read_pair_UR_criteria.png :scale: 50 % :alt: The three criteria used for "rescue" :align: center The three criteria used to "rescue" three-alignment walks: cis, point towards each other, short distance 1. On the side with two alignments (the **chimeric** side), the "inner" (or, 3') alignment must be on the same chromosome as the alignment on the non-chimeric side. 2. The "inner" alignment on the chimeric side and the alignment on the non-chimeric side must point toward each other. 3. These two alignments must be within the distance specified with the ``--max-molecule-size`` flag (by default, 2000bp). Sometimes, the "inner" alignment on the chimeric side can be non-unique or "null" (i.e. when the unmapped segment is longer than ``--max-inter-align-gap``, as described in :ref:`Interpreting gaps between alignments`). ``pairtools parse`` ignores such alignments altogether and thus rescues such *walks* as well. .. figure:: _static/read_pair_UR_MorN.png :scale: 50 % :alt: A walk with three alignments gets rescued, when the middle alignment is multi- or null :align: center A walk with three alignments gets rescued, when the middle alignment is multi- or null. Interpreting gaps between alignments ------------------------------------ Reads that are only partially aligned to the genome can be interpreted in two different ways. One possibility is to assume that this molecule was formed via at least two ligations (i.e. it's a *walk*) but the non-aligned part (a **gap**) was missing from the reference genome for one reason or another. Another possibility is to simply ignore this gap (for example, because it could be an insertion or a technical artifact), thus assuming that our molecule was formed via a single ligation and has to be reported: .. 
figure:: _static/read_pair_gaps_vs_null_alignment.png :scale: 50 % :alt: A gap between alignments can be ignored or interpreted as a "null" alignment :align: center A gap between alignments can be interpreted as a legitimate segment without an alignment or simply ignored Both options have their merits, depending on the dataset, the quality of the reference genome and sequencing. ``pairtools parse`` ignores shorter *gaps* and keeps longer ones as "null" alignments. The maximal size of ignored *gaps* is set by the ``--max-inter-align-gap`` flag (by default, 20bp). Rescuing complex walks ------------------------- We call the multi-fragment DNA molecule that is formed during Hi-C (or any other chromosome capture with sequencing) a walk. If the reads are long enough, the right (reverse) read might read through the left (forward) read. Thus, the left read might span multiple ligation junctions of the right read. The pairs of contacts that overlap between left and right reads are intramolecular duplicates that should be removed. If the walk has no more than two different fragments at one side of the read, this can be rescued with simple ``pairtools parse --walks-policy mask``. However, in complex walks (two fragments on both reads or more than two fragments on any side) you need specialized functionality that will report all the deduplicated pairs in the complex walks. This is especially relevant if the read length is > 100 bp, since more than 20% of all restriction fragments in the genome are then shorter than the read length. 
We put together some statistics about the number of short restriction fragments for the DpnII enzyme: ======== ================= ================== ================== ================== ================== Genome #rfrags <50 bp <100 bp <150 bp <175 bp <200 bp -------- ----------------- ------------------ ------------------ ------------------ ------------------ hg38 828538 (11.5%) 1452918 (20.2%) 2121479 (29.5%) 2587250 (35.9%) 2992757 (41.6%) mm10 863614 (12.9%) 1554461 (23.3%) 2236609 (33.5%) 2526150 (37.9%) 2780769 (41.7%) dm3 65327 (19.6%) 108370 (32.5%) 142662 (42.8%) 156886 (47.1%) 169339 (50.9%) ======== ================= ================== ================== ================== ================== Consider the read with overlapping left and right sides: .. figure:: _static/rescue_modes_readthrough.svg :width: 60 % :alt: Complex walk with overlap :align: center ``pairtools`` can detect such molecules and parse them. Briefly, we detect all the unique ligation junctions, and do not report the same junction as a pair multiple times. To parse complex walks, you may use ``pairtools parse --walks-policy all`` and ``parse2``, which have slightly different functionalities. ``pairtools parse --walks-policy all`` is used with regular paired-end Hi-C, when you want all pairs in the walk to be reported as if they appeared in the sequencing data independently. ``parse2`` is used with single-end data or when you want to customize your reporting (orientation, position of alignments, or perform combinatorial expansion). For example, ``parse2`` defaults to reporting ligation junctions instead of outer ends of the alignments. The complete guide through the reporting options of ``parse2``, orientation: .. figure:: _static/report-orientation.svg :width: 60 % :alt: parse2 --report-orientation :align: center position: .. 
figure:: _static/report-positions.svg :width: 60 % :alt: parse2 --report-position :align: center Sometimes it is important to restore the sequence of ligation events (e.g., for MC-3C data). For that, you can add special columns ``walk_pair_index`` and ``walk_pair_type`` by setting the ``--add-pair-index`` option of ``parse2``, which will keep the order and type of each pair in the whole walk in the output .pairs file. - ``walk_pair_index`` contains information on the order of the pair in the complex walk, starting from the 5'-end of the left read - ``walk_pair_type`` describes the type of the pair relative to R1 and R2 reads of paired-end sequencing: - "R1-2" - unconfirmed pair, right and left alignments in the pair originate from different reads (left or right). This might be indirect ligation (mediated by other DNA fragments). - "R1" - pair originates from the left read. This is direct ligation. - "R2" - pair originates from the right read. Direct ligation. - "R1&2" - pair was sequenced at both left and right read. Direct ligation. With this information, the whole sequence of ligation events can be restored from the .pairs file. Combinatorial expansion is a way to increase the number of contacts in your data, which assumes that all DNA fragments in the same molecule (read) are in contact. Use the ``--expand`` parameter for combinatorial expansion. Note that expanded pairs have modified pair type, "E{separation}_{pair type}", e.g.: - "E1_R1" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of the next pair in R1 sequence of the same read. - "E2_R1" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of the pair separated by 2 alignments in R1 sequence of the same read. - "E2_R1&2" as above, both source pairs were sequenced on both R1 and R2. 
- "E4_R1-2" is a pair obtained by combining left alignment of some pair in R1 read and right alignment of some pair in R1 sequence, separated by at least 4 alignments in between. Note that "-" in the pair type means that pair is separated by unsequenced gap, which may contain other pairs. Aligner settings ----------------- We recommended using local DNA sequence aligners, such as `BWA-MEM `_ and `Bowtie2 `_ (in the local alignment mode), as opposed to global aligners (e.g. Bowtie2 in the end-to-end mode). Local aligners assume that DNA molecules may contain fragments aligning to different locations in the genome and thus are better suited for mapping chimeric Hi-C molecules. Aligning Hi-C reads may further require adjusting aligner settings. Some aligners assume DNA libraries contain only contiguous fragments, leading to 'mate rescue' where one read's alignment is modified or even forced based on its pair's alignment. This behavior is incompatible with Hi-C, which produces chimeric molecules with unrelated alignments on each side. To avoid erroneous results, disable mate rescue/pairing and align reads pairs independently. In `bwa mem`, use the '-SP' flags to achieve this. .. [1] Following the lead of `C-walks `_ .. [2] This procedure was first introduced in `HiC-Pro `_ and the in `Juicer `_ . pairtools-1.1.3/doc/protocols_pipelines.rst000066400000000000000000000147311474715105500211420ustar00rootroot00000000000000Workflows and Parameters ======================== This page provides guidance on using pairtools for the most common Hi-C protocols and helps users fine-tune the pipeline for different variations of the Hi-C protocol. It covers recommended parameters and best practices for processing Hi-C data using pairtools. Typical Hi-C Workflow ---------------------- A typical pairtools workflow for processing standard Hi-C data is outlined below. Please, note that this is a shorter version. 
For a detailed reproducible example, please, check the Jupyter notebook "Pairtools Walkthrough". 1. Align sequences to the reference genome with ``bwa mem``: .. code-block:: console bwa mem -SP index_file input.R1.fastq input.R2.fastq > input.sam 2. Parse alignments into Hi-C pairs using ``pairtools parse``: .. code-block:: console pairtools parse -c /path/to/chrom_sizes -o output.pairs.gz input.sam 3. Sort pairs using ``pairtools sort``: .. code-block:: console pairtools sort --nproc 8 -o output.sorted.pairs.gz output.pairs.gz 4. Detect and remove duplicates using ``pairtools dedup`` and generate statistics: .. code-block:: console pairtools dedup \ --output output.nodups.pairs.gz \ --output-dups output.dups.pairs.gz \ --output-unmapped output.unmapped.pairs.gz --output-stats output.dedup.stats \ output.sorted.pairs.gz 5. Aggregate into a cooler file: .. code-block:: console cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 /path/to/chrom_sizes:1000 output.nodups.pairs.gz output.1000.cool Recommended pairtools parameters for standard Hi-C protocols ------------------------------------------------------------ To adapt the standard workflow for common variations of the Hi-C protocol, consider adjusting the following parameters: 1. ``pairtools parse --walks-policy``: This parameter determines how pairtools parse handles reads with multiple alignments (walks). We recommend specifying the value explicitly, as the default has changed between versions of ``pairtools parse``. Our current recommendation is to use ``--walks-policy 5unique``, which is the default setting in the latest version of pairtools. With this option, pairtools parse reports the two 5'-most unique alignments on each side of a paired read as a pair. This option increases the number of reported pairs compared to the most conservative ``--walks-policy mask``. 
However, it's important to note that ``5unique`` can potentially report pairs of non-directly ligated fragments (i.e., two fragments separated by one or more other DNA fragments). Such non-direct (also known as "higher-order" or "nonadjacent") ligations have slightly different statistical properties than direct ligations, as illustrated in several Pore-C papers [`1 `_ , `2 `_]. An alternative is the ``--walks-policy 3unique`` policy, which reports the two 3'-most unique alignments on each side of a paired read as a pair, thus decreasing the chance of reporting non-direct ligations. However, ``3unique`` may not work well in situations where the combined length of a read pair is longer than the length of a DNA fragment (e.g. long read experiments). In this case, the 3' sides of the two reads will cover the same locations in the DNA molecule, and the 3' alignments may end up identical. Finally, the experimental ``--walks-policy all`` option reports all alignments of a read pair as separate pairs. This option maximizes the number of reported pairs. The downside is that it breaks the assumption that there is only one pair per read, which is not compatible with retrieval of .sam records from .pairsam output and may also complicate the interpretation of pair statistics. 2. ``pairtools select "(mapq1>=30) and (mapq2>=30)"``: This filtering command selects only pairs with high-quality alignments, where both reads in a pair have a mapping quality (MAPQ) score of 30 or higher. Applying this filter helps remove false alignments between partially homologous sequences, which often cause artificial high-frequency interactions in Hi-C maps. This step is essential for generating maps for high-quality dot calls. Note that we recommend storing the most comprehensive, unfiltered list of pairs and applying the filter on the fly prior to contact aggregation: .. 
code-block:: console pairtools select "(mapq1>=30) and (mapq2>=30)" output.nodups.pairs.gz | \ cooler cload pairs -c1 2 -p1 3 -c2 4 -p2 5 chromsizes.txt:1000 - output.mapq_30.1000.cool Technical tips -------------- - **Pipe between commands to save space and I/O throughput** Use Unix pipes to connect the output of one command directly to the input of the next command in the pipeline. This eliminates the need to store intermediate files on disk, saving storage space and reducing I/O overhead. Specifically, mapping, parsing, sorting and deduplication can all be connected into a single pipeline: .. code-block:: console bwa mem -SP index input.R1.fastq input.R2.fastq | \ pairtools parse -c chromsizes.txt | \ pairtools sort | \ pairtools dedup \ --output output.nodups.pairs.gz \ --output-dups output.dups.pairs.gz \ --output-unmapped output.unmapped.pairs.gz --output-stats output.dedup.stats - **Use recommended compression for efficient storage and processing.** .sam, .pairs and .pairsam files are text-based formats that are rather inefficient and slow to process. Pairtools recognizes .bam, .gz and .lz4 file extensions and automatically compresses and decompresses files on the fly. Compression saves space, and reduces I/O overhead at a relatively minor CPU cost. - **Parallelize tasks and manage resources effectively for faster execution.** Each pairtool has the CLI flags --nproc-in and --nproc-out to control the number of cores dedicated to input decompression and output compression. Additionally, `pairtools sort` parallelizes sorting with `--nproc`. Advanced Workflows ------------------ For more advanced workflows, please check the following projects: - `Distiller-nf `_ is a feature-rich Open2C Hi-C processing pipeline for the Nextflow workflow manager. - `Distiller-sm `_ is a similarly feature-rich and optimized pipeline implemented in Snakemake. 
pairtools-1.1.3/doc/quickstart.rst000066400000000000000000000024131474715105500172320ustar00rootroot00000000000000Quickstart ========== Install `pairtools` and all of its dependencies using the `conda `_ package manager and the `bioconda `_ channel for bioinformatics software. .. code-block:: bash $ conda install -c conda-forge -c bioconda pairtools Setup a new test folder and download a small Hi-C dataset mapped to sacCer3 genome: .. code-block:: bash $ mkdir /tmp/test-pairtools $ cd /tmp/test-pairtools $ wget https://github.com/open2c/distiller-test-data/raw/master/bam/MATalpha_R1.bam Additionally, we will need a .chromsizes file, a TAB-separated plain text table describing the names, sizes and the order of chromosomes in the genome assembly used during mapping: .. code-block:: bash $ wget https://raw.githubusercontent.com/open2c/distiller-test-data/master/genome/sacCer3.reduced.chrom.sizes With `pairtools parse`, we can convert paired-end sequence alignments stored in .sam/.bam format into .pairs, a TAB-separated table of Hi-C ligation junctions: .. code-block:: bash $ pairtools parse -c sacCer3.reduced.chrom.sizes -o MATalpha_R1.pairs.gz --drop-sam MATalpha_R1.bam Inspect the resulting table: .. code-block:: bash $ less MATalpha_R1.pairs.gz pairtools-1.1.3/doc/sorting.rst000066400000000000000000000076131474715105500165340ustar00rootroot00000000000000Sorting pairs ============= In order to enable efficient random access to Hi-C pairs, we **flip** and **sort** pairs. After sorting, interactions become arranged in the order of their genomic position, such that, for any given pair of regions, we easily find and extract all of their interactions. And, after flipping, all artificially duplicated molecules (either during PCR or in optical sequencing) end up in adjacent rows in sorted lists of interactions, such that we can easily identify and remove them. Sorting ------- ``pairtools sort`` arrange pairs in the order of (chrom1, chrom2, pos1, pos2). 
This order is also known as *block sorting*, because all pairs between any given pair of chromosomes become grouped into one continuous block. Additionally, ``pairtools sort`` also sorts pairs with identical positions by `pair_type`. This does not really do much for mapped reads, but it nicely splits unmapped reads into blocks of null-mapped and multi-mapped reads. We note that there is an alternative to block sorting, called *row sorting*, where pairs are sorted by (chrom1, pos1, chrom2, pos2). In `pairtools sort`, we prefer block-sorting since it cleanly separates cis interactions from trans ones and thus is a more optimal solution for typical use cases. Flipping -------- In a typical paired-end experiment, *side1* and *side2* of a DNA molecule are defined by the order in which they got sequenced. Since this order is essentially random, any given Hi-C pair, e.g. (chr1, 1.1Mb; chr2, 2.1Mb), may appear in a reversed orientation, i.e. (chr2, 2.1Mb; chr1, 1.1Mb). If we were to preserve this order of sides, interactions between same loci would appear in two different locations of the sorted pair list, which would complicate finding PCR/optical duplicates. To ensure that Hi-C pairs with similar coordinates end up in the same location of the sorted list, we **flip** pairs, i.e. we choose *side1* as the side with the lowest genomic coordinate. Thus, after flipping, for *trans* pairs (chrom1!=chrom2), order(chrom1)`_), - number of contacts between all chromosome pairs - **Summary statistics** include: - fraction of duplicates - fraction of cis interactions (at different minimal distance cutoffs) out of total - estimation of library complexity Summary statistics can inform you about the quality of the data. For example, more trans interactions can be a sign of problems with the 3C+ procedure and lower signal-to-noise ratio. Substantial mapping to mitochondrial chromosome (chrM) might be a sign of random ligation. 
- **P(s), or scaling.** The dependence of contact frequency on the genomic distance is referred to as the P(s) curve or scaling, and is a rich source of both biologically relevant information and technical quality of 3C+ experiments. The shape of P(s) is often used to characterize mechanisms of genome folding and reveal issues with QC. Interactive visualization of stats with MultiQC ----------------------------------------------- Install `multiqc`: .. code-block:: bash pip install --upgrade --force-reinstall git+https://github.com/open2c/MultiQC.git Note that (for now) the pairtools module for MultiQC is only available in the open2C fork and not in the main MultiQC repository. Run MultiQC in a folder with one or multiple .stats files: .. code-block:: bash multiqc . This will produce a nice .html file with interactive graphical summaries of the stats. Estimating library complexity ----------------------------- Pairtools assumes that each sequencing read is randomly chosen with replacement from a finite pool of fragments in DNA library [1]_ [2]_. With each new sequenced molecule, the expected number of observed unique molecules increases according to a simple equation: .. math:: U(N+1) = U(N) + \left(1 - \frac{U(N)}{C} \right), where :math:`N` is the number of sequenced molecules, :math:`U(N)` is the expected number of observed unique molecules after sequencing :math:`N` molecules, and :math:`C` is the library complexity. In the continuous limit, this equation yields [1]_ [2]_: .. math:: \frac{U(N)}{C} = 1 - \exp\left( - \frac{N}{C} \right), which can be solved for :math:`C` using the Lambert W function: .. math:: C = \frac{N}{ \Re \left( W_{Lambert} \left( - \frac{ \exp\left( - \frac{1}{u} \right) } {u} \right) \right) + \frac{1}{u} }, \qquad u = \frac{U(N)}{N}, where :math:`u` is the observed fraction of unique molecules. Library complexity can guide the choice of sequencing depth of the library and provide an estimate of library quality. Illumina sequencing duplicates ------------------------------ Importantly, you can estimate the complexity of Hi-C libraries using only small QC samples to decide if their quality permits deeper sequencing [3]_. 
These estimates, however, can be significantly biased by the presence of “optical” or “clustering” duplicates. Such duplicates occur as artefacts of the sequencing procedure. Optical duplicates appear in data generated on sequencers with non-patterned flowcells in cases the instrument either erroneously splits a signal from a single sequenced molecule into two. On the other hand, clustering duplicates appear on patterned flowcells, when during cluster generation a cluster occupies adjacent nanowells. [4]_. The rate of optical and clustering duplication depends on the technology and the operating conditions (e.g. molarity of the library loaded onto the flowcell), but not on the library complexity or sequencing depth. Thus, in small sequencing samples in particular the clustering duplication on recent Illumina instruments can severely inflate the observed levels of duplication [5]_, resulting in underestimation of the library complexity. While the frequency of PCR duplicates increases with sequencing depth, optical or clustering duplication levels may stay constant for a particular sequencer, provided the library is loaded at the same molarity. This means that the high frequency of clustering duplicates on the NovaSeq leads to severe underestimation of library complexity in the pilot runs. In particular, the recent models of Illumina sequencers with patterned flowcells (such as NovaSeq) suffer from increased clustering duplication rate, which may far exceed the level of PCR duplication. Luckily, optical and clustering duplicates can be distinguished from the PCR ones, as the former are located next to each other on the sequencing flow cell. In case of Illumina sequencers, pairtools dedup can infer the positions of sequencing reads from their IDs and focuses on geometrically distant duplicates to produce unbiased estimates of PCR duplication and library complexity. 
Although SRA does not store original read IDs from the sequencer, this analysis is possible when pairtools is run on a dataset with original Illumina-generated read IDs. Note that in our experience even when accounting for optical/clustering duplicates, the complexity can be greatly underestimated, but is still a useful measurement to choose the most complex libraries. .. [1] Picard. http://broadinstitute.github.io/picard/ .. [2] Thread: [Samtools-help] Pickard estimate for the size of a library - wrong or non-transparent? https://sourceforge.net/p/samtools/mailman/samtools-help/thread/DUB405-EAS154589A1ACEF2BE4C573D4592180@phx.gbl/ .. [3] Rao, S. S. P. et al. A 3D map of the human genome at kilobase resolution reveals principles of chromatin looping. Cell 159, 1665–1680 (2014). .. [4] Duplicates on Illumina. BioStars. https://www.biostars.org/p/229842/ .. [5] Illumina Patterned Flow Cells Generate Duplicated Sequences. https://sequencing.qcfail.com/articles/illumina-patterned-flow-cells-generate-duplicated-sequences/pairtools-1.1.3/pairtools/000077500000000000000000000000001474715105500155555ustar00rootroot00000000000000pairtools-1.1.3/pairtools/__init__.py000066400000000000000000000002601474715105500176640ustar00rootroot00000000000000""" pairtools ~~~~~~~~~ CLI tools to process mapped Hi-C data :copyright: (c) 2017-2024 Open2C :author: Open2C :license: MIT """ __version__ = "1.1.3" # from . import lib pairtools-1.1.3/pairtools/__main__.py000066400000000000000000000000731474715105500176470ustar00rootroot00000000000000from .cli import cli if __name__ == "__main__": cli() pairtools-1.1.3/pairtools/_logging.py000066400000000000000000000006221474715105500177140ustar00rootroot00000000000000import logging _loggers = {} def get_logger(name="pairtools"): # Based on ipython traitlets global _loggers if name not in _loggers: _loggers[name] = logging.getLogger(name) # Add a NullHandler to silence warnings about not being # initialized, per best practice for libraries. 
_loggers[name].addHandler(logging.NullHandler()) return _loggers[name] pairtools-1.1.3/pairtools/cli/000077500000000000000000000000001474715105500163245ustar00rootroot00000000000000pairtools-1.1.3/pairtools/cli/__init__.py000066400000000000000000000125201474715105500204350ustar00rootroot00000000000000# -*- coding: utf-8 -*- import click import functools import sys from .. import __version__ import logging from .._logging import get_logger CONTEXT_SETTINGS = { "help_option_names": ["-h", "--help"], } @click.version_option(version=__version__) @click.group(context_settings=CONTEXT_SETTINGS) @click.option( "--post-mortem", help="Post mortem debugging", is_flag=True, default=False ) @click.option( "--output-profile", help="Profile performance with Python cProfile and dump the statistics " "into a binary file", type=str, default="", ) @click.option("-v", "--verbose", help="Verbose logging.", count=True) @click.option( "-d", "--debug", help="On error, drop into the post-mortem debugger shell.", is_flag=True, default=False, ) def cli(post_mortem, output_profile, verbose, debug): """Flexible tools for Hi-C data processing. All pairtools have a few common options, which should be typed _before_ the command name. 
""" if post_mortem: import traceback try: import ipdb as pdb except ImportError: import pdb def _excepthook(exc_type, value, tb): traceback.print_exception(exc_type, value, tb) print() pdb.pm() sys.excepthook = _excepthook if output_profile: import cProfile import atexit pr = cProfile.Profile() pr.enable() def _atexit_profile_hook(): pr.disable() pr.dump_stats(output_profile) atexit.register(_atexit_profile_hook) # Initialize logging to stderr logging.basicConfig(stream=sys.stderr) logging.captureWarnings(True) root_logger = get_logger() # Set verbosity level if verbose > 0: root_logger.setLevel(logging.DEBUG) if verbose > 1: # pragma: no cover try: import psutil import atexit @atexit.register def process_dump_at_exit(): process_attrs = [ "cmdline", # 'connections', "cpu_affinity", "cpu_num", "cpu_percent", "cpu_times", "create_time", "cwd", # 'environ', "exe", # 'gids', "io_counters", "ionice", "memory_full_info", # 'memory_info', # 'memory_maps', "memory_percent", "name", "nice", "num_ctx_switches", "num_fds", "num_threads", "open_files", "pid", "ppid", "status", "terminal", "threads", # 'uids', "username", ] p = psutil.Process() info_ = p.as_dict(process_attrs, ad_value="") for key in process_attrs: root_logger.debug("PSINFO:'{}': {}".format(key, info_[key])) except ImportError: root_logger.warning("Install psutil to see process information.") else: root_logger.setLevel(logging.INFO) # Set hook for postmortem debugging if debug: # pragma: no cover import traceback try: import ipdb as pdb except ImportError: import pdb def _excepthook(exc_type, value, tb): traceback.print_exception(exc_type, value, tb) print() pdb.pm() sys.excepthook = _excepthook def common_io_options(func): @click.option( "--nproc-in", type=int, default=3, show_default=True, help="Number of processes used by the auto-guessed input decompressing command.", ) @click.option( "--nproc-out", type=int, default=8, show_default=True, help="Number of processes used by the auto-guessed output compressing 
command.", ) @click.option( "--cmd-in", type=str, default=None, help="A command to decompress the input file. " "If provided, fully overrides the auto-guessed command. " "Does not work with stdin and pairtools parse. " "Must read input from stdin and print output into stdout. " "EXAMPLE: pbgzip -dc -n 3", ) @click.option( "--cmd-out", type=str, default=None, help="A command to compress the output file. " "If provided, fully overrides the auto-guessed command. " "Does not work with stdout. " "Must read input from stdin and print output into stdout. " "EXAMPLE: pbgzip -c -n 8", ) @functools.wraps(func) def wrapper(*args, **kwargs): return func(*args, **kwargs) return wrapper from . import ( dedup, sort, flip, merge, markasdup, select, split, restrict, phase, parse, parse2, stats, sample, filterbycov, header, scaling, ) pairtools-1.1.3/pairtools/cli/dedup.py000066400000000000000000000446421474715105500200110ustar00rootroot00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- import sys import ast import pathlib from .._logging import get_logger logger = get_logger() import click from ..lib import fileio, headerops, pairsam_format from ..lib.dedup import streaming_dedup, streaming_dedup_cython from ..lib.stats import PairCounter from . import cli, common_io_options UTIL_NAME = "pairtools_dedup" @cli.command() @click.argument("pairs_path", type=str, required=False) ### Output files: @click.option( "-o", "--output", type=str, default="", help="output file for pairs after duplicate removal." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @click.option( "--output-dups", type=str, default="", help="output file for duplicated pairs. " " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " If the path is the same as in --output or -, output duplicates together " " with deduped pairs. 
By default, duplicates are dropped.", ) @click.option( "--output-unmapped", type=str, default="", help="output file for unmapped pairs. " "If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed. " "If the path is the same as in --output or -, output unmapped pairs together " "with deduped pairs. If the path is the same as --output-dups, output " "unmapped reads together with dups. By default, unmapped pairs are dropped.", ) @click.option( "--output-stats", type=str, default="", help="output file for duplicate statistics." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, statistics are not printed.", ) @click.option( "--output-bytile-stats", type=str, default="", help="output file for duplicate statistics." " Note that the readID should be provided and contain tile information for this option. " " This analysis is possible when pairtools is run on a dataset with original Illumina-generated read IDs, " " because SRA does not store original read IDs from the sequencer. " " By default, by-tile duplicate statistics are not printed. " " If file exists, it will be open in the append mode. " " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed.", ) ### Set the dedup method: @click.option( "--max-mismatch", type=int, default=3, show_default=True, help="Pairs with both sides mapped within this distance (bp) from each " "other are considered duplicates. [dedup option]", ) @click.option( "--method", type=click.Choice(["max", "sum"]), default="max", help="define the mismatch as either the max or the sum of the mismatches of" "the genomic locations of the both sides of the two compared molecules. [dedup option]", show_default=True, ) @click.option( "--backend", type=click.Choice(["scipy", "sklearn", "cython"]), default="scipy", help="What backend to use: scipy and sklearn are based on KD-trees," " cython is online indexed list-based algorithm." 
" With cython backend, duplication is not transitive with non-zero max mismatch " " (e.g. pairs A and B are duplicates, and B and C are duplicates, then A and C are " " not necessary duplicates of each other), while with scipy and sklearn it's " " transitive (i.e. A and C are necessarily duplicates)." " Cython is the original version used in pairtools since its beginning." " It is available for backwards compatibility and to allow specification of the" " column order." " Now the default scipy backend is generally the fastest, and with chunksize below" " 1 mln has the lowest memory requirements. [dedup option]", # " 'cython' is deprecated and provided for backwards compatibility", ) ### Scipy and sklearn-specific options: @click.option( "--chunksize", type=int, default=10_000, show_default=True, help="Number of pairs in each chunk. Reduce for lower memory footprint." " Below 10,000 performance starts suffering significantly and the algorithm might" " miss a few duplicates with non-zero --max-mismatch." " Only works with '--backend scipy or sklearn'. [dedup option]", ) @click.option( "--carryover", type=int, default=100, show_default=True, help="Number of deduped pairs to carry over from previous chunk to the new chunk" " to avoid breaking duplicate clusters." " Only works with '--backend scipy or sklearn'. [dedup option]", ) @click.option( "-p", "--n-proc", type=int, default=1, help="Number of cores to use. Only applies with sklearn backend." "Still needs testing whether it is ever useful. [dedup option]", ) ### Output options: @click.option( "--mark-dups/--no-mark-dups", default=True, is_flag=True, help='Specify if duplicate pairs should be marked as DD in "pair_type" and ' "as a duplicate in the sam entries. True by default. [output format option]", ) @click.option( "--keep-parent-id", is_flag=True, help="If specified, duplicate pairs are marked with the readID of the retained" " deduped read in the 'parent_readID' field. 
[output format option]", ) @click.option( "--extra-col-pair", nargs=2, # type=click.Tuple([str, str]), multiple=True, help="Extra columns that also must match for two pairs to be marked as " "duplicates. Can be either provided as 0-based column indices or as column " 'names (requires the "#columns" header field). The option can be provided ' "multiple times if multiple column pairs must match. " 'Example: --extra-col-pair "phase1" "phase2". [output format option]', ) ### Input options: @click.option( "--sep", type=str, default=pairsam_format.PAIRSAM_SEP_ESCAPE, help=r"Separator (\t, \v, etc. characters are " "supported, pass them in quotes). [input format option]", ) @click.option( "--send-header-to", type=click.Choice(["dups", "dedup", "both", "none"]), default="both", help="Which of the outputs should receive header and comment lines. [input format option]", ) @click.option( "--c1", type=str, default=pairsam_format.COLUMNS_PAIRS[1], help=f"Chrom 1 column; default {pairsam_format.COLUMNS_PAIRS[1]}" "[input format option]", ) @click.option( "--c2", type=str, default=pairsam_format.COLUMNS_PAIRS[3], help=f"Chrom 2 column; default {pairsam_format.COLUMNS_PAIRS[3]}" "[input format option]", ) @click.option( "--p1", type=str, default=pairsam_format.COLUMNS_PAIRS[2], help=f"Position 1 column; default {pairsam_format.COLUMNS_PAIRS[2]}" "[input format option]", ) @click.option( "--p2", type=str, default=pairsam_format.COLUMNS_PAIRS[4], help=f"Position 2 column; default {pairsam_format.COLUMNS_PAIRS[4]}" "[input format option]", ) @click.option( "--s1", type=str, default=pairsam_format.COLUMNS_PAIRS[5], help=f"Strand 1 column; default {pairsam_format.COLUMNS_PAIRS[5]}" "[input format option]", ) @click.option( "--s2", type=str, default=pairsam_format.COLUMNS_PAIRS[6], help=f"Strand 2 column; default {pairsam_format.COLUMNS_PAIRS[6]}" "[input format option]", ) @click.option( "--unmapped-chrom", type=str, default=pairsam_format.UNMAPPED_CHROM, help="Placeholder for a 
chromosome on an unmapped side; default {}".format( pairsam_format.UNMAPPED_CHROM ), ) # Output stats option @click.option( "--yaml/--no-yaml", is_flag=True, default=False, help="Output stats in yaml format instead of table. [output stats format option]", ) # Filtering options for reporting stats: @click.option( "--filter", default=None, required=False, multiple=True, help="Filter stats with condition to apply to the data (similar to `pairtools select` or `pairtools stats`). " "For non-YAML output only the first filter will be reported. [output stats filtering option] " "Note that this will not change the deduplicated output pairs. " """Example: pairtools dedup --yaml --filter 'unique:(pair_type=="UU")' --filter 'close:(pair_type=="UU") and (abs(pos1-pos2)<10)' --output-stats - test.pairs """, ) @click.option( "--engine", default="pandas", required=False, help="Engine for regular expression parsing for stats filtering. " "Python will provide you regex functionality, while pandas does not accept " "custom funtctions and works faster. [output stats filtering option]", ) @click.option( "--chrom-subset", type=str, default=None, required=False, help="A path to a chromosomes file (tab-separated, 1st column contains " "chromosome names) containing a chromosome subset of interest for stats filter. " "If provided, additionally filter pairs with both sides originating from " "the provided subset of chromosomes. This operation modifies the #chromosomes: " "and #chromsize: header fields accordingly. " "Note that this will not change the deduplicated output pairs. [output stats filtering option]", ) @click.option( "--startup-code", type=str, default=None, required=False, help="An auxiliary code to execute before filteringfor stats. " "Use to define functions that can be evaluated in the CONDITION statement. 
[output stats filtering option]", ) @click.option( "-t", "--type-cast", type=(str, str), default=(), multiple=True, help="Cast a given column to a given type for stats filtering. By default, only pos and mapq " "are cast to int, other columns are kept as str. Provide as " "-t , e.g. -t read_len1 int. Multiple entries are allowed. [output stats filtering option]", ) @common_io_options def dedup( pairs_path, output, output_dups, output_unmapped, output_stats, output_bytile_stats, chunksize, carryover, max_mismatch, method, sep, send_header_to, c1, c2, p1, p2, s1, s2, unmapped_chrom, mark_dups, extra_col_pair, keep_parent_id, backend, n_proc, **kwargs, ): """Find and remove PCR/optical duplicates. Find PCR/optical duplicates in an upper-triangular flipped sorted pairs/pairsam file. Allow for a +/-N bp mismatch at each side of duplicated molecules. PAIRS_PATH : input triu-flipped sorted .pairs or .pairsam file. If the path ends with .gz/.lz4, the input is decompressed by bgzip/lz4c. By default, the input is read from stdin. 
def dedup_py(
    pairs_path,
    output,
    output_dups,
    output_unmapped,
    output_stats,
    output_bytile_stats,
    chunksize,
    carryover,
    max_mismatch,
    method,
    sep,
    send_header_to,
    c1,
    c2,
    p1,
    p2,
    s1,
    s2,
    unmapped_chrom,
    mark_dups,
    extra_col_pair,
    keep_parent_id,
    backend,
    n_proc,
    **kwargs,
):
    """Run duplicate detection on a pairs stream and route the results.

    Opens the input and the requested outputs, writes headers, dispatches the
    body of the file to the selected backend and finally saves statistics.

    Parameters
    ----------
    pairs_path : str
        Input path, handed to ``fileio.auto_open``.
    output, output_dups, output_unmapped : str
        Destinations for deduped, duplicated and unmapped pairs. An empty
        ``output_dups``/``output_unmapped`` drops that category; ``"-"`` or a
        path equal to another output shares that output's stream.
    output_stats, output_bytile_stats : str
        Destinations for the statistics; empty means "not requested".
    chunksize, carryover, n_proc : int
        Forwarded to the scipy/sklearn backend only.
    max_mismatch, method, mark_dups, unmapped_chrom
        Forwarded to the backend unchanged.
    sep : str
        Escaped field separator (decoded with ``ast.literal_eval``); only the
        cython backend receives it.
    send_header_to : {"dups", "dedup", "both", "none"}
        Which outputs receive the header.
    c1, c2, p1, p2, s1, s2 : str
        Column names of chromosomes, positions and strands.
    extra_col_pair : iterable of (str, str) or None
        Extra column pairs that must match; each item is a column name or a
        0-based column index given as a string.
    keep_parent_id : bool
        Add the ``parent_readID`` column to the duplicates header. Forced on
        when ``output_bytile_stats`` is provided.
    backend : {"scipy", "sklearn", "cython"}
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out``, ``filter``,
        ``yaml``, ``startup_code``, ``type_cast`` and ``engine`` are read.

    Raises
    ------
    ValueError
        If ``backend`` is not one of the supported values.
    """
    # The separator arrives escaped (e.g. the two characters "\t"); decode it.
    sep = ast.literal_eval('"""' + sep + '"""')
    send_header_to_dedup = send_header_to in ["both", "dedup"]
    send_header_to_dup = send_header_to in ["both", "dups"]
    as_yaml = kwargs.get("yaml", False)

    def open_output(path):
        # All outputs share the same compression settings.
        return fileio.auto_open(
            path,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = open_output(output)
    # Only open the stats stream when statistics were actually requested.
    out_stats_stream = open_output(output_stats) if output_stats else None

    bytile_dups = False
    out_bytile_stats_stream = None
    if output_bytile_stats:
        out_bytile_stats_stream = open_output(output_bytile_stats)
        bytile_dups = True
        if not keep_parent_id:
            logger.warning(
                "Force --keep-parent-id because --output-bytile-stats provided."
            )
            keep_parent_id = True

    # generate empty PairCounter if stats output is requested:
    if output_stats:
        filters = kwargs.get("filter", None)
        first_filter_name = "no_filter"  # default filter name for full output
        if filters:
            first_filter_name = filters[0].split(":", 1)[0]
            if len(filters) > 1 and not as_yaml:
                logger.warning(
                    f"Output the first filter only in non-YAML output: {first_filter_name}"
                )
            # Each filter is given as "name:condition".
            filters = dict(f.split(":", 1) for f in filters)
        else:
            filters = None
        out_stat = PairCounter(
            bytile_dups=bytile_dups,
            filters=filters,
            startup_code=kwargs.get("startup_code", ""),  # for evaluation of filters
            type_cast=kwargs.get("type_cast", ()),  # for evaluation of filters
            engine=kwargs.get("engine", "pandas"),
        )
    else:
        out_stat = None

    if not output_dups:
        outstream_dups = None
    elif output_dups == "-" or (
        pathlib.Path(output_dups).absolute() == pathlib.Path(output).absolute()
    ):
        outstream_dups = outstream
    else:
        outstream_dups = open_output(output_dups)

    if not output_unmapped:
        outstream_unmapped = None
    elif output_unmapped == "-" or (
        pathlib.Path(output_unmapped).absolute() == pathlib.Path(output).absolute()
    ):
        outstream_unmapped = outstream
    elif (
        pathlib.Path(output_unmapped).absolute()
        == pathlib.Path(output_dups).absolute()
    ):
        outstream_unmapped = outstream_dups
    else:
        outstream_unmapped = open_output(output_unmapped)

    header, body_stream = headerops.get_header(instream)
    if not any(l.startswith("#sorted") for l in header):
        logger.warning(
            "Pairs file appears not to be sorted, dedup might produce wrong results."
        )
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)

    dups_header = header.copy()
    if keep_parent_id and len(dups_header) > 0:
        dups_header = headerops.append_columns(dups_header, ["parent_readID"])
    # When dups share the main output, the main header must list parent_readID too.
    if outstream == outstream_dups:
        header = dups_header
    if send_header_to_dedup:
        outstream.writelines((l + "\n" for l in header))
    if send_header_to_dup and outstream_dups and (outstream_dups != outstream):
        outstream_dups.writelines((l + "\n" for l in dups_header))
    if (
        outstream_unmapped
        and (outstream_unmapped != outstream)
        and (outstream_unmapped != outstream_dups)
    ):
        outstream_unmapped.writelines((l + "\n" for l in header))

    column_names = headerops.extract_column_names(header)

    # Resolve extra columns to names; numeric strings are 0-based indices.
    extra_cols1 = []
    extra_cols2 = []
    if extra_col_pair is not None:
        for col1, col2 in extra_col_pair:
            extra_cols1.append(column_names[int(col1)] if col1.isnumeric() else col1)
            extra_cols2.append(column_names[int(col2)] if col2.isnumeric() else col2)

    if backend == "cython":
        # The cython implementation works with column indices, not names.
        extra_cols1 = [column_names.index(col) for col in extra_cols1]
        extra_cols2 = [column_names.index(col) for col in extra_cols2]
        c1 = column_names.index(c1)
        c2 = column_names.index(c2)
        p1 = column_names.index(p1)
        p2 = column_names.index(p2)
        s1 = column_names.index(s1)
        s2 = column_names.index(s2)
        streaming_dedup_cython(
            method,
            max_mismatch,
            sep,
            c1,
            c2,
            p1,
            p2,
            s1,
            s2,
            extra_cols1,
            extra_cols2,
            unmapped_chrom,
            body_stream,
            outstream,
            outstream_dups,
            outstream_unmapped,
            out_stat,
            mark_dups,
            keep_parent_id,
        )
    elif backend in ("scipy", "sklearn"):
        streaming_dedup(
            in_stream=body_stream,
            colnames=column_names,
            chunksize=chunksize,
            carryover=carryover,
            method=method,
            mark_dups=mark_dups,
            max_mismatch=max_mismatch,
            # Pass the resolved column names so numeric indices work here too.
            extra_col_pairs=list(zip(extra_cols1, extra_cols2)),
            keep_parent_id=keep_parent_id,
            unmapped_chrom=unmapped_chrom,
            outstream=outstream,
            outstream_dups=outstream_dups,
            outstream_unmapped=outstream_unmapped,
            out_stat=out_stat,
            backend=backend,
            n_proc=n_proc,
            c1=c1,
            c2=c2,
            p1=p1,
            p2=p2,
            s1=s1,
            s2=s2,
        )
    else:
        raise ValueError("Unknown backend")

    # save statistics to a file if it was requested:
    if out_stat:
        out_stat.save(
            out_stats_stream,
            yaml=as_yaml,  # format as yaml
            # output only the first filter if non-YAML output
            filter=(first_filter_name if not as_yaml else None),
        )
        # By-tile statistics are saved by the counter, so one must exist.
        if bytile_dups:
            out_stat.save_bytile_dups(out_bytile_stats_stream)

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
    if outstream_dups and (outstream_dups != outstream):
        outstream_dups.close()
    if (
        outstream_unmapped
        and (outstream_unmapped != outstream)
        and (outstream_unmapped != outstream_dups)
    ):
        outstream_unmapped.close()
    # Never close the interpreter's stdout, in case a stats path resolved to it.
    if out_stats_stream and (out_stats_stream != sys.stdout):
        out_stats_stream.close()
    if out_bytile_stats_stream and (out_bytile_stats_stream != sys.stdout):
        out_bytile_stats_stream.close()
" If the path is the same as in --output or -, output duplicates together " " with deduped pairs. By default, duplicates are dropped.", ) @click.option( "--output-unmapped", type=str, default="", help="output file for unmapped pairs. " "If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed. " "If the path is the same as in --output or -, output unmapped pairs together " "with deduped pairs. If the path is the same as --output-highcov, " "output unmapped reads together. By default, unmapped pairs are dropped.", ) @click.option( "--output-stats", type=str, default="", help="output file for statistics of multiple interactors. " " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, statistics are not printed.", ) @click.option( "--max-cov", type=int, default=8, help="The maximum allowed coverage per region." ) @click.option( "--max-dist", type=int, default=500, help="The resolution for calculating coverage. For each pair, the local " "coverage around each end is calculated as (1 + the number of neighbouring " "pairs within +/- max_dist bp) ", ) @click.option( "--method", type=click.Choice(["max", "sum"]), default="max", help="calculate the number of neighbouring pairs as either the sum or the max" " of the number of neighbours on the two sides", show_default=True, ) @click.option( "--sep", type=str, default=pairsam_format.PAIRSAM_SEP_ESCAPE, help=r"Separator (\t, \v, etc. 
characters are " "supported, pass them in quotes) ", ) @click.option( "--comment-char", type=str, default="#", help="The first character of comment lines" ) @click.option( "--send-header-to", type=click.Choice(["lowcov", "highcov", "both", "none"]), default="both", help="Which of the outputs should receive header and comment lines", ) @click.option( "--c1", type=int, default=pairsam_format.COL_C1, help="Chrom 1 column; default {}".format(pairsam_format.COL_C1), ) @click.option( "--c2", type=int, default=pairsam_format.COL_C2, help="Chrom 2 column; default {}".format(pairsam_format.COL_C2), ) @click.option( "--p1", type=int, default=pairsam_format.COL_P1, help="Position 1 column; default {}".format(pairsam_format.COL_P1), ) @click.option( "--p2", type=int, default=pairsam_format.COL_P2, help="Position 2 column; default {}".format(pairsam_format.COL_P2), ) @click.option( "--s1", type=int, default=pairsam_format.COL_S1, help="Strand 1 column; default {}".format(pairsam_format.COL_S1), ) @click.option( "--s2", type=int, default=pairsam_format.COL_S2, help="Strand 2 column; default {}".format(pairsam_format.COL_S2), ) @click.option( "--unmapped-chrom", type=str, default=pairsam_format.UNMAPPED_CHROM, help="Placeholder for a chromosome on an unmapped side; default {}".format( pairsam_format.UNMAPPED_CHROM ), ) @click.option( "--mark-multi", is_flag=True, help='If specified, duplicate pairs are marked as FF in "pair_type" and ' "as a duplicate in the sam entries.", ) @common_io_options def filterbycov( pairs_path, output, output_highcov, output_unmapped, output_stats, max_dist, max_cov, method, sep, comment_char, send_header_to, c1, c2, p1, p2, s1, s2, unmapped_chrom, mark_multi, **kwargs ): """Remove pairs from regions of high coverage. Find and remove pairs with >(MAX_COV-1) neighbouring pairs within a +/- MAX_DIST bp window around either side. Useful for single-cell Hi-C experiments, where coverage is naturally limited by the chromosome copy number. 
def filterbycov_py(
    pairs_path,
    output,
    output_highcov,
    output_unmapped,
    output_stats,
    max_dist,
    max_cov,
    method,
    sep,
    comment_char,
    send_header_to,
    c1,
    c2,
    p1,
    p2,
    s1,
    s2,
    unmapped_chrom,
    mark_multi,
    **kwargs
):
    """Split a pairs stream into low-coverage, high-coverage and unmapped pairs.

    Opens the input and every requested output, copies the header (with a new
    @PG record) to the outputs, hands the body to ``streaming_filterbycov`` and
    finally saves statistics if ``output_stats`` was given. ``comment_char`` is
    accepted but not used here.
    """
    # The separator is passed escaped; decode it into the real character(s).
    sep = ast.literal_eval('"""' + sep + '"""')
    header_goes_to_lowcov = send_header_to in ["both", "lowcov"]
    header_goes_to_highcov = send_header_to in ["both", "highcov"]

    def _open_writer(path):
        # Every output file is opened with the same compression settings.
        return fileio.auto_open(
            path,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    # Input: a file when a path is given, stdin otherwise.
    if pairs_path:
        instream = fileio.auto_open(
            pairs_path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
    else:
        instream = sys.stdin

    # Main output (low-coverage pairs): a file or stdout.
    outstream = _open_writer(output) if output else sys.stdout

    # Statistics: a stream and an empty counter, only on request.
    out_stats_stream = _open_writer(output_stats) if output_stats else None
    out_stat = PairCounter() if output_stats else None

    # High-coverage pairs: dropped, merged into the main output, or separate.
    if not output_highcov:
        outstream_high = None
    elif output_highcov == "-":
        outstream_high = outstream
    elif pathlib.Path(output_highcov).absolute() == pathlib.Path(output).absolute():
        outstream_high = outstream
    else:
        outstream_high = _open_writer(output_highcov)

    # Unmapped pairs: dropped, merged into one of the outputs, or separate.
    if not output_unmapped:
        outstream_unmapped = None
    elif output_unmapped == "-":
        outstream_unmapped = outstream
    elif pathlib.Path(output_unmapped).absolute() == pathlib.Path(output).absolute():
        outstream_unmapped = outstream
    elif (
        pathlib.Path(output_unmapped).absolute()
        == pathlib.Path(output_highcov).absolute()
    ):
        outstream_unmapped = outstream_high
    else:
        outstream_unmapped = _open_writer(output_unmapped)

    # A stream is "own" when it is not shared with another output.
    high_is_own_stream = outstream_high and (outstream_high != outstream)
    unmapped_is_own_stream = (
        outstream_unmapped
        and (outstream_unmapped != outstream)
        and (outstream_unmapped != outstream_high)
    )

    # Headers
    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)

    if header_goes_to_lowcov:
        outstream.writelines((entry + "\n" for entry in header))
    if header_goes_to_highcov and high_is_own_stream:
        outstream_high.writelines((entry + "\n" for entry in header))
    if unmapped_is_own_stream:
        outstream_unmapped.writelines((entry + "\n" for entry in header))

    # Filtering itself
    streaming_filterbycov(
        method,
        max_dist,
        max_cov,
        sep,
        c1,
        c2,
        p1,
        p2,
        s1,
        s2,
        unmapped_chrom,
        body_stream,
        outstream,
        outstream_high,
        outstream_unmapped,
        out_stat,
        mark_multi,
    )

    # Statistics are saved only when a counter was created.
    if out_stat:
        out_stat.save(out_stats_stream)

    # Close what was opened here; leave the standard streams alone.
    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
    if high_is_own_stream:
        outstream_high.close()
    if unmapped_is_own_stream:
        outstream_unmapped.close()
    if out_stats_stream:
        out_stats_stream.close()
import cli, common_io_options import warnings UTIL_NAME = "pairtools_flip" @cli.command() @click.argument("pairs_path", type=str, required=False) @click.option( "-c", "--chroms-path", type=str, required=True, help="Chromosome order used to flip interchromosomal mates: " "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose " "first column lists scaffold names. Any scaffolds not listed will be " "ordered lexicographically following the names provided.", ) @click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @common_io_options def flip(pairs_path, chroms_path, output, **kwargs): """Flip pairs to get an upper-triangular matrix. Change the order of side1 and side2 in pairs, such that (order(chrom1) < order(chrom2) or (order(chrom1) == order(chrom2)) and (pos1 <=pos2)) Equivalent to reflecting the lower triangle of a Hi-C matrix onto its upper triangle, resulting in an upper triangular matrix. The order of chromosomes must be provided via a .chromsizes file. PAIRS_PATH : input .pairs/.pairsam file. If the path ends with .gz or .lz4, the input is decompressed by bgzip/lz4c. By default, the input is read from stdin. 
def flip_py(pairs_path, chroms_path, output, **kwargs):
    """Flip the two sides of each pair so that the pairs are upper-triangular.

    Parameters
    ----------
    pairs_path : str
        Input path; stdin is used when empty.
    chroms_path : str
        Chromosomes file defining the chromosome order (read with
        ``headerops.get_chrom_order``).
    output : str
        Output path; stdout is used when empty.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are forwarded
        to ``fileio.auto_open``.

    Notes
    -----
    Ordering rules applied to every line:

    * both chromosomes listed: compare ``(rank, position)`` of the two sides;
    * exactly one listed: the listed side goes first;
    * neither listed: compare the names lexicographically and, for the same
      name, the positions.

    When a pair is flipped, every ``<name>1``/``<name>2`` column pair is
    swapped and the two letters of ``pair_type`` are exchanged.
    """
    instream = (
        fileio.auto_open(
            pairs_path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        if pairs_path
        else sys.stdin
    )
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )

    # Rank 0 is reserved for the unmapped placeholder; listed chromosomes follow.
    chromosomes = headerops.get_chrom_order(chroms_path)
    chrom_enum = dict(
        zip(
            [pairsam_format.UNMAPPED_CHROM] + list(chromosomes),
            range(len(chromosomes) + 1),
        )
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    outstream.writelines((l + "\n" for l in header))

    column_names = headerops.extract_column_names(header)
    if len(column_names) == 0:
        column_names = pairsam_format.COLUMNS

    chrom1_col = column_names.index("chrom1")
    chrom2_col = column_names.index("chrom2")
    pos1_col = column_names.index("pos1")
    pos2_col = column_names.index("pos2")
    pair_type_col = (
        column_names.index("pair_type") if "pair_type" in column_names else -1
    )

    # Every column "<x>1" that has a partner "<x>2" is swapped on flipping.
    col_pairs_to_flip = [
        (column_names.index(col), column_names.index(col[:-1] + "2"))
        for col in column_names
        if col.endswith("1") and (col[:-1] + "2") in column_names
    ]

    sep = pairsam_format.PAIRSAM_SEP
    for line in body_stream:
        cols = line.rstrip("\n").split(sep)
        chrom1 = cols[chrom1_col]
        chrom2 = cols[chrom2_col]

        is_annotated1 = chrom1 in chrom_enum
        is_annotated2 = chrom2 in chrom_enum
        if not is_annotated1 or not is_annotated2:
            warnings.warn("Unannotated chromosomes in the pairs file!")

        # Flip so that annotated chromosome stands first:
        if is_annotated1 and not is_annotated2:
            has_correct_order = True
        elif is_annotated2 and not is_annotated1:
            has_correct_order = False
        elif not is_annotated1 and not is_annotated2:
            if chrom1 != chrom2:
                has_correct_order = chrom1 < chrom2
            else:
                # Same unlisted chromosome: the positions decide the order.
                has_correct_order = int(cols[pos1_col]) <= int(cols[pos2_col])
        else:  # both are annotated:
            has_correct_order = (chrom_enum[chrom1], int(cols[pos1_col])) <= (
                chrom_enum[chrom2],
                int(cols[pos2_col]),
            )

        # flipping:
        if not has_correct_order:
            n_cols = len(cols)
            for col1, col2 in col_pairs_to_flip:
                # Short lines may lack the trailing optional columns.
                if (col1 < n_cols) and (col2 < n_cols):
                    cols[col1], cols[col2] = cols[col2], cols[col1]
            if pair_type_col != -1 and pair_type_col < n_cols:
                pair_type = cols[pair_type_col]
                # Leave a malformed (shorter than two letters) pair type untouched.
                if len(pair_type) >= 2:
                    cols[pair_type_col] = pair_type[1] + pair_type[0]

        outstream.write(sep.join(cols))
        outstream.write("\n")

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
def add_arg_help(func):
    """Fill the PAIRS_PATH description into the docstring of ``func``.

    The docstring is used as a ``str.format`` template and receives the shared
    description of the PAIRS_PATH argument as its single positional value.
    A function without a docstring (for example under ``python -OO``, which
    strips docstrings) is returned unchanged instead of failing.

    Returns
    -------
    The same function object, so this can be used as a decorator.
    """
    if func.__doc__ is None:
        return func
    func.__doc__ = func.__doc__.format(
        """
    PAIRS_PATH : input .pairs/.pairsam file. If the path ends with .gz or .lz4, the
    input is decompressed by bgzip/lz4c. By default, the input is read from stdin.
    """
    )
    return func
def generate_py(pairs_path, output, chroms_path, sam_path, columns, assembly, **kwargs):
    """Write a freshly generated header followed by the body of a pairs file.

    Parameters
    ----------
    pairs_path : str
        Input pairs file, opened with ``fileio.auto_open``.
    output : str
        Output path; stdout is used when empty.
    chroms_path : str or None
        Chromosomes file providing the chromosome sizes. Ignored when
        ``sam_path`` is given.
    sam_path : str or None
        SAM file whose header provides the chromosome sizes and is inserted
        into the new header.
    columns : str
        Comma-separated column names; when empty, the standard pairs or
        pairsam columns are used depending on ``kwargs["pairs"]``.
    assembly : str
        Assembly name stored in the header.
    **kwargs
        ``extra_columns``, ``pairs``, ``no_flip``, ``nproc_in``, ``nproc_out``,
        ``cmd_in`` and ``cmd_out`` are read.

    Raises
    ------
    ValueError
        If neither ``chroms_path`` nor ``sam_path`` is given, or if
        ``headerops.validate_cols`` rejects the column list.
    """
    # Without one of these there is no source of chromosome sizes at all.
    if not chroms_path and not sam_path:
        raise ValueError(
            "Either --sam-path or --chroms-path must be provided "
            "to store the chromosome sizes in the header."
        )

    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    # Only the body is needed; the existing header is replaced.
    _, body_stream = headerops.get_header(instream, ignore_warning=True)

    samheader = None
    if sam_path:
        # Parse chromosome sizes present in sam input (takes precedence):
        input_sam = AlignmentFilePairtoolized(
            sam_path, "r", threads=kwargs.get("nproc_in")
        )
        samheader = input_sam.header
        chromsizes = headerops.get_chromsizes_from_pysam_header(samheader)
    else:
        # Parse chromosome sizes present in the chromosomes file:
        chromsizes = headerops.get_chromsizes_from_file(chroms_path)

    # Read the input columns. Always work on a private list, so that adding
    # extra columns below never extends the module-level defaults in place.
    if columns:
        columns = columns.split(",")
    elif kwargs.get("pairs", True):
        columns = list(pairsam_format.COLUMNS_PAIRS)
    else:
        columns = list(pairsam_format.COLUMNS_PAIRSAM)

    extra_columns = kwargs.get("extra_columns", "")
    if extra_columns:
        columns = columns + extra_columns.split(",")

    # Write new header to the pairsam file
    new_header = headerops.make_standard_pairsheader(
        assembly=assembly,
        chromsizes=chromsizes,
        columns=columns,
        shape="whole matrix" if kwargs.get("no_flip", False) else "upper triangle",
    )
    if sam_path:
        new_header = headerops.insert_samheader_pysam(new_header, samheader)
    new_header = headerops.append_new_pg(new_header, ID=UTIL_NAME, PN=UTIL_NAME)

    # Check that the number of columns in the body corresponds to the header:
    if not headerops.validate_cols(instream, columns):
        raise ValueError(
            f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(columns)}\n\t{body_stream.readline()}"
        )

    ########
    # Write the output after successful checks:
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )
    outstream.writelines((l + "\n" for l in new_header))
    # Flush before a subprocess writes to the same descriptor.
    outstream.flush()

    if body_stream == sys.stdin:
        for line in body_stream:
            outstream.write(line)
    else:
        # The body is re-read from the file by a shell pipeline; sed drops the
        # leading header lines.
        # NOTE(review): pairs_path and cmd_in are interpolated unquoted into a
        # shell command - paths with spaces or quotes will break it.
        command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """
        if kwargs.get("cmd_in", None):
            command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format(
                pairs_path, kwargs["cmd_in"]
            )
        elif pairs_path.endswith(".gz"):
            command += (
                r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format(
                    kwargs["nproc_in"], pairs_path
                )
            )
        elif pairs_path.endswith(".lz4"):
            command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format(
                pairs_path
            )
        else:
            command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path)
        command += "'"
        subprocess.check_call(command, shell=True, stdout=outstream)

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
pairs_path, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) header, body_stream = headerops.get_header(instream, ignore_warning=True) # Read the header from reference file instream_header = fileio.auto_open( reference_file, mode="r", nproc=kwargs.get("nproc_in"), command=kwargs.get("cmd_in", None), ) reference_header, _ = headerops.get_header(instream_header) # Close the reference stream after extraction of the header: if instream_header != sys.stdin: instream_header.close() reference_columns = headerops.extract_column_names(reference_header) # Check that the number of columns in the body corresponds to the header: if not headerops.validate_cols(instream, reference_columns): raise ValueError( f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(reference_columns)}\n\t{body_stream.readline()}" ) ######## # Write the output after successful checks: outstream = ( fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output else sys.stdout ) reference_header = headerops.append_new_pg( reference_header, ID=UTIL_NAME, PN=UTIL_NAME ) outstream.writelines((l + "\n" for l in reference_header)) outstream.flush() if body_stream == sys.stdin: for line in body_stream: outstream.write(line) else: command = r""" /bin/bash -c 'export LC_COLLATE=C; export LANG=C; cat """ if kwargs.get("cmd_in", None): command += r""" <(cat {} | {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path, kwargs["cmd_in"] ) elif pairs_path.endswith(".gz"): command += ( r""" <(bgzip -dc -@ {} {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( kwargs["nproc_in"], pairs_path ) ) elif pairs_path.endswith(".lz4"): command += r""" <(lz4c -dc {} | sed -n -e '\''/^[^#]/,$p'\'')""".format( pairs_path ) else: command += r""" <(sed -n -e '\''/^[^#]/,$p'\'' {})""".format(pairs_path) command += "'" subprocess.check_call(command, shell=True, stdout=outstream) if instream != sys.stdin: instream.close() if outstream != 
@register_subcommand
@add_arg_help
@click.option(
    "--columns",
    "-c",
    help=f"Comma-separated list of columns to be added, e.g.: {','.join(pairsam_format.COLUMNS)}",
    type=str,
    required=True,
)
def set_columns(pairs_path, output, columns, **kwargs):
    """
    Add the columns to the .pairs/pairsam file
    """
    # NOTE: the docstring above is the CLI help text; the work is in set_columns_py.
    set_columns_py(pairs_path, output, columns, **kwargs)


def set_columns_py(pairs_path, output, columns, **kwargs):
    """Rewrite the column declaration in the header and copy the body through.

    Parameters
    ----------
    pairs_path : str
        Input .pairs/.pairsam path; a falsy value reads ``sys.stdin``.
    output : str
        Output path; a falsy value writes to ``sys.stdout``.
    columns : str
        Comma-separated column names passed to ``headerops.set_columns``.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are read.

    Raises
    ------
    subprocess.CalledProcessError
        If the shell pipeline that copies the body fails.
    """
    import shlex  # local import: only needed for quoting the input path

    instream = (
        fileio.auto_open(
            pairs_path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        if pairs_path
        else sys.stdin
    )
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.set_columns(header, columns.split(","))

    outstream.writelines((l + "\n" for l in header))
    outstream.flush()

    if body_stream == sys.stdin:
        for line in body_stream:
            outstream.write(line)
    else:
        # sed prints from the first line not starting with "#" to the end of
        # input, i.e. it drops the header and keeps the body.
        strip_header = "sed -n -e '/^[^#]/,$p'"
        # Quote the path so spaces/metacharacters cannot alter the command.
        quoted_path = shlex.quote(pairs_path)
        cmd_in = kwargs.get("cmd_in", None)
        if cmd_in:
            pipeline = f"cat {quoted_path} | {cmd_in} | {strip_header}"
        elif pairs_path.endswith(".gz"):
            nproc_in = kwargs.get("nproc_in", 1)
            pipeline = f"bgzip -dc -@ {nproc_in} {quoted_path} | {strip_header}"
        elif pairs_path.endswith(".lz4"):
            pipeline = f"lz4c -dc {quoted_path} | {strip_header}"
        else:
            pipeline = f"{strip_header} {quoted_path}"
        script = f"export LC_COLLATE=C; export LANG=C; cat <({pipeline})"
        # An argument list (no shell=True) hands the script to bash verbatim.
        subprocess.check_call(["/bin/bash", "-c", script], stdout=outstream)

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
@register_subcommand
@add_arg_help
@click.option(
    "--reference-file",
    "-r",
    help="Header file for comparison (optional)",
    type=str,
    required=False,
    default="",
)
@click.option(
    "--reference-columns",
    "-c",
    help=f"Comma-separated list of columns fro check (optional), e.g.: {','.join(pairsam_format.COLUMNS)}",
    type=str,
    required=False,
    default="",
)
def validate_columns(pairs_path, output, reference_file, reference_columns, **kwargs):
    """
    Validate the columns of the .pairs/pairsam file against reference or within file.
    If the checks pass, then returns full pairs file. Otherwise throws an exception.

    If reference_file is provided, check:
    1) columns are the same between pairs and reference_file
    2) number of columns in the pairs body is the same as the number of columns

    If reference_columns are provided, check:
    1) pairs columns are the same as provided
    2) number of columns in the pairs body is the same as the number of columns

    If no reference_file or columns, then check only the number of columns in the pairs body.
    Checks only the first line in the pairs stream!

    """
    # NOTE: the docstring above is the CLI help text; the work is in
    # validate_columns_py.
    validate_columns_py(pairs_path, output, reference_file, reference_columns, **kwargs)


def validate_columns_py(
    pairs_path, output, reference_file, reference_columns, **kwargs
):
    """Check the columns of a pairs file and, on success, copy it to the output.

    Parameters
    ----------
    pairs_path : str
        Input .pairs/.pairsam path, opened with ``fileio.auto_open``.
    output : str
        Output path; a falsy value writes to ``sys.stdout``.
    reference_file : str
        Optional file whose header columns the input must match. When given,
        it takes precedence over ``reference_columns`` (a warning is issued
        if both are set).
    reference_columns : str
        Optional comma-separated column names the input must match.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in`` and ``cmd_out`` are read.

    Raises
    ------
    ValueError
        If the header columns differ from the reference columns, or if
        ``headerops.validate_cols`` rejects the input against its own columns.
    subprocess.CalledProcessError
        If the shell pipeline that copies the body fails.
    """
    import shlex  # local import: only needed for quoting the input path

    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    header, body_stream = headerops.get_header(instream)
    pairs_columns = headerops.extract_column_names(header)

    # Convert reference columns string into list, if provided
    if reference_columns:
        reference_columns = reference_columns.split(",")

    # Read the header from reference file
    if reference_file:
        instream_header = fileio.auto_open(
            reference_file,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        reference_header, _ = headerops.get_header(instream_header)

        # Close the reference stream after extraction of the header:
        if instream_header != sys.stdin:
            instream_header.close()

        if reference_columns:
            warnings.warn(
                "--reference-columns are ignored, as --reference-file is provided"
            )
        reference_columns = headerops.extract_column_names(reference_header)

    if reference_columns:
        if pairs_columns != reference_columns:
            raise ValueError(
                f"Pairs columns differ from reference columns:\n\t{pairs_columns}\n\t{reference_columns}"
            )

    # Check that the number of columns in the body corresponds to the header:
    if not headerops.validate_cols(instream, pairs_columns):
        raise ValueError(
            f"Number of columns mismatch:\n\t#columns: {headerops.SEP_COLS.join(pairs_columns)}\n\t{body_stream.readline()}"
        )

    ########
    # Write the output after successful checks:
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )

    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    outstream.writelines((l + "\n" for l in header))
    outstream.flush()

    if body_stream == sys.stdin:
        for line in body_stream:
            outstream.write(line)
    else:
        # sed prints from the first line not starting with "#" to the end of
        # input, i.e. it drops the header and keeps the body.
        strip_header = "sed -n -e '/^[^#]/,$p'"
        # Quote the path so spaces/metacharacters cannot alter the command.
        quoted_path = shlex.quote(pairs_path)
        cmd_in = kwargs.get("cmd_in", None)
        if cmd_in:
            pipeline = f"cat {quoted_path} | {cmd_in} | {strip_header}"
        elif pairs_path.endswith(".gz"):
            nproc_in = kwargs.get("nproc_in", 1)
            pipeline = f"bgzip -dc -@ {nproc_in} {quoted_path} | {strip_header}"
        elif pairs_path.endswith(".lz4"):
            pipeline = f"lz4c -dc {quoted_path} | {strip_header}"
        else:
            pipeline = f"{strip_header} {quoted_path}"
        script = f"export LC_COLLATE=C; export LANG=C; cat <({pipeline})"
        # An argument list (no shell=True) hands the script to bash verbatim.
        subprocess.check_call(["/bin/bash", "-c", script], stdout=outstream)

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
@cli.command()
@click.argument("pairsam_path", type=str, required=False)
@click.option(
    "-o",
    "--output",
    type=str,
    default="",
    help="output .pairsam file."
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed."
    " By default, the output is printed into stdout.",
)
@common_io_options
def markasdup(pairsam_path, output, **kwargs):
    """Tag all pairs in the input file as duplicates.

    Change the type of all pairs inside a .pairs/.pairsam file to DD. If sam
    entries are present, change the pair type in the Yt SAM tag to 'Yt:Z:DD'.

    PAIRSAM_PATH : input .pairs/.pairsam file. If the path ends with .gz, the
    input is gzip-decompressed. By default, the input is read from stdin.
    """
    # The docstring above doubles as the CLI help text; delegate the work.
    markasdup_py(pairsam_path, output, **kwargs)


def markasdup_py(pairsam_path, output, **kwargs):
    """Copy a pairs stream, passing every record through mark_split_pair_as_dup.

    The header gets a new @PG record for this tool. Each body line is split
    on the pairsam separator, handed to ``mark_split_pair_as_dup`` (which
    edits the field list in place), re-joined and written out.
    """
    nproc_in = kwargs.get("nproc_in")
    nproc_out = kwargs.get("nproc_out")
    cmd_in = kwargs.get("cmd_in", None)
    cmd_out = kwargs.get("cmd_out", None)

    instream = fileio.auto_open(pairsam_path, mode="r", nproc=nproc_in, command=cmd_in)
    outstream = fileio.auto_open(output, mode="w", nproc=nproc_out, command=cmd_out)

    original_header, body_stream = headerops.get_header(instream)
    tagged_header = headerops.append_new_pg(
        original_header, ID=UTIL_NAME, PN=UTIL_NAME
    )
    outstream.writelines(header_line + "\n" for header_line in tagged_header)

    separator = pairsam_format.PAIRSAM_SEP
    for record in body_stream:
        fields = record.rstrip("\n").split(separator)
        mark_split_pair_as_dup(fields)
        outstream.write(separator.join(fields))
        outstream.write("\n")

    # Never close the process-wide standard streams.
    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
@cli.command()
@click.argument(
    "pairs_path",
    nargs=-1,
    type=str,
)
@click.option(
    "-o",
    "--output",
    type=str,
    default="",
    help="output file."
    " If the path ends with .gz/.lz4, the output is compressed by bgzip/lz4c."
    " By default, the output is printed into stdout.",
)
@click.option(
    "--max-nmerge",
    type=int,
    default=8,
    show_default=True,
    help="The maximal number of inputs merged at once. For more, store "
    "merged intermediates in temporary files.",
)
@click.option(
    "--tmpdir",
    type=str,
    default="",
    help="Custom temporary folder for merged intermediates.",
)
@click.option(
    "--memory",
    type=str,
    default="2G",
    show_default=True,
    help="The amount of memory used by default.",
)
@click.option(
    "--compress-program",
    type=str,
    default="",
    show_default=True,
    help="A binary to compress temporary merged chunks. "
    "Must decompress input when the flag -d is provided. "
    "Suggested alternatives: lz4c, gzip, lzop, snzip. "
    "NOTE: fails silently if the command syntax is wrong. ",
)
@click.option(
    "--nproc",
    type=int,
    default=8,
    help="Number of threads for merging.",
    show_default=True,
)
@click.option(
    "--nproc-in",
    type=int,
    default=1,
    show_default=True,
    help="Number of processes used by the auto-guessed input decompressing command.",
)
@click.option(
    "--nproc-out",
    type=int,
    default=8,
    show_default=True,
    help="Number of processes used by the auto-guessed output compressing command.",
)
@click.option(
    "--cmd-in",
    type=str,
    default=None,
    help="A command to decompress the input. "
    "If provided, fully overrides the auto-guessed command. "
    "Does not work with stdin. "
    "Must read input from stdin and print output into stdout. "
    "EXAMPLE: pbgzip -dc -n 3",
)
@click.option(
    "--cmd-out",
    type=str,
    default=None,
    help="A command to compress the output. "
    "If provided, fully overrides the auto-guessed command. "
    "Does not work with stdout. "
    "Must read input from stdin and print output into stdout. "
    "EXAMPLE: pbgzip -c -n 8",
)
@click.option(
    "--keep-first-header/--no-keep-first-header",
    default=False,
    show_default=True,
    help="Keep the first header or merge the headers together. Default: merge headers.",
)
@click.option(
    "--concatenate/--no-concatenate",
    default=False,
    show_default=True,
    help="Simple concatenate instead of merging sorted files.",
)
# Using custom IO options
def merge(
    pairs_path, output, max_nmerge, tmpdir, memory, compress_program, nproc, **kwargs
):
    """Merge .pairs/.pairsam files.
    By default, assumes that the files are sorted and maintains the sorting.

    Merge triu-flipped sorted pairs/pairsam files. If present, the @SQ records
    of the SAM header must be identical; the sorting order of
    these lines is taken from the first file in the list.
    The ID fields of the @PG records of the SAM header are modified with a
    numeric suffix to produce unique records.
    The other unique SAM and non-SAM header lines are copied into the output header.

    PAIRS_PATH : upper-triangular flipped sorted .pairs/.pairsam files to merge
    or a group/groups of .pairs/.pairsam files specified by a wildcard. For
    paths ending in .gz/.lz4, the files are decompressed by bgzip/lz4c.

    """
    # NOTE: the docstring above is the CLI help text; the work is in merge_py.
    merge_py(
        pairs_path,
        output,
        max_nmerge,
        tmpdir,
        memory,
        compress_program,
        nproc,
        **kwargs,
    )


def merge_py(
    pairs_path, output, max_nmerge, tmpdir, memory, compress_program, nproc, **kwargs
):
    """Merge (or concatenate) several pairs files into one output stream.

    Parameters
    ----------
    pairs_path : iterable of str
        Glob masks; every mask is expanded with ``glob.glob``.
    output : str
        Output path handed to ``fileio.auto_open``.
    max_nmerge : int
        Passed to ``sort --batch-size`` when truthy.
    tmpdir : str
        Passed to ``sort --temporary-directory`` when non-empty.
    memory : str
        Passed to ``sort -S``.
    compress_program : str
        Passed to ``sort --compress-program`` when non-empty.
    nproc : int
        Passed to ``sort --parallel`` when greater than 1.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out``,
        ``keep_first_header`` and ``concatenate`` are read.

    Raises
    ------
    ValueError
        If the masks match no files, or the input headers declare
        different columns.
    subprocess.CalledProcessError
        If the ``sort``/``cat`` shell pipeline fails.
    """
    import shlex  # local import: only needed for quoting paths

    paths = [path for mask in pairs_path for path in glob.glob(mask)]
    if len(paths) == 0:
        raise ValueError(f"No input paths: {pairs_path}")

    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    # if there is only one input, bypass merging and do not modify the header
    if len(paths) == 1:
        instream = fileio.auto_open(
            paths[0],
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        try:
            for line in instream:
                outstream.write(line)
        finally:
            # The input stream used to be left open on this path.
            if instream != sys.stdin:
                instream.close()
        if outstream != sys.stdout:
            outstream.close()

        return

    headers = []
    for path in paths:
        f = fileio.auto_open(
            path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        h, _ = headerops.get_header(f)
        headers.append(h)
        f.close()
        # Skip other headers if keep_first_header is True (False by default):
        if kwargs.get("keep_first_header", False):
            break

    if not headerops.all_same_columns(headers):
        raise ValueError("Input pairs cannot contain different columns")

    merged_header = headerops.merge_headers(headers)
    merged_header = headerops.append_new_pg(merged_header, ID=UTIL_NAME, PN=UTIL_NAME)

    outstream.writelines((l + "\n" for l in merged_header))
    outstream.flush()

    # If concatenation requested instead of merging sorted input:
    if kwargs.get("concatenate", False):
        command = "cat"
    # Full merge that keeps the ordered input:
    else:
        # sort(1) field numbers are 1-based, the column constants 0-based.
        c1 = pairsam_format.COL_C1 + 1
        c2 = pairsam_format.COL_C2 + 1
        p1 = pairsam_format.COL_P1 + 1
        p2 = pairsam_format.COL_P2 + 1
        ptype = pairsam_format.COL_PTYPE + 1
        command = (
            f"sort -k {c1},{c1} -k {c2},{c2} -k {p1},{p1}n -k {p2},{p2}n"
            f" -k {ptype},{ptype} --merge"
            f" --field-separator=$'{pairsam_format.PAIRSAM_SEP_ESCAPE}'"
        )
        if nproc > 1:
            command += f" --parallel={nproc}"
        if max_nmerge:
            command += f" --batch-size={max_nmerge}"
        if tmpdir:
            command += f" --temporary-directory={shlex.quote(tmpdir)}"
        command += f" -S {memory}"
        if compress_program:
            command += f" --compress-program={compress_program}"

    # sed prints from the first line not starting with "#" to the end of
    # input, i.e. it drops each file's header and keeps the body.
    strip_header = "sed -n -e '/^[^#]/,$p'"
    cmd_in = kwargs.get("cmd_in", None)
    nproc_in = kwargs.get("nproc_in", 1)
    for path in paths:
        # Quote the path so spaces/metacharacters cannot alter the command.
        quoted_path = shlex.quote(path)
        if cmd_in:
            source = f"cat {quoted_path} | {cmd_in} | {strip_header}"
        elif path.endswith(".gz"):
            source = f"bgzip -dc -@ {nproc_in} {quoted_path} | {strip_header}"
        elif path.endswith(".lz4"):
            source = f"lz4c -dc {quoted_path} | {strip_header}"
        else:
            source = f"{strip_header} {quoted_path}"
        command += f" <({source})"

    script = f"export LC_COLLATE=C; export LANG=C; {command}"
    # An argument list (no shell=True) hands the script to bash verbatim.
    subprocess.check_call(["/bin/bash", "-c", script], stdout=outstream)

    if outstream != sys.stdout:
        outstream.close()
", ) @click.option( "--assembly", type=str, help="Name of genome assembly (e.g. hg19, mm10) to store in the pairs header.", ) @click.option( "--min-mapq", type=int, default=1, show_default=True, help="The minimal MAPQ score to consider a read as uniquely mapped", ) @click.option( "--max-molecule-size", type=int, default=750, show_default=True, help="The maximal size of a Hi-C molecule; used to rescue single ligations" "(from molecules with three alignments) and to rescue complex ligations." "The default is based on oriented P(s) at short ranges of multiple Hi-C." "Not used with walks-policy all.", ) @click.option( "--drop-readid", is_flag=True, help="If specified, do not add read ids to the output", ) @click.option( "--drop-seq", is_flag=True, help="If specified, remove sequences and PHREDs from the sam fields", ) @click.option( "--drop-sam", is_flag=True, help="If specified, do not add sams to the output" ) @click.option( "--add-pair-index", is_flag=True, help="If specified, each pair will have pair index in the molecule", ) @click.option( "--add-columns", type=click.STRING, default="", help="Report extra columns describing alignments " "Possible values (can take multiple values as a comma-separated " "list): a SAM tag (any pair of uppercase letters) or {}.".format( ", ".join(pairsam_format.EXTRA_COLUMNS) ), ) @click.option( "--output-parsed-alignments", type=str, default="", help="output file for all parsed alignments, including walks." " Useful for debugging and rnalysis of walks." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed." " By default, not used.", ) @click.option( "--output-stats", type=str, default="", help="output file for various statistics of pairs file. 
" " By default, statistics is not generated.", ) @click.option( "--report-alignment-end", type=click.Choice(["5", "3"]), default="5", help="specifies whether the 5' or 3' end of the alignment is reported as" " the position of the Hi-C read.", ) @click.option( "--max-inter-align-gap", type=int, default=20, show_default=True, help="read segments that are not covered by any alignment and" ' longer than the specified value are treated as "null" alignments.' " These null alignments convert otherwise linear alignments into walks," " and affect how they get reported as a Hi-C pair (see --walks-policy).", ) @click.option( "--walks-policy", type=click.Choice(["mask", "5any", "5unique", "3any", "3unique", "all"]), default="5unique", help="the policy for reporting unrescuable walks (reads containing more" " than one alignment on one or both sides, that can not be explained by a" " single ligation between two mappable DNA fragments)." ' "mask" - mask walks (chrom="!", pos=0, strand="-"); ' ' "5any" - report the 5\'-most alignment on each side;' ' "5unique" - report the 5\'-most unique alignment on each side, if present;' ' "3any" - report the 3\'-most alignment on each side;' ' "3unique" - report the 3\'-most unique alignment on each side, if present;' ' "all" - report all available unique alignments on each side.', show_default=True, ) @click.option( "--readid-transform", type=str, default=None, help="A Python expression to modify read IDs. Useful when read IDs differ " "between the two reads of a pair. Must be a valid Python expression that " "uses variables called readID and/or i (the 0-based index of the read pair " "in the bam file) and returns a new value, e.g. \"readID[:-2]+'_'+str(i)\". 
" "Make sure that transformed readIDs remain unique!", show_default=True, ) @click.option( "--flip/--no-flip", is_flag=True, default=True, help="If specified, do not flip pairs in genomic order and instead preserve " "the order in which they were sequenced.", ) @common_io_options def parse( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): """Find ligation pairs in .sam data, make .pairs. SAM_PATH : an input .sam/.bam file with paired-end sequence alignments of Hi-C molecules. If the path ends with .bam, the input is decompressed from bam with samtools. By default, the input is read from stdin. """ parse_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ) def parse_py( sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs ): ### Set up input stream if sam_path: # open input sam file with pysam input_sam = AlignmentFilePairtoolized( sam_path, "r", threads=kwargs.get("nproc_in") ) else: # read from stdin input_sam = AlignmentFilePairtoolized("-", "r", threads=kwargs.get("nproc_in")) ### Set up output streams outstream = fileio.auto_open( output, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) out_alignments_stream, out_stats_stream = None, None if output_parsed_alignments: out_alignments_stream = fileio.auto_open( output_parsed_alignments, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if output_stats: out_stats_stream = fileio.auto_open( output_stats, mode="w", nproc=kwargs.get("nproc_out"), command=kwargs.get("cmd_out", None), ) if out_alignments_stream: out_alignments_stream.write( "readID\tside\tchrom\tpos\tstrand\tmapq\tcigar\tdist_5_lo\tdist_5_hi\tmatched_bp\n" ) # generate empty PairCounter if stats output is requested: out_stat = PairCounter() if output_stats else None ### Set up output parameters add_columns = kwargs.get("add_columns", []) add_columns = [col for col in add_columns.split(",") if col] for 
col in add_columns: if not ( (col in pairsam_format.EXTRA_COLUMNS) or (len(col) == 2 and col.isupper()) ): raise Exception("{} is not a valid extra column".format(col)) columns = pairsam_format.COLUMNS + ( [c + side for c in add_columns for side in ["1", "2"]] ) if kwargs.get("drop_sam", True): columns.pop(columns.index("sam1")) columns.pop(columns.index("sam2")) if not kwargs.get("add_pair_index", False): columns.pop(columns.index("walk_pair_index")) columns.pop(columns.index("walk_pair_type")) ### Parse header samheader = input_sam.header if not samheader: raise ValueError( "The input sam is missing a header! If reading a bam file, please use `samtools view -h` to include the header." ) ### Parse chromosome files present in the input sam_chromsizes = headerops.get_chromsizes_from_pysam_header(samheader) chromosomes = headerops.get_chrom_order(chroms_path, list(sam_chromsizes.keys())) ### Write new header to the pairsam file header = headerops.make_standard_pairsheader( assembly=kwargs.get("assembly", ""), chromsizes=[(chrom, sam_chromsizes[chrom]) for chrom in chromosomes], columns=columns, shape="whole matrix" if not kwargs["flip"] else "upper triangle", ) header = headerops.insert_samheader_pysam(header, samheader) header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME) outstream.writelines((l + "\n" for l in header)) ### Parse input and write to the outputs streaming_classify( input_sam, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs ) # save statistics to a file if it was requested: if out_stat: out_stat.save(out_stats_stream) if outstream != sys.stdout: outstream.close() # close optional output streams if needed: if out_alignments_stream and out_alignments_stream != sys.stdout: out_alignments_stream.close() if out_stats_stream and out_stats_stream != sys.stdout: out_stats_stream.close() if __name__ == "__main__": parse() 
@cli.command()
@click.argument("sam_path", type=str, required=False)
# Parsing options:
@click.option(
    "-c",
    "--chroms-path",
    type=str,
    required=True,
    help="Chromosome order used to flip interchromosomal mates: "
    "path to a chromosomes file (e.g. UCSC chrom.sizes or similar) whose "
    "first column lists scaffold names. Any scaffolds not listed will be "
    "ordered lexicographically following the names provided.",
)
@click.option(
    "-o",
    "--output",
    type=str,
    default="",
    help="output file with pairs. "
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed."
    "By default, the output is printed into stdout. ",
)
@click.option(
    "--report-position",
    type=click.Choice(["junction", "read", "walk", "outer"]),
    default="outer",
    help="""Reported position of alignments in pairs of complex walks (pos columns).
    Each alignment in .bam/.sam Hi-C-like data has two ends, and you can report one or another depending of the position of alignment on a read or in a pair.

    "junction" - inner ends of sequential alignments in each pair, aka ligation junctions,
    "read" - 5'-end of alignments relative to R1 or R2 read coordinate system (as in traditional Hi-C),
    "walk" - 5'-end of alignments relative to the whole walk coordinate system,
    "outer" - outer ends of sequential alignments in each pair (parse2 default). """,
)
@click.option(
    "--report-orientation",
    type=click.Choice(["pair", "read", "walk", "junction"]),
    default="pair",
    help="""Reported orientataion of pairs in complex walk (strand columns).
    Each alignment in .bam/.sam Hi-C-like data has orientation, and you can report it relative to the read, pair or whole walk coordinate system.

    "pair" - orientation as if each pair in complex walk was sequenced independently from the outer ends or molecule (as in traditional Hi-C, also complex walks default),
    "read" - orientation defined by the read (R1 or R2 read coordinate system),
    "walk" - orientation defined by the walk coordinate system,
    "junction" - reversed "pair" orientation, as if pair was sequenced in both directions starting from the junction""",
)
@click.option(
    "--assembly",
    type=str,
    help="Name of genome assembly (e.g. hg19, mm10) to store in the pairs header.",
)
@click.option(
    "--min-mapq",
    type=int,
    default=1,
    show_default=True,
    help="The minimal MAPQ score to consider a read as uniquely mapped.",
)
@click.option(
    "--max-inter-align-gap",
    type=int,
    default=20,
    show_default=True,
    help="Read segments that are not covered by any alignment and"
    ' longer than the specified value are treated as "null" alignments.'
    " These null alignments convert otherwise linear alignments into walks,"
    " and affect how they get reported as a Hi-C pair.",
)
@click.option(
    "--max-insert-size",
    type=int,
    default=500,
    show_default=True,
    help="When searching for overlapping ends of left and right read (R1 and R2), this sets the minimal distance "
    "when two alignments on the same strand and chromosome are considered part of the same fragment (and thus reported as the same alignment "
    "and not a pair). For traditional Hi-C with long restriction fragments and shorter molecules after ligation+sonication, this "
    "can be the expected molecule size. For complex walks with short restriction fragments, this can be the expected restriction fragment "
    "size. Note that unsequenced insert is *terra incognita* and might contain unsequenced DNA (including ligations) in it. "
    "This parameter is ignored in --single-end mode. ",
)
@click.option(
    "--dedup-max-mismatch",
    type=int,
    default=3,
    show_default=True,
    help="Allowed mismatch between intramolecular alignments to detect readthrough duplicates. "
    "Pairs with both sides mapped within this distance (bp) from each "
    "other are considered duplicates. ",
)
@click.option(
    "--single-end",
    is_flag=True,
    help="If specified, the input is single-end. "
    "Never use this for paired-end data, because R1 read will be omitted. "
    "If single-end data is provided, but parameter is unset, the pairs will be "
    "generated, but may contain artificial UN pairs. ",
)
@click.option(
    "--expand/--no-expand",
    is_flag=True,
    help="If specified, perform combinatorial expansion on the pairs. "
    "Combinatorial expansion is a way to increase the number of contacts in you data, assuming that all DNA fragments in the same molecule (read) are in contact. "
    "Expanded pairs have modified pair type, 'E{separation}_{pair type}'",
)
@click.option(
    "--max-expansion-depth",
    type=int,
    default=None,
    show_default=True,
    help="Works in combination with --expand. "
    "Maximum number of segments separating pair. By default, expanding all possible pairs."
    "Setting the number will limit the expansion depth and enforce contacts from the same "
    "side of the read. ",
)
@click.option(
    "--add-pair-index",
    is_flag=True,
    help="If specified, parse2 will report pair index in the walk as additional columns (R1, R2, R1&R2 or R1-R2). "
    "See documentation: https://pairtools.readthedocs.io/en/latest/parsing.html#rescuing-complex-walks "
    "For combinatorial expanded pairs, two numbers will be reported: "
    "original pair index of the left and right segments. ",
)
@click.option(
    "--flip/--no-flip",
    is_flag=True,
    default=False,
    # Reworded: the previous text said "flip ... and instead preserve the
    # order", which contradicted itself.
    help="If specified, flip pairs in genomic order instead of preserving "
    "the order in which they were sequenced. Note that no flip is recommended for analysis of walks because it will "
    "override the order of alignments in pairs. Flip is required for appropriate deduplication of sorted pairs. "
    "Flip is not required for cooler cload, which runs flipping internally. ",
)
@click.option(
    "--add-columns",
    type=click.STRING,
    default="",
    help="Report extra columns describing alignments "
    "Possible values (can take multiple values as a comma-separated "
    "list): a SAM tag (any pair of uppercase letters) or {}.".format(
        ", ".join(pairsam_format.EXTRA_COLUMNS)
    ),
)
@click.option(
    "--drop-readid/--keep-readid",
    is_flag=True,
    default=False,
    help="If specified, do not add read ids to the output. By default, keep read ids. Useful for long walks analysis. ",
)
@click.option(
    "--readid-transform",
    type=str,
    default=None,
    help="A Python expression to modify read IDs. Useful when read IDs differ "
    "between the two reads of a pair. Must be a valid Python expression that "
    "uses variables called readID and/or i (the 0-based index of the read pair "
    "in the bam file) and returns a new value, e.g. \"readID[:-2]+'_'+str(i)\". "
    "Make sure that transformed readIDs remain unique!",
    show_default=True,
)
@click.option(
    "--drop-seq/--keep-seq",
    is_flag=True,
    default=False,
    help="Remove sequences and PHREDs from the sam fields by default. Kept otherwise. ",
)
@click.option(
    "--drop-sam/--keep-sam",
    is_flag=True,
    default=False,
    help="Do not add sams to the output by default. Kept otherwise. ",
)
@click.option(
    "--output-parsed-alignments",
    type=str,
    default="",
    help="output file with all parsed alignments (one alignment per line)."
    " Useful for debugging and analysis of walks."
    " If file exists, it will be open in the append mode."
    " If the path ends with .gz or .lz4, the output is bgzip-/lz4-compressed."
    " By default, not used.",
)
@click.option(
    "--output-stats",
    type=str,
    default="",
    help="output file for various statistics of pairs file. "
    " By default, statistics is not generated.",
)
@common_io_options
def parse2(
    sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs
):
    """Extracts pairs from .sam/.bam data with complex walks, make .pairs.
    SAM_PATH : an input .sam/.bam file with paired-end or single-end sequence alignments of
    Hi-C (or Hi-C-like) molecules. If the path ends with .bam, the input is decompressed from
    bam with samtools. By default, the input is read from stdin.
    """
    # NOTE: the docstring above is the CLI help text; the work is in parse2_py.
    parse2_py(
        sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs
    )


def parse2_py(
    sam_path, chroms_path, output, output_parsed_alignments, output_stats, **kwargs
):
    """Convert SAM/BAM alignments into pairs using the parse2 mode.

    Builds the pairs header from the SAM header and the chromosome order
    file, writes it, then calls ``streaming_classify`` with ``parse2=True``.

    Parameters
    ----------
    sam_path : str
        Input .sam/.bam path; a falsy value reads from stdin ("-").
    chroms_path : str
        Chromosome order file passed to ``headerops.get_chrom_order``.
    output : str
        Output path; a falsy value writes to ``sys.stdout``.
    output_parsed_alignments : str
        Optional path for the per-alignment table.
    output_stats : str
        Optional path for statistics; enables a ``PairCounter``.
    **kwargs
        Read here: ``nproc_in``, ``nproc_out``, ``cmd_out``, ``add_columns``,
        ``drop_sam``, ``add_pair_index``, ``assembly``, ``flip``. All of them
        are also forwarded to ``streaming_classify``.

    Raises
    ------
    Exception
        If an entry of ``add_columns`` is neither a known extra column nor a
        two-character uppercase SAM tag.
    ValueError
        If the input has no SAM header.
    """
    ### Set up input stream
    # "-" makes the reader take its input from stdin.
    input_sam = AlignmentFilePairtoolized(
        sam_path if sam_path else "-", "r", threads=kwargs.get("nproc_in")
    )

    ### Set up output streams
    outstream = (
        fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output
        else sys.stdout
    )
    out_alignments_stream = (
        fileio.auto_open(
            output_parsed_alignments,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output_parsed_alignments
        else None
    )
    out_stats_stream = (
        fileio.auto_open(
            output_stats,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
        if output_stats
        else None
    )

    if out_alignments_stream:
        out_alignments_stream.write(
            "readID\tside\tchrom\tpos\tstrand\tmapq\tcigar\tdist_5_lo\tdist_5_hi\tmatched_bp\n"
        )

    # generate empty PairCounter if stats output is requested:
    out_stat = PairCounter() if output_stats else None

    ### Set up output parameters
    # The default must be a string: a list default would fail on .split().
    add_columns = kwargs.get("add_columns", "")
    add_columns = [col for col in add_columns.split(",") if col]
    for col in add_columns:
        if not (
            (col in pairsam_format.EXTRA_COLUMNS)
            or (len(col) == 2 and col.isupper())
        ):
            raise Exception("{} is not a valid extra column".format(col))

    # "+" builds a new list, so the removals below do not touch the constant.
    columns = pairsam_format.COLUMNS + (
        [c + side for c in add_columns for side in ["1", "2"]]
    )

    if kwargs.get("drop_sam", True):
        columns.remove("sam1")
        columns.remove("sam2")

    if not kwargs.get("add_pair_index", False):
        columns.remove("walk_pair_index")
        columns.remove("walk_pair_type")

    ### Parse header
    samheader = input_sam.header

    if not samheader:
        raise ValueError(
            "The input sam is missing a header! If reading a bam file, please use `samtools view -h` to include the header."
        )

    ### Parse chromosome files present in the input
    sam_chromsizes = headerops.get_chromsizes_from_pysam_header(samheader)
    chromosomes = headerops.get_chrom_order(chroms_path, list(sam_chromsizes.keys()))

    ### Write new header to the pairsam file
    header = headerops.make_standard_pairsheader(
        assembly=kwargs.get("assembly", ""),
        chromsizes=[(chrom, sam_chromsizes[chrom]) for chrom in chromosomes],
        columns=columns,
        # False mirrors the default of the --flip/--no-flip CLI option.
        shape="whole matrix" if not kwargs.get("flip", False) else "upper triangle",
    )

    header = headerops.insert_samheader_pysam(header, samheader)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    outstream.writelines((l + "\n" for l in header))

    ### Parse input and write to the outputs
    streaming_classify(
        input_sam,
        outstream,
        chromosomes,
        out_alignments_stream,
        out_stat,
        parse2=True,
        **kwargs
    )

    # save statistics to a file if it was requested:
    if out_stat:
        out_stat.save(out_stats_stream)

    if outstream != sys.stdout:
        outstream.close()

    if out_alignments_stream:
        out_alignments_stream.close()

    if out_stats_stream:
        out_stats_stream.close()
import cli, common_io_options from ..lib.phase import phase_side_XB, phase_side_XA UTIL_NAME = "pairtools_phase" @cli.command() @click.argument("pairs_path", type=str, required=False) @click.option( "-o", "--output", type=str, default="", help="output file." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, the output is printed into stdout.", ) @click.option( "--phase-suffixes", nargs=2, # type=click.Tuple([str, str]), help="Phase suffixes (of the chrom names), always a pair.", ) @click.option( "--clean-output", is_flag=True, help="Drop all columns besides the standard ones and phase1/2", ) @click.option( "--tag-mode", type=click.Choice(["XB", "XA"]), default="XB", help="Specifies the mode of bwa reporting." " XA will parse 'XA', the input should be generated with: --add-columns XA,NM,AS,XS --min-mapq 0" " XB will parse 'XB' tag, the input should be generated with: --add-columns XB,AS,XS --min-mapq 0 " " Note that XB tag is added by running bwa with -u tag, present in github version. " " Both modes report similar results: XB reports 0.002% contacts more for phased data, " " while XA can report ~1-2% more unphased contacts because its definition multiple mappers is more premissive. ", ) @click.option( "--report-scores/--no-report-scores", is_flag=True, default=False, help="Report scores of optional, suboptimal and second suboptimal alignments. " "NM (edit distance) with --tag-mode XA and AS (alfn score) with --tag-mode XB ", ) @common_io_options def phase( pairs_path, output, phase_suffixes, clean_output, tag_mode, report_scores, **kwargs ): """Phase pairs mapped to a diploid genome. Diploid genome is the genome with two set of the chromosome variants, where each chromosome has one of two suffixes (phase-suffixes) corresponding to the genome version (phase-suffixes). By default, phasing adds two additional columns with phase 0, 1 or "." (unpahsed). Phasing is based on detection of chromosome origin of each mapped fragment. 
def phase_py(
    pairs_path, output, phase_suffixes, clean_output, tag_mode, report_scores, **kwargs
):
    """Append phase information to every pair of a .pairs/.pairsam stream.

    Two columns (``phase1``, ``phase2``) are appended to each row, plus six
    score columns when ``report_scores`` is set.  The per-side decision is
    delegated to ``phase_side_XB`` / ``phase_side_XA`` depending on
    ``tag_mode``.  A side whose returned base chromosome is "!" is rewritten
    as unmapped and its half of the pair type becomes "M".

    Parameters
    ----------
    pairs_path : str
        Input path; falsy means ``sys.stdin``.
    output : str
        Output path; falsy means ``sys.stdout``.
    phase_suffixes : sequence
        Forwarded unchanged to the ``phase_side_*`` helpers.
    clean_output : bool
        Keep only the columns listed in ``pairsam_format.COLUMNS`` plus the
        columns added here.
    tag_mode : str
        "XB" or "XA".
    report_scores : bool
        Also emit the three scores returned for each side.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.

    Raises
    ------
    ValueError
        If the header lacks the columns required by ``tag_mode``.
    """
    if pairs_path:
        instream = fileio.auto_open(
            pairs_path,
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
    else:
        instream = sys.stdin

    if output:
        outstream = fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
    else:
        outstream = sys.stdout

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    old_column_names = headerops.extract_column_names(header)

    # The new values are appended after the existing columns of each row.
    idx_phase1 = len(old_column_names)
    idx_phase2 = idx_phase1 + 1

    if clean_output:
        kept = [
            (i, col)
            for i, col in enumerate(old_column_names)
            if col in pairsam_format.COLUMNS
        ]
        new_column_names = [col for _, col in kept]
        new_column_idxs = [i for i, _ in kept] + [idx_phase1, idx_phase2]
    else:
        new_column_names = list(old_column_names)
    new_column_names += ["phase1", "phase2"]

    if report_scores:
        # Score columns are named S* in XB mode and M* in XA mode.
        score_prefix = None
        if tag_mode == "XB":
            score_prefix = "S"
        elif tag_mode == "XA":
            score_prefix = "M"
        if score_prefix is not None:
            new_column_names.extend(
                f"{score_prefix}{rank}_{side}" for rank in (1, 2, 3) for side in (1, 2)
            )
            if clean_output:
                new_column_idxs.extend(range(idx_phase2 + 1, idx_phase2 + 7))

    header = headerops._update_header_entry(
        header, "columns", " ".join(new_column_names)
    )

    # Columns that must have been added at the parsing step for each mode.
    required = ()
    hint = ""
    if tag_mode == "XB":
        required = ("XB1", "XB2", "AS1", "AS2", "XS1", "XS2")
        hint = "The input pairs file must be parsed with the flag --add-columns XB,AS,XS --min-mapq 0"
    elif tag_mode == "XA":
        required = ("XA1", "XA2", "NM1", "NM2", "AS1", "AS2", "XS1", "XS2")
        hint = "The input pairs file must be parsed with the flag --add-columns XA,NM,AS,XS --min-mapq 0"
    if not all(name in old_column_names for name in required):
        raise ValueError(hint)
    tag_cols = {name: old_column_names.index(name) for name in required}

    outstream.writelines((l + "\n" for l in header))

    # (chrom, pos, strand) column indices for side 1 and side 2.
    side_columns = (
        (pairsam_format.COL_C1, pairsam_format.COL_P1, pairsam_format.COL_S1),
        (pairsam_format.COL_C2, pairsam_format.COL_P2, pairsam_format.COL_S2),
    )

    for line in body_stream:
        cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
        # Placeholders: "!" for both phases, "-1" for each of the six scores.
        cols += ["!", "!"]
        if report_scores:
            cols += ["-1"] * 6
        pair_type = cols[pairsam_format.COL_PTYPE]

        for side, (col_chrom, col_pos, col_strand) in enumerate(side_columns):
            if cols[col_chrom] == pairsam_format.UNMAPPED_CHROM:
                continue
            sfx = str(side + 1)

            if tag_mode == "XB":
                phase, chrom_base, score1, score2, score3 = phase_side_XB(
                    cols[col_chrom],
                    cols[tag_cols["XB" + sfx]],
                    int(cols[tag_cols["AS" + sfx]]),
                    int(cols[tag_cols["XS" + sfx]]),
                    phase_suffixes,
                )
            elif tag_mode == "XA":
                phase, chrom_base, score1, score2, score3 = phase_side_XA(
                    cols[col_chrom],
                    cols[tag_cols["XA" + sfx]],
                    int(cols[tag_cols["AS" + sfx]]),
                    int(cols[tag_cols["XS" + sfx]]),
                    int(cols[tag_cols["NM" + sfx]]),
                    phase_suffixes,
                )

            idx_phase = idx_phase1 + side
            if report_scores:
                # Scores of the two sides interleave, hence the stride of 2.
                cols[idx_phase] = phase
                cols[idx_phase + 2] = str(score1)
                cols[idx_phase + 4] = str(score2)
                cols[idx_phase + 6] = str(score3)
            else:
                cols[idx_phase] = phase

            cols[col_chrom] = chrom_base
            if chrom_base == "!":
                # Demote this side to unmapped and patch its half of pair type.
                cols[col_chrom] = pairsam_format.UNMAPPED_CHROM
                cols[col_pos] = str(pairsam_format.UNMAPPED_POS)
                cols[col_strand] = pairsam_format.UNMAPPED_STRAND
                if side == 0:
                    pair_type = "M" + pair_type[1]
                else:
                    pair_type = pair_type[0] + "M"

        cols[pairsam_format.COL_PTYPE] = pair_type

        if clean_output:
            cols = [cols[i] for i in new_column_idxs]

        outstream.write(pairsam_format.PAIRSAM_SEP.join(cols))
        outstream.write("\n")

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
def restrict_py(pairs_path, frags, output, **kwargs):
    """Annotate every pair with the restriction fragment of each side.

    Six columns are appended to each row: ``rfrag1``, ``rfrag_start1``,
    ``rfrag_end1``, ``rfrag2``, ``rfrag_start2``, ``rfrag_end2``.  The
    fragment lookup itself is delegated to ``find_rfrag``.

    Parameters
    ----------
    pairs_path : str
        Input .pairs/.pairsam path, opened through ``fileio.auto_open``.
    frags : str
        Tab-separated BED file (chrom, start, end) of restriction fragments;
        lines starting with "#" are ignored.
    output : str
        Output path, opened through ``fileio.auto_open``.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    header = headerops.append_columns(
        header,
        [
            "rfrag1",
            "rfrag_start1",
            "rfrag_end1",
            "rfrag2",
            "rfrag_start2",
            "rfrag_end2",
        ],
    )

    outstream.writelines((l + "\n" for l in header))

    # genfromtxt yields a 0-d record for a single-row file; atleast_1d keeps
    # the slicing below valid for a BED file that holds exactly one fragment.
    rfrags = np.atleast_1d(
        np.genfromtxt(
            frags,
            delimiter="\t",
            comments="#",
            dtype=None,
            encoding="ascii",
            names=["chrom", "start", "end"],
        )
    )

    rfrags.sort(order=["chrom", "start", "end"])
    # Row indices at which the chromosome name changes, with both ends added.
    chrom_borders = np.r_[
        0, 1 + np.where(rfrags["chrom"][:-1] != rfrags["chrom"][1:])[0], rfrags.shape[0]
    ]
    # Per chromosome: 0 followed by (fragment end + 1) for each fragment.
    rfrags = {
        rfrags["chrom"][i]: np.concatenate([[0], rfrags["end"][i:j] + 1])
        for i, j in zip(chrom_borders[:-1], chrom_borders[1:])
    }

    for line in body_stream:
        cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
        chrom1, pos1 = cols[pairsam_format.COL_C1], int(cols[pairsam_format.COL_P1])
        rfrag_idx1, rfrag_start1, rfrag_end1 = find_rfrag(rfrags, chrom1, pos1)
        chrom2, pos2 = cols[pairsam_format.COL_C2], int(cols[pairsam_format.COL_P2])
        rfrag_idx2, rfrag_start2, rfrag_end2 = find_rfrag(rfrags, chrom2, pos2)
        cols += [str(rfrag_idx1), str(rfrag_start1), str(rfrag_end1)]
        cols += [str(rfrag_idx2), str(rfrag_start2), str(rfrag_end2)]
        outstream.write(pairsam_format.PAIRSAM_SEP.join(cols))
        outstream.write("\n")

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()
def sample_py(fraction, pairs_path, output, seed, **kwargs):
    """Write a random subset of the pairs of a .pairs/.pairsam stream.

    The header is copied (with a new @PG record) and every body line is kept
    independently with probability ``fraction``.

    Parameters
    ----------
    fraction : float
        Probability of keeping each pair.  0 keeps nothing and any value
        >= 1 keeps everything.
    pairs_path : str
        Input path, opened through ``fileio.auto_open``.
    output : str
        Output path, opened through ``fileio.auto_open``.
    seed : int or None
        Seed passed to ``random.seed``; ``None`` lets Python pick one.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    outstream.writelines((l + "\n" for l in header))

    random.seed(seed)

    for line in body_stream:
        # random.random() is in [0.0, 1.0), so the strict comparison selects
        # with probability exactly `fraction`; `<=` could still emit a pair
        # when fraction == 0.
        if random.random() < fraction:
            outstream.write(line)

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()
import cli, common_io_options from ..lib.scaling import compute_scaling UTIL_NAME = "pairtools_scaling" @cli.command() @click.argument("input_path", type=str, nargs=-1, required=False) @click.option( "-o", "--output", type=str, default="", help="output .tsv file with summary." ) @click.option( "--view", "--regions", help="Path to a BED file which defines which regions (viewframe) of the chromosomes to use. " "By default, this is parsed from .pairs header. ", type=str, required=False, default=None, ) @click.option( "--chunksize", type=int, default=100_000, show_default=True, required=False, help="Number of pairs in each chunk. Reduce for lower memory footprint.", ) @click.option( "--dist-range", type=click.Tuple([int, int]), default=(1, 1_000_000_000), show_default=True, required=False, help="Distance range. ", ) @click.option( "--n-dist-bins-decade", type=int, default=8, show_default=True, required=False, help="Number of bins to split the distance range in log10-space, specified per a factor of 10 difference.", ) @common_io_options def scaling(input_path, output, view, chunksize, dist_range, n_dist_bins_decade, **kwargs): """Calculate pairs scalings. INPUT_PATH : by default, a .pairs/.pairsam file to calculate statistics. If not provided, the input is read from stdin. The files with paths ending with .gz/.lz4 are decompressed by bgzip/lz4c. Output is .tsv file with scaling stats (both cis scalings and trans levels). 
def scaling_py(input_path, output, view, chunksize, dist_range, n_dist_bins_decade, **kwargs):
    """Compute cis scalings and trans levels of a pairs stream and save a .tsv.

    Parameters
    ----------
    input_path : sequence of str, or str
        Input .pairs/.pairsam path(s).  Only the first path is used.  When
        empty, the input is read from ``sys.stdin``, as the command help
        states.  A bare string is treated as a single path.
    output : str
        Output path, opened through ``fileio.auto_open``.
    view : str or None
        Optional path to a table of regions, read with ``pandas.read_table``
        and passed to ``compute_scaling`` as ``regions``.
    chunksize : int
        Forwarded to ``compute_scaling``.
    dist_range : tuple of int
        Forwarded to ``compute_scaling``.
    n_dist_bins_decade : int
        Forwarded to ``compute_scaling``.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.
    """
    # Indexing a bare string would silently pick its first character.
    if isinstance(input_path, str):
        input_path = (input_path,)

    # NOTE(review): paths after the first one are ignored, as before.
    if input_path:
        instream = fileio.auto_open(
            input_path[0],
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
    else:
        # The command documents stdin as the default input; previously an
        # empty path list raised ValueError instead.
        instream = sys.stdin

    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    if view is not None:
        view = pd.read_table(view)

    # Pass the header to the instream so that it can parse the header automatically
    cis_scalings, trans_levels = compute_scaling(
        instream,
        regions=view,
        chromsizes=None,
        dist_range=dist_range,
        n_dist_bins_decade=n_dist_bins_decade,
        chunksize=chunksize,
    )
    summary_stats = pd.concat([cis_scalings, trans_levels])

    # save statistics to the file
    summary_stats.to_csv(outstream, sep="\t", index=False)

    if instream != sys.stdin:
        instream.close()
    if outstream != sys.stdout:
        outstream.close()
" By default, such pairs are dropped.", ) # Deprecated option to be removed in the future: # @click.option( # "--send-comments-to", # type=click.Choice(['selected', 'rest', 'both', 'none']), # default="both", # help="Which of the outputs should receive header and comment lines", # show_default=True) @click.option( "--chrom-subset", type=str, default=None, help="A path to a chromosomes file (tab-separated, 1st column contains " "chromosome names) containing a chromosome subset of interest. " "If provided, additionally filter pairs with both sides originating from " "the provided subset of chromosomes. This operation modifies the #chromosomes: " "and #chromsize: header fields accordingly.", ) @click.option( "--startup-code", type=str, default=None, help="An auxiliary code to execute before filtering. " "Use to define functions that can be evaluated in the CONDITION statement", ) @click.option( "-t", "--type-cast", type=(str, str), default=(), multiple=True, help="Cast a given column to a given type. By default, only pos and mapq " "are cast to int, other columns are kept as str. Provide as " "-t , e.g. -t read_len1 int. Multiple entries are allowed.", ) @click.option( "--remove-columns", "-r", help=f"Comma-separated list of columns to be removed, e.g.: {','.join(pairsam_format.COLUMNS)}", type=str, default="", required=False, ) @common_io_options def select( condition, pairs_path, output, output_rest, # send_comments_to, chrom_subset, startup_code, type_cast, remove_columns, **kwargs, ): """Select pairs according to some condition. CONDITION : A Python expression; if it returns True, select the read pair. Any column declared in the #columns line of the pairs header can be accessed by its name. If the header lacks the #columns line, the columns are assumed to follow the .pairs/.pairsam standard (readID, chrom1, chrom2, pos1, pos2, strand1, strand2, pair_type). Finally, CONDITION has access to COLS list which contains the string values of columns. 
def select_py(
    condition,
    pairs_path,
    output,
    output_rest,
    chrom_subset,
    startup_code,
    type_cast,
    remove_columns,
    **kwargs,
):
    """Split a pairs stream into rows that satisfy ``condition`` and the rest.

    Parameters
    ----------
    condition : str
        Python expression evaluated for every pair by ``evaluate_stream``.
    pairs_path : str
        Input path, opened through ``fileio.auto_open``.
    output : str
        Destination of the selected pairs.
    output_rest : str
        Optional destination of the remaining pairs; dropped when empty.
    chrom_subset : str or None
        Optional path to a tab-separated file whose first column lists the
        chromosomes to keep.  Both sides of a pair must belong to the list,
        and the header is subset accordingly.
    startup_code : str or None
        Forwarded to ``evaluate_stream``.
    type_cast : sequence of (str, str)
        Forwarded to ``evaluate_stream``.
    remove_columns : str
        Comma-separated names of columns to drop from the output.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    # Optional output created only if requested:
    outstream_rest = None
    if output_rest:
        outstream_rest = fileio.auto_open(
            output_rest,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    # Parse the input stream:
    header, body_stream = headerops.get_header(instream)

    # Modify the header:
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)

    # Column layout of the body lines as they arrive from the input.
    input_columns = headerops.extract_column_names(header)

    # Filter out unwanted columns:
    column_scheme = None
    if remove_columns:
        remove_columns = remove_columns.split(",")
        for col in remove_columns:
            if col in pairsam_format.COLUMNS_PAIRS:
                warnings.warn(
                    f"Removing required {col} column for .pairs format. Output is not .pairs anymore"
                )
            elif col in pairsam_format.COLUMNS_PAIRSAM:
                warnings.warn(
                    f"Removing required {col} column for .pairsam format. Output is not .pairsam anymore"
                )
        updated_columns = [x for x in input_columns if x not in remove_columns]

        if len(updated_columns) == len(input_columns):
            # Nothing is removed, so rows are passed through untouched.  This
            # also avoids blanking every row when the header has no #columns.
            warnings.warn(
                f"Some column(s) {','.join(remove_columns)} not in the file, the operation has no effect"
            )
        else:
            header = headerops.set_columns(header, updated_columns)
            # Positions, in the input layout, of the columns that survive.
            column_scheme = [input_columns.index(col) for col in updated_columns]

    # Update the chromosomes:
    new_chroms = None
    if chrom_subset is not None:
        # Context manager so the handle is closed deterministically.
        with open(chrom_subset, "r") as chroms_file:
            new_chroms = [l.strip().split("\t")[0] for l in chroms_file]

    if new_chroms is not None:
        header = headerops.subset_chroms_in_pairsheader(header, new_chroms)
    outstream.writelines((l + "\n" for l in header))
    if output_rest:
        outstream_rest.writelines((l + "\n" for l in header))

    # The condition is evaluated on the raw input rows, so names must be
    # resolved against the *input* layout, not the header after removal;
    # otherwise columns after a removed one are read from the wrong field.
    column_names = input_columns if input_columns else pairsam_format.COLUMNS

    # Format the condition:
    condition = condition.strip()
    if new_chroms is not None:
        condition = (
            f"({condition}) and (chrom1 in {new_chroms}) and (chrom2 in {new_chroms})"
        )

    for filter_passed, line in evaluate_stream(
        body_stream, condition, column_names, type_cast, startup_code
    ):
        if column_scheme is not None:
            COLS = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
            # keep only the surviving columns, in their original order:
            COLS = [COLS[idx] for idx in column_scheme]
            line = pairsam_format.PAIRSAM_SEP.join(COLS) + "\n"  # form the line

        if filter_passed:
            outstream.write(line)
        elif outstream_rest:
            outstream_rest.write(line)

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()

    if output_rest and outstream_rest != sys.stdout:
        outstream_rest.close()
[output format option]', ) @click.option( "--nproc", type=int, default=8, show_default=True, help="Number of processes to split the sorting work between.", ) @click.option( "--tmpdir", type=str, default="", help="Custom temporary folder for sorting intermediates.", ) @click.option( "--memory", type=str, default="2G", show_default=True, help="The amount of memory used by default.", ) @click.option( "--compress-program", type=str, default="auto", show_default=True, help="A binary to compress temporary sorted chunks. " "Must decompress input when the flag -d is provided. " "Suggested alternatives: gzip, lzop, lz4c, snzip. " 'If "auto", then use lz4c if available, and gzip ' "otherwise.", ) @common_io_options def sort( pairs_path, output, c1, c2, p1, p2, pt, extra_col, nproc, tmpdir, memory, compress_program, **kwargs, ): """Sort a .pairs/.pairsam file. Sort pairs in the lexicographic order along chrom1 and chrom2, in the numeric order along pos1 and pos2 and in the lexicographic order along pair_type. PAIRS_PATH : input .pairs/.pairsam file. If the path ends with .gz or .lz4, the input is decompressed by bgzip or lz4c, correspondingly. By default, the input is read as text from stdin. 
def sort_py(
    pairs_path,
    output,
    c1,
    c2,
    p1,
    p2,
    pt,
    extra_col,
    nproc,
    tmpdir,
    memory,
    compress_program,
    **kwargs,
):
    """Sort the body of a .pairs/.pairsam stream with the external ``sort``.

    The header is written first (marked as sorted, with a new @PG record),
    then the body is piped through ``sort`` running under ``/bin/bash`` with
    the C locale.

    Parameters
    ----------
    pairs_path : str
        Input path, opened through ``fileio.auto_open``.
    output : str
        Output path, opened through ``fileio.auto_open``.
    c1, c2, p1, p2, pt : str
        Names (or 1-based numeric indices) of the chrom1, chrom2, pos1, pos2
        and pair type columns, used as sort keys in this order.
    extra_col : sequence of str
        Additional sort keys appended after the five above.
    nproc : int
        Value of ``sort --parallel``.
    tmpdir : str
        Value of ``sort --temporary-directory``; omitted when empty.
    memory : str
        Value of ``sort -S``.
    compress_program : str
        Value of ``sort --compress-program``.  "auto" picks lz4c when it is
        on PATH and gzip otherwise.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.

    Raises
    ------
    ValueError
        If a sort column name is not listed in the header.
    subprocess.CalledProcessError
        If the ``sort`` process exits with a non-zero status.
    """
    instream = fileio.auto_open(
        pairs_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )
    outstream = fileio.auto_open(
        output,
        mode="w",
        nproc=kwargs.get("nproc_out"),
        command=kwargs.get("cmd_out", None),
    )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    header = headerops.mark_header_as_sorted(header)

    outstream.writelines((l + "\n" for l in header))

    # The child process writes to the same file descriptor, so the header
    # must leave Python's buffer before `sort` starts producing output.
    outstream.flush()

    if compress_program == "auto":
        if shutil.which("lz4c") is not None:
            compress_program = "lz4c"
        else:
            warnings.warn(
                "lz4c is not found. Using gzip for compression of sorted chunks, "
                "which results in a minor decrease in performance. Please install "
                "lz4c for faster sorting."
            )
            compress_program = "gzip"

    column_names = headerops.extract_column_names(header)
    columns = [c1, c2, p1, p2, pt] + list(extra_col)
    # Now generating the "-k <i>,<i><mode>" expressions for all columns.
    # If column name is in the default pairsam format and has an integer dtype there, do numerical sorting
    cols = []
    for col in columns:
        if col.isnumeric():
            colindex = int(col)
        else:
            try:
                colindex = column_names.index(col) + 1
            except ValueError:
                # Same exception type as before, but naming the culprit.
                raise ValueError(
                    f"Sort column {col!r} is not listed in the #columns header: {column_names}"
                ) from None
        numeric = issubclass(
            pairsam_format.DTYPES_PAIRSAM.get(column_names[colindex - 1], str), int
        )
        cols.append(f"-k {colindex},{colindex}{'n' if numeric else ''}")
    cols = " ".join(cols)

    # NOTE(review): tmpdir, memory and compress_program are interpolated into
    # a shell string without quoting; they are expected to be trusted values.
    command = rf"""
        /bin/bash -c 'export LC_COLLATE=C; export LANG=C; sort
        {cols}
        --stable
        --field-separator=$'\''{pairsam_format.PAIRSAM_SEP_ESCAPE}'\''
        --parallel={nproc}
        {f'--temporary-directory={tmpdir}' if tmpdir else ''}
        -S {memory}
        {f'--compress-program={compress_program}' if compress_program else ''}'
        """.replace(
        "\n", " "
    )
    with subprocess.Popen(
        command, stdin=subprocess.PIPE, bufsize=-1, shell=True, stdout=outstream
    ) as process:
        stdin_wrapper = io.TextIOWrapper(process.stdin, "utf-8")
        for line in body_stream:
            stdin_wrapper.write(line)
        stdin_wrapper.flush()
        process.communicate()
    returncode = process.returncode

    if instream != sys.stdin:
        instream.close()

    if outstream != sys.stdout:
        outstream.close()

    # A failed sort (e.g. no temporary space left) used to go unnoticed and
    # leave a truncated output behind; report it to the caller instead.
    if returncode != 0:
        raise subprocess.CalledProcessError(returncode, command)
def split_py(pairsam_path, output_pairs, output_sam, **kwargs):
    """Split a .pairsam stream into a .pairs file and/or a .sam file.

    The ``sam1``/``sam2`` fields are removed from every row written to the
    pairs output, and their content is written as SAM records to the sam
    output.  ``#samheader:`` lines of the pairs header become the SAM header.

    Parameters
    ----------
    pairsam_path : str
        Input path, opened through ``fileio.auto_open``.
    output_pairs : str
        Destination of the pairs; "-" for stdout, empty to drop them.
    output_sam : str
        Destination of the SAM records; "-" for stdout, empty to drop them.
    **kwargs
        ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out`` for the streams.

    Raises
    ------
    ValueError
        If no output is requested, if both outputs are "-", or if the
        #columns header lists only one of ``sam1``/``sam2``.
    """
    # Validate the arguments before opening anything, so that a bad call
    # does not leave an input handle behind.
    if (not output_pairs) and (not output_sam):
        raise ValueError("At least one output (pairs and/or sam) must be specified!")
    if (output_pairs == "-") and (output_sam == "-"):
        raise ValueError("Only one output (pairs or sam) can be printed in stdout!")

    instream = fileio.auto_open(
        pairsam_path,
        mode="r",
        nproc=kwargs.get("nproc_in"),
        command=kwargs.get("cmd_in", None),
    )

    # Output streams
    outstream_pairs = None
    outstream_sam = None

    if output_pairs:
        outstream_pairs = fileio.auto_open(
            output_pairs,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )
    if output_sam:
        outstream_sam = fileio.auto_open(
            output_sam,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

    header, body_stream = headerops.get_header(instream)
    header = headerops.append_new_pg(header, ID=UTIL_NAME, PN=UTIL_NAME)
    columns = headerops.extract_column_names(header)

    has_sams = False
    if columns:
        # trust the column order specified in the header
        if ("sam1" in columns) and ("sam2" in columns):
            sam1col = columns.index("sam1")
            sam2col = columns.index("sam2")
            # Pop the larger index first so the smaller one stays valid.
            columns.pop(max(sam1col, sam2col))
            columns.pop(min(sam1col, sam2col))
            header = headerops._update_header_entry(
                header, "columns", " ".join(columns)
            )
            has_sams = True
        elif ("sam1" in columns) != ("sam2" in columns):
            raise ValueError(
                "According to the #columns header field only one sam entry is present"
            )
    else:
        # assume that the file has sam columns and follows the pairsam format
        sam1col = pairsam_format.COL_SAM1
        sam2col = pairsam_format.COL_SAM2
        has_sams = True

    if output_pairs:
        outstream_pairs.writelines((l + "\n" for l in header))
    if output_sam:
        samheader_prefix = "#samheader:"
        outstream_sam.writelines(
            (
                l[len(samheader_prefix):].strip() + "\n"
                for l in header
                if l.startswith(samheader_prefix)
            )
        )

    # Split
    sam1 = None
    sam2 = None
    for line in body_stream:
        cols = line.rstrip('\n').split(pairsam_format.PAIRSAM_SEP)
        if has_sams:
            # Remove the field with the larger index first.
            if sam1col < sam2col:
                sam2 = cols.pop(sam2col)
                sam1 = cols.pop(sam1col)
            else:
                sam1 = cols.pop(sam1col)
                sam2 = cols.pop(sam2col)

        if output_pairs:
            # hard-coded tab separator to follow the DCIC pairs standard
            outstream_pairs.write("\t".join(cols))
            outstream_pairs.write("\n")

        if output_sam and has_sams:
            for col in (sam1, sam2):
                # "." marks a side without SAM records.
                if col != ".":
                    for sam_entry in col.split(pairsam_format.INTER_SAM_SEP):
                        outstream_sam.write(
                            sam_entry.replace(pairsam_format.SAM_SEP, "\t")
                        )
                        outstream_sam.write("\n")

    # The sibling commands close their input; this one used to leave it open.
    if instream != sys.stdin:
        instream.close()

    if output_pairs and outstream_pairs != sys.stdout:
        outstream_pairs.close()

    if output_sam and outstream_sam != sys.stdout:
        outstream_sam.close()
Non-overlapping statistics are appended to" " the end of the file. Supported for tsv stats with single filter.", ) @click.option( "--n-dist-bins-decade", type=int, default=PairCounter.N_DIST_BINS_DECADE_DEFAULT, show_default=True, required=False, help="Number of bins to split the distance range in log10-space, specified per a factor of 10 difference.", ) @click.option( "--with-chromsizes/--no-chromsizes", is_flag=True, default=True, help="If enabled, will store sizes of chromosomes from the header of the pairs file" " in the stats file.", ) @click.option( "--yaml/--no-yaml", is_flag=True, default=False, help="Output stats in yaml format instead of table. ", ) @click.option( "--bytile-dups/--no-bytile-dups", default=False, help="If enabled, will analyse by-tile duplication statistics to estimate" " library complexity more accurately." " Requires parent_readID column to be saved by dedup (will be ignored otherwise)" " Saves by-tile stats into --output_bytile-stats stream, or regular output if --output_bytile-stats is not provided.", ) @click.option( "--output-bytile-stats", default="", required=False, help="output file for tile duplicate statistics." " If file exists, it will be open in the append mode." " If the path ends with .gz or .lz4, the output is bgzip-/lz4c-compressed." " By default, by-tile duplicate statistics are not printed." " Note that the readID and parent_readID should be provided and contain tile information for this option.", ) # Filtering options: @click.option( "--filter", default=None, required=False, multiple=True, help="Filters with conditions to apply to the data (similar to `pairtools select`). " "For non-YAML output only the first filter will be reported. " """Example: pairtools stats --yaml --filter 'unique:(pair_type=="UU")' --filter 'close:(pair_type=="UU") and (abs(pos1-pos2)<10)' test.pairs """, ) @click.option( "--engine", default="pandas", required=False, help="Engine for regular expression parsing. 
" "Python will provide you regex functionality, while pandas does not accept custom funtctions and works faster. ", ) @click.option( "--chrom-subset", type=str, default=None, required=False, help="A path to a chromosomes file (tab-separated, 1st column contains " "chromosome names) containing a chromosome subset of interest. " "If provided, additionally filter pairs with both sides originating from " "the provided subset of chromosomes. This operation modifies the #chromosomes: " "and #chromsize: header fields accordingly.", ) @click.option( "--startup-code", type=str, default=None, required=False, help="An auxiliary code to execute before filtering. " "Use to define functions that can be evaluated in the CONDITION statement", ) @click.option( "-t", "--type-cast", type=(str, str), default=(), multiple=True, help="Cast a given column to a given type. By default, only pos and mapq " "are cast to int, other columns are kept as str. Provide as " "-t , e.g. -t read_len1 int. Multiple entries are allowed.", ) @common_io_options def stats( input_path, output, merge, n_dist_bins_decade, bytile_dups, output_bytile_stats, filter, **kwargs ): """Calculate pairs statistics. INPUT_PATH : by default, a .pairs/.pairsam file to calculate statistics. If not provided, the input is read from stdin. If --merge is specified, then INPUT_PATH is interpreted as an arbitrary number of stats files to merge. The files with paths ending with .gz/.lz4 are decompressed by bgzip/lz4c. 
def stats_py(
    input_path,
    output,
    merge,
    n_dist_bins_decade,
    bytile_dups,
    output_bytile_stats,
    filter,
    **kwargs
):
    """Compute statistics of a pairs file, or merge existing stats files.

    Parameters
    ----------
    input_path : sequence of str
        With ``merge``, all paths are handed to ``do_merge``; otherwise only
        ``input_path[0]`` is read.
    output : str
        Output path, opened for writing with ``fileio.auto_open``.
    merge : bool
        If true, delegate to ``do_merge`` and return.
    n_dist_bins_decade : int
        Forwarded to ``PairCounter`` / ``do_merge``.
    bytile_dups : bool
        Request by-tile duplicate statistics; switched off with a warning
        when the file has no ``parent_readID`` column.
    output_bytile_stats : str
        Destination of the by-tile statistics; if empty while ``bytile_dups``
        is set, the main output stream is used.
    filter : sequence of str or None
        Entries of the form ``name:condition``.
    **kwargs
        Reads ``nproc_in``, ``nproc_out``, ``cmd_in``, ``cmd_out``, ``yaml``,
        ``startup_code``, ``type_cast``, ``engine`` and ``with_chromsizes``.

    Raises
    ------
    ValueError
        If no input path is given, or a filter lacks the ``name:`` prefix.
    """
    if merge:
        do_merge(output, input_path, n_dist_bins_decade=n_dist_bins_decade, **kwargs)
        return

    if len(input_path) == 0:
        raise ValueError(f"No input paths: {input_path}")

    as_yaml = kwargs.get("yaml", False)

    # Parse the filters before opening any stream so malformed input fails early.
    first_filter_name = "no_filter"  # default filter name for full output
    if filter:
        parsed_filters = {}
        for entry in filter:
            name, sep, condition = entry.partition(":")
            if not sep:
                raise ValueError(
                    f"Filter must be given as 'name:condition', got: {entry!r}"
                )
            parsed_filters[name] = condition
        first_filter_name = filter[0].split(":", 1)[0]
        if len(filter) > 1 and not as_yaml:
            logger.warning(
                f"Output the first filter only in non-YAML output: {first_filter_name}"
            )
        filter = parsed_filters
    else:
        filter = None

    instream = None
    outstream = None
    try:
        instream = fileio.auto_open(
            input_path[0],
            mode="r",
            nproc=kwargs.get("nproc_in"),
            command=kwargs.get("cmd_in", None),
        )
        outstream = fileio.auto_open(
            output,
            mode="w",
            nproc=kwargs.get("nproc_out"),
            command=kwargs.get("cmd_out", None),
        )

        # By-tile stats fall back to the main output when no file is given;
        # giving a file implies that the stats are wanted.
        if bytile_dups and not output_bytile_stats:
            output_bytile_stats = outstream
        if output_bytile_stats:
            bytile_dups = True

        header, body_stream = headerops.get_header(instream)
        cols = headerops.extract_column_names(header)

        # Check necessary columns for reporting by-tile stats:
        if bytile_dups and "parent_readID" not in cols:
            logger.warning(
                "No 'parent_readID' column in the file, not generating duplicate stats."
            )
            bytile_dups = False

        stats = PairCounter(
            n_dist_bins_decade=n_dist_bins_decade,
            bytile_dups=bytile_dups,
            filters=filter,
            startup_code=kwargs.get("startup_code", ""),  # for evaluation of filters
            type_cast=kwargs.get("type_cast", ()),  # for evaluation of filters
            engine=kwargs.get("engine", "pandas"),
        )

        # Collecting statistics
        for chunk in pd.read_table(body_stream, names=cols, chunksize=100_000):
            stats.add_pairs_from_dataframe(chunk)

        if kwargs.get("with_chromsizes", True):
            chromsizes = headerops.extract_chromsizes(header)
            stats.add_chromsizes(chromsizes)

        if bytile_dups:
            stats.save_bytile_dups(output_bytile_stats)

        # save statistics to file ...
        stats.save(
            outstream,
            yaml=as_yaml,  # format as yaml
            # output only the first filter if non-YAML output
            filter=first_filter_name if not as_yaml else None,
        )
    finally:
        # Close what we opened, but never the process-wide std streams.
        if instream is not None and instream is not sys.stdin:
            instream.close()
        if outstream is not None and outstream is not sys.stdout:
            outstream.close()
def streaming_dedup(
    in_stream,
    colnames,
    chunksize,
    carryover,
    method,
    mark_dups,
    max_mismatch,
    extra_col_pairs,
    unmapped_chrom,
    outstream,
    outstream_dups,
    outstream_unmapped,
    keep_parent_id,
    out_stat,
    backend,
    n_proc,
    c1="chrom1",
    c2="chrom2",
    p1="pos1",
    p2="pos2",
    s1="strand1",
    s2="strand2",
):
    """Deduplicate a stream of pairs chunk by chunk and route rows to outputs.

    Chunks with a boolean ``duplicate`` column are produced by
    ``_dedup_stream``. For each chunk, rows with ``unmapped_chrom`` on either
    side go to ``outstream_unmapped``. If ``outstream_dups`` equals
    ``outstream``, all mapped rows go to that single stream; otherwise
    duplicates go to ``outstream_dups`` and mapped non-duplicates to
    ``outstream``. Every chunk is also passed to ``out_stat`` when it is not
    None. Falsy output streams are skipped.
    """

    def _dump(frame, stream):
        # Header-less, index-less TSV without quoting, as used for every output.
        frame.to_csv(stream, index=False, header=False, sep="\t", quoting=QUOTE_NONE)

    chunk_iter = _dedup_stream(
        in_stream=in_stream,
        colnames=colnames,
        method=method,
        chunksize=chunksize,
        carryover=carryover,
        mark_dups=mark_dups,
        max_mismatch=max_mismatch,
        extra_col_pairs=extra_col_pairs,
        keep_parent_id=keep_parent_id,
        backend=backend,
        n_proc=n_proc,
        c1=c1,
        c2=c2,
        p1=p1,
        p2=p2,
        s1=s1,
        s2=s2,
        unmapped_chrom=unmapped_chrom,
    )

    started = time.time()
    n_pairs = 0
    single_mapped_output = outstream_dups == outstream

    for chunk in chunk_iter:
        n_pairs += chunk.shape[0]

        # Statistics see the chunk before the helper column is removed.
        if out_stat is not None:
            out_stat.add_pairs_from_dataframe(chunk, unmapped_chrom=unmapped_chrom)

        is_mapped = (chunk[c1] != unmapped_chrom) & (chunk[c2] != unmapped_chrom)
        is_dup = chunk["duplicate"]
        chunk = chunk.drop(columns=["duplicate"])

        if outstream_unmapped:
            _dump(chunk.loc[~is_mapped, :], outstream_unmapped)

        if single_mapped_output:
            # Duplicates and unique pairs share one destination.
            _dump(chunk.loc[is_mapped, :], outstream)
            continue

        if outstream_dups:
            _dump(chunk.loc[is_dup, :], outstream_dups)

        # The parent read ID is only written alongside duplicates.
        if keep_parent_id:
            chunk = chunk.drop(columns=["parent_readID"])

        if outstream:
            _dump(chunk.loc[is_mapped & (~is_dup), :], outstream)

    elapsed = time.time() - started
    logger.debug(f"total time: {elapsed}")
    if n_pairs > 0:
        logger.debug(f"time per mln pairs: {elapsed/n_pairs*1e6}")
    else:
        logger.debug(f"Processed {n_pairs} pairs")
df_marked.loc[~mask_duplicated, colnames] # Re-define carryover pairs: df_prev_nodups = df_nodups.tail(carryover).reset_index(drop=True) df_prev_nodups["carryover"] = True prev_i = len(df_prev_nodups) def _make_adj_mat(arr, size, r, method, n_proc=None, backend=None): if method not in ("max", "sum"): raise ValueError('Unknown method, only "sum" or "max" allowed') if method == "sum": p = 1 else: p = np.inf if backend == "sklearn": from sklearn import neighbors a_mat = neighbors.radius_neighbors_graph( arr, radius=r, p=p, n_jobs=n_proc, ) return a_mat elif backend == "scipy": import scipy.spatial from scipy.sparse import coo_matrix z = scipy.spatial.KDTree( arr, ) a = z.query_pairs(r=r, p=p, output_type="ndarray") a0 = a[:, 0] a1 = a[:, 1] a_mat = coo_matrix((np.ones_like(a0), (a0, a1)), shape=(size, size)) return a_mat else: raise ValueError('Unknown backend, only "scipy" or "sklearn" allowed') def _cluster_pairs( df_mapped, cols, p1, p2, r, method, n_proc, backend, ): groups = ( df_mapped[cols] .drop_duplicates() .reset_index(drop=True) .reset_index() .rename(columns={"index": "group"}) ) df_mapped = df_mapped.merge(groups, how="left", on=list(cols)) components = [] maxcluster_id = 0 for name, group in df_mapped.groupby("group"): a_mat = _make_adj_mat( group[[p1, p2]], size=group.shape[0], r=r, method=method, n_proc=n_proc, backend=backend, ) comp = connected_components(a_mat, directed=False)[1] + maxcluster_id + 1 components.append( pd.Series( name="cluster_id", index=group.index, data=comp, ) ) maxcluster_id = components[-1].max() df_mapped["cluster_id"] = pd.concat(components) df_mapped.drop(columns=["group"], inplace=True) return df_mapped def _cluster_pairs_nonmatching_col_pairs( df_mapped, col_pairs, p1, p2, r, method, n_proc, backend, ): groups_left = ( df_mapped[col_pairs[:, 0]] .drop_duplicates() .reset_index(drop=True) .reset_index() .rename(columns={"index": "group"}) ) df_mapped = df_mapped.merge(groups_left, how="left", on=list(col_pairs[:, 0])) 
groups_right = ( df_mapped[col_pairs[:, 1]] .drop_duplicates() .reset_index(drop=True) .reset_index() .rename(columns={"index": "group"}) ) df_mapped = df_mapped.merge( groups_right, on=list(col_pairs[:, 1]), suffixes=["_left", "_right"] ) components = [] maxcluster_id = 0 for name, group in df_mapped.groupby("group_left"): group = group[group["group_right"] == name] a_mat = _make_adj_mat( group[[p1, p2]], size=group.shape[0], r=r, method=method, n_proc=n_proc, backend=backend, ) components.append( pd.Series( name="cluster_id", index=group.index, data=connected_components(a_mat, directed=False)[1] + maxcluster_id, ) ) maxcluster_id = components[-1].max() df_mapped["cluster_id"] = pd.concat(components) df_mapped.drop(columns=["group_left", "group_right"], inplace=True) return df_mapped def _dedup_chunk( df, r, method, keep_parent_id, extra_col_pairs, backend, n_proc, c1, c2, p1, p2, s1, s2, unmapped_chrom, ): """Mark duplicates in a dataframe of pairs Parameters ---------- df : pd.DataFrame Dataframe with pairs, has to contain columns 'chrom1', 'pos1', 'chrom2', 'pos2' 'strand1', 'strand2' r : int Allowed distance between two pairs to call them duplicates method : str 'sum' or 'max' - whether 'r' uses sum of distances on two ends of pairs, or the maximal distance keep_parent_id : bool If True, the read ID of the read that was not labelled as a duplicate from a group of duplicates is recorded for each read marked as duplicate. Only possible with non-cython backends extra_col_pairs : list of tuples List of extra column pairs that need to match between two reads for them be considered duplicates (e.g. useful if alleles are annotated) backend : str 'scipy', 'sklearn', 'cython' unmapped_chrom : str, optional Which character denotes unmapped reads in the chrom1/chrom2 fields, by default "!" 
n_proc : int, optional How many cores to use, by default 1 Only works for 'sklearn' backend Returns ------- pd.DataFrame Dataframe with marked duplicates (extra boolean field 'duplicate'), and optionally recorded 'parent_readID' """ # Store the index of the dataframe: index_colname = df.index.name if index_colname is None: index_colname = "index" df = df.reset_index() # Remove the index temporarily # Set up columns to store the dedup info: df["cluster_id"] = -1 df["duplicate"] = False # Split mapped and unmapped reads: mask_unmapped = (df[c1] == unmapped_chrom) | (df[c2] == unmapped_chrom) df_unmapped = df.loc[mask_unmapped, :].copy() df_mapped = df.loc[~mask_unmapped, :].copy() N_mapped = df_mapped.shape[0] # If there are some mapped reads, dedup them: if N_mapped > 0: col_pairs = np.array( [ (c1, c1), (c2, c2), (s1, s1), (s2, s2), ] + extra_col_pairs ) if (col_pairs[:, 0] == col_pairs[:, 1]).all(): df_mapped = _cluster_pairs( df_mapped, col_pairs[:, 0], p1, p2, r, method, n_proc, backend, ) else: df_mapped = _cluster_pairs_nonmatching_col_pairs( df_mapped, col_pairs, p1, p2, r, method, n_proc, backend, ) mask_dups = df_mapped["cluster_id"].duplicated() df_mapped.loc[mask_dups, "duplicate"] = True # Mark parent IDs if requested: if keep_parent_id: df_mapped.loc[:, "parent_readID"] = df_mapped["cluster_id"].map( df_mapped[~mask_dups].set_index("cluster_id")["readID"] ) df_unmapped["parent_readID"] = "" # Reconstruct original dataframe with removed duplicated reads # (here, we rely on the sorting order that puts unmapped reads first): df = pd.concat([df_unmapped, df_mapped]).reset_index(drop=True) df = df.set_index(index_colname) # Set up the original index df = df.drop( ["cluster_id"], axis=1 ) # Remove the information that we don't need anymore: return df ### Cython deduplication #### def streaming_dedup_cython( method, max_mismatch, sep, c1ind, c2ind, p1ind, p2ind, s1ind, s2ind, extra_cols1, extra_cols2, unmapped_chrom, instream, outstream, outstream_dups, 
outstream_unmapped, out_stat, mark_dups, keep_parent_id=False, readid_ind=0, ): """ Cython-powered deduplication with online algorithm based on indexed list. Parameters ---------- method: "max" or "sum" max_mismatch: maximum allowed mismatch to count the pairs as duplicates sep: separator of the fields in the input file c1ind: index of the chr1 column c2ind: index of the chr2 column p1ind: index of the pos1 column p2ind: index of the pos2 column s1ind: index of the strand1 column s2ind: index of the strand2 column extra_cols1: extra columns for left alignment in a pair to add extra_cols2: extra columns for right alignment in a pair to add unmapped_chrom: Symbol of the chromosome for the unmapped alignment instream: input stream of pair file outstream: output stram of deduplicated pairs outstream_dups: output stream of duplicates (optionally with added parent_id, see keep_parent_id option) outstream_unmapped: output stram of unmapped pairs out_stat: output statistics mark_dups: if True, will add "DD" as the pair_type keep_parent_id: if True, additional column "parent_readID will be added to the output, can be useful for optical duplicates search readid_ind: index of the readID column in the input file Returns ------- """ maxind = max(c1ind, c2ind, p1ind, p2ind, s1ind, s2ind) if bool(extra_cols1) and bool(extra_cols2): maxind = max(maxind, max(extra_cols1), max(extra_cols2)) all_scols1 = [s1ind] + extra_cols1 all_scols2 = [s2ind] + extra_cols2 # if we do stats in the dedup, we need PAIR_TYPE # i do not see way around this: if out_stat: ptind = pairsam_format.COL_PTYPE maxind = max(maxind, ptind) dd = dedup_cython.OnlineDuplicateDetector( method, max_mismatch, returnData=False, keep_parent_id=keep_parent_id ) c1 = [] c2 = [] p1 = [] p2 = [] s1 = [] s2 = [] idx = [] line_buffer = [] cols_buffer = [] chromDict = {} strandDict = {} curMaxLen = max(MAX_LEN, dd.getLen()) t0 = time.time() N = 0 instream = iter(instream) read_idx = 0 # read index to mark the parent readID 
while True: rawline = next(instream, None) stripline = rawline.strip("\n") if rawline else None # take care of empty lines not at the end of the file separately if rawline and (not stripline): logger.warning("Empty line detected not at the end of the file") continue if stripline: cols = stripline.split(sep) if len(cols) <= maxind: raise ValueError( "Error parsing line {}: ".format(stripline) + " expected {} columns, got {}".format(maxind, len(cols)) ) if (cols[c1ind] == unmapped_chrom) or (cols[c2ind] == unmapped_chrom): if outstream_unmapped: outstream_unmapped.write(stripline) # don't forget terminal newline outstream_unmapped.write("\n") # add a pair to PairCounter if stats output is requested: if out_stat: out_stat.add_pair( cols[c1ind], int(cols[p1ind]), cols[s1ind], cols[c2ind], int(cols[p2ind]), cols[s2ind], cols[ptind], unmapped_chrom=unmapped_chrom, ) else: line_buffer.append(stripline) cols_buffer.append(cols) c1.append(fetchadd(cols[c1ind], chromDict)) c2.append(fetchadd(cols[c2ind], chromDict)) p1.append(int(cols[p1ind])) p2.append(int(cols[p2ind])) idx.append(read_idx) read_idx += 1 if bool(extra_cols1) and bool(extra_cols2): s1.append( fetchadd("".join(cols[i] for i in all_scols1), strandDict) ) s2.append( fetchadd("".join(cols[i] for i in all_scols2), strandDict) ) else: s1.append(fetchadd(cols[s1ind], strandDict)) s2.append(fetchadd(cols[s2ind], strandDict)) N += 1 if (not stripline) or (len(c1) == curMaxLen): if keep_parent_id: res, parents = dd.push( ar(c1, 32), ar(c2, 32), ar(p1, 32), ar(p2, 32), ar(s1, 32), ar(s2, 32), ) else: res = dd.push( ar(c1, 32), ar(c2, 32), ar(p1, 32), ar(p2, 32), ar(s1, 32), ar(s2, 32), ) if not stripline: if keep_parent_id: res_tmp, parents_tmp = dd.finish() parents = np.concatenate([parents, parents_tmp]) else: res_tmp = dd.finish() res = np.concatenate([res, res_tmp]) for i in range(len(res)): # not duplicated pair: if not res[i]: outstream.write(line_buffer[i]) # don't forget terminal newline outstream.write("\n") 
def fetchadd(key, mydict):
    """Return the integer code of ``key``, registering it on first sight.

    The key is stripped of surrounding whitespace; a new key gets the current
    size of ``mydict`` as its code.
    """
    # The default is evaluated before insertion, i.e. the pre-insert size.
    return mydict.setdefault(key.strip(), len(mydict))


def ar(mylist, val):
    """Convert ``mylist`` to a numpy integer array ``val`` (8/16/32) bits wide."""
    dtype_by_width = {8: np.int8, 16: np.int16, 32: np.int32}
    return np.array(mylist, dtype=dtype_by_width[val])


#### Markasdup utilities: ####


def mark_split_pair_as_dup(cols):
    """Mark an already split pairsam line as a duplicate, in place.

    Sets the pair type column to "DD" and, when the line is long enough to
    hold both sam columns, tags every sam entry in them via
    ``mark_sam_as_dup``. A trailing newline on the last column is kept.
    Returns the same ``cols`` list.
    """
    had_newline = cols[-1].endswith("\n")

    cols[pairsam_format.COL_PTYPE] = "DD"

    sam_columns = (pairsam_format.COL_SAM1, pairsam_format.COL_SAM2)
    if all(len(cols) > idx for idx in sam_columns):
        sep = pairsam_format.INTER_SAM_SEP
        for idx in sam_columns:
            # split each sam column into sam entries, tag and assemble back
            tagged = [mark_sam_as_dup(entry) for entry in cols[idx].split(sep)]
            cols[idx] = sep.join(tagged)

    if had_newline and not cols[-1].endswith("\n"):
        cols[-1] = cols[-1] + "\n"
    return cols


def mark_sam_as_dup(sam):
    """Tag the binary flag and the optional pair type field of a sam entry as a PCR duplicate."""
    fields = sam.split(pairsam_format.SAM_SEP)
    if len(fields) == 1:
        # Nothing to tag in a single-field entry.
        return sam

    # 1024 is OR-ed into the second field (the flag).
    fields[1] = str(int(fields[1]) | 1024)

    # Fields from position 11 on are scanned for the pair type tag.
    for pos, field in enumerate(fields):
        if pos >= 11 and field.startswith("Yt:Z:"):
            fields[pos] = "Yt:Z:DD"
    return pairsam_format.SAM_SEP.join(fields)
""" import numpy as np import cython cimport numpy as np cimport cython ### Online deduplicator used in pairtools.dedup Cython: cdef class OnlineDuplicateDetector(object): cdef cython.int [:] c1 cdef cython.int [:] c2 cdef cython.int [:] p1 cdef cython.int [:] p2 cdef cython.int [:] s1 cdef cython.int [:] s2 cdef cython.char [:] rm cdef cython.int [:] parent_idxs cdef int methodid cdef int low cdef int high cdef int N cdef int max_mismatch cdef int returnData cdef int keep_parent_id def __init__(self, method, max_mismatch, returnData=False, keep_parent_id=False): if returnData == False: self.returnData = 0 else: self.returnData = 1 if keep_parent_id == False: self.keep_parent_id = 0 else: self.keep_parent_id = 1 self.parent_idxs = np.zeros(0, np.int32) self.N = 0 self.c1 = np.zeros(0, np.int32) self.c2 = np.zeros(0, np.int32) self.p1 = np.zeros(0, np.int32) self.p2 = np.zeros(0, np.int32) self.s1 = np.zeros(0, np.int32) self.s2 = np.zeros(0, np.int32) self.rm = np.zeros(0, np.int8) if method == "max": self.methodid = 0 elif method == "sum": self.methodid = 1 else: raise ValueError('method should be "sum" or "max"') self.max_mismatch = int(max_mismatch) self.low = 0 self.high = 1 def _shrink(self): if self.returnData == 1: firstret = self.rm[:self.low] retainMask = (np.asarray(firstret) == False) del firstret ret = [] for ar in [self.c1, self.c2, self.p1, self.p2, self.s1, self.s2]: ret.append(np.asarray(ar)[:self.low][retainMask]) self.c1 = self.c1[self.low:] self.c2 = self.c2[self.low:] self.p1 = self.p1[self.low:] self.p2 = self.p2[self.low:] self.s1 = self.s1[self.low:] self.s2 = self.s2[self.low:] pastrm = self.rm[:self.low] self.rm = self.rm[self.low:] self.high = self.high-self.low self.N = self.N - self.low if self.returnData == 1: self.low = 0 return ret if self.keep_parent_id == 1: # Return parent readIDs alongside with duplicates mask: pastidx = self.parent_idxs[:self.low] self.low = 0 return pastrm, pastidx self.low = 0 return pastrm def _run(self, 
finish=False): cdef int finishing = 0 cdef int extraCondition if finish: finishing = 1 while True: if self.low == self.N: break if self.high == self.N: if finishing == 1: self.low += 1 self.high = self.low + 1 continue else: break if self.rm[self.low] == 1: self.low += 1 self.high = self.low+1 continue # if high already removed, just continue if self.rm[self.high] == 1: self.high += 1 continue # if we jumped too far, continue if ((self.c1[self.high] != self.c1[self.low]) or (self.p1[self.high] - self.p1[self.low] > self.max_mismatch) or (self.p1[self.high] - self.p1[self.low] < 0 )): self.low += 1 self.high = self.low + 1 # restart high continue if self.methodid == 0: extraCondition = max( abs(self.p1[self.low] - self.p1[self.high]), abs(self.p2[self.low] - self.p2[self.high])) <= self.max_mismatch elif self.methodid == 1: # sum of distances <= max_mismatch extraCondition = ( abs(self.p1[self.low] - self.p1[self.high]) + abs(self.p2[self.low] - self.p2[self.high]) <= self.max_mismatch ) else: raise ValueError( "Unknown method id, this should not happen. 
def auto_open(path, mode, nproc=1, command=None):
    """Guess the file format from the extension and use the corresponding binary
    to open it for reading or writing. If the extension is not known, open the
    file as text.

    If the binary allows parallel execution, specify the number of threads
    with `nproc`. ``None`` is treated as 1.

    If `command` is supplied, use it to open the file instead of auto-guessing.
    The command must accept the filename as the last argument, accept input
    through stdin and print output into stdout.

    Supported extensions and binaries (with comments):
    .bam - samtools view (allows parallel writing)
    .gz - pbgzip if available, otherwise bgzip, otherwise gzip
    .lz4 - lz4c (does not support parallel execution)

    Modes: "r" and "w" everywhere, plus "a" for .gz and .lz4. Plain files
    accept whatever the builtin ``open`` accepts.

    Raises
    ------
    ValueError
        If the mode is not supported for the detected format, or the required
        binary is not found.
    """
    # NOTE(review): the `pipes` module is deprecated since Python 3.11 and
    # removed in 3.13 (PEP 594); this function still relies on it.

    def _piped(cmd, kind, how):
        # Run `cmd` through a one-step shell pipeline attached to `path`.
        t = pipes.Template()
        t.append(cmd, kind)
        return t.open(path, how)

    def _require_mode(allowed, message):
        if mode not in allowed:
            raise ValueError(message.format(mode))

    if nproc is None:
        # Callers forward kwargs.get(...), which may be None.
        nproc = 1

    # Empty filepath or False provided
    if not path or path == "-":
        if mode == "r":
            return sys.stdin
        if mode == "w":
            return sys.stdout

    action = "decompress input" if mode == "r" else "compress output"

    if command:
        _require_mode(("r", "w"), "Unknown mode : {}")
        return _piped(command, "--", mode)

    if path.endswith(".bam"):
        _require_mode(("r", "w"), "Unknown mode for .bam : {}")
        if shutil.which("samtools") is None:
            raise ValueError("samtools is not found, cannot " + action)
        if mode == "w":
            threads = "-@ " + str(nproc - 1) if nproc > 1 else ""
            return _piped("samtools view -bS {} -".format(threads), "--", "w")
        return _piped("samtools view -h", "--", "r")

    if path.endswith(".gz"):
        _require_mode(("r", "w", "a"), "Unknown mode for .gz : {}")
        if shutil.which("pbgzip") is not None:
            compress = "pbgzip -c -n {}".format(nproc)
            decompress = "pbgzip -dc -n {}".format(nproc)
        elif shutil.which("bgzip") is not None:
            compress = "bgzip -c -@ {}".format(nproc)
            decompress = "bgzip -dc -@ {}".format(nproc)
        elif shutil.which("gzip") is not None:
            compress = "gzip -c"
            decompress = "gzip -dc"
        else:
            raise ValueError(
                "pbgzip, bgzip and gzip are not found, cannot " + action
            )
        if mode == "w":
            return _piped(compress, "--", "w")
        if mode == "a":
            # "ff": the step reads file $IN and appends to file $OUT itself
            return _piped(compress + " $IN >> $OUT", "ff", "w")
        return _piped(decompress, "--", "r")

    if path.endswith(".lz4"):
        _require_mode(("r", "w", "a"), "Unknown mode : {}")
        if shutil.which("lz4c") is None:
            raise ValueError("lz4c is not found, cannot " + action)
        if mode == "w":
            return _piped("lz4c -cz", "--", "w")
        if mode == "a":
            return _piped("lz4c -cz $IN >> $OUT", "ff", "w")
        return _piped("lz4c -cd", "--", "r")

    return open(path, mode)
Returns ------- file: a file-like object """ if issubclass(type(command), str): command = command.split(" ") self._command = command self._mode = mode if mode.startswith("r"): if issubclass(type(file_or_path), str): self._proc = subprocess.Popen( command + [file_or_path], universal_newlines=True, stdout=subprocess.PIPE, ) else: self._proc = subprocess.Popen( command, universal_newlines=True, stdin=file_or_path, stdout=subprocess.PIPE, ) self._stream = self._proc.stdout self._close_stream = self._proc.stdout.close elif mode.startswith("w") or mode.startswith("a"): f = ( open(file_or_path, mode=mode) if issubclass(type(file_or_path), str) else file_or_path ) self._proc = subprocess.Popen( command, universal_newlines=True, stdin=subprocess.PIPE, stdout=f ) self._stream = self._proc.stdin self.buffer = self._stream.buffer self.closed = self._stream.closed self.flush = self._stream.flush self.fileno = self._stream.fileno self.read = self._stream.read self.readline = self._stream.readline self.readlines = self._stream.readlines self.seek = self._stream.seek self.seekable = self._stream.seekable self.truncate = self._stream.truncate self.tell = self._stream.tell self.writable = self._stream.writable self.write = self._stream.write self.writelines = self._stream.writelines def close(self, timeout=None): self._stream.close() retcode = self._proc.wait(timeout=timeout) return retcode def get_stream_handlers(instream): """ Get the readline and peek functions for the provided input stream. Parameters: instream (file-like object): The input stream to get the handlers for. Returns: tuple: A tuple containing the following elements: - readline_f (function): The readline function for the input stream. - peek_f (function): The peek function for the input stream. Raises: ValueError: If the peek function cannot be found for the provided stream. 
""" readline_f, peek_f = None, None if hasattr(instream, "buffer"): peek_f = instream.buffer.peek readline_f = instream.buffer.readline elif hasattr(instream, "peek"): peek_f = instream.peek readline_f = instream.readline else: raise ValueError("Cannot find the peek() function of the provided stream!") return readline_f, peek_f pairtools-1.1.3/pairtools/lib/filterbycov.py000066400000000000000000000173761474715105500212430ustar00rootroot00000000000000import numpy as np import warnings from .dedup import mark_split_pair_as_dup from . import pairsam_format def fetchadd(key, mydict): key = key.strip() if key not in mydict: mydict[key] = len(mydict) return mydict[key] def ar(mylist, val): return np.array(mylist, dtype={8: np.int8, 16: np.int16, 32: np.int32}[val]) def _filterbycov(c1_in, p1_in, c2_in, p2_in, max_dist, method): """ This is a slow version of the filtering code used for testing purposes only Use cythonized version in the future!! """ c1 = np.asarray(c1_in, dtype=int) p1 = np.asarray(p1_in, dtype=int) c2 = np.asarray(c2_in, dtype=int) p2 = np.asarray(p2_in, dtype=int) M = np.r_[ np.c_[c1, p1], np.c_[c2, p2] ] # M is a table of (chrom, pos) with 2*N rows assert c1.shape[0] == c2.shape[0] N = 2 * c1.shape[0] ind_sorted = np.lexsort((M[:, 1], M[:, 0])) # sort by chromosomes, then positions # M[ind_sorted] # ind_sorted # M, M[ind_sorted] if method == "sum": proximity_count = np.zeros( N ) # keeps track of how many molecules each framgent end is close to elif method == "max": proximity_count = np.zeros(N) else: raise ValueError("Unknown method: {}".format(method)) low = 0 high = 1 while True: # boundary case finish if low == N: break # boundary case - CHECK if high == N: low += 1 high = low + 1 continue # check if "high" is proximal enough to "low" # first, if chromosomes not equal, we have gone too far, and the positions are not proximal if M[ind_sorted[low], 0] != M[ind_sorted[high], 0]: low += 1 high = low + 1 # restart high continue # next, if positions are 
def streaming_filterbycov(
    method,
    max_dist,
    max_cov,
    sep,
    c1ind,
    c2ind,
    p1ind,
    p2ind,
    s1ind,
    s2ind,
    unmapped_chrom,
    instream,
    outstream,
    outstream_high,
    outstream_unmapped,
    out_stat,
    mark_multi,
):
    """
    Split a stream of pairs by the local coverage of their ends.

    All mapped pairs are loaded into memory, their coverage is computed with
    `_filterbycov`, and every pair is then routed to one of the outputs.

    Parameters
    ----------
    method : str
        Coverage method passed to `_filterbycov` ("sum" or "max").
    max_dist : int
        Distance passed to `_filterbycov` as the proximity threshold.
    max_cov : number
        Pairs whose coverage exceeds this value go to `outstream_high`.
    sep : str
        Column separator of the input lines.
    c1ind, c2ind, p1ind, p2ind, s1ind, s2ind : int
        Column indices of chromosomes, positions and strands.
    unmapped_chrom : str
        Chromosome label that marks an unmapped side.
    instream : iterable of str
        Body lines of the pairs file (no header).
    outstream : file-like
        Receives pairs that are not above `max_cov`.
    outstream_high : file-like or None
        Receives pairs above `max_cov`, if provided.
    outstream_unmapped : file-like or None
        Receives pairs with an unmapped side, if provided.
    out_stat : object or None
        If provided, its `add_pair` method is called for every pair.
    mark_multi : bool
        If True, pairs above `max_cov` are rewritten with
        `mark_split_pair_as_dup` before being written to `outstream_high`.

    Raises
    ------
    ValueError
        If a line has too few columns.
    """
    maxind = max(c1ind, c2ind, p1ind, p2ind, s1ind, s2ind)
    # The pair type column is only needed (and only read) when stats are
    # requested.
    ptind = None
    if out_stat:
        ptind = pairsam_format.COL_PTYPE
        maxind = max(maxind, ptind)

    def _count_pair(cols, pair_type):
        out_stat.add_pair(
            cols[c1ind],
            int(cols[p1ind]),
            cols[s1ind],
            cols[c2ind],
            int(cols[p2ind]),
            cols[s2ind],
            pair_type,
        )

    c1, p1, c2, p2 = [], [], [], []
    line_buffer = []
    cols_buffer = []
    chrom_ids = {}

    # Stage 1: read everything, passing unmapped pairs through immediately.
    for rawline in instream:
        if not rawline:
            # an empty item marks the end of the input
            break
        stripline = rawline.strip()
        if not stripline:
            warnings.warn("Empty line detected not at the end of the file")
            continue

        cols = stripline.split(sep)
        if len(cols) <= maxind:
            raise ValueError(
                "Error parsing line {}: ".format(stripline)
                + " expected at least {} columns, got {}".format(
                    maxind + 1, len(cols)
                )
            )

        if (cols[c1ind] == unmapped_chrom) or (cols[c2ind] == unmapped_chrom):
            if outstream_unmapped:
                outstream_unmapped.write(stripline)
                # don't forget terminal newline
                outstream_unmapped.write("\n")
            if out_stat:
                _count_pair(cols, cols[ptind])
            continue

        line_buffer.append(stripline)
        cols_buffer.append(cols)
        c1.append(fetchadd(cols[c1ind], chrom_ids))
        c2.append(fetchadd(cols[c2ind], chrom_ids))
        p1.append(int(cols[p1ind]))
        p2.append(int(cols[p2ind]))

    # Stage 2: everything is in memory, compute coverage and route the pairs.
    res = _filterbycov(c1, p1, c2, p2, max_dist, method)
    if len(res) != len(line_buffer):
        raise ValueError(
            "{} lines left in the buffer, ".format(len(line_buffer) - len(res))
            + "should be none;"
            + "something went terribly wrong"
        )

    for line, cols, coverage in zip(line_buffer, cols_buffer, res):
        if not coverage > max_cov:
            # not a high-frequency interactor pair
            outstream.write(line)
            # don't forget terminal newline
            outstream.write("\n")
            if out_stat:
                _count_pair(cols, cols[ptind])
        else:
            # high-frequency interactor pair
            if out_stat:
                _count_pair(cols, "FF")
            if outstream_high:
                outstream_high.write(
                    # DD-marked pair:
                    sep.join(mark_split_pair_as_dup(cols))
                    if mark_multi
                    # pair as is:
                    else line
                )
                # don't forget terminal newline
                outstream_high.write("\n")
pairtools-1.1.3/pairtools/lib/headerops.py000066400000000000000000000564241474715105500206620ustar00rootroot00000000000000from collections import defaultdict import sys import copy import itertools import warnings import numpy as np import pandas as pd from .. import __version__ from . import pairsam_format from .fileio import ParseError, get_stream_handlers from .._logging import get_logger logger = get_logger() PAIRS_FORMAT_VERSION = "1.0.0" SEP_COLS = " " SEP_CHROMS = " " COMMENT_CHAR = "#" def get_header(instream, comment_char=COMMENT_CHAR, ignore_warning=False): """Returns a header from the stream and an the reaminder of the stream with the actual data. Parameters ---------- instream : a file object An input stream. comment_char : str The character prepended to header lines (use '@' when parsing sams, '#' when parsing pairsams). ignore_warning : bool If True, then no warning will be generated if header of pairs file is empty. Returns ------- header : list The header lines, stripped of terminal spaces and newline characters. remainder_stream : stream/file-like object Stream with the remaining lines. """ header = [] if not comment_char: raise ValueError("Please, provide a comment char!") comment_byte = comment_char.encode() readline_f, peek_f = get_stream_handlers(instream) current_peek = peek_f(1) while current_peek.startswith(comment_byte): # consuming a line from buffer guarantees # that the remainder of the buffer starts # with the beginning of the line. 
def validate_cols(stream, columns):
    """
    Validate that the number of columns coincides between stream and columns.
    Checks only the first line in the pairs stream!

    Note that it irreversibly removes the header and the first body line
    from the stream.

    Parameters
    ----------
    stream: input stream, body or full .pairs file
    columns: columns to validate against; either a sequence of column names
        or a single string of names separated by SEP_COLS

    Returns
    -------
    True if the number of columns is identical between file and columns
    """
    comment_byte = COMMENT_CHAR.encode()
    readline_f, peek_f = get_stream_handlers(stream)

    # Consume the header: reading whole lines guarantees that the next peek
    # looks at the beginning of a line.
    while peek_f(1).startswith(comment_byte):
        readline_f()

    line = readline_f()
    if isinstance(line, bytes):
        line = line.decode()

    ncols_body = len(line.split(pairsam_format.PAIRSAM_SEP))

    # Compare counts with counts: a string of names is split first.
    if isinstance(columns, str):
        columns = columns.split(SEP_COLS)
    ncols_reference = len(columns)

    return ncols_body == ncols_reference


def validate_header_cols(stream, header):
    """Validate that the number of columns corresponds between the stream and header"""
    columns = extract_column_names(header)
    # Validate against the column names, not against the raw header lines.
    return validate_cols(stream, columns)
def get_chrom_order(chroms_file, sam_chroms=None):
    """
    Produce an "enumeration" of chromosomes based on the list of chromosomes.

    Chromosomes are numbered from 1 in the order of their first appearance
    in `chroms_file` (first tab-separated column of each line). If
    `sam_chroms` is provided, only chromosomes present in it are taken from
    the file, and the chromosomes of `sam_chroms` missing from the file are
    appended afterwards in sorted order.

    Parameters
    ----------
    chroms_file : str
        Path to a text file with one chromosome per line.
    sam_chroms : collection of str, optional
        Chromosome names to restrict the enumeration to.

    Returns
    -------
    chrom_enum : dict
        Chromosome name -> 1-based index, without gaps.
    """
    chrom_enum = dict()
    with open(chroms_file, "rt") as f:
        for line in f:
            chrom = line.strip().split("\t")[0]
            # skip blank lines and repeated entries: the first occurrence
            # defines the position of a chromosome
            if not chrom or chrom in chrom_enum:
                continue
            if sam_chroms and chrom not in sam_chroms:
                continue
            chrom_enum[chrom] = len(chrom_enum) + 1

    if sam_chroms:
        remaining = sorted(set(c for c in sam_chroms if c not in chrom_enum))
        for chrom in remaining:
            chrom_enum[chrom] = len(chrom_enum) + 1

    return chrom_enum
def mark_header_as_sorted(header):
    """
    Return a copy of the header marked as sorted.

    Adds the "#sorted: chr1-chr2-pos1-pos2" entry unless a "#sorted" entry is
    already present, and sorts the names listed in every "#chromosomes"
    entry. The input header is not modified.

    Parameters
    ----------
    header : list of str
        Header lines of a .pairs file.

    Returns
    -------
    header : list of str
        The updated copy of the header.

    Raises
    ------
    Exception
        If the header is empty according to `is_empty_header`.
    """
    header = copy.deepcopy(header)
    if is_empty_header(header):
        raise Exception("Input file is not valid .pairs, has no header or is empty.")

    if not any(l.startswith("#sorted") for l in header):
        if header[0].startswith("##"):
            header.insert(1, "#sorted: chr1-chr2-pos1-pos2")
        else:
            header.insert(0, "#sorted: chr1-chr2-pos1-pos2")

    key = "#chromosomes"
    for i, line in enumerate(header):
        if line.startswith(key):
            # Drop the key together with its colon; a fixed-width slice of
            # the key alone leaves ":" behind as a bogus chromosome name.
            names = line[len(key):].lstrip(":").strip()
            chroms = [c for c in names.split(SEP_CHROMS) if c]
            header[i] = "#chromosomes: {}".format(SEP_CHROMS.join(sorted(chroms)))
    return header
VN=None, CL=None, force=False): """Append a @PG record to an existing sam header. If the header comes from a merged file and thus has multiple chains of @PG, append the provided PG to all of the chains, adding the numerical suffix of the branch to the ID. Parameters ---------- header : list of str ID, PN, VN, CL : std The keys of a new @PG record. If absent, VN is the version of pairtools and CL is taken from sys.argv. force : bool If True, ignore the inconsistencies among @PG records of the existing header. Returns ------- new_header : list of str A list of new headers lines, stripped of newline characters. """ if VN is None: VN = __version__ if CL is None: CL = " ".join(sys.argv) pre_pg_header = [ line.strip() for line in samheader if line.startswith("@HD") or line.startswith("@SQ") or line.startswith("@RG") ] post_pg_header = [ line.strip() for line in samheader if not line.startswith("@HD") and (not line.startswith("@SQ")) and (not line.startswith("@RG")) and (not line.startswith("@PG")) ] pg_chains = _parse_pg_chains(samheader, force=force) for i, br in enumerate(pg_chains): new_pg = {"ID": ID, "PN": PN, "VN": VN, "CL": CL} new_pg["PP"] = br[-1]["ID"] if len(pg_chains) > 1: new_pg["ID"] = new_pg["ID"] + "-" + str(i + 1) + "." 
+ str(len(br) + 1) new_pg["raw"] = _format_pg(**new_pg) br.append(new_pg) new_header = ( pre_pg_header + [pg["raw"] for br in pg_chains for pg in br] + post_pg_header ) return new_header def _format_pg(**kwargs): out = ["@PG"] + [ "{}:{}".format(field, kwargs[field]) for field in ["ID", "PN", "CL", "PP", "DS", "VN"] if field in kwargs ] return "\t".join(out) def _parse_pg_chains(header, force=False): pg_chains = [] parsed_pgs = [] for l in header: if l.startswith("@PG"): tag_value_pairs = l.strip().split("\t")[1:] if not all(":" in tvp for tvp in tag_value_pairs): warnings.warn( f"Skipping the following @PG line, as it does not follow the SAM header standard of TAG:VALUE: {l}" ) continue parsed_tvp = dict( [tvp.split(":", maxsplit=1) for tvp in tag_value_pairs if ":" in tvp] ) if parsed_tvp: parsed_tvp["raw"] = l.strip() parsed_pgs.append(parsed_tvp) while True: if len(parsed_pgs) == 0: break for i in range(len(parsed_pgs)): pg = parsed_pgs[i] if "PP" not in pg: pg_chains.append([pg]) parsed_pgs.pop(i) break else: matching_chains = [ branch for branch in pg_chains if branch[-1]["ID"] == pg["PP"] ] if len(matching_chains) > 1: if force: matching_chains[0].append(pg) parsed_pgs.pop(i) break else: raise ParseError( "Multiple @PG records with the IDs identical to the PP field of another record:\n" + "\n".join([br[-1]["raw"] for br in matching_chains]) + "\nvs\n" + pg["raw"] ) if len(matching_chains) == 1: matching_chains[0].append(pg) parsed_pgs.pop(i) break if force: pg_chains.append([pg]) parsed_pgs.pop(i) break else: raise ParseError( "Cannot find the parental @PG record for the @PG records:\n" + "\n".join([pg["raw"] for pg in parsed_pgs]) ) return pg_chains def _toposort(dag, tie_breaker): """ Topological sort on a directed acyclic graph Uses Kahn's algorithm with a custom tie-breaking option. The dictionary ``dag`` can be interpreted in two ways: 1. A dependency graph (i.e. 
arcs point from values to keys), and the generator yields items with no dependences followed by items that depend on previous ones. 2. Arcs point from keys to values, in which case the generator produces a **reverse** topological ordering of the nodes. Parameters ---------- dag: dict of nodes to sets of nodes Directed acyclic graph encoded as a dictionary. tie_breaker: callable Function that picks a tie breaker from a set of nodes with no unprocessed dependences. Returns ------- Generator Notes ----- See . Based in part on activestate recipe: by Sam Denton (MIT licensed). """ # Drop self-edges. for k, v in dag.items(): v.discard(k) # Find all nodes that don't depend on anything # and include them with empty dependencies. indep_nodes = set.union(*dag.values()) - set(dag.keys()) dag.update({node: set() for node in indep_nodes}) while True: if not indep_nodes: break out = tie_breaker(indep_nodes) indep_nodes.discard(out) del dag[out] yield out for node, deps in dag.items(): deps.discard(out) if len(deps) == 0: indep_nodes.add(node) if len(dag) != 0: raise ValueError("Circular dependencies exist: {} ".format(list(dag.items()))) def merge_chrom_lists(*lsts): sentinel = "!NONE!" 
g = defaultdict(set) for lst in lsts: if len(lst) == 1: g[lst[0]].add(sentinel) for a, b in zip(lst[:-1], lst[1:]): g[b].add(a) if len(g) == 0: return [] chrom_list = list(_toposort(g.copy(), tie_breaker=min)) if sentinel in chrom_list: chrom_list.remove(sentinel) chrom_list = sorted(chrom_list) return chrom_list def _merge_samheaders(samheaders, force=False): # first, append an HD line if it is present in any files # if different lines are present, raise an error HDs = set.union( *[ set(line for line in samheader if line.startswith("@HD")) for samheader in samheaders ] ) if len(HDs) > 1 and not force: raise ParseError("More than one unique @HD line is found in samheaders!") HDs = [list(HDs)[0]] if HDs else [] # second, confirm that all files had the same SQ lines # add SQs from the first file, keeping its order SQs = [ set(line for line in samheader if line.startswith("@SQ")) for samheader in samheaders ] common_SQs = set.intersection(*SQs) SQs_same = all([len(samheader) == len(common_SQs) for samheader in SQs]) if not SQs_same and not (force): raise ParseError("The SQ (sequence) lines of the sam headers are not identical") SQs = [line for line in samheaders[0] if line.startswith("@SQ")] # third, append _all_ PG chains, adding a unique index according to the # provided merging order PGs = [] for i, samheader in enumerate(samheaders): for line in samheader: if line.startswith("@PG"): split_line = line.split("\t") for j in range(len(split_line)): if split_line[j].startswith("ID:") or split_line[j].startswith( "PP:" ): split_line[j] = split_line[j] + "-" + str(i + 1) PGs.append("\t".join(split_line)) # finally, add all residual unique lines rest = sum( [ list( set( line for line in samheader if (not line.startswith("@HD")) and (not line.startswith("@SQ")) and (not line.startswith("@PG")) ) ) for samheader in samheaders ], [], ) new_header = [] new_header += HDs new_header += SQs new_header += PGs new_header += rest return new_header def 
_merge_pairheaders(pairheaders, force=False): new_header = [] # first, add all keys that are expected to be the same among all headers keys_expected_identical = [ "## pairs format", "#sorted:", "#shape:", "#genome_assembly:", "#columns:", ] keys_orginal = [l.split()[0] for header in pairheaders for l in header] for k in keys_expected_identical: lines = [[l for l in header if l.startswith(k)] for header in pairheaders] same = all([l == lines[0] for l in lines]) if not (same or force): raise ParseError( "The following header entries must be the same " "the merged files: {}".format(k) ) new_header += lines[0] # second, merge and add the chromsizes fields. chrom_lists = [] chromsizes = {} for header in pairheaders: chromlist = [] for line in header: if line.startswith("#chromsize:"): chrom, length = line.strip("#chromsize:").split() chromsizes[chrom] = length chromlist.append(chrom) chrom_lists.append(chromlist) chroms_merged = merge_chrom_lists(*chrom_lists) if "#chromosomes:" in keys_orginal: chrom_line = "#chromosomes: {}".format(" ".join(chroms_merged)) new_header.extend([chrom_line]) chromsize_lines = [ "#chromsize: {} {}".format(chrom, chromsizes[chrom]) for chrom in chroms_merged ] new_header.extend(chromsize_lines) # finally, add a sorted list of other unique fields other_lines = sorted( set( l for h in pairheaders for l in h if not any( l.startswith(k) for k in keys_expected_identical + ["#chromosomes", "#chromsize"] ) ) ) if other_lines: if new_header[-1].startswith("#columns"): new_header = new_header[:-1] + other_lines + [new_header[-1]] else: new_header = new_header + other_lines return new_header def all_same_columns(pairheaders): key_target = "#columns:" lines = [[l for l in header if l.startswith(key_target)] for header in pairheaders] all_same = all([l == lines[0] for l in lines]) return all_same def merge_headers(headers, force=False): samheaders, pairheaders = zip( *[extract_fields(h, "samheader", save_rest=True) for h in headers] ) # HD headers 
contain information that becomes invalid after processing # with distiller. Do not print into the output. new_pairheader = _merge_pairheaders(pairheaders, force=False) new_samheader = _merge_samheaders(samheaders, force=force) new_header = insert_samheader(new_pairheader, new_samheader) return new_header def append_columns(header, columns): """ Appends columns to the header, separated by SEP_COLS Parameters ---------- header: Previous header columns: List of column names to append Returns ------- Modified header (appended columns to the field "#columns") """ for i in range(len(header)): if header[i].startswith("#columns: "): header[i] += SEP_COLS + SEP_COLS.join(columns) return header def get_colnames(header): """ Get column names of the header, separated by SEP_COLS Parameters ---------- header: Previous header Returns ------- List of column names """ for i in range(len(header)): if header[i].startswith("#columns: "): columns = header[i].split(SEP_COLS)[1:] return columns return [] def set_columns(header, columns): """ Set columns to the header, separated by SEP_COLS Parameters ---------- header: Previous header columns: List of column names to append Returns ------- Modified header (appended columns to the field "#columns") """ for i in range(len(header)): if header[i].startswith("#columns:"): header[i] = "#columns:" + SEP_COLS + SEP_COLS.join(columns) return header # def _guess_genome_assembly(samheader): # PG = [l for l in samheader if l.startswith('@PG') and '\tID:bwa' in l][0] # CL = [field for field in PG.split('\t') if field.startswith('CL:')] # # return ga pairtools-1.1.3/pairtools/lib/pairsam_format.py000066400000000000000000000036671474715105500217150ustar00rootroot00000000000000PAIRSAM_FORMAT_VERSION = "1.0.0" PAIRSAM_SEP = "\t" PAIRSAM_SEP_ESCAPE = r"\t" SAM_SEP = "\031" SAM_SEP_ESCAPE = r"\031" INTER_SAM_SEP = "\031NEXT_SAM\031" COL_READID = 0 COL_C1 = 1 COL_P1 = 2 COL_C2 = 3 COL_P2 = 4 COL_S1 = 5 COL_S2 = 6 COL_PTYPE = 7 COL_SAM1 = 8 COL_SAM2 = 9 
COLUMNS = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", "sam1", "sam2", "walk_pair_index", "walk_pair_type", ] # Required columns for formats: COLUMNS_PAIRSAM = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", "sam1", "sam2", ] COLUMNS_PAIRS = [ "readID", "chrom1", "pos1", "chrom2", "pos2", "strand1", "strand2", "pair_type", ] DTYPES_PAIRSAM = { "readID": str, "chrom1": str, "pos1": int, "chrom2": str, "pos2": int, "strand1": str, "strand2": str, "pair_type": str, "sam1": str, "sam2": str, } DTYPES_PAIRS = { "readID": str, "chrom1": str, "pos1": int, "chrom2": str, "pos2": int, "strand1": str, "strand2": str, "pair_type": str, } UNMAPPED_CHROM = "!" UNMAPPED_POS = 0 UNMAPPED_STRAND = "-" UNANNOTATED_RFRAG = -1 EXTRA_COLUMNS = [ "mapq", "pos5", "pos3", "cigar", "read_len", "matched_bp", "algn_ref_span", "algn_read_span", "dist_to_5", "dist_to_3", "seq", "mismatches", # Format: "{ref_letter}:{mut_letter}:{phred}:{ref_position}:{read_position}" "read_side", "algn_idx", "same_side_algn_count" ] DTYPES_EXTRA_COLUMNS = { "mapq": int, "pos5": int, "pos3": int, "cigar": str, "read_len": int, "matched_bp": int, "algn_ref_span": int, "algn_read_span": int, "dist_to_5": int, "dist_to_3": int, "seq": str, "mismatches": str, "read_side": int, "algn_idx": int, "same_side_algn_count": int, } pairtools-1.1.3/pairtools/lib/pairsio.py000066400000000000000000000036221474715105500203460ustar00rootroot00000000000000import pandas as pd from . import fileio, headerops def read_pairs(pairs, nproc=3, cmd_in=None, **kwargs): """ Reads a file with .pairs format and returns a header, a dataframe of pairs, and chromsizes. Parameters: pairs (str or file-like object): A path to a .pairs file to read or an open file-like object/handle. nproc (int): Number of processes to use for reading the file. Default is 3. cmd_in (str): The command to be used for reading the file. Default is None. 
**kwargs: Additional keyword arguments to be passed to pd.read_csv. Useful options include: - chunksize (int): If specified, return an iterable object of type TextFileReader that reads in chunks of lines. - usecols (list-like or callable): Return a subset of the columns. If list-like, all elements must either be positional or strings. If callable, the callable function will be evaluated against the column names, returning names where the callable function evaluates to True. Returns: tuple: A tuple containing the following elements: - pairs_df (pd.DataFrame): A pandas DataFrame with pairs. - header (list of str): The original header of the pairs file. - chromsizes (dict): A dictionary containing chromosome sizes extracted from the header. """ pairs_stream = ( fileio.auto_open( pairs, mode="r", nproc=nproc, command=cmd_in, ) if isinstance(pairs, str) else pairs ) header, pairs_body = headerops.get_header(pairs_stream) cols = headerops.extract_column_names(header) chromsizes = headerops.extract_chromsizes(header) pairs_df = pd.read_csv( pairs_body, header=None, names=cols, sep="\t", dtype={"chrom1": str, "chrom2": str}, **kwargs ) return pairs_df, header, chromsizespairtools-1.1.3/pairtools/lib/parse.py000066400000000000000000001571361474715105500200240ustar00rootroot00000000000000""" Set of functions used for pairsam parse, migrated from pairtools/pairtools_parse.py Parse operates with several basic data types: I. pysam-based: 1. **sam entry** is a continuous aligned fragment of the read mapped to certain location in the genome. Because we read sam entries from .sam/.bam files automatically with modified pysam, each sam entry is in fact special AlignedSegmentPairtoolized Cython object that has alignment attributes and can be easily accessed from Python. Sam entries are gathered into reads by `group_alignments_by_side` function. 2. **read** is a collection of sam entries corresponding to a single Hi-C molecule. 
def streaming_classify(
    instream, outstream, chromosomes, out_alignments_stream, out_stat, **kwargs
):
    """
    Parse input sam file into individual reads, pairs, walks,
    then write to the outstream(s).

    Parameters
    ----------
    instream : iterable
        Stream of sam entries, grouped into reads by `read_alignment_block`.
    outstream : file-like
        Output for the pairs, written with `write_pairsam`.
    chromosomes : sequence of str
        Chromosome names; their order is used to decide whether a pair has
        to be flipped.
    out_alignments_stream : file-like or None
        If provided, all alignments of every read that produced at least one
        pair are written to it.
    out_stat : object or None
        If provided, its `add_pair` method is called for every pair.

    Additional kwargs:
        min_mapq,
        drop_readid,
        drop_seq,
        drop_sam,
        add_pair_index,
        add_columns, # comma-separated string or a list of column names
        report_alignment_end,
        max_inter_align_gap,
        flip,
        readid_transform
    parse:
        max_molecule_size
        walks_policy
    parse2:
        single_end: indicator whether single-end data is provided
        report_position, one of: "outer", "junction", "read", "walk"
        report_orientation, one of: "pair", "junction", "read", "walk"
        dedup_max_mismatch: For intramolecular deduplication
        max_insert_size: maximum insert size when searching for overlapping ends of R1 and R2
        expand: perform combinatorial expansion or not
        max_expansion_depth: maximum expansion depth, works in combination with expand=True

    Raises
    ------
    ValueError
        If add_columns is neither a string nor a list/tuple.
    """

    parse2 = kwargs.get("parse2", False)

    ### Store output parameters in a usable form:
    chrom_enum = dict(
        zip(
            [pairsam_format.UNMAPPED_CHROM] + list(chromosomes),
            range(len(chromosomes) + 1),
        )
    )

    add_columns = kwargs.get("add_columns", "")
    if add_columns is None:
        add_columns = []
    elif isinstance(add_columns, str):
        add_columns = add_columns.split(",") if len(add_columns) > 0 else []
    elif isinstance(add_columns, (list, tuple)):
        add_columns = list(add_columns)
    else:
        raise ValueError(f"Unknown type of add_columns: {type(add_columns)}")

    sam_tags = [col for col in add_columns if len(col) == 2 and col.isupper()]
    store_seq = "seq" in add_columns
    report_mismatches = "mismatches" in add_columns

    # Alignment end defaults to 5' if report_alignment_end is unspecified:
    report_5prime = kwargs.get("report_alignment_end", "5") == "5"

    ### Compile readID transformation:
    # NOTE(review): this compiles a caller-supplied expression that is
    # presumably evaluated downstream - it must never come from untrusted
    # input.
    readID_transform = kwargs.get("readid_transform", None)
    if readID_transform is not None:
        readID_transform = compile(readID_transform, "", "eval")

    ### Iterate over input pysam:
    instream = iter(instream)
    for (readID, (sams1, sams2)) in read_alignment_block(
        instream,
        sort=True,
        group_by_side=True,
        return_readID=True,
        readID_transform=readID_transform,
    ):

        ### Parse
        if not parse2:  # regular parser:
            pairstream, all_algns1, all_algns2 = parse_read(
                sams1,
                sams2,
                min_mapq=kwargs["min_mapq"],
                max_molecule_size=kwargs["max_molecule_size"],
                max_inter_align_gap=kwargs["max_inter_align_gap"],
                walks_policy=kwargs["walks_policy"],
                sam_tags=sam_tags,
                store_seq=store_seq,
                report_mismatches=report_mismatches,
            )
        else:  # parse2 parser:
            pairstream, all_algns1, all_algns2 = parse2_read(
                sams1,
                sams2,
                min_mapq=kwargs["min_mapq"],
                max_inter_align_gap=kwargs["max_inter_align_gap"],
                max_insert_size=kwargs.get("max_insert_size", 500),
                single_end=kwargs["single_end"],
                report_position=kwargs["report_position"],
                report_orientation=kwargs["report_orientation"],
                sam_tags=sam_tags,
                dedup_max_mismatch=kwargs["dedup_max_mismatch"],
                store_seq=store_seq,
                expand=kwargs["expand"],
                max_expansion_depth=kwargs["max_expansion_depth"],
                report_mismatches=report_mismatches,
            )

        ### Write:
        read_has_alignments = False
        for (algn1, algn2, pair_index) in pairstream:
            read_has_alignments = True

            if report_5prime:
                algn1["pos"] = algn1["pos5"]
                algn2["pos"] = algn2["pos5"]
            else:
                algn1["pos"] = algn1["pos3"]
                algn2["pos"] = algn2["pos3"]

            # The sam entries are swapped for this pair only: swapping
            # sams1/sams2 themselves would leak the flip into the following
            # pairs of the same read.
            pair_sams1, pair_sams2 = sams1, sams2
            if kwargs["flip"]:
                flip_pair = not check_pair_order(algn1, algn2, chrom_enum)
                if flip_pair:
                    algn1, algn2 = algn2, algn1
                    pair_sams1, pair_sams2 = sams2, sams1

            write_pairsam(
                algn1,
                algn2,
                readID=readID,
                pair_index=pair_index,
                sams1=pair_sams1,
                sams2=pair_sams2,
                out_file=outstream,
                drop_readid=kwargs["drop_readid"],
                drop_seq=kwargs["drop_seq"],
                drop_sam=kwargs["drop_sam"],
                add_pair_index=kwargs["add_pair_index"],
                add_columns=add_columns,
            )

            # add a pair to PairCounter for stats output:
            if out_stat:
                out_stat.add_pair(
                    algn1["chrom"],
                    int(algn1["pos"]),
                    algn1["strand"],
                    algn2["chrom"],
                    int(algn2["pos"]),
                    algn2["strand"],
                    algn1["type"] + algn2["type"],
                )

        # write all alignments:
        if out_alignments_stream and read_has_alignments:
            write_all_algnments(
                readID, all_algns1, all_algns2, out_alignments_stream
            )
def empty_alignment():
    """
    Build a new dictionary describing an unmapped ("null") alignment.

    Coordinates, chromosome and strand take the unmapped placeholders from
    `pairsam_format`; all numeric fields are zero, the type is "N".
    A fresh dictionary is created on every call.
    """
    unmapped_pos = pairsam_format.UNMAPPED_POS

    # Genomic location placeholders:
    algn = {
        "chrom": pairsam_format.UNMAPPED_CHROM,
        "pos5": unmapped_pos,
        "pos3": unmapped_pos,
        "pos": unmapped_pos,
        "strand": pairsam_format.UNMAPPED_STRAND,
    }
    # Position on the read and mapping status:
    algn.update(
        {
            "dist_to_5": 0,
            "dist_to_3": 0,
            "mapq": 0,
            "is_unique": False,
            "is_mapped": False,
            "is_linear": True,
        }
    )
    # CIGAR-derived fields:
    algn.update(
        {
            "cigar": "*",
            "algn_ref_span": 0,
            "algn_read_span": 0,
            "matched_bp": 0,
            "clip3_ref": 0,
            "clip5_ref": 0,
            "read_len": 0,
        }
    )
    # Pair type letter and mismatches string:
    algn.update({"type": "N", "mismatches": ""})
    return algn
def read_alignment_block(instream, sort=True, group_by_side=True, return_readID=True, readID_transform=None):
    """
    Group a stream of sam entries into blocks of consecutive entries that
    share the same read ID, and yield one block per read.

    :param instream: iterator over sam entries; each entry must provide
        `query_name` (and `is_read2`, `query_alignment_start` if sort=True)
    :param sort: if True, entries of a block are sorted by
        (is_read2, query_alignment_start)
    :param group_by_side: if True, the block is passed through
        `group_alignments_by_side`; otherwise the list of entries is yielded
    :param return_readID: if True, yield (readID, block) instead of block
    :param readID_transform: compiled expression (or string) evaluated with
        `eval`; it can refer to the local variable `readID`
    :return: generator of blocks. Note that one (empty) block is yielded
        even when the input stream has no entries.
    """
    sams = []
    prev_readID = None
    while True:
        sam_entry = next(instream, None)
        readID = sam_entry.query_name if sam_entry else None
        if readID_transform is not None and readID is not None:
            # NOTE(security): evaluates a caller-supplied expression; it must
            # never come from untrusted input.
            readID = eval(readID_transform)

        # Read is fully populated, then parse and write.
        # `is not None` (rather than truthiness) keeps reads apart even if the
        # transformed ID of the previous read is falsy, e.g. 0 or "".
        if not (sam_entry) or ((readID != prev_readID) and prev_readID is not None):
            if sort:
                sams = sorted(sams, key=lambda a: (a.is_read2, a.query_alignment_start))
            out = sams if not group_by_side else group_alignments_by_side(sams)
            out = out if not return_readID else (prev_readID, out)
            yield out

            # Start a new list instead of clearing in place: with
            # group_by_side=False the yielded object IS this list, and
            # clearing it would wipe the block the consumer just received.
            sams = []

        if sam_entry is None:
            break
        else:
            sams.append(sam_entry)
            prev_readID = readID
def parse_pysam_entry(
    sam,
    min_mapq,
    sam_tags=None,
    store_seq=False,
    report_3_alignment_end=False,
    report_mismatches=False,
):
    """Convert a single sam entry into an alignment dictionary.

    :param sam: input pysam AlignedSegment entry
    :param min_mapq: minimal MAPQ to consider as a proper alignment
    :param sam_tags: list of sam tags to store (missing tags are stored as "")
    :param store_seq: if True, the sequence will be parsed and stored in the output
    :param report_3_alignment_end: if True, 3'-end of alignment will be
        reported as position (will be deprecated)
    :param report_mismatches: if True, mismatches will be parsed from MD field
    :return: parsed aligned entry (dictionary)
    """
    flag = sam.flag
    is_mapped = not (flag & 0x04)
    mapq = sam.mapq
    is_unique = sam.is_unique(min_mapq)
    is_linear = sam.is_linear
    cigar = sam.cigar_dict

    mismatches = ""
    if not is_mapped:
        chrom = pairsam_format.UNMAPPED_CHROM
        strand = pairsam_format.UNMAPPED_STRAND
        pos5 = pairsam_format.UNMAPPED_POS
        pos3 = pairsam_format.UNMAPPED_POS
        dist_to_5 = 0
        dist_to_3 = 0
    else:
        on_forward = not (flag & 0x10)

        # Clipped parts are swapped for reverse-strand alignments:
        if on_forward:
            dist_to_5, dist_to_3 = cigar["clip5_ref"], cigar["clip3_ref"]
        else:
            dist_to_5, dist_to_3 = cigar["clip3_ref"], cigar["clip5_ref"]

        if not is_unique:
            # Mapped, but not uniquely: location is reported as unmapped
            chrom = pairsam_format.UNMAPPED_CHROM
            strand = pairsam_format.UNMAPPED_STRAND
            pos5 = pairsam_format.UNMAPPED_POS
            pos3 = pairsam_format.UNMAPPED_POS
        else:
            chrom = sam.reference_name
            strand = "+" if on_forward else "-"

            # Note that pysam output is zero-based, thus add +1 to the start:
            leftmost = sam.reference_start + 1
            rightmost = sam.reference_start + cigar["algn_ref_span"]
            if on_forward:
                pos5, pos3 = leftmost, rightmost
            else:
                pos5, pos3 = rightmost, leftmost

            # Mismatches are collected only on request and when MD tag is present:
            if sam.has_tag("MD") and report_mismatches:
                seq = sam.query_sequence.upper()
                quals = sam.query_qualities
                aligned_pairs = sam.get_aligned_pairs(with_seq=True, matches_only=True)
                found = get_mismatches_c(seq, quals, aligned_pairs)
                mismatches = ",".join(
                    f"{original}:{mutated}:{phred}:{ref}:{read}"
                    for original, mutated, phred, ref, read in found
                )

    if not is_mapped:
        algn_type = "N"
    elif not is_unique:
        algn_type = "M"
    else:
        algn_type = "U"

    algn = {
        "chrom": chrom,
        "pos5": pos5,
        "pos3": pos3,
        "strand": strand,
        "mapq": mapq,
        "is_mapped": is_mapped,
        "is_unique": is_unique,
        "is_linear": is_linear,
        "dist_to_5": dist_to_5,
        "dist_to_3": dist_to_3,
        "type": algn_type,
        "mismatches": mismatches,
    }
    algn.update(cigar)

    if report_3_alignment_end:
        algn["pos"] = algn["pos3"]
    else:
        algn["pos"] = algn["pos5"]

    ### Add tags to the alignment:
    if sam_tags:
        present_tags = sam.tags
        # Every requested tag gets a column, empty unless found in the entry:
        algn.update(dict.fromkeys(sam_tags, ""))
        for col, value in present_tags:
            if col in sam_tags:
                algn[col] = value

    if store_seq:
        algn["seq"] = sam.seq

    return algn
def _convert_gaps_into_alignments(sorted_algns, max_inter_align_gap):
    """
    Inplace conversion of gaps longer than max_inter_align_gap into alignments.

    Walks over the alignments (expected to be sorted by "dist_to_5") and,
    whenever the stretch of the read between the end of the previous
    alignments and the start of the current one is longer than
    `max_inter_align_gap`, inserts an empty alignment covering that stretch
    right before the current alignment.

    :param sorted_algns: list of alignment dictionaries, modified in place
    :param max_inter_align_gap: longest tolerated gap, in read coordinates
    :return: None
    """
    # A lone unmapped alignment has no gaps to fill:
    if (len(sorted_algns) == 1) and (not sorted_algns[0]["is_mapped"]):
        return

    last_5_pos = 0
    # Explicit index: the list grows while we scan it, so the bound has to be
    # re-evaluated and the index has to skip the freshly inserted element.
    # (A `for i in range(len(...))` loop would ignore both and would never
    # reach the trailing alignments after an insertion.)
    i = 0
    while i < len(sorted_algns):
        algn = sorted_algns[i]
        if algn["dist_to_5"] - last_5_pos > max_inter_align_gap:
            new_algn = empty_alignment()
            new_algn["dist_to_5"] = last_5_pos
            new_algn["algn_read_span"] = algn["dist_to_5"] - last_5_pos
            new_algn["read_len"] = algn["read_len"]
            new_algn["dist_to_3"] = new_algn["read_len"] - algn["dist_to_5"]

            last_5_pos = algn["dist_to_5"] + algn["algn_read_span"]

            sorted_algns.insert(i, new_algn)
            i += 2  # skip both the inserted gap and the current alignment
        else:
            last_5_pos = max(last_5_pos, algn["dist_to_5"] + algn["algn_read_span"])
            i += 1
def parse_read(
    sams1,
    sams2,
    min_mapq,
    max_molecule_size,
    max_inter_align_gap,
    walks_policy,
    sam_tags,
    store_seq,
    report_mismatches=False,
):
    """
    Parse sam entries corresponding to a single read (or Hi-C molecule)
    into pairs of alignments.

    Parameters
    ----------
    sams1, sams2 : lists
        Sam entries of the left and the right side of the read.
    min_mapq, sam_tags, store_seq, report_mismatches
        Passed to `parse_pysam_entry` for every sam entry.
    max_molecule_size
        Passed to `rescue_walk` / `parse_complex_walk`.
    max_inter_align_gap
        Passed to `normalize_alignment_list`.
    walks_policy : str
        One of "all", "mask", "5any", "5unique", "3any", "3unique";
        only consulted for the emptiness check and for chimeric reads.

    Returns
    -------
    stream: iterator
        Each element is a triplet: (algn1, algn2, pair_index)
        algn1, algn2: dict
            Two alignments selected for reporting as a Hi-C pair.
        pair_index
            pair index of a pair in the molecule.
    algns1, algns2: lists
        All alignments, sorted according to their order on a read.

    Raises
    ------
    ValueError
        If the read is chimeric and `walks_policy` is not one of the
        supported values.
    """

    # Check if there is at least one sam entry per side.
    # With walks_policy "all" a one-sided read is still usable as long as
    # that side has two or more entries:
    if walks_policy == "all":
        is_empty = (len(sams1) == 0 and len(sams2) < 2) or (
            len(sams2) == 0 and len(sams1) < 2
        )
    else:
        is_empty = (len(sams1) == 0) or (len(sams2) == 0)
    if is_empty:
        # Report a single placeholder pair of type "X" on both sides:
        algns1 = [empty_alignment()]
        algns2 = [empty_alignment()]
        algns1[0]["type"] = "X"
        algns2[0]["type"] = "X"
        pair_index = (1, "R1-2")
        return iter([(algns1[0], algns2[0], pair_index)]), algns1, algns2

    # Generate a sorted, gap-filled list of all alignments
    algns1 = [
        parse_pysam_entry(
            sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches
        )
        for sam in sams1
    ]
    algns2 = [
        parse_pysam_entry(
            sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches
        )
        for sam in sams2
    ]

    algns1 = normalize_alignment_list(algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)
    algns2 = normalize_alignment_list(algns2, 2, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap)

    # By default, assume each molecule is a single pair with single unconfirmed pair:
    hic_algn1 = algns1[0]
    hic_algn2 = algns2[0]
    pair_index = (1, "R1-2")

    # Define the type of alignment on each side:
    is_chimeric_1 = len(algns1) > 1
    is_chimeric_2 = len(algns2) > 1

    # Parse chimeras
    if is_chimeric_1 or is_chimeric_2:

        # Report all the linear alignments in a read pair
        if walks_policy == "all":
            # Report linear alignments after deduplication of complex walks with default settings:
            return (
                parse_complex_walk(
                    algns1,
                    algns2,
                    max_molecule_size,
                    report_position="outer",
                    report_orientation="pair",
                ),
                algns1,
                algns2,
            )

        elif walks_policy in ["mask", "5any", "5unique", "3any", "3unique"]:
            # Report only two alignments for a read pair
            rescued_linear_side = rescue_walk(algns1, algns2, max_molecule_size)

            # Walk was rescued as a simple walk:
            if rescued_linear_side is not None:
                pair_index = (1, "R1" if rescued_linear_side == 1 else "R2")
            # Walk is unrescuable:
            else:
                if walks_policy == "mask":
                    # Work on copies so that the entries of algns1/algns2 stay intact:
                    hic_algn1 = mask_alignment(dict(hic_algn1))
                    hic_algn2 = mask_alignment(dict(hic_algn2))
                    hic_algn1["type"] = "W"
                    hic_algn2["type"] = "W"

                elif walks_policy == "5any":
                    hic_algn1 = algns1[0]
                    hic_algn2 = algns2[0]

                elif walks_policy == "5unique":
                    # First uniquely mapped alignment on each side,
                    # falling back to the 5'-most one:
                    hic_algn1 = algns1[0]
                    for algn in algns1:
                        if algn["is_mapped"] and algn["is_unique"]:
                            hic_algn1 = algn
                            break

                    hic_algn2 = algns2[0]
                    for algn in algns2:
                        if algn["is_mapped"] and algn["is_unique"]:
                            hic_algn2 = algn
                            break

                elif walks_policy == "3any":
                    hic_algn1 = algns1[-1]
                    hic_algn2 = algns2[-1]

                elif walks_policy == "3unique":
                    # Last uniquely mapped alignment on each side,
                    # falling back to the 3'-most one:
                    hic_algn1 = algns1[-1]
                    for algn in algns1[::-1]:
                        if algn["is_mapped"] and algn["is_unique"]:
                            hic_algn1 = algn
                            break

                    hic_algn2 = algns2[-1]
                    for algn in algns2[::-1]:
                        if algn["is_mapped"] and algn["is_unique"]:
                            hic_algn2 = algn
                            break

                # DEPRECATED: lower-case reported walks on the chimeric side.
                # Only the copying is still active; the lower-casing itself is disabled.
                # NOTE(review): block nesting here is reconstructed from a
                # whitespace-collapsed source - confirm against the repository.
                if walks_policy != "mask":
                    if is_chimeric_1:
                        hic_algn1 = dict(hic_algn1)
                        # hic_algn1["type"] = hic_algn1["type"].lower()
                    if is_chimeric_2:
                        hic_algn2 = dict(hic_algn2)
                        # hic_algn2["type"] = hic_algn2["type"].lower()

        else:
            raise ValueError(f"Walks policy {walks_policy} is not supported.")

    return iter([(hic_algn1, hic_algn2, pair_index)]), algns1, algns2
def parse2_read(
    sams1,
    sams2,
    min_mapq,
    max_inter_align_gap,
    max_insert_size,
    single_end,
    report_position="outer",
    report_orientation="pair",
    sam_tags=None,
    dedup_max_mismatch=3,
    store_seq=False,
    report_mismatches=False,
    expand=False,
    max_expansion_depth=None,
):
    """
    Parse sam entries corresponding to a Hi-C molecule into alignments
    in parse2 mode for a Hi-C pair.

    Parameters
    ----------
    sams1, sams2 : lists
        Sam entries of the left and the right side of the read. In
        single-end mode only `sams2` is read.
    min_mapq, sam_tags, store_seq, report_mismatches
        Passed to `parse_pysam_entry`; `sam_tags=None` means no tags.
    max_inter_align_gap
        Passed to `normalize_alignment_list`.
    max_insert_size, dedup_max_mismatch, expand, max_expansion_depth
        Passed to `parse_complex_walk`.
    single_end : bool
        Whether single-end data is provided.
    report_position, report_orientation : str
        Passed to `parse_complex_walk` / `format_pair`.

    Returns
    -------
    stream: iterable
        Each element is a triplet: (algn1, algn2, pair_index)
        algn1, algn2: dict
            Two alignments selected for reporting as a Hi-C pair.
        pair_index
            pair index of a pair in the molecule, a tuple: (1, "R1-2")
    algns1, algns2: lists
        All alignments, sorted according to their order on a read.
    """
    # None instead of a shared mutable [] default; both are falsy for parse_pysam_entry
    if sam_tags is None:
        sam_tags = []

    # Single-end mode:
    if single_end:
        # Generate a sorted, gap-filled list of all alignments
        algns1 = [
            parse_pysam_entry(
                sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches
            )
            for sam in sams2  # note sams2, that's how these reads are typically parsed
        ]
        algns1 = normalize_alignment_list(
            algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap
        )
        algns2 = []  # Empty alignment dummy

        if len(algns1) > 1:
            # Look for ligation pair, and report linear alignments after deduplication of complex walks:
            # (Note that coordinate system for single-end reads does not change the behavior)
            output = parse_complex_walk(
                algns1,
                algns2,
                max_insert_size,
                report_position,
                report_orientation,
                dedup_max_mismatch,
                expand,
                max_expansion_depth,
            )
            # Pairs spanning both reads make no sense for single-end data:
            output = [x for x in output if x[-1][-1] != "R1-2"]
            return (output, algns1, algns2)
        elif len(algns1) == 1:
            # If no additional information, we assume each molecule is a single ligation with single unconfirmed pair:
            algn2 = empty_alignment()
            pair_index = (1, "R1")
            return iter([(algns1[0], algn2, pair_index)]), algns1, algns2
        else:
            # Defensive fallback for an empty alignment list:
            algn1 = empty_alignment()
            algn2 = empty_alignment()
            pair_index = (1, "R1")
            return iter([(algn1, algn2, pair_index)]), algns1, algns2

    # Paired-end mode:
    else:
        # Check if there is at least one SAM entry per side
        # (a one-sided read with two or more entries is still usable):
        is_empty = (len(sams1) == 0 and len(sams2) < 2) or (
            len(sams2) == 0 and len(sams1) < 2
        )
        if is_empty:
            algns1 = [empty_alignment()]
            algns2 = [empty_alignment()]
            algns1[0]["type"] = "X"
            algns2[0]["type"] = "X"
            pair_index = (1, "R1-2")
            return iter([(algns1[0], algns2[0], pair_index)]), algns1, algns2

        # Generate a sorted, gap-filled list of all alignments
        algns1 = [
            parse_pysam_entry(
                sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches
            )
            for sam in sams1
        ]
        algns2 = [
            parse_pysam_entry(
                sam, min_mapq, sam_tags, store_seq, report_mismatches=report_mismatches
            )
            for sam in sams2
        ]
        algns1 = normalize_alignment_list(
            algns1, 1, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap
        )
        algns2 = normalize_alignment_list(
            algns2, 2, sort_by="dist_to_5", max_inter_align_gap=max_inter_align_gap
        )

        is_chimeric_1 = len(algns1) > 1
        is_chimeric_2 = len(algns2) > 1

        if is_chimeric_1 or is_chimeric_2:
            # If at least one side is chimera, we must look for ligation pair, and
            # report linear alignments after deduplication of complex walks:
            return (
                parse_complex_walk(
                    algns1,
                    algns2,
                    max_insert_size,
                    report_position,
                    report_orientation,
                    dedup_max_mismatch,
                    expand,
                    max_expansion_depth,
                ),
                algns1,
                algns2,
            )
        else:
            # If no additional information, we assume each molecule is a single ligation with single unconfirmed pair:
            pair_index = (1, "R1-2")
            algn1, algn2, pair_index = format_pair(
                algns1[0],
                algns2[0],
                pair_index=pair_index,
                report_position=report_position,
                report_orientation=report_orientation,
            )
            return iter([(algn1, algn2, pair_index)]), algns1, algns2
def rescue_walk(algns1, algns2, max_molecule_size):
    """
    Rescue a single ligation that appears as a walk.

    Checks if a molecule with three alignments could be formed via a single
    ligation between two fragments, where one fragment was so long that it
    got sequenced on both sides.

    Uses three criteria:
    1) the 3'-end alignment on one side maps to the same chromosome as the
    alignment fully covering the other side (i.e. the linear alignment)
    2) the two alignments point towards each other on the chromosome
    3) the distance between the outer ends of the two alignments is below
    the specified threshold.

    Alternatively, a single ligation gets rescued when the 3' sub-alignment
    maps to multiple locations or no locations at all.

    In the case of a successful rescue, tags the 3' sub-alignment with
    type='X' and the linear alignment on the other side with type='R'
    (the input dictionaries are modified in place).

    Parameters
    ----------
    algns1, algns2 : lists of dict
        Alignments of the left and right side, ordered from 5' to 3'.
    max_molecule_size : int
        Largest molecule size accepted by criterion 3.

    Returns
    -------
    linear_side : int or None
        In the case of a successful rescue, the index (1 or 2) of the side
        with a linear alignment; None otherwise.
    """

    # Can rescue only pairs with one chimeric alignment with two parts
    # (this also covers the "nothing to rescue" case of <=1 alignment per side):
    if (len(algns1), len(algns2)) not in ((1, 2), (2, 1)):
        return None

    first_read_is_chimeric = len(algns1) > 1
    chimeric_side, linear_side = (
        (algns1, algns2) if first_read_is_chimeric else (algns2, algns1)
    )
    chim3_algn = chimeric_side[1]
    linear_algn = linear_side[0]

    # the linear alignment must be uniquely mapped
    if not (linear_algn["is_mapped"] and linear_algn["is_unique"]):
        return None

    can_rescue = True
    # we automatically rescue chimeric alignments with null and non-unique
    # alignments at the 3' side, so the geometric checks apply only when the
    # 3' part itself is mapped AND unique. (Testing uniqueness of the 5' part
    # here would compare placeholder coordinates of a multi-mapped 3' part.)
    if chim3_algn["is_mapped"] and chim3_algn["is_unique"]:
        # 1) in rescued walks, the 3' alignment of the chimeric alignment must be on
        # the same chromosome as the linear alignment on the opposite side of the
        # molecule
        can_rescue &= chim3_algn["chrom"] == linear_algn["chrom"]

        # 2) in rescued walks, the 3' supplemental alignment of the chimeric
        # alignment and the linear alignment on the opposite side must point
        # towards each other
        can_rescue &= chim3_algn["strand"] != linear_algn["strand"]
        if linear_algn["strand"] == "+":
            can_rescue &= linear_algn["pos5"] < chim3_algn["pos5"]
        else:
            can_rescue &= linear_algn["pos5"] > chim3_algn["pos5"]

        # 3) in single ligations appearing as walks, we can infer the size of
        # the molecule and this size must be smaller than the maximal size of
        # Hi-C molecules after the size selection step of the Hi-C protocol
        if linear_algn["strand"] == "+":
            molecule_size = (
                chim3_algn["pos5"]
                - linear_algn["pos5"]
                + chim3_algn["dist_to_5"]
                + linear_algn["dist_to_5"]
            )
        else:
            molecule_size = (
                linear_algn["pos5"]
                - chim3_algn["pos5"]
                + chim3_algn["dist_to_5"]
                + linear_algn["dist_to_5"]
            )

        can_rescue &= molecule_size <= max_molecule_size

    if not can_rescue:
        return None

    # changing the type of the 3' alignment on the chimeric side,
    # does not show up in the output:
    chim3_algn["type"] = "X"
    linear_algn["type"] = "R"
    return 1 if first_read_is_chimeric else 2
def parse_complex_walk(
    algns1,
    algns2,
    max_insert_size,
    report_position,
    report_orientation,
    dedup_max_mismatch=3,
    expand=False,
    max_expansion_depth=None,
):
    """
    Parse a set of ligations that appear as a complex walk.
    This procedure is equivalent to intramolecular deduplication that preserved pair order in a walk.

    :param algns1: List of sequential left alignments
    :param algns2: List of sequential right alignments
    :param max_insert_size: maximum insert size when searching for overlapping ends of R1 and R2
    :param report_position: one of "outer", "junction", "read", "walk"; sets pos5 and pos3
    :param report_orientation: one of "pair", "junction", "read", "walk"; sets strand
    :param dedup_max_mismatch: allowed mismatch between intramolecular alignments to detect readthrough duplicates
    :param expand: perform combinatorial expansion of pairs or not
    :param max_expansion_depth: maximum depth (number of segments separating pair). All by default.
    :return: iterator with parsed pairs, sorted by the numeric part of the pair index
    :raises AssertionError: if report_position or report_orientation is not one of the listed values

    **Intramolecular deduplication**

     Forward read (left):                  right read (right):
    5'------------------------->3'     3'<--------------------------5'
             algns1                              algns2
    <5---3><5---3><5---3><5---3>        <3---5><3---5><3---5><3---5>
       l0     l1    l2     l3              r3     r2     r1    r0

    Alignment - bwa mem reported hit or alignment after gaps conversion.
    Left and right alignments (algns1: [l0, l1, l2, l3], algns2: [r0, r1, r2, r3])
    - alignments on left and right reads reported from 5' to 3' orientation.

    Intramolecular deduplication consists of three steps:
    I. iterative search of overlapping alignment pairs (aka overlap),
    II. if no overlaps or search not possible (less than 2 alignments on either sides),
    search for overlap of end alignments (aka partial overlap).
    III. report pairs before the overlap, deduplicated pairs of overlap and pairs after that.

    Iterative search of overlap is in fact scanning of the right read pairs for the hit
    with the 3'-most pair of the left read:
        1. Initialize.
            Start from 3' of left and right reads. Set `current_left_pair` and `current_right_pair` pointers
        2. Initial compare.
            Compare pairs l2-l3 and r3-r2 by `pairs_overlap`.
                If successful, we found the overlap, go to reporting.
                If unsuccessful, continue search.
        3. Increment.
            Shift `current_right_pair` pointer by one (e.g., take the pair r2-r1).
        4. Check.
            Check that this pair can form a potential overlap with left alignments:
            the number of pairs downstream from l2-l3 on left read should not be less than
            the number of pairs upstream from r2-r1 on right read.
                If overlap cannot be formed, no other overlap in this complex walk is possible, safely exit.
                If the potential overlap can be formed, continue comparison.
        5. Compare.
            Compare the current pair of pairs on left and right reads.
                If comparison fails, go to step 3.
                If comparison is successful, go to 6.
        6. Verify.
            Check that downstream pairs on the left read overlap with the upstream pairs on the right read.
                If yes, exit.
                If not, we do not have an overlap, go to step 3.
    """

    # Validate the reporting modes (checked with assert, i.e. skipped under `python -O`):
    AVAILABLE_REPORT_POSITION = ["outer", "junction", "read", "walk"]
    assert report_position in AVAILABLE_REPORT_POSITION, (
        f"Cannot report position {report_position}, as it is not implemented"
        f'Available choices are: {", ".join(AVAILABLE_REPORT_POSITION)}'
    )

    AVAILABLE_REPORT_ORIENTATION = ["pair", "junction", "read", "walk"]
    assert report_orientation in AVAILABLE_REPORT_ORIENTATION, (
        f"Cannot report orientation {report_orientation}, as it is not implemented"
        f'Available choices are: {", ".join(AVAILABLE_REPORT_ORIENTATION)}'
    )

    output_pairs = []

    # Initialize (step 1).
    n_algns1 = len(algns1)
    n_algns2 = len(algns2)
    # Both pointers count pairs from the 3' end of the respective read (1 = 3'-most pair):
    current_left_pair = current_right_pair = 1
    remaining_left_pairs = (
        n_algns1 - 1
    )  # Number of possible pairs remaining on left read
    remaining_right_pairs = (
        n_algns2 - 1
    )  # Number of possible pairs remaining on right read
    checked_right_pairs = (
        0  # Number of checked pairs on right read (from the end of read)
    )
    is_overlap = False

    # I. Iterative search of overlap, at least two alignments on each side:
    if (n_algns1 >= 2) and (n_algns2 >= 2):
        # Iteration includes check (step 4):
        while (remaining_left_pairs > checked_right_pairs) and (
            remaining_right_pairs > 0
        ):
            pair1 = (algns1[-current_left_pair - 1], algns1[-current_left_pair])
            pair2 = (algns2[-current_right_pair - 1], algns2[-current_right_pair])
            # Compare (initial or not, step 2 or 5):
            is_overlap = pairs_overlap(
                pair1, pair2, dedup_max_mismatch=dedup_max_mismatch
            )
            if is_overlap:
                # Temporary pointers, so that a failed verification does not
                # disturb the main scan:
                last_idx_left_temp = current_left_pair
                last_idx_right_temp = current_right_pair
                checked_right_temp = checked_right_pairs
                # Verify (step 6): walk upstream on the left read and downstream
                # on the right read over all already skipped right pairs
                while is_overlap and (checked_right_temp > 0):
                    last_idx_left_temp += 1
                    last_idx_right_temp -= 1
                    pair1 = (
                        algns1[-last_idx_left_temp - 1],
                        algns1[-last_idx_left_temp],
                    )
                    pair2 = (
                        algns2[-last_idx_right_temp - 1],
                        algns2[-last_idx_right_temp],
                    )
                    is_overlap &= pairs_overlap(
                        pair1, pair2, dedup_max_mismatch=dedup_max_mismatch
                    )
                    checked_right_temp -= 1
                if is_overlap:  # exit
                    current_right_pair += 1
                    break

            # Increment pointers (step 3)
            current_right_pair += 1
            checked_right_pairs += 1
            remaining_right_pairs -= 1

        # No overlap found, roll the current_idx_right back to the initial value:
        if not is_overlap:
            current_right_pair = 1

    # last_reported_alignment_left/right: number of 3'-end alignments of each
    # read that are already accounted for and must be excluded from the
    # sequential reporting in step III.
    if (n_algns2 == 0):
        # No right alignments at all: every sequential left pair is reported in step III
        last_reported_alignment_left = 1
        last_reported_alignment_right = 0
    else:
        # II. Search of partial overlap if there are less than 2 alignments at either sides, or no overlaps found
        if (current_right_pair == 1):
            last_reported_alignment_left = last_reported_alignment_right = 1
            if partial_overlap(
                algns1[-1],
                algns2[-1],
                max_insert_size=max_insert_size,
                dedup_max_mismatch=dedup_max_mismatch,
            ):
                if (
                    n_algns1 >= 2
                ):  # single alignment on right read and multiple alignments on left
                    pair_index = (len(algns1) - 1, "R1")
                    output_pairs.append(
                        format_pair(
                            algns1[-2],
                            algns1[-1],
                            pair_index=pair_index,
                            algn2_pos3=algns2[-1]["pos5"],
                            report_position=report_position,
                            report_orientation=report_orientation,
                        )
                    )
                    last_reported_alignment_left = 2  # set the pointer for reporting

                if (
                    n_algns2 >= 2
                ):  # single alignment on left read and multiple alignments on right
                    pair_index = (len(algns1), "R2")
                    output_pairs.append(
                        format_pair(
                            algns2[-1],
                            algns2[-2],
                            pair_index=pair_index,
                            algn1_pos3=algns1[-1]["pos5"],
                            report_position=report_position,
                            report_orientation=report_orientation,
                        )
                    )
                    last_reported_alignment_right = 2  # set the pointer for reporting

                # Note that if n_algns1==n_algns2==1 and alignments overlap, then we don't need to check,
                # it's a non-ligated DNA fragment that we don't report.

            else:  # end alignments do not overlap, report regular pair:
                pair_index = (len(algns1), "R1-2")
                output_pairs.append(
                    format_pair(
                        algns1[-1],
                        algns2[-1],
                        pair_index=pair_index,
                        report_position=report_position,
                        report_orientation=report_orientation,
                    )
                )

        else:  # there was an overlap, set some pointers:
            last_reported_alignment_left = (
                last_reported_alignment_right
            ) = current_right_pair

    # III. Report all remaining alignments.
    # Report all unique alignments on left read (sequential):
    for i in range(0, n_algns1 - last_reported_alignment_left):
        pair_index = (i + 1, "R1")
        output_pairs.append(
            format_pair(
                algns1[i],
                algns1[i + 1],
                pair_index=pair_index,
                report_position=report_position,
                report_orientation=report_orientation,
            )
        )

    # Report the pairs where both left alignments overlap right:
    for i_overlapping in range(current_right_pair - 1):
        idx_left = n_algns1 - current_right_pair + i_overlapping
        idx_right = n_algns2 - 1 - i_overlapping
        pair_index = (idx_left + 1, "R1&2")
        output_pairs.append(
            format_pair(
                algns1[idx_left],
                algns1[idx_left + 1],
                pair_index=pair_index,
                algn2_pos3=algns2[idx_right - 1]["pos5"],
                report_position=report_position,
                report_orientation=report_orientation,
            )
        )

    # Report all the sequential chimeric pairs in the right read, but not the overlap:
    reporting_order = range(
        0, min(current_right_pair, n_algns2 - last_reported_alignment_right)
    )
    for i in reporting_order:
        # Determine the pair index depending on what is the overlap:
        shift = -1 if current_right_pair > 1 else 0
        pair_index = (
            (
                n_algns1
                + min(current_right_pair, n_algns2 - last_reported_alignment_right)
                - i
                + shift
            ),
            "R2",
        )
        output_pairs.append(
            format_pair(
                algns2[i + 1],
                algns2[i],
                pair_index=pair_index,
                report_position=report_position,
                report_orientation=report_orientation,
            )
        )

    # Sort the pairs according to the pair index:
    output_pairs.sort(key=lambda x: int(x[-1][0]))

    if expand:
        output_pairs = expand_pairs(output_pairs, max_expansion_depth)

    return iter(output_pairs)
def expand_pairs(pairs_list, max_expansion_depth=None):
    """
    Perform combinatorial expansion of the pairs.

    Every input pair is yielded unchanged; in addition, the first alignment
    of a pair is combined with the second alignment of each pair that has a
    greater pair index. Expanded pairs get the type "E{distance}_{type}".

    Parameters
    ----------
    pairs_list:
        List of formatted pairs (triplets: algn1, algn2, pair_index),
        where pair_index is a tuple (order, type).
    max_expansion_depth:
        maximum depth of expansion; all by default (None). When a number is
        given, only combinations within that distance are produced, and
        combinations of the "R1-2" type are dropped.

    Yields
    ------
    triplets (algn1, algn2, pair_index) of regular and expanded pairs

    Raises
    ------
    ValueError
        For a combination of pair types that cannot be merged.
    """

    def combined_type(first_type, second_type):
        # Type of the pair built from the pairs of the two given types
        if "R1-2" in (first_type, second_type) or (
            (first_type, second_type) == ("R1", "R2")
        ):
            return "R1-2"
        if first_type == second_type:
            return first_type
        if first_type == "R1&2":
            return second_type
        if second_type == "R1&2":
            return first_type
        raise ValueError(
            f"Unexpected error, pair types: {first_type}, {second_type}"
        )

    for left_algn, left_partner, index_a in pairs_list:
        for _unused, right_algn, index_b in pairs_list:
            # Only look forward along the walk:
            if index_a > index_b:
                continue

            if index_a == index_b:
                # output regular pair with no change
                yield left_algn, left_partner, index_a
                continue

            order_a, type_a = index_a
            order_b, type_b = index_b
            distance = order_b - order_a
            merged_type = combined_type(type_a, type_b)

            if max_expansion_depth is not None:
                crosses_reads = merged_type == "R1-2"
                if distance > max_expansion_depth or crosses_reads:
                    continue

            yield left_algn, right_algn, (order_a, f"E{distance}_{merged_type}")
def partial_overlap(algn1, algn2, max_insert_size=500, dedup_max_mismatch=5):
    """
    Decide whether the ends of two alignments overlap.

    Two uniquely mapped alignments overlap if:
    1) they are from the same chromosome,
    2) map in the opposite directions,
    3) the distance between the outer ends of the two alignments is not
       above max_insert_size,
    4) the distance between the outer ends of the two alignments is not
       below the size of the longer alignment.
    (4) guarantees that the alignments point towards each other on the chromosomes.

    Both distance checks are relaxed by dedup_max_mismatch.

    Return: 1 if the alignments overlap or both have troubles with unique mapping,
            0 if they do not overlap or if we do not have enough information
            (e.g. only one of the alignments has troubles with being mapped).
    """
    first_usable = algn1["is_mapped"] and algn1["is_unique"]
    second_usable = algn2["is_mapped"] and algn2["is_unique"]

    # Alignments with no match or with multiple matches are counted as overlaps
    if not first_usable and not second_usable:
        return 1

    # We assume that successful alignment cannot be an overlap with unmapped or multi-mapped region
    if not (first_usable and second_usable):
        return 0

    # Both alignments are mapped and unique
    if algn1["strand"] == "+":
        first_span = algn1["pos3"] - algn1["pos5"]
        second_span = algn2["pos5"] - algn2["pos3"]
        distance_outer_ends = algn2["pos5"] - algn1["pos5"]
    else:
        first_span = algn1["pos5"] - algn1["pos3"]
        second_span = algn2["pos3"] - algn2["pos5"]
        distance_outer_ends = algn1["pos5"] - algn2["pos5"]
    longest_span = max(first_span, second_span)

    conditions = (
        algn1["chrom"] == algn2["chrom"],
        algn1["strand"] != algn2["strand"],
        distance_outer_ends <= max_insert_size + dedup_max_mismatch,
        distance_outer_ends >= longest_span - dedup_max_mismatch,
    )
    return 1 if all(conditions) else 0
def pairs_overlap(algns1, algns2, dedup_max_mismatch=3):
    """
    Tell whether the two-alignment walks of the left and right reads
    describe the same pair of fragments.

    algns1 holds the left-read alignments (5' one first), algns2 holds the
    right-read alignments (5' one first):

    left read:                          right read:
    ---------------------------->      <----------------------------
    left_5'-algn   left_3'-algn        right_3'-algn   right_5'-algn

    The walks overlap when
    1) left_5'-algn and right_3'-algn are both not uniquely mapped, or are
       both uniquely mapped to the same chromosome on opposite strands,
    2) the same holds for left_3'-algn and right_5'-algn,
    3) pos3 of left_5'-algn is within dedup_max_mismatch of pos5 of
       right_3'-algn, and
    4) pos5 of left_3'-algn is within dedup_max_mismatch of pos3 of
       right_5'-algn.

    Return: 1 if the pairs of alignments overlap, 0 otherwise.
    """
    left_outer, left_inner = algns1[0], algns1[1]
    right_outer, right_inner = algns2[0], algns2[1]

    # A successful alignment cannot overlap an unmapped or multi-mapped one,
    # so the mapping status of all four alignments is resolved first:
    status = {
        id(algn): algn["is_mapped"] and algn["is_unique"]
        for algn in (left_outer, left_inner, right_outer, right_inner)
    }

    def _same_fragment(algn_a, algn_b):
        ok_a, ok_b = status[id(algn_a)], status[id(algn_b)]
        if not ok_a and not ok_b:
            return True
        if not ok_a or not ok_b:
            return False
        return (algn_a["chrom"] == algn_b["chrom"]) and (
            algn_a["strand"] != algn_b["strand"]
        )

    outer_matches = _same_fragment(left_outer, right_inner)
    inner_matches = _same_fragment(left_inner, right_outer)

    # Junction coordinates are compared regardless of the checks above:
    outer_gap = abs(left_outer["pos3"] - right_inner["pos5"])
    inner_gap = abs(left_inner["pos5"] - right_outer["pos3"])
    junction_matches = (outer_gap <= dedup_max_mismatch) and (
        inner_gap <= dedup_max_mismatch
    )

    return 1 if (outer_matches and inner_matches and junction_matches) else 0
Replace reported 5'-position of the alignment 1 with this value :param algn1_pos3: Replace reported 3'-position of the alignment 1 with this value :param algn2_pos5: Replace reported 5'-position of the alignment 2 with this value :param algn2_pos3: Replace reported 3'-position of the alignment 2 with this value """ # Make sure the original data is not modified: hic_algn1, hic_algn2 = dict(hic_algn1), dict(hic_algn2) # Adjust the 5' and 3'-ends: hic_algn1["pos5"] = algn1_pos5 if not algn1_pos5 is None else hic_algn1["pos5"] hic_algn1["pos3"] = algn1_pos3 if not algn1_pos3 is None else hic_algn1["pos3"] hic_algn2["pos5"] = algn2_pos5 if not algn2_pos5 is None else hic_algn2["pos5"] hic_algn2["pos3"] = algn2_pos3 if not algn2_pos3 is None else hic_algn2["pos3"] hic_algn1["type"] = ( "N" if not hic_algn1["is_mapped"] else "M" if not hic_algn1["is_unique"] else "U" ) hic_algn2["type"] = ( "N" if not hic_algn2["is_mapped"] else "M" if not hic_algn2["is_unique"] else "U" ) # Change orientation and positioning of pair for reporting: # AVAILABLE_REPORT_POSITION = ["outer", "pair", "read", "walk"] # AVAILABLE_REPORT_ORIENTATION = ["pair", "pair", "read", "walk"] pair_type = pair_index[1] if report_orientation == "read": pass elif report_orientation == "walk": if pair_type == "R2": hic_algn1 = flip_orientation(hic_algn1) hic_algn2 = flip_orientation(hic_algn2) elif pair_type == "R1-2": hic_algn2 = flip_orientation(hic_algn2) elif report_orientation == "pair": if pair_type == "R1" or pair_type == "R1&2": hic_algn2 = flip_orientation(hic_algn2) elif pair_type == "R2": hic_algn1 = flip_orientation(hic_algn1) elif report_orientation == "junction": if pair_type == "R1" or pair_type == "R1&2": hic_algn1 = flip_orientation(hic_algn1) elif pair_type == "R2": hic_algn2 = flip_orientation(hic_algn2) else: hic_algn1 = flip_orientation(hic_algn1) hic_algn2 = flip_orientation(hic_algn2) if report_position == "read": pass elif report_position == "walk": if pair_type == "R2": hic_algn1 = 
flip_position(hic_algn1) hic_algn2 = flip_position(hic_algn2) elif pair_type == "R1-2": hic_algn2 = flip_position(hic_algn2) elif report_position == "outer": if pair_type == "R1" or pair_type == "R1&2": hic_algn2 = flip_position(hic_algn2) elif pair_type == "R2": hic_algn1 = flip_position(hic_algn1) elif report_position == "junction": if pair_type == "R1" or pair_type == "R1&2": hic_algn1 = flip_position(hic_algn1) elif pair_type == "R2": hic_algn2 = flip_position(hic_algn2) elif pair_type == "R1-2": hic_algn1 = flip_position(hic_algn1) hic_algn2 = flip_position(hic_algn2) else: raise ValueError(f"Unknown pair type: {pair_type}") return [hic_algn1, hic_algn2, pair_index] def check_pair_order(algn1, algn2, chrom_enum): """ Check if a pair of alignments has the upper-triangular order or has to be flipped. """ # First, the pair is flipped according to the type of mapping on its sides. # Later, we will check it is mapped on both sides and, if so, flip the sides # according to these coordinates. has_correct_order = (algn1["is_mapped"], algn1["is_unique"]) <= ( algn2["is_mapped"], algn2["is_unique"], ) # If a pair has coordinates on both sides, it must be flipped according to # its genomic coordinates. 
def write_all_algnments(readID, all_algns1, all_algns2, out_file):
    """
    Debug utility: dump every alignment of a read to out_file before
    walks/pairs are parsed.

    One tab-separated line is written per read side. It starts with the
    read ID and the side number (1 or 2), followed by eight fields for each
    alignment, ordered by "dist_to_5": chrom, pos5, strand, mapq, cigar,
    dist_to_5, dist_to_5 + algn_read_span, matched_bp. Every field,
    including the last one, is followed by a tab.
    """
    for side_number, side_algns in enumerate((all_algns1, all_algns2), start=1):
        record = [readID, str(side_number)]
        for algn in sorted(side_algns, key=lambda entry: entry["dist_to_5"]):
            read_start = algn["dist_to_5"]
            record += [
                algn["chrom"],
                str(algn["pos5"]),
                algn["strand"],
                str(algn["mapq"]),
                str(algn["cigar"]),
                str(read_start),
                str(read_start + algn["algn_read_span"]),
                str(algn["matched_bp"]),
            ]
        # The trailing tab before the newline is part of the format:
        out_file.write("\t".join(record) + "\t" + "\n")
if drop_readid else readID, algn1["chrom"], str(algn1["pos"]), algn2["chrom"], str(algn2["pos"]), algn1["strand"], algn2["strand"], algn1["type"] + algn2["type"], ] if not drop_sam: for sams in [sams1, sams2]: if drop_seq: for sam in sams: sam.query_qualities = "" sam.query_sequence = "" cols.append( pairsam_format.INTER_SAM_SEP.join( [ sam.to_string().replace( "\t", pairsam_format.SAM_SEP ) # String representation of pysam alignment + pairsam_format.SAM_SEP + "Yt:Z:" + algn1["type"] + algn2["type"] for sam in sams ] ) ) if add_pair_index: cols.append(str(pair_index[0])) cols.append(pair_index[1]) for col in add_columns: # use get b/c empty alignments would not have sam tags (NM, AS, etc) cols.append(str(algn1.get(col, ""))) cols.append(str(algn2.get(col, ""))) out_file.write(pairsam_format.PAIRSAM_SEP.join(cols) + "\n") pairtools-1.1.3/pairtools/lib/parse_pysam.pyx000066400000000000000000000110151474715105500214060ustar00rootroot00000000000000from pysam.libcalignmentfile cimport AlignmentFile from pysam.libcalignedsegment cimport AlignedSegment, AlignmentHeader from pysam.libchtslib cimport * from pysam.libcutils cimport array_to_qualitystring cdef class AlignmentFilePairtoolized(AlignmentFile): """ Modified class that loads each entry as pairtoolozed alignment. 
cdef class AlignedSegmentPairtoolized(AlignedSegment):
    """
    AlignedSegment subclass that inherits everything from pysam and adds
    a few convenience accessors on top: `is_unique`, `is_linear` and
    `cigar_dict`.
    """

    def is_unique(self, min_mapq):
        """Return True if the read is uniquely mapped, i.e. its mapq is at least `min_mapq`."""
        return self.mapq >= min_mapq

    property is_linear:
        """True if the read is linear, i.e. it carries no SA tag."""
        def __get__(self):
            if self.has_tag('SA'):
                return False
            # for tag in self.tags:
            #     if 'SA'==tag[0]:
            #         return False
            return True

    property cigar_dict:
        """Parsed CIGAR as dictionary with interpretable fields"""
        def __get__(self):
            """Parse cigar tuples reported as cigartuples of pysam read entry.
            Reports alignment span, clipped nucleotides and more.
            See https://pysam.readthedocs.io/en/latest/api.html#pysam.AlignedSegment.cigartuples

            Returns a dict with the keys:
            "cigar" (the CIGAR string as given by pysam),
            "matched_bp" (sum of M operations),
            "algn_ref_span" (M + D), "algn_read_span" (M + I),
            "read_len" (M + I + S + H),
            "clip5_ref" / "clip3_ref" (length of the clip seen before /
            after the first matched base).
            All counters stay 0 when the read has no cigartuples.
            """
            matched_bp = 0
            algn_ref_span = 0
            algn_read_span = 0
            read_len = 0
            clip5_ref = 0
            clip3_ref = 0

            cigarstring = self.cigarstring
            cigartuples = self.cigartuples
            if cigartuples is not None:
                for operation, length in cigartuples:
                    if operation == 0:  # M, match
                        matched_bp += length
                        algn_ref_span += length
                        algn_read_span += length
                        read_len += length
                    elif operation == 1:  # I, insertion
                        algn_read_span += length
                        read_len += length
                    elif operation == 2:  # D, deletion
                        algn_ref_span += length
                    elif (
                        operation == 4 or operation == 5
                    ):  # S and H, soft clip and hard clip, respectively
                        read_len += length
                        # A clip met before any matched base is stored as the
                        # 5' clip, any later clip as the 3' clip (the last
                        # clip of each kind wins).
                        # NOTE(review): the "_ref" suffix suggests these are
                        # in reference orientation - confirm against callers.
                        if matched_bp == 0:
                            clip5_ref = length
                        else:
                            clip3_ref = length

            return {
                "clip5_ref": clip5_ref,
                "clip3_ref": clip3_ref,
                "cigar": cigarstring,
                "algn_ref_span": algn_ref_span,
                "algn_read_span": algn_read_span,
                "read_len": read_len,
                "matched_bp": matched_bp
            }
def get_chrom_phase(chrom, phase_suffixes):
    """
    Split a chromosome name into its phase label and its base name.

    Parameters
    ----------
    chrom : str
        Chromosome name, possibly ending with one of the phase suffixes.
    phase_suffixes : sequence of two str
        Suffixes marking the first and the second haplotype.

    Returns
    -------
    (phase, chrom_base) : tuple of str
        phase is "0" or "1" when chrom ends with the first or the second
        suffix (the suffix is stripped from chrom_base), and "!" when it
        ends with neither (chrom is returned unchanged).
    """
    # The first suffix has priority; the second is only looked up if needed.
    for phase_idx in (0, 1):
        suffix = phase_suffixes[phase_idx]
        if chrom.endswith(suffix):
            return str(phase_idx), chrom[: -len(suffix)]
    return "!", chrom
reported if AS > XS: # Primary hit has higher score than the secondary return phase, chrom_base, M1, M2, M3 elif len(XAs) >= 1: if len(XAs) >= 2: alt2_chrom, alt2_pos, alt2_CIGAR, alt2_NM = XAs[1].split(",") M3 = int(alt2_NM) if int(alt2_NM) == int(alt_NM) == NM: return "!", "!", M1, M2, M3 alt_chrom, alt_pos, alt_CIGAR, alt_NM = XAs[0].split(",") alt_phase, alt_chrom_base = get_chrom_phase(alt_chrom, phase_suffixes) alt_is_homologue = (chrom_base == alt_chrom_base) and ( ((phase == "0") and (alt_phase == "1")) or ((phase == "1") and (alt_phase == "0")) ) if alt_is_homologue: return ".", chrom_base, M1, M2, M3 return "!", "!", M1, M2, M3 pairtools-1.1.3/pairtools/lib/regions.pyx000066400000000000000000000030651474715105500205370ustar00rootroot00000000000000""" Moved from pairlib, library for fast regions assignment """ from cython.operator cimport dereference, postincrement, postdecrement from cpython cimport array import cython from libcpp.map cimport map from libcpp.algorithm cimport lower_bound, upper_bound from libcpp.string cimport string from libcpp.vector cimport vector import numpy as np cimport numpy as np cpdef np.ndarray assign_regs_c(np.ndarray chroms, np.ndarray pos, dict reg_dict): assert len(chroms) == len(pos) cdef int n = len(chroms) cdef np.ndarray[np.int64_t, ndim=2] result = -1 * np.ones((n, 3), dtype=np.int64) cdef map[string, vector[int]] reg_map = reg_dict cdef map[string, vector[int]].iterator reg_map_it = reg_map.begin() cdef map[string, vector[int]].iterator reg_map_end = reg_map.end() cdef vector[int].iterator lo_b, up_b cdef int position, reg_boundary_idx # this can be parallelized with prange for i in range(n): reg_map_it = reg_map.find(chroms[i]) if reg_map_it != reg_map_end: position = pos[i] up_b = upper_bound( dereference(reg_map_it).second.begin(), dereference(reg_map_it).second.end(), position) reg_boundary_idx = up_b - dereference(reg_map_it).second.begin() if reg_boundary_idx % 2 == 1: lo_b = up_b postdecrement(lo_b) result[i, 
def find_rfrag(rfrags, chrom, pos):
    """
    Find the restriction fragment that contains a genomic position.

    Parameters
    ----------
    rfrags : mapping
        Maps a chromosome name to the positions of its restriction sites.
        Each value must support `searchsorted` and indexing (e.g. a numpy
        array). Presumably sorted in ascending order - confirm against the
        code that builds it.
    chrom : str
        Chromosome of the position.
    pos : int
        Genomic position.

    Returns
    -------
    (idx, start, end) : tuple
        Index of the fragment and the two restriction sites that bound it.
        For an unmapped chromosome, or a chromosome without annotated
        restriction sites, returns
        (UNANNOTATED_RFRAG, UNMAPPED_POS, UNMAPPED_POS) from pairsam_format.

    Warns
    -----
    UserWarning
        If `chrom` is mapped but absent from `rfrags`.
    """
    unannotated = (
        pairsam_format.UNANNOTATED_RFRAG,
        pairsam_format.UNMAPPED_POS,
        pairsam_format.UNMAPPED_POS,
    )

    # Return empty if chromosome is unmapped:
    if chrom == pairsam_format.UNMAPPED_CHROM:
        return unannotated

    try:
        rsites_chrom = rfrags[chrom]
    except (KeyError, ValueError):
        # A missing key in a mapping raises KeyError; ValueError is still
        # caught so that containers relying on the previous handling keep
        # working.
        warnings.warn(
            f"Chomosome {chrom} does not have annotated restriction fragments, return empty."
        )
        return unannotated

    # Clamp the index so that both idx and idx + 1 stay inside the array,
    # even for positions before the first or after the last site:
    idx = min(
        max(0, rsites_chrom.searchsorted(pos, "right") - 1), len(rsites_chrom) - 2
    )
    return idx, rsites_chrom[idx], rsites_chrom[idx + 1]
def cartesian_df_product(df1, df2, suffixes=("1", "2")):
    """
    Return the cartesian product of the rows of two dataframes.

    Parameters
    ----------
    df1, df2 : pd.DataFrame
        Input tables. Neither may already contain a column named
        "cartesian_product_dummy".
    suffixes : sequence of two str, optional
        Suffixes appended to the column names that occur in both tables
        (first one for df1, second one for df2). Default is ("1", "2").

    Returns
    -------
    pd.DataFrame
        A table with len(df1) * len(df2) rows, where every row of df1 is
        combined with every row of df2.
    """
    # Merge on a constant helper column so that every row matches every row,
    # then drop the helper column again.
    return pd.merge(
        left=df1.assign(cartesian_product_dummy=1),
        right=df2.assign(cartesian_product_dummy=1),
        on=["cartesian_product_dummy"],
        how="outer",
        suffixes=tuple(suffixes),
    ).drop("cartesian_product_dummy", axis="columns")
def make_empty_cross_region_table(
    regions, drop_same_reg=True, split_by_strand=True, multiindex=True
):
    """
    Build a table with one row per ordered pair of regions.

    Parameters
    ----------
    regions : pd.DataFrame
        Table of regions with the columns "chrom", "start" and "end".
    drop_same_reg : bool, optional
        Drop the rows that pair a region with itself. Default is True.
    split_by_strand : bool, optional
        Repeat every pair of regions for each of the four combinations of
        strands ("strand1", "strand2"). Default is True.
    multiindex : bool, optional
        Move the region (and, if present, strand) columns into the index.
        Default is True.

    Returns
    -------
    pd.DataFrame
    """
    table = cartesian_df_product(regions, regions)

    if split_by_strand:
        strand_combos = pd.DataFrame(
            [("+", "+"), ("+", "-"), ("-", "+"), ("-", "-")],
            columns=["strand1", "strand2"],
        )
        table = cartesian_df_product(table, strand_combos)

    if drop_same_reg:
        is_same_region = (
            (table["chrom1"] == table["chrom2"])
            & (table["start1"] == table["start2"])
            & (table["end1"] == table["end2"])
        )
        table = table[~is_same_region]

    if multiindex:
        index_cols = ["chrom1", "start1", "end1", "chrom2", "start2", "end2"]
        if split_by_strand:
            index_cols = index_cols + ["strand1", "strand2"]
        table.set_index(index_cols, inplace=True)

    return table
"start", "end"]] _, region_starts1, region_ends1 = assign_regs( pairs_df.chrom1.values, pairs_df.pos1.values, regions ).T _, region_starts2, region_ends2 = assign_regs( pairs_df.chrom2.values, pairs_df.pos2.values, regions ).T pairs_reduced_df = pd.DataFrame( { "chrom1": pairs_df.chrom1.values, "start1": region_starts1, "end1": region_ends1, "chrom2": pairs_df.chrom2.values, "start2": region_starts2, "end2": region_ends2, "strand1": pairs_df.strand1.values, "strand2": pairs_df.strand2.values, "dist_bin_idx": np.searchsorted( dist_bins, np.abs(pairs_df.pos1 - pairs_df.pos2), side="right" ), "n_pairs": 1, }, copy=False, ) if not keep_unassigned: pairs_reduced_df = (pairs_reduced_df .query('(start1 >= 0) and (start2 >= 0)') # do not test for end1 and end2, as they can be -1 if regions and not specified .reset_index(drop=True)) pairs_reduced_df["min_dist"] = np.where( pairs_reduced_df["dist_bin_idx"] > 0, dist_bins[pairs_reduced_df["dist_bin_idx"] - 1], 0, ) pairs_reduced_df["max_dist"] = np.where( pairs_reduced_df["dist_bin_idx"] < len(dist_bins)-1, dist_bins[pairs_reduced_df["dist_bin_idx"]], np.iinfo(np.int64).max, ) # importantly, in the future, we may want to extend the function to plot scalings # for pairs from different regions! 
def contact_areas_same_reg(min_dist, max_dist, region_length):
    """
    Compute the number of pairs of positions within one region that are
    separated by a distance between min_dist and max_dist.

    The result is

        0.5 * (max(L - min_dist, 0) ** 2 - max(L - max_dist, 0) ** 2)

    with L = region_length. Distances longer than the region contribute
    nothing, because the differences are clipped at 0.

    Parameters
    ----------
    min_dist, max_dist : scalar or array-like
        Lower and upper bounds of the distance bin(s); converted to float.
    region_length : scalar or array-like
        Length of the region(s). Used as given, so the container type of
        the result follows this argument.

    Returns
    -------
    float or array-like
        The contact area(s), presumably in bp^2 (the caller stores it in a
        column named "n_bp2").
    """
    min_dist = _to_float(min_dist)
    max_dist = _to_float(max_dist)

    # Subtracting the float distances already promotes region_length to
    # float, so no separate conversion of region_length is needed.
    outer_areas = np.maximum(region_length - min_dist, 0) ** 2
    inner_areas = np.maximum(region_length - max_dist, 0) ** 2
    return 0.5 * (outer_areas - inner_areas)
def compute_scaling(
    pairs,
    regions=None,
    chromsizes=None,
    dist_range=(int(1e0), int(1e9)),
    n_dist_bins_decade=8,
    chunksize=int(1e7),
    ignore_trans=False,
    keep_unassigned=False,
    filter_f=None,
    nproc_in=4,
):
    """
    Compute the contact-frequency-vs-distance (aka "scaling") curve from a table of contacts.

    Parameters
    ----------
    pairs : pd.DataFrame or str or file-like object
        A table with pairs of genomic coordinates representing contacts.
        It can be a pandas DataFrame, a path to a pairs file, or a file-like
        object (anything that has a `buffer` or a `peek` attribute).
    regions : bioframe viewframe or None, optional
        Genomic regions of interest. It can be anything that can serve as input to bioframe.from_any,
        or None if not applicable.
    chromsizes : mapping or None, optional
        Chromosome sizes, used to define the regions when `regions` is None.
        It is passed on to bins_pairs_by_distance, which calls `.items()` on
        it and uses it in `Series.map`, so presumably a dict or a
        chrom-indexed pd.Series - confirm against callers.
    dist_range : tuple of int, optional
        The range of distances to calculate the scaling curve. Default is (1, 1000000000).
    n_dist_bins_decade : int, optional
        The number of distance bins per order of magnitude in a log10-space. Default is 8.
    chunksize : int, optional
        Size of chunks used when the pairs are read from a file. Default is 10000000.
    ignore_trans : bool, optional
        Ignore trans interactions or not. Default is False.
    keep_unassigned : bool, optional
        Keep pairs that are not assigned to any region or not. Default is False.
    filter_f : function or None, optional
        A function that takes a chunk of pairs and returns the filtered chunk.
        Default is None.
    nproc_in : int, optional
        Number of processes to use for reading pairs file. Default is 4.

    Returns
    -------
    sc : pd.DataFrame
        Number of pairs ("n_pairs") and contact area ("n_bp2") for each
        region, combination of strands and distance bin.
    trans_counts : pd.DataFrame or None
        Number of pairs ("n_pairs") and contact area ("n_bp2") for each pair
        of different regions and combination of strands.
        None if ignore_trans is True.

    Raises
    ------
    ValueError
        If `pairs` is neither a DataFrame, a path, nor a file-like object.
    """

    # Log-spaced bin edges: n_dist_bins_decade bins per decade of dist_range.
    dist_bins = geomspace(
        dist_range[0],
        dist_range[1],
        int(np.round(np.log10(dist_range[1]/dist_range[0])*n_dist_bins_decade))
    )

    if isinstance(pairs, pd.DataFrame):
        pairs_df = pairs
    elif isinstance(pairs, str) or hasattr(pairs, "buffer") or hasattr(pairs, "peek"):
        pairs_df, _, _ = pairsio.read_pairs(pairs, nproc=nproc_in, chunksize=chunksize)
    else:
        raise ValueError(
            "pairs must be either a path to a pairs file or a pd.DataFrame"
        )

    sc, trans_counts = None, None
    # A DataFrame is processed as a single chunk; anything else returned by
    # read_pairs is iterated chunk by chunk and the counts are accumulated.
    # NOTE(review): if no chunk is produced, sc stays None and the
    # reset_index call below fails - confirm the input is never empty.
    for pairs_chunk in [pairs_df] if isinstance(pairs_df, pd.DataFrame) else pairs_df:
        if filter_f:
            pairs_chunk = filter_f(pairs_chunk)
        sc_chunk, trans_counts_chunk = bins_pairs_by_distance(
            pairs_chunk,
            dist_bins,
            regions=regions,
            chromsizes=chromsizes,
            ignore_trans=ignore_trans,
            keep_unassigned=keep_unassigned
        )

        sc = sc_chunk if sc is None else sc.add(sc_chunk, fill_value=0)
        trans_counts = (
            trans_counts_chunk
            if trans_counts is None
            else trans_counts.add(trans_counts_chunk, fill_value=0)
        )

    # if not (isinstance(regions, pd.DataFrame) and
    #         (set(regions.columns) == set(['chrom', 'start','end']))):
    #     raise ValueError('regions must be provided as a dict or chrom-indexed Series of chromsizes or as a bedframe.')

    sc.reset_index(inplace=True)
    # Contact area of every distance bin within its region:
    sc["n_bp2"] = contact_areas_same_reg(
        sc["min_dist"], sc["max_dist"], sc["end1"] - sc["start1"]
    )

    if not ignore_trans:
        trans_counts.reset_index(inplace=True)
        # For two different regions the area is the product of their lengths:
        trans_counts["n_bp2"] = (
            (trans_counts["end1"] - trans_counts["start1"]) * (
            trans_counts["end2"] - trans_counts["start2"]
        ))

    return sc, trans_counts
def unity_norm_scaling(bins, cfreqs, norm_range=(1e4, 1e9)):
    """
    Normalize a contact-frequency-vs-distance curve so that the sum of
    (bin width * contact frequency) over the selected bins equals 1.

    Args:
        bins (array-like): Edges of the distance bins, one more than cfreqs.
        cfreqs (array-like): The contact frequencies, one per bin.
        norm_range (tuple of float or None, optional): Only bins whose
            geometric midpoint lies strictly inside this range enter the
            normalization sum; None uses all bins. Bins with a non-finite
            contact frequency are always left out of the sum.
            Defaults to (1e4, 1e9).

    Returns:
        array-like: The normalized contact frequencies.
    """
    widths = np.diff(bins)
    geometric_mids = np.sqrt(bins[1:] * bins[:-1])

    selected = np.isfinite(cfreqs)
    if norm_range is not None:
        selected = (
            selected
            & (geometric_mids > norm_range[0])
            & (geometric_mids < norm_range[1])
        )

    weighted = widths * cfreqs
    total = np.sum(1.0 * weighted[selected])
    return cfreqs / total
def evaluate_df(df, condition, type_cast=(), startup_code=None, engine="pandas"):
    """
    Evaluate a filtering expression for every row of a dataframe.

    Parameters
    ----------
    df: pd.DataFrame
        Input dataframe for evaluation. Columns listed in the type table
        are cast in place to their registered type.
    condition: str
        Expression to evaluate; it refers to the columns by their names.
    type_cast: iterable of (column, type) pairs
        Additional type transformations, if different from the default.
        They are added to the module-level TYPES table and therefore stay
        in effect for later calls.
    startup_code: str or None
        An auxiliary code to execute before filtering.
        Use to define functions that can be evaluated in the CONDITION statement.
    engine: str
        "pandas" (default) evaluates the condition with DataFrame.eval;
        any other value evaluates it row by row with Python's eval.

    Returns
    -------
    The result of DataFrame.eval for the pandas engine, otherwise a list
    with one bool per row.

    Raises
    ------
    ValueError
        If DataFrame.eval cannot evaluate the condition.
    """
    # SECURITY: `startup_code` and `condition` are executed with exec/eval,
    # so they must come from a trusted source (e.g. the command line of the
    # user running the tool), never from untrusted input.

    # Define data types:
    TYPES.update(dict(type_cast))

    # Execute startup code:
    if startup_code is not None:
        exec(startup_code, globals())

    # Set up the column formats: cast every column whose current dtype
    # differs from the registered one.
    for col in df.columns:
        if col in TYPES:
            if str(df.dtypes[col]) != TYPES[col]:
                df[col] = df[col].astype(TYPES[col])

    if engine == "pandas":
        try:
            filter_passed_output = df.eval(condition)
        except ValueError as e:
            raise ValueError(f"Try passing engine python to fix this: {e}") from e
    else:
        # Set up the columns indexing
        for i, col in enumerate(df.columns):
            condition = re.sub(r"\b%s\b" % col, "COLS[{}]".format(i), condition)

        filter_passed_output = []
        match_func = compile(condition, "", "eval")
        for _, row in df.iterrows():
            # The compiled condition looks up the local name COLS, so it has
            # to be bound in this function's scope before eval is called.
            COLS = row.values

            # Evaluate filtering expression:
            filter_passed = eval(match_func)
            filter_passed_output.append(bool(filter_passed))

    return filter_passed_output
def is_str_keyed_dict(d):
    """Check if a dictionary is string-keyed.

    Parameters
    ----------
    d: dict
        A dictionary to check.

    Returns
    -------
    bool
        True if every key is a string and no value is a dictionary,
        False otherwise (including when `d` is not a dictionary).
    """
    if not isinstance(d, dict):
        return False
    # Iterate over items: unpacking `d.keys()` into (k, v) would split each
    # key string into characters and fail for any key not of length two.
    return all(
        isinstance(k, str) and not isinstance(v, dict) for k, v in d.items()
    )
""" _SEP = "\t" _KEY_SEP = "/" DIST_FREQ_REL_DIFF_THRESHOLD = 0.05 N_DIST_BINS_DECADE_DEFAULT = 8 MIN_LOG10_DIST_DEFAULT = 0 MAX_LOG10_DIST_DEFAULT = 9 def __init__( self, min_log10_dist=MIN_LOG10_DIST_DEFAULT, max_log10_dist=MAX_LOG10_DIST_DEFAULT, n_dist_bins_decade=N_DIST_BINS_DECADE_DEFAULT, bytile_dups=False, filters=None, **kwargs, ): # Define filters and parameters for filters evaluation: if filters is not None: self.filters = filters else: self.filters = {"no_filter": ""} self.startup_code = kwargs.get("startup_code", "") self.type_cast = kwargs.get("type_cast", ()) self.engine = kwargs.get("engine", "pandas") # Define default filter: if "no_filter" not in self.filters: self.filters["no_filter"] = "" self._stat = {key: {} for key in self.filters} # some variables used for initialization: # genomic distance bining for the ++/--/-+/+- distribution log10_dist_bin_step = 1.0 / n_dist_bins_decade self._dist_bins = np.unique( np.r_[ 0, np.round( 10 ** np.arange( min_log10_dist, max_log10_dist + 0.001, log10_dist_bin_step ) ).astype(np.int_), ] ) # establish structure of an empty _stat: for key in self.filters: self._stat[key]["filter_expression"] = self.filters[key] self._stat[key]["total"] = 0 self._stat[key]["total_unmapped"] = 0 self._stat[key]["total_single_sided_mapped"] = 0 # total_mapped = total_dups + total_nodups self._stat[key]["total_mapped"] = 0 self._stat[key]["total_dups"] = 0 self._stat[key]["total_nodups"] = 0 ######################################## # the rest of stats are based on nodups: ######################################## self._stat[key]["cis"] = 0 self._stat[key]["trans"] = 0 self._stat[key]["pair_types"] = {} # to be removed: # self._stat[key]["dedup"] = {} self._stat[key]["cis_1kb+"] = 0 self._stat[key]["cis_2kb+"] = 0 self._stat[key]["cis_4kb+"] = 0 self._stat[key]["cis_10kb+"] = 0 self._stat[key]["cis_20kb+"] = 0 self._stat[key]["cis_40kb+"] = 0 self._stat[key]["summary"] = dict( [ ("frac_cis", 0), ("frac_cis_1kb+", 0), 
("frac_cis_2kb+", 0), ("frac_cis_4kb+", 0), ("frac_cis_10kb+", 0), ("frac_cis_20kb+", 0), ("frac_cis_40kb+", 0), ("frac_dups", 0), ("complexity_naive", 0), ] ) self._stat[key]["chrom_freq"] = {} self._stat[key]["dist_freq"] = { "+-": {bin.item(): 0 for bin in self._dist_bins}, "-+": {bin.item(): 0 for bin in self._dist_bins}, "--": {bin.item(): 0 for bin in self._dist_bins}, "++": {bin.item(): 0 for bin in self._dist_bins}, } self._stat[key]["chromsizes"] = {} # Summaries are derived from other stats and are recalculated on merge self._save_bytile_dups = bytile_dups if self._save_bytile_dups: self._bytile_dups = pd.DataFrame( index=pd.MultiIndex( levels=[[], []], codes=[[], []], names=["tile", "parent_tile"] ) ) self._summaries_calculated = False def __getitem__(self, key, filter="no_filter"): if isinstance(key, str): # let's strip any unintentional '/' # from either side of the key key = key.strip("/") if self._KEY_SEP in key: # multi-key to access nested elements k_fields = key.split(self._KEY_SEP) else: # single-key access flat part of PairCounter # or to access highest level of hierarchy return self._stat[filter][key] else: # clearly an error: raise ValueError("{} is not a valid key: must be str".format(key)) # K_FIELDS: # process multi-key case: # in this case key must be in ['pair_types','chrom_freq','dist_freq','dedup'] # get the first 'k' and keep the remainders in 'k_fields' k = k_fields.pop(0) if k in ["pair_types", "dedup"]: # assert there is only one element in key_fields left: # 'pair_types' and 'dedup' treated the same if len(k_fields) == 1: return self._stat[filter][k][k_fields[0]] else: raise ValueError( "{} is not a valid key: {} section implies 1 identifier".format( key, k ) ) elif k == "chrom_freq": # assert remaining key_fields == [chr1, chr2]: if len(k_fields) == 2: return self._stat[filter][k][tuple(k_fields)] else: raise ValueError( "{} is not a valid key: {} section implies 2 identifiers".format( key, k ) ) elif k == "dist_freq": # assert 
def find_dist_freq_convergence_distance(self, rel_threshold):
    """Find the distance beyond which all strand combinations agree.

    For every filter, the per-bin cis pair counts of the four strand
    combinations are compared to their per-bin average. The convergence bin
    is the last bin in which any combination deviates from that average by
    more than `rel_threshold` (relative difference).

    Parameters
    ----------
    rel_threshold : float
        Maximal tolerated relative deviation from the strand average.

    Returns
    -------
    dict
        Maps each filter name to a dict holding the convergence distance
        (the left edge of the bin following the convergence bin), the strand
        combination that determined it, the threshold, and pair counts and
        fractions below/above the convergence distance.
    """
    all_strands = ["++", "--", "-+", "+-"]
    n_bins = len(self._dist_bins)
    out = {}

    for filter_name in self.filters:
        stat = self._stat[filter_name]
        dist_freqs_by_strands = {
            strands: np.array(list(stat["dist_freq"][strands].values()))
            for strands in all_strands
        }

        # Average frequency over the four strand combinations, per bin:
        avg_freq_all_strands = np.mean(
            np.vstack(list(dist_freqs_by_strands.values())), axis=0
        )

        # Bins that are empty for all strands give 0/0; nan_to_num maps those
        # to 0, so the numpy warnings carry no information here.
        with np.errstate(divide="ignore", invalid="ignore"):
            rel_deviations = {
                strands: np.nan_to_num(
                    np.abs(dist_freqs_by_strands[strands] - avg_freq_all_strands)
                    / avg_freq_all_strands
                )
                for strands in all_strands
            }

        # Last deviating bin over all strand combinations (first one wins ties).
        convergence_bin_idx = 0
        convergence_strands = "??"
        convergence_dist = "0"  # placeholder kept as in earlier outputs
        for strands in all_strands:
            exceeding = np.flatnonzero(rel_deviations[strands] > rel_threshold)
            if exceeding.size == 0:
                continue
            idx_max = int(exceeding[-1])
            if idx_max > convergence_bin_idx:
                convergence_bin_idx = idx_max
                convergence_strands = strands
                if convergence_bin_idx + 1 < n_bins:
                    convergence_dist = self._dist_bins[convergence_bin_idx + 1]
                else:
                    # the last (open-ended) bin deviates: there is no next edge
                    convergence_dist = np.iinfo(np.int64).max

        n_below = {
            strands: dist_freqs_by_strands[strands][: convergence_bin_idx + 1].sum()
            for strands in all_strands
        }
        n_above = {
            strands: dist_freqs_by_strands[strands][convergence_bin_idx + 1 :].sum()
            for strands in all_strands
        }
        n_above_all_strands = sum(list(n_above.values()))

        result = {}
        result["convergence_dist"] = convergence_dist
        result["strands_w_max_convergence_dist"] = convergence_strands
        result["convergence_rel_diff_threshold"] = rel_threshold
        result["n_cis_pairs_below_convergence_dist"] = n_below
        result["n_cis_pairs_below_convergence_dist_all_strands"] = sum(
            list(n_below.values())
        )
        result["n_cis_pairs_above_convergence_dist_all_strands"] = n_above_all_strands

        norms = dict(cis=stat["cis"], total_mapped=stat["total_mapped"])
        if "total_nodups" in stat:
            norms["total_nodups"] = stat["total_nodups"]

        for key, norm_factor in norms.items():
            frac_below = {
                strands: n_cis_pairs / norm_factor
                for strands, n_cis_pairs in n_below.items()
            }
            result[f"frac_{key}_in_cis_below_convergence_dist"] = frac_below
            result[f"frac_{key}_in_cis_below_convergence_dist_all_strands"] = sum(
                list(frac_below.values())
            )
            result[f"frac_{key}_in_cis_above_convergence_dist_all_strands"] = (
                n_above_all_strands / norm_factor
            )

        out[filter_name] = result

    return out
@classmethod
def from_file(cls, file_handle, n_dist_bins_decade=N_DIST_BINS_DECADE_DEFAULT):
    """Create an instance of PairCounter from a tab-separated stats file.

    Every non-blank line must hold exactly two `_SEP`-separated fields:
    a key, whose levels are separated by `_KEY_SEP`, and a value.
    The content is stored under the "no_filter" filter.

    Parameters
    ----------
    file_handle: file handle
        Iterable of text lines.
    n_dist_bins_decade: int
        Number of distance bins per decade, passed to the constructor.

    Returns
    -------
    PairCounter
        new PairCounter filled with the contents of the input file

    Raises
    ------
    fileio.ParseError
        If a non-blank line does not consist of exactly two fields.
    """
    default_filter = "no_filter"
    stat_from_file = cls(n_dist_bins_decade=n_dist_bins_decade)

    raw_stat = {}
    for line in file_handle:
        stripped = line.strip()
        if not stripped:
            # Blank line. Splitting an empty string yields [""], never an
            # empty list, so emptiness has to be tested before splitting.
            continue
        key_val_pair = stripped.split(cls._SEP)
        if len(key_val_pair) != 2:
            # expect two _SEP separated values per line:
            raise fileio.ParseError(
                "{} is not a valid stats file".format(
                    getattr(file_handle, "name", "<stream>")
                )
            )
        raw_stat[key_val_pair[0]] = parse_number(key_val_pair[1])
    ## TODO: check if raw_stat does not contain any unknown keys

    # Convert flat dict to nested dict
    nested_stat = flat_dict_to_nested(raw_stat, sep=cls._KEY_SEP)
    stat = stat_from_file._stat[default_filter]
    stat.update(nested_stat)

    # chrom_freq is keyed by (chrom1, chrom2) tuples inside PairCounter:
    stat["chrom_freq"] = nested_dict_to_flat(stat["chrom_freq"], tuple_keys=True)

    # The file stores dist_freq as bin-range -> strands -> count, while
    # PairCounter keeps strands -> left-bin-edge -> count. Convert only what
    # was read from the file: the constructor default is already in the
    # internal layout, and its strand keys are not parsable as bin ranges.
    if "dist_freq" in nested_stat:
        def bin_to_left_val(bin_range):
            # "1000000+" -> 1000000 ; "1500-6000" -> 1500
            if "+" in bin_range:
                return int(bin_range.rstrip("+"))
            return int(bin_range.split("-")[0])

        by_left_edge = {bin_to_left_val(k): v for k, v in stat["dist_freq"].items()}
        stat["dist_freq"] = swap_levels_nested_dict(by_left_edge, 0, 1)

    return stat_from_file
pair_type, unmapped_chrom="!", filter="no_filter", ): """Gather statistics for a Hi-C pair and add to the PairCounter. Parameters ---------- chrom1: str chromosome of the first read pos1: int position of the first read strand1: str strand of the first read chrom2: str chromosome of the first read pos2: int position of the first read strand2: str strand of the first read pair_type: str type of the mapped pair of reads unmapped_chrom: str what string denotes chromosomes in unmapped pairs (default: "!") filter: str name of the filter toward which the pair should count (default: "no_filter") """ self._stat[filter]["total"] += 1 # collect pair type stats including DD: self._stat[filter]["pair_types"][pair_type] = ( self._stat[filter]["pair_types"].get(pair_type, 0) + 1 ) if chrom1 == unmapped_chrom and chrom2 == unmapped_chrom: self._stat[filter]["total_unmapped"] += 1 elif chrom1 != unmapped_chrom and chrom2 != unmapped_chrom: self._stat[filter]["total_mapped"] += 1 # only mapped ones can be duplicates: if pair_type == "DD": self._stat[filter]["total_dups"] += 1 else: self._stat[filter]["total_nodups"] += 1 self._stat[filter]["chrom_freq"][(chrom1, chrom2)] = ( self._stat[filter]["chrom_freq"].get((chrom1, chrom2), 0) + 1 ) if chrom1 == chrom2: self._stat[filter]["cis"] += 1 dist = np.abs(pos2 - pos1) dist_bin = self._dist_bins[ np.searchsorted(self._dist_bins, dist, "right") - 1 ] self._stat[filter]["dist_freq"][strand1 + strand2][dist_bin] += 1 for dist_kb in [1, 2, 4, 10, 20, 40]: if dist >= dist_kb * 1000: self._stat[filter][f"cis_{dist_kb}kb+"] += 1 else: self._stat[filter]["trans"] += 1 else: self._stat[filter]["total_single_sided_mapped"] += 1 def add_pairs_from_dataframe(self, df, unmapped_chrom="!"): """Gather statistics for Hi-C pairs in a dataframe and add to the PairCounter. Parameters ---------- df: pd.DataFrame DataFrame with pairs. 
Needs to have columns: 'chrom1', 'pos1', 'chrom2', 'pos2', 'strand1', 'strand2', 'pair_type' """ for key in self.filters.keys(): if key == "no_filter": df_filtered = df.copy() else: condition = self.filters[key] filter_passed = evaluate_df( df, condition, type_cast=self.type_cast, startup_code=self.startup_code, engine=self.engine, ) df_filtered = df.loc[filter_passed, :].reset_index(drop=True) total_count = df_filtered.shape[0] self._stat[key]["total"] += total_count # collect pair type stats including DD: for pair_type, type_count in ( df_filtered["pair_type"].value_counts().items() ): self._stat[key]["pair_types"][pair_type] = ( self._stat[key]["pair_types"].get(pair_type, 0) + type_count ) # Count the unmapped by the "unmapped" chromosomes (debatable, as WW are also marked as ! and they might be mapped): unmapped_count = np.logical_and( df_filtered["chrom1"] == unmapped_chrom, df_filtered["chrom2"] == unmapped_chrom, ).sum() self._stat[key]["total_unmapped"] += int(unmapped_count) # Count the mapped: df_mapped = df_filtered.loc[ (df_filtered["chrom1"] != unmapped_chrom) & (df_filtered["chrom2"] != unmapped_chrom), :, ] mapped_count = df_mapped.shape[0] self._stat[key]["total_mapped"] += mapped_count self._stat[key]["total_single_sided_mapped"] += int( total_count - (mapped_count + unmapped_count) ) # Count the duplicates: if "duplicate" in df_mapped.columns: mask_dups = df_mapped["duplicate"] else: mask_dups = df_mapped["pair_type"] == "DD" df_dups = df_mapped[mask_dups] dups_count = df_dups.shape[0] self._stat[key]["total_dups"] += int(dups_count) self._stat[key]["total_nodups"] += int(mapped_count - dups_count) df_nodups = df_mapped.loc[~mask_dups, :] mask_cis = df_nodups["chrom1"] == df_nodups["chrom2"] df_cis = df_nodups.loc[mask_cis, :].copy() # Count pairs per chromosome: for (chrom1, chrom2), chrom_count in ( df_nodups[["chrom1", "chrom2"]].value_counts().items() ): self._stat[key]["chrom_freq"][(chrom1, chrom2)] = ( 
def add_chromsizes(self, chromsizes):
    """Attach chromosome sizes to the stats of every filter.

    Parameters
    ----------
    chromsizes:
        Object with a `to_dict()` method (the original notes describe it as
        the chromsizes table read by headerops.chromsizes). The resulting
        dict is stored, as one shared object, in the "chromsizes" field of
        every filter.
    """
    sizes_by_chrom = chromsizes.to_dict()
    for per_filter_stat in self._stat.values():
        per_filter_stat["chromsizes"] = sizes_by_chrom
sum_stat._stat[filter].items(): if k != "chromsizes" and ( (k not in self._stat[filter]) or (k not in other._stat[filter]) ): # Skip any missing fields and warn logger.warning( f"{k} not found in at least one of the input stats, skipping" ) continue # not nested fields are summed trivially: if isinstance(v, int): sum_stat._stat[filter][k] = ( self._stat[filter][k] + other._stat[filter][k] ) # sum nested dicts/arrays in a context dependet manner: else: if k in ["pair_types", "dedup"]: # handy function for summation of a pair of dicts: # https://stackoverflow.com/questions/10461531/merge-and-sum-of-two-dictionaries sum_dicts = lambda dict_x, dict_y: { key: dict_x.get(key, 0) + dict_y.get(key, 0) for key in set(dict_x) | set(dict_y) } # sum a pair of corresponding dicts: sum_stat._stat[filter][k] = sum_dicts( self._stat[filter][k], other._stat[filter][k] ) elif k == "chrom_freq": # union list of keys (chr1,chr2) with potential duplicates: union_keys_with_dups = list(self._stat[filter][k].keys()) + list( other._stat[filter][k].keys() ) # dict.fromkeys will take care of keys' order and duplicates in a consistent manner: # https://stackoverflow.com/questions/1720421/how-to-concatenate-two-lists-in-python # last comment to the 3rd Answer sum_stat._stat[filter][k] = dict.fromkeys(union_keys_with_dups) # perform a summation: for union_key in sum_stat._stat[filter][k]: sum_stat._stat[filter][k][union_key] = self._stat[filter][ k ].get(union_key, 0) + other._stat[filter][k].get(union_key, 0) elif k == "dist_freq": for dirs in sum_stat[k]: from functools import reduce def reducer(accumulator, element): for key, value in element.items(): accumulator[key] = accumulator.get(key, 0) + value return accumulator sum_stat[k][dirs] = reduce( reducer, [self._stat[filter][k][dirs], other._stat[filter][k][dirs]], {}, ) # sum_stat[k][dirs] = self._stat[filter][k][dirs] + other._stat[filter][k][dirs] elif k == "chromsizes": if k in self._stat[filter] and k in other._stat[filter]: if 
def flatten(self, filter="no_filter"):
    """Return a flattened dict (formatted the same way as a .stats file).

    Performed for a single filter.

    Parameters
    ----------
    filter: str
        Name of the filter whose statistics are flattened.

    Returns
    -------
    dict
        Flat dict whose keys join the nesting levels with `_KEY_SEP`.
        Integer statistics are stored as is; empty nested sections and
        fields of any other kind are left out.

    Raises
    ------
    ValueError
        If a distance bin other than the last one is missing in "dist_freq".
    """
    sep = self._KEY_SEP
    last_bin_idx = len(self._dist_bins) - 1

    flat_stat = {}
    for k, v in self._stat[filter].items():
        # plain counters; numpy integer scalars are counters as well
        if isinstance(v, (int, np.integer)):
            flat_stat[k] = v

        # store nested dicts/arrays in a context dependent manner:
        # nested categories are stored only if they are non-trivial
        elif (k == "dist_freq") and v:
            for i, dist in enumerate(self._dist_bins):
                is_last_bin = i == last_bin_idx
                for dirs, freqs in v.items():
                    # last bin is treated differently: "100000+" vs "1200-3000":
                    if is_last_bin:
                        formatted_key = sep.join(["{}", "{}+", "{}"]).format(
                            k, dist, dirs
                        )
                    else:
                        dist_next = self._dist_bins[i + 1]
                        formatted_key = sep.join(["{}", "{}-{}", "{}"]).format(
                            k, dist, dist_next, dirs
                        )
                    try:
                        flat_stat[formatted_key] = freqs[dist]
                    except KeyError as err:
                        if not is_last_bin:
                            raise ValueError(
                                f"Error in {k} {dirs} {dist} {dist_next} {freqs}: source and destination bins do not match"
                            ) from err
                        # in some previous versions of stats, last bin was
                        # not reported, so it is written out as zero:
                        flat_stat[formatted_key] = 0

        elif (k in ["pair_types", "dedup", "chromsizes", "summary"]) and v:
            # these sections are (possibly nested) dicts, flattened alike:
            flat_stat.update(
                {
                    k + sep + k2: v2
                    for k2, v2 in nested_dict_to_flat(v, sep=sep).items()
                }
            )

        elif (k == "chrom_freq") and v:
            for (chrom1, chrom2), freq in v.items():
                formatted_key = sep.join(["{}", "{}", "{}"]).format(k, chrom1, chrom2)
                flat_stat[formatted_key] = freq

    return flat_stat
def save_bytile_dups(self, outstream):
    """Write the by-tile duplication counts as a tab-delimited table.

    Nothing is written, and an error is logged, when by-tile duplicate
    counting was not enabled for this counter.

    Parameters
    ----------
    outstream: file handle
    """
    if not self._save_bytile_dups:
        logger.error("Bytile dups are not calculated, cannot save.")
        return

    # move the index levels into regular columns before writing
    table = self._bytile_dups.reset_index()
    table.to_csv(outstream, sep="\t", index=False)
def extract_tile_info(series, regex=False):
    """Extract the name of the tile for each read name in the series

    Parameters
    ----------
    series : pd.Series
        Series containing read IDs
    regex : bool or str, optional
        Regex to extract fields from the read IDs that correspond to tile IDs.
        Its first three capture groups are joined with ":".
        By default False, which uses a faster predefined approach that joins
        the third to fifth ":"-separated fields of each read ID.
        Example: r"(?:\\w+):(?:\\w+):(\\w+):(\\w+):(\\w+):(?:\\w+):(?:\\w+)"

    Returns
    -------
    Series or list
        Tile IDs as strings: a Series when `regex` is given, a list otherwise.

    Raises
    ------
    ValueError
        If the regex yields fewer than three fields, or if a read ID cannot
        be split (e.g. it is not a string).
    """

    def conversion_error():
        # built lazily: formatting the sample is only needed on failure
        return ValueError(
            f"Unable to convert tile names, does your readID have the tile information?\nHint: SRA removes tile information from readID.\nSample of your readIDs:\n{series.head()}"
        )

    if regex:
        split = series.str.extractall(regex).unstack().droplevel(1, axis=1)
        # three extracted fields are joined below, so three are required
        if split.shape[1] < 3:
            raise conversion_error()
        return split[0] + ":" + split[1] + ":" + split[2]

    try:
        return [":".join(name.split(":")[2:5]) for name in series]
    except (AttributeError, TypeError) as err:
        # raised by non-string entries (e.g. missing values)
        raise conversion_error() from err
Parameters ---------- yaml_path : str Path to a yaml-formatted file with stats Returns ------- pd.DataFrame Dataframe with filter names in the index and stats in columns """ counter = PairCounter.from_yaml(open(yaml_path, "r")) stats = pd.concat( [ pd.DataFrame(counter.flatten(filter=filter), index=[filter]) for filter in counter.filters ] ) return stats pairtools-1.1.3/pyproject.toml000066400000000000000000000032211474715105500164530ustar00rootroot00000000000000[project] name = "pairtools" dynamic = ['version',] dependencies = [ 'cython', 'numpy>=1.10', 'click>=6.6', 'scipy>=1.7.0', 'pandas>=1.3.4', 'pysam>=0.15.0', 'pyyaml', 'bioframe>=0.3.3', ] requires-python = ">=3.9" description = "CLI tools to process mapped Hi-C data" authors = [ {name = "Open2C", email = "open.chromosome.collective@gmail.com"}, ] license = {text = "MIT License"} keywords = ["genomics", "bioinformatics", "Hi-C", "contact", "chromosome"] readme = "README.md" classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Science/Research", "Topic :: Scientific/Engineering :: Bio-Informatics", "Operating System :: OS Independent", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] [project.optional-dependencies] test = [ 'pytest', 'pytest-flake8', 'pytest-cov', ] doc = [ 'sphinx-click', 'ipython', 'nbsphinx', 'Sphinx>=7.0', 'sphinx_rtd_theme', 'docutils>0.16', ] [project.urls] Homepage = "https://github.com/open2c/pairtools" Documentation = "https://pairtools.readthedocs.io/en/latest/" Repository = "https://github.com/open2c/pairtools.git" Issues = "https://github.com/open2c/pairtools/issues" Changelog = "https://github.com/open2c/pairtools/blob/master/CHANGES.md" [project.scripts] pairtools = "pairtools.cli:cli" [build-system] requires = [ "setuptools", "cython", "numpy", "pysam"] 
build-backend = "setuptools.build_meta" pairtools-1.1.3/pytest.ini000066400000000000000000000003331474715105500155710ustar00rootroot00000000000000[pytest] addopts = --cov pairtools --cov-config .coveragerc --cov-report term-missing --cov-report html --cov-report xml filterwarnings = ignore::PendingDeprecationWarning testpaths = tests pairtools-1.1.3/readthedocs.yml000066400000000000000000000017311474715105500165530ustar00rootroot00000000000000 # .readthedocs.yml # For some reason, readthedocs' platform does not import the installed package (from .../envs/), # but instead reads another copy from .../checkouts/. This other copy does not have compiled # cython objects and throws an error. # To overcome this issue, we had to use a custom job to install pairtools in the editable mode # and thus ensure that the cython code is compiled. # Another potentially useful trick in the future: setting environmental variables # (e.g. PIP_VERBOSE and PIP_NO_BUILD_ISOLATION=false) can control pip's behaviour in # the standard install job. version: 2 build: os: ubuntu-22.04 tools: python: "3.10" jobs: post_create_environment: - pip install numpy cython pysam - pip install --no-build-isolation -e .[doc] - python -c "import pairtools.lib.dedup_cython" # python: # install: # - method: pip # path: .
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Build script for pairtools: compiles the Cython extensions in pairtools/lib."""
import glob
import io
import os
import re

from setuptools import find_packages, setup
from setuptools.extension import Extension

try:
    from Cython.Distutils import build_ext as _build_ext
    from Cython.Build import cythonize
except ImportError as err:
    raise ImportError(
        'Cython is now required to build the extension modules.'
    ) from err


def _read(*parts, **kwargs):
    """Return the text of a file located relative to this script.

    ``parts`` are path components joined onto the directory of this file;
    the optional ``encoding`` keyword defaults to "utf-8".
    """
    filepath = os.path.join(os.path.dirname(__file__), *parts)
    encoding = kwargs.pop("encoding", "utf-8")
    with io.open(filepath, encoding=encoding) as fh:
        text = fh.read()
    return text


def get_version():
    """Return the ``__version__`` string declared in pairtools/__init__.py.

    Raises
    ------
    RuntimeError
        If no ``__version__ = "..."`` assignment is found.
    """
    match = re.search(
        r'^__version__\s*=\s*[\'"]([^\'"]*)[\'"]',
        _read("pairtools", "__init__.py"),
        re.MULTILINE,
    )
    if match is None:
        # Fail with a clear message instead of AttributeError on None.
        raise RuntimeError("Unable to find __version__ in pairtools/__init__.py")
    return match.group(1)


def get_ext_modules():
    """Collect and cythonize every ``.pyx`` module in pairtools/lib.

    Modules with "pysam" in their name are built with pysam's libraries,
    include directories and macros; modules with "regions" in their name are
    compiled as C++; all others use default settings.
    """
    ext = ".pyx"
    # The path is relative on purpose; sorted for a reproducible build order.
    src_files = sorted(glob.glob(os.path.join("pairtools", "lib", "*" + ext)))

    ext_modules = []
    for src_file in src_files:
        name = "pairtools.lib." + os.path.splitext(os.path.basename(src_file))[0]

        if 'pysam' in name:
            # Imported lazily: only needed when a pysam-based module exists.
            import pysam
            ext_modules.append(
                Extension(
                    name,
                    [src_file],
                    extra_link_args=pysam.get_libraries(),
                    include_dirs=pysam.get_include(),
                    define_macros=pysam.get_defines(),
                )
            )
        elif "regions" in name:
            ext_modules.append(
                Extension(
                    name,
                    [src_file],
                    language="c++",
                )
            )
        else:
            ext_modules.append(Extension(name, [src_file]))

    ext_modules = cythonize(ext_modules)  # , annotate=True

    return ext_modules


class build_ext(_build_ext):
    """Extension build command that adds the numpy headers to include_dirs."""

    def _add_numpy_include(self):
        """Append numpy's include directory once, importing numpy lazily."""
        # Import numpy here, only when headers are needed
        import numpy

        numpy_include = numpy.get_include()
        if numpy_include not in self.include_dirs:
            self.include_dirs.append(numpy_include)

    def finalize_options(self):
        """Finalize the base options, then register the numpy headers."""
        _build_ext.finalize_options(self)
        self._add_numpy_include()

    def run(self):
        """Ensure the numpy headers are registered, then run the build."""
        self._add_numpy_include()
        # Call original build_ext command
        _build_ext.run(self)


setup(
    version=get_version(),
    ext_modules=get_ext_modules(),
    cmdclass={"build_ext": build_ext},
    zip_safe=False,
    # The console script entry point is declared in pyproject.toml.
    packages=find_packages(),
)
pair_type sam1 sam2 readid01 chr1 1 chr2 25 + + UU readid01129chr1160101Mchr2250CGFFXS:i:0Yt:Z:UU readid0165chr22560101Mchr110ATIIXS:i:0Yt:Z:UU readid02 chr1 1 chr1 40 + + UU readid02129chr1160101Mchr1400CGFFXS:i:0Yt:Z:UU readid0265chr14060101Mchr110ATIIXS:i:0Yt:Z:UU readid03 chr1 1 chr1 3 + + UR readid03129chr1160101Mchr130CGFFXS:i:0Yt:Z:UR readid0365chr1360101Mchr110ATIIXS:i:0Yt:Z:UR readid04 ! 0 chr1 3 - + NU readid04129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0465chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid05 ! 0 ! 0 - - NN readid05129chr1160101Mchr130CGFFXS:i:0Yt:Z:NN readid0565chr1360101Mchr110ATIIXS:i:0Yt:Z:NNpairtools-1.1.3/tests/data/mock.4dedup.pairsam000066400000000000000000000010731474715105500213100ustar00rootroot00000000000000## pairs format v1.0.0 #sorted: chr1-chr2-pos1-pos2 #shape: upper triangle #genome_assembly: unknown #chromosomes: chr1 chr2 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid1 ! 0 chr1 25 - + NU . . readid2 ! 0 chr1 25 - + NU . . readid3 chr1 1 chr1 20 + + UU . . readid4 chr1 1 chr1 20 + + UU . . readid5 chr1 1 chr1 25 + + UU . . readid6 chr1 1 chr1 27 + + UU . . readid7 chr1 1 chr1 28 + - UU . . readid8 chr1 1 chr1 28 + + UU . . readid9 chr1 1 chr1 50 + + UU . . readid10 chr1 2 chr1 21 + + UU . . readid11 chr1 1 chr2 25 + + UU . .pairtools-1.1.3/tests/data/mock.4dedup_diffcolnames.pairsam000066400000000000000000000010551474715105500240220ustar00rootroot00000000000000## pairs format v1.0.0 #sorted: chr1-chr2-pos1-pos2 #shape: upper triangle #genome_assembly: unknown #chromosomes: chr1 chr2 #columns: readID chr1 p1 chr2 p2 str1 str2 pair_type sam1 sam2 readid1 ! 0 chr1 25 - + NU . . readid2 ! 0 chr1 25 - + NU . . readid3 chr1 1 chr1 20 + + UU . . readid4 chr1 1 chr1 20 + + UU . . readid5 chr1 1 chr1 25 + + UU . . readid6 chr1 1 chr1 27 + + UU . . readid7 chr1 1 chr1 28 + - UU . . readid8 chr1 1 chr1 28 + + UU . . readid9 chr1 1 chr1 50 + + UU . . readid10 chr1 2 chr1 21 + + UU . . 
readid11 chr1 1 chr2 25 + + UU . .pairtools-1.1.3/tests/data/mock.4filterbycov.pairs000066400000000000000000000013151474715105500222200ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr2 40 chr3 2 + + UU readid02 chr1 6 chr1 9 + + UR readid03 chr1 1 chr2 20 + + UU readid04 chr1 50 chr1 1 + + UU readid05 chr1 1 chr1 5 + + UU readid06 chr1 20 chr1 30 + + UR readid07 ! 0 chr1 3 - + NU readid08 ! 0 chr1 3 - + MU readid09 ! 0 ! 0 - - WW pairtools-1.1.3/tests/data/mock.4flip.pairs000066400000000000000000000016131474715105500206230ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:10000 #samheader: @SQ SN:chr2 LN:10000 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr1 chr2 #chromsize: chr1 10000 #chromsize: chr2 10000 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr1 1 chr1 2 + + UU readid02 chr1 1 chr2 2 + + UU readid03 chr1 2 chr1 1 + + UU readid04 chr1 21 chr1 2 + + UU readid05 chr2 2 chr1 1 + + UU readid06 chr2 1 chr1 2 + + UU readid07 chr1 2 chr1 1 - + UU readid08 chr1 2 chr1 1 + + RU readid09 ! 0 chr1 3 - + NU readid10 ! 0 chr1 3 - + MU readid11 ! 0 ! 0 + - WW readid12 chr1 1 chrU 1 + + UU readid13 chrU 1 chr1 1 + + UU readid14 chrU 100 chrU 1 + + UU readid15 chrU1 100 chrU 100 + + UU readid16 ! 0 chrU 100 + + NU readid17 chrU 0 ! 
0 + + UN pairtools-1.1.3/tests/data/mock.4stats.pairs000066400000000000000000000013011474715105500210210ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type readid01 chr1 1 chr1 50 + + UU readid02 chr1 1 chr1 50 + + DD readid03 chr1 1 chr1 2 + + UU readid04 chr1 1 chr1 3 + + UR readid05 chr1 1 chr2 20 + + UU readid06 chr2 1 chr3 2 + + UU readid07 ! 0 chr1 3 - + NU readid08 ! 0 chr1 3 - + MU readid09 ! 0 ! 0 - - WW pairtools-1.1.3/tests/data/mock.chrom.sizes000066400000000000000000000000261474715105500207310ustar00rootroot00000000000000chr1 10000 chr2 10000 pairtools-1.1.3/tests/data/mock.pairsam000066400000000000000000000035741474715105500201340ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:100 #samheader: @SQ SN:chr2 LN:100 #samheader: @SQ SN:chr3 LN:100 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr2 chr3 chr1 #chromsize: chr2 100 #chromsize: chr3 100 #chromsize: chr1 100 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2 readid01 chr1 1 chr2 20 + + UU readid01129chr1160101Mchr2200CGFFXS:i:0Yt:Z:UU readid0165chr22060101Mchr110ATIIXS:i:0Yt:Z:UU readid02 chr1 1 chr1 50 + + UU readid02129chr1160101Mchr1500CGFFXS:i:0Yt:Z:UU readid0265chr15060101Mchr110ATIIXS:i:0Yt:Z:UU readid03 chr1 1 chr1 2 + + UU readid03129chr1160101Mchr120CGFFXS:i:0Yt:Z:UU readid0365chr1260101Mchr110ATIIXS:i:0Yt:Z:UU readid04 chr1 1 chr1 3 + + UR 
readid04129chr1160101Mchr130CGFFXS:i:0Yt:Z:UR readid0465chr1360101Mchr110ATIIXS:i:0Yt:Z:UR readid05 chr2 1 chr3 2 + + UU readid05129chr2160101Mchr320CGFFXS:i:0Yt:Z:UU readid0565chr3260101Mchr210ATIIXS:i:0Yt:Z:UU readid06 ! 0 chr1 3 - + NU readid06129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0665chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid07 ! 0 chr1 3 - + MU readid07129chr1160101Mchr130CGFFXS:i:0Yt:Z:NU readid0765chr1360101Mchr110ATIIXS:i:0Yt:Z:NU readid08 ! 0 ! 0 - - WW readid08129chr1160101Mchr130CGFFXS:i:0Yt:Z:WW readid0865chr1360101Mchr110ATIIXS:i:0Yt:Z:WW readid09 chr1 120 chr1 121 + + UU readid09129chr112060101Mchr11210CGFFXS:i:0Yt:Z:UU readid0965chr112160101Mchr11200ATIIXS:i:0Yt:Z:UU readid10 chr1 13 chr1 14 + + UU readid10129chr11360101Mchr1140CGFFXS:i:0Yt:Z:UU readid1065chr11460101Mchr1130ATIIXS:i:0Yt:Z:UU pairtools-1.1.3/tests/data/mock.parse-all.sam000066400000000000000000000265061474715105500211370ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1-2 readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1-2 readid04 81 chr1 10 60 49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU,1,R1-2 readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid05 145 chr1 200 60 1S49M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU,1,R1-2 readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU,1,R1-2 readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid09 169 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU,1,R1-2 readid10 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid13 129 chr1 200 60 50M chr1 10 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU,1,R1-2 readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU,1,R1-2 readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 
readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|chr1,200,chr1,5324,+,-,UU,2,R1-2 readid18 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid18 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,300,+,+,UU,1,R1|chr1,200,chr1,300,+,+,UU,2,R1-2 readid19 81 chr1 300 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid19 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1 readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; 
CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,10,chr1,324,+,-,UU,1,R1|chr1,300,chr1,2000,+,+,UU,2,R1-2|chr1,200,chr1,2024,+,-,UU,3,R2 readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,MU,2,R1-2 readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX,1,R1-2 readid24 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 readid24 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,5300,+,+,UU,1,R1|!,0,chr1,5324,-,-,NU,2,R1-2 pairtools-1.1.3/tests/data/mock.parse2-single-end.expand.sam000066400000000000000000000037551474715105500237550ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1 readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1 readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1 readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1 readid02 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,200,chr1,500,+,+,UU,2,R1 readid03 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1 readid03 16 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1 readid03 16 chr1 500 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU,1,R1|chr1,10,chr1,500,+,+,UU,1,E1_R1|chr1,249,chr1,500,-,+,UU,2,R1pairtools-1.1.3/tests/data/mock.parse2-single-end.sam000066400000000000000000000022031474715105500224620ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1 readid01 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1 readid02 0 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1 readid02 0 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1 readid02 16 chr1 500 60 50M chr1 10 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU,1,R1|chr1,200,chr1,500,+,+,UU,2,R1 pairtools-1.1.3/tests/data/mock.parse2.sam000066400000000000000000000354431474715105500204530ustar00rootroot00000000000000@SQ SN:chr1 LN:10000 @SQ SN:chr2 LN:10000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,249,+,+,UU,1,R1-2 readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,249,+,+,UU,1,R1-2 readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,249,+,+,UU,1,R1-2 readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,249,+,+,UU,1,R1-2 readid04 81 chr1 10 60 49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,-,+,UU,1,R1-2 readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,-,+,UU,1,R1-2 
readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid05 145 chr1 200 60 1S49M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,59,chr1,200,+,-,UU,1,R1-2 readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,NU,1,R1-2 readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,NU,1,R1-2 readid09 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,NU,1,R1-2 readid09 169 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,NU,1,R1-2 readid10 77 * 0 0 * * 0 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN,1,R1-2 readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM,1,R1-2 readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,249,-,+,MU,1,R1-2 readid13 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,249,-,+,MU,1,R1-2 readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,MU,1,R1-2 readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,59,-,+,MU,1,R1-2 readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM,1,R1-2 readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|chr1,249,chr1,5300,+,-,UU,2,R1-2 readid18 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid18 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; 
CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,249,chr1,324,+,+,UU,2,R1-2 readid19 81 chr1 300 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid19 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,324,+,+,UU,1,R1 readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; 
CT:Z:SIMULATED:chr1,34,chr1,300,+,-,UU,1,R1|chr1,324,chr1,2024,+,+,UU,2,R1-2|chr1,224,chr1,2000,+,-,UU,3,R2 readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,-,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,-,UU,1,R1|!,0,chr1,5300,-,-,MU,2,R1-2 readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX,1,R1-2 readid24 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid24 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 CT:Z:SIMULATED:chr1,34,chr1,5324,+,+,UU,1,R1|!,0,chr1,5300,-,-,NU,2,R1-2 readid25 97 chr1 1 60 36S45M = 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,200,+,38M43S,60,0; CT:Z:SIMULATED:chr1,1,chr1,237,-,+,UU,1,R1&2 readid25 2145 chr1 200 60 38M43H chr1 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,1,+,36S45M,60,0; CT:Z:SIMULATED:chr1,1,chr1,237,-,+,UU,1,R1&2 readid25 145 chr1 1 60 168S53M = 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,200,-,132S38M51S,60,0; CT:Z:SIMULATED:chr1,1,chr1,237,-,+,UU,1,R1&2 readid25 2193 chr1 200 60 132H38M51H chr1 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,1,-,168S53M,60,0; CT:Z:SIMULATED:chr1,1,chr1,237,-,+,UU,1,R1&2 readid26 97 chr1 200 60 36S45M = 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,1,+,38M43S,60,0; CT:Z:SIMULATED:chr1,38,chr1,200,+,-,UU,1,R1&2 readid26 2145 chr1 1 60 38M43H chr1 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,200,+,36S45M,60,0; CT:Z:SIMULATED:chr1,38,chr1,200,+,-,UU,1,R1&2 readid26 145 chr1 200 60 168S53M = 1 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,1,-,132S38M51S,60,0; CT:Z:SIMULATED:chr1,38,chr1,200,+,-,UU,1,R1&2 readid26 2193 chr1 1 60 132H38M51H chr1 1 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,200,-,168S53M,60,0; CT:Z:SIMULATED:chr1,38,chr1,200,1,+,-,UU,1,R1&2 readid27 97 chr1 128 60 81M = 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 CT:Z:SIMULATED:chr1,1,chr1,369,-,+,UU,1,R2 readid27 145 chr1 200 60 170M51S = 128 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,1,-,168S53M,60,0; CT:Z:SIMULATED:chr1,1,chr1,369,-,+,UU,1,R2 readid27 2193 chr1 1 60 168H53M chr1 128 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,200,-,170M51S,60,0; CT:Z:SIMULATED:chr1,1,chr1,369,-,+,UU,1,R2 pairtools-1.1.3/tests/data/mock.rsites.bed000066400000000000000000000001011474715105500205210ustar00rootroot00000000000000chr1 0 100 chr1 100 500 chr1 500 10000 chr2 0 200 chr2 200 10000 
pairtools-1.1.3/tests/data/mock.sam000066400000000000000000000235141474715105500172540ustar00rootroot00000000000000@SQ SN:chr1 LN:1000 @SQ SN:chr2 LN:1000 @PG ID:mock PN:mock VN:0.0.0 CL:mock readid01 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid01 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid02 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU readid02 145 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,249,+,-,UU readid03 65 chr1 10 60 1S49M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid03 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UU readid04 81 chr1 10 60 49M1S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU readid04 161 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,58,chr1,200,-,+,UU readid05 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid05 145 chr1 200 60 1S49M chr1 10 0 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid06 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid06 145 chr1 200 60 49M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,248,+,-,UU readid07 97 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU readid07 145 chr1 200 60 1S48M1S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,247,+,-,UU readid08 105 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid08 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid09 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid09 169 chr1 10 60 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,NU readid10 77 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NN readid10 141 * 0 0 * * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 
CT:Z:SIMULATED:!,0,!,0,-,-,NN readid11 105 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid11 149 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid12 85 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid12 169 chr1 10 0 50M = 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,NM readid13 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU readid13 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,200,-,+,MU readid14 65 chr1 10 60 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU readid14 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,chr1,10,-,+,MU readid15 65 chr1 10 0 50M chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM readid15 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,MM readid16 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,-,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid16 2129 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid16 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid17 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid17 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid17 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid18 129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid19 81 chr1 300 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25H,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid19 2113 chr1 10 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr10,300,-,25M25S,60,0; CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid19 
129 chr1 200 60 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:chr1,10,chr1,200,+,+,UR readid20 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,300,+,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid20 2113 chr1 300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid20 129 chr1 200 60 25M25S chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:!,10,!,0,-,-,WW readid20 2177 chr1 2000 60 25S25M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,2000,+,25S25M,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 105 chr1 10 60 25M25S * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 2169 chr1 5300 60 25M25H * 0 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid21 141 * 0 0 * chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 65 chr1 10 60 25M25S chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,5300,-,25M25H,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 2129 chr1 5300 60 25M25H chr1 200 0 AAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 SA:Z:chr1,10,+,25M25S,60,0; CT:Z:SIMULATED:!,0,!,0,-,-,WW readid22 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA 
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,WW readid23 129 chr1 200 0 50M chr1 10 0 AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA NM:i:0 NM:i:0 CT:Z:SIMULATED:!,0,!,0,-,-,XX pairtools-1.1.3/tests/data/mock.test-restr.pairs000066400000000000000000000013161474715105500217210ustar00rootroot00000000000000## pairs format v1.0.0 #shape: upper triangle #genome_assembly: unknown #samheader: @SQ SN:chr1 LN:10000 #samheader: @SQ SN:chr2 LN:10000 #samheader: @PG ID:bwa PN:bwa VN:0.7.15-r1140 CL:bwa mem -SP /path/ucsc.hg19.fasta.gz /path/1.fastq.gz /path/2.fastq.gz #chromosomes: chr1 chr2 #chromsize: chr1 10000 #chromsize: chr2 10000 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type rfrag_test1 rfrag_test2 readid01 chr1 1 chr2 20 + + UU 0 0 readid02 chr1 100 chr2 20 - + UU 0 0 readid03 chr1 100 chr2 20 + + UU 0 0 readid04 chr1 499 chr2 20 + + UU 1 0 readid05 chr1 600 chr2 20 + + UU 2 0 readid06 chr1 1 chr2 200 + + UU 0 0 readid07 chr1 1 chr2 500 + + UU 0 1 readid08 chr1 10001 chr2 10001 + + UU 2 1 pairtools-1.1.3/tests/data/mock_empty.4dedup.pairsam000066400000000000000000000003111474715105500225200ustar00rootroot00000000000000## pairs format v1.0.0 #sorted: chr1-chr2-pos1-pos2 #shape: upper triangle #genome_assembly: unknown #chromosomes: chr1 chr2 #columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type sam1 sam2pairtools-1.1.3/tests/test_dedup.py000066400000000000000000000162631474715105500174250ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name mock_pairsam_path_dedup = os.path.join(testdir, "data", "mock.4dedup.pairsam") mock_pairsam_path_dedup_diff_colnames = os.path.join( testdir, "data", "mock.4dedup_diffcolnames.pairsam" ) dedup_path = 
os.path.join(tmpdir_name, "dedup.pairsam") unmapped_path = os.path.join(tmpdir_name, "unmapped.pairsam") dups_path = os.path.join(tmpdir_name, "dups.pairsam") dedup_path_cython = os.path.join(tmpdir_name, "dedup.cython.pairsam") unmapped_path_cython = os.path.join(tmpdir_name, "unmapped.cython.pairsam") dups_path_cython = os.path.join(tmpdir_name, "dups.cython.pairsam") dedup_max_path = os.path.join(tmpdir_name, "dedup_max.pairsam") unmapped_max_path = os.path.join(tmpdir_name, "unmapped_max.pairsam") dups_max_path = os.path.join(tmpdir_name, "dups_max.pairsam") dedup_markdups_path = os.path.join(tmpdir_name, "dedup.markdups.pairsam") unmapped_markdups_path = os.path.join(tmpdir_name, "unmapped.markdups.pairsam") dups_markdups_path = os.path.join(tmpdir_name, "dups.markdups.pairsam") dedup_path_diff_colnames = os.path.join(tmpdir_name, "dedup.diff_colnames.pairsam") unmapped_path_diff_colnames = os.path.join( tmpdir_name, "unmapped.diff_colnames.pairsam" ) dups_path_diff_colnames = os.path.join(tmpdir_name, "dups.diff_colnames.pairsam") max_mismatch = 1 mock_empty_pairsam_path_dedup = os.path.join(testdir, "data", "mock_empty.4dedup.pairsam") @pytest.fixture def setup_dedup(): try: subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--output", dedup_path, "--output-dups", dups_path, "--output-unmapped", unmapped_path, "--max-mismatch", str(max_mismatch), ], ) subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--output", dedup_path_cython, "--output-dups", dups_path_cython, "--output-unmapped", unmapped_path_cython, "--max-mismatch", str(max_mismatch), "--backend", "cython", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup, "--output", dedup_max_path, "--output-dups", dups_max_path, "--output-unmapped", unmapped_max_path, "--max-mismatch", str(max_mismatch), "--method", "max", ], ) subprocess.check_output( [ "python", "-m", "pairtools", 
"dedup", mock_pairsam_path_dedup, "--mark-dups", "--output", dedup_markdups_path, "--output-dups", dups_markdups_path, "--output-unmapped", unmapped_markdups_path, "--max-mismatch", str(max_mismatch), ], ) subprocess.check_output( [ "python", "-m", "pairtools", "dedup", mock_pairsam_path_dedup_diff_colnames, "--mark-dups", "--output", dedup_path_diff_colnames, "--output-dups", dups_path_diff_colnames, "--output-unmapped", unmapped_path_diff_colnames, "--max-mismatch", str(max_mismatch), "--c1", "chr1", "--c2", "chr2", "--p1", "p1", "--p2", "p2", "--s1", "str1", "--s2", "str2", ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairsam(setup_dedup): pairsam_pairs = [ l.strip().split("\t") for l in open(mock_pairsam_path_dedup, "r") if not l.startswith("#") and l.strip() ] for (ddp, up, dp) in [ (dedup_path, unmapped_path, dups_path), (dedup_max_path, unmapped_max_path, dups_max_path), (dedup_markdups_path, unmapped_markdups_path, dups_markdups_path), ( dedup_path_diff_colnames, unmapped_path_diff_colnames, dups_path_diff_colnames, ), ]: dedup_pairs = [ l.strip().split("\t") for l in open(ddp, "r") if not l.startswith("#") and l.strip() ] unmapped_pairs = [ l.strip().split("\t") for l in open(up, "r") if not l.startswith("#") and l.strip() ] dup_pairs = [ l.strip().split("\t") for l in open(dp, "r") if not l.startswith("#") and l.strip() ] # check that at least a few pairs remained in deduped and dup files assert len(dedup_pairs) > 0 assert len(dup_pairs) > 0 assert len(unmapped_pairs) > 0 import pandas as pd # check that all pairsam entries survived deduping: assert len(dedup_pairs) + len(unmapped_pairs) + len(dup_pairs) == len( pairsam_pairs ) def pairs_overlap(pair1, pair2, max_mismatch): overlap = ( (pair1[1] == pair2[1]) and (pair1[3] == pair2[3]) and (pair1[5] == pair2[5]) and (pair1[6] == pair2[6]) and (abs(int(pair1[2]) - int(pair2[2])) <= max_mismatch) and (abs(int(pair1[4]) - int(pair2[4])) <= 
max_mismatch) ) return overlap # check that deduped pairs do not overlap assert all( [ not pairs_overlap(pair1, pair2, max_mismatch) for i, pair1 in enumerate(dedup_pairs) for j, pair2 in enumerate(dedup_pairs) if i != j ] ) # check that the removed duplicates overlap with at least one of the # deduplicated entries assert all( [ any([pairs_overlap(pair1, pair2, 3) for pair2 in dedup_pairs]) for pair1 in dup_pairs ] ) empty_dedup_pairs = [ l.strip().split("\t") for l in open(mock_empty_pairsam_path_dedup, "r") if not l.startswith("#") and l.strip() ] assert len(empty_dedup_pairs) == 0 tmpdir.cleanup() pairtools-1.1.3/tests/test_filterbycov.py000066400000000000000000000102041474715105500206410ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairs_path_filterbycov = os.path.join(testdir, "data", "mock.4filterbycov.pairs") tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name params = [ {"max_dist": 0, "max_cov": 3}, {"max_dist": 0, "max_cov": 2}, {"max_dist": 1, "max_cov": 1}, ] for p in params: p["lowcov_path"] = os.path.join( tmpdir_name, "lowcov.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) p["highcov_path"] = os.path.join( tmpdir_name, "highcov.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) p["unmapped_path"] = os.path.join( tmpdir_name, "unmapped.{}.{}.pairs".format(p["max_dist"], p["max_cov"]) ) @pytest.fixture def setup_filterbycov(): try: for p in params: subprocess.check_output( [ "python", "-m", "pairtools", "filterbycov", mock_pairs_path_filterbycov, "--output", p["lowcov_path"], "--output-highcov", p["highcov_path"], "--output-unmapped", p["unmapped_path"], "--max-dist", str(p["max_dist"]), "--max-cov", str(p["max_cov"]), ] ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairs(setup_filterbycov): all_pairs = [ l.strip().split("\t") for l in 
open(mock_pairs_path_filterbycov, "r") if not l.startswith("#") and l.strip() ] for p in params: lowcov_pairs = [ l.strip().split("\t") for l in open(p["lowcov_path"], "r") if not l.startswith("#") and l.strip() ] highcov_pairs = [ l.strip().split("\t") for l in open(p["highcov_path"], "r") if not l.startswith("#") and l.strip() ] unmapped_pairs = [ l.strip().split("\t") for l in open(p["unmapped_path"], "r") if not l.startswith("#") and l.strip() ] # check that at least a few pairs remained in deduped and dup files # assert len(lowcov_pairs) > 0 # assert len(highcov_pairs) > 0 # assert len(unmapped_pairs) > 0 # check that all pairs entries survived deduping: assert len(lowcov_pairs) + len(unmapped_pairs) + len(highcov_pairs) == len( all_pairs ) assert all([(pair[1] != "!" and pair[3] != "!") for pair in lowcov_pairs]) assert all([(pair[1] != "!" and pair[3] != "!") for pair in highcov_pairs]) assert all([(pair[1] == "!" or pair[3] == "!") for pair in unmapped_pairs]) def update_coverage(coverage, chrom, pos, max_dist): if chrom == "!": return coverage[chrom] = coverage.get(chrom, {}) for i in range(max(0, pos - max_dist), pos + max_dist + 1): coverage[chrom][i] = coverage[chrom].get(i, 0) + 1 coverage = {} for pair in all_pairs: update_coverage(coverage, pair[1], int(pair[2]), p["max_dist"]) update_coverage(coverage, pair[3], int(pair[4]), p["max_dist"]) for pair in lowcov_pairs: # print (p['max_cov'],p['max_dist']) # print (pair, coverage[pair[1]][int(pair[2])]) # print (pair, coverage[pair[3]][int(pair[4])]) assert coverage[pair[1]][int(pair[2])] <= p["max_cov"] assert coverage[pair[3]][int(pair[4])] <= p["max_cov"] for pair in highcov_pairs: # print (p['max_cov'],p['max_dist']) # print (pair, coverage[pair[1]][int(pair[2])]) # print (pair, coverage[pair[3]][int(pair[4])]) assert (coverage[pair[1]][int(pair[2])] > p["max_cov"]) or ( coverage[pair[3]][int(pair[4])] > p["max_cov"] ) tmpdir.cleanup() 
pairtools-1.1.3/tests/test_flip.py000066400000000000000000000036751474715105500172610ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairs_path = os.path.join(testdir, "data", "mock.4flip.pairs") mock_chromsizes_path = os.path.join(testdir, "data", "mock.chrom.sizes") def test_flip(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "flip", mock_pairs_path, "-c", mock_chromsizes_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e orig_pairs = [ l.strip().split("\t") for l in open(mock_pairs_path, "r") if not l.startswith("#") and l.strip() ] flipped_pairs = [ l.strip().split("\t") for l in result.split("\n") if not l.startswith("#") and l.strip() ] chrom_enum = {"!": 0, "chr1": 1, "chr2": 2, "chrU": 3, "chrU1": 4} # chrU stands for unannotated chromosome, which has less priority than annotated ones # chrU1 is another unannotated chromosome, which should go lexigographically after chrU for orig_pair, flipped_pair in zip(orig_pairs, flipped_pairs): has_correct_order = (chrom_enum[orig_pair[1]], int(orig_pair[2])) <= ( chrom_enum[orig_pair[3]], int(orig_pair[4]), ) if has_correct_order: assert all([c1 == c2 for c1, c2 in zip(orig_pair, flipped_pair)]) if not has_correct_order: assert orig_pair[1] == flipped_pair[3] assert orig_pair[2] == flipped_pair[4] assert orig_pair[3] == flipped_pair[1] assert orig_pair[4] == flipped_pair[2] assert orig_pair[5] == flipped_pair[6] assert orig_pair[6] == flipped_pair[5] assert orig_pair[7] == flipped_pair[7][::-1] pairtools-1.1.3/tests/test_header.py000066400000000000000000000026071474715105500175510ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_generate(): """Test generation of the header. 
Example run: pairtools header generate tests/data/mock.pairsam \ --chroms-path tests/data/mock.chrom.sizes --pairsam \ --sam-path tests/data/mock.sam """ mock_sam_path = os.path.join(testdir, "data", "mock.sam") mock_pairs_path = os.path.join(testdir, "data", "mock.pairsam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "header", "generate", "--chroms-path", mock_chroms_path, "--sam-path", mock_sam_path, "--pairsam", mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) pairtools-1.1.3/tests/test_headerops.py000066400000000000000000000105051474715105500202670ustar00rootroot00000000000000# -*- coding: utf-8 -*- from pairtools.lib import headerops import pytest def test_make_standard_header(): header = headerops.make_standard_pairsheader() assert any([l.startswith("## pairs format") for l in header]) assert any([l.startswith("#shape") for l in header]) assert any([l.startswith("#columns") for l in header]) header = headerops.make_standard_pairsheader( chromsizes=[("b", 100), ("c", 100), ("a", 100)] ) assert sum([l.startswith("#chromsize") for l in header]) == 3 def test_samheaderops(): header = headerops.make_standard_pairsheader() samheader = [ "@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", "@SQ\tSN:chr3\tLN:100", "@PG\tID:bwa\tPN:bwa\tCL:bwa", "@PG\tID:bwa-2\tPN:bwa\tCL:bwa\tPP:bwa", ] header_with_sam = headerops.insert_samheader(header, samheader) assert len(header_with_sam) == len(header) + len(samheader) for l in samheader: assert any([l2.startswith("#samheader") and l in l2 for l2 in header_with_sam]) # test adding new programs 
to the PG chain header_extra_pg = headerops.append_new_pg(header_with_sam, ID="test", PN="test") # test if all lines got transferred assert all([(old_l in header_extra_pg) for old_l in header_with_sam]) # test if one PG got added assert len(header_extra_pg) == len(header_with_sam) + 1 # test if the new PG has PP matching the ID of one of already existing PGs new_l = [l for l in header_extra_pg if l not in header_with_sam][0] pp = [f[3:] for f in new_l.split("\t") if f.startswith("PP:")][0] assert ( len( [ l for l in header_extra_pg if l.startswith("#samheader") and ("\tID:{}\t".format(pp) in l) ] ) == 1 ) def test_merge_pairheaders(): headers = [["## pairs format v1.0"], ["## pairs format v1.0"]] merged_header = headerops._merge_pairheaders(headers) assert merged_header == headers[0] headers = [["## pairs format v1.0", "#a"], ["## pairs format v1.0", "#b"]] merged_header = headerops._merge_pairheaders(headers) assert merged_header == ["## pairs format v1.0", "#a", "#b"] headers = [ ["## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200"], ["## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200"], ] merged_header = headerops._merge_pairheaders(headers) assert merged_header == headers[0] def test_merge_different_pairheaders(): with pytest.raises(Exception): headers = [["## pairs format v1.0"], ["## pairs format v1.1"]] merged_header = headerops._merge_pairheaders(headers) def test_force_merge_pairheaders(): headers = [ ["## pairs format v1.0", "#chromsize: chr1 100"], ["## pairs format v1.0", "#chromsize: chr2 200"], ] merged_header = headerops._merge_pairheaders(headers, force=True) assert merged_header == [ "## pairs format v1.0", "#chromsize: chr1 100", "#chromsize: chr2 200", ] def test_merge_samheaders(): headers = [ ["@HD\tVN:1"], ["@HD\tVN:1"], ] merged_header = headerops._merge_samheaders(headers) assert merged_header == headers[0] headers = [ [ "@HD\tVN:1", "@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", ], [ "@HD\tVN:1", 
"@SQ\tSN:chr1\tLN:100", "@SQ\tSN:chr2\tLN:100", ], ] merged_header = headerops._merge_samheaders(headers) assert merged_header == headers[0] headers = [ [ "@HD\tVN:1", "@PG\tID:bwa\tPN:bwa\tPP:cat", ], [ "@HD\tVN:1", "@PG\tID:bwa\tPN:bwa\tPP:cat", ], ] merged_header = headerops._merge_samheaders(headers) print(merged_header) assert merged_header == [ "@HD\tVN:1", "@PG\tID:bwa-1\tPN:bwa\tPP:cat-1", "@PG\tID:bwa-2\tPN:bwa\tPP:cat-2", ] def test_merge_headers(): headers = [ [ "## pairs format v1.0", "#samheader: @HD\tVN:1", "#samheader: @SQ\tSN:chr1\tLN:100", "#samheader: @SQ\tSN:chr2\tLN:100", ] ] * 2 merged_header = headerops.merge_headers(headers) assert merged_header == headers[0] pairtools-1.1.3/tests/test_markasdup.py000066400000000000000000000017111474715105500203030ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "markasdup", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] # check that all pairsam entries survived sorting: assert len(pairsam_body) == len(output_body) # check that all pairtypes got changed to DD for l in output_body: assert l.split("\t")[7] == "DD" pairtools-1.1.3/tests/test_merge.py000066400000000000000000000073131474715105500174170ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name mock_pairsam_path_1 = 
os.path.join(testdir, "data", "mock.pairsam") mock_pairsam_path_2 = os.path.join(testdir, "data", "mock.2.pairsam") mock_sorted_pairsam_path_1 = os.path.join(tmpdir_name, "1.pairsam") mock_sorted_pairsam_path_2 = os.path.join(tmpdir_name, "2.pairsam") @pytest.fixture def setup_sort_two(): try: subprocess.check_output( [ "python", "-m", "pairtools", "sort", mock_pairsam_path_1, "--output", mock_sorted_pairsam_path_1, ], ) subprocess.check_output( [ "python", "-m", "pairtools", "sort", mock_pairsam_path_2, "--output", mock_sorted_pairsam_path_2, ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_mock_pairsam(setup_sort_two): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "merge", mock_sorted_pairsam_path_1, mock_sorted_pairsam_path_2, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check that all pairsam entries survived sorting: pairsam_body_1 = [ l.strip() for l in open(mock_pairsam_path_1, "r") if not l.startswith("#") and l.strip() ] pairsam_body_2 = [ l.strip() for l in open(mock_pairsam_path_2, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert len(pairsam_body_1) + len(pairsam_body_2) == len(output_body) # check the sorting order of the output: prev_pair = None for l in output_body: cur_pair = l.split("\t")[1:8] if prev_pair is not None: assert cur_pair[0] >= prev_pair[0] if cur_pair[0] == prev_pair[0]: assert cur_pair[2] >= prev_pair[2] if cur_pair[2] == prev_pair[2]: assert int(cur_pair[1]) >= int(prev_pair[1]) if int(cur_pair[1]) == int(prev_pair[1]): assert int(cur_pair[3]) >= int(prev_pair[3]) prev_pair = cur_pair # Check that the header is preserved: try: result = subprocess.check_output( [ "python", "-m", "pairtools", "merge", "--keep-first-header", mock_sorted_pairsam_path_1, mock_sorted_pairsam_path_2, ], 
).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check the headers: pairsam_header_1 = [ l.strip() for l in open(mock_sorted_pairsam_path_1, "r") if l.startswith("#") and l.strip() ] pairsam_header_2 = [ l.strip() for l in open(mock_sorted_pairsam_path_2, "r") if l.startswith("#") and l.strip() ] output_header = [ l.strip() for l in result.split("\n") if l.startswith("#") and l.strip() ] assert len(pairsam_header_1) + 1 == len(output_header) tmpdir.cleanup() pairtools-1.1.3/tests/test_parse.py000066400000000000000000000065201474715105500174310ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_python_version(): assert sys.version_info[0] == 3, "Use Python 3!" def test_mock_pysam(): """Parse non-chimeric alignments with walks-policy mask with pysam backend.""" mock_sam_path = os.path.join(testdir, "data", "mock.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse", "--walks-policy", "mask", "-c", mock_chroms_path, mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly for l in result.split("\n"): if l.startswith("#") or not l: continue print(l) assigned_pair = l.split("\t")[1:8] simulated_pair = l.split("CT:Z:SIMULATED:", 1)[1].split("\031", 1)[0].split(",") print(assigned_pair) print(simulated_pair) print() assert assigned_pair == simulated_pair def test_mock_pysam_parse_all(): """Parse all alignment in each read 
with walks-policy all and pysam backend.""" mock_sam_path = os.path.join(testdir, "data", "mock.parse-all.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse", "--walks-policy", "all", "-c", mock_chroms_path, "--add-pair-index", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] simulated_pair = ( l.split("CT:Z:SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair pairtools-1.1.3/tests/test_parse2.py000066400000000000000000000174511474715105500175200ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pysam_parse2_read(): mock_sam_path = os.path.join(testdir, "data", "mock.parse2.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--add-pair-index", "--flip", "--report-position", "junction", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got 
transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter) simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair def test_mock_pysam_parse2_pair(): mock_sam_path = os.path.join(testdir, "data", "mock.parse-all.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--add-pair-index", "--flip", "--report-position", "outer", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) 
print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair def test_mock_pysam_parse2_single_end(): """Testing single-end mode for parse2, no-flip mode. --report-position is outer (parse2 default) --report-orientation is pair (parse2 default) """ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--single-end", "--add-pair-index", "--no-flip", "--report-position", "outer", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter) simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair def test_mock_pysam_parse2_single_end_expand(): """Testing single-end mode for parse2, no-flip mode, with --expand. 
--report-position is outer (parse2 default) --report-orientation is pair (parse2 default) """ mock_sam_path = os.path.join(testdir, "data", "mock.parse2-single-end.expand.sam") mock_chroms_path = os.path.join(testdir, "data", "mock.chrom.sizes") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "parse2", "-c", mock_chroms_path, "--single-end", "--expand", "--add-pair-index", "--no-flip", "--report-position", "outer", "--report-orientation", "pair", mock_sam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly sam_header = [l.strip() for l in open(mock_sam_path, "r") if l.startswith("@")] pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in sam_header: assert any([l in l2 for l2 in pairsam_header]) # check that the pairs got assigned properly id_counter = 0 prev_id = "" for l in result.split("\n"): if l.startswith("#") or not l: continue if prev_id == l.split("\t")[0]: id_counter += 1 else: id_counter = 0 prev_id = l.split("\t")[0] assigned_pair = l.split("\t")[1:8] + l.split("\t")[-2:] print(l.split("SIMULATED:", 1)[1].split("\031", 1)[0].split("|"), id_counter) simulated_pair = ( l.split("SIMULATED:", 1)[1] .split("\031", 1)[0] .split("|")[id_counter] .split(",") ) print(assigned_pair) print(simulated_pair, prev_id) print() assert assigned_pair == simulated_pair pairtools-1.1.3/tests/test_restrict.py000066400000000000000000000031251474715105500201540ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import pytest import subprocess testdir = os.path.dirname(os.path.realpath(__file__)) def test_restrict(): """Restrict pairs file""" mock_pairs_path = os.path.join(testdir, "data", "mock.test-restr.pairs") mock_rfrag_path = os.path.join(testdir, "data", "mock.rsites.bed") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "restrict", "-f", mock_rfrag_path, 
mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the header got transferred correctly true_header = [l.strip() for l in open(mock_pairs_path, "r") if l.startswith("@")] output_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in true_header: assert any([l in l2 for l2 in output_header]) # check that the pairs got assigned properly cols = [x for x in output_header if x.startswith("#columns")][0].split(" ")[1:] COL_RFRAG1_TRUE = cols.index("rfrag_test1") COL_RFRAG2_TRUE = cols.index("rfrag_test2") COL_RFRAG1_OUTPUT = cols.index("rfrag1") COL_RFRAG2_OUTPUT = cols.index("rfrag2") for l in result.split("\n"): if l.startswith("#") or not l: continue line = l.split() assert line[COL_RFRAG1_TRUE] == line[COL_RFRAG1_OUTPUT] assert line[COL_RFRAG2_TRUE] == line[COL_RFRAG2_OUTPUT] pairtools-1.1.3/tests/test_scaling.py000066400000000000000000000013031474715105500177310ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import pandas as pd import io testdir = os.path.dirname(os.path.realpath(__file__)) def test_scaling(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "scaling", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e output = pd.read_csv(io.StringIO(result), sep="\t", header=0) assert ( output["n_pairs"].sum() == 9 ) # double unmapped pairs are currently ignored by lib.scaling pairtools-1.1.3/tests/test_select.py000066400000000000000000000156001474715105500175750ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest from pairtools.lib import pairsam_format testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") 
mock_chromsizes_path = os.path.join(testdir, "data", "mock.chrom.sizes") def test_preserve(): try: result = subprocess.check_output( ["python", "-m", "pairtools", "select", "True", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l in pairsam_body for l in output_body) def test_equal(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", '(pair_type == "RU") or (pair_type == "UR") or (pair_type == "UU")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["RU", "UR", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["RU", "UR", "UU"] ) def test_csv(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 'csv_match(pair_type, "RU,UR,UU")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["RU", "UR", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["RU", "UR", "UU"] ) def test_wildcard(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 
'wildcard_match(pair_type, "*U")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["NU", "MU", "RU", "UU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["NU", "MU", "RU", "UU"] ) def test_regex(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", 'regex_match(pair_type, "[NM]U")', mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e print(result) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] assert all(l.split("\t")[7] in ["NU", "MU"] for l in output_body) assert all( l in output_body for l in pairsam_body if l.split("\t")[7] in ["NU", "MU"] ) def test_chrom_subset(): try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", "True", "--chrom-subset", mock_chromsizes_path, mock_pairsam_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] output_header = [ l.strip() for l in result.split("\n") if l.startswith("#") and l.strip() ] chroms_from_chrom_field = [ l.strip().split()[1:] for l in result.split("\n") if l.startswith("#chromosomes:") ][0] assert set(chroms_from_chrom_field) == set(["chr1", "chr2"]) chroms_from_chrom_sizes = [ l.strip().split()[1] for l 
in result.split("\n") if l.startswith("#chromsize:") ] assert set(chroms_from_chrom_sizes) == set(["chr1", "chr2"]) def test_remove_columns(): """Test removal of columns from the file Example run: pairtools select True --remove-columns sam1,sam2 tests/data/mock.pairsam """ mock_pairs_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( [ "python", "-m", "pairtools", "select", "True", "--remove-columns", "sam1,sam2", mock_pairs_path, ], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # check if the columns are removed properly: pairsam_header = [l.strip() for l in result.split("\n") if l.startswith("#")] for l in pairsam_header: if l.startswith("#columns:"): line = l.strip() assert ( line == "#columns: readID chrom1 pos1 chrom2 pos2 strand1 strand2 pair_type" ) # check that the pairs got assigned properly for l in result.split("\n"): if l.startswith("#") or not l: continue assert len(l.split(pairsam_format.PAIRSAM_SEP)) == 8 pairtools-1.1.3/tests/test_sort.py000066400000000000000000000037751474715105500173170ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") try: result = subprocess.check_output( ["python", "-m", "pairtools", "sort", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e # Check that the only changes strings are a @PG record of a SAM header, # the "#sorted" entry and chromosomes pairsam_header = [ l.strip() for l in open(mock_pairsam_path, "r") if l.startswith("#") ] output_header = [l.strip() for l in result.split("\n") if l.startswith("#")] print(output_header) print(pairsam_header) for l in output_header: if not any([l in l2 for l2 in pairsam_header]): assert ( 
l.startswith("#samheader: @PG") or l.startswith("#sorted") or l.startswith("#chromosomes") ) pairsam_body = [ l.strip() for l in open(mock_pairsam_path, "r") if not l.startswith("#") and l.strip() ] output_body = [ l.strip() for l in result.split("\n") if not l.startswith("#") and l.strip() ] # check that all pairsam entries survived sorting: assert len(pairsam_body) == len(output_body) # check the sorting order of the output: prev_pair = None for l in output_body: cur_pair = l.split("\t")[1:8] if prev_pair is not None: assert cur_pair[0] >= prev_pair[0] if cur_pair[0] == prev_pair[0]: assert cur_pair[2] >= prev_pair[2] if cur_pair[2] == prev_pair[2]: assert int(cur_pair[1]) >= int(prev_pair[1]) if int(cur_pair[1]) == int(prev_pair[1]): assert int(cur_pair[3]) >= int(prev_pair[3]) prev_pair = cur_pair pairtools-1.1.3/tests/test_split.py000066400000000000000000000051051474715105500174500ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import pytest import tempfile testdir = os.path.dirname(os.path.realpath(__file__)) mock_pairsam_path = os.path.join(testdir, "data", "mock.pairsam") tmpdir = tempfile.TemporaryDirectory() tmpdir_name = tmpdir.name pairs_path = os.path.join(tmpdir_name, "out.pairs") sam_path = os.path.join(tmpdir_name, "out.sam") @pytest.fixture def setup_split(): try: subprocess.check_output( [ "python", "-m", "pairtools", "split", mock_pairsam_path, "--output-pairs", pairs_path, "--output-sam", sam_path, ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e def test_split(setup_split): pairsam_lines = [l.strip() for l in open(mock_pairsam_path, "r") if l.strip()] pairs_lines = [l.strip() for l in open(pairs_path, "r") if l.strip()] sam_lines = [l.strip() for l in open(sam_path, "r") if l.strip()] # check that all entries survived splitting: n_pairsam = len([l for l in pairsam_lines if not l.startswith("#")]) n_pairs = len([l for l in pairs_lines if not 
l.startswith("#")]) n_sam = len([l for l in sam_lines if not l.startswith("@")]) // 2 assert n_pairsam == n_pairs assert n_pairsam == n_sam # check that the header survived splitting: pairsam_header = [ l.strip() for l in open(mock_pairsam_path, "r") if l.strip() and l.startswith("#") ] pairs_header = [ l.strip() for l in open(pairs_path, "r") if l.strip() and l.startswith("#") ] sam_header = [ l.strip() for l in open(sam_path, "r") if l.strip() and l.startswith("@") ] assert all( any(l in l2 for l2 in pairsam_header) for l in sam_header if not l.startswith("@PG") ) assert all( l in pairsam_header for l in pairs_header if (not (l.startswith("#columns") or l.startswith("#samheader"))) ) columns_pairsam = [l for l in pairsam_header if l.startswith("#columns")][ 0 ].split()[1:] columns_pairs = [l for l in pairs_header if l.startswith("#columns")][0].split()[1:] assert ( ("sam1" in columns_pairsam) and ("sam2" in columns_pairsam) and ("sam1" not in columns_pairs) and ("sam2" not in columns_pairs) ) assert [c for c in columns_pairsam if c != "sam1" and c != "sam2"] == columns_pairs tmpdir.cleanup() pairtools-1.1.3/tests/test_stats.py000066400000000000000000000147561474715105500174670ustar00rootroot00000000000000# -*- coding: utf-8 -*- import os import sys import subprocess import numpy as np import yaml import pytest testdir = os.path.dirname(os.path.realpath(__file__)) def test_mock_pairsam(): mock_pairsam_path = os.path.join(testdir, "data", "mock.4stats.pairs") try: result = subprocess.check_output( ["python", "-m", "pairtools", "stats", "--yaml", mock_pairsam_path], ).decode("ascii") except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e stats = yaml.safe_load(result) # for k in stats["no_filter"]: # try: # stats["no_filter"][k] = int(stats["no_filter"][k]) # except (ValueError, TypeError): # stats["no_filter"][k] = float(stats["no_filter"][k]) assert stats["no_filter"]["total"] == 9 assert 
stats["no_filter"]["total_single_sided_mapped"] == 2 assert stats["no_filter"]["total_mapped"] == 6 assert stats["no_filter"]["total_dups"] == 1 assert stats["no_filter"]["cis"] == 3 assert stats["no_filter"]["trans"] == 2 assert stats["no_filter"]["pair_types"]["UU"] == 4 assert stats["no_filter"]["pair_types"]["NU"] == 1 assert stats["no_filter"]["pair_types"]["WW"] == 1 assert stats["no_filter"]["pair_types"]["UR"] == 1 assert stats["no_filter"]["pair_types"]["MU"] == 1 assert stats["no_filter"]["pair_types"]["DD"] == 1 assert stats["no_filter"]["chrom_freq"]["chr1/chr2"] == 1 assert stats["no_filter"]["chrom_freq"]["chr1/chr1"] == 3 assert stats["no_filter"]["chrom_freq"]["chr2/chr3"] == 1 for orientation in ("++", "+-", "-+", "--"): s = stats["no_filter"]["dist_freq"][orientation] for k, val in s.items(): if orientation == "++" and k in [1, 2, 42]: assert s[k] == 1 else: assert s[k] == 0 assert stats["no_filter"]["summary"]["frac_cis"] == 0.6 assert stats["no_filter"]["summary"]["frac_cis_1kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_2kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_4kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_10kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_20kb+"] == 0 assert stats["no_filter"]["summary"]["frac_cis_40kb+"] == 0 assert np.isclose(stats["no_filter"]["summary"]["frac_dups"], 1 / 6) def test_merge_stats(): mock_pairsam_path = os.path.join(testdir, "data", "mock.4stats.pairs") try: subprocess.check_output( [ "python", "-m", "pairtools", "stats", "--with-chromsizes", mock_pairsam_path, "--output", "mock.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "--no-chromsizes", mock_pairsam_path, "--output", "mock.no_chromsizes.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "mock.stats", "mock.stats", "--merge", "--output", "mock.merged_chromsizes.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", 
"mock.stats", "mock.no_chromsizes.stats", "--merge", "--output", "mock.merged_mixed.stats", ], ) subprocess.check_output( [ "python", "-m", "pairtools", "stats", "mock.no_chromsizes.stats", "mock.no_chromsizes.stats", "--merge", "--output", "mock.merged_no_chromsizes.stats", ], ) except subprocess.CalledProcessError as e: print(e.output) print(sys.exc_info()) raise e from pairtools.lib.stats import PairCounter @pytest.fixture def pair_counter(): counter = PairCounter(filters={"f1": "filter1", "f2": "filter2"}) counter._dist_bins = np.array([1, 1000, 10000, 100000, 1000000]) # Populate the counter with some sample data counter._stat["f1"]["dist_freq"] = { "++": {1: 80, 1000: 80, 10000: 91, 100000: 95}, "--": {1: 100, 1000: 100, 10000: 100, 100000: 100}, "-+": {1: 100, 1000: 100, 10000: 100, 100000: 100}, "+-": {1: 120, 1000: 120, 10000: 109, 100000: 105}, } counter._stat["f2"]["dist_freq"] = { "++": {1: 200, 1000: 180, 10000: 160, 100000: 140}, "--": {1: 220, 1000: 190, 10000: 170, 100000: 150}, "-+": {1: 210, 1000: 185, 10000: 165, 100000: 145}, "+-": {1: 230, 1000: 195, 10000: 175, 100000: 155}, } return counter def test_find_dist_freq_convergence_distance(pair_counter): result = pair_counter.find_dist_freq_convergence_distance(0.1) assert "f1" in result assert "f2" in result f1_result = result["f1"] assert "convergence_dist" in f1_result assert "strands_w_max_convergence_dist" in f1_result assert "convergence_rel_diff_threshold" in f1_result assert "n_cis_pairs_below_convergence_dist" in f1_result assert "n_cis_pairs_below_convergence_dist_all_strands" in f1_result assert "n_cis_pairs_above_convergence_dist_all_strands" in f1_result assert "frac_cis_in_cis_below_convergence_dist" in f1_result assert "frac_cis_in_cis_below_convergence_dist_all_strands" in f1_result assert "frac_cis_in_cis_above_convergence_dist_all_strands" in f1_result assert "frac_total_mapped_in_cis_below_convergence_dist" in f1_result assert 
"frac_total_mapped_in_cis_below_convergence_dist_all_strands" in f1_result assert "frac_total_mapped_in_cis_above_convergence_dist_all_strands" in f1_result assert f1_result["convergence_rel_diff_threshold"] == 0.1 assert f1_result["convergence_dist"] == 10000 assert f1_result["strands_w_max_convergence_dist"] == "++" # f2_result = result["f2"] # assert "convergence_dist" in f2_result # assert "strands_w_max_convergence_dist" in f2_result # assert "convergence_rel_diff_threshold" in f2_result # Add more assertions for f2_result as needed