svim-2.0.0 (git commit d9a2447e47149b6be9cda2f4c4810adaf0fce114)

svim-2.0.0/.gitignore

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*,cover
.hypothesis/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# IPython Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# dotenv
.env

# virtualenv
venv/
ENV/

# Spyder project settings
.spyderproject

# Rope project settings
.ropeproject

svim-2.0.0/.travis.yml

language: python
python:
  - "3.6"
  - "3.7"
  - "3.8"
  - "3.9"
install:
  - pip install pysam numpy scipy matplotlib edlib pyspoa py-cpuinfo
script:
  - python3 -m unittest discover -s src/

svim-2.0.0/LICENSE

GNU GENERAL PUBLIC LICENSE
Version 3, 29 June 2007

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>

Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed.

Preamble

The GNU General Public License is a free, copyleft license for software and other kinds of works.

The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. We, the Free Software Foundation, use the GNU General Public License for most of our software; it applies also to any other work released this way by its authors. You can apply it to your programs, too.

When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things.

To protect your rights, we need to prevent others from denying you these rights or asking you to surrender the rights. Therefore, you have certain responsibilities if you distribute copies of the software, or if you modify it: responsibilities to respect the freedom of others.

For example, if you distribute copies of such a program, whether gratis or for a fee, you must pass on to the recipients the same freedoms that you received. You must make sure that they, too, receive or can get the source code.
And you must show them these terms so they know their rights.

Developers that use the GNU GPL protect your rights with two steps: (1) assert copyright on the software, and (2) offer you this License giving you legal permission to copy, distribute and/or modify it.

For the developers' and authors' protection, the GPL clearly explains that there is no warranty for this free software. For both users' and authors' sake, the GPL requires that modified versions be marked as changed, so that their problems will not be attributed erroneously to authors of previous versions.

Some devices are designed to deny users access to install or run modified versions of the software inside them, although the manufacturer can do so. This is fundamentally incompatible with the aim of protecting users' freedom to change the software. The systematic pattern of such abuse occurs in the area of products for individuals to use, which is precisely where it is most unacceptable. Therefore, we have designed this version of the GPL to prohibit the practice for those products. If such problems arise substantially in other domains, we stand ready to extend this provision to those domains in future versions of the GPL, as needed to protect the freedom of users.

Finally, every program is threatened constantly by software patents. States should not allow patents to restrict development and use of software on general-purpose computers, but in those that do, we wish to avoid the special danger that patents applied to a free program could make it effectively proprietary. To prevent this, the GPL assures that patents cannot be used to render the program non-free.

The precise terms and conditions for copying, distribution and modification follow.

TERMS AND CONDITIONS

0. Definitions.

"This License" refers to version 3 of the GNU General Public License.

"Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks.

"The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations.

To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work.

A "covered work" means either the unmodified Program or a work based on the Program.

To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well.

To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying.

An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that (1) displays an appropriate copyright notice, and (2) tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License.
If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion.

1. Source Code.

The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work.

A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language.

The "System Libraries" of an executable work include anything, other than the work as a whole, that (a) is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and (b) serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it.

The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work.

The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source.

The Corresponding Source for a work in source code form is that same work.

2. Basic Permissions.

All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law.

You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you.

Conveying under any other circumstances is permitted solely under the conditions stated below.
Sublicensing is not allowed; section 10 makes it unnecessary.

3. Protecting Users' Legal Rights From Anti-Circumvention Law.

No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures.

When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures.

4. Conveying Verbatim Copies.

You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program.

You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee.

5. Conveying Modified Source Versions.

You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices".

    c) You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so.

A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate.

6. Conveying Non-Source Forms.
You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways:

    a) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either (1) a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or (2) access to copy the Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b.

    d) Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d.

A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work.

A "User Product" is either (1) a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or (2) anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product.
"Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms. "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or authors of the material; or

    e) Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors.

All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying.

If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms.

Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way.

8. Termination.

You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11).

However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated (a) provisionally, unless and until the copyright holder explicitly and finally terminates your license, and (b) permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation.

Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice.

Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10.
9. Acceptance Not Required for Having Copies.

You are not required to accept this License in order to receive or run a copy of the Program. Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so.

10. Automatic Licensing of Downstream Recipients.

Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License.

An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts.

You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it.

11. Patents.

A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version".

A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License.

Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version.

In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party.
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either (1) cause the Corresponding Source to be so available, or (2) arrange to deprive yourself of the benefit of the patent license for this particular work, or (3) arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid.

If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it.

A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license (a) in connection with copies of the covered work conveyed by you (or copies made from those copies), or (b) primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007.

Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law.

12. No Surrender of Others' Freedom.

If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program.

13. Use with the GNU Affero General Public License.

Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License into a single combined work, and to convey the resulting work.
The terms of this License will continue to apply to the part which is the covered work, but the special requirements of the GNU Affero General Public License, section 13, concerning interaction through a network will apply to the combination as such.

14. Revised Versions of this License.

The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns.

Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of the GNU General Public License, you may choose any version ever published by the Free Software Foundation.

If the Program specifies that a proxy can decide which future versions of the GNU General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program.

Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version.

15. Disclaimer of Warranty.

THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

16. Limitation of Liability.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

17. Interpretation of Sections 15 and 16.

If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee.

END OF TERMS AND CONDITIONS

How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program.
It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year>  <name of author>

    This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

    You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

If the program does terminal interaction, make it output a short notice like this when it starts in an interactive mode:

    <program> Copyright (C) <year> <name of author>
    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
    This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details.

The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, your program's commands might be different; for a GUI interface, you would use an "about box".

You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU GPL, see <https://www.gnu.org/licenses/>.

The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read <https://www.gnu.org/licenses/why-not-lgpl.html>.

svim-2.0.0/README.rst

SVIM - Structural variant identification using long reads
==========================================================

.. image:: https://img.shields.io/pypi/v/svim?style=flat
    :target: https://pypi.org/project/svim/

.. image:: https://img.shields.io/conda/vn/bioconda/svim?style=flat
    :target: https://anaconda.org/bioconda/svim

.. image:: https://img.shields.io/conda/dn/bioconda/svim?label=bioconda%20downloads&style=flat
    :target: https://anaconda.org/bioconda/svim

.. image:: https://img.shields.io/badge/published%20in-Bioinformatics-blue.svg
    :target: https://doi.org/10.1093/bioinformatics/btz041

SVIM (pronounced *SWIM*) is a structural variant caller for long sequencing reads. It is able to detect and classify the following six classes of structural variation: deletions, insertions, inversions, tandem duplications, interspersed duplications and translocations. SVIM also estimates the genotypes of deletions, insertions, inversions and interspersed duplications. Unlike other methods, SVIM integrates information from across the genome to precisely distinguish similar events, such as tandem and interspersed duplications and simple insertions. In our experiments on simulated data and real datasets from PacBio and Nanopore sequencing machines, SVIM reached consistently better results than competing methods.

**Note!** To analyze haploid or diploid genome assemblies or contigs, please use our other method `SVIM-asm <https://github.com/eldariont/svim-asm>`_.
Background on Structural Variants and Long Reads
------------------------------------------------

.. image:: https://raw.githubusercontent.com/eldariont/svim/master/docs/SVclasses.png
    :align: center

Structural variants (SVs) are typically defined as genomic variants larger than 50 bp (e.g. deletions, duplications, inversions). Studies have shown that they affect more bases in an average genome than SNPs or small indels. Consequently, they have a large impact on genes and regulatory regions. This is reflected in the large number of genetic disorders and other diseases that are associated with SVs. Common sequencing technologies by providers such as Illumina generate short reads with high accuracy. However, they exhibit weaknesses in repeat and low-complexity regions, where SVs are particularly common. Single-molecule long-read sequencing technologies from Pacific Biosciences and Oxford Nanopore produce reads with error rates of up to 15% but with lengths of several kbps. The high read lengths enable them to cover entire repeats and SVs, which facilitates SV detection.

Installation
------------

.. code-block:: bash

    # Install via conda into a new environment (recommended): installs all dependencies including read alignment dependencies
    conda create -n svim_env --channel bioconda svim

    # Install via conda into existing (active) environment: installs all dependencies including read alignment dependencies
    conda install --channel bioconda svim

    # Install via pip (requires Python 3.6.* or newer): installs all dependencies except those necessary for read alignment (ngmlr, minimap2, samtools)
    pip install svim

    # Install from github (requires Python 3.6.* or newer): installs all dependencies except those necessary for read alignment (ngmlr, minimap2, samtools)
    git clone https://github.com/eldariont/svim.git
    cd svim
    pip install .
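After installation, a quick smoke test is to print the built-in help, which lists the available subcommands and their options (this assumes only that the ``svim`` executable ended up on your PATH):

.. code-block:: bash

    # Verify the installation and show available subcommands
    svim --help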
Dependencies
------------

- *edlib* for edit distance computation
- *matplotlib>=3.3.0* for plotting
- *numpy* and *scipy* for hierarchical clustering
- *pysam* (>=0.15.2) for SAM/BAM file processing
- *pyspoa* (>=0.0.6) for consensus sequence computation
- *py-cpuinfo* (>=7.0.0) for CPU info retrieval (checking for SIMD capabilities)

Changelog
---------

- **v2.0.0**: adds consensus sequence computation for insertions, improves clustering step (considers sequence similarity when clustering insertions and prevents signatures from the same read from being clustered together), outputs sequence alleles for all SV types except BNDs and DUPs by default, updates default parameters, bugfixes
- **v1.4.2**: fixes invalid start coordinates in VCF output, issues warning for invalid characters in contig names
- **v1.4.1**: improves clustering of translocation breakpoints (BNDs), improves --all_bnds mode, bugfixes
- **v1.4.0**: fixes and improves clustering of insertions, adds option --all_bnds to output all SV classes in breakend notation, updates default value of --partition_max_distance to avoid very large partitions, bugfixes
- **v1.3.1**: small changes to partitioning and clustering algorithm, adds two new command-line options to output duplications as INS records in VCF, removes limit on number of supplementary alignments, removes q5 filter, bugfixes
- **v1.3.0**: improves BND detection, adds INFO:ZMWS tag with number of supporting PacBio wells, adds sequence alleles for INS, adds FORMAT:CN tag for tandem duplications, bugfixes
- **v1.2.0**: adds 3 more VCF output options: output sequence instead of symbolic alleles in VCF, output names of supporting reads, output insertion sequences of supporting reads
- **v1.1.0**: outputs BNDs in VCF, detects large tandem duplications, allows skipping genotyping, makes VCF output more flexible, adds genotype scatter plot
- **v1.0.0**: adds genotyping of deletions, inversions, insertions and interspersed duplications, produces plots of SV length distribution, improves help descriptions
- **v0.5.0**: replaces graph-based clustering with hierarchical clustering, modifies scoring function, improves partitioning prior to clustering, improves calling from coordinate-sorted SAM/BAM files, improves VCF output
- **v0.4.4**: includes exception message in log files, bugfixes, adds tests and sets up Travis
- **v0.4.3**: adds support for coordinate-sorted SAM/BAM files, improves VCF output and increases compatibility with IGV and truvari, bugfixes

Input
-----

SVIM analyzes long reads given as a FASTA/FASTQ file (uncompressed or gzipped) or a file list. Alternatively, it can analyze an alignment file in BAM format. SVIM has been successfully tested on PacBio CLR, PacBio CCS (HiFi) and Oxford Nanopore data. It has been tested on alignment files produced by the read aligners `minimap2 <https://github.com/lh3/minimap2>`_, `pbmm2 <https://github.com/PacificBiosciences/pbmm2>`_ and `NGMLR <https://github.com/philres/ngmlr>`_. A minimal sketch of both input modes is shown below.
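The subcommand names ``reads`` and ``alignment`` follow SVIM's command-line interface as documented in the project wiki; the file names and working directory below are hypothetical placeholders:

.. code-block:: bash

    # Call SVs directly from raw reads; SVIM aligns them first
    # (requires the read alignment dependencies: minimap2 or ngmlr, plus samtools)
    svim reads working_dir/ my_reads.fastq.gz genome.fa

    # Alternatively, call SVs from an existing coordinate-sorted and indexed BAM file
    svim alignment working_dir/ my_alignments.sorted.bam genome.fa

Because the resulting ``variants.vcf`` (see the Output section below) contains calls of all confidence levels, it is usually advisable to filter it by the score in the QUAL column before downstream analysis, e.g. with ``bcftools view -i 'QUAL >= 10' working_dir/variants.vcf``; a suitable threshold depends on your data.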
Output
------

SVIM's main output file called `variants.vcf` is placed into the given working directory.

Usage
-----

Please see our `wiki <https://github.com/eldariont/svim/wiki>`_.

Contact
-------

If you experience problems or have suggestions, please create an issue or a pull request or contact heller_d@molgen.mpg.de.

Citation
--------

Feel free to read and cite our paper in Bioinformatics: https://doi.org/10.1093/bioinformatics/btz041

License
-------

The project is licensed under the GNU General Public License.

svim-2.0.0/docs/SVclasses.png

[Binary PNG image: the SV classes overview figure embedded above in README.rst; binary content omitted.]
,n~|.VOzY(Xv/q0ocYͱs8Zt׀F|n!lU?nXrJXK\ (.}+,NŽ6sDh9O^wm/*3l,`{SXqYe9' }O 1vѩsl|8/ɹc#;3r>vt 9?2|q9ٖέ5/!'~wOg ?L-i6"]Y?Ĵa9̷։{ ړO>&fMj?A_*> p1YڽclV39`v12-LH˜s%p2ď0J;ڳd 3N~yϝ-FدrO^5' ڍήb.x6cܷ1K~`6IkmXx6| w88Ӏt}<9Sf2g’XvN3.EdǮ|yVb>oS3v&k3SxxfvTZe<2Xv" e `am4ws?Mv>f[ ms?E8YCgך`‰V(Q"{й p&ʓ,ko1!BxKt 8L`z |3yQ₋Os*Iw(VHe,pvNDO"1_ʏ*`8s-@;'ܿ)?X&+C3_{GD)Xt@ػ9`3',)Hqz ~ݧU,Y֞T> IDAT7:lbW94U{3t|p"}X^":DW*o콸6:`Yct~ fm :C0{|6> {OaJx88=Aczw`Ha7bӱM`;91-Gw8sM VQ,+XՒ1GDZH ބ49nŲ Mƪm-Ռ`XFP?G35Ŗ|=zt)5,c*XƝ}*H8,A*ЃӪa/îXXu{9ț1P=&79b,}և$<+qI ZJ^hdUǞwbb'WmP/G̠qObkNX Ծo1QWcϬJU7cP0VX%Ed<-q c `1lo 儡wG8F?Gr3V)j v$(ؖr>DAgY.#[W0N{{c~V/M ܄ގ*ْ=A~jċ*w^aL1T}cU_,|[Uc1:N7ҎŰKl8o5Wa|X,}4\hq>Z!=w5x [, v&܇Te5,n?EǘOb1A뀅uXL*@0%] f:o@*,vЏ[be# f8㤠sNf&^*Y*$XB+qYX،Ho·9ŕ8?e_Z/0(n&SLy3L<~^ \>-@~|me~mʬY_VX=j; V$nl#qiφkk[k"Csw`ӹ*|ͣۈk[?&i7O\ A|pnl*hLchΏ͋wYODXlx&|g4@b*C گo;ވ=kz֤E+~%}Y~v-^OUBbVS'p`3E8߉l7jӺYca4T-l)ڎODƒ9+} ֖B{o ʲh{6〵}X KТ/-6j[t}qKֶFqsa3E~]%^O88pjmJyb+q!nỹeda!}5+1-tJHT`Kh?VHZvC#8PZYX9THwho8 Cg';VH&@ aUOxƢLW7e/)4pŅt֞$< ,pYu([-`k*u]í`فC b2ש`z#D1Y)E\5o®i~X!Ll#!ŰJq9 {>Zb{gE -K2 馠UZ VQL__}5T-?GʎLjRYtci.Z l7"T|QsǡU";p_b% \EXj:Ȕ,BF0TL<ֈ=kh|^ ܷ`^7ƽ HjhűW0"[\Hךc^؅tT*.lM0u'nҩbhUBTu 1 D)SaT>vGdV .Ò*Ї}jeSuzЮ[U\ӎ-f!LLtȏ9~IKeZ%N j6qB}+\>+vD8,ּ-ؼH"~Dќ5X3eҵo6C6!Lϛͩ5_BE,}o-g> x2_,kXUX!ihW0X^9յ^CctK_iB1"*Z}= &TU`HRUj/MZ'TIjYgTms!]kTB~4kk([g6f) +V9۷ =cJDEHzŔg $\zXW4vԈ!]хq6n)]4,\ᴮHxasfɌڡ$PS׹1B?E/]Sw8ZőLn%' ԓN\<(&qWw,s84b`">!(Bn?90Ak %FHw; pq'';'ՠZB:EtF:EtUb>/`PlյvnrPC77cbߏ<PTȬBwڌ]1v5v+u]+WĹAYtLjchׅt3g}.BQHHWc`”@ b,NYO/h٤Nm!,S} PY+Ahl.j`sh5~qzcў۶#Z>-ů .!<0>:c#>L4btVqL\r*_ƪGM\"ZFU C_w l`oU^_>#WtXISׅ߰tp.lj}MȸnX[AUKhu ~ >?cxWQxչ@R"حD <&$a?LF;J2%8N}gLĪfa84tS FH9%b_JeqǸl,סՑ,lW%/&(ù-ӏ^ū!R{"*`оv #;'9T!2A:w^Yb'TULXN3~ #!9س =D< |U{M)}5ˉc/B~V]B^p7 utl_+[~'^-6ߏ-[Թ@ ƒr*#ϗOf 锊Y/Ihmj!@.U~~^tn:iPX8~nU!_2žBbt6m>$CMM^B:/Z1IUkYfvM#N>9[/OŰ83بx}7 AH7=o}@M!{G }98#jVo9(3~>ׄcro`Ah2Y(d}2,+F /e1 NRs0WAaB7*2)s/E|QKL2sb_U e~,@Dr~^n2z݊cB0קj-L,(y8#-/>!{&FE j5Ǭq:`-XkEyӇ%5:JZs"}`~3:qqVB;30{"DnpBY 9[ߏe,TZJOe±V<@z DݼTo1V8Vu<{-OGVhm[<~D ZFt"BsbYҮŲ9)#aUeCݡ، j-r~ w);y6EyJ;zY@g,pj&M/XxVV c+͗~JR+hbA4{&jz~X|*>LM~?+ Âiĕd}C;}^V}K&ƅt陈=۲(ZE%賬>a⾿`mIp~¾ÿ; kCU^0Xp!6b}dlU}b6*] ~lAw :9/2+k}ganm:5m0x1}88%fH-)ytS±!kJ71 oHXJidqq8SI}-xJ qYj9Bg%!lvKL`] hBSЖV;&<`I[qal:*qG |^:Y);:`pӘ?ҖWGTJ_88N)QSUJZ4ąt Q*AC7c~߭PD,KOQ̇m)Y+PVtA#LLUP&K<)Ӣ>Y-Yx~E1X󐏒&^re~THR\ Yh[8eCMؓzKX6Y%p=27.l>Q! @Iȴ"tɆ2p26kf.;;<^"ݳC iW‚%VDB}8 m=} &8="7?eMPU]c.1y)RRo ҁ_V+-sr*f^əqqMReb$Rwgb7<~l6E*)LcMGEi9V7Œ뤦ɪ\U4'#f_`sP9"BF`nJh.SH;*s2h%ScɤBM cTa stqLH7m"sgN,9ʹ/[Py-j*dw\@:E~V)01D,IڍI69CaYzA7jK";2 …t &,T0%k"Y  @ f^{vDJQ?XwCa;㔝nXk)sfGpUرZ.hNT?>A!  |$A 2R+1L֊~ ?N~U-Mx]Ro uXbu@sʲH':t8SB5wŝ2P׊eTn-6_68i 9Wmd ew~*dHQ)38 h67LʳQIFƔ_ IDAT-Ⲳ;X뱌Ʋ!reUHݷh3~;㔝xz^v?hj? {[{Y{p?w'tLc+ۀ Ѫwo>|oLYPf/زw.8tO-GI]xrr:K#6*Nv仏[9o(JS OZ'vVsX8{ nсZl88t)+ECsEo0( !/G,@˾q UQ7uBqy 2ݧ3R-$&Ly6R`+,Hhztz!֬sCqgN4mDֵ)KBy8h[чeX'羦'zMx3xK|B77k2Z\H輦ϕſKٟ)1(Tf(a:'di߅I8rG>ՇZ p R}tj ew[}= tVͿOaUw<"N_%)C]WHC,^sQyTQᔉ!}7)IÒxI<>/}r#.pw6|o#Osu]j[fʺXKu@ޚ:;ێ8D,h_ܴbH*:a0?p-HXAιt?'%E-ʜQWDpou%:E D7y|?="7x\SOYCZ'hn~˂lJ䒂Ձ+?Ve: 6;5Co&9Gtqf6YVҹkNz^v?Ly >DUC`[I\u=lm֋oՓĮV-}]U>~_ߑ@X:~y'aeAĞ[ec߽mV{JU~z #V}^_:NpV9I \HKz3p+p;Z9ɣغPSnsiԲGl)O>qn,,s!-̹xk z):88@3!]?*xe3%E vR˧L.ƜO9^͎K iX@/ )Ŷ071Y @Հ2Յ2>AeC=2xNEJ} θpi`V"42pOW&<3 vqe҄O{9(F?uVtYm$=&[>>@`1ڄɿBI3P!\?} v']zK ܝiD~Rf~@Jy͌ >4'ʽ0>qqO˄o-5V : l(Η7?8xVW E2Be ,UHMܿЫj^@fUHGXR=Aˀ73ob+{["''}~7{?չ>6aߎ8Nw0ך>\q( ̗W)"2 rVX JlVK%tYX/ZDUYY]<&^)E AklQ7m ]. 
Mkߢ*rؐLF+G8@cIʵ+ghkqU^Ο[aMHt_8& a0!?o1뀬B뀑Ϝ?3{&Q21Z8z7rq'Jm?!-R !±/6h%/vDс7iVhY3H?+,` s)3}Ga<5)u2k%2~>%$)nv^K&QIl~c`O,F :H8v:9tn@mMJܿ8S~tZ?X?#nj4 +g=?ߌTk5㛭U{ Ay8 |- UvBa|~hY!X ٢$rsz1Y 2w&MU>4 ,#Z7⇘*֟B#ʕ(l]IlR)3pXrRHL`y-%Zp٪<BV[1ߣbI| (:`G`poϡ'Ry?}0R¾?:qqVE[[e,6DsbQXwh,&eYb\_:N^쏞L,/ՑlH4bOZ P&賌/Cl+b90wh9^UԹ:o)[pu>/ivqe9$e`c}(Sb+Hy08 ^]'A\"{&1\8uW-ky_PkE,SPri,12b},!O(eE,;&,%~i PvAv=Kg|jJ`XP(ڿ Vt VQkmXUk_bA׍I9Uf_&<,u3hՅRۺ-aߎt%~DG$.JL#Qq{-,%X hq,ҏЫCg|sH'M@vvj0k5x+l,fJy>< |;reZQHqq,&_B, lΗX_8*$t +F~W(e>DvQ4cx3XFo%h `!Z2B f])1:˄~A,K62|ߍ(cU_M6M|q^: BC'' gM̴ )BX%x &VN!Dvk<>ѪoӁ[Y߅e3Jh,!=[^Gmw*m< ۗHou=7>^b0V)T|7E8v~]*b_-?P߃6-m= Z$-p$v/}q^E kV#B9?*E݅8N9]Ns J2%1!؞Jk(~~>S0^8~9r }KnDd,L?Ka 08ep!ZBi3+88Njvʺ;ű͐N<(&Jʗ2x|h6­hά@X&Mz&݃_$"2pSnqY]3SќbYD͡-sg 73Wfb-3~ 3yc"C:uҋB`,,m(#c8Nw*4,k>l}&s5H>kMy lkVֻ3_cI²:%;u݊U0u̿up: Uv'|fH㌟x m\M]g&8lAkx NݟwaDu?E`]5lI``ytZ dK UgNggTD|`bze`3jo08h~fA7KB\؋;>Y#}+;}ܧU,֞Äv*A, %¹88l .&WӇՎ xf B8~&6ǼgْuzĜ:?/#%,v(Uߥ88N08Si]y#ǒY vQ*nB&8c792؈vZG6|ٛ͋[1g18P,n,# ,`>ep `~c">7bFW TBX`X(}}@,eD"ez*;cy,wpnCV NqDYkH|ͱt0Zf5_֚!{ }9ţc |+1̮J=%Xx,ѿ0iY>y{8j5)8<ԶK_LhHyw;q|]vO$R?m~ZqhZ~\ȾK$.hwgއW<KP@r56kv*}(|lj͎㔗'puVwƾW@wx3{W,c/LXW?YG;+2kV9O]| S%?Ƃ=(ga c8E88NVVՍOIJǹE]- 8 ^!sW䛱T p#>؀*qA ‚i\) 1 ;88V6Ov~.~f0&USqxsXy,43 l#UY2d,Oa9,Z}dVYN}&6wuqZ*R'c`.P߫Q扱s+1P(_*p| `b/oYM5U sC[pcs;ZOUGr(J;z4~T1׀k{`"@#0!A; o8>b$_Rxf,ALWJ1(]w߶ zRU)P`^Iaw .EkɕJŷ8 Wn,a;s=^(-6[MB;;W|=q!N=|6UOvxhQcuCg=jbj_w6ڜt8Pmp\XPb`P}:뀋==d,Xqq-ñ6 1k긯g6i +K֐2w7}mu`7",{scPwu,h,n9tߢG! ^v\$%+h~ C6eo~ .2#Ӫ[b|sdŜۆB?FQm'6?e\S( {<)VDkaL9ewb^@5S˅>α"۞Y Ux Ihkv#>"Qqz[ўK )m#kI⇩eFZsMO*[)[Z#~ e L@\j`Ua̭ê(sºvOֶwlD_h}LZ>Q+j_m"6A9m?Pn*} ,h66Oxp`L? m>v`]JIn;bVU_w>`/Xz|> eR^!خX}<#7C<j҇F(k'qr=CE軬^C9>5q?،0Taä&'؎MQ˫B?ick zXrmml !X% yrzf1mhc EHWmb~F}.UVE,-Tx8ZY p91:%JTtV +lMГT0A̘6W|wѪ^1 VMpSo#1QKk߲ :x8Z|HYĂE *4N؇Db>1S V$·6K#3Uh5?Q6'|ñ@ VI\A^%@Ufxy ~, W%]&:9N;@S -{U!Ghnth&B:3{vh0HY("+XoX0 @(F39 $!ݼ/<*XF['ab,Rq>]_cbð:k¸]HWMHRoǮOb"0G5WmE ]l~r>D7FΣJ FUߦag3׶h_EL hYmy, f6鼅tðMv%mFqgrq\[k5Mj{? "|rlƾRBp*oUMiݧͱ19{3SBcBT1ͅ`ٰop)t&6` zО*H1Q IDAT6XBe/,%vqܩt`>`ܦ`{Łg'?zWUX{f--8¸&`ac^LFͅtqAWۈ%Y'vol=GO|kYϯh!]c{b3{XN'مt堛tgݓr6e딐.eҩt` =j_3+ǬO1X`{(#ը-ܢt`)X,+ q.S~ EoT.O[5t,M]tJHV=˺i:\s?|lbWqBt_ 3|ln}Cg`a#tKGQs!ޞSSpD ΧQ딐.uٿf!H~?z v*V⫩7%i+XUTm[H7L#6 5ݏt{Y+^(]^ƒGDl0nҁU}Ms`8,N$w!dR,Qdhu%obIHo񩀱DV+o!p?]+kñʐ=, یyr0K읃 ) 7 f;o`vq"%Y,Y\QoM/t޿;> 08aZ"[;W>S3-_|V%2mEŔlj}wZ2Z^ FXk+~D10AXu&4VC/`뫧;<}uz8t7y åB9qb& n(PL \̡ p\`06Lh$#( ǩ@ԍey|ٙ0&Ž+D7ǷV0! 
(LxJ\=qq6}tbΊxBnҌV܇Rcv,v 6->^Ąy2 {6OcN ; '$/%Dzw Dg3<S0FԘXTO0{ G|8.f|~Bjk Ock>u=̻qꬾ<3 Vm&b{͡>-*<GT}g7d, qi=GN:'ªS7*40 K-")|,3$P|1ax˰:EƽM@;#\ek*HYcwuI'aqo'ܯ88X%1ې-:@)2x8ρaYXHlFx/6I-,>>fT0)fB,L/o:y˨كZ0A6ſC)oJ ~:LU3ߛ|Dv8 *0TgK%9w6tA98KL1k LeD}:/  ݈KR~[q6Tak#)v 9 8⒟\DLHS7`ª'kSt`-XU$(&jʻj"Xm{(isGa&p>MHT},܇évMl{_n|b gcW{scBLcQZm1qs'rsrOk]o9حH7ZKk%8~_Pa{v!d,s_g^Jl1ah\g-g?N;߱}!l.sx|3ʴ?ucX|p qqzUHbwۤ ~T>-M(Qg`TUGǮmPV_N4n],I~8)-Ӏ/P3!-eγZ\+ ݋9:]?SsN&͜mj啡n"&Tx+{CDcϳgjl\V< Cno$߮) `=I-oFؽ5 UDIMu93/6&_Qyuo\c޸ir|QWj=W4 tWm?mnAVqF#qMulg5EJ+o Myn[ɵwqEܚ"ާ<>$9ofƇM{ ;.^ꏫcq38y36j8晤0/loLg ~/?9V $>'/iT7^6^kXv3+s:4}m~Ƥ;)Ƨp[6{RcfL?QuƺƤ)x{IPgߞރ񾹞b'xkcp*Z>>ߦcQo7vVc< !6u7ɽ\myʌ1>COIOMഞ5ùgMy }}cƥ|Wc<.jmc~#rf4J77%fi}{oky:koǤlfz5<[f?s>n(,޿g463?,gv^Wf8l\5>sc;6X4bwG7^c1w^q35 ӺapvcLhk6߫Yݽޟ٭==3lkǘb[@!,N| 4 kLKu]\\NU5YqD= >[mrٍؘ pƍZ\VjL0yQk'RCizA6~AbMQ%W8䆗5>dTǚk٨zFe|i^Ҙ4j @RiE7}%}jq+2m|p7Ċ6޳Nkq6Jjw_xxZӯwuĴnk\1!Z}n{5PZ}jQzuJoCx̅?2Q-jW8C뽖y~11a-4Ҹ&_zWmoTߟ9}ac-K!FR՘(^Wo\7Omvk~okno|@vk.bTݺx;OԨ;n>֘|^oq]}q;7~o^ƫL[Lwˍ{W6&Zsg289-n`;5&]ي@|qOƽ< =A?%&z<1>`Q٘rFRէ|W4^G_cLn֣Z ָ_3<+1ط47=::6;;njύf{mkRuֽ8_T]Z{lU:uugpVta;wl[sT?ݸ>SkOQUi'%`Rs]UY|:֭xߍ@W6qM_}r딻6V ҔL3ݷ5"{p~=_xqOv>6;ܸn{ОԸVzm5|޿e߸Yêl|9$qH5} FaNX6{FR%>zηܶqo5w7I{rpcδ^âݮ1pwFK>V:N[ٍ1<鋆zi\Ooe4bM?ooCc]~WwնƪY?^H4M2晍?7lfwL[Ӎs}ܨQ>ek25p!m{byּ4XL㽍".{i>EzHvcF<pxkw lE,{k4..j\~1w$ܣ'OlĬ75nȎj\Q#MNbqNh$^;v;_j뼞q@8th*EAo4>p{5Oj>mh6Nks'dn^t:1n9aeظCOl|0}dc@ Ij'Kָ&>e%7EZ H6h'6΁>݈}o=+6nLj;1u<>iX]=k~hph=vVfs@cIoL8iE&txmXxm{W6䯽{׼Ǭ[;Sj ?bǰHiLH91Xmk+4'5/HVwe߫6^??[mvΏh&nؗ9>?SG5wıw81Fto4 !|mcU6u5G4rw\}[jx7j;?kJ:1ʍo7ߗ'-'D`I5V5§neU T?6qonT]8LfUym՛-nMu  IDAT}e^G8:`]Xf^=iA$^P=oA[qa՟.;`EX,;}_T',;`/՟/;}ɮtGU^yѨcoݫW'U4LUY^}zyc2=w}ueu\vqlز.aGxx-3`|ܗ^e uزέް &ݦz&Շ7Y] ^爵]}˭l'5>D:-WN_iy nelYT]GՑ:eC]v[Ŷ+Z#\>U}kyQ̘0٧Z 'e|ژ(sru걍ĺ'Whߝ,sF3< dea ՗/4>3x۪]fq^}zDM `t1/5>Qd}w]= \\}o -#n5WXjs]zu%ѕD:gܥ1S`5g&`nZ=aw~˫S7˰zxAS{ƢE,?SzmM<2Xa)=1`{Qlשb+-ãg|W, UX]o-?nItR\ `rduF'TXjDM?7꣋ aH#ݤ̒c{5էK+}k`yU?V@uݱz䌏9b`k8ꇫ{7MKU⌏9:6ʭ?ձ)M mGWϯھn'UWa:OWi%2OHũ1+IuZr,r@uY.?ű۪W޵nn5Ϫ7..(\(tk\?fc^P}O w+nu򄶯UOP}juDLH7oV/zSu v{"Q1! 
fs*ͫ{T?Y2ḃ7.ӹ喭݅ud3O/:(7&vսm<#4n9a9mt׫XRݩ4yEe'(g_^]\*qwUل _۾klݪW]Twٌ _b%,;DǬ#Ȃ6a9[^1ٜPjRq( NncCڤXɘlq[!F՝oMhf&xS`w#n5׫ni,Ƒ~S`8{ i,11J"{']c3Mp^5oY,Q|z„͌we/M9@cuVo$'wToi$.= r؄covVu:շCVi&DzY/;՟NhIMe-b+%M {/UW1W g8/'P}v9::etVGVVձ-jLƘضƘY :mDuw@WGê{V?_=zx&0_9uuvy'WX]4y:~}םpm#&dsj']v}Pݾku:]zcbRwcm'qɫv lVI;ɓkN[GV_y ۪_~f[dRol{W1nݿb3yՏp$Vߚ^6R꾍ñn=1?폪4}BW3ñ9 9`xp#im5o+UonIt:sۡUVs:nI?{J[UTn`+WOh[YOmIt9zV v>j+HwLh;:zmu5yc߫/껪ThcVeW{]йy&pwGܭ:dB]oTިSUTh@ϮuwE6ջUe'ݲzuuЄ7[RVsjkWiգ߭Tq^4p~^ꡭHb`v`5y^lUTR}QMN;z~c 3;z؄_:`c.>^9N\e5GU~Qt؇,+7j&|Su mk4wۻ9QhW7^i PhB{O-4yCunmWw~U{``u*ʱjBWUߞ߶F&Ss&'}1&~z@uU{HhmM^]:q>vcxBLHwbJ.ۨsTc5S[7:%= m~:ʾvxqcoWi{Dc`g8$I~wlcJUNx^WݵzNunHԻAZ-'SMh`tT|lo1+5 JE]ޘWVFBIqGV_ݬ @}t}_pVli>(%CUuW5{Ϯ~_qq߬NZW8CNXeƊp/hNFQ]]zL`vܭTWYV'UWXebQ;^=|oX=z [شtRqƊtyFZ$vF%Y`;M<'>c~>Rbyscnťx}u掉WcgxoTߪ@,/nsn(< lN\TGSթsw@#vXUlItU{Uo$):={Ѩ|]m{6Vk`kbu沃`Λ17y5Go81&{"]1Ρe8Q :~{,;g7;Մ]ھ>o6a6?aMH|SWN⿪WWm S_96_Tw[v[v|V\vzʿUzFZ.]V;slV\vTв؏ݭiY]FaI4?Xݢqհ_]}yPvʄ_.7s1a}OL`.u__vH&xXW^Rk~.@zKc}o#~ǫW4Ɣ>~~`׷bU߮^nn/b&ҽ&ܰΫת{; 7W+"7ˏT j$KpyV阿Tl$}eN}dͩNI`m]܌1uDcboy%6nVj$g=z|unGW/_=fתs뱯$Y0qGT'UwwuU9zBusZ_q9~zⲃݩelyw\v>ز>Ub㎪NnwuUEC)'n{}/-;`ݲ؏}HVϜO7pc6؍Zf[k{9`vac 7s`ue\:-:YJܓVF^gˬs=O< { c,Y @Fcո.̶N`ή{~un6Rj+f:!HW]WiV=Du}]G8gnCVikFKyeN1iݽ+ViF[}m}f%mSQq{wT/ʌ}9a#26 I^C_g˜4Yc]^H{ʄ4z:^xX}f٨/^uo4 D:mLh_7+s,{ycLg'n3.=l{fo[p&J9oiTz~ =Cn[dBӪ>6cgz"K}W'ҶQX՗fF217 hONS'EVڏ^X2c_y;g#m5Wl$:wmҘ5gO-#FաK&ި=wMYVƄ^ť&I{3 o8c?羽Fا11`CHW/hs*4Y}c5UϭR}9ϫ3 8CHN]gDߩN_Mh{hcmcXi} 헩fim4ݘ4WgOW?V]sNoV>CoCgg-OzQGiʜhtVF?iLh;z`kO)z[u*4c\9z`ug7&cSs6 Oս E?Q5aݺGC-3꟪ ՝MNjOO@Nh{Bc5t|u*ۉ)zh#mê˭%cOF=z m.;c\{lu*ϭ^-`ىt5ܾF3#7oMhƀASt&6͸!0zR_~hBU߳~Ԩ+گyտWkwX#a֍Bkyc`[ݽ:iB7P7;U[^JܿZ}bnҨ} /;E?v\M{h#Y ڟQݠ=v8ա=:1)b]c>U&E)G6v\;`Št5YZug6]߯UO1Ww[vcnD11Sk9WT|kZ×_.~Tw+XI8>ޘ|IY|zwig=1>#s^XnRV3E<^b$UV7n큔.Pݧ:kaUV6 I~)yuc`6*qO)yz yFW'|T>2E 0qIr{z\#Y=wpuƘ5 uP;`?pEˎFInX}jRݿzL]T_1gT^[jV~p}*/^G_,Wtϛ~ A{"USzqmozxk'>H 7jK}EƘ 7SGWgo/` 5#.>ګҭάQ}Ҝ֕mnNu<'FSFWY,.^S=zیi=Ql&]1mOY}OՃs WTO7cȞW}ye`VO`hW$ߜ퍉?h u׫VӍIb66s=Vdu]qsj#ɮj&,߃T7n^tue p`}[voL ?x/> l.^WƤ7J u5*ӽxջ}귫g4hmT~ʾ73@`i^]\X{e?޵`޶Wiʾ7VOT:ʾT_}x9aV=1h%l=ToL*;teWVG/+()DusEvT~lc\M z~\zʾS}ǮOkӘ쟾^Q,>A߯VGUKI3Xr{L&o7_~iczE;Wż~+~`ue }?UOZz[*W\VP;]L{\K nDtXUS=ee۱2oU5VuʦIaGq[} IDATVZ]S}Ɏ1sLn X#s 猉Wv3>u|c73(`KF;oX`3gjS#F͍x?VэdV꽍j~vuelT_pe߭W]R\0m$\\yɱw|_+MViתnb\#KW./` {7cV d}ٍg,/` xU +WYkEm՟U˾6#(`CO56_n>U}ʾUoI"ؗJ~k '|}@Fk/4:\TZ[VPRܦz}GUVdH#{HU@0*` (CQ"(AE1, E (E#`:}Zw圽&s?Lf9;#𹻉Y$I$IZ)1c_j'I$IR'z 7_U>_vzb}*5I$I:IcO=!9q$1T=J͞$I$u!d7:ZZMym\ ,kq.I$IZnZ,GN.Co 5$ Ӊ9{&gzJEMM'I$IRXJm pp.T| ;VnH$I]}'`&TWӨH\ǮY>c-%I$Iz&rAu}Z"I&P &P}i$I$IjT;O~X;-lӁ!D]$I4\쑘EU^.ǯM5mn DvI$IT;;P>⡁Ed$3 #vXD$I$I`/6.F$ab۩~ I$Ij`}gctuI 7$Iw~cʯS}%-׋5~j }I$IR'm, ߘGR^b]?=1I$IRč}7;Z\^~jVM%I$Ib>q_2kwUs?p!۷7^m uh'&$I$I#&vx->`P]nRG^IyV +nJ5 GoVM!I${'!~VvAc qa9'? 
4Y$I$Ijg!.Y$j9";I$Ij6Pȼ&~]':(55I~9U>$I$=:/AGٙ+id-oÀKkN$I$ur|UOlD- uѥ6$/< ZC?X$IO~c~v%=|`zN4^;;".p6r p!p=sV(I$Ih'6 xX8sq-X9!<&au} eV(I$IR{ 8HH:p'Az?I bJQ$I$/~vB5tbQ?pdV M^I~F\ n=4/$I$IM5XoM/LK$)[p7q=nOL$Ilq 瘆6꒞[/p6}ryq$I$DdVvAp?Kݲ3UoV(M!s4wCDA2!+$I$I-p OĮ/IK$)C//`Rq1+;HlBFi|sA&W5ԝ-I$IQ-{5q(05-lgKX+I$I uڑ`l9EҳM#E,^D$IR[x pVv{@oHʱ+qxqv$C[ŶF&$I$~P; X5%,ÈA|Hx$I$Xf`,Bk8>8!+$I$uxYb31i$DOA`D$I$S6*Ӏu`Z"Iu's$I$I-=V&g]֦~gO׌'g$ITO;gڦ.uźAjd T}]T%I$I-M,%uoGr"I$IĮowfyl8|(nE$IR='of6:~6'4(skQ?U9[$I)&{6D]ĵi`R'&I$Imf6qs7/;Bl < Dbckjr2Kf9f0%uͩ:Ԇ"[I$IRDLq 4V+-^bI/7$I$ukkj:ی$vjګ;b/W$I9z=K/$e pI[BI$I^$`3` a-] ##Z^$Iש{Mt݁ `MZ"i鼂^  ͉#I$IRS{BIJ73q=nOL$Ijd6q6/;bx9ՖN"&J"I$)|0+;H{ B]%v9z17;H^[jS3rI$I gHk  غ6 J%I$IjЎ W7򱒚gp!['Or*9$I<~O#Zk}`r < )-x'g0'zOKiܒ$IN%r<ʴDJ'Wt$I$)8`u` [WHan3,}Ν&I$Ic+b]?sM%I.k.h^J"I$IZc/}55Iʲ)0xw9 \J$IԾݱR+-G4q.f$gY"I$I]bE`vCDWđTQ m:Ԝ-I$I4_ < GRMF۱I$I 8xsv0x jop7ьG$I wv.7x/;zLz n<.'R; Ւ$I2x艽Fۤ%e,ຆe)$I$&4,-w "uՓd) lPE$IҢO@fe l~L %5C|Oe_N$I$uIr,u)R<%I$Ij#[s4?$g:4iLβ]LLzrA$I> lD ĽTf[y5q8*;H%L~b\$I$XT[ h$uDv`rp4.$I$-BpW::i!݀S7E$I`r>pԽJmHE$IN0 x U9< (+Z5n`OL$I6ٻm|(i^^^4(b ,$IZk, C.Ӂim!mkc2P$I$iDOQ/cȬ@|8O %I$I`bAZ$` jk80O.Fő$I"Qfe"pC";׋A?%lPMw$I${= JR-I\j$I$i &nei/yii^V^%gie[kO8$I̅tkR LMKNB |Ek6)$I$IjD?GM$K.35~\ lH$I.%yAZd9 E?c!ii6I;39K+}F$I{{v6ѿ.bL׋-h֢ZhR#K$I6Υ}pvZ"Iu3\;I$IH,R;tk8~fVI$IsKܟ~>#'Do~A Q8 5pdR&I$IZa'vAIʲ,p3t7g$Iv1O\u*9Ԏzq ;;nOLr{D/I$X59c^`r9:0%+:b ZlC=N$I$u˯{;xKs[ LCݞ$I$= Լ `p1p~C')x^rI$IKo>D-q 0ԦaGKf47;[RJmubPĀ(I$I:׉oR[%/$C[mIOF&$IZꦅt)Ք&I/l-(zqlv4{TÒnȉ$I$IRKlHpR JK$)Jj%H$I&%$;:< 쒜ER~|xR$IR'geQm 'pNճ!shP,0|൸V$IRN,!k+QaY:FxlfrlÈmd$I$-5K=qSY$wIu$Iΰ`r>o"`BV(Iz_=5I$IR|)< H)UAo T$I$Ig`'-΋#)Wc?+f$I$>EQIݧx ޥ6x8جA$I:.%yAjdAT[ uF qxvƉD`Rs$I$D zk9%vԽ#zb&=1I$Imo6T!UFQF60;H6NVӨ3 ǧg$I lDmc8p4p1\b_=5u;Xl, dǖZ.$I$jgJI$.^LK$I$j-H\HhA|lE$I<{~ uwmh--2vp;phV(I$IZ`e?DOR3R7 𹳀BI$I6҆ژ,R569K >P'&I$֎"&j X39ڔDu,$I|sgeQG:}GҨY K~l_j`6I$IRg>Ĥn J,NvI$IDcc^v6Ճw%g(Z!9K X59$I\HV"1}`byx\Hf>  IDAT}B:qK$I:7IeT zbP-'&I$V6NޜN4n2ht3);H$In3bӄ hCZo^-ՉŌ jkN+C 'e$I$KL-5{bRwj(>B9*I$IRR~|'14XV(풳Iă$I$uUo] | #8jHn4nI$IR'X]r&#u㨆 I$IJ5Jv61F)b$́ q%I$ 7C. ?z{"ڃ^DepESI$I3QjĤ4867K׈$I$i&󲃴{J6|$-ljkÔ$I|㳲NƔQ™dB!sb`ڨ($IRyx84;!D?8Zˉg #wRjwou$I7o2[jKI$IjIfFA:QF|3tSFД}$I$# gv+Չ{W k$u۩6 I$Ijw#c?ږBp'*5vCݞ$I$Q7YV|M 6O'g$I$ Ƈ." 
ˉ#&v=ux$I$ƞؗB;Iݩx)CsӁrI$I`GgÁ'Is990>;$I | =;_v7'fr'׋SF6)139K'+pAvI$IR- /N'?GR a %I$IR &zbőTߥ(up#GR&I$IB <lEZR7׊Y:TVZ$I&Ҵ;E`Las4ԝ-I$Ijw+JBIs{i$I$=˳t-'sHufp?,$IRpdriI-p|q{,j׋/e8ءnR۔X\N}ݐ$I$ &zb?ʋ#ڞl\P'I$i!f yALj0`M~E$IȬ RN^Sjo{ºfC\/f߇u$Iv78 R[KR&I='[3I$IzNHc ~UA0h80ء$I$-#Et3HM2>3Dc'z[riIlJ,}G9\J$I&M5H|'o`BZ"IuUa+d$I$UG4r #ItyH|ݿM4%I$Ij?+b`ր)$ūzzۖӸ%I$IHbẋj2I~TI$IRioHj?zsܹE$Ij[;;QubwsMĽ1i׺b Rm}>$I$c񋩆엖HR|xjuW-2I$IG;t ǯ.zHbRҤr>$I|YAA1a`d8 X.)S;C\/.64ڐ$I$m> T$uMھԆdI$I2yAd`״,lMnbw@I$IƅtF˯À/i.S'Fu8w4{$I$!}őT7S ;I$I"h~xv.62G`( F5gfrn ,$IR;Bw/DJ,'$I$IM3艭YjbIkp?%I$IjP8sZn`, C óH$IJBW nwJ_spA$I$5Į#zb'HNOϭ< B`'I$CM%| Aias;.p"I${ͲH5q&Og`p)MZ8^DJrѣ~X\p+I$Ijw}7(r`PjDOlB:{b$IRMÉ3Iz>߁esH$I6 RƕC~`˴D0:"%A,PC&j`D$I$5d`d9>=HRlDL %I$\HWO{C8H0uqG:I$ian~DP{p7#A؅j!4nI$IR'8 t4T=K͞$Idsij(Ө3ن7;+$I$ V-+O}wfT 〓Nn$I ztkIIu K$$I.#4>|かi`=.9$I$mJ,[v %F#zPW4ԝ-I$Ijw;wcJЬPjaEsZ/)I$Imb;l z^[j8_=+[{`rrI$nnn^Dj#=?%>'Π9^D!A5b;\$I$5C/n9I>HR f  ^H$IjyAH*,z~#v  ;H$IȬ R |9ze-EsHmZ~ܡN$IV";qY$ۈg$IT{GwcUo}ѹqԅ,Z>~$I$v~6{:q(ЮSA\/^Dj#[_+#$I$Il㋉؟g3$uCDaߤ:m$I$LL^·`<.k/=?&g$I$u?P;X+'y#їZ@,qK$I:çg^`Db`|`j$I&>C,Zm$y-E%f$I2-G "uiTlC<$ [Z7^") OI$I$IR vؗj;$u55Ծ\lH$I8>;TN",H$IгHPTSvvLKd׋A&ȶxGQ $I$] <)#I<ԆQ]+$I$%M%}^v-!WV W^H~#~@$IZR.XF?{IK\H'5HρqZI$I`ÆljқI1ۀ.VY$In p p^v-`x9|X!/:4 93hpJVI$)O"u {&>p01$6&e:X/>?\$I= 1m์mq&zbW5I$Ib X5\HY v;(;$I$ ^p~5[1'^B,{I 8K$Ip}'vMNI5K A)1 `s!B;I$I-6ehxX#B0zh'5J$I A.6=Ԇo Ub;z !ip}><,_j$I$_Jmp/>O\$=1I$f DM1YA$rb:I$u5`J۔IP·we$I: :vM΢1'gQs  |w$IRr!TO 5&wQ zx)[jN$I$uݫjInm]j$I&';joq*gM,jl_E$Ij^`zؤJ,|}xYRz)x]Bہs)$I$f3ۥ%T+g_n] \H$ICH쑘Ea2pcymE͵ u$I$)RۖuIn~DnI$IR{0vHK$.V%6$I$-w|=%č)Y$x`\^I$.vH!iVڍ;^O\/fIZ4}ρKmwON$IV^TOp(V`R{p'p4M$Ilbռ jmğI [fx`Q$I"iL'z"[ sNy-|ITS]P'I$Ijw[PʾmlHR]zk=1I$i;, N1WZ+wYZ'S^D$Ij끗dFu..zqF >2qJ$I&ST%u݈S)_sv$I$_F&gQ{F4Yz4D$I=Dˀ)$ Ӹ%I$InpqJm74%><L.5{b$ITJ Y^?^A\/&g$I$ !CD`zV(I086uJ"I$IcZݦ^I~L %f}?6HI$I$.%yA4hz cj}IY$^H$IK9$5$lO Z׋+:l kR%I$I*~ 9q$8ܡN$I*fYDi^ ܁Ӻp}t`Dr 50`($IO@fe;{1#߁s!sMRer7JXY$I$Ijb8|%I!bA58 3~Y$I6 J1xN$=iTjf&gQ^sY$I6D` Zn?ZfS^h %vPw"$I$E>XjI18FH#HH(!# HE)Р ^PX FE`CPpE+ҤDz$dg<3I~뼅xTCݞ$I?p?Nv՞ "p/nH$IT7S#OgđTmn,NF%$I$/lp3 8jx#q"]s[F K%I$Ii)_ۀ;&fQ} 6/ײYg"pYW$I$!vDҀ[0 SjÁ bJGTÿҹ$I$٭'y/qĆ2t-1> *5{b$Ij o ܖCp:p4erIWJu$IR]LDR {6:X|Cs*TӈmJ$I>0u3瀫pdm \ UG'6$IĤyAT  DHZ19cĿ!J"I$Ij3bLVCmCb b v,5$IfxtC}Ô4hb|X(5{b$I6E Vڙ{Ebwn b $3PXJ$If㲃HN=ìEs5xZN4I5>xmJ"I$I($`r1x&_h}mH$ID,,'d$I$i C.5Md$'1RkI$I7.'wꃭ(K9b8:D$IRM쬼rvXbnY$ߚ|b̸69$I0uHvua }$}sI Nl67 $I$Iͪ 88`8 '`Pja8ppq%D[BI$I}a6a^v`WÒH#{UH$I ѳì jo1^\;+1Y$OtCH$I'x]lPjeIq}Cݞ$I 鴸n$N2T\Su-_;KMH$IYW; fT+[qĎ+d$I$x^Q9q$D3}`R[{|nB'I&2Dw.!`,1j'YT?+ӒH$I[ϺZ/S`Rs7nI$IRJ$Iz9ӀÀ|x$-T]V+1$I46cvInbx|nw6bIj"C7R"&I$I~C< u^jP'IڙMiDMe=`>,oy,$Ij]=ϱH9x1eo9t~]`~K'n,NY$I$I*5嵤5x= .~ J$IzÁG;JL Q_{58p*q$I$Ih$!ջ}+b.6I$C'ppj K.$IԴJ,zX+9HÁN5`XrI$IFT^j*I*b|xQj$If UOlRI,ͧ$IwDRSh LJ"I4>A$}ڀ=dz&O+>Gcp'p9ʬP$I$R?0G-{KV(I9p ph 01+$IZlbRȼ jz^oM 0ت\^{|Z;yq$I"=YA$bǿoJC}d9!T;oYj-I$Ijvcljݏ7Ix1><Yj$Iԯv\D 1,[S&rL΢ 3&I$I8<~ IDAT/o&1^l&"M b"Ĥ"Ik>rF`D$I$mogÓ2I+j*%$I$-ặYԷ\H5(9$I$I*!N&OcJLK$I$IR߸x $TsɎ,.=-$I$-!z F%eQV)ǧ/EhH!I= jo=bXU'.灵ϗT_CYiuo"zu$I$T|eK%Q-T %Ie6sü N#~nK"&f$I4x>DR!Ƌ !fIq;1>Ps$I$5wϻ]I /;FH$iNLg,g+'g#bxXT'I$IZ\ _i]H)m |x}_x8xEV(I$Izip6pyCZf)$E1lR < J%I&6 8 88;EXふhM;K}mCA $It'&ɋ3/d&OZ{$,b|0܍[$Iϼ=G:5)b|xXI$IOJ΢%"rmE؆{e$I$)$͟S6/{MNju.nDj~J"I$Izo(v7&MN+DJ`ͬP$Ij%lAv$Izs!2m xkv-偛˵~ruv1H$IK6W^A$A9Di`R)IyqeC$I$]N39ZӅu2Y$I$I)zl\}_յ#6GVj#rI$I[,\s%.`bLXj{#L%I$ILnh3P}>RW&gQiXH !H$ImKAyq$5p/0Pwѭ$I$5~ jZ"Iu4x#4I$ 7zH (+x8ܷ\㓳H/6A$IT[.Ze!]-֮k}L4.%:.K_\\#$I$IkggjNMI$.ځ=S+7, SerI$lCj:(urV6jgYiox}YA$ITK;;Hxъ(_W%@'őTST$I$Y^E57м8jgC $I~5DB9\Hfq8Y$I$IjF;qSj^IR}\|ܷ_e$I$M8(|`RV(I18~pp9%IP`,d6"NqV7\mA0 
ծa$IAA1^l-_ہܘGRMB>nܒ$Ifxb^I55'R'&Ifoe jk(0| X&/$Ij`>DR!ƋAjd81b dY$h$[s[2I$I[cOQm><0=+ژv $Iҩ  ~no&OH"-X tƑ$IR"IZ\.[1@[y} bR$$=TG^j@gZ"I$-db热HK`<`$5(YF~ E$IbA3>T'RIjMx'.$I$5w.bqNi$bCۨJ$I#ÉJV3a'gTճH$I4 F׻Pm´{Z"Iub|3..$I$5ef&i$Q b|nOL$)At"U3,o3q`,$IX>'lKzyGqAĤSHw,Pjcjm~$p `P$I$^{xqZֶ-m`r"qyTI$i& "-R GH$i`'z H9x17;Hj&cG%TW`Rs7nI$IR3jV(P}qrZ"Iuu.1>P:1nH$I[8 DqtV(I1†Os_%I$iN,5qZm7039WOLTI$I* #zKɉ"ƦutK$IlV)_ۀ_ϻ7TWSܿԆő$IԌ"v); 4 8\H}apшdKI$IO#Jm$ZY$p-Y$I$IQ9`R[X9+3f$I> \DC7>OOh+'PMґ$Is/n(g RSaq7M#PG$I^YsǩHܵcħ$Il ׼ R?8X< le0.,RJ2+I3" -Xj$A3}D1I$I$I3z8 ('i^ Sj[MI>7] R?x50.9`0D֍48}gY$IeT{^AZ(1F/ +-FqK$Ix`z[*IK$Nw]fOL$I" L,G'fiv.`72$I$#Km@GR&I6ppFo>L %I$IR/ <,C݈% /G=*I$I$5."s4!&2+5Iy[M"I5XO,V&ƋU.b̆8tӻ<-zo;qK$I0`z$&iA۠I$-7;ܖCo[%gTcrH$'" b2UOoR{%p LV(I1xp7K$IԌ%N`_L+bIp6YF/NI$Idfe TE38hb4ؽ 7q2$I $-.O%?'K+'1Rk$I$IͧؼkrH5g1R$I"mL,8:;4W/)TGL" Eg$IR G\"I/e21^D/F쬻8ډgBINV q2ݽ@WR.I$Izcp=1gRF,JRm %N]jo:xB$IR;h.<lY\J Ę~rI$IZzS󭛖HR]#444/$I$IKm3 N<>LIzv=1Ib0#;4ڀs=jL'iQ#7#H$iX|!I/ewb';ˉS-9HRl\\&f$I$P{ ZNI5]`r b$KIf"%{` jkc`z4GbI$-DdVvI7/fblP^xXHR]㟥\nܒ$IfNy wZ"Iuu177{b$eN6z&T~祿\&g2 n$&E$Io߁HOAT#)*`Dd2!҆wo$$I$^<< )퉾$\Sۀ__e$I2DJ:` 56Xl L"epjrI$Ib#D<6'ۘ}x I$ItQm&x=?-ڍ'gyB$IԂ:!5Yh9`NN"e[8~-\#I$IR$&Q%QOav[P$I$DO쵥6HIƄ[BI$ wGgjDa>qb$?]Y$Ih'I¼/>w'0<+ZJ5/K$ILoN#ƈ7IAi6g^vFwWP5$iQ>E[NrI$-|}۬ jo1^~5/4͉#Fڀ݀*NqB$IԜځ%zbZpn4%)6{#p Rx'.$I#λ*+ ޝ.&O[&gخfQ$~.#6L*;{;1^Dj`zcb#Y$g6\Ԝ<$I$Ij6mJ~Wy`Pjq'\Z{b$IR C<%C7039Tg'&e$I$I9RۖjRֶ.rEL N4$I$5sy`%N xEZ"Iu p5pXE?sI$iP \< 쓜%2Z>9TW F璳H$ڨ>3^eр  R^eT]$[E#&u&$I$7VEVj#Iہ=tƦ$IҠ3XJ,0k.&H$I^ +EnoIR[1ⶆڰ,$I$-qTϴϻ?$IR.͆=1I$i Nt|X?9@&FPN1pxbIV#30);ڛAfQ} -!9q$V<"S$I$Ij6KľPI+ڈV~l]jk"J$lf^v m BL>$I$X^w&IR}zb;'I$IMF ߖk,`p1f'g$I$I}k,RHR~ tکBI$IK; 66X!+Z VCn)$IRKLDR͡:A\+֧đT3MĄC͈u$$I$7#zbZ;05-:n&ڞD/x+)$I\H'mݥrʵAriBY[$IA$xqav :qU"+fT[_!ƈ u$If3 |=JRmD~MN$IR dĮwS)J70394ؼxuJrI$I40>GFT$8qlFe$I$'zbSȋ#f6'X>I$II8^:؉B^jI"I$I0' 1!iu06)5w$I$5~$p3$5:=Zj$IRlCѐ?ڀOoA)I/e]D˝H$ ""#ƋdQ˙E дDd>K SI$I{&zbOKmq$M>|=f)$IRӛM<" r"]'gYZ1aBO?i,%Id>kVvI7/fQim] %%:[ x7K$IP1poJ"IuZjChy$)~ A;U^LK̲4P=L" vv"I488;۟/2я{g %6!&Jb;p$I$9mL5GG;X6-:D!-ppV(I$I/m"A?-, IkXy-|IDAT[$I$5l |jaxꔤk z?n[K$IT[WRmZബ@jk(p?rCݞ$IT#RYV$g$IjfI.RŸFi#z74_GR m|ؽOA,I$IL>Nu☼8jf5cy 3$I$Iͨ{?{bzK,=w>J$9A A<%M!rv3H |G$llCRK_8ܷ~lIR}D} ͋#I$IRY8x{ |X/+:'e$Ih:`rILxIڀ O"I$Igw6hOK$NF't g?ZX'I$IRy UOlRxj?&6$I-f(0ꢝ8> bA]tR\Ryq:I +'f$Ijo"!ƋsHKcp(q}f] %F"&OC%I$Idp4ہ2I|'vfu]i$IҀMDQsLrIpp?^rI0dVvI7/f8qg 9q$L;qO5Kmd+$I$Ii5Y'vZuPIRpp 0F̫Xį$I z>ELԪ/@]R} O1$I/IC7%TW5A>>JLH&`.QF IwzvD"Mmx:` jouF`r'ITO@feT{sbnviL.>P2qz$5J,/.#-$I$IKn RO+6nR e$IKυtRxx6ϟW.IALxhO"IT_ &;;/N"%X@|vpd[c(Vf`R;8<+$I$I"7=1I=F?tWY$InSbCz3Y03ϗdNŻH$I$Q;_Km6`׬Pj3ч>,'$I$IKeI$IKOO]GRm| إO'6$I v{^I|xUrh]E$I$ RD`TOIRw=gKݸ%I$Id8|K Y$څDO $IsA$ۀ;$fTor< /Iw'ӳHb 4XTp1!kcm;:Ŗږ2S(Z2FDdIU V@8bA""TL*3L2PF"2؉^Xt<^=|9]{%F󫗮{ːD{_w~ZӚ7ffՋSW۪Uͳǜ{s~/68 07>ݴ;ݎx/frQwFfMG{PucWO.:WVc;%ˣOT[]ozLL|ݯ5PגB:Xc/T}>z8fM .Igg`YΪ>_=tt`^4_ltUWm),eZ[߰D\`/k(]2*lvWeWXU]^=n7#[~= M/DWg`oCS5OT=ݦ&_l*|U~ݸl&wS}TճW 7N[7nM A5}>^:nsЦ.χl [_ݴqqVݷzWӎUΩ~[7lZ״&qq޴[մr7_4쪞48 0.l348 O ߳êiZj |hZ<:p5!6#TTGNt35}VcJ󫣃뮏'5u EMk 'ޫ7TSAe^jǘ8Tlիی u޴&vƠ,toMk/m Xk'G{u7.]w˹y9hzQu,꯫MœFoW_YJ=A]Ot`3ٷ֚ETm.S.n&+ڮ {\Y<*tJӇ篎 wLuqӜ700OUoVx/(.ty[}::t53կU ?poٹg]_=pfAQ}iM֍[`˸WuZǣ]=T}ώls=680?G4-]Z=jp!êޝ拻[^UiZ|8ݹz]Ûykmn:Y}iMsvmM5[Z]oP&ՃW uhuW A6= pQY U7Y frDΦw^]=:dT(`izuj̚[k>_\ݦk W΁yOuASf_9_u"6?::ܶi}=pD\ݤzAӎtyOՏ Iu7}6Mw%oWݨP[ݎ9M\׫^^}|5ai{t_ӃUܻzSic3wlSWoVCn}ՉWwoO3ۚvg#Btu[uiuIӳߩɦºGfMkb7dytuWl`W5ubF_T }:i!n꺭!#=&b\,nV:>S54 ]ӽw6_j2qPu,|m]߹zj𽞰z4wӴCm[۶Kk׮ڵkt >\}̙]gkn{st`S0WriCuauW7zt:}/ c3ꕣCU'Ww8:kM7.y[]9r XSH7B: gսGfW;0 0ogf `w(GTGu֝bIm&t,B:M!ESH)`h X4t,B:M!ESH)`h X4t,B:M!E1:x@u!E9z[[3F6ë6:)5S 
ꁣCpnU[cSH[)CpKۍұꑣC5:hiYo^<:{B:nnx2:\X Ost`1.mu kbij!KJ!5=0.15.2', 'numpy', 'scipy', 'matplotlib>=3.3.0', 'edlib', 'pyspoa>=0.0.6', 'py-cpuinfo>=7.0.0'], scripts=['src/svim/svim']) svim-2.0.0/src/000077500000000000000000000000001406305341300132555ustar00rootroot00000000000000svim-2.0.0/src/svim/000077500000000000000000000000001406305341300142335ustar00rootroot00000000000000svim-2.0.0/src/svim/SVCandidate.py000066400000000000000000001120731406305341300167360ustar00rootroot00000000000000class Candidate: """Candidate class for structural variant candidates. Candidates reflect the final SV types and can be merged from signatures of several reads. """ def __init__(self, source_contig, source_start, source_end, members, score, std_span, std_pos, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None): self.source_contig = source_contig self.source_start = source_start self.source_end = source_end self.members = members self.score = score self.std_span = std_span self.std_pos = std_pos self.type = None self.support_fraction = support_fraction self.genotype = genotype self.ref_reads = ref_reads self.alt_reads = alt_reads def get_source(self): return (self.source_contig, self.source_start, self.source_end) def get_key(self): contig, start, end = self.get_source() return (self.type, contig, end) def downstream_distance_to(self, candidate2): """Return distance >= 0 between this candidate's end and the start of candidate2.""" this_contig, this_start, this_end = self.get_source() other_contig, other_start, other_end = candidate2.get_source() if self.type == candidate2.type and this_contig == other_contig: return max(0, other_start - this_end) else: return float("inf") def get_std_span(self, ndigits=2): if self.std_span: return round(self.std_span, ndigits) else: return "." def get_std_pos(self, ndigits=2): if self.std_pos: return round(self.std_pos, ndigits) else: return "." def get_bed_entry(self): return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}".format(self.source_contig, self.source_start, self.source_end, "{0};{1};{2}".format(self.type, self.get_std_span(), self.get_std_pos()), self.score, ".", "["+"][".join([ev.as_string("|") for ev in self.members])+"]") def get_vcf_entry(self): raise NotImplementedError class CandidateDeletion(Candidate): def __init__(self, source_contig, source_start, source_end, members, score, std_span, std_pos, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None): self.source_contig = source_contig #0-based start of the deletion (first deleted base) self.source_start = max(0, source_start) #0-based end of the deletion (one past the last deleted base) self.source_end = source_end self.members = members self.score = score self.std_span = std_span self.std_pos = std_pos self.type = "DEL" self.support_fraction = support_fraction self.genotype = genotype self.ref_reads = ref_reads self.alt_reads = alt_reads def get_vcf_entry(self, sequence_alleles = False, reference = None, read_names = False, zmws = False): contig, start, end = self.get_source() if self.ref_reads != None and self.alt_reads != None: dp_string = str(self.ref_reads + self.alt_reads) else: dp_string = "." 
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if sequence_alleles:
            ref_allele = reference.fetch(contig, max(0, start-1), end).upper()
            alt_allele = reference.fetch(contig, max(0, start-1), start).upper()
        else:
            ref_allele = "N"
            alt_allele = "<" + self.type + ">"
        info_template="SVTYPE={0};END={1};SVLEN={2};SUPPORT={3};STD_SPAN={4};STD_POS={5}"
        info_string = info_template.format(self.type, end, start - end, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=contig, pos=max(1, start), id="PLACEHOLDERFORID", ref=ref_allele, alt=alt_allele, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))
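
# Illustrative sketch (not from the original file; all values below are
# hypothetical): for a CandidateDeletion on contig "chr1" with
# source_start=1000, source_end=1500, score=42, genotype "0/1", ref_reads=7,
# alt_reads=5, and members spanning 5 distinct reads, get_vcf_entry() would
# return a tab-separated record along these lines:
#
#   chr1  1000  PLACEHOLDERFORID  N  <DEL>  42  PASS  SVTYPE=DEL;END=1500;SVLEN=-500;SUPPORT=5;STD_SPAN=5.0;STD_POS=3.0  GT:DP:AD  0/1:12:7,5
#
# Note that SVLEN is negative for deletions (start - end) and SUPPORT counts
# distinct supporting read names among the members.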

class CandidateInversion(Candidate):
    def __init__(self, source_contig, source_start, source_end, members, score, std_span, std_pos, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None):
        self.source_contig = source_contig
        #0-based start of the inversion (first inverted base)
        self.source_start = max(0, source_start)
        #0-based end of the inversion (one past the last inverted base)
        self.source_end = source_end

        self.members = members
        self.score = score
        self.std_span = std_span
        self.std_pos = std_pos
        self.type = "INV"
        self.support_fraction = support_fraction
        self.genotype = genotype
        self.ref_reads = ref_reads
        self.alt_reads = alt_reads
        self.complement = {'A': 'T', 'C': 'G', 'G': 'C', 'T': 'A'}

    def get_vcf_entry(self, sequence_alleles = False, reference = None, read_names = False, zmws = False):
        contig, start, end = self.get_source()
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if sequence_alleles:
            ref_allele = reference.fetch(contig, start, end).upper()
            alt_allele = "".join(self.complement.get(base.upper(), base.upper()) for base in reversed(ref_allele))
        else:
            ref_allele = "N"
            alt_allele = "<" + self.type + ">"
        info_template="SVTYPE={0};END={1};SUPPORT={2};STD_SPAN={3};STD_POS={4}"
        info_string = info_template.format(self.type, end, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=contig, pos=start+1, id="PLACEHOLDERFORID", ref=ref_allele, alt=alt_allele, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))


class CandidateNovelInsertion(Candidate):
    def __init__(self, dest_contig, dest_start, dest_end, sequence, members, score, std_span, std_pos, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None):
        self.dest_contig = dest_contig
        #0-based start of the insertion (base after the insertion)
        self.dest_start = max(0, dest_start)
        #0-based start of the insertion (base after the insertion) + length of the insertion
        self.dest_end = dest_end
        self.sequence = sequence

        self.members = members
        self.score = score
        self.std_span = std_span
        self.std_pos = std_pos
        self.type = "INS"
        self.support_fraction = support_fraction
        self.genotype = genotype
        self.ref_reads = ref_reads
        self.alt_reads = alt_reads

    def get_destination(self):
        return (self.dest_contig, self.dest_start, self.dest_end)

    def get_bed_entry(self):
        return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}".format(self.dest_contig, self.dest_start, self.dest_end, "{0};{1};{2}".format(self.type, self.get_std_span(), self.get_std_pos()), self.score, ".", "["+"][".join([ev.as_string("|") for ev in self.members])+"]")

    def get_vcf_entry(self, sequence_alleles = False, reference = None, insertion_sequences = False, read_names = False, zmws = False):
        contig, start, end = self.get_destination()
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if sequence_alleles and self.sequence != "":
            ref_allele = reference.fetch(contig, max(0, start-1), max(0, start-1) + 1).upper()
            alt_allele = ref_allele + self.sequence
        else:
            ref_allele = "N"
            alt_allele = "<" + self.type + ">"
        info_template="SVTYPE={0};END={1};SVLEN={2};SUPPORT={3};STD_SPAN={4};STD_POS={5}"
        info_string = info_template.format(self.type, start, end - start, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        if insertion_sequences:
            insertion_seqs = [member.sequence for member in self.members]
            info_string += ";SEQS={0}".format(",".join(insertion_seqs))
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=contig, pos=max(1, start), id="PLACEHOLDERFORID", ref=ref_allele, alt=alt_allele, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))


class CandidateDuplicationTandem(Candidate):
    def __init__(self, source_contig, source_start, source_end, copies, fully_covered, members, score, std_span, std_pos, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None):
        self.source_contig = source_contig
        #0-based start of the region (first copied base)
        self.source_start = max(0, source_start)
        #0-based end of the region (one past the last copied base)
        self.source_end = source_end
        #number of copies inserted after end of the region (in tandem)
        self.copies = copies

        self.members = members
        self.score = score
        self.std_span = std_span
        self.std_pos = std_pos
        self.type = "DUP_TAN"
        self.support_fraction = support_fraction
        self.genotype = genotype
        self.ref_reads = ref_reads
        self.alt_reads = alt_reads
        self.fully_covered = fully_covered

    def get_destination(self):
        source_contig, source_start, source_end = self.get_source()
        return (source_contig, source_end, source_end + self.copies * (source_end - source_start))

    def get_bed_entries(self, sep="\t"):
        source_contig, source_start, source_end = self.get_source()
        dest_contig, dest_start, dest_end = self.get_destination()
        source_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}", "{6}"]).format(source_contig, source_start, source_end, "tan_dup_source;>{0}:{1}-{2};{3};{4}".format(dest_contig, dest_start, dest_end, self.get_std_span(), self.get_std_pos()), self.score, ".", "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        dest_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}", "{6}"]).format(dest_contig, dest_start, dest_end, "tan_dup_dest;<{0}:{1}-{2};{3};{4}".format(source_contig, source_start, source_end, self.get_std_span(), self.get_std_pos()), self.score, ".", "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        return (source_entry, dest_entry)

    def get_vcf_entry_as_ins(self, sequence_alleles = False, reference = None, read_names = False, zmws = False):
        source_contig, source_start, source_end = self.get_source()
        dest_contig, dest_start, dest_end = self.get_destination()
        svtype = "INS"
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if not self.fully_covered:
            filters.append("not_fully_covered")
        if sequence_alleles:
            ref_allele = reference.fetch(source_contig, source_start, source_end).upper()
            alt_allele = ref_allele * (self.copies + 1)
        else:
            ref_allele = "N"
            alt_allele = "<" + self.type + ">"
        info_template="SVTYPE={0};END={1};SVLEN={2};SUPPORT={3};STD_SPAN={4};STD_POS={5}"
        info_string = info_template.format(svtype, source_end, dest_end - dest_start, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=source_contig, pos=source_start + 1, id="PLACEHOLDERFORID", ref=ref_allele, alt=alt_allele, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))

    def get_vcf_entry_as_dup(self, read_names = False, zmws = False):
        contig = self.source_contig
        start = self.source_start
        end = self.source_end
        length = self.source_end - self.source_start
        svtype = "DUP:TANDEM"
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if not(self.fully_covered):
            filters.append("not_fully_covered")
        info_template="SVTYPE={0};END={1};SVLEN={2};SUPPORT={3};STD_SPAN={4};STD_POS={5}"
        info_string = info_template.format(svtype, end, length, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=contig, pos=start+1, id="PLACEHOLDERFORID", ref="N", alt="<" + svtype + ">", qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:CN:DP:AD", samples="{gt}:{cn}:{dp}:{ref},{alt}".format(gt=self.genotype, cn=self.copies + 1, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))


class CandidateDuplicationInterspersed(Candidate):
    def __init__(self, source_contig, source_start, source_end, dest_contig, dest_start, dest_end, members, score, std_span, std_pos, cutpaste=False, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None):
        self.source_contig = source_contig
        #0-based start of the region (first copied base)
        self.source_start = max(0, source_start)
        #0-based end of the region (one past the last copied base)
        self.source_end = source_end

        self.dest_contig = dest_contig
        #0-based start of the insertion (base after the insertion)
        self.dest_start = max(0, dest_start)
        #0-based end of the insertion (base after the insertion) + length of the insertion
        self.dest_end = dest_end

        self.members = members
        self.score = score
        self.std_span = std_span
        self.std_pos = std_pos
        self.cutpaste = cutpaste
        self.type = "DUP_INT"
        self.support_fraction = support_fraction
        self.genotype = genotype
        self.ref_reads = ref_reads
        self.alt_reads = alt_reads

    def get_destination(self):
        return (self.dest_contig, self.dest_start, self.dest_end)

    def get_bed_entries(self, sep="\t"):
        source_contig, source_start, source_end = self.get_source()
        dest_contig, dest_start, dest_end = self.get_destination()
        source_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}", "{6}"]).format(source_contig, source_start, source_end, "int_dup_source;>{0}:{1}-{2};{3};{4}".format(dest_contig, dest_start, dest_end, self.get_std_span(), self.get_std_pos()), self.score, "origin potentially deleted" if self.cutpaste else ".", "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        dest_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}", "{6}"]).format(dest_contig, dest_start, dest_end, "int_dup_dest;<{0}:{1}-{2};{3};{4}".format(source_contig, source_start, source_end, self.get_std_span(), self.get_std_pos()), self.score, "origin potentially deleted" if self.cutpaste else ".", "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        return (source_entry, dest_entry)

    def get_vcf_entry_as_ins(self, sequence_alleles = False, reference = None, read_names = False, zmws = False):
        source_contig, source_start, source_end = self.get_source()
        dest_contig, dest_start, dest_end = self.get_destination()
        svtype = "INS"
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        if sequence_alleles:
            ref_allele = reference.fetch(dest_contig, max(0, dest_start-1), max(0, dest_start-1) + 1).upper()
            alt_allele = ref_allele + reference.fetch(source_contig, source_start, source_end).upper()
        else:
            ref_allele = "N"
            alt_allele = "<" + self.type + ">"
        info_template="SVTYPE={0};{1}END={2};SVLEN={3};SUPPORT={4};STD_SPAN={5};STD_POS={6}"
        info_string = info_template.format(svtype, "CUTPASTE;" if self.cutpaste else "", dest_start, dest_end - dest_start, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=dest_contig, pos=max(1, dest_start), id="PLACEHOLDERFORID", ref=ref_allele, alt=alt_allele, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))

    def get_vcf_entry_as_dup(self, read_names = False, zmws = False):
        contig, start, end = self.get_source()
        svtype = "DUP:INT"
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        info_template="SVTYPE={0};{1}END={2};SVLEN={3};SUPPORT={4};STD_SPAN={5};STD_POS={6}"
        info_string = info_template.format(svtype, "CUTPASTE;" if self.cutpaste else "", end, end - start, len(set([sig.read for sig in self.members])), self.get_std_span(), self.get_std_pos())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=contig, pos=start+1, id="PLACEHOLDERFORID", ref="N", alt="<" + svtype + ">", qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))


class CandidateBreakend(Candidate):
    def __init__(self, source_contig, source_start, source_direction, dest_contig, dest_start, dest_direction, members, score, std_pos1, std_pos2, support_fraction = ".", genotype = "./.", ref_reads = None, alt_reads = None):
        self.source_contig = source_contig
        #0-based source of the translocation (first base before the translocation)
        self.source_start = max(0, source_start)
        self.source_direction = source_direction

        self.dest_contig = dest_contig
        #0-based destination of the translocation (first base after the translocation)
        self.dest_start = max(0, dest_start)
        self.dest_direction = dest_direction

        self.members = members
        self.score = score
        self.std_pos1 = std_pos1
        self.std_pos2 = std_pos2
        self.type = "BND"
        self.support_fraction = support_fraction
        self.genotype = genotype
        self.ref_reads = ref_reads
        self.alt_reads = alt_reads

    def get_source(self):
        return (self.source_contig, self.source_start)

    def get_destination(self):
        return (self.dest_contig, self.dest_start)

    def get_std_pos1(self, ndigits=2):
        if self.std_pos1:
            return round(self.std_pos1, ndigits)
        else:
            return "."

    def get_std_pos2(self, ndigits=2):
        if self.std_pos2:
            return round(self.std_pos2, ndigits)
        else:
            return "."

    def get_bed_entries(self, sep="\t"):
        source_contig, source_start = self.get_source()
        dest_contig, dest_start = self.get_destination()
        source_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}"]).format(source_contig, source_start, source_start + 1, "bnd;>{0}:{1};{2};{3}".format(dest_contig, dest_start, self.get_std_pos1(), self.get_std_pos2()), self.score, "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        dest_entry = sep.join(["{0}", "{1}", "{2}", "{3}", "{4}", "{5}"]).format(dest_contig, dest_start, dest_start + 1, "bnd;<{0}:{1};{2};{3}".format(source_contig, source_start, self.get_std_pos1(), self.get_std_pos2()), self.score, "[" + "][".join([ev.as_string("|") for ev in self.members]) + "]")
        return (source_entry, dest_entry)

    def get_vcf_entry(self, read_names = False, zmws = False):
        source_contig, source_start = self.get_source()
        dest_contig, dest_start = self.get_destination()
        if (self.source_direction == 'fwd') and (self.dest_direction == 'fwd'):
            alt_string = "N[{contig}:{start}[".format(contig = dest_contig, start = dest_start+1)
        elif (self.source_direction == 'fwd') and (self.dest_direction == 'rev'):
            alt_string = "N]{contig}:{start}]".format(contig = dest_contig, start = dest_start+1)
        elif (self.source_direction == 'rev') and (self.dest_direction == 'rev'):
            alt_string = "]{contig}:{start}]N".format(contig = dest_contig, start = dest_start+1)
        elif (self.source_direction == 'rev') and (self.dest_direction == 'fwd'):
            alt_string = "[{contig}:{start}[N".format(contig = dest_contig, start = dest_start+1)
        if self.ref_reads != None and self.alt_reads != None:
            dp_string = str(self.ref_reads + self.alt_reads)
        else:
            dp_string = "."
        filters = []
        if self.genotype == "0/0":
            filters.append("hom_ref")
        info_template="SVTYPE={0};SUPPORT={1};STD_POS1={2};STD_POS2={3}"
        info_string = info_template.format(self.type, len(set([sig.read for sig in self.members])), self.get_std_pos1(), self.get_std_pos2())
        read_ids = [member.read for member in self.members]
        if read_names:
            info_string += ";READS={0}".format(",".join(read_ids))
        if zmws:
            valid_pacbio_names = True
            zmw_list = set()
            for read_id in read_ids:
                fields = read_id.split("/")
                if len(fields) != 3:
                    valid_pacbio_names = False
                    break
                zmw_list.add("/".join(fields[0:2]))
            if valid_pacbio_names:
                info_string += ";ZMWS={0}".format(len(zmw_list))
        return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format(chrom=source_contig, pos=source_start+1, id="PLACEHOLDERFORID", ref="N", alt=alt_string, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else "."))
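
    # Note on the ALT strings built in get_vcf_entry above (an illustrative
    # reading of the standard VCF breakend notation; the coordinates are
    # hypothetical): for a breakend at chr1:5000 joined to chr2:8000, the four
    # direction combinations produce
    #
    #   fwd/fwd -> N[chr2:8000[  (piece extending to the right of chr2:8000 is joined after this base)
    #   fwd/rev -> N]chr2:8000]  (reverse-complemented piece extending to the left is joined after this base)
    #   rev/rev -> ]chr2:8000]N  (piece extending to the left of chr2:8000 is joined before this base)
    #   rev/fwd -> [chr2:8000[N  (reverse-complemented piece extending to the right is joined before this base)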
= "[{contig}:{start}[N".format(contig = dest_contig, start = dest_start+1) if self.ref_reads != None and self.alt_reads != None: dp_string = str(self.ref_reads + self.alt_reads) else: dp_string = "." filters = [] if self.genotype == "0/0": filters.append("hom_ref") info_template="SVTYPE={0};SUPPORT={1};STD_POS1={2};STD_POS2={3}" info_string = info_template.format(self.type, len(set([sig.read for sig in self.members])), self.get_std_pos2(), self.get_std_pos1()) read_ids = [member.read for member in self.members] if read_names: info_string += ";READS={0}".format(",".join(read_ids)) if zmws: valid_pacbio_names = True zmw_list = set() for read_id in read_ids: fields = read_id.split("/") if len(fields) != 3: valid_pacbio_names = False break zmw_list.add("/".join(fields[0:2])) if valid_pacbio_names: info_string += ";ZMWS={0}".format(len(zmw_list)) return "{chrom}\t{pos}\t{id}\t{ref}\t{alt}\t{qual}\t{filter}\t{info}\t{format}\t{samples}".format( chrom=source_contig, pos=source_start+1, id="PLACEHOLDERFORID", ref="N", alt=alt_string, qual=int(self.score), filter="PASS" if len(filters) == 0 else ";".join(filters), info=info_string, format="GT:DP:AD", samples="{gt}:{dp}:{ref},{alt}".format(gt=self.genotype, dp=dp_string, ref=self.ref_reads if self.ref_reads != None else ".", alt=self.alt_reads if self.alt_reads != None else ".")) svim-2.0.0/src/svim/SVIM_CLUSTER.py000066400000000000000000000147521406305341300165750ustar00rootroot00000000000000import os import logging from svim.SVIM_clustering import partition_and_cluster def cluster_sv_signatures(sv_signatures, options): """Takes a list of SVSignatures and splits them up by type. The SVSignatures of each type are clustered and returned as a tuple of (deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, completed_translocation_signatures).""" deletion_signatures = [ev for ev in sv_signatures if ev.type == "DEL"] insertion_signatures = [ev for ev in sv_signatures if ev.type == "INS"] inversion_signatures = [ev for ev in sv_signatures if ev.type == "INV"] tandem_duplication_signatures = [ev for ev in sv_signatures if ev.type == "DUP_TAN"] translocation_signatures = [ev for ev in sv_signatures if ev.type == "BND"] insertion_from_signatures = [ev for ev in sv_signatures if ev.type == "DUP_INT"] # Cluster SV signatures deletion_signature_clusters = partition_and_cluster(deletion_signatures, options, "deleted regions") insertion_signature_clusters = partition_and_cluster(insertion_signatures, options, "inserted regions") inversion_signature_clusters = partition_and_cluster(inversion_signatures, options, "inverted regions") tandem_duplication_signature_clusters = partition_and_cluster(tandem_duplication_signatures, options, "tandem duplicated regions") translocation_signature_clusters = partition_and_cluster(translocation_signatures, options, "translocation breakpoints") insertion_from_signature_clusters = partition_and_cluster(insertion_from_signatures, options, "inserted regions with detected region of origin") return (deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, translocation_signature_clusters) def write_signature_clusters_bed(working_dir, clusters): """Write signature clusters into working directory in BED format.""" deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, 
svim-2.0.0/src/svim/SVIM_CLUSTER.py000066400000000000000000000147521406305341300165730ustar00rootroot00000000000000
import os
import logging

from svim.SVIM_clustering import partition_and_cluster


def cluster_sv_signatures(sv_signatures, options):
    """Takes a list of SVSignatures and splits them up by type. The SVSignatures of each type are clustered and returned as a tuple of
    (deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, completed_translocation_signatures)."""
    deletion_signatures = [ev for ev in sv_signatures if ev.type == "DEL"]
    insertion_signatures = [ev for ev in sv_signatures if ev.type == "INS"]
    inversion_signatures = [ev for ev in sv_signatures if ev.type == "INV"]
    tandem_duplication_signatures = [ev for ev in sv_signatures if ev.type == "DUP_TAN"]
    translocation_signatures = [ev for ev in sv_signatures if ev.type == "BND"]
    insertion_from_signatures = [ev for ev in sv_signatures if ev.type == "DUP_INT"]

    # Cluster SV signatures
    deletion_signature_clusters = partition_and_cluster(deletion_signatures, options, "deleted regions")
    insertion_signature_clusters = partition_and_cluster(insertion_signatures, options, "inserted regions")
    inversion_signature_clusters = partition_and_cluster(inversion_signatures, options, "inverted regions")
    tandem_duplication_signature_clusters = partition_and_cluster(tandem_duplication_signatures, options, "tandem duplicated regions")
    translocation_signature_clusters = partition_and_cluster(translocation_signatures, options, "translocation breakpoints")
    insertion_from_signature_clusters = partition_and_cluster(insertion_from_signatures, options, "inserted regions with detected region of origin")

    return (deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, translocation_signature_clusters)


def write_signature_clusters_bed(working_dir, clusters):
    """Write signature clusters into working directory in BED format."""
    deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, translocation_signature_clusters = clusters

    # Print SV signature clusters
    if not os.path.exists(working_dir + '/signatures'):
        os.mkdir(working_dir + '/signatures')
    deletion_signature_output = open(working_dir + '/signatures/del.bed', 'w')
    insertion_signature_output = open(working_dir + '/signatures/ins.bed', 'w')
    inversion_signature_output = open(working_dir + '/signatures/inv.bed', 'w')
    tandem_duplication_signature_source_output = open(working_dir + '/signatures/dup_tan_source.bed', 'w')
    tandem_duplication_signature_dest_output = open(working_dir + '/signatures/dup_tan_dest.bed', 'w')
    translocation_signature_output = open(working_dir + '/signatures/trans.bed', 'w')
    insertion_from_signature_output = open(working_dir + '/signatures/dup_int.bed', 'w')

    for cluster in deletion_signature_clusters:
        print(cluster.get_bed_entry(), file=deletion_signature_output)
    for cluster in insertion_signature_clusters:
        print(cluster.get_bed_entry(), file=insertion_signature_output)
    for cluster in inversion_signature_clusters:
        print(cluster.get_bed_entry(), file=inversion_signature_output)
    for cluster in tandem_duplication_signature_clusters:
        bed_entries = cluster.get_bed_entries()
        print(bed_entries[0], file=tandem_duplication_signature_source_output)
        print(bed_entries[1], file=tandem_duplication_signature_dest_output)
    for cluster in insertion_from_signature_clusters:
        bed_entries = cluster.get_bed_entries()
        print(bed_entries[0], file=insertion_from_signature_output)
        print(bed_entries[1], file=insertion_from_signature_output)
    for cluster in translocation_signature_clusters:
        bed_entries = cluster.get_bed_entries()
        print(bed_entries[0], file=translocation_signature_output)
        print(bed_entries[1], file=translocation_signature_output)

    deletion_signature_output.close()
    insertion_signature_output.close()
    inversion_signature_output.close()
    tandem_duplication_signature_source_output.close()
    tandem_duplication_signature_dest_output.close()
    translocation_signature_output.close()
    insertion_from_signature_output.close()


def write_signature_clusters_vcf(working_dir, clusters, version):
    """Write signature clusters into working directory in VCF format."""
    deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, translocation_signature_clusters = clusters

    if not os.path.exists(working_dir + '/signatures'):
        os.mkdir(working_dir + '/signatures')
    vcf_output = open(working_dir + '/signatures/all.vcf', 'w')

    # Write header lines (the bodies of the ALT and INFO definitions were lost
    # in this copy of the file; they are restored here to the standard VCF SV
    # definitions that SVIM uses)
    print("##fileformat=VCFv4.3", file=vcf_output)
    print("##source=SVIMV{0}".format(version), file=vcf_output)
    print("##ALT=<ID=DEL,Description=\"Deletion\">", file=vcf_output)
    print("##ALT=<ID=INV,Description=\"Inversion\">", file=vcf_output)
    print("##ALT=<ID=DUP,Description=\"Duplication\">", file=vcf_output)
    print("##ALT=<ID=DUP:TANDEM,Description=\"Tandem Duplication\">", file=vcf_output)
    print("##ALT=<ID=INS,Description=\"Insertion\">", file=vcf_output)
    print("##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant described in this record\">", file=vcf_output)
    print("##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant\">", file=vcf_output)
    print("##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Difference in length between REF and ALT alleles\">", file=vcf_output)
    print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO", file=vcf_output)

    vcf_entries = []
    for cluster in deletion_signature_clusters:
        vcf_entries.append((cluster.get_source(), cluster.get_vcf_entry()))
    for cluster in insertion_signature_clusters:
        vcf_entries.append((cluster.get_source(), cluster.get_vcf_entry()))
    for cluster in inversion_signature_clusters:
        vcf_entries.append((cluster.get_source(), cluster.get_vcf_entry()))
    for cluster in tandem_duplication_signature_clusters:
        vcf_entries.append((cluster.get_source(), cluster.get_vcf_entry()))

    # Sort and write entries to VCF
    for source, entry in sorted(vcf_entries, key=lambda pair: pair[0]):
        print(entry, file=vcf_output)

    vcf_output.close()
commas or semicolons) in a chromosome name of the reference genome.') continue rname = fields[0] pos = int(fields[1]) strand = fields[2] # CIGAR string encoded in SA tag is shortened cigar = fields[3] mapq = int(fields[4]) nm = int(fields[5]) # Generate an aligned segment from the information a = pysam.AlignedSegment() a.query_name = main_alignment.query_name a.query_sequence= main_alignment.query_sequence if strand == "+": a.flag = 2048 else: a.flag = 2064 a.reference_id = bam.get_tid(rname) a.reference_start = pos - 1 try: a.mapping_quality = mapq except OverflowError: a.mapping_quality = 0 a.cigarstring = cigar a.next_reference_id = -1 a.next_reference_start = -1 a.template_length = 0 a.query_qualities = main_alignment.query_qualities a.set_tags([("NM", nm, "i")]) other_alignments.append(a) return other_alignments def analyze_alignment_file_querysorted(bam, options): alignment_it = bam_iterator(bam) sv_signatures = [] #Translocation signatures from other SV classes are stored separately for --all_bnd option translocation_signatures_all_bnds = [] read_nr = 0 while True: try: alignment_iterator_object = next(alignment_it) primary_aln, suppl_aln, sec_aln = alignment_iterator_object if len(primary_aln) != 1 or primary_aln[0].is_unmapped or primary_aln[0].mapping_quality < options.min_mapq: continue read_nr += 1 if read_nr % 10000 == 0: logging.info("Processed read {0}".format(read_nr)) good_suppl_alns = [aln for aln in suppl_aln if not aln.is_unmapped and aln.mapping_quality >= options.min_mapq] sigs, trans_sigs = analyze_alignment_indel(primary_aln[0], bam, primary_aln[0].query_name, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) for alignment in good_suppl_alns: sigs, trans_sigs = analyze_alignment_indel(alignment, bam, alignment.query_name, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) sigs, trans_sigs = analyze_read_segments(primary_aln[0], good_suppl_alns, bam, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) except StopIteration: break except KeyboardInterrupt: logging.warning('Execution interrupted by user. 
Stop detection and continue with next step..') break return sv_signatures, translocation_signatures_all_bnds def analyze_alignment_file_coordsorted(bam, options): alignment_it = bam.fetch(until_eof=True) sv_signatures = [] #Translocation signatures from other SV classes are stored separately for --all_bnd option translocation_signatures_all_bnds = [] read_nr = 0 while True: try: current_alignment = next(alignment_it) if current_alignment.is_unmapped or current_alignment.is_secondary or current_alignment.mapping_quality < options.min_mapq: continue if current_alignment.is_supplementary: sigs, trans_sigs = analyze_alignment_indel(current_alignment, bam, current_alignment.query_name, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) else: read_nr += 1 if read_nr % 10000 == 0: logging.info("Processed read {0}".format(read_nr)) supplementary_alignments = retrieve_other_alignments(current_alignment, bam) good_suppl_alns = [aln for aln in supplementary_alignments if not aln.is_unmapped and aln.mapping_quality >= options.min_mapq] sigs, trans_sigs = analyze_alignment_indel(current_alignment, bam, current_alignment.query_name, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) sigs, trans_sigs = analyze_read_segments(current_alignment, good_suppl_alns, bam, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) except StopIteration: break except KeyboardInterrupt: logging.warning('Execution interrupted by user. Stop detection and continue with next step..') break return sv_signatures, translocation_signatures_all_bndssvim-2.0.0/src/svim/SVIM_COMBINE.py000066400000000000000000000747471406305341300165420ustar00rootroot00000000000000import os import logging import re from collections import defaultdict from math import pow, sqrt import time from statistics import mean, stdev from pysam import FastaFile from spoa import poa from cpuinfo import get_cpu_info from svim.SVIM_clustering import form_partitions, partition_and_cluster_candidates, calculate_score from svim.SVCandidate import CandidateInversion, CandidateDuplicationTandem, CandidateDeletion, CandidateNovelInsertion, CandidateBreakend from svim.SVIM_merging import flag_cutpaste_candidates, merge_translocations_at_insertions def write_candidates(working_dir, candidates): int_duplication_candidates, inversion_candidates, tan_duplication_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates = candidates if not os.path.exists(working_dir + '/candidates'): os.mkdir(working_dir + '/candidates') deletion_candidate_output = open(working_dir + '/candidates/candidates_deletions.bed', 'w') inversion_candidate_output = open(working_dir + '/candidates/candidates_inversions.bed', 'w') tandem_duplication_candidate_source_output = open(working_dir + '/candidates/candidates_tan_duplications_source.bed', 'w') tandem_duplication_candidate_dest_output = open(working_dir + '/candidates/candidates_tan_duplications_dest.bed', 'w') interspersed_duplication_candidate_source_output = open(working_dir + '/candidates/candidates_int_duplications_source.bed', 'w') interspersed_duplication_candidate_dest_output = open(working_dir + '/candidates/candidates_int_duplications_dest.bed', 'w') novel_insertion_candidate_output = open(working_dir + '/candidates/candidates_novel_insertions.bed', 'w') breakend_candidate_output = open(working_dir + '/candidates/candidates_breakends.bed', 'w') for candidate in deletion_candidates: 
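        # Unilocal candidates (deletions, inversions, novel insertions) are written as one
        # BED line each; bilocal candidates (duplications, breakends) get two BED lines,
        # one for the source region and one for the destination region.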
        print(candidate.get_bed_entry(), file=deletion_candidate_output)
    for candidate in int_duplication_candidates:
        bed_entries = candidate.get_bed_entries()
        print(bed_entries[0], file=interspersed_duplication_candidate_source_output)
        print(bed_entries[1], file=interspersed_duplication_candidate_dest_output)
    for candidate in inversion_candidates:
        print(candidate.get_bed_entry(), file=inversion_candidate_output)
    for candidate in tan_duplication_candidates:
        bed_entries = candidate.get_bed_entries()
        print(bed_entries[0], file=tandem_duplication_candidate_source_output)
        print(bed_entries[1], file=tandem_duplication_candidate_dest_output)
    for candidate in novel_insertion_candidates:
        print(candidate.get_bed_entry(), file=novel_insertion_candidate_output)
    for candidate in breakend_candidates:
        bed_entries = candidate.get_bed_entries()
        print(bed_entries[0], file=breakend_candidate_output)
        print(bed_entries[1], file=breakend_candidate_output)

    deletion_candidate_output.close()
    inversion_candidate_output.close()
    interspersed_duplication_candidate_source_output.close()
    interspersed_duplication_candidate_dest_output.close()
    tandem_duplication_candidate_source_output.close()
    tandem_duplication_candidate_dest_output.close()
    novel_insertion_candidate_output.close()
    breakend_candidate_output.close()


def sorted_nicely(vcf_entries):
    """Sort the given vcf entries (in the form ((contig, start, end), vcf_string, sv_type))
    in the way that humans expect, e.g. chr10 comes after chr2.
    Algorithm adapted from https://blog.codinghorror.com/sorting-for-humans-natural-sort-order/"""
    convert = lambda text: int(text) if text.isdigit() else text
    alphanum_key = lambda key: [convert(c) for c in re.split('([0-9]+)', key)]
    tuple_key = lambda entry: (alphanum_key(str(entry[0][0])), entry[0][1], entry[0][2])
    return sorted(vcf_entries, key=tuple_key)


def write_final_vcf(int_duplication_candidates, inversion_candidates, tandem_duplication_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates, version, contig_names, contig_lengths, types_to_output, options):
    vcf_output = open(options.working_dir + '/variants.vcf', 'w')

    # Write header lines
    print("##fileformat=VCFv4.2", file=vcf_output)
    print("##fileDate={0}".format(time.strftime("%Y-%m-%d|%I:%M:%S%p|%Z|%z")), file=vcf_output)
    print("##source=SVIM-v{0}".format(version), file=vcf_output)
    for contig_name, contig_length in zip(contig_names, contig_lengths):
        print("##contig=<ID={0},length={1}>".format(contig_name, contig_length), file=vcf_output)
    if "DEL" in types_to_output:
        print("##ALT=<ID=DEL,Description=\"Deletion\">", file=vcf_output)
    if "INV" in types_to_output:
        print("##ALT=<ID=INV,Description=\"Inversion\">", file=vcf_output)
    if (not options.tandem_duplications_as_insertions and "DUP:TANDEM" in types_to_output) or \
       (not options.interspersed_duplications_as_insertions and "DUP:INT" in types_to_output):
        print("##ALT=<ID=DUP,Description=\"Duplication\">", file=vcf_output)
    if not options.tandem_duplications_as_insertions and "DUP:TANDEM" in types_to_output:
        print("##ALT=<ID=DUP:TANDEM,Description=\"Tandem Duplication\">", file=vcf_output)
    if not options.interspersed_duplications_as_insertions and "DUP:INT" in types_to_output:
        print("##ALT=<ID=DUP:INT,Description=\"Interspersed Duplication\">", file=vcf_output)
    if "INS" in types_to_output:
        print("##ALT=<ID=INS,Description=\"Insertion\">", file=vcf_output)
    if "BND" in types_to_output:
        print("##ALT=<ID=BND,Description=\"Breakend\">", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    print("##INFO=", file=vcf_output)
    if options.insertion_sequences:
        print("##INFO=", file=vcf_output)
    if options.read_names:
        print("##INFO=", file=vcf_output)
    if options.zmws:
        print("##INFO=", file=vcf_output)
    print("##FILTER=", file=vcf_output)
    print("##FILTER=", file=vcf_output)
    print("##FORMAT=", file=vcf_output)
    print("##FORMAT=", file=vcf_output)
    print("##FORMAT=", file=vcf_output)
    if not options.tandem_duplications_as_insertions and "DUP:TANDEM" in types_to_output:
        print("##FORMAT=", file=vcf_output)
    print("#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t" + options.sample, file=vcf_output)

    # Open reference genome sequence file
    sequence_alleles = not options.symbolic_alleles
    if sequence_alleles:
        try:
            reference = FastaFile(options.genome)
        except ValueError:
            logging.warning("The given reference genome is missing an index file ({path}.fai). Sequence alleles cannot be retrieved.".format(path=options.genome))
            sequence_alleles = False
        except IOError:
            logging.warning("The given reference genome is missing ({path}). Sequence alleles cannot be retrieved.".format(path=options.genome))
            sequence_alleles = False
    else:
        reference = None

    # Prepare VCF entries depending on command-line parameters
    vcf_entries = []
    if "DEL" in types_to_output:
        for candidate in deletion_candidates:
            vcf_entries.append((candidate.get_source(), candidate.get_vcf_entry(sequence_alleles, reference, options.read_names, options.zmws), "DEL"))
    if "INV" in types_to_output:
        for candidate in inversion_candidates:
            vcf_entries.append((candidate.get_source(), candidate.get_vcf_entry(sequence_alleles, reference, options.read_names, options.zmws), "INV"))
    if "INS" in types_to_output:
        for candidate in novel_insertion_candidates:
            vcf_entries.append((candidate.get_destination(), candidate.get_vcf_entry(sequence_alleles, reference, options.insertion_sequences, options.read_names, options.zmws), "INS"))
    if options.tandem_duplications_as_insertions:
        if "INS" in types_to_output:
            for candidate in tandem_duplication_candidates:
                vcf_entries.append((candidate.get_destination(), candidate.get_vcf_entry_as_ins(sequence_alleles, reference, options.read_names, options.zmws), "INS"))
    else:
        if "DUP:TANDEM" in types_to_output:
            for candidate in tandem_duplication_candidates:
                vcf_entries.append((candidate.get_source(), candidate.get_vcf_entry_as_dup(options.read_names, options.zmws), "DUP_TANDEM"))
    if options.interspersed_duplications_as_insertions:
        if "INS" in types_to_output:
            for candidate in int_duplication_candidates:
                vcf_entries.append((candidate.get_destination(), candidate.get_vcf_entry_as_ins(sequence_alleles, reference, options.read_names, options.zmws), "INS"))
    else:
        if "DUP:INT" in types_to_output:
            for candidate in int_duplication_candidates:
                vcf_entries.append((candidate.get_source(), candidate.get_vcf_entry_as_dup(options.read_names, options.zmws), "DUP_INT"))
    if "BND" in types_to_output:
        for candidate in breakend_candidates:
            vcf_entries.append(((candidate.get_source()[0], candidate.get_source()[1], candidate.get_source()[1] + 1), candidate.get_vcf_entry(options.read_names, options.zmws), "BND"))
            vcf_entries.append(((candidate.get_destination()[0], candidate.get_destination()[1], candidate.get_destination()[1] + 1), candidate.get_vcf_entry_reverse(options.read_names, options.zmws), "BND"))

    if sequence_alleles:
        reference.close()

    # Sort and write entries to VCF
    svtype_counter = defaultdict(int)
    for source, entry, svtype in sorted_nicely(vcf_entries):
        variant_id = "svim.{svtype}.{number}".format(svtype=svtype, number=svtype_counter[svtype] + 1)
        entry_with_id = entry.replace("PLACEHOLDERFORID", variant_id, 1)
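        # Example of the resulting ID scheme (illustrative): the first deletion in output
        # order becomes svim.DEL.1, the second svim.DEL.2, the first insertion svim.INS.1.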
svtype_counter[svtype] += 1 print(entry_with_id, file=vcf_output) vcf_output.close() def generate_insertion_consensus(ins_cluster, reference, window_padding = 100, maximum_haplotype_length = 10000, allowed_size_deviation = 2.0): #compute window containing all members of cluster member_pos = [member.start for member in ins_cluster.members] window_start = min(member_pos) - window_padding window_end = max(member_pos) + window_padding #construct haplotype sequences from all reads haplotypes = [] for member in ins_cluster.members: haplotype_sequence = reference.fetch(ins_cluster.contig, max(0, window_start), max(0, member.start)).upper() haplotype_sequence += member.sequence.upper() haplotype_sequence += reference.fetch(ins_cluster.contig, max(0, member.start), max(0, window_end)).upper() haplotypes.append(haplotype_sequence) largest_haplotype_length = max([len(h) for h in haplotypes]) if largest_haplotype_length > maximum_haplotype_length: logging.info("Skipping consensus computation for insertion with haplotypes exceeding maximum length ({0} > {1})".format(largest_haplotype_length, maximum_haplotype_length)) return (1, ()) #compute consensus of all reads using SPOA try: consensus_reads, msa_reads = poa(haplotypes, algorithm=1, m=2, n=-4, g=-4, e=-2, q=-24, c=-1) except MemoryError: logging.warning("Error: spoa ran out of memory while computing a consensus sequence for a cluster of insertion signatures (size = {0}, maximum haplotype length = {1}).".format(ins_cluster.size, largest_haplotype_length)) return (2, ()) except ValueError as ve: logging.warning("Error: spoa failed while computing a consensus sequence for a cluster of insertion signatures (size = {0}, maximum haplotype length = {1}): {2}".format(ins_cluster.size, largest_haplotype_length, ve)) return (2, ()) #re-align consensus sequence to reference sequence in the window ref_sequence = reference.fetch(ins_cluster.contig, max(0, window_start), max(0, window_end)).upper() try: consensus_reads_ref, msa_reads_ref = poa([consensus_reads, ref_sequence], algorithm=1, m=2, n=-4, g=-4, e=-2, q=-24, c=-1) except MemoryError: logging.warning("Error: spoa ran out of memory while aligning the consensus sequence for a cluster of insertion signatures (size = {0}, maximum haplotype length = {1}).".format(ins_cluster.size, largest_haplotype_length)) return (2, ()) except ValueError as ve: logging.warning("Error: spoa failed while aligning the consensus sequence for a cluster of insertion signatures (size = {0}, maximum haplotype length = {1}): {2}".format(ins_cluster.size, largest_haplotype_length, ve)) return (2, ()) #locate insertion relative to reference and check whether size is close to expected size expected_size = ins_cluster.end - ins_cluster.start matches = [] for match in re.finditer(r'-+', msa_reads_ref[1]): match_size = match.end() - match.start() size_ratio = max(match_size, expected_size) / min(match_size, expected_size) matches.append((match.start(), match_size, size_ratio)) good_matches = [m for m in matches if m[2] < allowed_size_deviation] if len(good_matches) == 0: logging.info("Consensus failure (no suitable insertion found in realignment step). 
\ Expected size: {0}; Match sizes: {1}; Ref: {2}; Consensus: {3}".format(expected_size, "/".join([str(m[1]) for m in matches]), msa_reads_ref[1], msa_reads_ref[0]))
        return (3, ())
    elif len(good_matches) == 1:
        realigned_insertion_start = max(0, window_start) + good_matches[0][0]
        realigned_insertion_size = good_matches[0][1]
        insertion_consensus = msa_reads_ref[0][good_matches[0][0]:good_matches[0][0]+good_matches[0][1]]
        logging.debug("Consensus success. Expected size: {0}; Consensus size: {1}; Consensus insertion: {2}".format(expected_size, realigned_insertion_size, insertion_consensus))
        return (0, (realigned_insertion_start, realigned_insertion_size, insertion_consensus))
    else:
        logging.info("Consensus failure (multiple suitable insertions found in realignment step). \
                      Expected size: {0}; Match sizes: {1}; Ref: {2}; Consensus: {3}".format(expected_size, "/".join([str(m[1]) for m in matches]), msa_reads_ref[1], msa_reads_ref[0]))
        return (4, ())


def prepare_insertion_candidates(insertion_signature_clusters, options):
    novel_insertion_candidates = []
    if options.skip_consensus or ("sse4_1" not in get_cpu_info()["flags"]):
        if options.skip_consensus:
            logging.info("Skipping computation of insertion consensus sequences because of --skip_consensus flag.")
        else:
            logging.warning("Skipping computation of insertion consensus sequences "
                            "because CPU does not support SSE 4.1 instruction set.")
        for ins_cluster in insertion_signature_clusters:
            if ins_cluster.score > 0:
                novel_insertion_candidates.append(CandidateNovelInsertion(ins_cluster.contig, ins_cluster.start, ins_cluster.end, "", ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos))
        return novel_insertion_candidates

    logging.info("Generating and realigning consensus sequence for insertions..")
    with FastaFile(options.genome) as reference:
        # 0-successful, 1-skipped, 2-failed, 3-no consensus, 4-multiple consensuses
        status_counter = [0, 0, 0, 0, 0]
        for ins_cluster in insertion_signature_clusters:
            if ins_cluster.score > 0:
                if len(ins_cluster.members) < 3:
                    novel_insertion_candidates.append(CandidateNovelInsertion(ins_cluster.contig, ins_cluster.start, ins_cluster.end, ins_cluster.members[0].sequence, ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos))
                    continue
                status, consensus_result = generate_insertion_consensus(ins_cluster, reference, maximum_haplotype_length=options.max_consensus_length)
                try:
                    status_counter[status] += 1
                except IndexError:
                    logging.error("Unknown status {0} returned by consensus computation.".format(status))
                    continue
                # Successful
                if status == 0:
                    realigned_insertion_start, realigned_insertion_size, insertion_consensus = consensus_result
                    novel_insertion_candidates.append(CandidateNovelInsertion(ins_cluster.contig, realigned_insertion_start, realigned_insertion_start + realigned_insertion_size, insertion_consensus, ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos))
                # Unsuccessful
                else:
                    novel_insertion_candidates.append(CandidateNovelInsertion(ins_cluster.contig, ins_cluster.start, ins_cluster.end, "", ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos))
    message = ("Generated and realigned consensus sequences for {0} insertions "
               "({1} skipped, {2} failed with an error, {3} failed with no "
               "consensus, {4} failed with multiple consensuses).")
    logging.info(message.format(status_counter[0], status_counter[1], status_counter[2], status_counter[3], status_counter[4]))
    return novel_insertion_candidates


def
combine_clusters(signature_clusters, options): deletion_signature_clusters, insertion_signature_clusters, inversion_signature_clusters, tandem_duplication_signature_clusters, insertion_from_signature_clusters, translocation_signature_clusters = signature_clusters ############################### # Create inversion candidates # ############################### inversion_candidates = [] for inv_cluster in inversion_signature_clusters: inversion_candidates.append(CandidateInversion(inv_cluster.contig, inv_cluster.start, inv_cluster.end, inv_cluster.members, inv_cluster.score, inv_cluster.std_span, inv_cluster.std_pos)) ######################################## # Create tandem duplication candidates # ######################################## tan_dup_candidates = [] for tan_dup_cluster in tandem_duplication_signature_clusters: source_contig, source_start, source_end = tan_dup_cluster.get_source() dest_contig, dest_start, dest_end = tan_dup_cluster.get_destination() num_copies = int(round((dest_end - dest_start) / (source_end - source_start))) fully_covered = True if sum([sig.fully_covered for sig in tan_dup_cluster.members]) else False tan_dup_candidates.append(CandidateDuplicationTandem(tan_dup_cluster.source_contig, tan_dup_cluster.source_start, tan_dup_cluster.source_end, num_copies, fully_covered, tan_dup_cluster.members, tan_dup_cluster.score, tan_dup_cluster.std_span, tan_dup_cluster.std_pos)) ##################################### # Cluster translocation breakpoints # ##################################### # Cluster translocations by contig and pos1 # logging.info("Cluster translocation breakpoints..") # translocations_fwdfwd = [tra for tra in translocation_signature_clusters if tra.direction1 == "fwd" and tra.direction2 == "fwd"] # translocations_revrev = [tra for tra in translocation_signature_clusters if tra.direction1 == "rev" and tra.direction2 == "rev"] # translocations_fwdrev = [tra for tra in translocation_signature_clusters if tra.direction1 == "fwd" and tra.direction2 == "rev"] # translocations_revfwd = [tra for tra in translocation_signature_clusters if tra.direction1 == "rev" and tra.direction2 == "fwd"] # translocation_partitions_fwdfwd = form_partitions(translocations_fwdfwd, options.trans_partition_max_distance) # translocation_partitions_revrev = form_partitions(translocations_revrev, options.trans_partition_max_distance) # translocation_partitions_fwdrev = form_partitions(translocations_fwdrev, options.trans_partition_max_distance) # translocation_partitions_revfwd = form_partitions(translocations_revfwd, options.trans_partition_max_distance) ############################## # Create breakend candidates # ############################## breakend_candidates = [] for tra_cluster in translocation_signature_clusters: breakend_candidates.append(CandidateBreakend(tra_cluster.source_contig, tra_cluster.source_start, tra_cluster.direction1, tra_cluster.dest_contig, tra_cluster.dest_start, tra_cluster.direction2, tra_cluster.members, tra_cluster.score, tra_cluster.std_span, tra_cluster.std_pos)) ################################################### # Merge translocation breakpoints with insertions # ################################################### logging.info("Combine inserted regions with translocation breakpoints..") new_insertion_from_clusters, inserted_regions_to_remove_1 = merge_translocations_at_insertions(translocation_signature_clusters, insertion_signature_clusters, options) insertion_from_signature_clusters.extend(new_insertion_from_clusters) 
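    # merge_translocations_at_insertions returns new insertion-from clusters plus the indices
    # of inserted region clusters that are now explained by translocation breakpoints; those
    # index lists are combined below and the corresponding clusters are deleted.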
############################################################################ # Create interspersed duplication candidates and flag cut&paste insertions # ############################################################################ logging.info("Create interspersed duplication candidates and flag cut&paste insertions..") int_duplication_candidates = flag_cutpaste_candidates(insertion_from_signature_clusters, deletion_signature_clusters, options) ################################### # Remove inserted region clusters # ################################### #find all inserted regions overlapping interspersed duplication or tandem duplication candidates int_duplication_iterator = iter(sorted(int_duplication_candidates, key=lambda cand: cand.get_destination())) tan_duplication_iterator = iter(sorted(tan_dup_candidates, key=lambda cand: cand.get_destination())) int_duplications_end = False tan_duplications_end = False inserted_regions_to_remove_2 = [] try: current_int_duplication = next(int_duplication_iterator) except StopIteration: int_duplications_end = True try: current_tan_duplication = next(tan_duplication_iterator) except StopIteration: tan_duplications_end = True for inserted_region_index, inserted_region in enumerate(insertion_signature_clusters): contig1, start1, end1 = inserted_region.get_source() length1 = end1 - start1 if not int_duplications_end: contig2, start2, end2 = current_int_duplication.get_destination() while contig2 < contig1 or (contig2 == contig1 and end2 < start1): try: current_int_duplication = next(int_duplication_iterator) contig2, start2, end2 = current_int_duplication.get_destination() except StopIteration: int_duplications_end = True break if not int_duplications_end: length2 = end2 - start2 #if overlapping interspersed duplication of similar length if contig2 == contig1 and start2 < end1 and (length1 - length2) / max(length1, length2) < 0.2: inserted_regions_to_remove_2.append(inserted_region_index) else: if not tan_duplications_end: contig2, start2, end2 = current_tan_duplication.get_destination() while contig2 < contig1 or (contig2 == contig1 and end2 < start1): try: current_tan_duplication = next(tan_duplication_iterator) contig2, start2, end2 = current_tan_duplication.get_destination() except StopIteration: tan_duplications_end = True break if not tan_duplications_end: length2 = end2 - start2 #if overlapping tandem duplication of similar length if contig2 == contig1 and start2 < end1 and (length1 - length2) / max(length1, length2) < 0.2: inserted_regions_to_remove_2.append(inserted_region_index) # remove found inserted regions all_inserted_regions_to_remove = sorted(list(set(inserted_regions_to_remove_1 + inserted_regions_to_remove_2)), reverse=True) for ins_index in all_inserted_regions_to_remove: del(insertion_signature_clusters[ins_index]) ############################## # Create deletion candidates # ############################## deletion_candidates = [] for del_cluster in deletion_signature_clusters: if del_cluster.score > 0: deletion_candidates.append(CandidateDeletion(del_cluster.contig, del_cluster.start, del_cluster.end, del_cluster.members, del_cluster.score, del_cluster.std_span, del_cluster.std_pos)) ##################################### # Create novel insertion candidates # ##################################### novel_insertion_candidates = prepare_insertion_candidates(insertion_signature_clusters, options) ###################### # Cluster candidates # ###################### logging.info("Cluster interspersed duplication candidates one more time..") 
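    # Candidates from the merging steps above may still describe the same event, so the
    # interspersed duplication candidates are partitioned and clustered a second time.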
    final_int_duplication_candidates = partition_and_cluster_candidates(int_duplication_candidates, options, "interspersed duplication candidates")

    return (deletion_candidates, inversion_candidates, final_int_duplication_candidates, tan_dup_candidates, novel_insertion_candidates, breakend_candidates)

svim-2.0.0/src/svim/SVIM_alignment.py
import os
import logging
from subprocess import run, CalledProcessError


class ToolMissingError(Exception):
    pass


class AlignmentPipelineError(Exception):
    pass


def check_prerequisites(aligner):
    devnull = open(os.devnull, 'w')
    try:
        run(['gunzip', '--help'], stdout=devnull, stderr=devnull, check=True)
        run([aligner, '--help'], stdout=devnull, stderr=devnull, check=True)
        run(['samtools', '--help'], stdout=devnull, stderr=devnull, check=True)
    except FileNotFoundError as e:
        raise ToolMissingError('The alignment pipeline cannot be started because {0} was not found. Is it installed and in the PATH?'.format(e.filename)) from e
    except CalledProcessError as e:
        raise ToolMissingError('The alignment pipeline cannot be started because {0} failed.'.format(" ".join(e.cmd))) from e


def run_alignment(working_dir, genome, reads_path, reads_type, cores, aligner, nanopore):
    """Align full reads with NGMLR or minimap2."""
    check_prerequisites(aligner)
    reads_file_prefix = os.path.splitext(os.path.basename(reads_path))[0]
    full_aln = "{0}/{1}.{2}.coordsorted.bam".format(working_dir, reads_file_prefix, aligner)
    full_aln_index = "{0}/{1}.{2}.coordsorted.bam.bai".format(working_dir, reads_file_prefix, aligner)
    if not (os.path.exists(full_aln) and os.path.exists(full_aln_index)):
        try:
            command_align = ['set', '-o', 'pipefail', '&&']
            if aligner == "ngmlr":
                # We need to uncompress gzipped files for NGMLR first
                if reads_type == "fasta_gzip" or reads_type == "fastq_gzip":
                    command_align += ['gunzip', '-c', os.path.realpath(reads_path)]
                    command_align += ['|', 'ngmlr', '-t', str(cores), '-r', genome]
                    if nanopore:
                        command_align += ['-x', 'ont']
                else:
                    command_align += ['ngmlr', '-t', str(cores), '-r', genome, '-q', os.path.realpath(reads_path)]
                    if nanopore:
                        command_align += ['-x', 'ont']
            elif aligner == "minimap2":
                if nanopore:
                    command_align += ['minimap2', '-t', str(cores), '-x', 'map-ont', '-a', genome, os.path.realpath(reads_path)]
                else:
                    command_align += ['minimap2', '-t', str(cores), '-x', 'map-pb', '-a', genome, os.path.realpath(reads_path)]
            command_align += ['|', 'samtools', 'view', '-b', '-@', str(cores)]
            command_align += ['|', 'samtools', 'sort', '-@', str(cores), '-o', full_aln]
            command_index = ['samtools', 'index', full_aln]
            logging.info("Starting alignment pipeline..")
            run(" ".join(command_align), shell=True, check=True, executable='/bin/bash')
            run(" ".join(command_index), shell=True, check=True, executable='/bin/bash')
        except CalledProcessError as e:
            raise AlignmentPipelineError('The alignment pipeline failed with exit code {0}. Command was: {1}'.format(e.returncode, e.cmd)) from e
        logging.info("Alignment pipeline finished")
        return full_aln
    else:
        logging.warning("Alignment output file {0} already exists.
Skip alignment and use the existing file.".format(full_aln)) return full_alnsvim-2.0.0/src/svim/SVIM_clustering.py000066400000000000000000000566511406305341300176370ustar00rootroot00000000000000from __future__ import print_function import sys import logging from random import seed, sample from statistics import mean, stdev import numpy as np from scipy.cluster.hierarchy import linkage, fcluster from edlib import align from pysam import FastaFile from svim.SVSignature import SignatureClusterUniLocal, SignatureClusterBiLocal from svim.SVCandidate import CandidateDuplicationInterspersed def form_partitions(sv_signatures, max_distance): """Form partitions of signatures using mean distance.""" sorted_signatures = sorted(sv_signatures, key=lambda evi: evi.get_key()) partitions = [] current_partition = [] for signature in sorted_signatures: if len(current_partition) > 0 and current_partition[-1].downstream_distance_to(signature) > max_distance: partitions.append(current_partition[:]) current_partition = [] current_partition.append(signature) if len(current_partition) > 0: partitions.append(current_partition[:]) return partitions def compute_haplotype_edit_distance(signature1, signature2, reference, window_padding = 100): window_start = min(signature1.start, signature2.start) - window_padding window_end = max(signature1.start, signature2.start) + window_padding #construct haplotype sequences for both signatures haplotype1 = reference.fetch(signature1.contig, max(0, window_start), max(0, signature1.start)).upper() haplotype1 += signature1.sequence.upper() haplotype1 += reference.fetch(signature1.contig, max(0, signature1.start), max(0, window_end)).upper() haplotype2 = reference.fetch(signature2.contig, max(0, window_start), max(0, signature2.start)).upper() haplotype2 += signature2.sequence.upper() haplotype2 += reference.fetch(signature2.contig, max(0, signature2.start), max(0, window_end)).upper() return align(haplotype1, haplotype2)["editDistance"] def span_position_distance(signature1, signature2, signature_type, reference, position_distance_normalizer, edit_distance_normalizer, cluster_max_distance): if signature_type == "DEL" or signature_type == "DUP_TAN": span1 = signature1.get_source()[2] - signature1.get_source()[1] span2 = signature2.get_source()[2] - signature2.get_source()[1] center1 = (signature1.get_source()[1] + signature1.get_source()[2]) // 2 center2 = (signature2.get_source()[1] + signature2.get_source()[2]) // 2 position_distance = abs(center1 - center2) / position_distance_normalizer span_distance = abs(span1 - span2) / max(span1, span2) return position_distance + span_distance elif signature_type == "INV": #two signatures from same read can be clustered together" span1 = signature1.get_source()[2] - signature1.get_source()[1] span2 = signature2.get_source()[2] - signature2.get_source()[1] center1 = (signature1.get_source()[1] + signature1.get_source()[2]) // 2 center2 = (signature2.get_source()[1] + signature2.get_source()[2]) // 2 position_distance = abs(center1 - center2) / position_distance_normalizer span_distance = abs(span1 - span2) / max(span1, span2) return position_distance + span_distance elif signature_type == "INS": #center is the insertion location span1 = signature1.get_source()[2] - signature1.get_source()[1] span2 = signature2.get_source()[2] - signature2.get_source()[1] center1 = signature1.get_source()[1] center2 = signature2.get_source()[1] position_distance = abs(center1 - center2) / position_distance_normalizer if position_distance > 
2*cluster_max_distance: #do not compute edit distance if insertions are too distant span_distance = abs(span1 - span2) / max(span1, span2) return position_distance + span_distance else: edit_distance = compute_haplotype_edit_distance(signature1, signature2, reference) sequence_distance = edit_distance / max(span1, span2) / edit_distance_normalizer return position_distance + sequence_distance elif signature_type == "DUP_INT": #position distance is computed for source and destination span1 = signature1.get_source()[2] - signature1.get_source()[1] span2 = signature2.get_source()[2] - signature2.get_source()[1] source_center1 = (signature1.get_source()[1] + signature1.get_source()[2]) // 2 source_center2 = (signature2.get_source()[1] + signature2.get_source()[2]) // 2 position_distance_source = abs(source_center1 - source_center2) / position_distance_normalizer position_distance_destination = abs(signature1.get_destination()[1] - signature2.get_destination()[1]) / position_distance_normalizer span_distance = abs(span1 - span2) / max(span1, span2) return position_distance_source + position_distance_destination + span_distance elif signature_type == "BND": #only position distance is computed dist1 = abs(signature1.get_source()[1] - signature2.get_source()[1]) dist2 = abs(signature1.get_destination()[1] - signature2.get_destination()[1]) if signature1.direction1 == signature2.direction1 and signature1.direction2 == signature2.direction2: position_distance = (dist1 + dist2) / 3000 else: position_distance = 99999 return position_distance else: return None def span_position_distance_clusters(cluster1, cluster2, position_distance_normalizer): "Span position distance function for merging clusters" span1 = cluster1.get_source()[2] - cluster1.get_source()[1] span2 = cluster2.get_source()[2] - cluster2.get_source()[1] center1 = (cluster1.get_source()[1] + cluster1.get_source()[2]) // 2 center2 = (cluster2.get_source()[1] + cluster2.get_source()[2]) // 2 position_distance = abs(center1 - center2) / position_distance_normalizer span_distance = abs(span1 - span2) / max(span1, span2) return position_distance + span_distance def span_position_distance_intdup_candidates(signature1, signature2, position_distance_normalizer): "Span position distance function for clustering candidates" span1 = signature1.get_source()[2] - signature1.get_source()[1] span2 = signature2.get_source()[2] - signature2.get_source()[1] source_center1 = (signature1.get_source()[1] + signature1.get_source()[2]) // 2 source_center2 = (signature2.get_source()[1] + signature2.get_source()[2]) // 2 position_distance_source = abs(source_center1 - source_center2) / position_distance_normalizer position_distance_destination = abs(signature1.get_destination()[1] - signature2.get_destination()[1]) / position_distance_normalizer span_distance = abs(span1 - span2) / max(span1, span2) return position_distance_source + position_distance_destination + span_distance def clusters_from_partitions(partitions, reference, options): """Finds clusters in partitions using span-position distance and hierarchical clustering. Assumes that all signatures in the given partition are of the same type and on the same contig""" clusters_final = [] large_partitions = 0 duplicate_signatures = 0 #initialize random number generator with fixed number to produce same output from same input seed(1524) # Find clusters in each partition individually. 
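    # Minimal sketch of the clustering calls used below (hypothetical distance values):
    # scipy's linkage expects a condensed distance vector of length n*(n-1)/2 holding the
    # pairwise distances in row-major order; fcluster then cuts the resulting dendrogram
    # at cluster_max_distance. For three signatures A, B and C:
    #   distances = [d(A,B), d(A,C), d(B,C)] = [0.1, 1.7, 1.9]
    #   Z = linkage(np.array(distances), method="average")
    #   fcluster(Z, 0.3, criterion="distance")  # A and B share a label, C gets its own
    # The actual distances below come from span_position_distance.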
for partition in partitions: if len(partition) > 100: partition_sample = sample(partition, 100) large_partitions += 1 else: partition_sample = partition element_type = partition_sample[0].type assert(element_type in ["DEL", "DUP_TAN", "INV", "INS", "DUP_INT", "BND"]) #remove similar signatures coming from the same read if element_type == "INV": #no duplication removal for inversions because they consist of two complementary signatures from the same read partition_sample_without_duplicates = partition_sample else: duplicates_from_same_read = set() for i in range(len(partition_sample)-1): for j in range(i+1, len(partition_sample)): if partition_sample[i].read == partition_sample[j].read and span_position_distance(partition_sample[i], partition_sample[j], element_type, reference, options.position_distance_normalizer, options.edit_distance_normalizer, options.cluster_max_distance) <= options.cluster_max_distance: duplicates_from_same_read.add(j) duplicate_signatures += len(duplicates_from_same_read) partition_sample_without_duplicates = [partition_sample[i] for i in range(len(partition_sample)) if i not in duplicates_from_same_read] if len(partition_sample_without_duplicates) == 1: clusters_final.append([partition_sample_without_duplicates[0]]) continue #compute pairwise distances distances = [] if element_type == "INV": for i in range(len(partition_sample_without_duplicates)-1): for j in range(i+1, len(partition_sample_without_duplicates)): distances.append(span_position_distance(partition_sample_without_duplicates[i], partition_sample_without_duplicates[j], element_type, reference, options.position_distance_normalizer, options.edit_distance_normalizer, options.cluster_max_distance)) else: for i in range(len(partition_sample_without_duplicates)-1): for j in range(i+1, len(partition_sample_without_duplicates)): if partition_sample_without_duplicates[i].read == partition_sample_without_duplicates[j].read: distances.append(99999) else: distances.append(span_position_distance(partition_sample_without_duplicates[i], partition_sample_without_duplicates[j], element_type, reference, options.position_distance_normalizer, options.edit_distance_normalizer, options.cluster_max_distance)) Z = linkage(np.array(distances), method = "average") cluster_indices = list(fcluster(Z, options.cluster_max_distance, criterion='distance')) new_clusters = [[] for i in range(max(cluster_indices))] for signature_index, cluster_index in enumerate(cluster_indices): new_clusters[cluster_index-1].append(partition_sample_without_duplicates[signature_index]) clusters_final.extend(new_clusters) if len(partitions) > 0: if len(partitions[0]) > 0: logging.debug("%d out of %d partitions for %s exceeded 100 elements." % (large_partitions, len(partitions), partitions[0][0].type)) logging.debug("%d %s signatures were removed due to similarity to another signature from the same read." 
% (duplicate_signatures, partitions[0][0].type)) return clusters_final def calculate_score(cluster, std_span, std_pos, span, type): if std_span == None or std_pos == None: span_deviation_score = 0 pos_deviation_score = 0 else: span_deviation_score = 1 - min(1, std_span / span) pos_deviation_score = 1 - min(1, std_pos / span) if type == "INV": directions = [signature.direction for signature in cluster] direction_counts = [0, 0, 0, 0, 0] for direction in directions: if direction == "left_fwd": direction_counts[0] += 1 if direction == "left_rev": direction_counts[1] += 1 if direction == "right_fwd": direction_counts[2] += 1 if direction == "right_rev": direction_counts[3] += 1 if direction == "all": direction_counts[4] += 1 left_signatures = direction_counts[0] + direction_counts[1] right_signatures = direction_counts[2] + direction_counts[3] valid_signatures = min(left_signatures, right_signatures) + direction_counts[4] num_signatures = min(80, valid_signatures) else: num_signatures = min(80, len(cluster)) return num_signatures + span_deviation_score * (num_signatures / 8) + pos_deviation_score * (num_signatures / 8) def consolidate_clusters_unilocal(clusters): """Consolidate clusters to a list of (type, contig, mean start, mean end, cluster size, members) tuples.""" consolidated_clusters = [] for cluster in clusters: average_start = sum([member.get_source()[1] for member in cluster]) / len(cluster) average_end = sum([member.get_source()[2] for member in cluster]) / len(cluster) if len(cluster) > 1: std_span = stdev([member.get_source()[2] - member.get_source()[1] for member in cluster]) std_pos = stdev([(member.get_source()[2] + member.get_source()[1]) / 2 for member in cluster]) else: std_span = None std_pos = None score = calculate_score(cluster, std_span, std_pos, average_end - average_start, cluster[0].type) consolidated_clusters.append(SignatureClusterUniLocal(cluster[0].get_source()[0], int(round(average_start)), int(round(average_end)), score, len(cluster), cluster, cluster[0].type, std_span, std_pos)) return consolidated_clusters def consolidate_clusters_bilocal(clusters): """Consolidate clusters to a list of (type, contig, mean start, mean end, cluster size, members) tuples.""" consolidated_clusters = [] for cluster in clusters: #Source source_average_start = sum([member.get_source()[1] for member in cluster]) / len(cluster) source_average_end = sum([member.get_source()[2] for member in cluster]) / len(cluster) if len(cluster) > 1: source_std_span = stdev([member.get_source()[2] - member.get_source()[1] for member in cluster]) source_std_pos = stdev([(member.get_source()[2] + member.get_source()[1]) / 2 for member in cluster]) else: source_std_span = None source_std_pos = None if cluster[0].type == "DUP_TAN": max_copies = max([member.copies for member in cluster]) score = calculate_score(cluster, source_std_span, source_std_pos, source_average_end - source_average_start, cluster[0].type) consolidated_clusters.append(SignatureClusterBiLocal(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_source()[0], int(round(source_average_end)), int(round(source_average_end)) + max_copies * (int(round(source_average_end)) - int(round(source_average_start))), score, len(cluster), cluster, cluster[0].type, source_std_span, source_std_pos)) elif cluster[0].type == "DUP_INT": #Destination destination_average_start = sum([member.get_destination()[1] for member in cluster]) / len(cluster) destination_average_end = 
sum([member.get_destination()[2] for member in cluster]) / len(cluster) if len(cluster) > 1: destination_std_span = stdev([member.get_destination()[2] - member.get_destination()[1] for member in cluster]) destination_std_pos = stdev([(member.get_destination()[2] + member.get_destination()[1]) / 2 for member in cluster]) else: destination_std_span = None destination_std_pos = None if source_std_span == None or source_std_pos == None or destination_std_span == None or destination_std_pos == None: score = calculate_score(cluster, None, None, mean([source_average_end - source_average_start, destination_average_end - destination_average_start]), cluster[0].type) consolidated_clusters.append(SignatureClusterBiLocal(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_destination()[0], int(round(destination_average_start)), int(round(destination_average_end)), score, len(cluster), cluster, cluster[0].type, None, None)) else: score = calculate_score(cluster, mean([source_std_span, destination_std_span]), mean([source_std_pos, destination_std_pos]), mean([source_average_end - source_average_start, destination_average_end - destination_average_start]), cluster[0].type) consolidated_clusters.append(SignatureClusterBiLocal(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_destination()[0], int(round(destination_average_start)), int(round(destination_average_end)), score, len(cluster), cluster, cluster[0].type, mean([source_std_span, destination_std_span]), mean([source_std_pos, destination_std_pos]))) elif cluster[0].type == "BND": #Destination destination_average_start = sum([member.get_destination()[1] for member in cluster]) / len(cluster) destination_average_end = sum([member.get_destination()[2] for member in cluster]) / len(cluster) #Directions directions1 = list(set([member.direction1 for member in cluster])) assert len(directions1) == 1 direction1 = directions1[0] directions2 = list(set([member.direction2 for member in cluster])) assert len(directions2) == 1 direction2 = directions2[0] if len(cluster) > 1: destination_std_pos = stdev([(member.get_destination()[2] + member.get_destination()[1]) / 2 for member in cluster]) else: destination_std_span = None destination_std_pos = None if source_std_pos == None or destination_std_pos == None: score = calculate_score(cluster, None, None, 500, cluster[0].type) new_signature_cluster = SignatureClusterBiLocal(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_destination()[0], int(round(destination_average_start)), int(round(destination_average_end)), score, len(cluster), cluster, cluster[0].type, None, None) else: score = calculate_score(cluster, source_std_pos, destination_std_pos, 500, cluster[0].type) new_signature_cluster = SignatureClusterBiLocal(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_destination()[0], int(round(destination_average_start)), int(round(destination_average_end)), score, len(cluster), cluster, cluster[0].type, source_std_pos, destination_std_pos) new_signature_cluster.direction1 = direction1 new_signature_cluster.direction2 = direction2 consolidated_clusters.append(new_signature_cluster) return consolidated_clusters def partition_and_cluster_candidates(candidates, options, type): partitions = form_partitions(candidates, options.partition_max_distance) clusters = [] large_partitions = 0 
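    # Partitions with more than 100 members are downsampled below to cap the quadratic
    # number of pairwise distance computations; the fixed seed keeps sampling reproducible.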
#initialize random number generator with fixed number to produce same output from same input seed(1524) # Find clusters in each partition individually. for partition in partitions: if len(partition) == 1: clusters.append([partition[0]]) continue elif len(partition) > 100: partition_sample = sample(partition, 100) large_partitions += 1 else: partition_sample = partition element_type = partition_sample[0].type distances = [] for i in range(len(partition_sample)-1): for j in range(i+1, len(partition_sample)): distances.append(span_position_distance_intdup_candidates(partition_sample[i], partition_sample[j], options.position_distance_normalizer)) Z = linkage(np.array(distances), method = "average") cluster_indices = list(fcluster(Z, options.cluster_max_distance, criterion='distance')) new_clusters = [[] for i in range(max(cluster_indices))] for signature_index, cluster_index in enumerate(cluster_indices): new_clusters[cluster_index-1].append(partition_sample[signature_index]) clusters.extend(new_clusters) if len(partitions) > 0: if len(partitions[0]) > 0: logging.debug("%d out of %d partitions for %s exceeded 100 elements." % (large_partitions, len(partitions), partitions[0][0].type)) logging.info("Clustered {0}: {1} partitions and {2} clusters".format(type, len(partitions), len(clusters))) final_candidates = [] for cluster in clusters: combined_score = max([candidate.score for candidate in cluster]) combined_members = [member for candidate in cluster for member in candidate.members] stds_span = [candidate.std_span for candidate in cluster if candidate.std_span != None] if len(stds_span) >= 1: combined_std_span = mean(stds_span) else: combined_std_span = None stds_pos = [candidate.std_pos for candidate in cluster if candidate.std_pos != None] if len(stds_pos) >= 1: combined_std_pos = mean(stds_pos) else: combined_std_pos = None #Source source_average_start = (sum([candidate.get_source()[1] for candidate in cluster]) / len(cluster)) source_average_end = (sum([candidate.get_source()[2] for candidate in cluster]) / len(cluster)) #Destination destination_average_start = (sum([candidate.get_destination()[1] for candidate in cluster]) / len(cluster)) destination_average_end = (sum([candidate.get_destination()[2] for candidate in cluster]) / len(cluster)) #Origin deleted? 
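        # (a cluster is flagged as cut&paste when any merged candidate had its genomic
        # origin deleted, i.e. the insertion looks like a move rather than a copy)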
        cutpaste = False
        for member in cluster:
            if member.cutpaste:
                cutpaste = True
        if cluster[0].type == "DUP_INT":
            final_candidates.append(CandidateDuplicationInterspersed(cluster[0].get_source()[0], int(round(source_average_start)), int(round(source_average_end)), cluster[0].get_destination()[0], int(round(destination_average_start)), int(round(destination_average_end)), combined_members, combined_score, combined_std_span, combined_std_pos, cutpaste))
    return final_candidates


def partition_and_cluster(signatures, options, type):
    partitions = form_partitions(signatures, options.partition_max_distance)
    with FastaFile(options.genome) as reference:
        clusters = clusters_from_partitions(partitions, reference, options)
    logging.info("Clustered {0}: {1} partitions and {2} clusters".format(type, len(partitions), len(clusters)))
    if type == "deleted regions" or type == "inserted regions" or type == "inverted regions":
        return sorted(consolidate_clusters_unilocal(clusters), key=lambda cluster: (cluster.contig, (cluster.end + cluster.start) / 2))
    elif type == "tandem duplicated regions" or type == "inserted regions with detected region of origin" or type == "translocation breakpoints":
        return consolidate_clusters_bilocal(clusters)
    else:
        logging.error("Unknown parameter type={0} to function partition_and_cluster.".format(type))

svim-2.0.0/src/svim/SVIM_genotyping.py
import time
import logging

from svim.SVIM_intra import analyze_alignment_indel
from svim.SVIM_inter import analyze_read_segments
from svim.SVIM_COLLECT import retrieve_other_alignments


def span_position_distance(candidate, signature, position_distance_normalizer):
    if candidate.type == "INS" or candidate.type == "DUP_INT":
        c_contig, c_start, c_end = candidate.get_destination()
    else:
        c_contig, c_start, c_end = candidate.get_source()
    if signature.type == "DUP_INT":
        s_contig, s_start, s_end = signature.get_destination()
    else:
        s_contig, s_start, s_end = signature.get_source()
    # INS signatures can support DUP_INT candidates and vice versa
    if not (candidate.type == "INS" and signature.type == "DUP_INT") and \
       not (candidate.type == "DUP_INT" and signature.type == "INS") and \
       candidate.type != signature.type:
        return float("inf")
    if c_contig != s_contig:
        return float("inf")
    span1 = c_end - c_start
    span2 = s_end - s_start
    center1 = (c_start + c_end) // 2
    center2 = (s_start + s_end) // 2
    position_distance = min(abs(c_start - s_start), abs(c_end - s_end), abs(center1 - center2)) / position_distance_normalizer
    span_distance = abs(span1 - span2) / max(span1, span2)
    return position_distance + span_distance


def genotype(candidates, bam, type, options):
    num_candidates = len(candidates)
    for nr, candidate in enumerate(candidates):
        if (nr+1) % 10000 == 0:
            logging.info("Processed {0} of {1} candidates".format(nr+1, num_candidates))
        if candidate.score < options.minimum_score:
            continue
        # Fetch alignments around variant locus
        if type == "INS" or type == "DUP_INT":
            contig, start, end = candidate.get_destination()
            # We need the insertion locus on the reference, for which end equals start
            end = start
        else:
            contig, start, end = candidate.get_source()
        contig_length = bam.get_reference_length(contig)
        alignment_it = bam.fetch(contig=contig, start=max(0, start-1000), stop=min(contig_length, end+1000))
        reads_supporting_variant = set([sig.read for sig in candidate.members])
        # Count reads that overlap the locus and therefore support the reference
        reads_supporting_reference = set()
        # Loop through fetched alignments
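        # Worked example (hypothetical counts and thresholds): with 6 reads supporting the
        # variant and 4 overlapping reads supporting the reference, support_fraction is
        # 6 / (6 + 4) = 0.6; with a heterozygous threshold of 0.3 and a homozygous threshold
        # of 0.8, the genotype would be reported as 0/1. At most 500 overlapping alignments
        # are inspected per candidate to bound runtime.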
aln_no = 0 while aln_no < 500: try: current_alignment = next(alignment_it) except StopIteration: break if current_alignment.query_name in reads_supporting_variant: continue if current_alignment.is_unmapped or current_alignment.is_secondary or current_alignment.mapping_quality < options.min_mapq: continue aln_no += 1 if type == "DEL" or type == "INV": minimum_overlap = min((end - start) / 2, 2000) if (current_alignment.reference_start < (end - minimum_overlap) and current_alignment.reference_end > (end + 100) or current_alignment.reference_start < (start - 100) and current_alignment.reference_end > (start + minimum_overlap)): reads_supporting_reference.add(current_alignment.query_name) if type == "INS" or type == "DUP_INT": if current_alignment.reference_start < (start - 100) and current_alignment.reference_end > (end + 100): reads_supporting_reference.add(current_alignment.query_name) if (len(reads_supporting_variant) + len(reads_supporting_reference)) >= options.minimum_depth: candidate.support_fraction = len(reads_supporting_variant) / (len(reads_supporting_variant) + len(reads_supporting_reference)) if candidate.support_fraction >= options.homozygous_threshold: candidate.genotype = "1/1" elif candidate.support_fraction >= options.heterozygous_threshold and candidate.support_fraction < options.homozygous_threshold: candidate.genotype = "0/1" elif candidate.support_fraction < options.heterozygous_threshold: candidate.genotype = "0/0" else: candidate.genotype = "./." elif (len(reads_supporting_variant) + len(reads_supporting_reference)) > 0: candidate.support_fraction = len(reads_supporting_variant) / (len(reads_supporting_variant) + len(reads_supporting_reference)) candidate.genotype = "./." else: candidate.support_fraction = "." candidate.genotype = "./." candidate.ref_reads = len(reads_supporting_reference) candidate.alt_reads = len(reads_supporting_variant)svim-2.0.0/src/svim/SVIM_input_parsing.py000066400000000000000000001274751406305341300203450ustar00rootroot00000000000000import sys import os import logging import argparse def parse_arguments(program_version, arguments = sys.argv[1:]): parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description="""SVIM (pronounced SWIM) is a structural variant caller for long reads. It discriminates six different variant classes: deletions, tandem and interspersed duplications, inversions, insertions and translocations. SVIM is unique in its capability of extracting both the genomic origin and destination of duplications. SVIM consists of four major steps: - COLLECT detects signatures for SVs in long read alignments - CLUSTER merges signatures that come from the same SV - COMBINE combines clusters from different genomic regions and classifies them into distinct SV types - GENOTYPE uses alignments spanning SVs to determine their genotype SVIM can process two types of input. Firstly, it can detect SVs from raw reads by aligning them to a given reference genome first ("SVIM.py reads [options] working_dir reads genome"). Alternatively, it can detect SVs from existing reads alignments in SAM/BAM format ("SVIM.py alignment [options] working_dir bam_file"). """) subparsers = parser.add_subparsers(help='modes', dest='sub') parser.add_argument('--version', '-v', action='version', version='%(prog)s {version}'.format(version=program_version)) parser_fasta = subparsers.add_parser('reads', help='Detect SVs from raw reads. 
Align reads to given reference genome first.') parser_fasta.add_argument('working_dir', type=str, help='Working and output directory. \ Existing files in the directory are overwritten. \ If the directory does not exist, it is created.') parser_fasta.add_argument('reads', type=str, help='Read file (FASTA, FASTQ, gzipped FASTA, gzipped FASTQ or file list). \ The read file has to have one of the following supported file endings: \ FASTA: .fa, .fasta, .FA, .fa.gz, .fa.gzip, .fasta.gz, .fasta.gzip \ FASTQ: .fq, .fastq, .FQ, .fq.gz, .fq.gzip, .fastq.gz, .fastq.gzip \ FILE LIST: .fa.fn, fq.fn') parser_fasta.add_argument('genome', type=str, help='Reference genome file (FASTA)') parser_fasta.add_argument('--verbose', action='store_true', help='Enable more verbose logging (default: %(default)s)') group_fasta_align = parser_fasta.add_argument_group('ALIGN') group_fasta_align.add_argument('--cores', type=int, default=1, help='CPU cores to use for the alignment (default: %(default)s)') group_fasta_align.add_argument('--aligner', type=str, default="ngmlr", choices=["ngmlr", "minimap2"], help='Tool for read alignment: ngmlr or minimap2 (default: %(default)s)') group_fasta_align.add_argument('--nanopore', action='store_true', help='Use Nanopore settings for read alignment (default: %(default)s)') group_fasta_collect = parser_fasta.add_argument_group('COLLECT') group_fasta_collect.add_argument('--min_mapq', type=int, default=20, help='Minimum mapping quality of reads to consider (default: %(default)s). \ Reads with a lower mapping quality are ignored.') group_fasta_collect.add_argument('--min_sv_size', type=int, default=40, help='Minimum SV size to detect (default: %(default)s). \ SVIM can potentially detect events of any size but is limited by the \ signal-to-noise ratio in the input alignments. That means that more \ accurate reads and alignments enable the detection of smaller events. \ For current PacBio or Nanopore data, we would recommend a minimum size \ of 40bp or larger.') group_fasta_collect.add_argument('--max_sv_size', type=int, default=100000, help='Maximum SV size to detect (default: %(default)s). \ This parameter is used to distinguish long deletions (and inversions) from \ translocations which cannot be distinguished from the alignment alone. \ Split read segments mapping far apart on the reference could either \ indicate a very long deletion (inversion) or a translocation breakpoint. \ SVIM calls a translocation breakpoint if the mapping distance is larger \ than this parameter and a deletion (or inversion) if it is smaller or equal.') group_fasta_collect.add_argument('--segment_gap_tolerance', type=int, default=10, help='Maximum tolerated gap between adjacent alignment segments (default: %(default)s). \ This parameter applies to gaps on the reference and the read. Example: \ Deletions are detected from two subsequent segments of a split read that are mapped \ far apart from each other on the reference. The segment gap tolerance determines \ the maximum tolerated length of the read gap between both segments. If there is an \ unaligned read segment larger than this value between the two segments, no deletion is called.') group_fasta_collect.add_argument('--segment_overlap_tolerance', type=int, default=5, help='Maximum tolerated overlap between adjacent alignment segments (default: %(default)s). \ This parameter applies to overlaps on the reference and the read. 
Example: \ Deletions are detected from two subsequent segments of a split read that are mapped \ far apart from each other on the reference. The segment overlap tolerance determines \ the maximum tolerated length of an overlap between both segments on the read. If the \ overlap between the two segments on the read is larger than this value, no deletion is called.') group_fasta_collect.add_argument('--all_bnds', action='store_true', help='Output all rearrangements additionally in BND notation (default: %(default)s). \ By default, SV signatures from the read alignments are used to detect complete SVs, \ such as deletions, insertions and inversions. When this option is enabled, all SVs \ are also output in breakend (BND) notation as defined in the VCF specs. For instance, \ a deletion gets two records in the VCF output: 1. the normal record and 2. \ a record representing the novel adjacency between the deletion\'s start and \ end coordinate in the sample genome.') group_fasta_cluster = parser_fasta.add_argument_group('CLUSTER') group_fasta_cluster.add_argument('--partition_max_distance', type=int, default=1000, help='Maximum distance in bp between SVs in a partition (default: %(default)s). \ Before clustering, the SV signatures are divided into coarse partitions. This parameter \ determines the maximum distance between two subsequent signatures in the same partition. \ If the distance between two subsequent signatures \ is larger than this parameter, they are distributed into separate partitions.') group_fasta_cluster.add_argument('--position_distance_normalizer', type=int, default=900, help='Distance normalizer used for span-position distance (default: %(default)s). \ SVIM clusters the SV signatures using an hierarchical clustering approach and a \ novel distance metric called \"span-position distance\". Span-position distance \ is the sum of two components, span distance and position distance. \ The span distance is the difference in lengths between signatures normalized \ by the greater length and always lies in the interval [0,1]. \ The position distance is the difference in position between signatures \ normalized by the distance normalizer (this parameter). For a position difference \ of 1.8kb and a distance normalizer of 900, the position distance will be 2. \ A smaller distance normalizer leads to a higher position distance and as a \ consequence increases the importance of the position distance in the \ span-position distance relative to the span distance.') group_fasta_cluster.add_argument('--edit_distance_normalizer', type=float, default=1.0, help='Distance normalizer used specifically for insertions (default: %(default)s). \ SVIM clusters insertion signatures using an hierarchical clustering approach and a \ special distance metric for insertions. This distance is the sum of two \ components, position distance and edit distance between the insertion sequences. \ The edit distance is normalized (i.e. divided) by the product of the span of the \ longer insertion and this normalizer. The position distance is the difference in \ position between signatures normalized by the position distance normalizer \ (another parameter). A smaller edit distance normalizer leads to a larger \ edit distance and as a consequence increases the importance of the edit distance \ in the clustering process so that only insertions with very similar sequences \ are clustered together. 
A larger edit distance normalizer diminishes the importance \ of the insertion sequences in the clustering process.') group_fasta_cluster.add_argument('--cluster_max_distance', type=float, default=0.5, help='Maximum span-position distance between SVs in a cluster (default: %(default)s). \ This is the most important parameter because it determines the strictness \ of clustering. Choosing a large value leads to fewer but larger clusters with larger \ distances between its members. Choosing a small value leads to more but smaller \ clusters with smaller distances between its members. \ This parameter determines the height of the cut-off in the hierarchical clustering \ dendrogram.') group_fasta_combine = parser_fasta.add_argument_group('COMBINE') group_fasta_combine.add_argument('--del_ins_dup_max_distance', type=float, default=1.0, help='Maximum span-position distance between the origin of an insertion and a deletion to be flagged as a potential cut&paste insertion (default: %(default)s)') group_fasta_combine.add_argument('--trans_sv_max_distance', type=int, default=500, help='Maximum distance in bp between a translocation breakpoint and an SV signature to be combined (default: %(default)s)') group_fasta_combine.add_argument('--skip_consensus', action='store_true', help='Disable consensus computation for insertions (default: %(default)s). \ This reduces the time and memory consumption of SVIM and might be \ useful if consensus sequences are not needed. With this option, \ insertion calls are represented by symbolic alleles () instead \ of sequence alles in the output VCF. \ Consensus computation requires a modern CPU with the SSE 4.1 \ instruction set. For older CPUs missing this instruction set, \ consensus computation is automatically disabled.') group_fasta_combine.add_argument('--max_consensus_length', type=int, default=10000, help='Maximum size of insertion sequences for consensus computation. (default: %(default)s)\ For insertions longer than this threshold, no consensus is computed to save memory.') group_fasta_genotype = parser_fasta.add_argument_group('GENOTYPE') group_fasta_genotype.add_argument('--skip_genotyping', action='store_true', help='Disable genotyping (default: %(default)s)') group_fasta_genotype.add_argument('--minimum_score', type=int, default=3, help='Minimum score for genotyping (default: %(default)s). \ Only SV candidates with a higher or equal score are genotyped. Depending on the \ score distribution among the SV candidates, decreasing this value increases the \ runtime. We recommend to choose a value close to the score threshold used \ for filtering the SV candidates.') group_fasta_genotype.add_argument('--homozygous_threshold', type=float, default=0.8, help='Minimum variant allele frequency to be called as homozygous (default: %(default)s). \ Allele frequency is computed as the fraction of reads supporting the variant over the \ total number of reads covering the variant. Variants with an allele frequence greater \ than or equal to this threshold are called as homozygous alternative.') group_fasta_genotype.add_argument('--heterozygous_threshold', type=float, default=0.2, help='Minimum variant allele frequency to be called as heterozygous (default: %(default)s). \ Allele frequency is computed as the fraction of reads supporting the variant over the \ total number of reads covering the variant. Variants with an allele frequence greater \ than or equal to this threshold but lower than the homozygous threshold are called as \ heterozygous alternative. 
Variants with an allele frequence lower than this threshold \ are called as homozygous reference.') group_fasta_genotype.add_argument('--minimum_depth', type=int, default=4, help='Minimum total read depth for genotyping (default: %(default)s). \ Variants covered by a total number of reads lower than this value are not assigned \ a genotype (./. in the output VCF file).') group_fasta_output = parser_fasta.add_argument_group('OUTPUT') group_fasta_output.add_argument('--sample', type=str, default="Sample", help='Sample ID to include in output vcf file (default: %(default)s)') group_fasta_output.add_argument('--types', type=str, default="DEL,INS,INV,DUP:TANDEM,DUP:INT,BND", help='SV types to include in output VCF (default: %(default)s). \ Give a comma-separated list of SV types. The possible SV types are: DEL (deletions), \ INS (novel insertions), INV (inversions), DUP:TANDEM (tandem duplications), \ DUP:INT (interspersed duplications), BND (breakends).') group_fasta_output.add_argument('--symbolic_alleles', action='store_true', help='Use symbolic alleles, such as or in output VCF (default: %(default)s). \ By default, all SV alleles are represented by nucleotide sequences.') group_fasta_output.add_argument('--insertion_sequences', action='store_true', help='Output insertion sequences in INFO tag of VCF (default: %(default)s). \ If enabled, the INFO/SEQS tag contains a list of insertion sequences from the supporting reads.') group_fasta_output.add_argument('--tandem_duplications_as_insertions', action='store_true', help='Represent tandem duplications as insertions in output VCF (default: %(default)s). \ By default, tandem duplications are represented by the SVTYPE=DUP:TANDEM and the genomic source is given by the \ POS and END tags. When enabling this option, duplications are instead represented by the SVTYPE=INS \ and POS and END both give the insertion point of the duplication.') group_fasta_output.add_argument('--interspersed_duplications_as_insertions', action='store_true', help='Represent interspersed duplications as insertions in output VCF (default: %(default)s). \ By default, interspersed duplications are represented by the SVTYPE=DUP:INT and the genomic source is given by the \ POS and END tags. When enabling this option, duplications are instead represented by the SVTYPE=INS \ and POS and END both give the insertion point of the duplication.') group_fasta_output.add_argument('--read_names', action='store_true', help='Output names of supporting reads in INFO tag of VCF (default: %(default)s). \ If enabled, the INFO/READS tag contains the list of names of the supporting reads.') group_fasta_output.add_argument('--zmws', action='store_true', help='look for information on ZMWs in PacBio read names (default: %(default)s). \ If enabled, the INFO/ZMWS tag contains the number of ZMWs that produced supporting reads.') parser_bam = subparsers.add_parser('alignment', help='Detect SVs from an existing alignment') parser_bam.add_argument('working_dir', type=os.path.abspath, help='Working and output directory. \ Existing files in the directory are overwritten. 
\ If the directory does not exist, it is created.') parser_bam.add_argument('bam_file', type=str, help='Coordinate-sorted and indexed BAM file with aligned long reads') parser_bam.add_argument('genome', type=str, help='Reference genome file that the long reads were aligned to (FASTA)') parser_bam.add_argument('--verbose', action='store_true', help='Enable more verbose logging (default: %(default)s)') group_bam_collect = parser_bam.add_argument_group('COLLECT') group_bam_collect.add_argument('--min_mapq', type=int, default=20, help='Minimum mapping quality of reads to consider (default: %(default)s). \ Reads with a lower mapping quality are ignored.') group_bam_collect.add_argument('--min_sv_size', type=int, default=40, help='Minimum SV size to detect (default: %(default)s). \ SVIM can potentially detect events of any size but is limited by the \ signal-to-noise ratio in the input alignments. That means that more \ accurate reads and alignments enable the detection of smaller events. \ For current PacBio or Nanopore data, we would recommend a minimum size \ of 40bp or larger.') group_bam_collect.add_argument('--max_sv_size', type=int, default=100000, help='Maximum SV size to detect (default: %(default)s). \ This parameter is used to distinguish long deletions (and inversions) from \ translocations which cannot be distinguished from the alignment alone. \ Split read segments mapping far apart on the reference could either \ indicate a very long deletion (inversion) or a translocation breakpoint. \ SVIM calls a translocation breakpoint if the mapping distance is larger \ than this parameter and a deletion (or inversion) if it is smaller or equal.') group_bam_collect.add_argument('--segment_gap_tolerance', type=int, default=10, help='Maximum tolerated gap between adjacent alignment segments (default: %(default)s). \ This parameter applies to gaps on the reference and the read. Example: \ Deletions are detected from two subsequent segments of a split read that are mapped \ far apart from each other on the reference. The segment gap tolerance determines \ the maximum tolerated length of the read gap between both segments. If there is an \ unaligned read segment larger than this value between the two segments, no deletion is called.') group_bam_collect.add_argument('--segment_overlap_tolerance', type=int, default=5, help='Maximum tolerated overlap between adjacent alignment segments (default: %(default)s). \ This parameter applies to overlaps on the reference and the read. Example: \ Deletions are detected from two subsequent segments of a split read that are mapped \ far apart from each other on the reference. The segment overlap tolerance determines \ the maximum tolerated length of an overlap between both segments on the read. If the \ overlap between the two segments on the read is larger than this value, no deletion is called.') group_bam_cluster = parser_bam.add_argument_group('CLUSTER') group_bam_cluster.add_argument('--partition_max_distance', type=int, default=1000, help='Maximum distance in bp between SVs in a partition (default: %(default)s). \ Before clustering, the SV signatures are divided into coarse partitions. This parameter \ determines the maximum distance between two subsequent signatures in the same partition. 
\ If the distance between two subsequent signatures \ is larger than this parameter, they are distributed into separate partitions.') group_bam_cluster.add_argument('--position_distance_normalizer', type=int, default=900, help='Distance normalizer used for span-position distance (default: %(default)s). \ SVIM clusters the SV signatures using an hierarchical clustering approach and a \ novel distance metric called \"span-position distance\". Span-position distance \ is the sum of two components, span distance and position distance. \ The span distance is the difference in lengths between signatures normalized \ by the greater length and always lies in the interval [0,1]. \ The position distance is the difference in position between signatures \ normalized by the distance normalizer (this parameter). For a position difference \ of 1.8kb and a distance normalizer of 900, the position distance will be 2. \ A smaller distance normalizer leads to a higher position distance and as a \ consequence increases the importance of the position distance in the \ span-position distance relative to the span distance.') group_bam_cluster.add_argument('--edit_distance_normalizer', type=float, default=1.0, help='Distance normalizer used specifically for insertions (default: %(default)s). \ SVIM clusters insertion signatures using an hierarchical clustering approach and a \ special distance metric for insertions. This distance is the sum of two \ components, position distance and edit distance between the insertion sequences. \ The edit distance is normalized (i.e. divided) by the product of the span of the \ longer insertion and this normalizer. The position distance is the difference in \ position between signatures normalized by the position distance normalizer \ (another parameter). A smaller edit distance normalizer leads to a larger \ edit distance and as a consequence increases the importance of the edit distance \ in the clustering process so that only insertions with very similar sequences \ are clustered together. A larger edit distance normalizer diminishes the importance \ of the insertion sequences in the clustering process.') group_bam_cluster.add_argument('--cluster_max_distance', type=float, default=0.5, help='Maximum span-position distance between SVs in a cluster (default: %(default)s). \ This is the most important parameter because it determines the strictness \ of clustering. Choosing a large value leads to fewer but larger clusters with larger \ distances between its members. Choosing a small value leads to more but smaller \ clusters with smaller distances between its members. \ This parameter determines the height of the cut-off in the hierarchical clustering \ dendrogram.') group_bam_cluster.add_argument('--all_bnds', action='store_true', help='Output all rearrangements additionally in BND notation (default: %(default)s). \ By default, SV signatures from the read alignments are used to detect complete SVs, \ such as deletions, insertions and inversions. When this option is enabled, all SVs \ are also output in breakend (BND) notation as defined in the VCF specs. For instance, \ a deletion gets two records in the VCF output: 1. the normal record and 2. 
\ a record representing the novel adjacency between the deletion\'s start and \ end coordinate in the sample genome.') group_bam_combine = parser_bam.add_argument_group('COMBINE') group_bam_combine.add_argument('--del_ins_dup_max_distance', type=float, default=1.0, help='Maximum span-position distance between the origin of an insertion and a deletion to be flagged as a potential cut&paste insertion (default: %(default)s)') group_bam_combine.add_argument('--trans_sv_max_distance', type=int, default=500, help='Maximum distance in bp between a translocation breakpoint and an SV signature to be combined (default: %(default)s)') group_bam_combine.add_argument('--skip_consensus', action='store_true', help='Disable consensus computation for insertions (default: %(default)s). \ This reduces the time and memory consumption of SVIM and might be \ useful if consensus sequences are not needed. With this option, \ insertion calls are represented by symbolic alleles () instead \ of sequence alles in the output VCF. \ Consensus computation requires a modern CPU with the SSE 4.1 \ instruction set. For older CPUs missing this instruction set, \ consensus computation is automatically disabled.') group_bam_combine.add_argument('--max_consensus_length', type=int, default=10000, help='Maximum size of insertion sequences for consensus computation. (default: %(default)s)\ For insertions longer than this threshold, no consensus is computed to save memory.') group_bam_genotype = parser_bam.add_argument_group('GENOTYPE') group_bam_genotype.add_argument('--skip_genotyping', action='store_true', help='Disable genotyping (default: %(default)s)') group_bam_genotype.add_argument('--minimum_score', type=int, default=3, help='Minimum score for genotyping (default: %(default)s). \ Only SV candidates with a higher or equal score are genotyped. Depending on the \ score distribution among the SV candidates, decreasing this value increases the \ runtime. We recommend to choose a value close to the score threshold used \ for filtering the SV candidates.') group_bam_genotype.add_argument('--homozygous_threshold', type=float, default=0.8, help='Minimum variant allele frequency to be called as homozygous (default: %(default)s). \ Allele frequency is computed as the fraction of reads supporting the variant over the \ total number of reads covering the variant. Variants with an allele frequence greater \ than or equal to this threshold are called as homozygous alternative.') group_bam_genotype.add_argument('--heterozygous_threshold', type=float, default=0.2, help='Minimum variant allele frequency to be called as heterozygous (default: %(default)s). \ Allele frequency is computed as the fraction of reads supporting the variant over the \ total number of reads covering the variant. Variants with an allele frequence greater \ than or equal to this threshold but lower than the homozygous threshold are called as \ heterozygous alternative. Variants with an allele frequence lower than this threshold \ are called as homozygous reference.') group_bam_genotype.add_argument('--minimum_depth', type=int, default=4, help='Minimum total read depth for genotyping (default: %(default)s). \ Variants covered by a total number of reads lower than this value are not assigned \ a genotype (./. 
in the output VCF file).') group_bam_output = parser_bam.add_argument_group('OUTPUT') group_bam_output.add_argument('--sample', type=str, default="Sample", help='Sample ID to include in output vcf file (default: %(default)s)') group_bam_output.add_argument('--types', type=str, default="DEL,INS,INV,DUP:TANDEM,DUP:INT,BND", help='SV types to include in output VCF (default: %(default)s). \ Give a comma-separated list of SV types. The possible SV types are: DEL (deletions), \ INS (novel insertions), INV (inversions), DUP:TANDEM (tandem duplications), \ DUP:INT (interspersed duplications), BND (breakends).') group_bam_output.add_argument('--symbolic_alleles', action='store_true', help='Use symbolic alleles, such as or in output VCF (default: %(default)s). \ By default, all SV alleles are represented by nucleotide sequences.') group_bam_output.add_argument('--insertion_sequences', action='store_true', help='Output insertion sequences in INFO tag of VCF (default: %(default)s). \ If enabled, the INFO/SEQS tag contains a list of insertion sequences from the supporting reads.') group_bam_output.add_argument('--tandem_duplications_as_insertions', action='store_true', help='Represent tandem duplications as insertions in output VCF (default: %(default)s). \ By default, tandem duplications are represented by the SVTYPE=DUP:TANDEM and the genomic source is given by the \ POS and END tags. When enabling this option, duplications are instead represented by the SVTYPE=INS \ and POS and END both give the insertion point of the duplication.') group_bam_output.add_argument('--interspersed_duplications_as_insertions', action='store_true', help='Represent interspersed duplications as insertions in output VCF (default: %(default)s). \ By default, interspersed duplications are represented by the SVTYPE=DUP:INT and the genomic source is given by the \ POS and END tags. When enabling this option, duplications are instead represented by the SVTYPE=INS \ and POS and END both give the insertion point of the duplication.') group_bam_output.add_argument('--read_names', action='store_true', help='Output names of supporting reads in INFO tag of VCF (default: %(default)s). \ If enabled, the INFO/READS tag contains the list of names of the supporting reads.') group_bam_output.add_argument('--zmws', action='store_true', help='look for information on ZMWs in PacBio read names (default: %(default)s). 
\ If enabled, the INFO/ZMWS tag contains the number of ZMWs that produced supporting reads.') return parser.parse_args(arguments) def guess_file_type(reads_path): if reads_path.endswith(".fa") or reads_path.endswith(".fasta") or reads_path.endswith(".FA"): logging.info("Recognized reads file as FASTA format.") return "fasta" elif reads_path.endswith(".fq") or reads_path.endswith(".fastq") or reads_path.endswith(".FQ"): logging.info("Recognized reads file as FASTQ format.") return "fastq" elif reads_path.endswith(".fa.gz") or reads_path.endswith(".fasta.gz") or reads_path.endswith(".FA.gz") or reads_path.endswith(".fa.gzip") or reads_path.endswith(".fasta.gzip") or reads_path.endswith(".FA.gzip"): logging.info("Recognized reads file as gzipped FASTA format.") return "fasta_gzip" elif reads_path.endswith(".fq.gz") or reads_path.endswith(".fastq.gz") or reads_path.endswith(".FQ.gz") or reads_path.endswith(".fq.gzip") or reads_path.endswith(".fastq.gzip") or reads_path.endswith(".FQ.gzip"): logging.info("Recognized reads file as gzipped FASTQ format.") return "fastq_gzip" elif reads_path.endswith(".fa.fn") or reads_path.endswith(".fasta.fn") or reads_path.endswith(".FA.fn") or reads_path.endswith(".fq.fn") or reads_path.endswith(".fastq.fn") or reads_path.endswith(".FQ.fn"): logging.info("Recognized reads file as file list format.") return "list" else: logging.error("Unknown file ending of file {0}. See github.com/eldariont/svim/wiki/ for supported file endings. Exiting.".format(reads_path)) return "unknown" def read_file_list(path): file_list = open(path, "r") for line in file_list: yield line.strip() file_list.close()svim-2.0.0/src/svim/SVIM_inter.py000066400000000000000000000554671406305341300166050ustar00rootroot00000000000000from __future__ import print_function import sys import logging from statistics import mean from svim.SVSignature import SignatureDeletion, SignatureInsertion, SignatureInversion, SignatureTranslocation, SignatureDuplicationTandem, SignatureInsertionFrom from svim.SVIM_clustering import consolidate_clusters_bilocal, clusters_from_partitions def is_similar(chr1, start1, end1, chr2, start2, end2, span_position_treshold = 0.3): span1 = end1 - start1 span2 = end2 - start2 center1 = (start1 + end1) // 2 center2 = (start2 + end2) // 2 position_distance = abs(center1 - center2) / 900 span_distance = abs(span1 - span2) / max(span1, span2) if chr1 == chr2 and position_distance + span_distance < span_position_treshold: return True else: return False def analyze_read_segments(primary, supplementaries, bam, options): read_name = primary.query_name alignments = [primary] + supplementaries alignment_list = [] for alignment in alignments: #correct query coordinates for reversely mapped reads if alignment.is_reverse: inferred_read_length = alignment.infer_read_length() if inferred_read_length is None: logging.warning('Skipping alignment because pysam was unable to infer length of read from CIGAR string (alignment.infer_read_length() returned None). 
Query name: {0}, CIGAR: {1}'.format(alignment.query_name, alignment.cigarstring)) continue q_start = inferred_read_length - alignment.query_alignment_end q_end = inferred_read_length - alignment.query_alignment_start else: q_start = alignment.query_alignment_start q_end = alignment.query_alignment_end new_alignment_dict = { 'q_start': q_start, 'q_end': q_end, 'ref_id': alignment.reference_id, 'ref_start': alignment.reference_start, 'ref_end': alignment.reference_end, 'is_reverse': alignment.is_reverse } alignment_list.append(new_alignment_dict) sorted_alignment_list = sorted(alignment_list, key=lambda aln: (aln['q_start'], aln['q_end'])) #inferred_read_length = alignments[0].infer_read_length() sv_signatures = [] #Translocation signatures from other SV classes are stored separately for --all_bnd option translocation_signatures_all_bnds = [] tandem_duplications = [] translocations = [] for index in range(len(sorted_alignment_list) - 1): alignment_current = sorted_alignment_list[index] alignment_next = sorted_alignment_list[index + 1] distance_on_read = alignment_next['q_start'] - alignment_current['q_end'] #Same chromosome if alignment_current['ref_id'] == alignment_next['ref_id']: ref_chr = bam.getrname(alignment_current['ref_id']) #Same orientation if alignment_current['is_reverse'] == alignment_next['is_reverse']: #Compute distance on reference depending on orientation if alignment_current['is_reverse']: distance_on_reference = alignment_current['ref_start'] - alignment_next['ref_end'] else: distance_on_reference = alignment_next['ref_start'] - alignment_current['ref_end'] #No overlap on read if distance_on_read >= -options.segment_overlap_tolerance: #No overlap on reference if distance_on_reference >= -options.segment_overlap_tolerance: deviation = distance_on_read - distance_on_reference #INS candidate if deviation >= options.min_sv_size: #No gap on reference if distance_on_reference <= options.segment_gap_tolerance: if not alignment_current['is_reverse']: try: insertion_seq = primary.query_sequence[alignment_current['q_end']:alignment_current['q_end']+deviation] except TypeError: insertion_seq = "" sv_signatures.append(SignatureInsertion(ref_chr, alignment_current['ref_end'], alignment_current['ref_end'] + deviation, "suppl", read_name, insertion_seq)) else: try: insertion_seq = primary.query_sequence[primary.infer_read_length() - alignment_next['q_start']:primary.infer_read_length() - alignment_next['q_start'] + deviation] except TypeError: insertion_seq = "" sv_signatures.append(SignatureInsertion(ref_chr, alignment_current['ref_start'], alignment_current['ref_start'] + deviation, "suppl", read_name, insertion_seq)) #DEL candidate elif -options.max_sv_size <= deviation <= -options.min_sv_size: #No gap on read if distance_on_read <= options.segment_gap_tolerance: if not alignment_current['is_reverse']: sv_signatures.append(SignatureDeletion(ref_chr, alignment_current['ref_end'], alignment_current['ref_end'] - deviation, "suppl", read_name)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_current['ref_end'] - deviation, 'fwd', "suppl", read_name)) else: sv_signatures.append(SignatureDeletion(ref_chr, alignment_next['ref_end'], alignment_next['ref_end'] - deviation, "suppl", read_name)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_next['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_end'] - deviation, 'fwd', "suppl", 
read_name)) #Either very large DEL or TRANS elif deviation < -options.max_sv_size: #No gap on read if distance_on_read <= options.segment_gap_tolerance: if not alignment_current['is_reverse']: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('fwd', 'fwd', ref_chr, alignment_current['ref_end'] - 1, ref_chr, alignment_next['ref_start'])) else: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('rev', 'rev', ref_chr, alignment_current['ref_start'], ref_chr, alignment_next['ref_end'] - 1)) #overlap on reference else: #Tandem Duplication if distance_on_reference <= -options.min_sv_size: if not alignment_current['is_reverse']: #Tandem Duplication if alignment_next['ref_end'] > alignment_current['ref_start']: tandem_duplications.append((ref_chr, alignment_next['ref_start'], alignment_current['ref_end'], True, True)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) #Large tandem duplication elif distance_on_reference >= -options.max_sv_size: tandem_duplications.append((ref_chr, alignment_next['ref_start'], alignment_current['ref_end'], False, True)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) #Either very large TANDEM or TRANS else: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('fwd', 'fwd', ref_chr, alignment_current['ref_end'] - 1, ref_chr, alignment_next['ref_start'])) else: #Tandem Duplication if alignment_next['ref_start'] < alignment_current['ref_end']: tandem_duplications.append((ref_chr, alignment_current['ref_start'], alignment_next['ref_end'], True, False)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) #Large tandem duplication elif distance_on_reference >= -options.max_sv_size: tandem_duplications.append((ref_chr, alignment_current['ref_start'], alignment_next['ref_end'], False, False)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) #Either very large TANDEM or TRANS else: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('rev', 'rev', ref_chr, alignment_current['ref_start'], ref_chr, alignment_next['ref_end'] - 1)) #Different orientations else: #Normal to reverse if not alignment_current['is_reverse'] and alignment_next['is_reverse']: if -options.segment_overlap_tolerance <= distance_on_read <= options.segment_gap_tolerance: if alignment_next['ref_start'] - alignment_current['ref_end'] >= -options.segment_overlap_tolerance: # Case 1 #INV candidate if options.min_sv_size <= alignment_next['ref_end'] - alignment_current['ref_end'] <= options.max_sv_size: 
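        #Case 1 (left_fwd): a forward segment is followed by a reverse segment
        #mapping further downstream, i.e. the read enters the inverted region
        #from the left. The stretch between the two segment ends
        #(alignment_current['ref_end'] to alignment_next['ref_end']) is recorded
        #as an inversion signature if its length lies within
        #[min_sv_size, max_sv_size]; larger distances are treated as
        #translocation breakpoints in the branch below.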
sv_signatures.append(SignatureInversion(ref_chr, alignment_current['ref_end'], alignment_next['ref_end'], "suppl", read_name, "left_fwd")) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) #Either very large INV or TRANS elif alignment_next['ref_end'] - alignment_current['ref_end'] > options.max_sv_size: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('fwd', 'rev', ref_chr, alignment_current['ref_end'] - 1, ref_chr, alignment_next['ref_end'] - 1)) elif alignment_current['ref_start'] - alignment_next['ref_end'] >= -options.segment_overlap_tolerance: # Case 3 #INV candidate if options.min_sv_size <= alignment_current['ref_end'] - alignment_next['ref_end'] <= options.max_sv_size: sv_signatures.append(SignatureInversion(ref_chr, alignment_next['ref_end'], alignment_current['ref_end'], "suppl", read_name, "left_rev")) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) #Either very large INV or TRANS elif alignment_current['ref_end'] - alignment_next['ref_end'] > options.max_sv_size: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_end'] - 1, 'fwd', ref_chr, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('fwd', 'rev', ref_chr, alignment_current['ref_end'] - 1, ref_chr, alignment_next['ref_end'] - 1)) else: pass #print("Overlapping read segments in read", read_name) #Reverse to normal if alignment_current['is_reverse'] and not alignment_next['is_reverse']: if -options.segment_overlap_tolerance <= distance_on_read <= options.segment_gap_tolerance: if alignment_next['ref_start'] - alignment_current['ref_end'] >= -options.segment_overlap_tolerance: # Case 2 #INV candidate if options.min_sv_size <= alignment_next['ref_start'] - alignment_current['ref_start'] <= options.max_sv_size: sv_signatures.append(SignatureInversion(ref_chr, alignment_current['ref_start'], alignment_next['ref_start'], "suppl", read_name, "right_fwd")) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) #Either very large INV or TRANS elif alignment_next['ref_start'] - alignment_current['ref_start'] > options.max_sv_size: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('rev', 'fwd', ref_chr, alignment_current['ref_start'], ref_chr, alignment_next['ref_start'])) elif alignment_current['ref_start'] - alignment_next['ref_end'] >= -options.segment_overlap_tolerance: # Case 4 #INV candidate if options.min_sv_size <= alignment_current['ref_start'] - alignment_next['ref_start'] <= options.max_sv_size: sv_signatures.append(SignatureInversion(ref_chr, alignment_next['ref_start'], alignment_current['ref_start'], "suppl", read_name, "right_rev")) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) #Either very large INV or 
TRANS elif alignment_current['ref_start'] - alignment_next['ref_start'] > options.max_sv_size: sv_signatures.append(SignatureTranslocation(ref_chr, alignment_current['ref_start'], 'rev', ref_chr, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('rev', 'fwd', ref_chr, alignment_current['ref_start'], ref_chr, alignment_next['ref_start'])) else: pass #print("Overlapping read segments in read", read_name) #Different chromosomes else: ref_chr_current = bam.getrname(alignment_current['ref_id']) ref_chr_next = bam.getrname(alignment_next['ref_id']) #Same orientation if alignment_current['is_reverse'] == alignment_next['is_reverse']: #No overlap on read if distance_on_read >= -options.segment_overlap_tolerance: #No gap on read if distance_on_read <= options.segment_gap_tolerance: if not alignment_current['is_reverse']: sv_signatures.append(SignatureTranslocation(ref_chr_current, alignment_current['ref_end'] - 1, 'fwd', ref_chr_next, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('fwd', 'fwd', ref_chr_current, alignment_current['ref_end'] - 1, ref_chr_next, alignment_next['ref_start'])) else: sv_signatures.append(SignatureTranslocation(ref_chr_current, alignment_current['ref_start'], 'rev', ref_chr_next, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('rev', 'rev', ref_chr_current, alignment_current['ref_start'], ref_chr_next, alignment_next['ref_end'] - 1)) #Overlap on read else: pass #print("Overlapping read segments in read", read_name) #Different orientation else: #No overlap on read if distance_on_read >= -options.segment_overlap_tolerance: #No gap on read if distance_on_read <= options.segment_gap_tolerance: if not alignment_current['is_reverse']: sv_signatures.append(SignatureTranslocation(ref_chr_current, alignment_current['ref_end'] - 1, 'fwd', ref_chr_next, alignment_next['ref_end'] - 1, 'rev', "suppl", read_name)) translocations.append(('fwd', 'rev', ref_chr_current, alignment_current['ref_end'] - 1, ref_chr_next, alignment_next['ref_end'] - 1)) else: sv_signatures.append(SignatureTranslocation(ref_chr_current, alignment_current['ref_start'], 'rev', ref_chr_next, alignment_next['ref_start'], 'fwd', "suppl", read_name)) translocations.append(('rev', 'fwd', ref_chr_current, alignment_current['ref_start'], ref_chr_next, alignment_next['ref_start'])) #Overlap on read else: pass #print("Overlapping read segments in read", read_name) #Handle tandem duplications current_chromosome = None current_starts = [] current_ends = [] current_copy_number = 0 current_fully_covered = [] for tandem_duplication in tandem_duplications: if current_chromosome == None: current_chromosome = tandem_duplication[0] current_starts.append(tandem_duplication[1]) current_ends.append(tandem_duplication[2]) current_copy_number = 1 current_fully_covered.append(tandem_duplication[3]) current_direction = tandem_duplication[4] else: if is_similar(current_chromosome, mean(current_starts), mean(current_ends), tandem_duplication[0], tandem_duplication[1], tandem_duplication[2]) and current_direction == tandem_duplication[4]: current_starts.append(tandem_duplication[1]) current_ends.append(tandem_duplication[2]) current_copy_number += 1 current_fully_covered.append(tandem_duplication[3]) else: fully_covered = True if sum(current_fully_covered) else False sv_signatures.append(SignatureDuplicationTandem(current_chromosome, int(mean(current_starts)), int(mean(current_ends)), current_copy_number, fully_covered, "suppl", read_name)) 
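        #The current segment pair no longer matches the open group: the finished
        #tandem-duplication group was flushed above, with consensus coordinates
        #taken as the means over all copies. Reset the accumulators to start a
        #new group from the current pair (note that current_direction retains
        #its previous value at this point).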
current_chromosome = tandem_duplication[0] current_starts =[tandem_duplication[1]] current_ends =[tandem_duplication[2]] current_copy_number = 1 current_fully_covered = [tandem_duplication[3]] if current_chromosome != None: fully_covered = True if sum(current_fully_covered) else False sv_signatures.append(SignatureDuplicationTandem(current_chromosome, int(mean(current_starts)), int(mean(current_ends)), current_copy_number, fully_covered, "suppl", read_name)) #Handle insertions_from for this_index in range(len(translocations)): this_dir1 = translocations[this_index][0] this_dir2 = translocations[this_index][1] this_chr1 = translocations[this_index][2] this_pos1 = translocations[this_index][3] this_chr2 = translocations[this_index][4] this_pos2 = translocations[this_index][5] for before_dir1, before_dir2, before_chr1, before_pos1, before_chr2, before_pos2 in translocations[:this_index]: #Same direction at destination and origin if before_dir1 == this_dir2 and before_dir2 == this_dir1: #Same position at destination if is_similar(before_chr1, before_pos1, before_pos1+1, this_chr2, this_pos2, this_pos2+1, span_position_treshold=0.1): #Same chromosome for origin if before_chr2 == this_chr1: #INS_DUP candidate if before_dir2 == before_dir1: if before_dir1 == 'fwd': if options.min_sv_size <= this_pos1 - before_pos2 + 1 <= options.max_sv_size: sv_signatures.append(SignatureInsertionFrom(before_chr2, before_pos2, this_pos1 + 1, before_chr1, int(mean([before_pos1 + 1, this_pos2])), "suppl", read_name)) elif before_dir1 == 'rev': if options.min_sv_size <= before_pos2 - this_pos1 <= options.max_sv_size: sv_signatures.append(SignatureInsertionFrom(before_chr2, this_pos1, before_pos2 + 1, before_chr1, int(mean([before_pos1, this_pos2 + 1])), "suppl", read_name)) #INV_INS_DUP candidate else: pass return sv_signatures, translocation_signatures_all_bnds svim-2.0.0/src/svim/SVIM_intra.py000066400000000000000000000044451406305341300165670ustar00rootroot00000000000000from __future__ import print_function import sys from svim.SVSignature import SignatureDeletion, SignatureInsertion, SignatureTranslocation def analyze_cigar_indel(tuples, min_length): """Parses CIGAR tuples (op, len) and returns Indels with a length > minLength""" pos_ref = 0 pos_read = 0 indels = [] for operation, length in tuples: if operation == 0: # alignment match pos_ref += length pos_read += length elif operation == 1: # insertion if length >= min_length: indels.append((pos_ref, pos_read, length, "INS")) pos_read += length elif operation == 2: # deletion if length >= min_length: indels.append((pos_ref, pos_read, length, "DEL")) pos_ref += length elif operation == 4: # soft clip pos_read += length elif operation == 7 or operation == 8: # match or mismatch pos_ref += length pos_read += length return indels def analyze_alignment_indel(alignment, bam, query_name, options): sv_signatures = [] #Translocation signatures from other SV classes are stored separately for --all_bnd option translocation_signatures_all_bnds = [] ref_chr = bam.getrname(alignment.reference_id) ref_start = alignment.reference_start indels = analyze_cigar_indel(alignment.cigartuples, options.min_sv_size) for pos_ref, pos_read, length, typ in indels: if typ == "DEL": sv_signatures.append(SignatureDeletion(ref_chr, ref_start + pos_ref, ref_start + pos_ref + length, "cigar", query_name)) if options.all_bnds: translocation_signatures_all_bnds.append(SignatureTranslocation(ref_chr, ref_start + pos_ref, 'fwd', ref_chr, ref_start + pos_ref + length, 'fwd', "cigar", query_name)) 
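        #Illustrative example (hypothetical CIGAR): for 100M50I200M2000D150M and
        #min_length=50, analyze_cigar_indel returns
        #[(100, 100, 50, "INS"), (300, 350, 2000, "DEL")], i.e. (pos_ref,
        #pos_read, length, type) tuples relative to the alignment start. The
        #branches here convert pos_ref into absolute reference coordinates by
        #adding ref_start.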
elif typ == "INS": try: insertion_seq = alignment.query_sequence[pos_read:pos_read+length] except TypeError: insertion_seq = "" sv_signatures.append(SignatureInsertion(ref_chr, ref_start + pos_ref, ref_start + pos_ref + length, "cigar", query_name, insertion_seq)) return sv_signatures, translocation_signatures_all_bnds svim-2.0.0/src/svim/SVIM_merging.py000066400000000000000000000243501406305341300170770ustar00rootroot00000000000000from __future__ import print_function import sys from bisect import bisect_left from collections import defaultdict from math import pow, sqrt from svim.SVSignature import SignatureTranslocation, SignatureInsertionFrom, SignatureClusterBiLocal from svim.SVCandidate import CandidateDuplicationInterspersed from svim.SVIM_clustering import span_position_distance_clusters def flag_cutpaste_candidates(insertion_from_signature_clusters, deletion_signature_clusters, options): """Flag duplication signature clusters if they overlap a deletion""" int_duplication_candidates = [] for ins_cluster in insertion_from_signature_clusters: # Compute distances of every deletion cluster to the current insertion/duplication distances = [(del_index, span_position_distance_clusters(del_cluster, ins_cluster, options.position_distance_normalizer)) \ for del_index, del_cluster in enumerate(deletion_signature_clusters)] closest_deletion_index, closest_deletion = sorted(distances, key=lambda obj: obj[1])[0] source_contig, source_start, source_end = ins_cluster.get_source() dest_contig, dest_start, dest_end = ins_cluster.get_destination() # If close deletion cluster found if closest_deletion <= options.del_ins_dup_max_distance: #Potential cut&paste insertion int_duplication_candidates.append(CandidateDuplicationInterspersed(source_contig, source_start, source_end, dest_contig, dest_start, dest_end, ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos, cutpaste=True)) else: #Interspersed duplication int_duplication_candidates.append(CandidateDuplicationInterspersed(source_contig, source_start, source_end, dest_contig, dest_start, dest_end, ins_cluster.members, ins_cluster.score, ins_cluster.std_span, ins_cluster.std_pos, cutpaste=False)) return int_duplication_candidates def get_closest_index(input_list, input_number): """ Assumes input_list is sorted. Returns index of closest value to input_number. If two numbers are equally close, return the index of the smallest number. """ if len(input_list) < 1: return None pos = bisect_left(input_list, input_number) if pos == 0: return 0 if pos == len(input_list): return len(input_list) - 1 before = input_list[pos - 1] after = input_list[pos] if after - input_number < input_number - before: return pos else: return pos - 1 def distance_positions(position1, position2): return float("inf") if position1[0] != position2[0] else abs(position1[1] - position2[1]) def calculate_score_insertion(main_score, translocation_distances, translocation_stds, destination_stds): """Calculate the score of a merged insertion or duplication detected from an insertion. 
Parameters: - main_score - score of the underlying main insertion - translocation_distances - mean distance of the translocation clusters flanking the main insertion (left) - translocation_stds - standard deviation of the translocation clusters flanking the main insertion (left) - destination_stds - standard deviations of the left and right translocation destinations""" #scale translocation distance to [0, 1] range td0 = max(0, 100 - translocation_distances[0]) / 100 td1 = max(0, 100 - translocation_distances[1]) / 100 #scale translocation std to [0, 1] range if translocation_stds[0] == None: ts0 = 1 else: ts0 = max(0, 100 - translocation_stds[0]) / 100 if translocation_stds[1] == None: ts1 = 1 else: ts1 = max(0, 100 - translocation_stds[1]) / 100 #scale destination stds to [0, 1] range if destination_stds[0] == None: ds0 = 1 else: ds0 = max(0, 100 - destination_stds[0]) / 100 if destination_stds[1] == None: ds1 = 1 else: ds1 = max(0, 100 - destination_stds[1]) / 100 #calculate final score as product of components product = td0 * td1 * ts0 * ts1 * ds0 * ds1 final_score = pow(product, 1/6) * main_score return final_score def merge_translocations_at_insertions(translocation_signature_clusters, insertion_signature_clusters, options): if len(insertion_signature_clusters) == 0: return [], [] #add reverse translocation signature clusters reversed_translocation_signature_clusters = [] for cluster in translocation_signature_clusters: reversed_cluster = SignatureClusterBiLocal(cluster.dest_contig, cluster.dest_start, cluster.dest_end, cluster.source_contig, cluster.source_start, cluster.source_end, cluster.score, cluster.size, cluster.members, cluster.type, cluster.std_pos, cluster.std_span) reversed_cluster.direction1 = 'fwd' if cluster.direction2 == 'rev' else 'rev' reversed_cluster.direction2 = 'fwd' if cluster.direction1 == 'rev' else 'rev' reversed_translocation_signature_clusters.append(reversed_cluster) translocation_signature_clusters.extend(reversed_translocation_signature_clusters) translocation_partitions_fwdfwd_dict = defaultdict(list) translocation_partitions_revrev_dict = defaultdict(list) for cluster in translocation_signature_clusters: if cluster.direction1 == 'fwd' and cluster.direction2 == 'fwd': translocation_partitions_fwdfwd_dict[cluster.source_contig].append(cluster) elif cluster.direction1 == 'rev' and cluster.direction2 == 'rev': translocation_partitions_revrev_dict[cluster.source_contig].append(cluster) for contig in translocation_partitions_fwdfwd_dict.keys(): translocation_partitions_fwdfwd_dict[contig] = sorted(translocation_partitions_fwdfwd_dict[contig], key=lambda cluster: cluster.get_key()) for contig in translocation_partitions_revrev_dict.keys(): translocation_partitions_revrev_dict[contig] = sorted(translocation_partitions_revrev_dict[contig], key=lambda cluster: cluster.get_key()) translocation_partition_means_fwdfwd_dict = {} translocation_partition_stds_fwdfwd_dict = {} for contig in translocation_partitions_fwdfwd_dict.keys(): translocation_partition_means_fwdfwd_dict[contig] = [cluster.source_start for cluster in translocation_partitions_fwdfwd_dict[contig]] translocation_partition_stds_fwdfwd_dict[contig] = [cluster.std_span for cluster in translocation_partitions_fwdfwd_dict[contig]] translocation_partition_means_revrev_dict = {} translocation_partition_stds_revrev_dict = {} for contig in translocation_partitions_revrev_dict.keys(): translocation_partition_means_revrev_dict[contig] = [cluster.source_start for cluster in 
translocation_partitions_revrev_dict[contig]] translocation_partition_stds_revrev_dict[contig] = [cluster.std_span for cluster in translocation_partitions_revrev_dict[contig]] inserted_regions_to_remove = [] insertion_from_signature_clusters = [] for insertion_index, ins_cluster in enumerate(insertion_signature_clusters): ins_contig, ins_start, ins_end = ins_cluster.get_source() try: closest_to_start_fwdfwd_index = get_closest_index(translocation_partition_means_fwdfwd_dict[ins_contig], ins_start) closest_to_start_fwdfwd_mean = translocation_partition_means_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index] closest_to_start_revrev_index = get_closest_index(translocation_partition_means_revrev_dict[ins_contig], ins_start) closest_to_start_revrev_mean = translocation_partition_means_revrev_dict[ins_contig][closest_to_start_revrev_index] except KeyError: continue # if translocations found close to start of insertion if abs(closest_to_start_fwdfwd_mean - ins_start) <= options.trans_sv_max_distance and abs(closest_to_start_revrev_mean - ins_start) <= options.trans_sv_max_distance: destination_from_start_fwdfwd = (translocation_partitions_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index].dest_contig, translocation_partitions_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index].dest_start) destination_from_start_revrev = (translocation_partitions_revrev_dict[ins_contig][closest_to_start_revrev_index].dest_contig, translocation_partitions_revrev_dict[ins_contig][closest_to_start_revrev_index].dest_start) destination_from_start_fwdfwd_std = translocation_partitions_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index].std_pos destination_from_start_revrev_std = translocation_partitions_revrev_dict[ins_contig][closest_to_start_revrev_index].std_pos # if the two destinations have the right distance distance = abs(destination_from_start_revrev[1] - destination_from_start_fwdfwd[1]) if destination_from_start_revrev[0] == destination_from_start_fwdfwd[0] and 0.95 <= ((ins_end - ins_start + 1) / (distance + 1)) <= 1.1: members = ins_cluster.members + translocation_partitions_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index].members + translocation_partitions_revrev_dict[ins_contig][closest_to_start_revrev_index].members score = calculate_score_insertion(ins_cluster.score, [abs(closest_to_start_fwdfwd_mean - ins_start), abs(closest_to_start_revrev_mean - ins_start)], [translocation_partition_stds_fwdfwd_dict[ins_contig][closest_to_start_fwdfwd_index], translocation_partition_stds_revrev_dict[ins_contig][closest_to_start_revrev_index]], [destination_from_start_fwdfwd_std, destination_from_start_revrev_std]) insertion_from_signature_clusters.append(SignatureClusterBiLocal(destination_from_start_revrev[0], min(destination_from_start_revrev[1], destination_from_start_fwdfwd[1]), max(destination_from_start_revrev[1], destination_from_start_fwdfwd[1]), ins_contig, ins_start, ins_start + distance, score, len(members), members, "DUP_INT", ins_cluster.std_span, ins_cluster.std_pos)) inserted_regions_to_remove.append(insertion_index) return insertion_from_signature_clusters, inserted_regions_to_remove svim-2.0.0/src/svim/SVIM_plot.py000066400000000000000000000145411406305341300164260ustar00rootroot00000000000000import matplotlib import logging import random import math matplotlib.use('Agg') import matplotlib.pyplot as plt def plot_sv_lengths(deletion_candidates, inversion_candidates, int_duplication_candidates, tan_dup_candidates, novel_insertion_candidates, options): len_dict_5 = dict() 
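    #Two length histograms are produced: one for candidates with score >= 5 and
    #a stricter one for candidates with score >= 10. DEL and INV lengths are
    #taken from the source interval, DUP and INS lengths from the destination
    #interval.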
    len_dict_10 = dict()
    len_dict_5["DEL"] = [v.get_source()[2] - v.get_source()[1] for v in deletion_candidates if v.score >= 5]
    len_dict_5["INV"] = [v.get_source()[2] - v.get_source()[1] for v in inversion_candidates if v.score >= 5]
    len_dict_5["DUP_INT"] = [v.get_destination()[2] - v.get_destination()[1] for v in int_duplication_candidates if v.score >= 5]
    len_dict_5["DUP_TAN"] = [v.get_destination()[2] - v.get_destination()[1] for v in tan_dup_candidates if v.score >= 5]
    len_dict_5["INS"] = [v.get_destination()[2] - v.get_destination()[1] for v in novel_insertion_candidates if v.score >= 5]
    draw_sv_length_plot(dict_of_lengths=len_dict_5, output=options.working_dir + "/sv-lengths-q5.png")
    len_dict_10["DEL"] = [v.get_source()[2] - v.get_source()[1] for v in deletion_candidates if v.score >= 10]
    len_dict_10["INV"] = [v.get_source()[2] - v.get_source()[1] for v in inversion_candidates if v.score >= 10]
    len_dict_10["DUP_INT"] = [v.get_destination()[2] - v.get_destination()[1] for v in int_duplication_candidates if v.score >= 10]
    len_dict_10["DUP_TAN"] = [v.get_destination()[2] - v.get_destination()[1] for v in tan_dup_candidates if v.score >= 10]
    len_dict_10["INS"] = [v.get_destination()[2] - v.get_destination()[1] for v in novel_insertion_candidates if v.score >= 10]
    draw_sv_length_plot(dict_of_lengths=len_dict_10, output=options.working_dir + "/sv-lengths-q10.png")


def draw_sv_length_plot(dict_of_lengths, output):
    """Make two stacked bar charts of the number of SVs by length, split by SV type.

    A consistent colouring scheme is used for the types in "standard_order" to make
    plots comparable. The first chart covers lengths up to 2kb with bins of 10bp; the
    second covers lengths up to 20kb with bins of 100bp and uses log scaling on the y-axis.
    """
    standard_order = ['DEL', 'INS', 'INV', 'DUP_INT', 'DUP_TAN']
    names, lengths = zip(
        *sorted([(svtype, lengths) for svtype, lengths in dict_of_lengths.items()],
                key=lambda x: standard_order.index(x[0])))
    plt.subplot(2, 1, 1)
    plt.hist(x=lengths, bins=[i for i in range(0, 2000, 10)], stacked=True, histtype='bar', label=names)
    plt.xlabel('Length of structural variant')
    plt.ylabel('Number of variants')
    plt.legend(frameon=False, fontsize="small")
    plt.subplot(2, 1, 2)
    plt.hist(x=lengths, bins=[i for i in range(0, 20000, 100)], stacked=True, histtype='bar', label=names, log=True)
    plt.xlabel('Length of structural variant')
    plt.ylabel('Number of variants')
    plt.legend(frameon=False, fontsize="small")
    plt.tight_layout()
    plt.savefig(output)
    plt.clf()


def plot_sv_alleles(candidates, options):
    refs_11 = [candidate.ref_reads for candidate in candidates if candidate.genotype == '1/1' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    alts_11 = [candidate.alt_reads for candidate in candidates if candidate.genotype == '1/1' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    refs_10 = [candidate.ref_reads for candidate in candidates if candidate.genotype == '0/1' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    alts_10 = [candidate.alt_reads for candidate in candidates if candidate.genotype == '0/1' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    refs_00 = [candidate.ref_reads for candidate in candidates if candidate.genotype == '0/0' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    alts_00 = [candidate.alt_reads for candidate in candidates if candidate.genotype == '0/0' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    refs_nn = [candidate.ref_reads for candidate in candidates if candidate.genotype == './.' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    alts_nn = [candidate.alt_reads for candidate in candidates if candidate.genotype == './.' and candidate.score >= 5 and candidate.ref_reads is not None and candidate.alt_reads is not None]
    draw_allele_plot(refs_11, alts_11, refs_10, alts_10, refs_00, alts_00, refs_nn, alts_nn, output=options.working_dir + "/sv-genotypes-q5.png")


def draw_allele_plot(refs_11, alts_11, refs_10, alts_10, refs_00, alts_00, refs_nn, alts_nn, output):
    """Makes a scatter plot of allele support."""
    num_points = len(refs_11) + len(refs_10) + len(refs_00) + len(refs_nn)
    point_alpha = 10 / math.sqrt(max(100, num_points))
    plt.scatter(x=[ref + random.uniform(-0.5, 0.5) for ref in refs_11], y=[alt + random.uniform(-0.5, 0.5) for alt in alts_11], c='tab:red', alpha=point_alpha, label='1/1', edgecolors='none')
    plt.scatter(x=[ref + random.uniform(-0.5, 0.5) for ref in refs_10], y=[alt + random.uniform(-0.5, 0.5) for alt in alts_10], c='tab:purple', alpha=point_alpha, label='0/1', edgecolors='none')
    plt.scatter(x=[ref + random.uniform(-0.5, 0.5) for ref in refs_00], y=[alt + random.uniform(-0.5, 0.5) for alt in alts_00], c='tab:blue', alpha=point_alpha, label='0/0', edgecolors='none')
    plt.scatter(x=[ref + random.uniform(-0.5, 0.5) for ref in refs_nn], y=[alt + random.uniform(-0.5, 0.5) for alt in alts_nn], c='tab:brown', alpha=point_alpha, label='./.', edgecolors='none')
    axes = plt.gca()
    axes.set_xlim([0, 60])
    axes.set_ylim([0, 60])
    plt.xlabel('Reference allele support')
    plt.ylabel('Variant allele support')
    leg = plt.legend(frameon=True, fontsize="medium")
    for lh in leg.legendHandles:
        lh.set_alpha(1.0)
    plt.tight_layout()
    plt.savefig(output)
    plt.clf()
svim-2.0.0/src/svim/SVSignature.py000066400000000000000000000316571406305341300170300ustar00rootroot00000000000000
import logging


class Signature:
    """Signature class for basic signatures of structural variants.

    A signature is always detected from a single read.
""" def __init__(self, contig, start, end, signature, read): self.contig = contig self.start = start self.end = end self.signature = signature self.read = read self.type = None if self.end < self.start: logging.warning("Signature with invalid coordinates (end < start): " + self.as_string()) def get_source(self): return (self.contig, self.start, self.end) def get_key(self): contig, start, end = self.get_source() return (self.type, contig, end) def downstream_distance_to(self, signature2): """Return distance >= 0 between this signature's end and the start of signature2.""" this_contig, this_start, this_end = self.get_source() other_contig, other_start, other_end = signature2.get_source() if self.type == signature2.type and this_contig == other_contig: return max(0, other_start - this_end) else: return float("inf") def as_string(self, sep="\t"): contig, start, end = self.get_source() return sep.join(["{0}","{1}","{2}","{3}","{4}"]).format(contig, start, end, "{0};{1}".format(self.type, self.signature), self.read) class SignatureDeletion(Signature): """SV Signature: a region (contig:start-end) has been deleted and is not present in sample""" def __init__(self, contig, start, end, signature, read): self.contig = contig assert end >= start #0-based start of the deletion (first deleted base) self.start = start #0-based end of the deletion (one past the last deleted base) self.end = end self.signature = signature self.read = read self.type = "DEL" class SignatureInsertion(Signature): """SV Signature: a region of length end-start has been inserted at contig:start""" def __init__(self, contig, start, end, signature, read, sequence): self.contig = contig assert end >= start #0-based start of the insertion (base after the insertion) self.start = start #0-based start of the insertion (base after the insertion) + length of the insertion self.end = end self.signature = signature self.read = read self.sequence = sequence self.type = "INS" def get_key(self): contig, start, end = self.get_source() return (self.type, contig, start) def downstream_distance_to(self, signature2): """Return distance >= 0 between this signature's end and the start of signature2.""" this_contig, this_start, this_end = self.get_source() other_contig, other_start, other_end = signature2.get_source() if self.type == signature2.type and this_contig == other_contig: return max(0, other_start - this_start) else: return float("inf") class SignatureInversion(Signature): """SV Signature: a region (contig:start-end) has been inverted in the sample""" def __init__(self, contig, start, end, signature, read, direction): self.contig = contig assert end >= start #0-based start of the inversion (first inverted base) self.start = start #0-based end of the inversion (one past the last inverted base) self.end = end self.signature = signature self.read = read self.type = "INV" self.direction = direction def as_string(self, sep="\t"): contig, start, end = self.get_source() return sep.join(["{0}","{1}","{2}","{3}","{4}"]).format(contig, start, end, "{0};{1};{2}".format(self.type, self.direction, self.signature), self.read) class SignatureInsertionFrom(Signature): """SV Signature: a region (contig:start-end) has been inserted at contig2:pos in the sample""" def __init__(self, contig1, start, end, contig2, pos, signature, read): self.contig1 = contig1 assert end >= start #0-based start of the region (first copied base) self.start = start #0-based end of the region (one past the last copied base) self.end = end self.contig2 = contig2 #0-based start of the 
insertion (base after the insertion) self.pos = pos self.signature = signature self.read = read self.type = "DUP_INT" def get_source(self): return (self.contig1, self.start, self.end) def get_destination(self): source_contig, source_start, source_end = self.get_source() return (self.contig2, self.pos, self.pos + (source_end - source_start)) def get_key(self): source_contig, source_start, source_end = self.get_source() dest_contig, dest_start, dest_end = self.get_destination() return (self.type, dest_contig, source_contig, dest_start) def downstream_distance_to(self, signature2): """Return distance >= 0 between this signature's end and the start of signature2.""" this_source_contig, this_source_start, this_source_end = self.get_source() other_source_contig, other_source_start, other_source_end = signature2.get_source() this_destination_contig, this_destination_start, this_destination_end = self.get_destination() other_destination_contig, other_destination_start, other_destination_end = signature2.get_destination() if self.type == signature2.type and \ this_destination_contig == other_destination_contig and \ this_source_contig == other_source_contig: return max(0, other_destination_start - this_destination_start) else: return float("inf") def as_string(self, sep="\t"): source_contig, source_start, source_end = self.get_source() dest_contig, dest_start, dest_end = self.get_destination() return sep.join(["{0}:{1}-{2}","{3}:{4}-{5}","{6}", "{7}"]).format(source_contig, source_start, source_end, dest_contig, dest_start, dest_end, "{0};{1}".format(self.type, self.signature), self.read) class SignatureDuplicationTandem(Signature): """SV Signature: a region (contig:start-end) has been tandemly duplicated""" def __init__(self, contig, start, end, copies, fully_covered, signature, read): self.contig = contig assert end >= start #0-based start of the region (first copied base) self.start = start #0-based end of the region (one past the last copied base) self.end = end #number of copies inserted after end of the region (in tandem) self.copies = copies self.fully_covered = fully_covered self.signature = signature self.read = read self.type = "DUP_TAN" def get_destination(self): source_contig, source_start, source_end = self.get_source() return (source_contig, source_end, source_end + self.copies * (source_end - source_start)) def as_string(self, sep="\t"): source_contig, source_start, source_end = self.get_source() dest_contig, dest_start, dest_end = self.get_destination() return sep.join(["{0}:{1}-{2}","{3}:{4}-{5}","{6}", "{7}"]).format(source_contig, source_start, source_end, dest_contig, dest_start, dest_end, "{0};{1};{2}".format(self.type, self.signature, self.copies), self.read) class SignatureTranslocation(Signature): """SV Signature: two positions (contig1:pos1 and contig2:pos2) are connected in the sample""" def __init__(self, contig1, pos1, direction1, contig2, pos2, direction2, signature, read): if contig1 < contig2 or (contig1 == contig2 and pos1 < pos2): self.contig1 = contig1 #0-based source of the translocation (first base before the translocation) self.pos1 = pos1 self.direction1 = direction1 self.contig2 = contig2 #0-based destination of the translocation (first base after the translocation) self.pos2 = pos2 self.direction2 = direction2 else: self.contig1 = contig2 #0-based source of the translocation (first base before the translocation) self.pos1 = pos2 self.direction1 = 'fwd' if direction2 == 'rev' else 'rev' self.contig2 = contig1 #0-based destination of the translocation (first 
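# Worked example for the DUP_INT coordinate arithmetic above: a copied region of
# length (end - start) keeps its length at the insertion site, so get_destination()
# reports a destination interval of the same size. The coordinates below are
# hypothetical, for illustration only.
source_contig, source_start, source_end = "chr21", 1000, 1500   # copied region, length 500
dest_contig, dest_pos = "chr21", 8000                           # insertion point
destination = (dest_contig, dest_pos, dest_pos + (source_end - source_start))
assert destination == ("chr21", 8000, 8500)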
base after the translocation) self.pos2 = pos1 self.direction2 = 'fwd' if direction1 == 'rev' else 'rev' self.signature = signature self.read = read self.type = "BND" def get_source(self): return (self.contig1, self.pos1, self.pos1 + 1) def get_destination(self): return (self.contig2, self.pos2, self.pos2 + 1) def as_string(self, sep="\t"): source_contig, source_start, source_end = self.get_source() dest_contig, dest_start, dest_end = self.get_destination() return sep.join(["{0}:{1}-{2}","{3}:{4}-{5}","{6}", "{7}"]).format(source_contig, source_start, source_end, dest_contig, dest_start, dest_end, "{0};{1}".format(self.type, self.signature), self.read) def get_key(self): return (self.type, self.contig1, self.pos1) class SignatureClusterUniLocal(Signature): """Signature cluster class for clusters of signatures with only one genomic location. """ def __init__(self, contig, start, end, score, size, members, type, std_span, std_pos): self.contig = contig self.start = start self.end = end self.score = score self.std_span = std_span self.std_pos = std_pos self.size = size self.members = members self.type = type def get_bed_entry(self): return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(self.contig, self.start, self.end, "{0};{1};{2};{3}".format(self.type, self.size, self.std_span, self.std_pos), self.score, "["+"][".join([ev.as_string("|") for ev in self.members])+"]") def get_vcf_entry(self): if self.type in ["DEL", "INS", "INV"]: return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}".format(self.contig, self.start+1, ".", "N", "<" + self.type + ">", ".", "PASS", "SVTYPE={0};END={1};SVLEN={2};STD_SPAN={3};STD_POS={4}".format(self.type, self.end, self.end - self.start, self.std_span, self.std_pos)) else: return def get_length(self): return self.end - self.start class SignatureClusterBiLocal(Signature): """Signature cluster class for clusters of signatures with two genomic locations (source and destination). 
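# The SignatureTranslocation constructor above normalises a breakend so that the
# lexicographically smaller (contig, position) always becomes side 1; when the two
# sides are swapped, both read directions are flipped. A self-contained sketch of
# that rule (function name and tuple layout chosen here for illustration):
def canonical_breakend(contig1, pos1, dir1, contig2, pos2, dir2):
    flip = lambda d: 'fwd' if d == 'rev' else 'rev'
    if (contig1, pos1) < (contig2, pos2):
        return (contig1, pos1, dir1, contig2, pos2, dir2)
    # swap the two sides and flip both directions, as in the constructor above
    return (contig2, pos2, flip(dir2), contig1, pos1, flip(dir1))

assert canonical_breakend("chr5", 100, 'fwd', "chr2", 50, 'fwd') == \
       ("chr2", 50, 'rev', "chr5", 100, 'rev')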
""" def __init__(self, source_contig, source_start, source_end, dest_contig, dest_start, dest_end, score, size, members, type, std_span, std_pos): self.source_contig = source_contig self.source_start = source_start self.source_end = source_end self.dest_contig = dest_contig self.dest_start = dest_start self.dest_end = dest_end self.score = score self.std_span = std_span self.std_pos = std_pos self.size = size self.members = members self.type = type def get_source(self): return (self.source_contig, self.source_start, self.source_end) def get_destination(self): return (self.dest_contig, self.dest_start, self.dest_end) def get_bed_entries(self): source_entry = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(self.source_contig, self.source_start, self.source_end, "{0}_source;{1}:{2}-{3};{4};{5};{6}".format(self.type, self.dest_contig, self.dest_start, self.dest_end, self.size, self.std_span, self.std_pos), self.score, "["+"][".join([ev.as_string("|") for ev in self.members])+"]") dest_entry = "{0}\t{1}\t{2}\t{3}\t{4}\t{5}".format(self.dest_contig, self.dest_start, self.dest_end, "{0}_dest;{1}:{2}-{3};{4}".format(self.type, self.source_contig, self.source_start, self.source_end, self.size), self.score, "["+"][".join([ev.as_string("|") for ev in self.members])+"]") return (source_entry, dest_entry) def get_vcf_entry(self): if self.type == "DUP_TAN": return "{0}\t{1}\t{2}\t{3}\t{4}\t{5}\t{6}\t{7}".format(self.source_contig, self.source_start+1, ".", "N", "", ".", "PASS", "SVTYPE={0};END={1};SVLEN={2};STD_SPAN={3};STD_POS={4}".format("DUP:TANDEM", self.source_end, self.source_end - self.source_start, self.std_span, self.std_pos)) else: return def get_source_length(self): return self.source_end - self.source_start def get_destination_length(self): return self.dest_end - self.dest_startsvim-2.0.0/src/svim/__init__.py000066400000000000000000000000001406305341300163320ustar00rootroot00000000000000svim-2.0.0/src/svim/svim000066400000000000000000000276231406305341300151460ustar00rootroot00000000000000#!/usr/bin/env python3 __version__ = '2.0.0' __author__ = 'David Heller' import sys import os import re import pickle import gzip import logging import pysam from time import strftime, localtime from svim.SVIM_input_parsing import parse_arguments, guess_file_type, read_file_list from svim.SVIM_alignment import run_alignment from svim.SVIM_COLLECT import analyze_alignment_file_coordsorted, analyze_alignment_file_querysorted from svim.SVIM_CLUSTER import cluster_sv_signatures, write_signature_clusters_bed, write_signature_clusters_vcf from svim.SVIM_COMBINE import combine_clusters, write_candidates, write_final_vcf from svim.SVIM_genotyping import genotype from svim.SVIM_plot import plot_sv_lengths, plot_sv_alleles def main(): # Fetch command-line options options = parse_arguments(program_version=__version__) if not options.sub: print("Please choose one of the two modes ('reads' or 'alignment'). 
See --help for more information.") return # Set up logging logFormatter = logging.Formatter("%(asctime)s [%(levelname)-7.7s] %(message)s") rootLogger = logging.getLogger() if options.verbose: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) # Create working dir if it does not exist if not os.path.exists(options.working_dir): os.makedirs(options.working_dir) # Create log file fileHandler = logging.FileHandler("{0}/SVIM_{1}.log".format(options.working_dir, strftime("%y%m%d_%H%M%S", localtime())), mode="w") fileHandler.setFormatter(logFormatter) rootLogger.addHandler(fileHandler) consoleHandler = logging.StreamHandler() consoleHandler.setFormatter(logFormatter) rootLogger.addHandler(consoleHandler) logging.info("****************** Start SVIM, version {0} ******************".format(__version__)) logging.info("CMD: python3 {0}".format(" ".join(sys.argv))) logging.info("WORKING DIR: {0}".format(os.path.abspath(options.working_dir))) for arg in vars(options): logging.info("PARAMETER: {0}, VALUE: {1}".format(arg, getattr(options, arg))) logging.info("****************** STEP 1: COLLECT ******************") if options.sub == 'reads': logging.info("MODE: reads") logging.info("INPUT: {0}".format(os.path.abspath(options.reads))) logging.info("GENOME: {0}".format(os.path.abspath(options.genome))) reads_type = guess_file_type(options.reads) if reads_type == "unknown": return elif reads_type == "list": # List of read files sv_signatures = [] #Translocation signatures from other SV classes are stored separately for --all_bnd option translocation_signatures_all_bnds = [] for index, file_path in enumerate(read_file_list(options.reads)): logging.info("Starting processing of file {0} from the list..".format(index)) reads_type = guess_file_type(file_path) if reads_type == "unknown" or reads_type == "list": return bam_path = run_alignment(options.working_dir, options.genome, file_path, reads_type, options.cores, options.aligner, options.nanopore) aln_file = pysam.AlignmentFile(bam_path) sigs, trans_sigs = analyze_alignment_file_coordsorted(aln_file, options) sv_signatures.extend(sigs) translocation_signatures_all_bnds.extend(trans_sigs) else: # Single read file bam_path = run_alignment(options.working_dir, options.genome, options.reads, reads_type, options.cores, options.aligner, options.nanopore) aln_file = pysam.AlignmentFile(bam_path) sv_signatures, translocation_signatures_all_bnds = analyze_alignment_file_coordsorted(aln_file, options) elif options.sub == 'alignment': logging.info("MODE: alignment") logging.info("INPUT: {0}".format(os.path.abspath(options.bam_file))) aln_file = pysam.AlignmentFile(options.bam_file) try: if aln_file.header["HD"]["SO"] == "coordinate": try: aln_file.check_index() except ValueError: logging.warning("Input BAM file is missing a valid index. Please generate with 'samtools index'. Continuing without genotyping for now..") options.skip_genotyping = True except AttributeError: logging.warning("pysam's .check_index raised an Attribute error. Something is wrong with the input BAM file.") return sv_signatures, translocation_signatures_all_bnds = analyze_alignment_file_coordsorted(aln_file, options) elif aln_file.header["HD"]["SO"] == "queryname": sv_signatures, translocation_signatures_all_bnds = analyze_alignment_file_querysorted(aln_file, options) logging.warning("Skipping genotyping because it requires a coordinate-sorted input BAM file and an index. 
The given file, however, is queryname-sorted according to its header line.") options.skip_genotyping = True else: logging.error("Input BAM file needs to be coordinate-sorted or queryname-sorted. The given file, however, is unsorted according to its header line.") return except KeyError: logging.error("Is the given input BAM file sorted? It does not contain a sorting order in its header line.") return deletion_signatures = [ev for ev in sv_signatures if ev.type == "DEL"] insertion_signatures = [ev for ev in sv_signatures if ev.type == "INS"] inversion_signatures = [ev for ev in sv_signatures if ev.type == "INV"] tandem_duplication_signatures = [ev for ev in sv_signatures if ev.type == "DUP_TAN"] translocation_signatures = [ev for ev in sv_signatures if ev.type == "BND"] insertion_from_signatures = [ev for ev in sv_signatures if ev.type == "DUP_INT"] logging.info("Found {0} signatures for deleted regions.".format(len(deletion_signatures))) logging.info("Found {0} signatures for inserted regions.".format(len(insertion_signatures))) logging.info("Found {0} signatures for inverted regions.".format(len(inversion_signatures))) logging.info("Found {0} signatures for tandem duplicated regions.".format(len(tandem_duplication_signatures))) logging.info("Found {0} signatures for translocation breakpoints.".format(len(translocation_signatures))) if options.all_bnds: logging.info("Found {0} signatures for translocation breakpoints from other SV classes (DEL, INV, DUP).".format(len(translocation_signatures_all_bnds))) logging.info("Found {0} signatures for inserted regions with detected region of origin.".format(len(insertion_from_signatures))) logging.info("****************** STEP 2: CLUSTER ******************") signature_clusters = cluster_sv_signatures(sv_signatures, options) if options.all_bnds: rootLogger.setLevel(logging.WARNING) translocation_signature_clusters_all_bnds = cluster_sv_signatures(translocation_signatures_all_bnds, options) if options.verbose: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) # Write SV signature clusters logging.info("Finished clustering. 
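# The per-type lists above filter sv_signatures once per SV class. An equivalent
# single pass over the list (illustration only; within each type the original
# order is preserved):
from collections import defaultdict

def group_signatures_by_type(sv_signatures):
    groups = defaultdict(list)
    for signature in sv_signatures:
        groups[signature.type].append(signature)  # keys like "DEL", "INS", "INV", "DUP_TAN", "BND", "DUP_INT"
    return groups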
Writing signature clusters..") if options.all_bnds: all_signature_clusters = (signature_clusters[0], signature_clusters[1], signature_clusters[2], signature_clusters[3], signature_clusters[4], signature_clusters[5] + translocation_signature_clusters_all_bnds[5]) write_signature_clusters_bed(options.working_dir, all_signature_clusters) write_signature_clusters_vcf(options.working_dir, all_signature_clusters, __version__) else: write_signature_clusters_bed(options.working_dir, signature_clusters) write_signature_clusters_vcf(options.working_dir, signature_clusters, __version__) logging.info("****************** STEP 3: COMBINE ******************") deletion_candidates, inversion_candidates, int_duplication_candidates, tan_dup_candidates, novel_insertion_candidates, breakend_candidates = combine_clusters(signature_clusters, options) if options.all_bnds: rootLogger.setLevel(logging.WARNING) breakend_candidates_all_bnds = combine_clusters(translocation_signature_clusters_all_bnds, options)[5] if options.verbose: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) if not options.skip_genotyping: logging.info("****************** STEP 4: GENOTYPE ******************") logging.info("Genotyping deletions..") genotype(deletion_candidates, aln_file, "DEL", options) logging.info("Genotyping inversions..") genotype(inversion_candidates, aln_file, "INV", options) logging.info("Genotyping novel insertions..") genotype(novel_insertion_candidates, aln_file, "INS", options) logging.info("Genotyping interspersed duplications..") genotype(int_duplication_candidates, aln_file, "DUP_INT", options) # Write SV candidates logging.info("Write SV candidates..") logging.info("Final deletion candidates: {0}".format(len(deletion_candidates))) logging.info("Final inversion candidates: {0}".format(len(inversion_candidates))) logging.info("Final interspersed duplication candidates: {0}".format(len(int_duplication_candidates))) logging.info("Final tandem duplication candidates: {0}".format(len(tan_dup_candidates))) logging.info("Final novel insertion candidates: {0}".format(len(novel_insertion_candidates))) logging.info("Final breakend candidates: {0}".format(len(breakend_candidates))) types_to_output = [entry.strip() for entry in options.types.split(",")] if options.all_bnds: logging.info("Final breakend candidates from other SV classes (DEL, INV, DUP): {0}".format(len(breakend_candidates_all_bnds))) write_candidates(options.working_dir, (int_duplication_candidates, inversion_candidates, tan_dup_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates + breakend_candidates_all_bnds)) write_final_vcf(int_duplication_candidates, inversion_candidates, tan_dup_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates + breakend_candidates_all_bnds, __version__, aln_file.references, aln_file.lengths, types_to_output, options) else: write_candidates(options.working_dir, (int_duplication_candidates, inversion_candidates, tan_dup_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates)) write_final_vcf(int_duplication_candidates, inversion_candidates, tan_dup_candidates, deletion_candidates, novel_insertion_candidates, breakend_candidates, __version__, aln_file.references, aln_file.lengths, types_to_output, options) logging.info("Draw plots..") rootLogger.setLevel(logging.WARNING) plot_sv_lengths(deletion_candidates, inversion_candidates, int_duplication_candidates, tan_dup_candidates, novel_insertion_candidates, options) if not 
options.skip_genotyping: plot_sv_alleles(deletion_candidates + inversion_candidates + int_duplication_candidates + novel_insertion_candidates, options) if options.verbose: rootLogger.setLevel(logging.DEBUG) else: rootLogger.setLevel(logging.INFO) logging.info("Done.") if __name__ == "__main__": try: sys.exit(main()) except Exception as e: logging.error(e, exc_info=True) svim-2.0.0/src/tests/000077500000000000000000000000001406305341300144175ustar00rootroot00000000000000svim-2.0.0/src/tests/__init__.py000066400000000000000000000000001406305341300165160ustar00rootroot00000000000000svim-2.0.0/src/tests/chimeric_read.sam000066400000000000000000001420251406305341300177030ustar00rootroot00000000000000@HD VN:1.0 SO:queryname @SQ SN:chr21 LN:48129895 read1 0 chr21 35346115 60 33M1I35M1D13M1D4M1I16M1D15M1I14M1I12M1D3M1D39M1D25M1I33M1I10M1I9M1I78M1I66M1I9M1D7M1I37M2I28M1I12M1I6M1I34M1I6M1I17M2I21M1I20M1I29M1I5M1I5M1I12M2I52M1I5M1I7M1I8M1I63M1D8M1I12M1I4M1I59M1D11M1I9M1I8M1D4M2I7M1D16M1D55M1D10M1I4M1I17M1I53M1I17M1I124M1I96M1I3M1I14M1I5M1I2M1I8M2I3M1I2M1I23M1I21M1I123M1I3M1I5M1D7M1I7M1I59M1I84M1I39M1I20M1I28M1I15M1I4M1I12M1I26M1D44M1I3M1I47M1I37M1I3M1D64M1I5M1D65M1I29M1I59M1I3M1I34M1D18M1D15M1D17M1I5M1I52M1D3M1I22M1I56M1I5M1I12M1D23M1I15M1I5M1I53M2I19M1D20M1I15M1I4M1D24M1I12M1I21M1I95M2I30M1I4M1D21M1I6M1I23M1I12M1I41M1I7M1I20M2I11M1D19M1I8M1I16M1D18M2I7M1D19M1I13M1I19M1I7M1I33M1I35M1I10M1I8M1I3M1I16M2I8M1I13M1I29M1I11M1I14M1I17M1I37M1I24M1D38M1I7M1I6M2I95M1D52M1I14M1I16M1I19M1I1M1D35M1I2M1D23M1I1M1I12M1I17M1I3M1I77M1I18M1I3M1I2M1I2M1I9M1I9M1I2M1I10M1I13M1I6M1I2M1I21M1I52M1D2M1I28M1I10M2I28M1I5M1I8M1I15M1I22M1D7M1I19M1I43M1D7M1D9M1I5M1D4M1D12M1I1M1I26M1I4M1I7M1I7M1D21M1I7M1I5M1D5M1I12M1I27M2I32M1I17M1I21M1I13M1D15M1I81M1I19M1D2M1I14M1I8M1I15M1I4M1I38M1I23M1I9M1I12M1I7M1D10M1I6M1I14M1I8M1I28M1I13M1I26M1I35M1I9M1D17M1I8M1I6M1D7M1D6M1I5M1I2M1I38M1I11M1D21M1I25M1D10M2I17M1D15M1I3M1I42M1I17M1D79M1I2M1I17M1I26M1I30M2I52M1I4M1I1M1D16M1I6M1I18M1D19M2I11M1I25M1I19M1D3M1I2M1I16M1I10M1D4M2I15M1I2M1D4M1I37M1I13M2I25M1D32M1D4M1I8M1I13M1I2M1I6M1I1M1D74M1I8M1I14M1I28M1I27M1I37M1D19M1I14M1I5M1I20M1I13M1D4M1D15M1I5M1D27M1D7M1I1M1I23M1I33M1I26M1I44M1D14M1I6M1D12M1I39M1I8M1I5M1I6M1I16M1I15M1I3M1I11M1I22M1D1M1D8M1D110M1I12M1I1M1I4M1I4M1I28M2I15M3405S * 0 0 
TCCTTGCCTTTCAGGTGCTTTAAGAGGGCTTATCGTTCTTTGACTCCCCTAGTAACTCAAAAGGGTGGCAAGATGCAACTCTGAGGCTTTCCAAGCTCTCTGCAAGCCTGCCTTCAGTAAGTCTGTGCACCACAAAGAAGCATAACTTATATCCTCTAGCTGAATTTACCTCCTGCTTCGAGCCCCTGTTTCCCATGTCATTTTAGGGGTCCAGATGTTTTTCCTGGAGGCATATACACACACACACTACACACACACAACACACACATCACACACACACACACACACACATATATACACACACACATTCCAGGGGCCTCTGAGGTGCCAAGACATGTGTGGGGTCCAATCTGGTTAACACTGATGTCCATGATCATTGAGTTCATTTTTTATATCAACTATGTGACACTTCCTGATAGTTTCCGTTCCTGTGTTAGAATTTGCAAAGTTGCCGACTATTTCTTTGAATGAAGAGTAGCTAGATCATGGGTCTACACCAGGTCATCAAGCAGCCGATTTCCCCTGTTGGAAGCTCCAAGCAAATTGTCAGCTCCAAGCATTGCCACACAGCTGTGGGTGTGCCACACTCCTTTTCCTTTTAATAAGATGATGATCTGCTAGTGTGTGAATGCTCTTGGAGTTTTATTTGTAGGTTTTTTCCTTGTATGTCGCAAATGTATCTATATTTCATCTTCCCTCTTGAGGGATATTTTCCATGGGTAGAGGATCTAGCGCTTTCTCTTTTGCGATTCCATGTGTGGCTTTAAAAAAAAGTCAGTTGACTGTTTATTTATGGCTTCTCTAAAGGTGATCTCTTTCTCCTAAGGACTGCTTTAAAAGCTTTCGCGTTCCCTCCCTCCCTTCCTCCCACCCCATCCCCCTTCTTCCTTTCCTTCCTCCCTCCCTCCTCCCTTCTGCTTCCTCCCTTTTCTCCCTGTCGTTTCCTCCTTTCTCCCATCCTCCCTCCTCCCTCTTTCCTTCTTTTTTCCCTTCCTCCCGCCCTCCTTCCTTCTTCCATCTTATATATTATAAATAATTTTCTTTCTTTCTTTATTTTTTTTTGAGATGGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAACGGTGGTGATCTTGGCTTACTGCCAACCTCCACCTCCCTGGTTCAAGCGATTCACCTGCCTCAGCCTCCCTAGTAGCTGGGATTACAGGTGCCCGCCACCACACCCAGCTAATTTTTGTAATTGTAGTAGAGATGGGGTTTCACCATGGTGGGCCAGGCTGGTCTTGAACTCCTGAACTCAGGTAATCCACCCGCCTTGTCCTCCCAAAGTGCTGGAATTACAGGCATGAGCCACCACTCCCAGCCCTAATTAAAATATTTTCTTTCTGAGGCCCTTCCCTACCCTCTTGTACTTCTTTTGATACCGCCATCATAATGCATGTTAGGTGGACTCATCTGTGTACTCTATTACCCTCTCATCTGGTTTTTAATCTTTCTGTCTCCTTATACTGTAATCTCAGTGTTTCTTCTGCTTTAATTTTGGTATATGAATTATTCAGCTGTGTTCAAAGTGTGGTTCAAATGCGACTATATGCTTCTTTAAGTTTTTAGTTATTGTACTTCTCACCTCTAGAAGTTCTTCTGTGACACTTCTTATAGTTTCGCAGTTCTTGGGTAGAATTTGCAAACTTGTTGACTATTTCTTTGAAAAGAGTAGCTAGAATTGTTTTAGTCTGTGTATAGTGAGTATGAATATCTGAAACCTCTGTGAGTCTGTTTCTTTTGTCTCGCTAGCCCTACTCATGGCGTCCATGTTTTCCTGGATGTTTAGTTGGTTGTTCTTTGAATATGTCCAAACTCCCAGTTATTTCTGGGGAATACCCAAAGGCACACCTGTTCCTATCACCCAGTGAACTGCCATGTGTTTGTCTCCTGTCATGGTAGACATTTTGCATTCAGGGAGAGGCCTCTGCTCTCTGCAGAACAGTGTGAAATGCACCTCATTTTTGCCTTCAATGAAGGTTGTCTCTTGCTTGGCACCACCACTTCCTCAAATGACTAATGCCACTCTTGCTCACAGTGCTAAGGCGCTAAGGAGTTTATCAGAGGCTAGAGTGATTCTGCATCCTCTGGGGTTCTCAGGAAGCGAGGCCACAGGGCCATCTGTGCATGTAGTGACAGTGGGGTGGGGTGGATGAGCAGTTGGCAGTGGTGGGCACCTATGCTGCTGGCGCCACAGCTTTTTATATCTTGGTGAAAACAGACGCCCTTGCGTTTTCACTTGTAGATTCTCCGGCTACATCCACAAGAACAAATTCTGTCACAGAGGCCAGATAGACTATCAGTGTAATGGGCCCTTTCCCCTTTGATGATAGCAGCTTTTTTCGGGGGAGCAGGGGTTGCCAAACTTCTACGCCTTATTTTATCTTCCTTTCTGAGTTCTCCCCAGAGGCTGCTGGGTCCACCTCTCCCTCTGCCAGACAGACAGGCTCAGGACATAGCAAGAACGGAAGGGGTGTGGGGGTTGGTGGGAGGTATTAAGTTGGGGAAACGACAGCTCTTGTCTTTGTAGATTACCAGAACTGACACACGTACACACACGTACACTCACGCTCACTCACTCACTCATCTATCATCTATTTTTGGCCTAAGCTGCCTTTGGACACTTCCTTCGAAAAGCACATGAACTCTTCGGAGTTCTCCTGTTCCACCTTGGTAAATTTCCTATAGCCAACGCACTGAAAGTCCCTGCTGCCCTCCTTCCTCTGAGCTTGTGGGGCCCACAGATCCCCTGCTCCATTTCCTGCTTCATTTCAGCTGATGGTGAGCTTCCAGGGCAGGCTTGTAGGAAGGGCGGGCTAAGGCTGGGGAACTGCAGCACCCACCACCACCCGCCCGCCCCAGTGCTTGTTGCCCCTAGCCTCTGTCTCTCCTCGCTGTGCCCTTCCCTTAAGCCCCACTCTTGGCCTCCCACAAAGACAAGAGTTTGTTCTGTTTTTATATTGATAGGATGAACTCCCTCGTTCTAATACCTATCTGAATAGCCTGAGCAATTACATTTACAACCTCATGAAAAATACACAGCACTTGTCACGATGAATGATGTTTACCGTGAATAATTGCAGTTTTGAGTTCAAACATCCTTCTATAGTCCAAGAATGAGTAAATATCCCTATTCCCTACCCCTCCCCATTGCATTTGGGTTTGACATGACATGCAATCATTGTGAGTTTTACTGCCTGGGGTAGAATTGTTTTTGCCTGTATTGCCATTGCCAATTGACTGGAAGGACAAAAAAAGGAAACTTACCAATTGAGCAGTTTCTAGATGAACTTAAATATCTCAGAAATCTTGAAGCAATGAATGAACTATAACTTTCTGAAGTTGCAAAAATGATTCAAGTCGGAAGTGACCCATGGGAGCAACCTGGTCCTTAATTAGGACTGGAAAGAAAACGGTAGGCCCATGCGAGGTGGAACAGTGGCTTCCCCAAGGTCACACTGCAAGTGAGTGGTGGCACTAGTCACCCAGGAGTCCTCCTGACTCCATGGTATCCTGACCCCAGACCAGAG
CCTTATTTGTGGTTAAAGAACGTGGAAAAACAGCTTATGTTTGAATTTTAGAAATCATGCCAGTAGCTAAAGATCTGCATTCTCATGAATATTGAGCTTTGCGTTGTGCGGGTTGCTACGAATTTTGTAACTATTTTCTCCTAGAGCTCTCAGTATTTTACATATGACCAATTCCATTCTATTATTGTCCCGGGAAAAGAGTGTGCATTTACTGAGCAGTAAGAGGGTGATTTGAAGTCCCTGGGGTCCCCCACCCTTGTCTTTCTATGTGAGATTGTATGTGCAGGGCTCACATTTATGTCTCCTCCAGGCAGCTCATGTGCACAGACTAGTTGTATTTATTAGAGCACCTACTGTGGTGCCATGGAAGCTATGTGGTAAGCTAAGCAGGACAAAAACTCCTGCTCTCACAAGAAGAAAGAATAAACAAGCAGACAATTGCCCGTATAATGTCAGCCTGGTGTGATGGCTCATCTCCTGGAATACCCTAGCACTTTGGGAGGTGACGATGGGAGGGATTGGCTTTAACTTCGGGAGTTCAAGATAAAGCCTGGGCAACCAAGACCCTTCTCTACAAAAAAAATCAAAAAATTAGACTGGGGGTCATGGCATGAGTCTCTGGCCCCTCGGGACCAGAGGCTCGGGAGCTGAGGCGTGGTTTTCACTGAGCCTGGGAGCGTTTGAGGCTGCAGTGAGCTGTGATCACAACCACCTGCACTGCCAGCCTGGCCACAGAGCGAGACCCTGTCCTCTAAATTAAACAACAACCCTAATGTCAAGATTAGGAAGTGTGGCAAAAAGGAACAAACAGTCAGGGTGACTGCCATTCTAGAAAGGGTGGTCGAGGGAAGGTCTCCTTGAAGCAGATCCTGTGGGAGTGGAGAGGGTAAGTCACGGACTCTCTGGAGGAACGAAGATTCCAGGTGGAAATGCCTTAGTGGGTGTTAAACGCTTTAACGGGTGTTAGCCAGAAAGCCAGAATGGCTGGAGCTGAAGGGAGTAGAGGAGGCGGCTAGGAAAATGAGGTCAGAAGATGGAGAGCAGGGCCGGATCACGAGAGCGGGCACAGAGAACGAGGAGGGAATTGGATTTTTGGAAATAGATTGTGGCAAAATACAAAATATTAAAATACATAAAATAAAATATGAAACCATAAATGTACTACCCTTTACCCATTTTAAGTATGCCAGTTCAGTTAGTGTTAAATACATTTACATCATTGTTACAACCCGTCTCCAGGAACTCTCTGCATCTTGCAAAATTAACAACTCTATGCCATTTCAACAGTTCGCATTTCCTCTCTCCCTCTCGTCATGGCAACCGCCATTGTTTCTTTCTTGTCTCGTAAATGTAGTACTTTGGGTGTAATCTCATCTAAGTGGAATCACACAGTGTGTGTCCTTTTAGTGACAGACTTTTTCACTTTGCATAATGTCCTGTAATATTCATCCATGTTGTAGCATGGTCAGATTTTATCCTTCCTTCTTAACATTTGTAATATTCCATTGTTACGGCATAGACTACATTTAGCTTATCTGTTCACCTGTGGATGGACAACTTGGGTTGCTATAGTAATAATATTGCTGTAAATATGGGTATACGATTATCTCTTTGTGACCCTGCTTTTAATTGTTTTGGATATATACCCAGAATATGGGAATTACTGAATCATGTTGGAGATTCTGTTTTTGATTTATTTATTTTATTTTTAATTTTTGAGATGGAGTCTCGTTTTCTGTCACCCAGGCTGGAGTGCAGTGACACGATCTCTGCTCACTGCAACCTCCTGCTCCCTTGCACCATTAATTCCTCAAAGATGCAGGCCTGGATCCAACGCCTTTGGGGACGGGTACTCCTGCCCACAGGAAAGAGGTTGAGGAGACAATAATGTCAACGAGAATCATCATCTTTGCTGTGGAAAATGGTGTGCAGAGTATTCTGGTGCCTCTGCCAAGTGATTGTCGCATGCTAGCCTTCCCTAGTAGCTGGGATTACAGGCATTTGCCACCAAGCCCCGGCTAATTTTGATGTATTTTTAGTAGAGACGGGGTTTATCATGTTGGCCAGGCTGGTCTCAAACTCCTGCCTCAAGGTGATCGCACCTGCCTCAGCACTCCCCAAATGGCTGGGATTATAGGTGTGAGCCATGCACCTGGCCTGATTGGTTATTTAAATTCTGAGAGGGGGTATACTAATGTGTTGAAAACACACAGGGATGGGATCAGGTATAGGTGGGGAGGGAGAGCAAAAGTTGGTATTTAGAATATGTTACGTTTGAACAAACCTAATACACATTCCACATCCAAGTGGACATAGAGGAGTTATTTGAAGCTCAGCAAGACACCTGAGTTGGGAGATCAGGAATTTGTGGATCATCTGGATATATTTAGCATTATGCCACAGGGCTGCTGCCCTGATTTTTTTCCCCCTAGACAGTTTCGCTCTGTTGCCTCTAGGCACGACCTTGACTCACTGCAGACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAGCTGGGATTAACAGGTGTCCACCATCACGCCCGGCTAATTTTAGTGCTTTTAATGAGATGGGTTTCACACATGTTGCCAGGTTGGTCTTCAAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCCAAAGTGGCTGGACCAGGCGATGAGCCACTGCACCTCGGCCACCACTGCCTGCTTTATTGTACCGCTTCGGCAAGCTAAGAATGGTTTTCAATTTTGTTTTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTACAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAAGCCTCCCGAGTCACGCTGCGGACGTACAGTTGCCTGCCACCACGCCCAGCTATTATATTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGGCTGCCTGACTTTTTTTCCCCCTAACAGTTTCGCTCTTGTTGCCCAGGCACGACCTTGACTCACTGCACATCCTCTGCCTCCCGGGTGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAAGCTGGGTTACAGGTGTCCACCATCAACGCCCGGCTAATTTTAGTGCCTTATTAATAGAGATAGGGTTTCACCCATGTTGGCCAGGTGTCTCAAACTCCTGACCTCAGGTGATCCACACTGCCATTTGGCGCTCCTCAAAGTGCTGGCCTACGGCATGAGCCACTGCACCTGGCCATCCGACTGCCTGTTTTTGGTACTCCAGCTTGGGCAAGCTAAGAATGTTTTCACATTTTTGTTGTCTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCTCAGGCTGGAGTGCAGTGGCAGCAATCTCAGCTCACTACAAGCTTCCGCCTCCCGGGTGTCATCGGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACCTACAGTTGCTGCTACCACGCCCAGCTAATTTTTTGTTATGATTTTGATCATCTGATATATTTAGCATTTATAGC
CATCAGGTGCTGCTGCCTGGACCTTTTTTTCCCCTAGAAGTTTCGTTCTTGTTGCCCAGGCCACGACTCTTGACTTATGCAACCTCTGCGCTCCCGGGGTTCAAGCGATTCTCTTGCCTCAGCCCCCCGAGTAGCTGGGATTATCAGGTGTCCACCCATCACTGGCCCGGCCTAATTCTTAGTCGCTGTTCTAAAGAGATGAGGTTTTCCATGTTGAGCCAGGTTGGTCTCAGAACTCCTGACTCAGGTGATCCACCTGCCTTTGGCCTCCCAAAGTGCTGGCCAGGATGAGCCAGCTGCACCTGGCCACCACTGCCTGTTTCTGTACCGCTTGGCAAGCCTAAGAGATGGTTTTTCACATTTTTGTTTCTTTTTTTTTGGAGTACGGAGTCTCACCATCTGTCACCCAGGCTGGATTGCAGTGGCACAATCTCAGGCTCACTACAACGCTCCGCCTCCCGGGTTACACAGCTCATTCTCCTGCCTCCAGCCTCCCCGAGTAGCTGGGACTAACAGTTGCCTGCCACCACGCCCAGCTAATTTTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGCTCGCCTGACTTTTTTTCCCCCTAGCCACAGTTTGCTCTTGTTGCCCAGGCACGACCTTACTCACTGCCAACCTCTGGCCTCGCCGGGTTCAAGCGAGTTCTCTTGACCTCAGCCCCCCGAGTAGCTGGGATTACAGGTGTCCACCATCAGCGCCCGGCTAATTTTAGTGCTTTTAATAAGAGATGGGTTTCACCATTTGGCCAGGTGGTCTCAAACTCCTGGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGCCAGGCATGAGCCACTGAACCTGGCCACCACTTGCCTGTTTTTGTACCGCTTGGCAAGCCTAAGAATGGATTTTCACATTTTTGTTTCTTTGTTTTTTGAGAGACGGAGTCTCCACTCCTGTCAACCCAGGCTGGAGATGCAGTGGCACAATCTCACGCTCACTACAAGCTCCGCCTCCCGGGTTACGCCATTCTCACTGCCTCAGTCCTTCCCGAGTGAGCTCGGGCTACAGTTGCCTTGCCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGGTATGGATTCTTCCTGAACTCGTTGATCCGCCTGCCTCGGCCTCCCAAAAGGTGCTGGGATTACAGGCATGAGCCACCGTGACCGGCCACTGCTGCCGTGTTTTTGTACTGCTTGGCATAGCTAAGAATGGTTTTCATCACATTGTGAATAGTTTTGAAGAAAAATTTTTCAAGAATACATTTTGTGGCACACGAAAATTATATAGAAATTCAAAGTTTTCAATGTCACAAATAAGAGGTTTTATATTGGAACATAGCATGCTCACTCATTTAGGTATTGTTACGGCTTGCTTCCACACTACAAGCAGCCAGGGTTCAGAGGTCATGGCAGAGACCAACGTGGCCCACAAAGTGTGAATGTCACTTCCCTTTGTACAAGCTGTTTGCCCTAAAACTCTTGGCTAAGCAGAGTGAGTCATGAGAGAAGAGAGAAGGTGCATGGACTGAGGTCTGTCAAGGGCCTGTCAAGAGCCAGTAACCGTTGCTGTGCGCTCAAGGATGCCAAGATGAGTGGCTTTGCTCCATGAGCAACAAAGCCCAGGGGCCCAGCAAGCCAAGGCTGCTTGCCTCTTTGTCCCCTGGGGCCAGCTGTGCATCTTGTGGATGGGCTGAGAGTTGAGCAACCCATAACAGGCCTCCTGACCCACTGTGAGATGGTGGCATGTGAAAGGACCAGCATCCAACAGTCTTGCCTCTTACTATATTAGTTAATTTCCTGGGGCTACAGGAGCTTATTTACTCAAACTGAGTGACTAAAACAATGGAATGGATTTTCTCTCGCAGTTCTAAGAGGGTATTATTCTTTCAACAATCAAGGTGACAGCACGGCCATGCTCCCGAAAGACAGCTCTAGGAAAAGCTCCTGGTTTCTAGCGGCTGCCTGCACTTCCTTGGCATTCCTTGAGCTTCAGCTGCATCATTCAGTGCTTGCTTCGGTACCACCGTGCCTTCATCCCTGGGTCGCTTCCATCGTCTCTATGTCCAAATGTTCGCATCTTTTATAAGGATGCTTAGTCATTGGATCCCAGGGCCCATCCCTAATCTGGTATGACTTCATAATTAATGTGAGTTGCATTGAAAAGACGCTGTTATCCAAATAAAGCCATCTCACACGTATGGGGACTAGGACGTTCAGACATATCTTTTTGCGGGACAAAATTCTACTCCACTATACTTCCCACACACATGCAGCATTTCCACGTGGTGAATCTAAGAGTATTTAGGGTTCTTGCGATAAAAGGGATATGGCCCGGTTGACTATCCTGGGGATAAGAAATCAAAAGAGAAAAAAACTCTAGTATTGGTGTTAAGACCCTGCAGTTTTACATTCATTTCCTGCACCACTCCCTGCTGCCTGCAAGGCAGGCCAGCTGCTTCACCTAGGGGCAGAATTCGGAGAAAGAATACGTCTCTCTGGATGTAACTACTGCGACTCAGGCAACCCACTAGTGGAGCTTCACAGGTGGTTCATCTGCCAAAGATTCTGAGGTTTTTAGCCGTGAGATGATAGGGTCAGAAACCGGGTGGTGCTCATGGGGGAAAGCAGCTACTGGAAGAGTCTAGATGCTGCTTCCCCTCAGGACTGTGATTGACCTTCAACTGGCCCCTCATCCCCAAGAACCTCAGCTTCTTCCTCT * AS:i:10650 NM:i:354 
SA:Z:chr21,35351847,+,7658S4M1D43M1I22M2I7M1D25M1D8M1I8M1I5M1I14M1I8M1I43M1I27M1I18M1D9M1D15M1I40M1I33M1I26M1I10M1I29M1I12M1I4M1I5M1I13M1I18M1I28M1D11M1I9M1I2M1I8M1I4M1I3M1D11M1I63M1I2M1I11M1I23M1I3M1I46M1I20M1I18M1I6M1I6M2I6M1I18M1D26M1I7M1I3M1I9M1D9M1I1M1I8M2I39M1I14M1I5M1I27M1I15M1I8M1I24M1I2M2I12M1I29M1I57M1I17M1I2M1I2M1I54M1I2M2I30M1I6M1I21M1I58M1I13M1I23M1I8M1D13M1D11M1D6M2I15M1I20M1D30M1I47M1I16M1I4M1D13M1D4M1I2M1D9M1D4M1I28M1I17M1I4M1I4M1D11M1I13M2I9M1I22M1I10M1I9M1D12M1I16M1D22M1I4M1I30M1I10M1I14M1I4M1I7M1D17M1I17M1I13M1I5M1D10M1I22M1D25M1I8M1I10M1I20M1I18M1I15M1I21M1I15M1I15M1I13M1I7M1I28M1I10M1I5M1I5M1D26M1I6M1D18M1I8M1I6M1I14M1I33M,40,155;chr21,35351847,+,7076S4M1D1M1D31M1I4M1I10M1I2M1I8M1D8M1D22M1I6M1I9M1D12M1I5M1I47M1I10M1I7M2I6M1I6M1I5M1I3M1I2M1I3M1D7M1I5M1D9M1I15M1I9M1D19M1I25M1D8M1I43M1I6M1I4M1I18M1D13M1I11M1I1M1I37M1I11M1I17M1I3M1I2M1I13M1I6M1I17M1I36M2242S,22,51;chr21,35351854,+,6495S38M1I24M1D44M1I1M1I16M2I16M1I16M1I7M1D18M1I20M1I3M1I11M1I8M1I14M1D1M1D30M1I5M2I5M1I4M1I13M1I1M1I24M1I2M1I13M1I4M2I1M1I4M1I14M1D17M1I37M1I20M1I21M1I14M1I3M1I1M1I36M1I9M1D28M2828S,40,41; read1 2048 chr21 35351847 40 7658S4M1D43M1I22M2I7M1D25M1D8M1I8M1I5M1I14M1I8M1I43M1I27M1I18M1D9M1D15M1I40M1I33M1I26M1I10M1I29M1I12M1I4M1I5M1I13M1I18M1I28M1D11M1I9M1I2M1I8M1I4M1I3M1D11M1I63M1I2M1I11M1I23M1I3M1I46M1I20M1I18M1I6M1I6M2I6M1I18M1D26M1I7M1I3M1I9M1D9M1I1M1I8M2I39M1I14M1I5M1I27M1I15M1I8M1I24M1I2M2I12M1I29M1I57M1I17M1I2M1I2M1I54M1I2M2I30M1I6M1I21M1I58M1I13M1I23M1I8M1D13M1D11M1D6M2I15M1I20M1D30M1I47M1I16M1I4M1D13M1D4M1I2M1D9M1D4M1I28M1I17M1I4M1I4M1D11M1I13M2I9M1I22M1I10M1I9M1D12M1I16M1D22M1I4M1I30M1I10M1I14M1I4M1I7M1D17M1I17M1I13M1I5M1D10M1I22M1D25M1I8M1I10M1I20M1I18M1I15M1I21M1I15M1I15M1I13M1I7M1I28M1I10M1I5M1I5M1D26M1I6M1D18M1I8M1I6M1I14M1I33M * 0 0 TCCTTGCCTTTCAGGTGCTTTAAGAGGGCTTATCGTTCTTTGACTCCCCTAGTAACTCAAAAGGGTGGCAAGATGCAACTCTGAGGCTTTCCAAGCTCTCTGCAAGCCTGCCTTCAGTAAGTCTGTGCACCACAAAGAAGCATAACTTATATCCTCTAGCTGAATTTACCTCCTGCTTCGAGCCCCTGTTTCCCATGTCATTTTAGGGGTCCAGATGTTTTTCCTGGAGGCATATACACACACACACTACACACACACAACACACACATCACACACACACACACACACACATATATACACACACACATTCCAGGGGCCTCTGAGGTGCCAAGACATGTGTGGGGTCCAATCTGGTTAACACTGATGTCCATGATCATTGAGTTCATTTTTTATATCAACTATGTGACACTTCCTGATAGTTTCCGTTCCTGTGTTAGAATTTGCAAAGTTGCCGACTATTTCTTTGAATGAAGAGTAGCTAGATCATGGGTCTACACCAGGTCATCAAGCAGCCGATTTCCCCTGTTGGAAGCTCCAAGCAAATTGTCAGCTCCAAGCATTGCCACACAGCTGTGGGTGTGCCACACTCCTTTTCCTTTTAATAAGATGATGATCTGCTAGTGTGTGAATGCTCTTGGAGTTTTATTTGTAGGTTTTTTCCTTGTATGTCGCAAATGTATCTATATTTCATCTTCCCTCTTGAGGGATATTTTCCATGGGTAGAGGATCTAGCGCTTTCTCTTTTGCGATTCCATGTGTGGCTTTAAAAAAAAGTCAGTTGACTGTTTATTTATGGCTTCTCTAAAGGTGATCTCTTTCTCCTAAGGACTGCTTTAAAAGCTTTCGCGTTCCCTCCCTCCCTTCCTCCCACCCCATCCCCCTTCTTCCTTTCCTTCCTCCCTCCCTCCTCCCTTCTGCTTCCTCCCTTTTCTCCCTGTCGTTTCCTCCTTTCTCCCATCCTCCCTCCTCCCTCTTTCCTTCTTTTTTCCCTTCCTCCCGCCCTCCTTCCTTCTTCCATCTTATATATTATAAATAATTTTCTTTCTTTCTTTATTTTTTTTTGAGATGGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAACGGTGGTGATCTTGGCTTACTGCCAACCTCCACCTCCCTGGTTCAAGCGATTCACCTGCCTCAGCCTCCCTAGTAGCTGGGATTACAGGTGCCCGCCACCACACCCAGCTAATTTTTGTAATTGTAGTAGAGATGGGGTTTCACCATGGTGGGCCAGGCTGGTCTTGAACTCCTGAACTCAGGTAATCCACCCGCCTTGTCCTCCCAAAGTGCTGGAATTACAGGCATGAGCCACCACTCCCAGCCCTAATTAAAATATTTTCTTTCTGAGGCCCTTCCCTACCCTCTTGTACTTCTTTTGATACCGCCATCATAATGCATGTTAGGTGGACTCATCTGTGTACTCTATTACCCTCTCATCTGGTTTTTAATCTTTCTGTCTCCTTATACTGTAATCTCAGTGTTTCTTCTGCTTTAATTTTGGTATATGAATTATTCAGCTGTGTTCAAAGTGTGGTTCAAATGCGACTATATGCTTCTTTAAGTTTTTAGTTATTGTACTTCTCACCTCTAGAAGTTCTTCTGTGACACTTCTTATAGTTTCGCAGTTCTTGGGTAGAATTTGCAAACTTGTTGACTATTTCTTTGAAAAGAGTAGCTAGAATTGTTTTAGTCTGTGTATAGTGAGTATGAATATCTGAAACCTCTGTGAGTCTGTTTCTTTTGTCTCGCTAGCCCTACTCATGG
CGTCCATGTTTTCCTGGATGTTTAGTTGGTTGTTCTTTGAATATGTCCAAACTCCCAGTTATTTCTGGGGAATACCCAAAGGCACACCTGTTCCTATCACCCAGTGAACTGCCATGTGTTTGTCTCCTGTCATGGTAGACATTTTGCATTCAGGGAGAGGCCTCTGCTCTCTGCAGAACAGTGTGAAATGCACCTCATTTTTGCCTTCAATGAAGGTTGTCTCTTGCTTGGCACCACCACTTCCTCAAATGACTAATGCCACTCTTGCTCACAGTGCTAAGGCGCTAAGGAGTTTATCAGAGGCTAGAGTGATTCTGCATCCTCTGGGGTTCTCAGGAAGCGAGGCCACAGGGCCATCTGTGCATGTAGTGACAGTGGGGTGGGGTGGATGAGCAGTTGGCAGTGGTGGGCACCTATGCTGCTGGCGCCACAGCTTTTTATATCTTGGTGAAAACAGACGCCCTTGCGTTTTCACTTGTAGATTCTCCGGCTACATCCACAAGAACAAATTCTGTCACAGAGGCCAGATAGACTATCAGTGTAATGGGCCCTTTCCCCTTTGATGATAGCAGCTTTTTTCGGGGGAGCAGGGGTTGCCAAACTTCTACGCCTTATTTTATCTTCCTTTCTGAGTTCTCCCCAGAGGCTGCTGGGTCCACCTCTCCCTCTGCCAGACAGACAGGCTCAGGACATAGCAAGAACGGAAGGGGTGTGGGGGTTGGTGGGAGGTATTAAGTTGGGGAAACGACAGCTCTTGTCTTTGTAGATTACCAGAACTGACACACGTACACACACGTACACTCACGCTCACTCACTCACTCATCTATCATCTATTTTTGGCCTAAGCTGCCTTTGGACACTTCCTTCGAAAAGCACATGAACTCTTCGGAGTTCTCCTGTTCCACCTTGGTAAATTTCCTATAGCCAACGCACTGAAAGTCCCTGCTGCCCTCCTTCCTCTGAGCTTGTGGGGCCCACAGATCCCCTGCTCCATTTCCTGCTTCATTTCAGCTGATGGTGAGCTTCCAGGGCAGGCTTGTAGGAAGGGCGGGCTAAGGCTGGGGAACTGCAGCACCCACCACCACCCGCCCGCCCCAGTGCTTGTTGCCCCTAGCCTCTGTCTCTCCTCGCTGTGCCCTTCCCTTAAGCCCCACTCTTGGCCTCCCACAAAGACAAGAGTTTGTTCTGTTTTTATATTGATAGGATGAACTCCCTCGTTCTAATACCTATCTGAATAGCCTGAGCAATTACATTTACAACCTCATGAAAAATACACAGCACTTGTCACGATGAATGATGTTTACCGTGAATAATTGCAGTTTTGAGTTCAAACATCCTTCTATAGTCCAAGAATGAGTAAATATCCCTATTCCCTACCCCTCCCCATTGCATTTGGGTTTGACATGACATGCAATCATTGTGAGTTTTACTGCCTGGGGTAGAATTGTTTTTGCCTGTATTGCCATTGCCAATTGACTGGAAGGACAAAAAAAGGAAACTTACCAATTGAGCAGTTTCTAGATGAACTTAAATATCTCAGAAATCTTGAAGCAATGAATGAACTATAACTTTCTGAAGTTGCAAAAATGATTCAAGTCGGAAGTGACCCATGGGAGCAACCTGGTCCTTAATTAGGACTGGAAAGAAAACGGTAGGCCCATGCGAGGTGGAACAGTGGCTTCCCCAAGGTCACACTGCAAGTGAGTGGTGGCACTAGTCACCCAGGAGTCCTCCTGACTCCATGGTATCCTGACCCCAGACCAGAGCCTTATTTGTGGTTAAAGAACGTGGAAAAACAGCTTATGTTTGAATTTTAGAAATCATGCCAGTAGCTAAAGATCTGCATTCTCATGAATATTGAGCTTTGCGTTGTGCGGGTTGCTACGAATTTTGTAACTATTTTCTCCTAGAGCTCTCAGTATTTTACATATGACCAATTCCATTCTATTATTGTCCCGGGAAAAGAGTGTGCATTTACTGAGCAGTAAGAGGGTGATTTGAAGTCCCTGGGGTCCCCCACCCTTGTCTTTCTATGTGAGATTGTATGTGCAGGGCTCACATTTATGTCTCCTCCAGGCAGCTCATGTGCACAGACTAGTTGTATTTATTAGAGCACCTACTGTGGTGCCATGGAAGCTATGTGGTAAGCTAAGCAGGACAAAAACTCCTGCTCTCACAAGAAGAAAGAATAAACAAGCAGACAATTGCCCGTATAATGTCAGCCTGGTGTGATGGCTCATCTCCTGGAATACCCTAGCACTTTGGGAGGTGACGATGGGAGGGATTGGCTTTAACTTCGGGAGTTCAAGATAAAGCCTGGGCAACCAAGACCCTTCTCTACAAAAAAAATCAAAAAATTAGACTGGGGGTCATGGCATGAGTCTCTGGCCCCTCGGGACCAGAGGCTCGGGAGCTGAGGCGTGGTTTTCACTGAGCCTGGGAGCGTTTGAGGCTGCAGTGAGCTGTGATCACAACCACCTGCACTGCCAGCCTGGCCACAGAGCGAGACCCTGTCCTCTAAATTAAACAACAACCCTAATGTCAAGATTAGGAAGTGTGGCAAAAAGGAACAAACAGTCAGGGTGACTGCCATTCTAGAAAGGGTGGTCGAGGGAAGGTCTCCTTGAAGCAGATCCTGTGGGAGTGGAGAGGGTAAGTCACGGACTCTCTGGAGGAACGAAGATTCCAGGTGGAAATGCCTTAGTGGGTGTTAAACGCTTTAACGGGTGTTAGCCAGAAAGCCAGAATGGCTGGAGCTGAAGGGAGTAGAGGAGGCGGCTAGGAAAATGAGGTCAGAAGATGGAGAGCAGGGCCGGATCACGAGAGCGGGCACAGAGAACGAGGAGGGAATTGGATTTTTGGAAATAGATTGTGGCAAAATACAAAATATTAAAATACATAAAATAAAATATGAAACCATAAATGTACTACCCTTTACCCATTTTAAGTATGCCAGTTCAGTTAGTGTTAAATACATTTACATCATTGTTACAACCCGTCTCCAGGAACTCTCTGCATCTTGCAAAATTAACAACTCTATGCCATTTCAACAGTTCGCATTTCCTCTCTCCCTCTCGTCATGGCAACCGCCATTGTTTCTTTCTTGTCTCGTAAATGTAGTACTTTGGGTGTAATCTCATCTAAGTGGAATCACACAGTGTGTGTCCTTTTAGTGACAGACTTTTTCACTTTGCATAATGTCCTGTAATATTCATCCATGTTGTAGCATGGTCAGATTTTATCCTTCCTTCTTAACATTTGTAATATTCCATTGTTACGGCATAGACTACATTTAGCTTATCTGTTCACCTGTGGATGGACAACTTGGGTTGCTATAGTAATAATATTGCTGTAAATATGGGTATACGATTATCTCTTTGTGACCCTGCTTTTAATTGTTTTGGATATATACCCAGAATATGGGAATTACTGAATCATGTTGGAGATTCTGTTTTTGATTTATTTATTTTATTTTTAATTTTTGAGATGGAGTCTCGTTTTCTGTCACCCAGGCTGGAGTGCAGTGACACGATCTCTGCTCACTGCAACCTCCTG
CTCCCTTGCACCATTAATTCCTCAAAGATGCAGGCCTGGATCCAACGCCTTTGGGGACGGGTACTCCTGCCCACAGGAAAGAGGTTGAGGAGACAATAATGTCAACGAGAATCATCATCTTTGCTGTGGAAAATGGTGTGCAGAGTATTCTGGTGCCTCTGCCAAGTGATTGTCGCATGCTAGCCTTCCCTAGTAGCTGGGATTACAGGCATTTGCCACCAAGCCCCGGCTAATTTTGATGTATTTTTAGTAGAGACGGGGTTTATCATGTTGGCCAGGCTGGTCTCAAACTCCTGCCTCAAGGTGATCGCACCTGCCTCAGCACTCCCCAAATGGCTGGGATTATAGGTGTGAGCCATGCACCTGGCCTGATTGGTTATTTAAATTCTGAGAGGGGGTATACTAATGTGTTGAAAACACACAGGGATGGGATCAGGTATAGGTGGGGAGGGAGAGCAAAAGTTGGTATTTAGAATATGTTACGTTTGAACAAACCTAATACACATTCCACATCCAAGTGGACATAGAGGAGTTATTTGAAGCTCAGCAAGACACCTGAGTTGGGAGATCAGGAATTTGTGGATCATCTGGATATATTTAGCATTATGCCACAGGGCTGCTGCCCTGATTTTTTTCCCCCTAGACAGTTTCGCTCTGTTGCCTCTAGGCACGACCTTGACTCACTGCAGACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAGCTGGGATTAACAGGTGTCCACCATCACGCCCGGCTAATTTTAGTGCTTTTAATGAGATGGGTTTCACACATGTTGCCAGGTTGGTCTTCAAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCCAAAGTGGCTGGACCAGGCGATGAGCCACTGCACCTCGGCCACCACTGCCTGCTTTATTGTACCGCTTCGGCAAGCTAAGAATGGTTTTCAATTTTGTTTTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTACAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAAGCCTCCCGAGTCACGCTGCGGACGTACAGTTGCCTGCCACCACGCCCAGCTATTATATTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGGCTGCCTGACTTTTTTTCCCCCTAACAGTTTCGCTCTTGTTGCCCAGGCACGACCTTGACTCACTGCACATCCTCTGCCTCCCGGGTGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAAGCTGGGTTACAGGTGTCCACCATCAACGCCCGGCTAATTTTAGTGCCTTATTAATAGAGATAGGGTTTCACCCATGTTGGCCAGGTGTCTCAAACTCCTGACCTCAGGTGATCCACACTGCCATTTGGCGCTCCTCAAAGTGCTGGCCTACGGCATGAGCCACTGCACCTGGCCATCCGACTGCCTGTTTTTGGTACTCCAGCTTGGGCAAGCTAAGAATGTTTTCACATTTTTGTTGTCTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCTCAGGCTGGAGTGCAGTGGCAGCAATCTCAGCTCACTACAAGCTTCCGCCTCCCGGGTGTCATCGGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACCTACAGTTGCTGCTACCACGCCCAGCTAATTTTTTGTTATGATTTTGATCATCTGATATATTTAGCATTTATAGCCATCAGGTGCTGCTGCCTGGACCTTTTTTTCCCCTAGAAGTTTCGTTCTTGTTGCCCAGGCCACGACTCTTGACTTATGCAACCTCTGCGCTCCCGGGGTTCAAGCGATTCTCTTGCCTCAGCCCCCCGAGTAGCTGGGATTATCAGGTGTCCACCCATCACTGGCCCGGCCTAATTCTTAGTCGCTGTTCTAAAGAGATGAGGTTTTCCATGTTGAGCCAGGTTGGTCTCAGAACTCCTGACTCAGGTGATCCACCTGCCTTTGGCCTCCCAAAGTGCTGGCCAGGATGAGCCAGCTGCACCTGGCCACCACTGCCTGTTTCTGTACCGCTTGGCAAGCCTAAGAGATGGTTTTTCACATTTTTGTTTCTTTTTTTTTGGAGTACGGAGTCTCACCATCTGTCACCCAGGCTGGATTGCAGTGGCACAATCTCAGGCTCACTACAACGCTCCGCCTCCCGGGTTACACAGCTCATTCTCCTGCCTCCAGCCTCCCCGAGTAGCTGGGACTAACAGTTGCCTGCCACCACGCCCAGCTAATTTTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGCTCGCCTGACTTTTTTTCCCCCTAGCCACAGTTTGCTCTTGTTGCCCAGGCACGACCTTACTCACTGCCAACCTCTGGCCTCGCCGGGTTCAAGCGAGTTCTCTTGACCTCAGCCCCCCGAGTAGCTGGGATTACAGGTGTCCACCATCAGCGCCCGGCTAATTTTAGTGCTTTTAATAAGAGATGGGTTTCACCATTTGGCCAGGTGGTCTCAAACTCCTGGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGCCAGGCATGAGCCACTGAACCTGGCCACCACTTGCCTGTTTTTGTACCGCTTGGCAAGCCTAAGAATGGATTTTCACATTTTTGTTTCTTTGTTTTTTGAGAGACGGAGTCTCCACTCCTGTCAACCCAGGCTGGAGATGCAGTGGCACAATCTCACGCTCACTACAAGCTCCGCCTCCCGGGTTACGCCATTCTCACTGCCTCAGTCCTTCCCGAGTGAGCTCGGGCTACAGTTGCCTTGCCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGGTATGGATTCTTCCTGAACTCGTTGATCCGCCTGCCTCGGCCTCCCAAAAGGTGCTGGGATTACAGGCATGAGCCACCGTGACCGGCCACTGCTGCCGTGTTTTTGTACTGCTTGGCATAGCTAAGAATGGTTTTCATCACATTGTGAATAGTTTTGAAGAAAAATTTTTCAAGAATACATTTTGTGGCACACGAAAATTATATAGAAATTCAAAGTTTTCAATGTCACAAATAAGAGGTTTTATATTGGAACATAGCATGCTCACTCATTTAGGTATTGTTACGGCTTGCTTCCACACTACAAGCAGCCAGGGTTCAGAGGTCATGGCAGAGACCAACGTGGCCCACAAAGTGTGAATGTCACTTCCCTTTGTACAAGCTGTTTGCCCTAAAACTCTTGGCTAAGCAGAGTGAGTCATGAGAGAAGAGAGAAGGTGCATGGACTGAGGTCTGTCAAGGGCCTGTCAAGAGCCAGTAACCGTTGCTGTGCGCTCAAGGATGCCAAGATGAGTGGCTTTGCTCCATGAGCAACAAAGCCCAGGGGCCCAGCAAGCCAAGGCTGCTTGCCTCTTTGTCCCCTGGGGCCAGCTGTGCATCTTGTGGATGGGCTGAGAGTTGAGCAACCCATAACAGGCC
TCCTGACCCACTGTGAGATGGTGGCATGTGAAAGGACCAGCATCCAACAGTCTTGCCTCTTACTATATTAGTTAATTTCCTGGGGCTACAGGAGCTTATTTACTCAAACTGAGTGACTAAAACAATGGAATGGATTTTCTCTCGCAGTTCTAAGAGGGTATTATTCTTTCAACAATCAAGGTGACAGCACGGCCATGCTCCCGAAAGACAGCTCTAGGAAAAGCTCCTGGTTTCTAGCGGCTGCCTGCACTTCCTTGGCATTCCTTGAGCTTCAGCTGCATCATTCAGTGCTTGCTTCGGTACCACCGTGCCTTCATCCCTGGGTCGCTTCCATCGTCTCTATGTCCAAATGTTCGCATCTTTTATAAGGATGCTTAGTCATTGGATCCCAGGGCCCATCCCTAATCTGGTATGACTTCATAATTAATGTGAGTTGCATTGAAAAGACGCTGTTATCCAAATAAAGCCATCTCACACGTATGGGGACTAGGACGTTCAGACATATCTTTTTGCGGGACAAAATTCTACTCCACTATACTTCCCACACACATGCAGCATTTCCACGTGGTGAATCTAAGAGTATTTAGGGTTCTTGCGATAAAAGGGATATGGCCCGGTTGACTATCCTGGGGATAAGAAATCAAAAGAGAAAAAAACTCTAGTATTGGTGTTAAGACCCTGCAGTTTTACATTCATTTCCTGCACCACTCCCTGCTGCCTGCAAGGCAGGCCAGCTGCTTCACCTAGGGGCAGAATTCGGAGAAAGAATACGTCTCTCTGGATGTAACTACTGCGACTCAGGCAACCCACTAGTGGAGCTTCACAGGTGGTTCATCTGCCAAAGATTCTGAGGTTTTTAGCCGTGAGATGATAGGGTCAGAAACCGGGTGGTGCTCATGGGGGAAAGCAGCTACTGGAAGAGTCTAGATGCTGCTTCCCCTCAGGACTGTGATTGACCTTCAACTGGCCCCTCATCCCCAAGAACCTCAGCTTCTTCCTCT * AS:i:3450 NM:i:155 SA:Z:chr21,35346115,+,33M1I35M1D13M1D4M1I16M1D15M1I14M1I12M1D3M1D39M1D25M1I33M1I10M1I9M1I78M1I66M1I9M1D7M1I37M2I28M1I12M1I6M1I34M1I6M1I17M2I21M1I20M1I29M1I5M1I5M1I12M2I52M1I5M1I7M1I8M1I63M1D8M1I12M1I4M1I59M1D11M1I9M1I8M1D4M2I7M1D16M1D55M1D10M1I4M1I17M1I53M1I17M1I124M1I96M1I3M1I14M1I5M1I2M1I8M2I3M1I2M1I23M1I21M1I123M1I3M1I5M1D7M1I7M1I59M1I84M1I39M1I20M1I28M1I15M1I4M1I12M1I26M1D44M1I3M1I47M1I37M1I3M1D64M1I5M1D65M1I29M1I59M1I3M1I34M1D18M1D15M1D17M1I5M1I52M1D3M1I22M1I56M1I5M1I12M1D23M1I15M1I5M1I53M2I19M1D20M1I15M1I4M1D24M1I12M1I21M1I95M2I30M1I4M1D21M1I6M1I23M1I12M1I41M1I7M1I20M2I11M1D19M1I8M1I16M1D18M2I7M1D19M1I13M1I19M1I7M1I33M1I35M1I10M1I8M1I3M1I16M2I8M1I13M1I29M1I11M1I14M1I17M1I37M1I24M1D38M1I7M1I6M2I95M1D52M1I14M1I16M1I19M1I1M1D35M1I2M1D23M1I1M1I12M1I17M1I3M1I77M1I18M1I3M1I2M1I2M1I9M1I9M1I2M1I10M1I13M1I6M1I2M1I21M1I52M1D2M1I28M1I10M2I28M1I5M1I8M1I15M1I22M1D7M1I19M1I43M1D7M1D9M1I5M1D4M1D12M1I1M1I26M1I4M1I7M1I7M1D21M1I7M1I5M1D5M1I12M1I27M2I32M1I17M1I21M1I13M1D15M1I81M1I19M1D2M1I14M1I8M1I15M1I4M1I38M1I23M1I9M1I12M1I7M1D10M1I6M1I14M1I8M1I28M1I13M1I26M1I35M1I9M1D17M1I8M1I6M1D7M1D6M1I5M1I2M1I38M1I11M1D21M1I25M1D10M2I17M1D15M1I3M1I42M1I17M1D79M1I2M1I17M1I26M1I30M2I52M1I4M1I1M1D16M1I6M1I18M1D19M2I11M1I25M1I19M1D3M1I2M1I16M1I10M1D4M2I15M1I2M1D4M1I37M1I13M2I25M1D32M1D4M1I8M1I13M1I2M1I6M1I1M1D74M1I8M1I14M1I28M1I27M1I37M1D19M1I14M1I5M1I20M1I13M1D4M1D15M1I5M1D27M1D7M1I1M1I23M1I33M1I26M1I44M1D14M1I6M1D12M1I39M1I8M1I5M1I6M1I16M1I15M1I3M1I11M1I22M1D1M1D8M1D110M1I12M1I1M1I4M1I4M1I28M2I15M3405S,60,354;chr21,35351847,+,7076S4M1D1M1D31M1I4M1I10M1I2M1I8M1D8M1D22M1I6M1I9M1D12M1I5M1I47M1I10M1I7M2I6M1I6M1I5M1I3M1I2M1I3M1D7M1I5M1D9M1I15M1I9M1D19M1I25M1D8M1I43M1I6M1I4M1I18M1D13M1I11M1I1M1I37M1I11M1I17M1I3M1I2M1I13M1I6M1I17M1I36M2242S,22,51;chr21,35351854,+,6495S38M1I24M1D44M1I1M1I16M2I16M1I16M1I7M1D18M1I20M1I3M1I11M1I8M1I14M1D1M1D30M1I5M2I5M1I4M1I13M1I1M1I24M1I2M1I13M1I4M2I1M1I4M1I14M1D17M1I37M1I20M1I21M1I14M1I3M1I1M1I36M1I9M1D28M2828S,40,41; read1 2048 chr21 35351847 22 7076S4M1D1M1D31M1I4M1I10M1I2M1I8M1D8M1D22M1I6M1I9M1D12M1I5M1I47M1I10M1I7M2I6M1I6M1I5M1I3M1I2M1I3M1D7M1I5M1D9M1I15M1I9M1D19M1I25M1D8M1I43M1I6M1I4M1I18M1D13M1I11M1I1M1I37M1I11M1I17M1I3M1I2M1I13M1I6M1I17M1I36M2242S * 0 0 
TCCTTGCCTTTCAGGTGCTTTAAGAGGGCTTATCGTTCTTTGACTCCCCTAGTAACTCAAAAGGGTGGCAAGATGCAACTCTGAGGCTTTCCAAGCTCTCTGCAAGCCTGCCTTCAGTAAGTCTGTGCACCACAAAGAAGCATAACTTATATCCTCTAGCTGAATTTACCTCCTGCTTCGAGCCCCTGTTTCCCATGTCATTTTAGGGGTCCAGATGTTTTTCCTGGAGGCATATACACACACACACTACACACACACAACACACACATCACACACACACACACACACACATATATACACACACACATTCCAGGGGCCTCTGAGGTGCCAAGACATGTGTGGGGTCCAATCTGGTTAACACTGATGTCCATGATCATTGAGTTCATTTTTTATATCAACTATGTGACACTTCCTGATAGTTTCCGTTCCTGTGTTAGAATTTGCAAAGTTGCCGACTATTTCTTTGAATGAAGAGTAGCTAGATCATGGGTCTACACCAGGTCATCAAGCAGCCGATTTCCCCTGTTGGAAGCTCCAAGCAAATTGTCAGCTCCAAGCATTGCCACACAGCTGTGGGTGTGCCACACTCCTTTTCCTTTTAATAAGATGATGATCTGCTAGTGTGTGAATGCTCTTGGAGTTTTATTTGTAGGTTTTTTCCTTGTATGTCGCAAATGTATCTATATTTCATCTTCCCTCTTGAGGGATATTTTCCATGGGTAGAGGATCTAGCGCTTTCTCTTTTGCGATTCCATGTGTGGCTTTAAAAAAAAGTCAGTTGACTGTTTATTTATGGCTTCTCTAAAGGTGATCTCTTTCTCCTAAGGACTGCTTTAAAAGCTTTCGCGTTCCCTCCCTCCCTTCCTCCCACCCCATCCCCCTTCTTCCTTTCCTTCCTCCCTCCCTCCTCCCTTCTGCTTCCTCCCTTTTCTCCCTGTCGTTTCCTCCTTTCTCCCATCCTCCCTCCTCCCTCTTTCCTTCTTTTTTCCCTTCCTCCCGCCCTCCTTCCTTCTTCCATCTTATATATTATAAATAATTTTCTTTCTTTCTTTATTTTTTTTTGAGATGGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAACGGTGGTGATCTTGGCTTACTGCCAACCTCCACCTCCCTGGTTCAAGCGATTCACCTGCCTCAGCCTCCCTAGTAGCTGGGATTACAGGTGCCCGCCACCACACCCAGCTAATTTTTGTAATTGTAGTAGAGATGGGGTTTCACCATGGTGGGCCAGGCTGGTCTTGAACTCCTGAACTCAGGTAATCCACCCGCCTTGTCCTCCCAAAGTGCTGGAATTACAGGCATGAGCCACCACTCCCAGCCCTAATTAAAATATTTTCTTTCTGAGGCCCTTCCCTACCCTCTTGTACTTCTTTTGATACCGCCATCATAATGCATGTTAGGTGGACTCATCTGTGTACTCTATTACCCTCTCATCTGGTTTTTAATCTTTCTGTCTCCTTATACTGTAATCTCAGTGTTTCTTCTGCTTTAATTTTGGTATATGAATTATTCAGCTGTGTTCAAAGTGTGGTTCAAATGCGACTATATGCTTCTTTAAGTTTTTAGTTATTGTACTTCTCACCTCTAGAAGTTCTTCTGTGACACTTCTTATAGTTTCGCAGTTCTTGGGTAGAATTTGCAAACTTGTTGACTATTTCTTTGAAAAGAGTAGCTAGAATTGTTTTAGTCTGTGTATAGTGAGTATGAATATCTGAAACCTCTGTGAGTCTGTTTCTTTTGTCTCGCTAGCCCTACTCATGGCGTCCATGTTTTCCTGGATGTTTAGTTGGTTGTTCTTTGAATATGTCCAAACTCCCAGTTATTTCTGGGGAATACCCAAAGGCACACCTGTTCCTATCACCCAGTGAACTGCCATGTGTTTGTCTCCTGTCATGGTAGACATTTTGCATTCAGGGAGAGGCCTCTGCTCTCTGCAGAACAGTGTGAAATGCACCTCATTTTTGCCTTCAATGAAGGTTGTCTCTTGCTTGGCACCACCACTTCCTCAAATGACTAATGCCACTCTTGCTCACAGTGCTAAGGCGCTAAGGAGTTTATCAGAGGCTAGAGTGATTCTGCATCCTCTGGGGTTCTCAGGAAGCGAGGCCACAGGGCCATCTGTGCATGTAGTGACAGTGGGGTGGGGTGGATGAGCAGTTGGCAGTGGTGGGCACCTATGCTGCTGGCGCCACAGCTTTTTATATCTTGGTGAAAACAGACGCCCTTGCGTTTTCACTTGTAGATTCTCCGGCTACATCCACAAGAACAAATTCTGTCACAGAGGCCAGATAGACTATCAGTGTAATGGGCCCTTTCCCCTTTGATGATAGCAGCTTTTTTCGGGGGAGCAGGGGTTGCCAAACTTCTACGCCTTATTTTATCTTCCTTTCTGAGTTCTCCCCAGAGGCTGCTGGGTCCACCTCTCCCTCTGCCAGACAGACAGGCTCAGGACATAGCAAGAACGGAAGGGGTGTGGGGGTTGGTGGGAGGTATTAAGTTGGGGAAACGACAGCTCTTGTCTTTGTAGATTACCAGAACTGACACACGTACACACACGTACACTCACGCTCACTCACTCACTCATCTATCATCTATTTTTGGCCTAAGCTGCCTTTGGACACTTCCTTCGAAAAGCACATGAACTCTTCGGAGTTCTCCTGTTCCACCTTGGTAAATTTCCTATAGCCAACGCACTGAAAGTCCCTGCTGCCCTCCTTCCTCTGAGCTTGTGGGGCCCACAGATCCCCTGCTCCATTTCCTGCTTCATTTCAGCTGATGGTGAGCTTCCAGGGCAGGCTTGTAGGAAGGGCGGGCTAAGGCTGGGGAACTGCAGCACCCACCACCACCCGCCCGCCCCAGTGCTTGTTGCCCCTAGCCTCTGTCTCTCCTCGCTGTGCCCTTCCCTTAAGCCCCACTCTTGGCCTCCCACAAAGACAAGAGTTTGTTCTGTTTTTATATTGATAGGATGAACTCCCTCGTTCTAATACCTATCTGAATAGCCTGAGCAATTACATTTACAACCTCATGAAAAATACACAGCACTTGTCACGATGAATGATGTTTACCGTGAATAATTGCAGTTTTGAGTTCAAACATCCTTCTATAGTCCAAGAATGAGTAAATATCCCTATTCCCTACCCCTCCCCATTGCATTTGGGTTTGACATGACATGCAATCATTGTGAGTTTTACTGCCTGGGGTAGAATTGTTTTTGCCTGTATTGCCATTGCCAATTGACTGGAAGGACAAAAAAAGGAAACTTACCAATTGAGCAGTTTCTAGATGAACTTAAATATCTCAGAAATCTTGAAGCAATGAATGAACTATAACTTTCTGAAGTTGCAAAAATGATTCAAGTCGGAAGTGACCCATGGGAGCAACCTGGTCCTTAATTAGGACTGGAAAGAAAACGGTAGGCCCATGCGAGGTGGAACAGTGGCTTCCCCAAGGTCACACTGCAAGTGAGTGGTGGCACTAGTCACCCAGGAGTCCTCCTGACTCCATGGTATCCTGACCCCAGACCAGAG
CCTTATTTGTGGTTAAAGAACGTGGAAAAACAGCTTATGTTTGAATTTTAGAAATCATGCCAGTAGCTAAAGATCTGCATTCTCATGAATATTGAGCTTTGCGTTGTGCGGGTTGCTACGAATTTTGTAACTATTTTCTCCTAGAGCTCTCAGTATTTTACATATGACCAATTCCATTCTATTATTGTCCCGGGAAAAGAGTGTGCATTTACTGAGCAGTAAGAGGGTGATTTGAAGTCCCTGGGGTCCCCCACCCTTGTCTTTCTATGTGAGATTGTATGTGCAGGGCTCACATTTATGTCTCCTCCAGGCAGCTCATGTGCACAGACTAGTTGTATTTATTAGAGCACCTACTGTGGTGCCATGGAAGCTATGTGGTAAGCTAAGCAGGACAAAAACTCCTGCTCTCACAAGAAGAAAGAATAAACAAGCAGACAATTGCCCGTATAATGTCAGCCTGGTGTGATGGCTCATCTCCTGGAATACCCTAGCACTTTGGGAGGTGACGATGGGAGGGATTGGCTTTAACTTCGGGAGTTCAAGATAAAGCCTGGGCAACCAAGACCCTTCTCTACAAAAAAAATCAAAAAATTAGACTGGGGGTCATGGCATGAGTCTCTGGCCCCTCGGGACCAGAGGCTCGGGAGCTGAGGCGTGGTTTTCACTGAGCCTGGGAGCGTTTGAGGCTGCAGTGAGCTGTGATCACAACCACCTGCACTGCCAGCCTGGCCACAGAGCGAGACCCTGTCCTCTAAATTAAACAACAACCCTAATGTCAAGATTAGGAAGTGTGGCAAAAAGGAACAAACAGTCAGGGTGACTGCCATTCTAGAAAGGGTGGTCGAGGGAAGGTCTCCTTGAAGCAGATCCTGTGGGAGTGGAGAGGGTAAGTCACGGACTCTCTGGAGGAACGAAGATTCCAGGTGGAAATGCCTTAGTGGGTGTTAAACGCTTTAACGGGTGTTAGCCAGAAAGCCAGAATGGCTGGAGCTGAAGGGAGTAGAGGAGGCGGCTAGGAAAATGAGGTCAGAAGATGGAGAGCAGGGCCGGATCACGAGAGCGGGCACAGAGAACGAGGAGGGAATTGGATTTTTGGAAATAGATTGTGGCAAAATACAAAATATTAAAATACATAAAATAAAATATGAAACCATAAATGTACTACCCTTTACCCATTTTAAGTATGCCAGTTCAGTTAGTGTTAAATACATTTACATCATTGTTACAACCCGTCTCCAGGAACTCTCTGCATCTTGCAAAATTAACAACTCTATGCCATTTCAACAGTTCGCATTTCCTCTCTCCCTCTCGTCATGGCAACCGCCATTGTTTCTTTCTTGTCTCGTAAATGTAGTACTTTGGGTGTAATCTCATCTAAGTGGAATCACACAGTGTGTGTCCTTTTAGTGACAGACTTTTTCACTTTGCATAATGTCCTGTAATATTCATCCATGTTGTAGCATGGTCAGATTTTATCCTTCCTTCTTAACATTTGTAATATTCCATTGTTACGGCATAGACTACATTTAGCTTATCTGTTCACCTGTGGATGGACAACTTGGGTTGCTATAGTAATAATATTGCTGTAAATATGGGTATACGATTATCTCTTTGTGACCCTGCTTTTAATTGTTTTGGATATATACCCAGAATATGGGAATTACTGAATCATGTTGGAGATTCTGTTTTTGATTTATTTATTTTATTTTTAATTTTTGAGATGGAGTCTCGTTTTCTGTCACCCAGGCTGGAGTGCAGTGACACGATCTCTGCTCACTGCAACCTCCTGCTCCCTTGCACCATTAATTCCTCAAAGATGCAGGCCTGGATCCAACGCCTTTGGGGACGGGTACTCCTGCCCACAGGAAAGAGGTTGAGGAGACAATAATGTCAACGAGAATCATCATCTTTGCTGTGGAAAATGGTGTGCAGAGTATTCTGGTGCCTCTGCCAAGTGATTGTCGCATGCTAGCCTTCCCTAGTAGCTGGGATTACAGGCATTTGCCACCAAGCCCCGGCTAATTTTGATGTATTTTTAGTAGAGACGGGGTTTATCATGTTGGCCAGGCTGGTCTCAAACTCCTGCCTCAAGGTGATCGCACCTGCCTCAGCACTCCCCAAATGGCTGGGATTATAGGTGTGAGCCATGCACCTGGCCTGATTGGTTATTTAAATTCTGAGAGGGGGTATACTAATGTGTTGAAAACACACAGGGATGGGATCAGGTATAGGTGGGGAGGGAGAGCAAAAGTTGGTATTTAGAATATGTTACGTTTGAACAAACCTAATACACATTCCACATCCAAGTGGACATAGAGGAGTTATTTGAAGCTCAGCAAGACACCTGAGTTGGGAGATCAGGAATTTGTGGATCATCTGGATATATTTAGCATTATGCCACAGGGCTGCTGCCCTGATTTTTTTCCCCCTAGACAGTTTCGCTCTGTTGCCTCTAGGCACGACCTTGACTCACTGCAGACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAGCTGGGATTAACAGGTGTCCACCATCACGCCCGGCTAATTTTAGTGCTTTTAATGAGATGGGTTTCACACATGTTGCCAGGTTGGTCTTCAAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCCAAAGTGGCTGGACCAGGCGATGAGCCACTGCACCTCGGCCACCACTGCCTGCTTTATTGTACCGCTTCGGCAAGCTAAGAATGGTTTTCAATTTTGTTTTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTACAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAAGCCTCCCGAGTCACGCTGCGGACGTACAGTTGCCTGCCACCACGCCCAGCTATTATATTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGGCTGCCTGACTTTTTTTCCCCCTAACAGTTTCGCTCTTGTTGCCCAGGCACGACCTTGACTCACTGCACATCCTCTGCCTCCCGGGTGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAAGCTGGGTTACAGGTGTCCACCATCAACGCCCGGCTAATTTTAGTGCCTTATTAATAGAGATAGGGTTTCACCCATGTTGGCCAGGTGTCTCAAACTCCTGACCTCAGGTGATCCACACTGCCATTTGGCGCTCCTCAAAGTGCTGGCCTACGGCATGAGCCACTGCACCTGGCCATCCGACTGCCTGTTTTTGGTACTCCAGCTTGGGCAAGCTAAGAATGTTTTCACATTTTTGTTGTCTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCTCAGGCTGGAGTGCAGTGGCAGCAATCTCAGCTCACTACAAGCTTCCGCCTCCCGGGTGTCATCGGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACCTACAGTTGCTGCTACCACGCCCAGCTAATTTTTTGTTATGATTTTGATCATCTGATATATTTAGCATTTATAGC
CATCAGGTGCTGCTGCCTGGACCTTTTTTTCCCCTAGAAGTTTCGTTCTTGTTGCCCAGGCCACGACTCTTGACTTATGCAACCTCTGCGCTCCCGGGGTTCAAGCGATTCTCTTGCCTCAGCCCCCCGAGTAGCTGGGATTATCAGGTGTCCACCCATCACTGGCCCGGCCTAATTCTTAGTCGCTGTTCTAAAGAGATGAGGTTTTCCATGTTGAGCCAGGTTGGTCTCAGAACTCCTGACTCAGGTGATCCACCTGCCTTTGGCCTCCCAAAGTGCTGGCCAGGATGAGCCAGCTGCACCTGGCCACCACTGCCTGTTTCTGTACCGCTTGGCAAGCCTAAGAGATGGTTTTTCACATTTTTGTTTCTTTTTTTTTGGAGTACGGAGTCTCACCATCTGTCACCCAGGCTGGATTGCAGTGGCACAATCTCAGGCTCACTACAACGCTCCGCCTCCCGGGTTACACAGCTCATTCTCCTGCCTCCAGCCTCCCCGAGTAGCTGGGACTAACAGTTGCCTGCCACCACGCCCAGCTAATTTTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGCTCGCCTGACTTTTTTTCCCCCTAGCCACAGTTTGCTCTTGTTGCCCAGGCACGACCTTACTCACTGCCAACCTCTGGCCTCGCCGGGTTCAAGCGAGTTCTCTTGACCTCAGCCCCCCGAGTAGCTGGGATTACAGGTGTCCACCATCAGCGCCCGGCTAATTTTAGTGCTTTTAATAAGAGATGGGTTTCACCATTTGGCCAGGTGGTCTCAAACTCCTGGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGCCAGGCATGAGCCACTGAACCTGGCCACCACTTGCCTGTTTTTGTACCGCTTGGCAAGCCTAAGAATGGATTTTCACATTTTTGTTTCTTTGTTTTTTGAGAGACGGAGTCTCCACTCCTGTCAACCCAGGCTGGAGATGCAGTGGCACAATCTCACGCTCACTACAAGCTCCGCCTCCCGGGTTACGCCATTCTCACTGCCTCAGTCCTTCCCGAGTGAGCTCGGGCTACAGTTGCCTTGCCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGGTATGGATTCTTCCTGAACTCGTTGATCCGCCTGCCTCGGCCTCCCAAAAGGTGCTGGGATTACAGGCATGAGCCACCGTGACCGGCCACTGCTGCCGTGTTTTTGTACTGCTTGGCATAGCTAAGAATGGTTTTCATCACATTGTGAATAGTTTTGAAGAAAAATTTTTCAAGAATACATTTTGTGGCACACGAAAATTATATAGAAATTCAAAGTTTTCAATGTCACAAATAAGAGGTTTTATATTGGAACATAGCATGCTCACTCATTTAGGTATTGTTACGGCTTGCTTCCACACTACAAGCAGCCAGGGTTCAGAGGTCATGGCAGAGACCAACGTGGCCCACAAAGTGTGAATGTCACTTCCCTTTGTACAAGCTGTTTGCCCTAAAACTCTTGGCTAAGCAGAGTGAGTCATGAGAGAAGAGAGAAGGTGCATGGACTGAGGTCTGTCAAGGGCCTGTCAAGAGCCAGTAACCGTTGCTGTGCGCTCAAGGATGCCAAGATGAGTGGCTTTGCTCCATGAGCAACAAAGCCCAGGGGCCCAGCAAGCCAAGGCTGCTTGCCTCTTTGTCCCCTGGGGCCAGCTGTGCATCTTGTGGATGGGCTGAGAGTTGAGCAACCCATAACAGGCCTCCTGACCCACTGTGAGATGGTGGCATGTGAAAGGACCAGCATCCAACAGTCTTGCCTCTTACTATATTAGTTAATTTCCTGGGGCTACAGGAGCTTATTTACTCAAACTGAGTGACTAAAACAATGGAATGGATTTTCTCTCGCAGTTCTAAGAGGGTATTATTCTTTCAACAATCAAGGTGACAGCACGGCCATGCTCCCGAAAGACAGCTCTAGGAAAAGCTCCTGGTTTCTAGCGGCTGCCTGCACTTCCTTGGCATTCCTTGAGCTTCAGCTGCATCATTCAGTGCTTGCTTCGGTACCACCGTGCCTTCATCCCTGGGTCGCTTCCATCGTCTCTATGTCCAAATGTTCGCATCTTTTATAAGGATGCTTAGTCATTGGATCCCAGGGCCCATCCCTAATCTGGTATGACTTCATAATTAATGTGAGTTGCATTGAAAAGACGCTGTTATCCAAATAAAGCCATCTCACACGTATGGGGACTAGGACGTTCAGACATATCTTTTTGCGGGACAAAATTCTACTCCACTATACTTCCCACACACATGCAGCATTTCCACGTGGTGAATCTAAGAGTATTTAGGGTTCTTGCGATAAAAGGGATATGGCCCGGTTGACTATCCTGGGGATAAGAAATCAAAAGAGAAAAAAACTCTAGTATTGGTGTTAAGACCCTGCAGTTTTACATTCATTTCCTGCACCACTCCCTGCTGCCTGCAAGGCAGGCCAGCTGCTTCACCTAGGGGCAGAATTCGGAGAAAGAATACGTCTCTCTGGATGTAACTACTGCGACTCAGGCAACCCACTAGTGGAGCTTCACAGGTGGTTCATCTGCCAAAGATTCTGAGGTTTTTAGCCGTGAGATGATAGGGTCAGAAACCGGGTGGTGCTCATGGGGGAAAGCAGCTACTGGAAGAGTCTAGATGCTGCTTCCCCTCAGGACTGTGATTGACCTTCAACTGGCCCCTCATCCCCAAGAACCTCAGCTTCTTCCTCT * AS:i:827 NM:i:51 
SA:Z:chr21,35346115,+,33M1I35M1D13M1D4M1I16M1D15M1I14M1I12M1D3M1D39M1D25M1I33M1I10M1I9M1I78M1I66M1I9M1D7M1I37M2I28M1I12M1I6M1I34M1I6M1I17M2I21M1I20M1I29M1I5M1I5M1I12M2I52M1I5M1I7M1I8M1I63M1D8M1I12M1I4M1I59M1D11M1I9M1I8M1D4M2I7M1D16M1D55M1D10M1I4M1I17M1I53M1I17M1I124M1I96M1I3M1I14M1I5M1I2M1I8M2I3M1I2M1I23M1I21M1I123M1I3M1I5M1D7M1I7M1I59M1I84M1I39M1I20M1I28M1I15M1I4M1I12M1I26M1D44M1I3M1I47M1I37M1I3M1D64M1I5M1D65M1I29M1I59M1I3M1I34M1D18M1D15M1D17M1I5M1I52M1D3M1I22M1I56M1I5M1I12M1D23M1I15M1I5M1I53M2I19M1D20M1I15M1I4M1D24M1I12M1I21M1I95M2I30M1I4M1D21M1I6M1I23M1I12M1I41M1I7M1I20M2I11M1D19M1I8M1I16M1D18M2I7M1D19M1I13M1I19M1I7M1I33M1I35M1I10M1I8M1I3M1I16M2I8M1I13M1I29M1I11M1I14M1I17M1I37M1I24M1D38M1I7M1I6M2I95M1D52M1I14M1I16M1I19M1I1M1D35M1I2M1D23M1I1M1I12M1I17M1I3M1I77M1I18M1I3M1I2M1I2M1I9M1I9M1I2M1I10M1I13M1I6M1I2M1I21M1I52M1D2M1I28M1I10M2I28M1I5M1I8M1I15M1I22M1D7M1I19M1I43M1D7M1D9M1I5M1D4M1D12M1I1M1I26M1I4M1I7M1I7M1D21M1I7M1I5M1D5M1I12M1I27M2I32M1I17M1I21M1I13M1D15M1I81M1I19M1D2M1I14M1I8M1I15M1I4M1I38M1I23M1I9M1I12M1I7M1D10M1I6M1I14M1I8M1I28M1I13M1I26M1I35M1I9M1D17M1I8M1I6M1D7M1D6M1I5M1I2M1I38M1I11M1D21M1I25M1D10M2I17M1D15M1I3M1I42M1I17M1D79M1I2M1I17M1I26M1I30M2I52M1I4M1I1M1D16M1I6M1I18M1D19M2I11M1I25M1I19M1D3M1I2M1I16M1I10M1D4M2I15M1I2M1D4M1I37M1I13M2I25M1D32M1D4M1I8M1I13M1I2M1I6M1I1M1D74M1I8M1I14M1I28M1I27M1I37M1D19M1I14M1I5M1I20M1I13M1D4M1D15M1I5M1D27M1D7M1I1M1I23M1I33M1I26M1I44M1D14M1I6M1D12M1I39M1I8M1I5M1I6M1I16M1I15M1I3M1I11M1I22M1D1M1D8M1D110M1I12M1I1M1I4M1I4M1I28M2I15M3405S,60,354;chr21,35351847,+,7658S4M1D43M1I22M2I7M1D25M1D8M1I8M1I5M1I14M1I8M1I43M1I27M1I18M1D9M1D15M1I40M1I33M1I26M1I10M1I29M1I12M1I4M1I5M1I13M1I18M1I28M1D11M1I9M1I2M1I8M1I4M1I3M1D11M1I63M1I2M1I11M1I23M1I3M1I46M1I20M1I18M1I6M1I6M2I6M1I18M1D26M1I7M1I3M1I9M1D9M1I1M1I8M2I39M1I14M1I5M1I27M1I15M1I8M1I24M1I2M2I12M1I29M1I57M1I17M1I2M1I2M1I54M1I2M2I30M1I6M1I21M1I58M1I13M1I23M1I8M1D13M1D11M1D6M2I15M1I20M1D30M1I47M1I16M1I4M1D13M1D4M1I2M1D9M1D4M1I28M1I17M1I4M1I4M1D11M1I13M2I9M1I22M1I10M1I9M1D12M1I16M1D22M1I4M1I30M1I10M1I14M1I4M1I7M1D17M1I17M1I13M1I5M1D10M1I22M1D25M1I8M1I10M1I20M1I18M1I15M1I21M1I15M1I15M1I13M1I7M1I28M1I10M1I5M1I5M1D26M1I6M1D18M1I8M1I6M1I14M1I33M,40,155;chr21,35351854,+,6495S38M1I24M1D44M1I1M1I16M2I16M1I16M1I7M1D18M1I20M1I3M1I11M1I8M1I14M1D1M1D30M1I5M2I5M1I4M1I13M1I1M1I24M1I2M1I13M1I4M2I1M1I4M1I14M1D17M1I37M1I20M1I21M1I14M1I3M1I1M1I36M1I9M1D28M2828S,40,41; read1 2048 chr21 35351854 40 6495S38M1I24M1D44M1I1M1I16M2I16M1I16M1I7M1D18M1I20M1I3M1I11M1I8M1I14M1D1M1D30M1I5M2I5M1I4M1I13M1I1M1I24M1I2M1I13M1I4M2I1M1I4M1I14M1D17M1I37M1I20M1I21M1I14M1I3M1I1M1I36M1I9M1D28M2828S * 0 0 
TCCTTGCCTTTCAGGTGCTTTAAGAGGGCTTATCGTTCTTTGACTCCCCTAGTAACTCAAAAGGGTGGCAAGATGCAACTCTGAGGCTTTCCAAGCTCTCTGCAAGCCTGCCTTCAGTAAGTCTGTGCACCACAAAGAAGCATAACTTATATCCTCTAGCTGAATTTACCTCCTGCTTCGAGCCCCTGTTTCCCATGTCATTTTAGGGGTCCAGATGTTTTTCCTGGAGGCATATACACACACACACTACACACACACAACACACACATCACACACACACACACACACACATATATACACACACACATTCCAGGGGCCTCTGAGGTGCCAAGACATGTGTGGGGTCCAATCTGGTTAACACTGATGTCCATGATCATTGAGTTCATTTTTTATATCAACTATGTGACACTTCCTGATAGTTTCCGTTCCTGTGTTAGAATTTGCAAAGTTGCCGACTATTTCTTTGAATGAAGAGTAGCTAGATCATGGGTCTACACCAGGTCATCAAGCAGCCGATTTCCCCTGTTGGAAGCTCCAAGCAAATTGTCAGCTCCAAGCATTGCCACACAGCTGTGGGTGTGCCACACTCCTTTTCCTTTTAATAAGATGATGATCTGCTAGTGTGTGAATGCTCTTGGAGTTTTATTTGTAGGTTTTTTCCTTGTATGTCGCAAATGTATCTATATTTCATCTTCCCTCTTGAGGGATATTTTCCATGGGTAGAGGATCTAGCGCTTTCTCTTTTGCGATTCCATGTGTGGCTTTAAAAAAAAGTCAGTTGACTGTTTATTTATGGCTTCTCTAAAGGTGATCTCTTTCTCCTAAGGACTGCTTTAAAAGCTTTCGCGTTCCCTCCCTCCCTTCCTCCCACCCCATCCCCCTTCTTCCTTTCCTTCCTCCCTCCCTCCTCCCTTCTGCTTCCTCCCTTTTCTCCCTGTCGTTTCCTCCTTTCTCCCATCCTCCCTCCTCCCTCTTTCCTTCTTTTTTCCCTTCCTCCCGCCCTCCTTCCTTCTTCCATCTTATATATTATAAATAATTTTCTTTCTTTCTTTATTTTTTTTTGAGATGGAGTTTCGCTCTTGTTGCCCAGGCTGGAGTGCAACGGTGGTGATCTTGGCTTACTGCCAACCTCCACCTCCCTGGTTCAAGCGATTCACCTGCCTCAGCCTCCCTAGTAGCTGGGATTACAGGTGCCCGCCACCACACCCAGCTAATTTTTGTAATTGTAGTAGAGATGGGGTTTCACCATGGTGGGCCAGGCTGGTCTTGAACTCCTGAACTCAGGTAATCCACCCGCCTTGTCCTCCCAAAGTGCTGGAATTACAGGCATGAGCCACCACTCCCAGCCCTAATTAAAATATTTTCTTTCTGAGGCCCTTCCCTACCCTCTTGTACTTCTTTTGATACCGCCATCATAATGCATGTTAGGTGGACTCATCTGTGTACTCTATTACCCTCTCATCTGGTTTTTAATCTTTCTGTCTCCTTATACTGTAATCTCAGTGTTTCTTCTGCTTTAATTTTGGTATATGAATTATTCAGCTGTGTTCAAAGTGTGGTTCAAATGCGACTATATGCTTCTTTAAGTTTTTAGTTATTGTACTTCTCACCTCTAGAAGTTCTTCTGTGACACTTCTTATAGTTTCGCAGTTCTTGGGTAGAATTTGCAAACTTGTTGACTATTTCTTTGAAAAGAGTAGCTAGAATTGTTTTAGTCTGTGTATAGTGAGTATGAATATCTGAAACCTCTGTGAGTCTGTTTCTTTTGTCTCGCTAGCCCTACTCATGGCGTCCATGTTTTCCTGGATGTTTAGTTGGTTGTTCTTTGAATATGTCCAAACTCCCAGTTATTTCTGGGGAATACCCAAAGGCACACCTGTTCCTATCACCCAGTGAACTGCCATGTGTTTGTCTCCTGTCATGGTAGACATTTTGCATTCAGGGAGAGGCCTCTGCTCTCTGCAGAACAGTGTGAAATGCACCTCATTTTTGCCTTCAATGAAGGTTGTCTCTTGCTTGGCACCACCACTTCCTCAAATGACTAATGCCACTCTTGCTCACAGTGCTAAGGCGCTAAGGAGTTTATCAGAGGCTAGAGTGATTCTGCATCCTCTGGGGTTCTCAGGAAGCGAGGCCACAGGGCCATCTGTGCATGTAGTGACAGTGGGGTGGGGTGGATGAGCAGTTGGCAGTGGTGGGCACCTATGCTGCTGGCGCCACAGCTTTTTATATCTTGGTGAAAACAGACGCCCTTGCGTTTTCACTTGTAGATTCTCCGGCTACATCCACAAGAACAAATTCTGTCACAGAGGCCAGATAGACTATCAGTGTAATGGGCCCTTTCCCCTTTGATGATAGCAGCTTTTTTCGGGGGAGCAGGGGTTGCCAAACTTCTACGCCTTATTTTATCTTCCTTTCTGAGTTCTCCCCAGAGGCTGCTGGGTCCACCTCTCCCTCTGCCAGACAGACAGGCTCAGGACATAGCAAGAACGGAAGGGGTGTGGGGGTTGGTGGGAGGTATTAAGTTGGGGAAACGACAGCTCTTGTCTTTGTAGATTACCAGAACTGACACACGTACACACACGTACACTCACGCTCACTCACTCACTCATCTATCATCTATTTTTGGCCTAAGCTGCCTTTGGACACTTCCTTCGAAAAGCACATGAACTCTTCGGAGTTCTCCTGTTCCACCTTGGTAAATTTCCTATAGCCAACGCACTGAAAGTCCCTGCTGCCCTCCTTCCTCTGAGCTTGTGGGGCCCACAGATCCCCTGCTCCATTTCCTGCTTCATTTCAGCTGATGGTGAGCTTCCAGGGCAGGCTTGTAGGAAGGGCGGGCTAAGGCTGGGGAACTGCAGCACCCACCACCACCCGCCCGCCCCAGTGCTTGTTGCCCCTAGCCTCTGTCTCTCCTCGCTGTGCCCTTCCCTTAAGCCCCACTCTTGGCCTCCCACAAAGACAAGAGTTTGTTCTGTTTTTATATTGATAGGATGAACTCCCTCGTTCTAATACCTATCTGAATAGCCTGAGCAATTACATTTACAACCTCATGAAAAATACACAGCACTTGTCACGATGAATGATGTTTACCGTGAATAATTGCAGTTTTGAGTTCAAACATCCTTCTATAGTCCAAGAATGAGTAAATATCCCTATTCCCTACCCCTCCCCATTGCATTTGGGTTTGACATGACATGCAATCATTGTGAGTTTTACTGCCTGGGGTAGAATTGTTTTTGCCTGTATTGCCATTGCCAATTGACTGGAAGGACAAAAAAAGGAAACTTACCAATTGAGCAGTTTCTAGATGAACTTAAATATCTCAGAAATCTTGAAGCAATGAATGAACTATAACTTTCTGAAGTTGCAAAAATGATTCAAGTCGGAAGTGACCCATGGGAGCAACCTGGTCCTTAATTAGGACTGGAAAGAAAACGGTAGGCCCATGCGAGGTGGAACAGTGGCTTCCCCAAGGTCACACTGCAAGTGAGTGGTGGCACTAGTCACCCAGGAGTCCTCCTGACTCCATGGTATCCTGACCCCAGACCAGAG
CCTTATTTGTGGTTAAAGAACGTGGAAAAACAGCTTATGTTTGAATTTTAGAAATCATGCCAGTAGCTAAAGATCTGCATTCTCATGAATATTGAGCTTTGCGTTGTGCGGGTTGCTACGAATTTTGTAACTATTTTCTCCTAGAGCTCTCAGTATTTTACATATGACCAATTCCATTCTATTATTGTCCCGGGAAAAGAGTGTGCATTTACTGAGCAGTAAGAGGGTGATTTGAAGTCCCTGGGGTCCCCCACCCTTGTCTTTCTATGTGAGATTGTATGTGCAGGGCTCACATTTATGTCTCCTCCAGGCAGCTCATGTGCACAGACTAGTTGTATTTATTAGAGCACCTACTGTGGTGCCATGGAAGCTATGTGGTAAGCTAAGCAGGACAAAAACTCCTGCTCTCACAAGAAGAAAGAATAAACAAGCAGACAATTGCCCGTATAATGTCAGCCTGGTGTGATGGCTCATCTCCTGGAATACCCTAGCACTTTGGGAGGTGACGATGGGAGGGATTGGCTTTAACTTCGGGAGTTCAAGATAAAGCCTGGGCAACCAAGACCCTTCTCTACAAAAAAAATCAAAAAATTAGACTGGGGGTCATGGCATGAGTCTCTGGCCCCTCGGGACCAGAGGCTCGGGAGCTGAGGCGTGGTTTTCACTGAGCCTGGGAGCGTTTGAGGCTGCAGTGAGCTGTGATCACAACCACCTGCACTGCCAGCCTGGCCACAGAGCGAGACCCTGTCCTCTAAATTAAACAACAACCCTAATGTCAAGATTAGGAAGTGTGGCAAAAAGGAACAAACAGTCAGGGTGACTGCCATTCTAGAAAGGGTGGTCGAGGGAAGGTCTCCTTGAAGCAGATCCTGTGGGAGTGGAGAGGGTAAGTCACGGACTCTCTGGAGGAACGAAGATTCCAGGTGGAAATGCCTTAGTGGGTGTTAAACGCTTTAACGGGTGTTAGCCAGAAAGCCAGAATGGCTGGAGCTGAAGGGAGTAGAGGAGGCGGCTAGGAAAATGAGGTCAGAAGATGGAGAGCAGGGCCGGATCACGAGAGCGGGCACAGAGAACGAGGAGGGAATTGGATTTTTGGAAATAGATTGTGGCAAAATACAAAATATTAAAATACATAAAATAAAATATGAAACCATAAATGTACTACCCTTTACCCATTTTAAGTATGCCAGTTCAGTTAGTGTTAAATACATTTACATCATTGTTACAACCCGTCTCCAGGAACTCTCTGCATCTTGCAAAATTAACAACTCTATGCCATTTCAACAGTTCGCATTTCCTCTCTCCCTCTCGTCATGGCAACCGCCATTGTTTCTTTCTTGTCTCGTAAATGTAGTACTTTGGGTGTAATCTCATCTAAGTGGAATCACACAGTGTGTGTCCTTTTAGTGACAGACTTTTTCACTTTGCATAATGTCCTGTAATATTCATCCATGTTGTAGCATGGTCAGATTTTATCCTTCCTTCTTAACATTTGTAATATTCCATTGTTACGGCATAGACTACATTTAGCTTATCTGTTCACCTGTGGATGGACAACTTGGGTTGCTATAGTAATAATATTGCTGTAAATATGGGTATACGATTATCTCTTTGTGACCCTGCTTTTAATTGTTTTGGATATATACCCAGAATATGGGAATTACTGAATCATGTTGGAGATTCTGTTTTTGATTTATTTATTTTATTTTTAATTTTTGAGATGGAGTCTCGTTTTCTGTCACCCAGGCTGGAGTGCAGTGACACGATCTCTGCTCACTGCAACCTCCTGCTCCCTTGCACCATTAATTCCTCAAAGATGCAGGCCTGGATCCAACGCCTTTGGGGACGGGTACTCCTGCCCACAGGAAAGAGGTTGAGGAGACAATAATGTCAACGAGAATCATCATCTTTGCTGTGGAAAATGGTGTGCAGAGTATTCTGGTGCCTCTGCCAAGTGATTGTCGCATGCTAGCCTTCCCTAGTAGCTGGGATTACAGGCATTTGCCACCAAGCCCCGGCTAATTTTGATGTATTTTTAGTAGAGACGGGGTTTATCATGTTGGCCAGGCTGGTCTCAAACTCCTGCCTCAAGGTGATCGCACCTGCCTCAGCACTCCCCAAATGGCTGGGATTATAGGTGTGAGCCATGCACCTGGCCTGATTGGTTATTTAAATTCTGAGAGGGGGTATACTAATGTGTTGAAAACACACAGGGATGGGATCAGGTATAGGTGGGGAGGGAGAGCAAAAGTTGGTATTTAGAATATGTTACGTTTGAACAAACCTAATACACATTCCACATCCAAGTGGACATAGAGGAGTTATTTGAAGCTCAGCAAGACACCTGAGTTGGGAGATCAGGAATTTGTGGATCATCTGGATATATTTAGCATTATGCCACAGGGCTGCTGCCCTGATTTTTTTCCCCCTAGACAGTTTCGCTCTGTTGCCTCTAGGCACGACCTTGACTCACTGCAGACCTCTGCCTCCCGGGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAGCTGGGATTAACAGGTGTCCACCATCACGCCCGGCTAATTTTAGTGCTTTTAATGAGATGGGTTTCACACATGTTGCCAGGTTGGTCTTCAAACTCCTGACCTCAGGTGATCCACCTGCCTTGGCCTCCCCAAAGTGGCTGGACCAGGCGATGAGCCACTGCACCTCGGCCACCACTGCCTGCTTTATTGTACCGCTTCGGCAAGCTAAGAATGGTTTTCAATTTTGTTTTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCCAGGCTGGAGTGCAGTGGCACAATCTCAGCTCACTACAAGCTCCGCCTCCCGGGTTCACGCCATTCTCCTGCCTCAAGCCTCCCGAGTCACGCTGCGGACGTACAGTTGCCTGCCACCACGCCCAGCTATTATATTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGGCTGCCTGACTTTTTTTCCCCCTAACAGTTTCGCTCTTGTTGCCCAGGCACGACCTTGACTCACTGCACATCCTCTGCCTCCCGGGTGTTCAAGCGATTCTCTTGCCCTCAGCCCCCCGAGTAAGCTGGGTTACAGGTGTCCACCATCAACGCCCGGCTAATTTTAGTGCCTTATTAATAGAGATAGGGTTTCACCCATGTTGGCCAGGTGTCTCAAACTCCTGACCTCAGGTGATCCACACTGCCATTTGGCGCTCCTCAAAGTGCTGGCCTACGGCATGAGCCACTGCACCTGGCCATCCGACTGCCTGTTTTTGGTACTCCAGCTTGGGCAAGCTAAGAATGTTTTCACATTTTTGTTGTCTTTTTTTTTTGGAGACGGAGTCTCACTCTGTCACCTCAGGCTGGAGTGCAGTGGCAGCAATCTCAGCTCACTACAAGCTTCCGCCTCCCGGGTGTCATCGGCCATTCTCCTGCCTCAGCCTCCCGAGTAGCTGGGACCTACAGTTGCTGCTACCACGCCCAGCTAATTTTTTGTTATGATTTTGATCATCTGATATATTTAGCATTTATAGC
CATCAGGTGCTGCTGCCTGGACCTTTTTTTCCCCTAGAAGTTTCGTTCTTGTTGCCCAGGCCACGACTCTTGACTTATGCAACCTCTGCGCTCCCGGGGTTCAAGCGATTCTCTTGCCTCAGCCCCCCGAGTAGCTGGGATTATCAGGTGTCCACCCATCACTGGCCCGGCCTAATTCTTAGTCGCTGTTCTAAAGAGATGAGGTTTTCCATGTTGAGCCAGGTTGGTCTCAGAACTCCTGACTCAGGTGATCCACCTGCCTTTGGCCTCCCAAAGTGCTGGCCAGGATGAGCCAGCTGCACCTGGCCACCACTGCCTGTTTCTGTACCGCTTGGCAAGCCTAAGAGATGGTTTTTCACATTTTTGTTTCTTTTTTTTTGGAGTACGGAGTCTCACCATCTGTCACCCAGGCTGGATTGCAGTGGCACAATCTCAGGCTCACTACAACGCTCCGCCTCCCGGGTTACACAGCTCATTCTCCTGCCTCCAGCCTCCCCGAGTAGCTGGGACTAACAGTTGCCTGCCACCACGCCCAGCTAATTTTTTGTATTTTTGATCATCTGATATATTTAGCATTTATAGCCACAGGGCTGCTCGCCTGACTTTTTTTCCCCCTAGCCACAGTTTGCTCTTGTTGCCCAGGCACGACCTTACTCACTGCCAACCTCTGGCCTCGCCGGGTTCAAGCGAGTTCTCTTGACCTCAGCCCCCCGAGTAGCTGGGATTACAGGTGTCCACCATCAGCGCCCGGCTAATTTTAGTGCTTTTAATAAGAGATGGGTTTCACCATTTGGCCAGGTGGTCTCAAACTCCTGGACCTCAGGTGATCCACCTGCCTTGGCCTCCCAAAGTGCTGGGCCAGGCATGAGCCACTGAACCTGGCCACCACTTGCCTGTTTTTGTACCGCTTGGCAAGCCTAAGAATGGATTTTCACATTTTTGTTTCTTTGTTTTTTGAGAGACGGAGTCTCCACTCCTGTCAACCCAGGCTGGAGATGCAGTGGCACAATCTCACGCTCACTACAAGCTCCGCCTCCCGGGTTACGCCATTCTCACTGCCTCAGTCCTTCCCGAGTGAGCTCGGGCTACAGTTGCCTTGCCACCACGCCCAGCTAATTTTTTGTATTTTTAGTAGAGACGGGGTTTCACTGTGGTATGGATTCTTCCTGAACTCGTTGATCCGCCTGCCTCGGCCTCCCAAAAGGTGCTGGGATTACAGGCATGAGCCACCGTGACCGGCCACTGCTGCCGTGTTTTTGTACTGCTTGGCATAGCTAAGAATGGTTTTCATCACATTGTGAATAGTTTTGAAGAAAAATTTTTCAAGAATACATTTTGTGGCACACGAAAATTATATAGAAATTCAAAGTTTTCAATGTCACAAATAAGAGGTTTTATATTGGAACATAGCATGCTCACTCATTTAGGTATTGTTACGGCTTGCTTCCACACTACAAGCAGCCAGGGTTCAGAGGTCATGGCAGAGACCAACGTGGCCCACAAAGTGTGAATGTCACTTCCCTTTGTACAAGCTGTTTGCCCTAAAACTCTTGGCTAAGCAGAGTGAGTCATGAGAGAAGAGAGAAGGTGCATGGACTGAGGTCTGTCAAGGGCCTGTCAAGAGCCAGTAACCGTTGCTGTGCGCTCAAGGATGCCAAGATGAGTGGCTTTGCTCCATGAGCAACAAAGCCCAGGGGCCCAGCAAGCCAAGGCTGCTTGCCTCTTTGTCCCCTGGGGCCAGCTGTGCATCTTGTGGATGGGCTGAGAGTTGAGCAACCCATAACAGGCCTCCTGACCCACTGTGAGATGGTGGCATGTGAAAGGACCAGCATCCAACAGTCTTGCCTCTTACTATATTAGTTAATTTCCTGGGGCTACAGGAGCTTATTTACTCAAACTGAGTGACTAAAACAATGGAATGGATTTTCTCTCGCAGTTCTAAGAGGGTATTATTCTTTCAACAATCAAGGTGACAGCACGGCCATGCTCCCGAAAGACAGCTCTAGGAAAAGCTCCTGGTTTCTAGCGGCTGCCTGCACTTCCTTGGCATTCCTTGAGCTTCAGCTGCATCATTCAGTGCTTGCTTCGGTACCACCGTGCCTTCATCCCTGGGTCGCTTCCATCGTCTCTATGTCCAAATGTTCGCATCTTTTATAAGGATGCTTAGTCATTGGATCCCAGGGCCCATCCCTAATCTGGTATGACTTCATAATTAATGTGAGTTGCATTGAAAAGACGCTGTTATCCAAATAAAGCCATCTCACACGTATGGGGACTAGGACGTTCAGACATATCTTTTTGCGGGACAAAATTCTACTCCACTATACTTCCCACACACATGCAGCATTTCCACGTGGTGAATCTAAGAGTATTTAGGGTTCTTGCGATAAAAGGGATATGGCCCGGTTGACTATCCTGGGGATAAGAAATCAAAAGAGAAAAAAACTCTAGTATTGGTGTTAAGACCCTGCAGTTTTACATTCATTTCCTGCACCACTCCCTGCTGCCTGCAAGGCAGGCCAGCTGCTTCACCTAGGGGCAGAATTCGGAGAAAGAATACGTCTCTCTGGATGTAACTACTGCGACTCAGGCAACCCACTAGTGGAGCTTCACAGGTGGTTCATCTGCCAAAGATTCTGAGGTTTTTAGCCGTGAGATGATAGGGTCAGAAACCGGGTGGTGCTCATGGGGGAAAGCAGCTACTGGAAGAGTCTAGATGCTGCTTCCCCTCAGGACTGTGATTGACCTTCAACTGGCCCCTCATCCCCAAGAACCTCAGCTTCTTCCTCT * AS:i:879 NM:i:41 
SA:Z:chr21,35346115,+,33M1I35M1D13M1D4M1I16M1D15M1I14M1I12M1D3M1D39M1D25M1I33M1I10M1I9M1I78M1I66M1I9M1D7M1I37M2I28M1I12M1I6M1I34M1I6M1I17M2I21M1I20M1I29M1I5M1I5M1I12M2I52M1I5M1I7M1I8M1I63M1D8M1I12M1I4M1I59M1D11M1I9M1I8M1D4M2I7M1D16M1D55M1D10M1I4M1I17M1I53M1I17M1I124M1I96M1I3M1I14M1I5M1I2M1I8M2I3M1I2M1I23M1I21M1I123M1I3M1I5M1D7M1I7M1I59M1I84M1I39M1I20M1I28M1I15M1I4M1I12M1I26M1D44M1I3M1I47M1I37M1I3M1D64M1I5M1D65M1I29M1I59M1I3M1I34M1D18M1D15M1D17M1I5M1I52M1D3M1I22M1I56M1I5M1I12M1D23M1I15M1I5M1I53M2I19M1D20M1I15M1I4M1D24M1I12M1I21M1I95M2I30M1I4M1D21M1I6M1I23M1I12M1I41M1I7M1I20M2I11M1D19M1I8M1I16M1D18M2I7M1D19M1I13M1I19M1I7M1I33M1I35M1I10M1I8M1I3M1I16M2I8M1I13M1I29M1I11M1I14M1I17M1I37M1I24M1D38M1I7M1I6M2I95M1D52M1I14M1I16M1I19M1I1M1D35M1I2M1D23M1I1M1I12M1I17M1I3M1I77M1I18M1I3M1I2M1I2M1I9M1I9M1I2M1I10M1I13M1I6M1I2M1I21M1I52M1D2M1I28M1I10M2I28M1I5M1I8M1I15M1I22M1D7M1I19M1I43M1D7M1D9M1I5M1D4M1D12M1I1M1I26M1I4M1I7M1I7M1D21M1I7M1I5M1D5M1I12M1I27M2I32M1I17M1I21M1I13M1D15M1I81M1I19M1D2M1I14M1I8M1I15M1I4M1I38M1I23M1I9M1I12M1I7M1D10M1I6M1I14M1I8M1I28M1I13M1I26M1I35M1I9M1D17M1I8M1I6M1D7M1D6M1I5M1I2M1I38M1I11M1D21M1I25M1D10M2I17M1D15M1I3M1I42M1I17M1D79M1I2M1I17M1I26M1I30M2I52M1I4M1I1M1D16M1I6M1I18M1D19M2I11M1I25M1I19M1D3M1I2M1I16M1I10M1D4M2I15M1I2M1D4M1I37M1I13M2I25M1D32M1D4M1I8M1I13M1I2M1I6M1I1M1D74M1I8M1I14M1I28M1I27M1I37M1D19M1I14M1I5M1I20M1I13M1D4M1D15M1I5M1D27M1D7M1I1M1I23M1I33M1I26M1I44M1D14M1I6M1D12M1I39M1I8M1I5M1I6M1I16M1I15M1I3M1I11M1I22M1D1M1D8M1D110M1I12M1I1M1I4M1I4M1I28M2I15M3405S,60,354;chr21,35351847,+,7658S4M1D43M1I22M2I7M1D25M1D8M1I8M1I5M1I14M1I8M1I43M1I27M1I18M1D9M1D15M1I40M1I33M1I26M1I10M1I29M1I12M1I4M1I5M1I13M1I18M1I28M1D11M1I9M1I2M1I8M1I4M1I3M1D11M1I63M1I2M1I11M1I23M1I3M1I46M1I20M1I18M1I6M1I6M2I6M1I18M1D26M1I7M1I3M1I9M1D9M1I1M1I8M2I39M1I14M1I5M1I27M1I15M1I8M1I24M1I2M2I12M1I29M1I57M1I17M1I2M1I2M1I54M1I2M2I30M1I6M1I21M1I58M1I13M1I23M1I8M1D13M1D11M1D6M2I15M1I20M1D30M1I47M1I16M1I4M1D13M1D4M1I2M1D9M1D4M1I28M1I17M1I4M1I4M1D11M1I13M2I9M1I22M1I10M1I9M1D12M1I16M1D22M1I4M1I30M1I10M1I14M1I4M1I7M1D17M1I17M1I13M1I5M1D10M1I22M1D25M1I8M1I10M1I20M1I18M1I15M1I21M1I15M1I15M1I13M1I7M1I28M1I10M1I5M1I5M1D26M1I6M1D18M1I8M1I6M1I14M1I33M,40,155;chr21,35351847,+,7076S4M1D1M1D31M1I4M1I10M1I2M1I8M1D8M1D22M1I6M1I9M1D12M1I5M1I47M1I10M1I7M2I6M1I6M1I5M1I3M1I2M1I3M1D7M1I5M1D9M1I15M1I9M1D19M1I25M1D8M1I43M1I6M1I4M1I18M1D13M1I11M1I1M1I37M1I11M1I17M1I3M1I2M1I13M1I6M1I17M1I36M2242S,22,51;
svim-2.0.0/src/tests/test_Collect.py000066400000000000000000000332441406305341300174230ustar00rootroot00000000000000
import unittest
import pysam
import tempfile

from svim.SVIM_COLLECT import bam_iterator, analyze_alignment_file_querysorted, retrieve_other_alignments
from svim.SVIM_input_parsing import parse_arguments
from random import choice, triangular, uniform


class TestCollect(unittest.TestCase):
    def generate_random_sequence(self, length):
        sequence = ""
        for i in range(length):
            sequence += choice("ACGT")
        return sequence

    def generate_random_cigar_string(self, readlength):
        """Generate random cigar string for a read of a given length.
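        Operation letters are assumed to follow the SAM specification: M = alignment
        match/mismatch, I = insertion, D = deletion, S = soft-clip.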
        Simulate small mismatches and indels but nothing larger than 10bp."""
        softclip_left = round(triangular(0, readlength, min(1000, readlength * 0.5)))
        non_clipped = readlength - softclip_left
        softclip_right = round(triangular(0, non_clipped, min(1000, non_clipped * 0.5)))
        non_clipped = readlength - softclip_left - softclip_right
        sequence = ""
        read_bases_consumed = 0
        while read_bases_consumed < non_clipped:
            #choose next operation
            if len(sequence) == 0 or sequence[-1] == "I" or sequence[-1] == "D":
                next_operation = "M"
                next_length = round(triangular(1, non_clipped - read_bases_consumed, min(30, non_clipped - read_bases_consumed)))
                read_bases_consumed += next_length
            else:
                next_operation = choice("ID")
                if next_operation == "I":
                    next_length = round(triangular(1, min(10, non_clipped - read_bases_consumed), 1))
                    read_bases_consumed += next_length
                else:
                    next_length = round(triangular(1, 10, 1))
            sequence += str(next_length) + next_operation
        return "{0}S{1}{2}S".format(softclip_left, sequence, softclip_right)

    def generate_random_cigar_string_hardclipped(self, readlength):
        """Generate random cigar string for a read of a given length.
        Simulate small mismatches and indels but nothing larger than 10bp.
        Simulate hard-clipping and return tuple (left-clipped, right-clipped, cigar)"""
        hardclip_left = round(triangular(0, readlength, min(1000, readlength * 0.5)))
        non_clipped = readlength - hardclip_left
        hardclip_right = round(triangular(0, non_clipped, min(1000, non_clipped * 0.5)))
        non_clipped = readlength - hardclip_left - hardclip_right
        sequence = ""
        read_bases_consumed = 0
        while read_bases_consumed < non_clipped:
            #choose next operation
            if len(sequence) == 0 or sequence[-1] == "I" or sequence[-1] == "D":
                next_operation = "M"
                next_length = round(triangular(1, non_clipped - read_bases_consumed, min(30, non_clipped - read_bases_consumed)))
                read_bases_consumed += next_length
            else:
                next_operation = choice("ID")
                if next_operation == "I":
                    next_length = round(triangular(1, min(10, non_clipped - read_bases_consumed), 1))
                    read_bases_consumed += next_length
                else:
                    next_length = round(triangular(1, 10, 1))
            sequence += str(next_length) + next_operation
        return (hardclip_left, hardclip_right, "{0}H{1}{2}H".format(hardclip_left, sequence, hardclip_right))

    def generate_read(self, qname, flag):
        rname = "chr1"
        pos = int(uniform(1,249250620))
        mapq = int(triangular(0, 60, 50))
        length = int(triangular(100, 20000, 15000))
        cigar = self.generate_random_cigar_string(length)
        seq = self.generate_random_sequence(length)
        read_info = (qname, flag, rname, pos, mapq, cigar, "*", 0, 0, seq, "*", "")
        return read_info

    def generate_split_read_with_sa_tags(self, qname, flag):
        length = int(triangular(100, 20000, 15000))
        seq = self.generate_random_sequence(length)
        suppl_rname = "chr1"
        suppl_pos = int(uniform(1,249250620))
        suppl_mapq = int(triangular(0, 60, 50))
        suppl_hardclipped_left, suppl_hardclipped_right, suppl_cigar = self.generate_random_cigar_string_hardclipped(length)
        prim_rname = "chr1"
        prim_pos = int(uniform(1,249250620))
        prim_mapq = int(triangular(0, 60, 50))
        prim_cigar = self.generate_random_cigar_string(length)
        supplementary_read_info = (
            qname,
            flag + 2048,
            suppl_rname,
            suppl_pos,
            suppl_mapq,
            suppl_cigar,
            "*",
            0,
            0,
            seq[suppl_hardclipped_left:-suppl_hardclipped_right],
            "*",
            "SA:Z:{rname},{pos},{strand},{cigar},{mapq},{nm};".format(rname=prim_rname, pos=prim_pos, strand=("-" if flag & 16 else "+"), cigar=prim_cigar, mapq=prim_mapq, nm=0))
        primary_read_info = (
            qname,
            flag,
            prim_rname,
            prim_pos,
            prim_mapq,
            prim_cigar,
            "*",
            0,
            0,
            seq,
            "*",
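            # The SA:Z tag below points at the other alignment of this read as
            # rname,pos,strand,CIGAR,mapQ,NM; (field layout per the SAM specification).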
"SA:Z:{rname},{pos},{strand},{cigar},{mapq},{nm};".format(rname=suppl_rname, pos=suppl_pos, strand=("-" if flag & 16 else "+"), cigar=suppl_cigar.replace("H", "S"), mapq=suppl_mapq, nm=0)) return (primary_read_info, supplementary_read_info) def setUp(self): self.bam_file = tempfile.NamedTemporaryFile() header = """@HD VN:1.0 SO:queryname @SQ SN:chr1 LN:249250621 @SQ SN:chr2 LN:243199373 @SQ SN:chr3 LN:198022430 @SQ SN:chr4 LN:191154276 @SQ SN:chr5 LN:180915260 @SQ SN:chr6 LN:171115067 @SQ SN:chr7 LN:159138663 @SQ SN:chr8 LN:146364022 @SQ SN:chr9 LN:141213431 @SQ SN:chr10 LN:135534747 @SQ SN:chr11 LN:135006516 @SQ SN:chr12 LN:133851895 @SQ SN:chr13 LN:115169878 @SQ SN:chr14 LN:107349540 @SQ SN:chr15 LN:102531392 @SQ SN:chr16 LN:90354753 @SQ SN:chr17 LN:81195210 @SQ SN:chr18 LN:78077248 @SQ SN:chr19 LN:59128983 @SQ SN:chr20 LN:63025520 @SQ SN:chr21 LN:48129895 @SQ SN:chr22 LN:51304566 @SQ SN:chrX LN:155270560 @SQ SN:chrY LN:59373566 @SQ SN:chr6_ssto_hap7 LN:4928567 @SQ SN:chr6_mcf_hap5 LN:4833398 @SQ SN:chr6_cox_hap2 LN:4795371 @SQ SN:chr6_mann_hap4 LN:4683263 @SQ SN:chr6_apd_hap1 LN:4622290 @SQ SN:chr6_qbl_hap6 LN:4611984 @SQ SN:chr6_dbb_hap3 LN:4610396 @SQ SN:chr17_ctg5_hap1 LN:1680828 @SQ SN:chr4_ctg9_hap1 LN:590426 @SQ SN:chr1_gl000192_random LN:547496 @SQ SN:chrUn_gl000225 LN:211173 @SQ SN:chr4_gl000194_random LN:191469 @SQ SN:chr4_gl000193_random LN:189789 @SQ SN:chr9_gl000200_random LN:187035 @SQ SN:chrUn_gl000222 LN:186861 @SQ SN:chrUn_gl000212 LN:186858 @SQ SN:chr7_gl000195_random LN:182896 @SQ SN:chrUn_gl000223 LN:180455 @SQ SN:chrUn_gl000224 LN:179693 @SQ SN:chrUn_gl000219 LN:179198 @SQ SN:chr17_gl000205_random LN:174588 @SQ SN:chrUn_gl000215 LN:172545 @SQ SN:chrUn_gl000216 LN:172294 @SQ SN:chrUn_gl000217 LN:172149 @SQ SN:chr9_gl000199_random LN:169874 @SQ SN:chrUn_gl000211 LN:166566 @SQ SN:chrUn_gl000213 LN:164239 @SQ SN:chrUn_gl000220 LN:161802 @SQ SN:chrUn_gl000218 LN:161147 @SQ SN:chr19_gl000209_random LN:159169 @SQ SN:chrUn_gl000221 LN:155397 @SQ SN:chrUn_gl000214 LN:137718 @SQ SN:chrUn_gl000228 LN:129120 @SQ SN:chrUn_gl000227 LN:128374 @SQ SN:chr1_gl000191_random LN:106433 @SQ SN:chr19_gl000208_random LN:92689 @SQ SN:chr9_gl000198_random LN:90085 @SQ SN:chr17_gl000204_random LN:81310 @SQ SN:chrUn_gl000233 LN:45941 @SQ SN:chrUn_gl000237 LN:45867 @SQ SN:chrUn_gl000230 LN:43691 @SQ SN:chrUn_gl000242 LN:43523 @SQ SN:chrUn_gl000243 LN:43341 @SQ SN:chrUn_gl000241 LN:42152 @SQ SN:chrUn_gl000236 LN:41934 @SQ SN:chrUn_gl000240 LN:41933 @SQ SN:chr17_gl000206_random LN:41001 @SQ SN:chrUn_gl000232 LN:40652 @SQ SN:chrUn_gl000234 LN:40531 @SQ SN:chr11_gl000202_random LN:40103 @SQ SN:chrUn_gl000238 LN:39939 @SQ SN:chrUn_gl000244 LN:39929 @SQ SN:chrUn_gl000248 LN:39786 @SQ SN:chr8_gl000196_random LN:38914 @SQ SN:chrUn_gl000249 LN:38502 @SQ SN:chrUn_gl000246 LN:38154 @SQ SN:chr17_gl000203_random LN:37498 @SQ SN:chr8_gl000197_random LN:37175 @SQ SN:chrUn_gl000245 LN:36651 @SQ SN:chrUn_gl000247 LN:36422 @SQ SN:chr9_gl000201_random LN:36148 @SQ SN:chrUn_gl000235 LN:34474 @SQ SN:chrUn_gl000239 LN:33824 @SQ SN:chr21_gl000210_random LN:27682 @SQ SN:chrUn_gl000231 LN:27386 @SQ SN:chrUn_gl000229 LN:19913 @SQ SN:chrM LN:16571 @SQ SN:chrUn_gl000226 LN:15008 @SQ SN:chr18_gl000207_random LN:4262 @PG ID:ngmlr PN:nextgenmap-lr VN:0.2.7 CL:ngmlr -t 10 -r hg19.fa -q reads.fa -o reads.ngmlr.hg19.bam""" self.bam_file.write(header.encode('utf-8')) self.read_infos = [] #10 reads with only primary alignment for index in range(10): read_info = self.generate_read("read{}".format(index+1), 0) 
            self.read_infos.append(read_info)
            sam_entry = "\n" + "\t".join([str(el) for el in read_info])
            self.bam_file.write(sam_entry.encode('utf-8'))
        #10 reads with primary and supplementary alignment
        for index in range(10, 20):
            primary_read_info, supplementary_read_info = self.generate_split_read_with_sa_tags("read{}".format(index+1), 0)
            #primary with SA tag
            self.read_infos.append(primary_read_info)
            sam_entry = "\n" + "\t".join([str(el) for el in primary_read_info])
            self.bam_file.write(sam_entry.encode('utf-8'))
            #supplementary
            self.read_infos.append(supplementary_read_info)
            sam_entry = "\n" + "\t".join([str(el) for el in supplementary_read_info])
            self.bam_file.write(sam_entry.encode('utf-8'))
        self.bam_file.seek(0)
        self.alignment_file = pysam.AlignmentFile(self.bam_file.name, "rb")

    def test_bam_iterator(self):
        bam_it = bam_iterator(self.alignment_file)
        num_primary_only = 0
        num_primary_supplementary = 0
        num_total = 0
        for prim, suppl, sec in bam_it:
            if len(prim) == 1 and len(suppl) == 0 and len(sec) == 0:
                num_primary_only += 1
            if len(prim) == 1 and len(suppl) == 1 and len(sec) == 0:
                num_primary_supplementary += 1
            num_total += 1
        self.assertEqual(num_total, 20)
        self.assertEqual(num_primary_only, 10)
        self.assertEqual(num_primary_supplementary, 10)

    def test_analyze_alignment_file_querysorted(self):
        arguments = ['alignment', 'myworkdir', 'mybamfile', 'mygenome']
        options = parse_arguments('1.2.0', arguments)
        signatures, translocation_signatures_all_bnds = analyze_alignment_file_querysorted(self.alignment_file, options)
        self.assertEqual(len([sig for sig in signatures if sig.signature == "cigar"]), 0)

    def test_retrieve_supplementary_alignment_from_primary(self):
        alignment_it = self.alignment_file.fetch(until_eof=True)
        alignments = list(alignment_it)
        for i in range(10,30,2):
            primary = alignments[i]
            supplementary = alignments[i+1]
            retrieved_supplementary_alns = retrieve_other_alignments(primary, self.alignment_file)
            self.assertEqual(len(retrieved_supplementary_alns), 1)
            self.assertEqual(retrieved_supplementary_alns[0].cigarstring, supplementary.cigarstring.replace("H", "S"))
            self.assertEqual(retrieved_supplementary_alns[0].reference_id, supplementary.reference_id)
            self.assertEqual(retrieved_supplementary_alns[0].reference_start, supplementary.reference_start)
            self.assertEqual(retrieved_supplementary_alns[0].reference_end, supplementary.reference_end)
            self.assertEqual(retrieved_supplementary_alns[0].flag, supplementary.flag)
            self.assertEqual(retrieved_supplementary_alns[0].mapping_quality, supplementary.mapping_quality)
            self.assertEqual(retrieved_supplementary_alns[0].query_sequence[retrieved_supplementary_alns[0].query_alignment_start:retrieved_supplementary_alns[0].query_alignment_end], supplementary.query_sequence)
            self.assertEqual(retrieved_supplementary_alns[0].query_name, supplementary.query_name)

    def test_retrieve_primary_alignment_from_supplementary(self):
        alignment_it = self.alignment_file.fetch(until_eof=True)
        alignments = list(alignment_it)
        for i in range(10,30,2):
            primary = alignments[i]
            supplementary = alignments[i+1]
            retrieved_primary_alns = retrieve_other_alignments(supplementary, self.alignment_file)
            self.assertEqual(len(retrieved_primary_alns), 0)
svim-2.0.0/src/tests/test_SVCandidate.py000066400000000000000000000124161406305341300201610ustar00rootroot00000000000000
import unittest
import unittest.mock

from svim.SVCandidate import CandidateDeletion, CandidateInversion
from svim.SVSignature import SignatureDeletion


class DeletionCandidateTestCase(unittest.TestCase):
    def setUp(self):
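        # Fixture, as exercised by the tests below: three deletion candidates plus
        # one inversion, used to probe keys, sources, rounding, distances, and VCF output.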
"chr1" self.start = 1000 self.end = 2000 self.members = [SignatureDeletion(self.contig, self.start, self.end, "cigar", "read1")] self.score = 2 self.std_span = 10.2346 self.std_pos = 21.3453 self.deletion = CandidateDeletion(self.contig, self.start, self.end, self.members, self.score, self.std_span, self.std_pos) self.distance = 1000 self.size = 1000 self.other_contig = 'chr9' self.deletion2 = CandidateDeletion(self.contig, self.end+self.distance, self.end+self.distance+self.size, self.members, self.score, self.std_span, self.std_pos) self.inversion = CandidateInversion(self.contig, self.end+self.distance, self.end+self.distance+self.size, self.members, self.score, self.std_span, self.std_pos) self.deletion3 = CandidateDeletion(self.other_contig, self.end+self.distance, self.end+self.distance+self.size, self.members, self.score, self.std_span, self.std_pos) def test_get_key(self): self.assertEqual(self.deletion.get_key(), ("DEL", self.contig, self.end), 'incorrect key') def test_get_source(self): self.assertEqual(self.deletion.get_source(), (self.contig, self.start, self.end), 'incorrect source') def test_get_std_span(self): self.assertEqual(self.deletion.get_std_span(), round(self.std_span, 2)) self.assertEqual(self.deletion.get_std_span(3), round(self.std_span, 3)) def test_get_std_pos(self): self.assertEqual(self.deletion.get_std_pos(), round(self.std_pos, 2)) self.assertEqual(self.deletion.get_std_pos(3), round(self.std_pos, 3)) def test_downstream_distance_to(self): self.assertEqual(self.deletion.downstream_distance_to(self.deletion2), self.distance, 'incorrect distance') self.assertEqual(self.deletion2.downstream_distance_to(self.deletion), 0, 'distance should be 0 for upstream variant') self.assertEqual(self.deletion.downstream_distance_to(self.inversion), float('Inf'), 'distance should be Inf for other variant types') self.assertEqual(self.deletion.downstream_distance_to(self.deletion3), float('Inf'), 'distance should be Inf for variant on other contig') def test_get_vcf_entry(self): vcf_string_normal = "\t".join(["chr1", "1000", "PLACEHOLDERFORID", "N", "", "2", "PASS", "SVTYPE=DEL;END=2000;SVLEN=-1000;SUPPORT=1;STD_SPAN=10.23;STD_POS=21.35", "GT:DP:AD", "./.:.:.,."]) self.assertEqual(self.deletion.get_vcf_entry(), vcf_string_normal, 'incorrect VCF record') ref_allele = "ACGTCGGATCGCAT" alt_allele = "A" reference = unittest.mock.Mock() reference.fetch.side_effect = [ref_allele, alt_allele] vcf_string_sequence_allele = "\t".join(["chr1", "1000", "PLACEHOLDERFORID", "ACGTCGGATCGCAT", "A", "2", "PASS", "SVTYPE=DEL;END=2000;SVLEN=-1000;SUPPORT=1;STD_SPAN=10.23;STD_POS=21.35", "GT:DP:AD", "./.:.:.,."]) self.assertEqual(self.deletion.get_vcf_entry(sequence_alleles=True, reference=reference), vcf_string_sequence_allele, 'incorrect VCF record with sequence alleles') svim-2.0.0/src/tests/test_Signature.py000066400000000000000000000027121406305341300177730ustar00rootroot00000000000000import unittest from svim.SVSignature import SignatureDeletion,SignatureInsertion class TestSVSignature(unittest.TestCase): def test_accessors(self): deletion = SignatureDeletion("chr1", 100, 300, "cigar", "read1") self.assertEqual(deletion.get_source(), ("chr1", 100, 300)) self.assertEqual(deletion.get_key(), ("DEL", "chr1", 300)) def test_downstream_distance_to(self): deletion1 = SignatureDeletion("chr1", 100, 300, "cigar", "read1") deletion2 = SignatureDeletion("chr1", 450, 500, "cigar", "read2") deletion3 = SignatureDeletion("chr1", 150, 200, "cigar", "read3") deletion4 = SignatureDeletion("chr2", 350, 400, 
"cigar", "read3") insertion = SignatureInsertion("chr1", 150, 200, "cigar", "read2", "ACGTAGTAGCTAGCTTTGCTAGCATTAGCGACTGCTTACGCAGCTCCCTA") self.assertEqual(deletion1.downstream_distance_to(deletion2), 150) self.assertEqual(deletion1.downstream_distance_to(deletion3), 0) self.assertEqual(deletion1.downstream_distance_to(deletion4), float("Inf")) self.assertEqual(deletion1.downstream_distance_to(insertion), float("Inf")) def test_as_string(self): deletion1 = SignatureDeletion("chr1", 100, 300, "cigar", "read1") self.assertEqual(deletion1.as_string(), "chr1\t100\t300\tDEL;cigar\tread1") self.assertEqual(deletion1.as_string(":"), "chr1:100:300:DEL;cigar:read1") if __name__ == '__main__': unittest.main() svim-2.0.0/src/tests/test_clustering.py000066400000000000000000000054611406305341300202150ustar00rootroot00000000000000import unittest import tempfile from random import uniform, choices from svim.SVIM_clustering import form_partitions, partition_and_cluster from svim.SVSignature import SignatureDeletion from svim.SVIM_input_parsing import parse_arguments class TestSVIMClustering(unittest.TestCase): def setUp(self): self.signatures = [] #group 0 for i in range(10): center = 100000 + uniform(-100, 100) half_span = 1000 + uniform(-100, 100) new_sig = SignatureDeletion("chr1", center - half_span, center + half_span, "cigar", str(i)) self.signatures.append(new_sig) #group 1 for i in range(10, 20): center = 200000 + uniform(-100, 100) half_span = 1000 + uniform(-100, 100) new_sig = SignatureDeletion("chr1", center - half_span, center + half_span, "cigar", str(i)) self.signatures.append(new_sig) #group 2 for i in range(20, 30): center = 100000 + uniform(-100, 100) half_span = 2000 + uniform(-100, 100) new_sig = SignatureDeletion("chr1", center - half_span, center + half_span, "cigar", str(i)) self.signatures.append(new_sig) self.temp_genome = tempfile.NamedTemporaryFile(mode="w") self.temp_genome.write('>chr1\n') self.temp_genome.write("".join(choices(["A", "C", "G", "T"], k=300000))) self.options = parse_arguments('1.5.0', ['alignment', 'myworkdir', 'mybamfile', self.temp_genome.name]) def tearDown(self): self.temp_genome.close() def test_partitioning(self): partitions = form_partitions(self.signatures, 100) self.assertEqual(len(partitions), 2) for partition in partitions: groups_in_partition = set([int(member.read) // 10 for member in partition]) self.assertTrue(groups_in_partition in [set([0,2]), set([1])]) def test_partitioning_large_distance(self): partitions = form_partitions(self.signatures, 100000) self.assertEqual(len(partitions), 1) groups_in_partition = set([int(member.read) // 10 for member in partitions[0]]) self.assertEqual(groups_in_partition, set([0,1,2])) def test_clustering(self): clusters = partition_and_cluster(self.signatures, options=self.options, type="deleted regions") self.assertEqual(len(clusters), 3) for cluster in clusters: self.assertEqual(len(set([int(member.read) // 10 for member in cluster.members])), 1) def test_scores(self): clusters = partition_and_cluster(self.signatures, options=self.options, type="deleted regions") for cluster in clusters: self.assertGreaterEqual(cluster.score, 10) self.assertLessEqual(cluster.score, 10 + 20/8) if __name__ == '__main__': unittest.main() svim-2.0.0/src/tests/test_consensus.py000066400000000000000000000035601406305341300200540ustar00rootroot00000000000000import unittest from unittest.mock import Mock from random import choices from svim.SVIM_COMBINE import generate_insertion_consensus from svim.SVSignature import 
from svim.SVSignature import SignatureInsertion, SignatureClusterUniLocal


class TestSVIMConsensus(unittest.TestCase):
    def setUp(self):
        self.nucleotides = ["A", "C", "G", "T"]
        #Prepare reference
        self.genome = "A"*100 + "C"*100
        self.reference = Mock()
        self.reference.fetch = lambda contig, start, end: self.genome[start:end]

    def test_skipping(self):
        insertion_signatures = []
        for i in range(10):
            insertion_signatures.append(SignatureInsertion("chr1", 100, 100100, "suppl", "read"+str(i), "".join(choices(self.nucleotides, k=100000))))
        cluster_long = SignatureClusterUniLocal("chr1", 100, 100100, 10, 10, insertion_signatures, "INS", 0, 0)
        status_code, _ = generate_insertion_consensus(cluster_long, self.reference, maximum_haplotype_length=10000)
        self.assertEqual(status_code, 1)

    def test_simple(self):
        #Prepare cluster
        insertion_sequence = "".join(choices(self.nucleotides, k=100))
        insertion_signatures = []
        for i in range(10):
            insertion_signatures.append(SignatureInsertion("chr1", 100, 200, "cigar", "read"+str(i), insertion_sequence))
        cluster_simple = SignatureClusterUniLocal("chr1", 100, 200, 10, 10, insertion_signatures, "INS", 0, 0)
        status_code, result = generate_insertion_consensus(cluster_simple, self.reference)
        self.assertEqual(status_code, 0)
        realigned_insertion_start, realigned_insertion_size, insertion_consensus = result
        self.assertEqual(realigned_insertion_start, 100)
        self.assertEqual(realigned_insertion_size, 100)
        self.assertEqual(insertion_consensus, insertion_sequence)


if __name__ == '__main__':
    unittest.main()
svim-2.0.0/src/tests/test_input_parsing.py000066400000000000000000000023151406305341300207130ustar00rootroot00000000000000
import unittest
import tempfile

from svim.SVIM_input_parsing import guess_file_type, parse_arguments


class TestInputParsing(unittest.TestCase):
    def test_guess_file_type(self):
        fasta_paths = ["/test/path/test.file.fa", "/test/path/test.file.fasta", "/test/path/test.file.FA"]
        fastq_paths = ["/test/path/test.file.fq", "/test/path/test.file.FQ", "/test/path/test.file.fastq"]
        for p in fasta_paths:
            self.assertEqual(guess_file_type(p), "fasta")
            zipped_path1 = p + ".gz"
            zipped_path2 = p + ".gzip"
            self.assertEqual(guess_file_type(zipped_path1), "fasta_gzip")
            self.assertEqual(guess_file_type(zipped_path2), "fasta_gzip")
            list_path = p + ".fn"
            self.assertEqual(guess_file_type(list_path), "list")
        for p in fastq_paths:
            self.assertEqual(guess_file_type(p), "fastq")
            zipped_path1 = p + ".gz"
            zipped_path2 = p + ".gzip"
            self.assertEqual(guess_file_type(zipped_path1), "fastq_gzip")
            self.assertEqual(guess_file_type(zipped_path2), "fastq_gzip")
            list_path = p + ".fn"
            self.assertEqual(guess_file_type(list_path), "list")
svim-2.0.0/src/tests/test_inter.py000066400000000000000000000007071406305341300171550ustar00rootroot00000000000000
import unittest

from svim.SVIM_inter import is_similar


class TestSVIMInter(unittest.TestCase):
    def test_is_similar(self):
        self.assertFalse(is_similar("chrI", 0, 100, "chrII", 0, 100))
        self.assertTrue(is_similar("chrI", 0, 100, "chrI", 0, 100))
        self.assertTrue(is_similar("chrI", 0, 100, "chrI", 10, 90))
        self.assertFalse(is_similar("chrI", 100, 105, "chrI", 21, 100))


if __name__ == '__main__':
    unittest.main()
svim-2.0.0/src/tests/test_intra.py000066400000000000000000000017421406305341300171510ustar00rootroot00000000000000
import unittest

from svim.SVIM_intra import analyze_cigar_indel


class TestSVIMIntra(unittest.TestCase):
    def test_analyze_cigar_indel(self):
        tuples = [(5,10), (4,20), (0,10), (7,10), (8,5), (0,5), (1,50), (0,30), (4,25), (5,15)]
        indels = [(30, 50, 50, "INS")]
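        # The fixtures use pysam-style cigartuples (op, length) with op codes
        # 0=M, 1=I, 2=D, 4=S, 5=H, 7== and 8=X. Judging from the expected values,
        # each reported indel is (reference offset, read offset including soft-clips,
        # length, type): here S20+M10+=10+X5+M5 puts the 50 bp insertion at read
        # offset 50 and reference offset 30.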
        self.assertEqual(analyze_cigar_indel(tuples, 30), indels)
        tuples = [(5,10), (4,20), (0,30), (2,50), (0,30), (4,25), (5,15)]
        indels = [(30, 50, 50, "DEL")]
        self.assertEqual(analyze_cigar_indel(tuples, 30), indels)
        tuples = [(5,10), (4,20), (0,30), (2,40), (1,50), (0,30), (4,25), (5,15)]
        indels = [(30, 50, 40, "DEL"), (70, 50, 50, "INS")]
        self.assertEqual(analyze_cigar_indel(tuples, 30), indels)
        tuples = [(5,10), (4,20), (0,30), (1,40), (2,50), (0,30), (4,25), (5,15)]
        indels = [(30, 50, 40, "INS"), (30, 90, 50, "DEL")]
        self.assertEqual(analyze_cigar_indel(tuples, 30), indels)


if __name__ == '__main__':
    unittest.main()
svim-2.0.0/src/tests/test_satag.py000066400000000000000000000034421406305341300171320ustar00rootroot00000000000000
import unittest
import pysam
import os

from svim.SVIM_COLLECT import retrieve_other_alignments


class TestSAExtraction(unittest.TestCase):
    def setUp(self):
        TESTDATA_FILENAME = os.path.join(os.path.dirname(__file__), 'chimeric_read.sam')
        self.samfile = pysam.AlignmentFile(TESTDATA_FILENAME, "r")
        self.alignments = list(self.samfile.fetch(until_eof=True))

    def test_fetch(self):
        self.assertEqual(len(self.alignments), 4)

    def test_satag_length(self):
        primary = self.alignments[0]
        supplementary_alns = retrieve_other_alignments(primary, self.samfile)
        self.assertEqual(len(supplementary_alns), 3)

    def test_satag_extraction_complete(self):
        primary = self.alignments[0]
        supplementary_alns = retrieve_other_alignments(primary, self.samfile)
        for index, aln in enumerate(supplementary_alns):
            self.assertEqual(aln.cigarstring, self.alignments[index+1].cigarstring)
            self.assertEqual(aln.reference_id, self.alignments[index+1].reference_id)
            self.assertEqual(aln.reference_start, self.alignments[index+1].reference_start)
            self.assertEqual(aln.reference_end, self.alignments[index+1].reference_end)
            self.assertEqual(aln.flag, self.alignments[index+1].flag)
            self.assertEqual(aln.mapping_quality, self.alignments[index+1].mapping_quality)
            self.assertEqual(aln.query_sequence, self.alignments[index+1].query_sequence)
            self.assertEqual(aln.query_name, self.alignments[index+1].query_name)
            self.assertEqual(aln.query_alignment_start, self.alignments[index+1].query_alignment_start)
            self.assertEqual(aln.query_alignment_end, self.alignments[index+1].query_alignment_end)


if __name__ == '__main__':
    unittest.main()
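# Illustrative addendum, not part of the original test suite: a minimal sketch of
# splitting an SA:Z tag value (as constructed in the tests above and stored in
# chimeric_read.sam) into per-alignment fields. The helper name is hypothetical;
# the field order follows the SAM specification.
def parse_sa_tag(sa_value):
    """Return (rname, pos, strand, cigar, mapq, nm) tuples for each ';'-terminated entry."""
    alignments = []
    for entry in sa_value.rstrip(";").split(";"):
        if not entry:
            continue
        rname, pos, strand, cigar, mapq, nm = entry.split(",")
        alignments.append((rname, int(pos), strand, cigar, int(mapq), int(nm)))
    return alignments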