pax_global_header00006660000000000000000000000064144555306170014524gustar00rootroot0000000000000052 comment=40b9fb2f9a3c8dac498d5de6241af4f76146eed8 lambda-lambda-v2.0.1/000077500000000000000000000000001445553061700143505ustar00rootroot00000000000000lambda-lambda-v2.0.1/.gitmodules000066400000000000000000000001071445553061700165230ustar00rootroot00000000000000[submodule "seqan"] path = include/seqan url = ../../seqan/seqan.git lambda-lambda-v2.0.1/.travis.yml000066400000000000000000000047101445553061700164630ustar00rootroot00000000000000sudo: false language: cpp matrix: include: - os: linux compiler: gcc-4.9 addons: apt: sources: ['ubuntu-toolchain-r-test'] packages: ['g++-4.9', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] install: export CXX="g++-4.9" env: CMAKE_ARGS="-DLAMBDA_STATIC_BUILD=1" - os: linux compiler: gcc-5 addons: apt: sources: ['ubuntu-toolchain-r-test'] packages: ['g++-5', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] install: export CXX="g++-5" env: CMAKE_ARGS="-DLAMBDA_MULTIOPT_BUILD=1" - os: linux compiler: gcc-7 addons: apt: sources: ['ubuntu-toolchain-r-test'] packages: ['g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] install: export CXX="g++-7" env: CMAKE_ARGS="-DCMAKE_CXX_FLAGS=-std=c++17" - os: linux compiler: clang-3.9 addons: apt: sources: ['ubuntu-toolchain-r-test', 'llvm-toolchain-trusty-3.9'] packages: ['clang-3.9', 'g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++ install: export CXX="clang++-3.9" - os: linux compiler: clang-5 addons: apt: sources: ['ubuntu-toolchain-r-test'] packages: ['clang-5.0', 'g++-7', 'cmake', 'cmake-data', 'zlib1g-dev', 'libbz2-dev', 'libboost-dev', 'python', 
'python-nose', 'python-jinja2', 'python-pip'] # g++ required for newer libstdc++ install: export CXX="clang++-5.0" - os: osx compiler: gcc-4.9 before_install: - brew update - brew install gcc@4.9 install: export CXX="g++-4.9" env: CMAKE_ARGS="-DLAMBDA_FASTBUILD=1 -DLAMBDA_STATIC_BUILD=1" - os: osx osx_image: xcode10.1 compiler: gcc-7 before_install: - brew update - brew install gcc@7 install: export CXX="g++-7" env: CMAKE_ARGS="-DLAMBDA_FASTBUILD=1" script: - mkdir -p build && cd build - cmake .. -DLAMBDA_NATIVE_BUILD=0 -DLAMBDA_COMPILE_THREADS=2 ${CMAKE_ARGS} - travis_wait make VERBOSE=1 # need to prefix with travis_wait because it might take > 10min - ctest . lambda-lambda-v2.0.1/CMakeLists.txt000066400000000000000000000046731445553061700171220ustar00rootroot00000000000000# =========================================================================== # Lambda # =========================================================================== cmake_minimum_required (VERSION 3.0.0) string(ASCII 27 Esc) set(ColourBold "${Esc}[1m") set(ColourReset "${Esc}[m") set(ColourRed "${Esc}[31m") message ("${ColourBold}Compiler Detection${ColourReset}") project (lambda2 CXX) # ---------------------------------------------------------------------------- # Make "Release" the default cmake build type # ---------------------------------------------------------------------------- if (NOT CMAKE_BUILD_TYPE) set (CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo" FORCE) endif () # ---------------------------------------------------------------------------- # Begin of dependency detection # ---------------------------------------------------------------------------- message ("\n${ColourBold}Dependency detection${ColourReset}") if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/include/seqan/include/seqan/version.h") set (CMAKE_INCLUDE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/include/seqan/include ${CMAKE_INCLUDE_PATH}) set (CMAKE_PREFIX_PATH 
${CMAKE_CURRENT_SOURCE_DIR}/include/seqan/util/cmake ${CMAKE_PREFIX_PATH}) set (CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/include/seqan/util/cmake ${CMAKE_MODULE_PATH}) message (STATUS "Found a local SeqAn library provided with the Lambda source code.") message ( " This will be preferred over system global headers.") endif () # ---------------------------------------------------------------------------- # Add Lambda targets # ---------------------------------------------------------------------------- add_subdirectory(src) # ---------------------------------------------------------------------------- # Warn if cmake build type is not "Release" # ---------------------------------------------------------------------------- if (NOT CMAKE_BUILD_TYPE STREQUAL Release) message (STATUS "${ColourRed}CMAKE_BUILD_TYPE is not \"Release\", your binaries will be slow.${ColourReset}") endif () # ---------------------------------------------------------------------------- # Add Tests # ---------------------------------------------------------------------------- # message ("\n${ColourBold}Setting up unit tests${ColourReset}") # add_subdirectory(tests) lambda-lambda-v2.0.1/INFO000066400000000000000000000012241445553061700150250ustar00rootroot00000000000000Name: lambda Author: Hannes Hauswedell Maintainer: Hannes Hauswedell License: AGPL v3 Copyright: 2013-2019, Hannes Hauswedell; 2016-2019 Knut Reinert, FU-Berlin Status: under development Description: Lambda is a biological sequence aligner optimized for many query sequences and searches in protein space. 
It is highly compatible with BLAST (bitscore and e-value statistics, tab separated and verbose output formats), much faster than BLAST and many other comparable tools and supports many other input and output formats, including standards-conformant .sam and .bam and many compression types lambda-lambda-v2.0.1/LICENSE-AGPL3.rst000066400000000000000000001037741445553061700170040ustar00rootroot00000000000000GNU Affero General Public License ================================= *Version 3, 19 November 2007* *Copyright © 2007 Free Software Foundation, Inc.* Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble -------- The GNU Affero General Public License is a free, copyleft license for software and other kinds of works, specifically designed to ensure cooperation with the community in the case of network server software. The licenses for most software and other practical works are designed to take away your freedom to share and change the works. By contrast, our General Public Licenses are intended to guarantee your freedom to share and change all versions of a program--to make sure it remains free software for all its users. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for them if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs, and that you know you can do these things. Developers that use our General Public Licenses protect your rights with two steps: **(1)** assert copyright on the software, and **(2)** offer you this License which gives you legal permission to copy, distribute and/or modify the software. 
A secondary benefit of defending all users' freedom is that improvements made in alternate versions of the program, if they receive widespread use, become available for other developers to incorporate. Many developers of free software are heartened and encouraged by the resulting cooperation. However, in the case of software used on network servers, this result may fail to come about. The GNU General Public License permits making a modified version and letting the public access it on a server without ever releasing its source code to the public. The GNU Affero General Public License is designed specifically to ensure that, in such cases, the modified source code becomes available to the community. It requires the operator of a network server to provide the source code of the modified version running there to the users of that server. Therefore, public use of a modified version, on a publicly accessible server, gives the public access to the source code of the modified version. An older license, called the Affero General Public License and published by Affero, was designed to accomplish similar goals. This is a different license, not a version of the Affero GPL, but Affero has released a new version of the Affero GPL which permits relicensing under this license. The precise terms and conditions for copying, distribution and modification follow. TERMS AND CONDITIONS -------------------- 0. Definitions ~~~~~~~~~~~~~~ "This License" refers to version 3 of the GNU Affero General Public License. "Copyright" also means copyright-like laws that apply to other kinds of works, such as semiconductor masks. "The Program" refers to any copyrightable work licensed under this License. Each licensee is addressed as "you". "Licensees" and "recipients" may be individuals or organizations. To "modify" a work means to copy from or adapt all or part of the work in a fashion requiring copyright permission, other than the making of an exact copy. 
The resulting work is called a "modified version" of the earlier work or a work "based on" the earlier work. A "covered work" means either the unmodified Program or a work based on the Program. To "propagate" a work means to do anything with it that, without permission, would make you directly or secondarily liable for infringement under applicable copyright law, except executing it on a computer or modifying a private copy. Propagation includes copying, distribution (with or without modification), making available to the public, and in some countries other activities as well. To "convey" a work means any kind of propagation that enables other parties to make or receive copies. Mere interaction with a user through a computer network, with no transfer of a copy, is not conveying. An interactive user interface displays "Appropriate Legal Notices" to the extent that it includes a convenient and prominently visible feature that **(1)** displays an appropriate copyright notice, and **(2)** tells the user that there is no warranty for the work (except to the extent that warranties are provided), that licensees may convey the work under this License, and how to view a copy of this License. If the interface presents a list of user commands or options, such as a menu, a prominent item in the list meets this criterion. 1. Source Code ~~~~~~~~~~~~~~ The "source code" for a work means the preferred form of the work for making modifications to it. "Object code" means any non-source form of a work. A "Standard Interface" means an interface that either is an official standard defined by a recognized standards body, or, in the case of interfaces specified for a particular programming language, one that is widely used among developers working in that language. 
The "System Libraries" of an executable work include anything, other than the work as a whole, that **(a)** is included in the normal form of packaging a Major Component, but which is not part of that Major Component, and **(b)** serves only to enable use of the work with that Major Component, or to implement a Standard Interface for which an implementation is available to the public in source code form. A "Major Component", in this context, means a major essential component (kernel, window system, and so on) of the specific operating system (if any) on which the executable work runs, or a compiler used to produce the work, or an object code interpreter used to run it. The "Corresponding Source" for a work in object code form means all the source code needed to generate, install, and (for an executable work) run the object code and to modify the work, including scripts to control those activities. However, it does not include the work's System Libraries, or general-purpose tools or generally available free programs which are used unmodified in performing those activities but which are not part of the work. For example, Corresponding Source includes interface definition files associated with source files for the work, and the source code for shared libraries and dynamically linked subprograms that the work is specifically designed to require, such as by intimate data communication or control flow between those subprograms and other parts of the work. The Corresponding Source need not include anything that users can regenerate automatically from other parts of the Corresponding Source. The Corresponding Source for a work in source code form is that same work. 2. Basic Permissions ~~~~~~~~~~~~~~~~~~~~ All rights granted under this License are granted for the term of copyright on the Program, and are irrevocable provided the stated conditions are met. This License explicitly affirms your unlimited permission to run the unmodified Program. 
The output from running a covered work is covered by this License only if the output, given its content, constitutes a covered work. This License acknowledges your rights of fair use or other equivalent, as provided by copyright law. You may make, run and propagate covered works that you do not convey, without conditions so long as your license otherwise remains in force. You may convey covered works to others for the sole purpose of having them make modifications exclusively for you, or provide you with facilities for running those works, provided that you comply with the terms of this License in conveying all material for which you do not control copyright. Those thus making or running the covered works for you must do so exclusively on your behalf, under your direction and control, on terms that prohibit them from making any copies of your copyrighted material outside their relationship with you. Conveying under any other circumstances is permitted solely under the conditions stated below. Sublicensing is not allowed; section 10 makes it unnecessary. 3. Protecting Users' Legal Rights From Anti-Circumvention Law ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ No covered work shall be deemed part of an effective technological measure under any applicable law fulfilling obligations under article 11 of the WIPO copyright treaty adopted on 20 December 1996, or similar laws prohibiting or restricting circumvention of such measures. When you convey a covered work, you waive any legal power to forbid circumvention of technological measures to the extent such circumvention is effected by exercising rights under this License with respect to the covered work, and you disclaim any intention to limit operation or modification of the work as a means of enforcing, against the work's users, your or third parties' legal rights to forbid circumvention of technological measures. 4. 
Conveying Verbatim Copies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You may convey verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice; keep intact all notices stating that this License and any non-permissive terms added in accord with section 7 apply to the code; keep intact all notices of the absence of any warranty; and give all recipients a copy of this License along with the Program. You may charge any price or no price for each copy that you convey, and you may offer support or warranty protection for a fee. 5. Conveying Modified Source Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You may convey a work based on the Program, or the modifications to produce it from the Program, in the form of source code under the terms of section 4, provided that you also meet all of these conditions: * **a)** The work must carry prominent notices stating that you modified it, and giving a relevant date. * **b)** The work must carry prominent notices stating that it is released under this License and any conditions added under section 7. This requirement modifies the requirement in section 4 to "keep intact all notices". * **c)** You must license the entire work, as a whole, under this License to anyone who comes into possession of a copy. This License will therefore apply, along with any applicable section 7 additional terms, to the whole of the work, and all its parts, regardless of how they are packaged. This License gives no permission to license the work in any other way, but it does not invalidate such permission if you have separately received it. * **d)** If the work has interactive user interfaces, each must display Appropriate Legal Notices; however, if the Program has interactive interfaces that do not display Appropriate Legal Notices, your work need not make them do so. 
A compilation of a covered work with other separate and independent works, which are not by their nature extensions of the covered work, and which are not combined with it such as to form a larger program, in or on a volume of a storage or distribution medium, is called an "aggregate" if the compilation and its resulting copyright are not used to limit the access or legal rights of the compilation's users beyond what the individual works permit. Inclusion of a covered work in an aggregate does not cause this License to apply to the other parts of the aggregate. 6. Conveying Non-Source Forms ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You may convey a covered work in object code form under the terms of sections 4 and 5, provided that you also convey the machine-readable Corresponding Source under the terms of this License, in one of these ways: * **a)** Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by the Corresponding Source fixed on a durable physical medium customarily used for software interchange. * **b)** Convey the object code in, or embodied in, a physical product (including a physical distribution medium), accompanied by a written offer, valid for at least three years and valid for as long as you offer spare parts or customer support for that product model, to give anyone who possesses the object code either **(1)** a copy of the Corresponding Source for all the software in the product that is covered by this License, on a durable physical medium customarily used for software interchange, for a price no more than your reasonable cost of physically performing this conveying of source, or **(2)** access to copy the Corresponding Source from a network server at no charge. * **c)** Convey individual copies of the object code with a copy of the written offer to provide the Corresponding Source. 
This alternative is allowed only occasionally and noncommercially, and only if you received the object code with such an offer, in accord with subsection 6b. * **d)** Convey the object code by offering access from a designated place (gratis or for a charge), and offer equivalent access to the Corresponding Source in the same way through the same place at no further charge. You need not require recipients to copy the Corresponding Source along with the object code. If the place to copy the object code is a network server, the Corresponding Source may be on a different server (operated by you or a third party) that supports equivalent copying facilities, provided you maintain clear directions next to the object code saying where to find the Corresponding Source. Regardless of what server hosts the Corresponding Source, you remain obligated to ensure that it is available for as long as needed to satisfy these requirements. * **e)** Convey the object code using peer-to-peer transmission, provided you inform other peers where the object code and Corresponding Source of the work are being offered to the general public at no charge under subsection 6d. A separable portion of the object code, whose source code is excluded from the Corresponding Source as a System Library, need not be included in conveying the object code work. A "User Product" is either **(1)** a "consumer product", which means any tangible personal property which is normally used for personal, family, or household purposes, or **(2)** anything designed or sold for incorporation into a dwelling. In determining whether a product is a consumer product, doubtful cases shall be resolved in favor of coverage. For a particular product received by a particular user, "normally used" refers to a typical or common use of that class of product, regardless of the status of the particular user or of the way in which the particular user actually uses, or expects or is expected to use, the product. 
A product is a consumer product regardless of whether the product has substantial commercial, industrial or non-consumer uses, unless such uses represent the only significant mode of use of the product. "Installation Information" for a User Product means any methods, procedures, authorization keys, or other information required to install and execute modified versions of a covered work in that User Product from a modified version of its Corresponding Source. The information must suffice to ensure that the continued functioning of the modified object code is in no case prevented or interfered with solely because modification has been made. If you convey an object code work under this section in, or with, or specifically for use in, a User Product, and the conveying occurs as part of a transaction in which the right of possession and use of the User Product is transferred to the recipient in perpetuity or for a fixed term (regardless of how the transaction is characterized), the Corresponding Source conveyed under this section must be accompanied by the Installation Information. But this requirement does not apply if neither you nor any third party retains the ability to install modified object code on the User Product (for example, the work has been installed in ROM). The requirement to provide Installation Information does not include a requirement to continue to provide support service, warranty, or updates for a work that has been modified or installed by the recipient, or for the User Product in which it has been modified or installed. Access to a network may be denied when the modification itself materially and adversely affects the operation of the network or violates the rules and protocols for communication across the network. 
Corresponding Source conveyed, and Installation Information provided, in accord with this section must be in a format that is publicly documented (and with an implementation available to the public in source code form), and must require no special password or key for unpacking, reading or copying. 7. Additional Terms ~~~~~~~~~~~~~~~~~~~ "Additional permissions" are terms that supplement the terms of this License by making exceptions from one or more of its conditions. Additional permissions that are applicable to the entire Program shall be treated as though they were included in this License, to the extent that they are valid under applicable law. If additional permissions apply only to part of the Program, that part may be used separately under those permissions, but the entire Program remains governed by this License without regard to the additional permissions. When you convey a copy of a covered work, you may at your option remove any additional permissions from that copy, or from any part of it. (Additional permissions may be written to require their own removal in certain cases when you modify the work.) You may place additional permissions on material, added by you to a covered work, for which you have or can give appropriate copyright permission. 
Notwithstanding any other provision of this License, for material you add to a covered work, you may (if authorized by the copyright holders of that material) supplement the terms of this License with terms: * **a)** Disclaiming warranty or limiting liability differently from the terms of sections 15 and 16 of this License; or * **b)** Requiring preservation of specified reasonable legal notices or author attributions in that material or in the Appropriate Legal Notices displayed by works containing it; or * **c)** Prohibiting misrepresentation of the origin of that material, or requiring that modified versions of such material be marked in reasonable ways as different from the original version; or * **d)** Limiting the use for publicity purposes of names of licensors or authors of the material; or * **e)** Declining to grant rights under trademark law for use of some trade names, trademarks, or service marks; or * **f)** Requiring indemnification of licensors and authors of that material by anyone who conveys the material (or modified versions of it) with contractual assumptions of liability to the recipient, for any liability that these contractual assumptions directly impose on those licensors and authors. All other non-permissive additional terms are considered "further restrictions" within the meaning of section 10. If the Program as you received it, or any part of it, contains a notice stating that it is governed by this License along with a term that is a further restriction, you may remove that term. If a license document contains a further restriction but permits relicensing or conveying under this License, you may add to a covered work material governed by the terms of that license document, provided that the further restriction does not survive such relicensing or conveying. 
If you add terms to a covered work in accord with this section, you must place, in the relevant source files, a statement of the additional terms that apply to those files, or a notice indicating where to find the applicable terms. Additional terms, permissive or non-permissive, may be stated in the form of a separately written license, or stated as exceptions; the above requirements apply either way. 8. Termination ~~~~~~~~~~~~~~ You may not propagate or modify a covered work except as expressly provided under this License. Any attempt otherwise to propagate or modify it is void, and will automatically terminate your rights under this License (including any patent licenses granted under the third paragraph of section 11). However, if you cease all violation of this License, then your license from a particular copyright holder is reinstated **(a)** provisionally, unless and until the copyright holder explicitly and finally terminates your license, and **(b)** permanently, if the copyright holder fails to notify you of the violation by some reasonable means prior to 60 days after the cessation. Moreover, your license from a particular copyright holder is reinstated permanently if the copyright holder notifies you of the violation by some reasonable means, this is the first time you have received notice of violation of this License (for any work) from that copyright holder, and you cure the violation prior to 30 days after your receipt of the notice. Termination of your rights under this section does not terminate the licenses of parties who have received copies or rights from you under this License. If your rights have been terminated and not permanently reinstated, you do not qualify to receive new licenses for the same material under section 10. 9. Acceptance Not Required for Having Copies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ You are not required to accept this License in order to receive or run a copy of the Program. 
Ancillary propagation of a covered work occurring solely as a consequence of using peer-to-peer transmission to receive a copy likewise does not require acceptance. However, nothing other than this License grants you permission to propagate or modify any covered work. These actions infringe copyright if you do not accept this License. Therefore, by modifying or propagating a covered work, you indicate your acceptance of this License to do so. 10. Automatic Licensing of Downstream Recipients ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Each time you convey a covered work, the recipient automatically receives a license from the original licensors, to run, modify and propagate that work, subject to this License. You are not responsible for enforcing compliance by third parties with this License. An "entity transaction" is a transaction transferring control of an organization, or substantially all assets of one, or subdividing an organization, or merging organizations. If propagation of a covered work results from an entity transaction, each party to that transaction who receives a copy of the work also receives whatever licenses to the work the party's predecessor in interest had or could give under the previous paragraph, plus a right to possession of the Corresponding Source of the work from the predecessor in interest, if the predecessor has it or can get it with reasonable efforts. You may not impose any further restrictions on the exercise of the rights granted or affirmed under this License. For example, you may not impose a license fee, royalty, or other charge for exercise of rights granted under this License, and you may not initiate litigation (including a cross-claim or counterclaim in a lawsuit) alleging that any patent claim is infringed by making, using, selling, offering for sale, or importing the Program or any portion of it. 11. 
Patents ~~~~~~~~~~~ A "contributor" is a copyright holder who authorizes use under this License of the Program or a work on which the Program is based. The work thus licensed is called the contributor's "contributor version". A contributor's "essential patent claims" are all patent claims owned or controlled by the contributor, whether already acquired or hereafter acquired, that would be infringed by some manner, permitted by this License, of making, using, or selling its contributor version, but do not include claims that would be infringed only as a consequence of further modification of the contributor version. For purposes of this definition, "control" includes the right to grant patent sublicenses in a manner consistent with the requirements of this License. Each contributor grants you a non-exclusive, worldwide, royalty-free patent license under the contributor's essential patent claims, to make, use, sell, offer for sale, import and otherwise run, modify and propagate the contents of its contributor version. In the following three paragraphs, a "patent license" is any express agreement or commitment, however denominated, not to enforce a patent (such as an express permission to practice a patent or covenant not to sue for patent infringement). To "grant" such a patent license to a party means to make such an agreement or commitment not to enforce a patent against the party. 
If you convey a covered work, knowingly relying on a patent license, and the Corresponding Source of the work is not available for anyone to copy, free of charge and under the terms of this License, through a publicly available network server or other readily accessible means, then you must either **(1)** cause the Corresponding Source to be so available, or **(2)** arrange to deprive yourself of the benefit of the patent license for this particular work, or **(3)** arrange, in a manner consistent with the requirements of this License, to extend the patent license to downstream recipients. "Knowingly relying" means you have actual knowledge that, but for the patent license, your conveying the covered work in a country, or your recipient's use of the covered work in a country, would infringe one or more identifiable patents in that country that you have reason to believe are valid. If, pursuant to or in connection with a single transaction or arrangement, you convey, or propagate by procuring conveyance of, a covered work, and grant a patent license to some of the parties receiving the covered work authorizing them to use, propagate, modify or convey a specific copy of the covered work, then the patent license you grant is automatically extended to all recipients of the covered work and works based on it. A patent license is "discriminatory" if it does not include within the scope of its coverage, prohibits the exercise of, or is conditioned on the non-exercise of one or more of the rights that are specifically granted under this License. 
You may not convey a covered work if you are a party to an arrangement with a third party that is in the business of distributing software, under which you make payment to the third party based on the extent of your activity of conveying the work, and under which the third party grants, to any of the parties who would receive the covered work from you, a discriminatory patent license **(a)** in connection with copies of the covered work conveyed by you (or copies made from those copies), or **(b)** primarily for and in connection with specific products or compilations that contain the covered work, unless you entered into that arrangement, or that patent license was granted, prior to 28 March 2007. Nothing in this License shall be construed as excluding or limiting any implied license or other defenses to infringement that may otherwise be available to you under applicable patent law. 12. No Surrender of Others' Freedom ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot convey a covered work so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not convey it at all. For example, if you agree to terms that obligate you to collect a royalty for further conveying from those to whom you convey the Program, the only way you could satisfy both those terms and this License would be to refrain entirely from conveying the Program. 13. 
Remote Network Interaction; Use with the GNU General Public License ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Notwithstanding any other provision of this License, if you modify the Program, your modified version must prominently offer all users interacting with it remotely through a computer network (if your version supports such interaction) an opportunity to receive the Corresponding Source of your version by providing access to the Corresponding Source from a network server at no charge, through some standard or customary means of facilitating copying of software. This Corresponding Source shall include the Corresponding Source for any work covered by version 3 of the GNU General Public License that is incorporated pursuant to the following paragraph. Notwithstanding any other provision of this License, you have permission to link or combine any covered work with a work licensed under version 3 of the GNU General Public License into a single combined work, and to convey the resulting work. The terms of this License will continue to apply to the part which is the covered work, but the work with which it is combined will remain governed by version 3 of the GNU General Public License. 14. Revised Versions of this License ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The Free Software Foundation may publish revised and/or new versions of the GNU Affero General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies that a certain numbered version of the GNU Affero General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that numbered version or of any later version published by the Free Software Foundation. 
If the Program does not specify a version number of the GNU Affero General Public License, you may choose any version ever published by the Free Software Foundation. If the Program specifies that a proxy can decide which future versions of the GNU Affero General Public License can be used, that proxy's public statement of acceptance of a version permanently authorizes you to choose that version for the Program. Later license versions may give you additional or different permissions. However, no additional obligations are imposed on any author or copyright holder as a result of your choosing to follow a later version. 15. Disclaimer of Warranty ~~~~~~~~~~~~~~~~~~~~~~~~~~ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. Limitation of Liability ~~~~~~~~~~~~~~~~~~~~~~~~~~~ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 17. 
Interpretation of Sections 15 and 16 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability in connection with the Program, unless a warranty or assumption of liability accompanies a copy of the Program in return for a fee. *END OF TERMS AND CONDITIONS* How to Apply These Terms to Your New Programs --------------------------------------------- If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively state the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. | <one line to give the program's name and a brief idea of what it does.> | Copyright (C) <year> <name of author> | | This program is free software: you can redistribute it and/or modify | it under the terms of the GNU Affero General Public License as published by | the Free Software Foundation, either version 3 of the License, or | (at your option) any later version. | | This program is distributed in the hope that it will be useful, | but WITHOUT ANY WARRANTY; without even the implied warranty of | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | GNU Affero General Public License for more details. | | You should have received a copy of the GNU Affero General Public License | along with this program. If not, see <http://www.gnu.org/licenses/>. Also add information on how to contact you by electronic and paper mail. If your software can interact with users remotely through a computer network, you should also make sure that it provides a way for users to get its source.
For example, if your program is a web application, its interface could display a "Source" link that leads users to an archive of the code. There are many ways you could offer source, and different solutions will be better for different programs; see section 13 for the specific requirements. You should also get your employer (if you work as a programmer) or school, if any, to sign a "copyright disclaimer" for the program, if necessary. For more information on this, and how to apply and follow the GNU AGPL, see <http://www.gnu.org/licenses/>. lambda-lambda-v2.0.1/LICENSE-BSD.rst000066400000000000000000000030471445553061700165760ustar00rootroot00000000000000BSD-License (3-clause) ====================== | Copyright (c) 2016-2019, Knut Reinert, Freie Universität Berlin | All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Knut Reinert or the FU Berlin nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lambda-lambda-v2.0.1/LICENSE.rst000066400000000000000000000027311445553061700161670ustar00rootroot00000000000000lambda copyright ================ :: Copyright (c) 2013-2019, Hannes Hauswedell All rights reserved. Lambda is *free software*: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Lambda is distributed in the hope that it will be useful, but **without any warranty**; without even the implied warranty of **merchantability** or **fitness for a particular purpose**. See the file `LICENSE-AGPL3.rst <./LICENSE-AGPL3.rst>`__ or http://www.gnu.org/licenses/ for a full text of the license and the rights and obligations implied. Some of the contributions to Lambda are alternatively or additionally :: Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin These are covered by the three clause BSD license as can be found in the file `LICENSE-BSD.rst <./LICENSE-BSD.rst>`__. In cases of doubt the terms of both licenses apply. 
submodules ========== When Lambda is distributed in binary form or when Lambda is distributed in source form including its submodules the following additional license terms apply: SeqAn copyright --------------- :: Copyright (c) 2006-2019, Knut Reinert and Freie Universität Berlin SeqAn is published under the terms of the three clause BSD license as can be found in the file `include/seqan/LICENSE <./include/seqan/LICENSE>`__ or printed by the parameter ``--copyright``. lambda-lambda-v2.0.1/README.rst000066400000000000000000000306711445553061700160460ustar00rootroot00000000000000Lambda: the Local Aligner for Massive Biological DatA ----------------------------------------------------- Lambda is a local aligner optimized for many query sequences and searches in protein space. It... * is highly compatible to BLAST (bitscore and e-value statistics, tab separated and verbose output formats) * is much faster than BLAST and many other comparable tools * supports many other input and output formats, including standards-conformant ``.sam`` and ``.bam`` and many compression types * has special features for species annotation and taxonomic analysis * is well-documented and easy to use (e.g. provides progress-bars and memory usage estimates) downloads and installation -------------------------- +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | **Executables** | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. 
image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.disk.download.png | Pre-built executables for GNU/Linux, Mac and FreeBSD are available from the | | :alt: Download Executables | `releases page `__. | | :target: https://github.com/seqan/lambda/releases | | | :width: 76px | | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | **Source code** | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.column.three.png | You can also build lambda from source which will result in binaries optimized for your | | :alt: Build from source | specific system (and thus faster). For instructions, please see the | | :target: https://github.com/seqan/lambda/wiki | `wiki `__. | | :width: 76px | | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ usage instructions ------------------ Before you can search, you need to have an index. You can 1. download and unzip a pre-built index from the `wiki `__; or 2. 
index one yourself (this can take some time but only has to be done once): :: % bin/lambda2 mkindexp -d db.fasta *(in case you want to create a nucleotide index, instead use ``mkindexn``)* After that running Lambda is as simple as :: % bin/lambda2 searchp -q query.fasta -i db.fasta.lambda *(in case you want to perform a nucleotide search, instead use ``searchn``)* For a list of options, see the help pages: :: % bin/lambda2 --help % bin/lambda2 COMMAND --help Advanced options are available via ``--full-help`` or the man pages, and more documentation is available in the `wiki `__. authorship and copyright ------------------------ Lambda is being developed by `Hannes Hauswedell `__, but it incorporates a lot of work from other members of the `SeqAn project `__. +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | **Please always cite the publication, also if using Lambda in comparisons and pipelines** | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. 
image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.book.hardcover.open.png | *Lambda: the local aligner for massive biological data*; | | :alt: Please cite | Hannes Hauswedell, Jochen Singer, Knut Reinert; | | :target: http://bioinformatics.oxfordjournals.org/content/30/17/i349.abstract | `Bioinformatics 2014 30 (17): i349-i355 `__; | | :width: 76px | doi: 10.1093/bioinformatics/btu439 | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | **Please respect the license of the software** | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/copyleft.png | Lambda is Free and open source software, so you can use it for any purpose, free of charge. | | :alt: Respect the license | However certain conditions apply when you (re-)distribute and/or modify Lambda, please respect the | | :target: https://github.com/seqan/lambda/blob/master/LICENSE.rst | `license `__. | | :width: 76px | | +------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ feedback & updates ------------------ +-------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. 
image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.social.github.octocat.png | You can ask questions and report bugs on the `github tracker `__ . | | :alt: GitHub | Please also `subscribe `__ and/or star us! | | :target: https://github.com/seqan/lambda/issues | | | :width: 76px | | +-------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.email.png | To stay up to date via e-mail, please subscribe to the | | :alt: Newsletter | `newsletter `__. There is on average less than one e-mail | | :target: https://lists.fu-berlin.de/listinfo/lambda-users | per month. | | :width: 76px | | +-------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ | .. image:: https://raw.githubusercontent.com/seqan/lambda/gh-pages/images_readme/appbar.social.twitter.png | You can also follow SeqAn on `twitter `__ to receive updates on Lambda. 
| | :alt: Newsletter | | | :target: https://twitter.com/SeqAnLib | | | :width: 76px | | +-------------------------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------------------------------------------+ *icons on this page by Austin Andrews / https://github.com/Templarian/WindowsIcons*
lambda-lambda-v2.0.1/bin/000077500000000000000000000000001445553061700151205ustar00rootroot00000000000000lambda-lambda-v2.0.1/bin/lambda2.in000066400000000000000000000033001445553061700167460ustar00rootroot00000000000000
#!/bin/sh
# Dispatcher template: picks between the fail-safe "lambda2" binary and the
# "lambda2-sse4" binary based on the CPU features reported by the host, then
# replaces itself with the chosen binary. The @...@ placeholders are
# substituted by CMake's configure_file().

# Succeeds when the file $1 matches each of the extended regexes $2, $3 and $4.
file_matches_all()
{
    grep -E "$2" -q "$1" 2>/dev/null && \
    grep -E "$3" -q "$1" 2>/dev/null && \
    grep -E "$4" -q "$1" 2>/dev/null
}

# Succeeds when the output of "sysctl machdep.cpu.features" matches regex $1.
sysctl_features_match()
{
    sysctl machdep.cpu.features 2>&1 | grep -E "$1" -q 2>/dev/null
}

# Physical directory containing this script, with a trailing slash.
CURDIR="$(cd "$(dirname "$0")" && pwd -P)/"
SYSTEM_BIN_DIR="@CMAKE_INSTALL_FULL_BINDIR@/"

if [ "${CURDIR}" != "${SYSTEM_BIN_DIR}" ]; then
    # we are just unpacked
    PREFIX="${CURDIR}/../@CMAKE_INSTALL_LIBEXECDIR@"
else
    # we are installed
    PREFIX="@CMAKE_INSTALL_FULL_LIBEXECDIR@"
fi

BIN_FAILSAFE=${PREFIX}/lambda2
BIN_SSE4=${PREFIX}/lambda2-sse4

# failsafe is default
BIN=${BIN_FAILSAFE}

case $(uname) in
    "Linux")
        if file_matches_all /proc/cpuinfo "flags.* popcnt " "flags.* sse4_1 " "flags.* sse4_2 "; then
            export BIN=${BIN_SSE4}
        fi
        ;;
    "FreeBSD")
        if file_matches_all /var/run/dmesg.boot "Feature.*POPCNT" "Feature.*SSE4\.1" "Feature.*SSE4\.2"; then
            export BIN=${BIN_SSE4}
        fi
        ;;
## OpenBSD doesn't yet support POPCNT software side (although it does detect the cpu feature)
#     "OpenBSD")
#         grep -E "cpu.*POPCNT" -q /var/run/dmesg.boot 2>/dev/null && \
#         grep -E "cpu.*SSE4\.1" -q /var/run/dmesg.boot 2>/dev/null && \
#         grep -E "cpu.*SSE4\.2" -q /var/run/dmesg.boot 2>/dev/null && \
#         export BIN=${BIN_SSE4}
#         ;;
    "Darwin")
        if sysctl_features_match "POPCNT" && \
           sysctl_features_match "SSE4\.1" && \
           sysctl_features_match "SSE4\.2"; then
            export BIN=${BIN_SSE4}
        fi
        ;;
esac

# Hand over to the selected binary, forwarding all arguments unchanged.
exec "${BIN}" "${@}"
lambda-lambda-v2.0.1/include/000077500000000000000000000000001445553061700157735ustar00rootroot00000000000000lambda-lambda-v2.0.1/include/seqan/000077500000000000000000000000001445553061700171025ustar00rootroot00000000000000lambda-lambda-v2.0.1/src/000077500000000000000000000000001445553061700151375ustar00rootroot00000000000000lambda-lambda-v2.0.1/src/CMakeLists.txt000066400000000000000000000353171445553061700177100ustar00rootroot00000000000000
# ===========================================================================
#                  SeqAn - The Library for Sequence Analysis
# ===========================================================================
# File: /sandbox/h4nn3s/apps/lambda/CMakeLists.txt
#
# CMakeLists.txt file for lambda.
# ===========================================================================

# ----------------------------------------------------------------------------
# App version
# ----------------------------------------------------------------------------

# change this after every release
set (SEQAN_APP_VERSION_MAJOR "2")
set (SEQAN_APP_VERSION_MINOR "0")
set (SEQAN_APP_VERSION_PATCH "1")

# don't change the following
set (SEQAN_APP_VERSION "${SEQAN_APP_VERSION_MAJOR}.${SEQAN_APP_VERSION_MINOR}.${SEQAN_APP_VERSION_PATCH}")

# adapt when necessary
set (MINIMUM_SEQAN_VERSION "2.3.1")

# ----------------------------------------------------------------------------
# Dependencies (continued)
# ----------------------------------------------------------------------------

# Search SeqAn and select dependencies.
# Optional dependencies are looked up quietly; their absence only triggers a
# warning further down. SeqAn is the one hard requirement.
find_package (OpenMP QUIET)
find_package (ZLIB QUIET)
find_package (BZip2 QUIET)
find_package (SeqAn QUIET REQUIRED CONFIG)

message (STATUS "These dependencies were found:")
message ( " OPENMP ${OPENMP_FOUND} ${OpenMP_CXX_FLAGS}")
message ( " ZLIB ${ZLIB_FOUND} ${ZLIB_VERSION_STRING}")
message ( " BZIP2 ${BZIP2_FOUND} ${BZIP2_VERSION_STRING}")
message ( " SEQAN ${SEQAN_FOUND} ${SEQAN_VERSION_STRING}")

# Warn if OpenMP was not found.
if (NOT OPENMP_FOUND)
    message (WARNING "WARNING WARNING WARNING\nWARNING: OpenMP not found. Lambda will be built without multi-threading! "
                     "This is probably not what you want! Use GCC >= 4.9.1, Clang >= 3.8.0 or ICC >= 16.0.2\nWARNING WARNING WARNING")
endif ()

# Warn if Zlib was not found.
if (NOT ZLIB_FOUND)
    message (WARNING "WARNING: Zlib not found. Building lambda without support for gzipped input and output (this includes support for .bam).")
endif ()

# Warn if BZip2 was not found.
if (NOT BZIP2_FOUND)
    message (WARNING "WARNING: BZip2 not found. Building lambda without support for bzipped input and output.")
endif ()

# Compiler version gate. message (FATAL_ERROR ...) aborts processing on its
# own, so no return () is needed after it.
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.9.1)
        message (FATAL_ERROR "Your GCC version is too old. Minimum version is GCC-4.9.1!")
    endif ()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.6)
        message (FATAL_ERROR "Your Clang version is too old. Please upgrade to 3.8.0 or use GCC.")
    elseif (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.8)
        message (WARNING "Your Clang version is too old, you will not have parallelism! Upgrade to 3.8.0 or newer.")
    endif ()
elseif (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
    if (CMAKE_CXX_COMPILER_VERSION VERSION_LESS 17)
        message (WARNING "Your Intel Compiler version is too old. Please upgrade to 17.0.0 or newer!")
    endif ()
else ()
    message (WARNING "Unknown compiler, you are on your own!")
endif ()

if (SEQAN_VERSION_STRING VERSION_LESS "${MINIMUM_SEQAN_VERSION}")
    message (FATAL_ERROR "The minimum SeqAn version required is ${MINIMUM_SEQAN_VERSION}!")
endif ()

message (STATUS "The requirements were met.")

# ----------------------------------------------------------------------------
# App-Level Configuration
# ----------------------------------------------------------------------------

message ("\n${ColourBold}Build configuration${ColourReset}")

message (STATUS "LAMBDA version is: ${SEQAN_APP_VERSION}")

# User-facing build switches; each ON value is forwarded to the sources as a
# preprocessor definition below.
option (LAMBDA_FASTBUILD "Build only blastp and blastx modes (speeds up build)." OFF)
option (LAMBDA_LINGAPS_OPT "Add optimized codepaths for linear gap costs (increases bin size and compile time)." OFF)
option (LAMBDA_LONG_PROTEIN_SUBJ_SEQS "Make max protein sequence length == 4.3billion instead of 65,535. INVALIDATES INDEXES!" OFF)
option (LAMBDA_MMAPPED_DB "Use mmapped access to the database." OFF)
option (LAMBDA_NATIVE_BUILD "Architecture-specific optimizations, i.e. g++ -march=native." ON)
option (LAMBDA_MULTIOPT_BUILD "Build and install multiple binaries with different optimisation levels + a dispatcher." OFF)
option (LAMBDA_STATIC_BUILD "Include all libraries in the binaries." OFF)

if (LAMBDA_FASTBUILD)
    add_definitions (-DFASTBUILD=1)
endif ()

if (LAMBDA_NATIVE_BUILD)
    add_definitions (-DLAMBDA_NATIVE_BUILD=1)
    set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -march=native")
    if (CMAKE_CXX_COMPILER_ID MATCHES "Intel")
        set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -xHOST -ipo -no-prec-div -fp-model fast=2")
    endif ()
endif ()

if (LAMBDA_STATIC_BUILD)
    add_definitions (-DLAMBDA_STATIC_BUILD=1)
    # Only consider static archives when searching for libraries.
    set (CMAKE_FIND_LIBRARY_SUFFIXES ".a")

    # apple does not support fully static builds, but at least libgcc and libstdc++
    if (APPLE)
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libgcc -static-libstdc++")
        message (WARNING "WARNING: Builds on Mac are never fully static.")
    else ()
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
    endif ()

    # on linux cmake adds -rdynamic automatically which clang can't handle in static builds
    if (CMAKE_SYSTEM_NAME MATCHES "Linux")
        set (CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS "")
    endif ()
endif ()

if (LAMBDA_MMAPPED_DB)
    add_definitions (-DLAMBDA_MMAPPED_DB=1)
endif ()

if (LAMBDA_LINGAPS_OPT)
    add_definitions (-DLAMBDA_LINGAPS_OPT=1)
endif ()

if (LAMBDA_LONG_PROTEIN_SUBJ_SEQS)
    add_definitions (-DLAMBDA_LONG_PROTEIN_SUBJ_SEQS=1)
endif ()

message (STATUS "The following options are selected for the build:")
message ( " LAMBDA_FASTBUILD ${LAMBDA_FASTBUILD}")
message ( " LAMBDA_LINGAPS_OPT ${LAMBDA_LINGAPS_OPT}")
message ( " LAMBDA_LONG_PROTEIN_SUBJ_SEQS ${LAMBDA_LONG_PROTEIN_SUBJ_SEQS}")
message ( " LAMBDA_MMAPPED_DB ${LAMBDA_MMAPPED_DB}")
message ( " LAMBDA_NATIVE_BUILD ${LAMBDA_NATIVE_BUILD}")
message ( " LAMBDA_MULTIOPT_BUILD ${LAMBDA_MULTIOPT_BUILD}")
message ( " LAMBDA_STATIC_BUILD ${LAMBDA_STATIC_BUILD}")
message (STATUS "Run 'cmake -LH' to get a comment on each option.")
message (STATUS "Remove CMakeCache.txt and re-run cmake with -DOPTIONNAME=ON|OFF to change an option.")

# deactivate the version check on broken seqan releases
if (SEQAN_VERSION_STRING VERSION_LESS "2.3.2")
    add_definitions (-DSEQAN_DISABLE_VERSION_CHECK="YES")
endif ()

# ----------------------------------------------------------------------------
# Compiler specifics
# ----------------------------------------------------------------------------

if (CMAKE_CXX_COMPILER_ID MATCHES "Clang")
    set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -ftemplate-depth-1024")

    # do not warn for variable length arrays
    set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -Wno-vla-extension")
endif ()

if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
    # do not warn for variable length arrays
    set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -Wno-vla")

    # parallelize parts of build even for one translation unit
    # (a user-supplied LAMBDA_COMPILE_THREADS takes precedence over detection)
    if (NOT DEFINED LAMBDA_COMPILE_THREADS)
        include (ProcessorCount)
        ProcessorCount (LAMBDA_COMPILE_THREADS)
    endif ()
    if (LAMBDA_COMPILE_THREADS GREATER 1)
        set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto=${LAMBDA_COMPILE_THREADS}")
    endif ()

    # strip binaries to make them smaller
    set (CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -s")
endif ()

if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Intel")
    # -Wextra not met by current SeqAn on intel
    set (SEQAN_CXX_FLAGS "${SEQAN_CXX_FLAGS} -Wextra")
endif ()

# ----------------------------------------------------------------------------
# Build Setup
# ----------------------------------------------------------------------------

# Enable global exception handler for all seqan apps.
set (SEQAN_DEFINITIONS ${SEQAN_DEFINITIONS} -DSEQAN_GLOBAL_EXCEPTION_HANDLER=1)

# Add include directories.
include_directories (${SEQAN_INCLUDE_DIRS})

# Add definitions set by find_package (SeqAn).
add_definitions (${SEQAN_DEFINITIONS})

# Add definitions set by the build system.
add_definitions (-DSEQAN_APP_VERSION="${SEQAN_APP_VERSION}")
add_definitions (-DCMAKE_BUILD_TYPE="${CMAKE_BUILD_TYPE}")

# Set the right output directory
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/bin)

# Add CXX flags found by find_package (SeqAn).
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SEQAN_CXX_FLAGS} -Wall -pedantic")

# Update the list of file names below if you add source files to your application.
set (LAMBDA_SOURCE_FILES
     lambda.cpp
     shared_definitions.hpp
     shared_misc.hpp
     shared_options.hpp
     search.hpp
     search_algo.hpp
     search_datastructures.hpp
     search_misc.hpp
     search_output.hpp
     search_options.hpp
     mkindex.hpp
     mkindex_algo.hpp
     mkindex_misc.hpp
     mkindex_options.hpp
     mkindex_saca.hpp)

add_executable (lambda2 ${LAMBDA_SOURCE_FILES})
target_link_libraries (lambda2 ${SEQAN_LIBRARIES})

# The multi-binary build adds a second executable compiled from the same
# sources with SSE4/POPCNT instruction sets enabled.
if (LAMBDA_MULTIOPT_BUILD)
    add_executable (lambda2-sse4 ${LAMBDA_SOURCE_FILES})
    target_link_libraries (lambda2-sse4 ${SEQAN_LIBRARIES})
    set_target_properties (lambda2-sse4 PROPERTIES COMPILE_FLAGS "-mmmx -msse -msse2 -msse3 -mssse3 -msse4 -mpopcnt")
endif ()

# ----------------------------------------------------------------------------
# Man-pages
# ----------------------------------------------------------------------------

# Each man page is produced by running the freshly built lambda2 binary with
# --export-help=man and redirecting its output into the page file.

# Umbrella man-page
add_custom_command (OUTPUT lambda2.1
                    COMMAND lambda2 --export-help=man > lambda2.1
                    DEPENDS lambda2)
# searchn subcommand
add_custom_command (OUTPUT lambda2-searchn.1
                    COMMAND lambda2 searchn --export-help=man > lambda2-searchn.1
                    DEPENDS lambda2)
# searchp subcommand
add_custom_command (OUTPUT lambda2-searchp.1
                    COMMAND lambda2 searchp --export-help=man > lambda2-searchp.1
                    DEPENDS lambda2)
# mkindexn subcommand
add_custom_command (OUTPUT lambda2-mkindexn.1
                    COMMAND lambda2 mkindexn --export-help=man > lambda2-mkindexn.1
                    DEPENDS lambda2)
# mkindexp subcommand
add_custom_command (OUTPUT lambda2-mkindexp.1
                    COMMAND lambda2 mkindexp --export-help=man > lambda2-mkindexp.1
                    DEPENDS lambda2)

add_custom_target (manual ALL DEPENDS lambda2.1 lambda2-searchn.1 lambda2-searchp.1 lambda2-mkindexn.1 lambda2-mkindexp.1)

# ----------------------------------------------------------------------------
# Installation
# ----------------------------------------------------------------------------

# Adapt to system paths
include (GNUInstallDirs)

if (LAMBDA_MULTIOPT_BUILD)
    # Install lambda binaries into LIBEXECDIR
    install (TARGETS lambda2 lambda2-sse4
             DESTINATION ${CMAKE_INSTALL_LIBEXECDIR})

    # Install dispatcher script into BINDIR
    configure_file (../bin/lambda2.in
                    ${CMAKE_CURRENT_BINARY_DIR}/lambda2-dispatch
                    @ONLY)
    install (FILES ${CMAKE_CURRENT_BINARY_DIR}/lambda2-dispatch
             DESTINATION ${CMAKE_INSTALL_BINDIR}
             RENAME lambda2
             PERMISSIONS OWNER_READ OWNER_WRITE OWNER_EXECUTE GROUP_READ GROUP_EXECUTE WORLD_READ WORLD_EXECUTE)
else ()
    # Plain else (): an "elseif ()" with an empty condition is never true, which
    # would leave the single-binary build without any install rule for lambda2.
    # Install lambda into BINDIR, usually ${PREFIX}/bin
    install (TARGETS lambda2
             DESTINATION ${CMAKE_INSTALL_BINDIR})
endif ()

# Install non-binary files for the package to DOCDIR, usually ${PREFIX}/share/doc/lambda2
install (FILES
         ../LICENSE.rst
         ../LICENSE-BSD.rst
         ../LICENSE-AGPL3.rst
         ../README.rst
         DESTINATION ${CMAKE_INSTALL_DOCDIR})

# Man pages into MANDIR, usually ${PREFIX}/share/man/man1 (or without share)
install (FILES
         ${CMAKE_CURRENT_BINARY_DIR}/lambda2.1
         ${CMAKE_CURRENT_BINARY_DIR}/lambda2-searchn.1
         ${CMAKE_CURRENT_BINARY_DIR}/lambda2-searchp.1
         ${CMAKE_CURRENT_BINARY_DIR}/lambda2-mkindexn.1
         ${CMAKE_CURRENT_BINARY_DIR}/lambda2-mkindexp.1
         DESTINATION ${CMAKE_INSTALL_MANDIR}/man1)

# ----------------------------------------------------------------------------
# CPack Install
# ----------------------------------------------------------------------------

# Information
set (CPACK_PACKAGE_NAME "lambda2")
set (CPACK_PACKAGE_DESCRIPTION_SUMMARY "lambda -- the local aligner for massive biological data")
set (CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../README.rst")
set (CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/../LICENSE.rst")
set (CPACK_PACKAGE_VENDOR "Hannes Hauswedell ")
set (CPACK_PACKAGE_CONTACT "${CPACK_PACKAGE_VENDOR}")
set (CPACK_PACKAGE_VERSION_MAJOR "${SEQAN_APP_VERSION_MAJOR}")
set (CPACK_PACKAGE_VERSION_MINOR "${SEQAN_APP_VERSION_MINOR}")
set (CPACK_PACKAGE_VERSION_PATCH "${SEQAN_APP_VERSION_PATCH}")
set (CPACK_PACKAGE_VERSION "${SEQAN_APP_VERSION}")
set (CPACK_PACKAGE_INSTALL_DIRECTORY "${CPACK_PACKAGE_NAME} ${CPACK_PACKAGE_VERSION}")

# Package format(s)
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
    set (CPACK_GENERATOR "ZIP;NSIS")
elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin")
    set (CPACK_GENERATOR "ZIP;DragNDrop")
elseif (CMAKE_VERSION VERSION_LESS "3.1") # TXZ support since 3.1
    set (CPACK_GENERATOR "TBZ2")
else ()
    set (CPACK_GENERATOR "TXZ")
endif ()

if (CMAKE_SYSTEM_NAME MATCHES "Linux")
    set (CPACK_GENERATOR "${CPACK_GENERATOR};DEB;RPM")
endif ()

# Package architecture
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "amd64")
    set (CMAKE_SYSTEM_PROCESSOR "x86_64")
    set (CPACK_DEBIAN_PACKAGE_ARCHITECTURE "amd64")
endif ()
# Tag the processor name with the instruction-set level found in the flags.
if (CMAKE_CXX_FLAGS MATCHES "avx2")
    set (CMAKE_SYSTEM_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}_avx2")
elseif (CMAKE_CXX_FLAGS MATCHES "sse4")
    set (CMAKE_SYSTEM_PROCESSOR "${CMAKE_SYSTEM_PROCESSOR}_sse4")
endif ()

# Include architecture in package name
if (NOT DEFINED CPACK_SYSTEM_NAME)
    set (CPACK_SYSTEM_NAME "${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
endif ()

include (CPack)
lambda-lambda-v2.0.1/src/lambda.cpp000066400000000000000000000067561445553061700171010ustar00rootroot00000000000000
// ==========================================================================
//                                  lambda
// ==========================================================================
// Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda.cpp: Main File for Lambda // ========================================================================== #include "search.hpp" #include "mkindex.hpp" using namespace seqan; ArgumentParser::ParseResult parseCommandLineMain(int argc, char const ** argv); int main(int argc, char const ** argv) { if (std::string(CMAKE_BUILD_TYPE) != "Release") std::cerr << "WARNING: This binary is not built in release mode and will be much slower than it should be!\n"; int until = argc; bool skipNext = false; for (int i = 1; i < argc; ++i) { // version check expects a parameter if (std::string(argv[i]) == "--version-check") skipNext = true; if (argv[i][0] != '-') { if (skipNext) { skipNext = false; } else { until = i + 1; break; } } } ArgumentParser::ParseResult res = parseCommandLineMain(until, argv); if (res == ArgumentParser::PARSE_ERROR) return ArgumentParser::PARSE_ERROR; else if (res != ArgumentParser::PARSE_OK) return 0; --until; // undo the "+ 1" above if ((std::string(argv[until]) == "searchp") || (std::string(argv[until]) == "searchn")) { return searchMain(argc - until, argv + until); } else if ((std::string(argv[until]) == "mkindexp") || (std::string(argv[until]) == "mkindexn")) { return mkindexMain(argc - until, argv + until); } else { // shouldn't be reached std::cerr << "WRONG ARGUMENTS!\n"; return -1; } } ArgumentParser::ParseResult parseCommandLineMain(int argc, char const ** argv) { ArgumentParser 
parser("lambda2"); setShortDescription(parser, "Lambda, the Local Aligner for Massive Biological DataA"); addUsageLine(parser, "[\\fIOPTIONS\\fP] COMMAND [\\fICOMMAND-OPTIONS\\fP]"); sharedSetup(parser); addArgument(parser, ArgParseArgument(ArgParseArgument::STRING, "COMMAND")); setHelpText(parser, 0, "The sub-program to execute. See below."); setValidValues(parser, 0, "searchp searchn mkindexp mkindexn"); addTextSection(parser, "Available commands"); addText(parser, "\\fBsearchp \\fP– Perform a protein search (BLASTP, BLASTX, TBLASTN, TBLASTX)."); addText(parser, "\\fBsearchn \\fP– Perform a nucleotide search (BLASTN, MEGABLAST)."); addText(parser, "\\fBmkindexp \\fP– Create an index for protein searches."); addText(parser, "\\fBmkindexn \\fP– Create an index for nucleotide searches."); addText(parser, "To view the help page for a specific command, simply run 'lambda command --help'."); return parse(parser, argc, argv); } lambda-lambda-v2.0.1/src/mkindex.hpp000066400000000000000000000272331445553061700173160ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda.cpp: Main File for the main application // ========================================================================== #include #include #include #include #include #define LAMBDA_INDEXER 1 // some things are different for the indexer binary #include "shared_misc.hpp" #include "shared_definitions.hpp" #include "shared_options.hpp" #include "mkindex_misc.hpp" #include "mkindex_options.hpp" #include "mkindex_saca.hpp" #include "mkindex_algo.hpp" using namespace seqan; // ========================================================================== // Forwards // ========================================================================== void argConv0(LambdaIndexerOptions & options); template void argConv1(LambdaIndexerOptions const & options, BlastProgramSelector

const &); template void argConv2(LambdaIndexerOptions const & options, BlastProgramSelector

const &, TRedAlph const &); template void realMain(LambdaIndexerOptions const & options, BlastProgramSelector

const &, TRedAlph const &, TIndexSpecSpec const &); // ========================================================================== // Functions // ========================================================================== // -------------------------------------------------------------------------- // Function main() // -------------------------------------------------------------------------- // Program entry point. int mkindexMain(int const argc, char const ** argv) { // Parse the command line. seqan::ArgumentParser parser; LambdaIndexerOptions options; seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv); // If there was an error parsing or built-in argument parser functionality // was triggered then we exit the program. The return code is 1 if there // were errors and 0 if there were none. if (res != seqan::ArgumentParser::PARSE_OK) return res == seqan::ArgumentParser::PARSE_ERROR; #ifdef NDEBUG try { argConv0(options); } catch (std::bad_alloc const & e) { std::cerr << "ERROR: Lambda ran out of memory :(\n" " You need to split your file into smaller segments.\n"; return -1; } catch (std::exception const & e) { std::cerr << "\n\nERROR: The following unspecified exception was thrown:\n" << " \"" << e.what() << "\"\n" << " If the problem persists, report an issue at https://github.com/seqan/lambda/issues " << "and include this output, as well as the output of `lambda2 --version`, thanks!\n"; return -1; } #else // In debug mode we don't catch the exceptions so that we get a backtrace from SeqAn's handler argConv0(options); #endif return 0; } void argConv0(LambdaIndexerOptions & options) { // set blastProgram if (options.blastProgram == BlastProgram::UNKNOWN) // already implies != BLASTN { myPrint(options, 1, "Detecting database alphabet... 
"); options.subjOrigAlphabet = detectSeqFileAlphabet(options.dbFile); myPrint(options, 1, _alphabetEnumToName(options.subjOrigAlphabet), " detected.\n"); if (options.subjOrigAlphabet == AlphabetEnum::DNA5) // needs to be translated options.blastProgram = BlastProgram::TBLASTX; // or TBLASTX, but difference is irrelevant for indexer else options.blastProgram = BlastProgram::BLASTX; // or BLASTP, but difference is irrelevant for indexer } switch(options.blastProgram) { case BlastProgram::BLASTN: return argConv2(options, BlastProgramSelector(), Dna5{}); // case BlastProgram::BLASTP: // return argConv1(options, BlastProgramSelector()); case BlastProgram::BLASTX: return argConv1(options, BlastProgramSelector()); // case BlastProgram::TBLASTN: // return argConv1(options, BlastProgramSelector()); case BlastProgram::TBLASTX: return argConv1(options, BlastProgramSelector()); default: break; } throw std::invalid_argument("ERROR: Could not determine blast program mode.\n"); } /// Alphabet reduction (skipped in case == BLASTN) template void argConv1(LambdaIndexerOptions const & options, BlastProgramSelector

const &) { using Tp = BlastProgramSelector

; switch (options.reducedAlphabet) { case AlphabetEnum::AMINO_ACID: return argConv2(options, Tp(), AminoAcid()); case AlphabetEnum::MURPHY10: return argConv2(options, Tp(), ReducedAminoAcid()); #if 0 case 10: return argConv2(options, ReducedAminoAcid>()); case 1: return argConv2(options, AminoAcid10()); case 8: return argConv2(options, ReducedAminoAcid>()); case 12: return argConv2(options, ReducedAminoAcid>()); #endif default: break; } throw std::invalid_argument("ERROR: Could not determine alphabet reduction.\n"); } template void argConv2(LambdaIndexerOptions const & options, BlastProgramSelector

const &, TRedAlph const &) { if (options.algo == "radixsort") return realMain(options, BlastProgramSelector

(), TRedAlph(), RadixSortSACreateTag()); else return realMain(options, BlastProgramSelector

(), TRedAlph(), Nothing()); } template void realMain(LambdaIndexerOptions const & options, BlastProgramSelector

const &, TRedAlph const &, TIndexSpecSpec const &) { using TOrigSet = TCDStringSet>>; using TTransSet = TCDStringSet>>; TTransSet translatedSeqs; { TOrigSet originalSeqs; std::unordered_map accToIdRank; // ids get saved to disk again immediately and are not kept in memory loadSubjSeqsAndIds(originalSeqs, accToIdRank, options); // preserve lengths of untranslated sequences if (sIsTranslated(p)) _saveOriginalSeqLengths(originalSeqs.limits, options); if (options.hasSTaxIds) { std::vector taxIdIsPresent; taxIdIsPresent.reserve(2'000'000); // read the mapping file and save relevant mappings to disk mapAndDumpTaxIDs(taxIdIsPresent, accToIdRank, length(originalSeqs), options); // read the mapping file and save relevant mappings to disk parseAndDumpTaxTree(taxIdIsPresent, options); } // translate or swap depending on program translateOrSwap(translatedSeqs, originalSeqs, options); } // dump translated and unreduced sequences (except where they are included in index) if ((options.alphReduction != 0) || (options.dbIndexType != DbIndexType::SUFFIX_ARRAY)) dumpTranslatedSeqs(translatedSeqs, options); // see if final sequence set actually fits into index checkIndexSize(translatedSeqs, options, BlastProgramSelector

()); if (options.dbIndexType == DbIndexType::FM_INDEX) { using TIndexSpec = TFMIndex; generateIndexAndDump(translatedSeqs, options, BlastProgramSelector

(), TRedAlph(), Fwd()); } else if (options.dbIndexType == DbIndexType::BI_FM_INDEX) { // using TIndexSpec = BidirectionalIndex>; // use regular FM-index tag, because we just create two of them using TIndexSpec = TFMIndexInBi; // first create the reverse index (which is actually unreversed) myPrint(options, 1, "Bi-Directional Index [backward]\n"); generateIndexAndDump(translatedSeqs, options, BlastProgramSelector

(), TRedAlph(), Rev()); // then create the regular/forward fm-index (which is actually reversed) myPrint(options, 1, "Bi-Directional Index [forward]\n"); generateIndexAndDump(translatedSeqs, options, BlastProgramSelector

(), TRedAlph(), Fwd()); } #ifdef LAMBDA_LEGACY_PATHS else { using TIndexSpec = IndexSa; generateIndexAndDump(translatedSeqs, options, BlastProgramSelector

(), TRedAlph(), Fwd()); } #endif // dump options for (auto && s : std::initializer_list> { { options.indexDir + "/option:db_index_type", std::to_string(static_cast(options.dbIndexType))}, { options.indexDir + "/option:alph_original", std::string(_alphTypeToName(OrigSubjAlph

())) }, { options.indexDir + "/option:alph_translated", std::string(_alphTypeToName(TransAlph

())) }, { options.indexDir + "/option:alph_reduced", std::string(_alphTypeToName(TRedAlph())) }, { options.indexDir + "/option:genetic_code", std::to_string(options.geneticCode) }, { options.indexDir + "/option:subj_seq_len_bits", std::to_string(sizeof(SizeTypePos_) * 8)}, { options.indexDir + "/option:generation", std::to_string(indexGeneration) }, }) { std::ofstream f{std::get<0>(s).c_str(), std::ios_base::out | std::ios_base::binary}; f << std::get<1>(s); f.close(); } } lambda-lambda-v2.0.1/src/mkindex_algo.hpp000066400000000000000000001117761445553061700203260ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda_indexer.hpp: Main File for the indexer application // ========================================================================== #ifndef SEQAN_LAMBDA_LAMBDA_INDEXER_H_ #define SEQAN_LAMBDA_LAMBDA_INDEXER_H_ #include #include #include #include #include #include #include "mkindex_misc.hpp" #include "mkindex_saca.hpp" #include "shared_misc.hpp" #include "shared_options.hpp" #include "search_output.hpp" //TODO only needed because options are in one file, remove later using namespace seqan; // -------------------------------------------------------------------------- // Function loadSubj() // -------------------------------------------------------------------------- template void loadSubjSeqsAndIds(TCDStringSet> & originalSeqs, std::unordered_map & accToIdRank, LambdaIndexerOptions const & options) { // Make sure we have enough RAM to load the file auto ram = getTotalSystemMemory(); auto fS = fileSize(toCString(options.dbFile)); if (fS >= ram) std::cerr << "WARNING: Your sequence file is already larger than your physical memory!\n" << " This means you will likely encounter a crash with \"bad_alloc\".\n" << " Split you sequence file into many smaller ones or use a computer\n" << " with more memory!\n"; typedef TCDStringSet>> TIDs; TIDs ids; // the IDs // see http://www.uniprot.org/help/accession_numbers // https://www.ncbi.nlm.nih.gov/Sequin/acc.html // https://www.ncbi.nlm.nih.gov/refseq/about/ // TODO: make 
sure these don't trigger twice on one ID std::regex const accRegEx{"[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}|" // UNIPROT "[A-Z][0-9]{5}|[A-Z]{2}[0-9]{6}|" // NCBI nucl "[A-Z]{3}[0-9]{5}|" // NCBI prot "[A-Z]{4}[0-9]{8,10}|" // NCBI wgs "[A-Z]{5}[0-9]{7}|" // NCBI mga "(NC|AC|NG|NT|NW|NZ|NM|NR|XM|XR|NP|AP|XP|YP|ZP)_[0-9]+|" // RefSeq "UPI[A-F0-9]{10}"}; // UniParc // lambda that truncates IDs at first whitespace auto truncateID = [] (auto && id, uint64_t const) { IsWhitespace isWhitespace; for (size_t i = 0; i < length(id); ++i) { if (isWhitespace(id[i])) { resize(id, i); break; } } }; uint64_t noAcc = 0; uint64_t multiAcc = 0; // lambda that extracts accession numbers and saves them in the map auto extractAccIds = [&accToIdRank, &accRegEx, &noAcc, &multiAcc] (auto && id, uint64_t const rank) { // TODO avoid copying here by specializing regex_iterator std::string buf; assign(buf, id); uint64_t count = 0; for (auto it = std::sregex_iterator(buf.begin(), buf.end(), accRegEx), itEnd = std::sregex_iterator(); it != itEnd; ++it, ++count) { SEQAN_ASSERT_MSG(accToIdRank.count(it->str()) == 0, "An accession number appeared twice in the file, but they should be unique."); // TODO store acc outside as well accToIdRank[it->str()] = rank; } switch (count) { case 0: ++noAcc; break; case 1: break; default: ++multiAcc; break; } }; double start = sysTime(); myPrint(options, 1, "Loading Subject Sequences and Ids..."); SeqFileIn infile(toCString(options.dbFile)); if (options.truncateIDs) { if (options.hasSTaxIds) { myReadRecords(ids, originalSeqs, infile, [&] (auto && id, uint64_t const rank) { extractAccIds(std::forward(id), rank); truncateID(std::forward(id), rank); }); } else { myReadRecords(ids, originalSeqs, infile, truncateID); } } else { if (options.hasSTaxIds) myReadRecords(ids, originalSeqs, infile, extractAccIds); else myReadRecords(ids, originalSeqs, infile); } myPrint(options, 1, " done.\n"); double finish = sysTime() - start; myPrint(options, 
2, "Runtime: ", finish, "s \n"); if (length(originalSeqs) == 0) { throw std::runtime_error("ERROR: No sequences in file. Aborting.\n"); } unsigned long maxLen = 0ul; for (auto const & s : originalSeqs) { if (length(s) > maxLen) { maxLen = length(s); } else if (length(s) == 0) { throw std::runtime_error("ERROR: Unexpectedly encountered a sequence of length 0 in the file." "Remove the entry and try again. Aborting.\n"); } } myPrint(options, 2, "Number of sequences read: ", length(originalSeqs), "\nLongest sequence read: ", maxLen, "\n"); if (length(originalSeqs) * 6 >= std::numeric_limits>::max()) { throw std::runtime_error(std::string("ERROR: Too many sequences submitted. The maximum (including frames) is ") + std::to_string(std::numeric_limits>::max()) + ".\n"); } if (maxLen >= std::numeric_limits>::max()) { throw std::runtime_error(std::string("ERROR: one or more of your subject sequences are too long. " "The maximum length is ") + std::to_string(std::numeric_limits>::max()) + ".\n"); } if (options.hasSTaxIds) { myPrint(options, 2, "Subjects without acc numbers: ", noAcc, '/', length(ids), "\n", "Subjects with more than one acc number: ", multiAcc, '/', length(ids), "\n"); } myPrint(options, 2, "\n"); myPrint(options, 1, "Dumping Subj Ids..."); //TODO save to TMPDIR instead CharString _path = options.indexDir; append(_path, "/seq_ids"); save(ids, toCString(_path)); myPrint(options, 1, " done.\n"); finish = sysTime() - start; myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } // -------------------------------------------------------------------------- // Function loadSubj() // -------------------------------------------------------------------------- template inline void _saveOriginalSeqLengths(TLimits limits, // we want copy! 
LambdaIndexerOptions const & options) { double start = sysTime(); myPrint(options, 1, "Dumping untranslated subject lengths..."); for (uint32_t i = 0; i < (length(limits) - 1); ++i) limits[i] = limits[i+1] - limits[i]; // last entry not overwritten, should be the sum of all lengths CharString _path = options.indexDir; append(_path, "/untranslated_seq_lengths"); save(limits, toCString(_path)); myPrint(options, 1, " done.\n"); double finish = sysTime() - start; myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } // -------------------------------------------------------------------------- // Function loadSubj() // -------------------------------------------------------------------------- template inline void translateOrSwap(TCDStringSet> & out, TCDStringSet> & in, LambdaIndexerOptions const & options) { double start = sysTime(); myPrint(options, 1, "Translating Subj Sequences..."); translate(out, in, SIX_FRAME, options.geneticCode); myPrint(options, 1, " done.\n"); double finish = sysTime() - start; myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } template inline void translateOrSwap(TCDStringSet> & out, TCDStringSet> & in, LambdaIndexerOptions const & /**/) { swap(out, in); } // -------------------------------------------------------------------------- // Function loadSubj() // -------------------------------------------------------------------------- template inline void dumpTranslatedSeqs(TCDStringSet> const & translatedSeqs, LambdaIndexerOptions const & options) { double start = sysTime(); myPrint(options, 1, "Dumping unreduced Subj Sequences..."); //TODO save to TMPDIR instead std::string _path = options.indexDir + "/translated_seqs"; save(translatedSeqs, _path.c_str()); myPrint(options, 1, " done.\n"); double finish = sysTime() - start; myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } // -------------------------------------------------------------------------- // Function loadSubj() // 
-------------------------------------------------------------------------- // template // inline void // reduceOrSwap(TCDStringSet> & out, // TCDStringSet> & in) // { // //TODO more output // // reduce implicitly // myPrint(options, 1, "Reducing..."); // out.concat = in.concat; // out.limits = in.limits; // } // // template // inline void // reduceOrSwap(TCDStringSet> & out, // TCDStringSet> & in) // { // swap(out, in); // } // -------------------------------------------------------------------------- // Function loadSubj() // -------------------------------------------------------------------------- template void checkIndexSize(TCDStringSet> const & seqs, LambdaIndexerOptions const & options, BlastProgramSelector

const &) { myPrint(options, 1, "Checking parameters of to-be-built index..."); // check number of sequences using SAV = typename SAValue>>::Type; uint64_t curNumSeq = length(seqs); uint64_t maxNumSeq = std::numeric_limits::Type>::max(); if (curNumSeq >= maxNumSeq) { throw std::invalid_argument(std::string("ERROR: Too many sequences to be indexed:\n ") + std::to_string(length(seqs)) + std::string(" in file, but only ") + std::to_string(maxNumSeq) + std::string(" supported by index.\n")); } // check length of sequences uint64_t maxLenSeq = std::numeric_limits::Type>::max(); uint64_t maxLen = 0ul; for (auto const & s : seqs) if (length(s) > maxLen) maxLen = length(s); if (maxLen >= maxLenSeq) { std::string err; err += "Sequences too long to be indexed:\n "; err += "length"; err += std::to_string(maxLen); err += " present in file, but only "; err += std::to_string(maxLenSeq); err += " supported by index.\n"; #ifndef LAMBDA_LONG_PROTEIN_SUBJ_SEQS if (p != BlastProgram::BLASTN) err += "You can recompile Lambda and add -DLAMBDA_LONG_PROTEIN_SUBJ_SEQS=1 to activate\n" "support for longer protein sequences.\n"; #endif throw std::invalid_argument(err); } // check available RAM auto ram = getTotalSystemMemory(); auto lS = lengthSum(seqs); unsigned long long factor = 0; if (options.algo == "radixsort") factor = sizeof(SizeTypeNum_) + sizeof(SizeTypePos_) + 4; // 4 is good heuristic else if (options.algo == "skew7ext") factor = 6; // TODO do some tests! 
auto estimatedSize = lS * factor; myPrint(options, 1, "done.\n"); if (estimatedSize >= ram) { std::cerr << "WARNING: Lambda estimates that it will need " << estimatedSize / 1024 / 1024 << "MB\n" << " of memory to index this file, but you have only " << ram / 1024 / 1024 << "MB\n" << " available on your system.\n" << " This means you will likely encounter a crash with \"bad_alloc\".\n" << " Split you sequence file into many smaller ones or use a computer\n" << " with more memory!\n"; } else { myPrint(options, 2, "Detected RAM: ", ram / 1024 / 1024, "MB, Estimated RAM usage: ", estimatedSize / 1024 / 1024, "MB\n\n"); } } // -------------------------------------------------------------------------- // Function mapAndDumpTaxIDs() // -------------------------------------------------------------------------- void mapAndDumpTaxIDs(std::vector & taxIdIsPresent, std::unordered_map const & accToIdRank, uint64_t const numSubjects, LambdaIndexerOptions const & options) { StringSet> sTaxIds; // not concat because we resize inbetween resize(sTaxIds, numSubjects); // c++ stream std::ifstream fin(toCString(options.accToTaxMapFile), std::ios_base::in | std::ios_base::binary); if (!fin.is_open()) { throw std::invalid_argument(std::string("ERROR: Could not open acc-to-tax-map file at ") + options.accToTaxMapFile + "\n"); } // transparent decompressor VirtualStream vfin {fin}; // stream iterator auto fit = directionIterator(vfin, Input()); myPrint(options, 1, "Parsing acc-to-tax-map file... 
"); double start = sysTime(); if (std::regex_match(options.accToTaxMapFile, std::regex{R"raw(.*\.accession2taxid(\.(gz|bgzf|bz2))?)raw"})) { _readMappingFileNCBI(fit, sTaxIds, taxIdIsPresent, accToIdRank); } else if (std::regex_match(options.accToTaxMapFile, std::regex{R"raw(.*\.dat(\.(gz|bgzf|bz2))?)raw"})) { _readMappingFileUniProt(fit, sTaxIds, taxIdIsPresent, accToIdRank); } else { throw std::invalid_argument("ERROR: extension of acc-to-tax-map file not handled.\n"); } // root node is always present if (taxIdIsPresent.size() < 2) taxIdIsPresent.resize(2); taxIdIsPresent[1] = true; myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s \n"); // TODO do something with the subjects that have no (valid) taxid? uint64_t nomap = 0; uint64_t multi = 0; for (auto const & s : sTaxIds) { if (length(s) == 0) ++nomap; else if (length(s) > 1) ++multi; } myPrint(options, 2, "Subjects without tax IDs: ", nomap, '/', numSubjects, "\n", "Subjects with more than one tax ID: ", multi, '/', numSubjects, "\n\n"); if ((nomap > 0) && ((numSubjects / nomap) < 5)) myPrint(options, 1, "WARNING: ", double(nomap) * 100 / numSubjects, "% of subjects have no taxID.\n" " Maybe you specified the wrong map file?\n\n"); myPrint(options, 1,"Dumping Subject Taxonomy IDs... 
"); start = sysTime(); // concat direct so that it's easier to read/write StringSet, Owner>> outSTaxIds = sTaxIds; save(outSTaxIds, std::string(options.indexDir + "/staxids").c_str()); myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n\n"); } // -------------------------------------------------------------------------- // Function mapAndDumpTaxIDs() // -------------------------------------------------------------------------- void parseAndDumpTaxTree(std::vector & taxIdIsPresent, LambdaIndexerOptions const & options) { String taxonParentIDs; // ever position has the index of its parent node reserve(taxonParentIDs, 2'000'000); // reserve 2million to save reallocs std::string path = options.taxDumpDir + "/nodes.dmp"; std::ifstream fin(path.c_str(), std::ios_base::in | std::ios_base::binary); if (!fin.is_open()) { throw std::runtime_error(std::string("ERROR: Could not open ") + path + "\n"); } // transparent decompressor VirtualStream vfin{fin}; // stream iterator auto fit = directionIterator(vfin, Input()); myPrint(options, 1, "Parsing nodes.dmp... 
"); double start = sysTime(); std::string buf; std::regex const numRegEx{"\\b\\d+\\b"}; while (!atEnd(fit)) { clear(buf); // read line readLine(buf, fit); uint32_t n = 0; uint32_t parent = 0; unsigned i = 0; for (auto it = std::sregex_iterator(buf.begin(), buf.end(), numRegEx), itEnd = std::sregex_iterator(); (it != itEnd) && (i < 2); ++it, ++i) { try { if (i == 0) n = lexicalCast(it->str()); else parent = lexicalCast(it->str()); } catch (BadLexicalCast const & badCast) { throw std::runtime_error( std::string("Error: Expected taxonomical ID, but got something I couldn't read: ") + std::string(badCast.what()) + "\n"); } } if (length(taxonParentIDs) <= n) resize(taxonParentIDs, n +1, 0); taxonParentIDs[n] = parent; } // also resize these, since we get new, possibly higher cardinality nodes taxIdIsPresent.resize(length(taxonParentIDs), false); myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n"); if (options.verbosity >= 2) { uint32_t heightMax = 0; uint32_t numNodes = 0; for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { if (taxonParentIDs[i] > 0) ++numNodes; uint32_t height = 0; uint32_t curPar = taxonParentIDs[i]; while (curPar > 1) { curPar = taxonParentIDs[curPar]; ++height; } heightMax = std::max(heightMax, height); } myPrint(options, 2, "Number of nodes in tree: ", numNodes, "\n"); myPrint(options, 2, "Maximum Tree Height: ", heightMax, "\n\n"); } myPrint(options, 1, "Thinning and flattening Tree... 
"); start = sysTime(); // taxIdIsPresent are all directly present taxIds // taxIdIsPresentOrParent are also the (recursive) parents of the above // we need to differentiate this later, because we will remove some intermediate nodes // but we may not remove any that are directly present AND parents of directly present ones std::vector taxIdIsPresentOrParent{taxIdIsPresent}; // mark parents as present, too for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { if (taxIdIsPresent[i]) { // get ancestors: uint32_t curPar = i; do { curPar = taxonParentIDs[curPar]; taxIdIsPresentOrParent[curPar] = true; } while (curPar > 1); } } // set unpresent nodes to 0 SEQAN_OMP_PRAGMA(parallel for) for (uint32_t i = 0; i < length(taxonParentIDs); ++i) if (!taxIdIsPresentOrParent[i]) taxonParentIDs[i] = 0; // count inDegrees String inDegrees; resize(inDegrees, length(taxonParentIDs), 0); for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { // increase inDegree of parent uint32_t curPar = taxonParentIDs[i]; ++inDegrees[curPar]; } // skip parents with indegree 1 (flattening) for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { uint32_t curPar = taxonParentIDs[i]; // those intermediate nodes that themselve represent sequences may not be skipped while ((curPar > 1) && (inDegrees[curPar] == 1) && (!taxIdIsPresent[curPar])) curPar = taxonParentIDs[curPar]; taxonParentIDs[i] = curPar; } // remove nodes that are now disconnected SEQAN_OMP_PRAGMA(parallel for) for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { // those intermediate nodes that themselve represent sequences may not be skipped if ((inDegrees[i] == 1) && (!taxIdIsPresent[i])) { taxonParentIDs[i] = 0; taxIdIsPresentOrParent[i] = false; } } String taxonHeights; resize(taxonHeights, length(taxonParentIDs), 0); { uint32_t heightMax = 0; uint32_t numNodes = 0; for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { if (taxonParentIDs[i] > 0) ++numNodes; uint32_t height = 0; uint32_t curPar = taxonParentIDs[i]; while (curPar 
> 1) { curPar = taxonParentIDs[curPar]; ++height; } taxonHeights[i] = height; heightMax = std::max(heightMax, height); } myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n"); myPrint(options, 2, "Number of nodes in tree: ", numNodes, "\n"); myPrint(options, 2, "Maximum Tree Height: ", heightMax, "\n\n"); } myPrint(options, 1,"Dumping Taxonomy Tree... "); start = sysTime(); save(taxonParentIDs, std::string(options.indexDir + "/tax_parents").c_str()); save(taxonHeights, std::string(options.indexDir + "/tax_heights").c_str()); myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n\n"); // DEBUG #ifndef NDEBUG for (uint32_t i = 0; i < length(taxonParentIDs); ++i) { if (!taxIdIsPresentOrParent[i] && (taxonParentIDs[i] != 0)) std::cerr << "WARNING: TaxID " << i << " has parent, but shouldn't.\n"; if (taxIdIsPresentOrParent[i] && (taxonParentIDs[i] == 0)) std::cerr << "WARNING: TaxID " << i << " has no parent, but should.\n"; if (taxIdIsPresent[i] && (taxonParentIDs[i] == 0)) std::cerr << "WARNING: TaxID " << i << " has no parent, but should. 2\n"; if (taxIdIsPresent[i] && !taxIdIsPresentOrParent[i]) std::cerr << "WARNING: TaxID " << i << " disappeared, but shouldn't have.\n"; if (!taxIdIsPresent[i] && taxIdIsPresentOrParent[i] && (inDegrees[i] == 1)) std::cerr << "WARNING: TaxID " << i << " should have disappeared, but didn't.\n"; } #endif /** read the names **/ StringSet taxonNames; // ever position has the index of its parent node resize(taxonNames, length(taxonParentIDs)); path = options.taxDumpDir + "/names.dmp"; std::ifstream fin2(path.c_str(), std::ios_base::in | std::ios_base::binary); if (!fin2.is_open()) throw std::runtime_error(std::string("ERROR: Could not open ") + path + "\n"); // transparent decompressor VirtualStream vfin2{fin2}; // stream iterator fit = directionIterator(vfin2, Input()); myPrint(options, 1, "Parsing names.dmp... 
"); start = sysTime(); std::regex const wordRegEx{R"([\w.,\"<> ]+)"}; std::string name; while (!atEnd(fit)) { clear(buf); // read line readLine(buf, fit); uint32_t taxId = 0; auto itWord = std::sregex_iterator(buf.begin(), buf.end(), wordRegEx); if (itWord == std::sregex_iterator()) { throw std::runtime_error("Error: Expected taxonomical ID in first column, but couldn't find it.\n"); } else { try { taxId = lexicalCast(itWord->str()); } catch (BadLexicalCast const & badCast) { throw std::runtime_error(std::string("Error: Expected taxonomical ID in first column, but got something" " I couldn't read: ") + std::string(badCast.what()) + "\n"); } if (taxId >= length(taxonNames)) { throw std::runtime_error(std::string("Error: taxonomical ID is ") + std::to_string(taxId) + ", but no such taxon in tree.\n"); } } // we don't need this name if (!taxIdIsPresentOrParent[taxId]) continue; if (++itWord == std::sregex_iterator()) throw std::runtime_error("Error: Expected name in second column, but couldn't find it.\n"); else name = itWord->str(); while (++itWord != std::sregex_iterator()) { if (itWord->str() == "scientific name") taxonNames[taxId] = name; } } myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n"); taxonNames[0] = "invalid"; for (uint32_t i = 0; i < length(taxonNames); ++i) { if (taxIdIsPresentOrParent[i] && empty(taxonNames[i])) { std::cerr << "Warning: Taxon with ID " << i << " has no name associated, defaulting to \"n/a\".\n"; taxonNames[i] = "n/a"; } } myPrint(options, 1,"Dumping Taxon names... 
"); start = sysTime(); // concat direct so that it's easier to read/write StringSet>> outTaxonNames = taxonNames; save(outTaxonNames, std::string(options.indexDir + "/tax_names").c_str()); myPrint(options, 1, "done.\n"); myPrint(options, 2, "Runtime: ", sysTime() - start, "s\n\n"); } // -------------------------------------------------------------------------- // Function createSuffixArray() // -------------------------------------------------------------------------- // If there is no overload with progress function, then strip it template inline void createSuffixArray(TSA & SA, StringSet const & s, TAlgo const &, TLambda &&) { return createSuffixArray(SA, s, TAlgo()); } // ---------------------------------------------------------------------------- // Function indexCreate // ---------------------------------------------------------------------------- template void indexCreateProgress(Index > & index, FibreSALF const &, LambdaIndexerOptions const & options) { typedef Index > TIndex; typedef typename Fibre::Type TTempSA; typedef typename Size::Type TSize; typedef typename DefaultIndexCreator::Type TAlgo; TText const & text = indexText(index); if (empty(text)) return; TTempSA tempSA; uint64_t lastPercent = 0; double s = sysTime(); myPrint(options, 1, "Generating Index 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%\n" " Progress: |"); // Create the full SA. resize(tempSA, lengthSum(text), Exact()); if (options.verbosity >= 1) { createSuffixArray(tempSA, text, TAlgo(), [&lastPercent] (uint64_t curPerc) { // needs locking, because called from multiple threads SEQAN_OMP_PRAGMA(critical(progressBar)) printProgressBar(lastPercent, curPerc * 0.85); // 85% of progress }); } else { createSuffixArray(tempSA, text, TAlgo()); } double sacaTime = sysTime() - s; if (options.verbosity >= 1) printProgressBar(lastPercent, 85); // Create the LF table. 
s = sysTime(); if (options.verbosity >= 1) { createLFProgress(indexLF(index), text, tempSA, [&lastPercent] (uint64_t curPerc) { // doesn't need locking, only writes from one thread printProgressBar(lastPercent, curPerc * 0.1); // 10% of progress }); } else { createLFProgress(indexLF(index), text, tempSA, [] (uint64_t) {}); } // Set the FMIndex LF as the CompressedSA LF. setFibre(indexSA(index), indexLF(index), FibreLF()); double bwtTime = sysTime() - s; if (options.verbosity >= 1) printProgressBar(lastPercent, 95); // Create the sampled SA. s = sysTime(); TSize numSentinel = countSequences(text); createCompressedSa(indexSA(index), tempSA, numSentinel); double sampleTime = sysTime() - s; if (options.verbosity >= 1) printProgressBar(lastPercent, 100); myPrint(options, 1, "\n"); myPrint(options, 2, "SA construction runtime: ", sacaTime, "s\n"); myPrint(options, 2, "BWT construction runtime: ", bwtTime, "s\n"); myPrint(options, 2, "SA sampling runtime: ", sampleTime, "s\n"); myPrint(options, 1, "\n"); } template void indexCreateProgress(Index > > & index, FibreSALF const &, LambdaIndexerOptions const & options) { myPrint(options, 1, "Bi-Directional Index [forward]\n"); indexCreateProgress(index.fwd, FibreSALF(), options); myPrint(options, 1, "Bi-Directional Index [backward]\n"); indexCreateProgress(index.rev, FibreSALF(), options); } template void indexCreateProgress(Index > & index, FibreSA const &, LambdaIndexerOptions const & options) { typedef Index > TIndex; typedef typename Fibre::Type TSA; typedef typename DefaultIndexCreator::Type TAlgo; TText const & text = indexText(index); if (empty(text)) return; TSA & sa = getFibre(index, FibreSA()); myPrint(options, 1, "Generating Index 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%\n" " Progress: |"); // Create the full SA. 
resize(sa, lengthSum(text), Exact()); if (options.verbosity >= 1) { createSuffixArray(sa, text, TAlgo(), [lastPercent = uint64_t{0ull}] (uint64_t curPerc) mutable { SEQAN_OMP_PRAGMA(critical(progressBar)) printProgressBar(lastPercent, curPerc); // 100% of progress }); } else { createSuffixArray(sa, text, TAlgo()); } } template inline void _clearSparseSuffixArray(T &, std::false_type const &) {} template inline void _clearSparseSuffixArray(T & dbIndex, std::true_type const &) { // reverse index does not require sampled suffix array, but its size :| clear(getFibre(getFibre(getFibre(dbIndex, FibreSA()), FibreSparseString()), FibreValues())); clear(getFibre(getFibre(getFibre(dbIndex, FibreSA()), FibreSparseString()), FibreIndicators())); } // -------------------------------------------------------------------------- // Function generateIndexAndDump() // -------------------------------------------------------------------------- #ifdef _OPENMP #define TID omp_get_thread_num() #else #define TID 0 #endif template inline void generateIndexAndDump(StringSet & seqs, LambdaIndexerOptions const & options, BlastProgramSelector

const &, TRedAlph_ const &, Tag const &) { using TTransSeqs = TCDStringSet>>; using TRedAlph = RedAlph; // ensures == Dna5 for BlastN using TRedSeqVirt = ModifiedString, Alloc<>>, ModView,TRedAlph>>>; using TRedSeqsVirt = StringSet>>; static bool constexpr indexIsFM = std::is_same >::value || std::is_same >::value; static bool constexpr alphReduction = !std::is_same, TRedAlph>::value; using TRedSeqs = typename std::conditional< !alphReduction, TTransSeqs, // owner TRedSeqsVirt>::type; // modview using TRedSeqsACT = typename std::conditional< !alphReduction, TTransSeqs &, // reference to owner TRedSeqsVirt>::type; // modview using TDbIndex = Index; using TFullFibre = typename std::conditional::type; static bool constexpr hasProgress = std::is_same::value; // Generate Index if (!hasProgress) myPrint(options, 1, "Generating Index..."); double s = sysTime(); // std::cerr << "indexIsFM: " << int(indexIsFM) << std::endl; // FM-Index needs reverse input if (indexIsFM && std::is_same, Fwd>::value) reverse(seqs); TRedSeqsACT redSubjSeqs(seqs); TDbIndex dbIndex(redSubjSeqs); // instantiate SA indexCreateProgress(dbIndex, TFullFibre(), options); // since we dumped unreduced sequences before and reduced sequences are // only "virtual" we clear them before dump std::decay_t tmpLimits; if (alphReduction || !indexIsFM) // fm indexes don't dump them anyways { if (indexIsFM && (std::is_same, Rev>::value)) { // these makes redSubjSeqs appear empty and deactivates output swap(tmpLimits, redSubjSeqs.limits); _clearSparseSuffixArray(dbIndex, std::integral_constant{}); } else { clear(seqs); clear(redSubjSeqs.limits); // limits part is not lightweight } } double e = sysTime() - s; if (!hasProgress) { myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", e, "s \n\n"); } // Dump Index myPrint(options, 1, "Writing Index to disk..."); s = sysTime(); std::string path = options.indexDir + "/index"; if (std::is_same, Rev>::value) path += ".rev"; save(dbIndex, path.c_str()); e = 
sysTime() - s; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", e, "s \n"); if (alphReduction && indexIsFM && (std::is_same, Rev>::value)) { // we swap back so that the sequences can be used for building the second index swap(tmpLimits, redSubjSeqs.limits); // redSubjSeqs.limits = tmpLimits; } } #endif // header guard lambda-lambda-v2.0.1/src/mkindex_misc.hpp000066400000000000000000000345661445553061700203400ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda_indexer_misc.hpp: misc stuff for indexer // ========================================================================== #ifndef LAMBDA_INDEXER_MISC_HPP_ #define LAMBDA_INDEXER_MISC_HPP_ template void getCwd(TString & string) { char cwd[1000]; #ifdef PLATFORM_WINDOWS _getcwd(cwd, 1000); #else getcwd(cwd, 1000); #endif assign(string, cwd); } template bool setEnv(TString const & key, TValue & value) { #ifdef PLATFORM_WINDOWS return !_putenv_s(toCString(key), toCString(value)); #else return !setenv(toCString(key), toCString(value), true); #endif } // ---------------------------------------------------------------------------- // Class ComparisonCounter // ---------------------------------------------------------------------------- #if 0 // why is this still here? 
template struct ComparisonCounter; // no counting template struct ComparisonCounter { uint64_t _comparisons = 0; uint64_t _expectedComparisons = 0; uint64_t _lastPercent = 0; ComparisonCounter(TText const &, uint64_t expectedComparisons = 0u) { (void)expectedComparisons; } // may be constexpr in c++14 inline void inc() const {} }; // every thread counts #ifdef _OPENMP template struct ComparisonCounter #else template struct ComparisonCounter #endif { uint64_t _comparisons = 0; uint64_t _expectedComparisons = 0; // uint64_t _twoPercent = 0; uint64_t _lastPercent = 0; uint64_t _checkEveryNHits = 1; ComparisonCounter(TText const & text, uint64_t expectedComparisons = 0u) { if (expectedComparisons == 0) { uint64_t l = length(concat(text)); _expectedComparisons = 1.2 * double(l) * std::log(l) / std::log(2); } else _expectedComparisons = expectedComparisons; // _twoPercent = _expectedComparisons / 50; _comparisons = 0; _lastPercent = 0; while ((_checkEveryNHits << 1) < (_expectedComparisons / 100)) _checkEveryNHits <<= 1; } inline void inc() { uint64_t comp = ++_comparisons; // it is not important that the henceforth _comparisons be actually // the same value (might not be due to SMP) // progress reporting if (comp & _checkEveryNHits) { uint64_t curPerc = comp * 100 / _expectedComparisons; if (curPerc < 100) printProgressBar(_lastPercent, curPerc); } } }; // only one thread counts #ifdef _OPENMP template struct ComparisonCounter { uint64_t _comparisons = 0; uint64_t _expectedComparisons = 0; // uint64_t _twoPercent = 0; uint64_t _lastPercent = 0; uint64_t _checkEveryNHits = 1; ComparisonCounter(TText const & text, uint64_t expectedComparisons = 0u) { if (expectedComparisons == 0) { uint64_t l = length(concat(text)); _expectedComparisons = 1.2 * double(l) * std::log(l) / std::log(2) / omp_get_max_threads(); } else _expectedComparisons = expectedComparisons; // _twoPercent = _expectedComparisons / 50; // _comparisons = 0; while ((_checkEveryNHits << 1) < 
(_expectedComparisons / 100)) _checkEveryNHits <<= 1; } inline void inc() { if (omp_get_thread_num() == 0) // only one thread counts { uint64_t comp = ++_comparisons; // progress reporting if (comp & _checkEveryNHits) { uint64_t curPerc = comp * 100 / _expectedComparisons; if (curPerc < 100) printProgressBar(_lastPercent, curPerc); } } } }; #endif #endif // ---------------------------------------------------------------------------- // function _readMappingFileNCBI // ---------------------------------------------------------------------------- template void _readMappingFileUniProt(TInputIterator & fit, TStaxIDs & sTaxIds, std::vector & taxIdIsPresent, std::unordered_map const & accToIdRank) { // skip line with headers skipLine(fit); //TODO this is too slow, investigate whether its the lookup or the allocs std::string acc; std::string nextColumn; while (!atEnd(fit)) { clear(acc); clear(nextColumn); // read accession number readUntil(acc, fit, IsBlank()); // skip whitespace skipUntil(fit, IsAlphaNum()); // read accession number readUntil(nextColumn, fit, IsBlank()); if ((nextColumn == "NCBI_TaxID") && (accToIdRank.count(acc) == 1)) { auto & sTaxIdV = sTaxIds[accToIdRank.at(acc)]; // skip whitespace skipUntil(fit, IsAlphaNum()); // read tax id clear(nextColumn); readUntil(nextColumn, fit, IsWhitespace()); uint32_t idNum = 0; try { idNum = lexicalCast(nextColumn); } catch (BadLexicalCast const & badCast) { throw std::runtime_error( std::string("Error: Expected taxonomical ID, but got something I couldn't read: ") + std::string(badCast.what()) + "\n"); } appendValue(sTaxIdV, idNum); if (taxIdIsPresent.size() < idNum + 1) taxIdIsPresent.resize(idNum + 1); taxIdIsPresent[idNum] = true; } skipLine(fit); } } template void _readMappingFileNCBI(TInputIterator & fit, TStaxIDs & sTaxIds, std::vector & taxIdIsPresent, std::unordered_map const & accToIdRank) { // skip line with headers skipLine(fit); //TODO this is too slow, investigate whether its the lookup or the allocs 
std::string buf; while (!atEnd(fit)) { clear(buf); // read accession number readUntil(buf, fit, IsBlank()); // we have a sequence with this ID in our database if (accToIdRank.count(buf) == 1) { auto & sTaxIdV = sTaxIds[accToIdRank.at(buf)]; // skip whitespace skipUntil(fit, IsAlphaNum()); // skip versioned acc skipUntil(fit, IsBlank()); // skip whitespace skipUntil(fit, IsAlphaNum()); // read tax id clear(buf); readUntil(buf, fit, IsBlank()); uint32_t idNum = 0; try { idNum = lexicalCast(buf); } catch (BadLexicalCast const & badCast) { throw std::runtime_error( std::string("Error: Expected taxonomical ID, but got something I couldn't read: ") + std::string(badCast.what()) + "\n"); } appendValue(sTaxIdV, idNum); if (taxIdIsPresent.size() < idNum + 1) taxIdIsPresent.resize(idNum + 1); taxIdIsPresent[idNum] = true; } skipLine(fit); } } /// REGEX version is 5x slower, but verifies file format correctness // template // inline int // _readMappingFileNCBI2(TInputIterator & fit, // TStaxIDs & sTaxIds, // std::vector & taxIdIsPresent, // std::unordered_map const & accToIdRank) // { // // skip line with headers // skipLine(fit); // // std::string buf; // // ACC ACC.VERSION taxid gi // std::regex const lineRE{"(\\w+)\\s(\\w+\\.?\\d*)\\s(\\d+)\\s(\\d+)"}; // std::smatch baseMatch; // uint32_t idNum = 0; // StringSet // // while (!atEnd(fit)) // { // clear(buf); // // read line // readLine(buf, fit); // // if (std::regex_match(buf, baseMatch, lineRE) && (baseMatch.size() == 5)) // { // // we have a sequence with this ID in our database // if (accToIdRank.count(baseMatch[1]) == 1) // { // auto & sTaxIdV = sTaxIds[accToIdRank.at(baseMatch[1])]; // idNum = 0; // try // { // idNum = lexicalCast(std::string(baseMatch[3])); // } // catch (BadLexicalCast const & badCast) // { // std::cerr << "Error: Expected taxonomical ID, but got something I couldn't read: " // << badCast.what() << "\n"; // return -1; // } // // appendValue(sTaxIdV, idNum); // if (taxIdIsPresent.size() < idNum + 1) 
// taxIdIsPresent.resize(idNum + 1); // taxIdIsPresent[idNum] = true; // } // // } else // { // std::cerr << "ERROR: The following line in the mapping file, did not satisfy the regex!\n" // << " " << buf << "\n\n"; // return -1; // } // } // // return 0; // } // ============================================================================ // Parallel BWT construction // ============================================================================ template void createRankDictionaryProgress(LF, TSpec, TConfig> & lf, TOtherText const & text, TSA const & sa, TCallback && progress) { typedef typename Value::Type TSAValue; typedef typename Size::Type TSize; // Resize the RankDictionary. TSize seqNum = countSequences(text); TSize totalLen = lengthSum(text); resize(lf.sentinels, seqNum + totalLen, Exact()); resize(lf.bwt, seqNum + totalLen, Exact()); // Fill the sentinel positions (they are all at the beginning of the bwt). for (TSize i = 0; i < seqNum; ++i) { if (length(text[seqNum - (i + 1)]) > 0) { setValue(lf.bwt, i, back(text[seqNum - (i + 1)])); setValue(lf.sentinels, i, false); } } /* Compute the rest of the bwt.*/ // align the chunk_size to underlying word boundaries to prevent parallel write to word spanning chunk boundary uint64_t const chunkSize = std::max(static_cast((length(sa) / omp_get_max_threads() / 64ull) * 64ull), uint64_t{1}); uint64_t const twoPercent = std::max(chunkSize / 50, uint64_t{1}); // the 0th thread might get an additional chunk because of the above alignment so we count from the 1st instead uint32_t const countThreadID = omp_get_max_threads() > 1 ? 
1 : 0; SEQAN_OMP_PRAGMA(parallel for schedule(static, chunkSize)) for (TSize i = 0; i < length(sa); ++i) { TSAValue pos; // = SA[i]; posLocalize(pos, sa[i], stringSetLimits(text)); if (getSeqOffset(pos) != 0) { setValue(lf.bwt, i + seqNum, getValue(getValue(text, getSeqNo(pos)), getSeqOffset(pos) - 1)); setValue(lf.sentinels, i + seqNum, false); } else { setValue(lf.bwt, i + seqNum, lf.sentinelSubstitute); setValue(lf.sentinels, i + seqNum, true); } if (((static_cast(omp_get_thread_num()) == countThreadID) && ((i % chunkSize) % twoPercent == 0))) progress(((i % chunkSize) / twoPercent) * 2); } // Update all ranks. updateRanks(lf.bwt); // Update the auxiliary RankDictionary of sentinel positions. updateRanks(lf.sentinels); } template void createLFProgress(LF & lf, TOtherText const & text, TSA const & sa, TCallback && progress) { typedef LF TLF; typedef typename Value::Type TValue; typedef typename Size::Type TSize; // Clear assuming undefined state. clear(lf); // Compute prefix sum. prefixSums(lf.sums, text); // Choose the sentinel substitute. _setSentinelSubstitute(lf); // Create and index BWT bwt for rank queries. createRankDictionaryProgress(lf, text, sa, progress); // Add sentinels to prefix sum. TSize sentinelsCount = countSequences(text); for (TSize i = 0; i < length(lf.sums); ++i) lf.sums[i] += sentinelsCount; progress(100); } #endif // LAMBDA_INDEXER_MISC_HPP_ lambda-lambda-v2.0.1/src/mkindex_options.hpp000066400000000000000000000352521445553061700210710ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // options.h: contains the options and argument parser // ========================================================================== #ifndef SEQAN_LAMBDA_OPTIONS_H_ #define SEQAN_LAMBDA_OPTIONS_H_ #include #include #include #include #include #include #include #include using namespace seqan; // -------------------------------------------------------------------------- // Class LambdaIndexerOptions // -------------------------------------------------------------------------- struct LambdaIndexerOptions : public SharedOptions { std::string dbFile; // std::string segFile = ""; std::string algo = ""; std::string accToTaxMapFile; std::string taxDumpDir; bool truncateIDs; int alphReduction; LambdaIndexerOptions() : SharedOptions() {} }; // ========================================================================== // Functions // ========================================================================== // INDEXER ArgumentParser::ParseResult parseCommandLine(LambdaIndexerOptions & options, int argc, char const ** argv) { std::string programName = "lambda2 " + std::string(argv[0]); // this is important for option handling: if (std::string(argv[0]) == "mkindexn") options.blastProgram = BlastProgram::BLASTN; ArgumentParser parser(programName); setShortDescription(parser, "the Local Aligner for Massive Biological DatA"); // Define usage line and long description. 
addUsageLine(parser, "[\\fIOPTIONS\\fP] \\-d DATABASE.fasta [-i INDEX.lambda]\\fP"); sharedSetup(parser); #ifndef SEQAN_DISABLE_VERSION_CHECK // version checker initiated by top-level arg parser setDefaultValue(parser, "version-check", "0"); hideOption(parser, "version-check"); #endif addOption(parser, ArgParseOption("v", "verbosity", "Display more/less diagnostic output during operation: 0 [only errors]; 1 [default]; 2 " "[+run-time, options and statistics].", ArgParseArgument::INTEGER)); setDefaultValue(parser, "verbosity", "1"); setMinValue(parser, "verbosity", "0"); setMaxValue(parser, "verbosity", "2"); addDescription(parser, "This is the indexer_binary for creating lambda-compatible databases."); addSection(parser, "Input Options"); addOption(parser, ArgParseOption("d", "database", "Database sequences.", ArgParseArgument::INPUT_FILE, "IN")); setRequired(parser, "database"); setValidValues(parser, "database", getFileExtensions(SeqFileIn())); addOption(parser, ArgParseOption("m", "acc-tax-map", "An NCBI or UniProt accession-to-taxid mapping file. 
Download from " "ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/accession2taxid/ or " "ftp://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/idmapping/ .", ArgParseArgument::INPUT_FILE)); CharString taxExtensions = "accession2taxid dat"; #ifdef SEQAN_HAS_ZLIB taxExtensions+= " accession2taxid.gz"; taxExtensions+= " accession2taxid.bgzf"; taxExtensions+= " dat.gz"; taxExtensions+= " dat.bgzf"; #endif #ifdef SEQAN_HAS_BZIP2 taxExtensions+= " accession2taxid.bz2"; taxExtensions+= " dat.bz2"; #endif setValidValues(parser, "acc-tax-map", toCString(taxExtensions)); addOption(parser, ArgParseOption("x", "tax-dump-dir", "A directory that contains nodes.dmp and names.dmp; unzipped from " "ftp://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdump.tar.gz", ArgParseArgument::INPUT_DIRECTORY)); addSection(parser, "Output Options"); addOption(parser, ArgParseOption("i", "index", "The output directory for the index files (defaults to \"DATABASE.lambda\").", ArgParseArgument::OUTPUT_DIRECTORY, "OUT")); setValidValues(parser, "index", ".lambda"); addOption(parser, ArgParseOption("", "db-index-type", "Suffix array or full-text minute space.", ArgParseArgument::STRING, "type")); #ifdef LAMBDA_LEGACY_PATHS setValidValues(parser, "db-index-type", "sa fm bifm"); #else setValidValues(parser, "db-index-type", "fm bifm"); #endif setDefaultValue(parser, "db-index-type", "fm"); setAdvanced(parser, "db-index-type"); addOption(parser, ArgParseOption("", "truncate-ids", "Truncate IDs at first whitespace. 
This saves a lot of space and is irrelevant for all LAMBDA output formats " "other than BLAST Pairwise (.m0).", ArgParseArgument::BOOL)); setDefaultValue(parser, "truncate-ids", "on"); if (options.blastProgram != BlastProgram::BLASTN) { addSection(parser, "Alphabets and Translation"); addOption(parser, ArgParseOption("a", "input-alphabet", "Alphabet of the database sequences (specify to override auto-detection); " "if input is Dna, it will be translated.", ArgParseArgument::STRING)); setValidValues(parser, "input-alphabet", "auto dna5 aminoacid"); setDefaultValue(parser, "input-alphabet", "auto"); setAdvanced(parser, "input-alphabet"); addOption(parser, ArgParseOption("g", "genetic-code", "The translation table to use if input is Dna. See " "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c" " for ids (default is generic).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "genetic-code", "1"); setAdvanced(parser, "genetic-code"); addOption(parser, ArgParseOption("r", "alphabet-reduction", "Alphabet Reduction for seeding phase.", ArgParseArgument::STRING, "STR")); setValidValues(parser, "alphabet-reduction", "none murphy10"); setDefaultValue(parser, "alphabet-reduction", "murphy10"); setAdvanced(parser, "alphabet-reduction"); } addSection(parser, "Algorithm"); addOption(parser, ArgParseOption("", "algorithm", "Algorithm for SA construction (also used for FM; see Memory " " Requirements below!).", ArgParseArgument::STRING, "STR")); setValidValues(parser, "algorithm", "mergesort quicksortbuckets quicksort radixsort skew7ext"); setDefaultValue(parser, "algorithm", "radixsort"); setAdvanced(parser, "algorithm"); #ifdef _OPENMP addOption(parser, ArgParseOption("t", "threads", "number of threads to run concurrently (ignored if a == skew7ext).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "threads", std::to_string(omp_get_max_threads())); setMinValue(parser, "threads", "1"); setMaxValue(parser, "threads", std::to_string(omp_get_max_threads() * 
10)); #else addOption(parser, ArgParseOption("t", "threads", "LAMBDA BUILT WITHOUT OPENMP; setting this option has no effect.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "threads", "1"); setMinValue(parser, "threads", "1"); setMaxValue(parser, "threads", "1"); #endif setAdvanced(parser, "threads"); std::string tmpdir; getCwd(tmpdir); addOption(parser, ArgParseOption("", "tmp-dir", "temporary directory used by skew, defaults to working directory.", ArgParseArgument::OUTPUT_DIRECTORY, "STR")); setDefaultValue(parser, "tmp-dir", tmpdir); setAdvanced(parser, "tmp-dir"); //TODO move manual / auto-detect // addTextSection(parser, "Memory requirements and Speed"); // addText(parser, "\033[1mmergesort [RAM]:\033[0m" // "\t14 * size(dbSeqs)"); // addText(parser, "\033[1mmergesort [speed]:\033[0m" // "\tup to t threads"); // addText(parser, "\033[1mquicksort and quicksortbuckets [RAM]:\033[0m" // "\t7 * size(dbSeqs)"); // addText(parser, "\033[1mquicksort [speed]:\033[0m" // "\t1-2 threads"); // addText(parser, "\033[1mquicksortbuckets [speed]:\033[0m" // "\t1-2 threads for initial sort, up to t for buckets"); // addText(parser, "\033[1mskew7ext [RAM]:\033[0m" // "\t2 * size(dbSeqs)"); // addText(parser, "\033[1mskew7ext [DISK]:\033[0m" // "\t30 * size(dbSeqs)"); // addText(parser, "\033[1mskew7ext [speed]:\033[0m" // "\tnot parallelized"); // addText(parser, "size(dbSeqs) refers to the total " // "sequence length and does not include IDs (which can " // "account for >50% of the file size for protein databases). " // "The space is the maximum obseverved factor, for many " // "databases the factor is smaller." ); // addText(parser, "Use mergesort if you have enough memory! If not, you will " // "probably want to use skew. For small databases and only a " // "few cores the quicksorts might be a good tradeoff. 
" // "mergesort and quicksortbuckets provide a rough progress " // "estimate."); // // addText(parser, "Disk space required is in TMPDIR which you can set as " // // "an environment variable."); addTextSection(parser, "Remarks"); addText(parser, "Please see the wiki () for more information on which indexes" " to chose and which algorithms to pick."); addText(parser, "Note that the indexes created are binary and not compatible between different CPU endiannesses. " "Also the on-disk format is still subject to change between Lambda versions."); // Parse command line. ArgumentParser::ParseResult res = parse(parser, argc, argv); // Only extract options if the program will continue after parseCommandLine() if (res != ArgumentParser::PARSE_OK) return res; // Options shared by lambda and its indexer res = parseCommandLineShared(options, parser); if (res != ArgumentParser::PARSE_OK) return res; std::string buffer; int buf = 0; getOptionValue(buffer, parser, "db-index-type"); if (buffer == "sa") options.dbIndexType = DbIndexType::SUFFIX_ARRAY; else if (buffer == "bifm") options.dbIndexType = DbIndexType::BI_FM_INDEX; else options.dbIndexType = DbIndexType::FM_INDEX; if (options.blastProgram == BlastProgram::BLASTN) { options.subjOrigAlphabet = AlphabetEnum::DNA5; options.transAlphabet = AlphabetEnum::DNA5; options.reducedAlphabet = AlphabetEnum::DNA5; } else { getOptionValue(buffer, parser, "input-alphabet"); if (buffer == "auto") options.subjOrigAlphabet = AlphabetEnum::DNA4; else if (buffer == "dna5") options.subjOrigAlphabet = AlphabetEnum::DNA5; else if (buffer == "aminoacid") options.subjOrigAlphabet = AlphabetEnum::AMINO_ACID; else throw std::invalid_argument("ERROR: Invalid argument to --input-alphabet\n"); getOptionValue(buffer, parser, "alphabet-reduction"); if (buffer == "murphy10") { options.reducedAlphabet = AlphabetEnum::MURPHY10; //TODO deprecate: options.alphReduction = 2; } else { options.reducedAlphabet = AlphabetEnum::AMINO_ACID; options.alphReduction = 0; 
} getOptionValue(buf, parser, "genetic-code"); switch (buf) { case 1: case 2: case 3: case 4: case 5: case 6: case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16: case 21: case 22: case 23: case 24 : case 25: options.geneticCode = static_cast(buf); break; default: std::cerr << "Invalid genetic code. See trans_table vars at " << "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c" << std::endl; return ArgumentParser::PARSE_ERROR; } } getOptionValue(options.algo, parser, "algorithm"); if ((options.algo == "mergesort") || (options.algo == "quicksort") || (options.algo == "quicksortbuckets")) { std::cerr << "WARNING: " << options.algo << " tag is deprecated and superseded by \"radixsort\", please " << "adapt your program calls.\n"; options.algo = "radixsort"; } getOptionValue(tmpdir, parser, "tmp-dir"); setEnv("TMPDIR", tmpdir); getOptionValue(options.truncateIDs, parser, "truncate-ids"); getOptionValue(options.dbFile, parser, "database"); if (isSet(parser, "index")) getOptionValue(options.indexDir, parser, "index"); else options.indexDir = options.dbFile + ".lambda"; if (fileExists(options.indexDir.c_str())) { std::cerr << "ERROR: An output directory already exists at " << options.indexDir << '\n' << "Remove it, or choose a different location.\n"; return ArgumentParser::PARSE_ERROR; } else { if (mkdir(options.indexDir.c_str(), S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH)) { std::cerr << "ERROR: Cannot create output directory at " << options.indexDir << '\n';; return ArgumentParser::PARSE_ERROR; } } getOptionValue(options.accToTaxMapFile, parser, "acc-tax-map"); options.hasSTaxIds = (options.accToTaxMapFile != ""); getOptionValue(options.taxDumpDir, parser, "tax-dump-dir"); if (!options.taxDumpDir.empty()) { if (!options.hasSTaxIds) { std::cerr << "ERROR: There is no point in inclduing a taxonomic tree in the index, if\n" " you don't also include taxonomic IDs for your sequences.\n"; return ArgumentParser::PARSE_ERROR; } //TODO check 
existance of directory } return ArgumentParser::PARSE_OK; } #endif // header guard lambda-lambda-v2.0.1/src/mkindex_saca.hpp000066400000000000000000000464741445553061700203150ustar00rootroot00000000000000// ========================================================================== // radix_inplace.h // ========================================================================== // Copyright (c) 2006-2015, Knut Reinert, FU Berlin // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright // notice, this list of conditions and the following disclaimer in the // documentation and/or other materials provided with the distribution. // * Neither the name of Knut Reinert or the FU Berlin nor the names of // its contributors may be used to endorse or promote products derived // from this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY // OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH // DAMAGE. 
// // ========================================================================== // Author: Sascha Meiers // Author: Hannes Hauswedell // ========================================================================== // The Radix Sort functions are adapted from Martin Frith's "last" // tool (last.cbrc.jp), but he himself adapted the code from McIlroy, Bostic: // "Engineering radix sort" as well as Karkkainen, Rantala: "Engineering radix // sort for strings". Thanks to Martin for showing this to me. // ============================================================================ #ifndef CORE_INCLUDE_SEQAN_INDEX_RADIX_INPLACE_H_ #define CORE_INCLUDE_SEQAN_INDEX_RADIX_INPLACE_H_ #if defined(_OPENMP) && defined(__GNUC__) && !defined(__clang__) #include #define SORT __gnu_parallel::sort #else #define SORT std::sort #endif //TODO(h-2): for clang use std::experimenta::parallel if available namespace seqan { // ========================================================================== // Tags // ========================================================================== struct RadixSortSACreateTag {}; // ========================================================================== // Metafunctions // ========================================================================== template struct Fibre >, FibreTempSA> { typedef Index > TIndex_; typedef typename SAValue::Type TSAValue_; typedef String::Type> Type; }; template struct DefaultIndexCreator >, FibreSA> { typedef RadixSortSACreateTag Type; }; template struct Fibre >, FibreTempSA> { typedef Index > TIndex_; typedef typename SAValue::Type TSAValue_; typedef String::Type> Type; }; template < typename TText, typename TConfig> struct DefaultIndexCreator >, FibreSA> { typedef RadixSortSACreateTag Type; }; // ============================================================================ // Classes // ============================================================================ // 
---------------------------------------------------------------------------- // struct RadixTextAccessor [String] // ---------------------------------------------------------------------------- template < typename TSAValue, // input typename TString, // string object that is referenced typename TSpec = void, // Suffix modifier typename TSize = unsigned> // return type (ordValue) struct RadixTextAccessor; /* * NOTE: * These accessors cannot resolve the correct order of out-of-bound-positions, * i.e. when suffixes are equal up to their last character. * All these cases get collected in a 0 bucket. * The InplaceRadixSorter takes care of that by calling a special * sort function on the 0 buckets. */ template struct RadixTextAccessor : public std::unary_function { TString const & text; typename Size::Type const L; RadixTextAccessor(TString const &str) : text(str), L(length(str)) {} template inline TSize operator()(TSAValue const &x, TSize2 depth) const { typename Size::Type pos = x + depth; if (pos >= L) return 0; TSize ret = ordValue(text[pos]); return ret+1; } }; // ---------------------------------------------------------------------------- // struct RadixTextAccessor [StringSet] // ---------------------------------------------------------------------------- template struct RadixTextAccessor, void, TSize> : public std::unary_function { StringSet const & text; String::Type> L; RadixTextAccessor(StringSet const &str) : text(str) { resize(L, length(text), Exact()); for(typename Size::Type i = 0; i < length(text); ++i) L[i] = length(text[i]); } template inline TSize operator()(TSAValue const &x, TSize2 depth) const { typename Size::Type pos = getSeqOffset(x) + depth; typename Size::Type seq = getSeqNo(x); if (pos >= L[seq]) return 0; TSize ret = ordValue(text[seq][pos]); return ret+1; } }; // ---------------------------------------------------------------------------- // struct _ZeroBucketComparator [StringSet] // 
---------------------------------------------------------------------------- // Functors to compare suffixes from 0 bucket (suffixes that are lex. equal) // ---------------------------------------------------------------------------- template struct _ZeroBucketComparator { TLimitsString const & limits; _ZeroBucketComparator(TLimitsString const & lim) : limits(lim) { /*std::cout << "limits: " << limits << std::endl;*/ } inline bool operator()(TSAValue const & a, TSAValue const & b) const { typename Size::Type lena = limits[getSeqNo(a)+1]-limits[getSeqNo(a)] - getSeqOffset(a); typename Size::Type lenb = limits[getSeqNo(b)+1]-limits[getSeqNo(b)] - getSeqOffset(b); if (lena == lenb) return getSeqNo(a) > getSeqNo(b); else return lena < lenb; } }; // ---------------------------------------------------------------------------- // struct _ZeroBucketComparator [String] // ---------------------------------------------------------------------------- template struct _ZeroBucketComparator { _ZeroBucketComparator(Nothing const &) {} _ZeroBucketComparator(Nothing &) {} inline bool operator()(TSAValue const & a, TSAValue const & b) const { return a > b; } }; // ---------------------------------------------------------------------------- // struct RadixSortContext_ // ---------------------------------------------------------------------------- template // alph size = ValueSize + 1 struct RadixSortContext_ { typedef typename StringSetLimits::Type TLimitsString; // "Nothing" for Strings typedef RadixTextAccessor TAccessFunctor; typedef _ZeroBucketComparator TOrderFunctor; typedef typename TAccessFunctor::result_type TOrdValue; static_assert(Q < 256, "Alphabet size must be smaller 256!"); //TODO really? 
static const unsigned ORACLESIZE = 256; TText const & text; TAccessFunctor textAccess; TOrderFunctor comp; TSize bucketSize[Q]; std::array bucketEnd; RadixSortContext_(TText const & t) : text(t), textAccess(t), comp(stringSetLimits(t)) {} }; template inline void clear(RadixSortContext_ & context) { memset(context.bucketSize, 0, sizeof(TSize)*Q); } // ========================================================================== // Functions // ========================================================================== // ---------------------------------------------------------------------------- // Function _radixSort() // ---------------------------------------------------------------------------- template inline void _radixSort(std::vector > & stack, RadixSortContext_ & context, std::tuple const & item) { typedef RadixSortContext_ TContext; typedef typename TContext::TOrdValue TOrdValue; clear(context); // get bucket sizes (i.e. letter counts): // The intermediate oracle array makes it faster (see "Engineering // Radix Sort for Strings" by J Karkkainen & T Rantala) for(TSAValue* i = std::get<0>(item); i < std::get<1>(item); /* noop */ ) { // buffer for the next chars TOrdValue oracle [TContext::ORACLESIZE]; TOrdValue* oracleEnd = oracle + std::min(static_cast(TContext::ORACLESIZE), static_cast(std::get<1>(item) - i)); for(TOrdValue* j = oracle; j < oracleEnd; ++j ) *j = context.textAccess(*i++, std::get<2>(item)); for(TOrdValue* j = oracle; j < oracleEnd; ++j ) ++context.bucketSize[*j]; } // get bucket std::get<1>(item)s, and put buckets on the stack to sort within them later: // EDIT: 0 bucket is not sorted here, but later. 
TSize zeroBucketSize = context.bucketSize[0]; TSAValue* pos = std::get<0>(item) + context.bucketSize[0]; context.bucketEnd[0] = pos; for(unsigned i = 1; i < Q; ++i ) { TSAValue* nextPos = pos + context.bucketSize[i]; if (nextPos - pos > 1) stack.emplace_back(pos, nextPos, std::get<2>(item)+1); pos = nextPos; context.bucketEnd[i] = pos; } // permute items into the correct buckets: for(TSAValue* i = std::get<0>(item); i < std::get<1>(item); ) { TOrdValue subset; // unsigned is faster than uchar! TSAValue holdOut = *i; while(--context.bucketEnd[subset = context.textAccess(holdOut, std::get<2>(item))] > i ) std::swap(*context.bucketEnd[subset], holdOut); *i = holdOut; i += context.bucketSize[subset]; context.bucketSize[subset] = 0; // reset it so we can reuse it //TODO check if we need this, since we clear already! } // sort the 0 bucket using std::sort if(zeroBucketSize > 1) std::sort(std::get<0>(item), std::get<0>(item) + zeroBucketSize, context.comp); } // ---------------------------------------------------------------------------- // Function _radixSortWrapper() // ---------------------------------------------------------------------------- // switch to quicksort if the interval is sufficiently small //TODO: play with this value #ifndef _RADIX_SORT_SWITCH_TO_QUICKSORT_AT #define _RADIX_SORT_SWITCH_TO_QUICKSORT_AT 100 #endif template inline void _radixSortWrapper(std::vector > & stack, RadixSortContext_ & context, std::tuple const & i) { if (std::get<1>(i) - std::get<0>(i) < _RADIX_SORT_SWITCH_TO_QUICKSORT_AT) std::sort(std::get<0>(i), std::get<1>(i), SuffixLess_(context.text, std::get<2>(i))); else if (std::get<1>(i) - std::get<0>(i) >= 2) _radixSort(stack, context, i); } // ---------------------------------------------------------------------------- // Function inplaceFullRadixSort() [default] // ---------------------------------------------------------------------------- #ifdef _OPENMP #define N_THREADS omp_get_max_threads() #define I_THREAD omp_get_thread_num() 
#define MIN_BUCKETS 512 #else #define N_THREADS 1 #define I_THREAD 0 #define MIN_BUCKETS 100 // for somewhat decent progress reporting #endif // TODO: serial version // TODO: possibly quicksort directly on buckets in third steps, if buckets have been made small enough // TODO: double-check the effects of the new "secondStep" template void inPlaceRadixSort(TSA & sa, TText const & text, TLambda && progressCallback = [] (unsigned) mutable {}) { typedef typename Value::Type>::Type TAlphabet; typedef typename Value::Type TSAValue; typedef typename Size::Type TSize; typedef std::tuple TItem; static const unsigned SIGMA = static_cast(ValueSize::VALUE) + 1; SEQAN_ASSERT_LT_MSG(SIGMA, 1000u, "Attention: inplace radix sort is not suited for large alphabets"); typedef RadixSortContext_ TContext; if (empty(sa)) return; // otherwise access sa[0] fails /* stacks */ std::vector firstStack; firstStack.reserve(SIGMA); std::vector secondStack; secondStack.reserve(1000); std::vector> lStack(N_THREADS); // one per thread // reduce memory allocations in threads by reserving space for (auto & stack : lStack) stack.reserve(length(sa) / 1000); /* contexts */ TContext firstSecondContext{text}; std::vector lContext(N_THREADS, TContext{text}); // FIRST STEP // sort by the first character _radixSortWrapper(firstStack, firstSecondContext, TItem(&sa[0], &sa[0]+length(sa), 0)); progressCallback(5); // 5% progress guess after first char // SECOND STEP // sort by next n characters until the stack has reached a good size for distinct parallelization // NOTE that for small alphabets in combination with small texts, this step might sort the entire SA while (!firstStack.empty()) { SEQAN_OMP_PRAGMA(parallel for schedule(dynamic)) for (unsigned j = 0; j < length(firstStack); ++j) _radixSortWrapper(lStack[I_THREAD], lContext[I_THREAD], firstStack[j]); // merge local stacks and clear for next round or next step for (auto & stack : lStack) { secondStack.insert(secondStack.end(), stack.begin(), 
stack.end()); stack.clear(); } // sort the stack by interval size so that large intervals are moved to front // this improves parallelization of dynamic schedule SORT(secondStack.begin(), secondStack.end(), [] (TItem const & l, TItem const & r) { return (std::get<1>(l) - std::get<0>(l)) > (std::get<1>(r) - std::get<0>(r)); }); // check if largest interval "fits" in one thread efficiently // this works independently of alphabet size and just depends on the data // MIN_BUCKETS check additionally guarantees a degree of granularity if ((uint64_t(std::get<1>(secondStack.front()) - std::get<0>(secondStack.front())) <= (length(sa) / N_THREADS)) && (secondStack.size() >= MIN_BUCKETS)) break; // switch buffers for next round firstStack.clear(); std::swap(firstStack, secondStack); } progressCallback(10); // 10% progress guess after second step // THIRD STEP // sort the remaining intervals distinctly; here no locking and syncing is required anymore SEQAN_OMP_PRAGMA(parallel for schedule(dynamic)) for (unsigned j = 0; j < secondStack.size(); ++j) { lStack[I_THREAD].push_back(secondStack[j]); while (!lStack[I_THREAD].empty()) { TItem i = lStack[I_THREAD].back(); lStack[I_THREAD].pop_back(); _radixSortWrapper(lStack[I_THREAD], lContext[I_THREAD], i); } // progressCallback must be thread safe and cope with smaller numbers after big numbers // remaining characters alloted 90% of total progress progressCallback(10 + (j * 90) / secondStack.size()); } progressCallback(100); // done } // ---------------------------------------------------------------------------- // Function createSuffixArray // ---------------------------------------------------------------------------- template inline void createSuffixArray(TSA & SA, StringSet const & s, RadixSortSACreateTag const &, TLambda && progressCallback) { typedef typename Size::Type TSize; typedef typename Iterator::Type TIter; // 1. 
Fill suffix array with a permutation (the identity) TIter it = begin(SA, Standard()); for(unsigned j = 0; j < length(s); ++j) { TSize len = length(s[j]); for(TSize i = 0; i < len; ++i, ++it) *it = Pair(j, i); } // 2. Sort suffix array with inplace radix Sort inPlaceRadixSort(SA, s, progressCallback); } template inline void createSuffixArray(TSA & SA, StringSet const & s, RadixSortSACreateTag const &) { createSuffixArray(SA, s, RadixSortSACreateTag(), [] (unsigned) {}); } } #endif // #ifndef CORE_INCLUDE_SEQAN_INDEX_RADIX_INPLACE_H_ lambda-lambda-v2.0.1/src/search.hpp000066400000000000000000000541011445553061700171160ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda.cpp: Main File for Lambda // ========================================================================== #include #include #include #include #include #include #include #include "shared_definitions.hpp" #include "shared_options.hpp" #include "shared_misc.hpp" #include "search_output.hpp" #include "search_options.hpp" #include "search_datastructures.hpp" #include "search_misc.hpp" #include "search_algo.hpp" // forwards void argConv0(LambdaOptions & options); //- template void argConv1(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &); //- template void argConv2(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &); //- template void argConv3(LambdaOptions & options, TOutFormat const &, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const &); //- template void argConv4(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const & /**/, TScoreExtension const & /**/); //- template void realMain(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const & /**/, TScoreExtension const & /**/); // -------------------------------------------------------------------------- // Function main() // -------------------------------------------------------------------------- int searchMain(int const argc, char const ** argv) { LambdaOptions options; seqan::ArgumentParser::ParseResult res = parseCommandLine(options, argc, argv); if (res != seqan::ArgumentParser::PARSE_OK) return res == seqan::ArgumentParser::PARSE_ERROR; #ifdef NDEBUG try { argConv0(options); } catch (std::bad_alloc const & e) { std::cerr << "\n\nERROR: Lambda ran out of memory :(\n" " You need to split your file into smaller segments or search against a smaller database.\n"; return -1; } catch (IndexException const & e) { std::cerr << "\n\nERROR: The following exception was thrown while reading the index:\n" << " \"" << e.what() << "\"\n" << " Make sure the directory exists and is readable; recreate the index and try again.\n" << " If the problem persists, report an issue at https://github.com/seqan/lambda/issues " << "and include this output, as well as the output of `lambda2 --version`, thanks!\n"; return -1; } catch (std::exception const & e) { std::cerr << "\n\nERROR: The following unspecified exception was thrown:\n" << " \"" << e.what() << "\"\n" << " If the problem persists, report an issue at https://github.com/seqan/lambda/issues " << "and include this output, as well as the output of `lambda2 --version`, thanks!\n"; return -1; } #else // In debug mode we don't catch the exceptions so that we get a backtrace from SeqAn's handler argConv0(options); #endif return 0; } // CONVERT Run-time options to compile-time Format-Type void argConv0(LambdaOptions & options) { myPrint(options, 1, "LAMBDA - the Local Aligner for Massive Biological DatA" "\n======================================================" "\nVersion ", SEQAN_APP_VERSION, "\n\n"); // Index myPrint(options, 1, "Reading index properties... 
"); readIndexOptions(options); myPrint(options, 1, "done.\n"); #ifndef LAMBDA_LEGACY_PATHS if (options.dbIndexType == DbIndexType::SUFFIX_ARRAY) throw IndexException("The index is of type suffix array, but support was removed." "Either rebuild lambda2 with '-DLAMBDA_LEGACY_PATHS=1' or re-create the index.\n"); #endif // LAMBDA_LEGACY_PATHS myPrint(options, 2, "Index properties\n" " type: ", _indexEnumToName(options.dbIndexType), "\n", " original alphabet: ", _alphabetEnumToName(options.subjOrigAlphabet), "\n"); if (_alphabetEnumToName(options.subjOrigAlphabet) == _alphabetEnumToName(options.transAlphabet)) { myPrint(options, 2, " translated alphabet: not translated\n"); if (options.geneticCode == 0) // use same geneticCode as Index, but index wasn't translated options.geneticCode = CANONICAL; } else { myPrint(options, 2, " translated alphabet: ", _alphabetEnumToName(options.transAlphabet), "\n"); myPrint(options, 2, " translation code: ", options.geneticCodeIndex, "\n"); if (options.geneticCode == 0) // use same geneticCode as Index { options.geneticCode = options.geneticCodeIndex; } else if (options.geneticCode != options.geneticCodeIndex) { std::cerr << "WARNING: The genetic code used when creating the index: " << options.geneticCodeIndex << "\n is not the same as now selected for the query sequences: " << options.geneticCode << "\n Are you sure this is what you want?\n"; } } if (_alphabetEnumToName(options.transAlphabet) == _alphabetEnumToName(options.reducedAlphabet)) { myPrint(options, 2, " reduced alphabet: not reduced\n"); } else { myPrint(options, 2, " reduced alphabet: ", _alphabetEnumToName(options.reducedAlphabet), "\n\n"); } if ((options.blastProgram == BlastProgram::BLASTN) && (options.reducedAlphabet != AlphabetEnum::DNA5)) { throw std::runtime_error("You are attempting a nucleotide search on a protein index." 
"Did you want to use 'lambda2 searchp' instead?"); } // query file if (options.qryOrigAlphabet == AlphabetEnum::DNA4) // means "auto", as dna4 not valid as argument to --query-alphabet { myPrint(options, 1, "Detecting query alphabet... "); options.qryOrigAlphabet = detectSeqFileAlphabet(options.queryFile); myPrint(options, 1, _alphabetEnumToName(options.qryOrigAlphabet), " detected.\n"); } // set blastProgram if (options.blastProgram == BlastProgram::UNKNOWN) { if ((options.transAlphabet == AlphabetEnum::DNA5) && (options.qryOrigAlphabet == AlphabetEnum::AMINO_ACID)) { throw IndexException("Query file is protein, but index is nucleotide. " "Recreate the index with 'lambda mkindexp'."); } else if ((options.transAlphabet == AlphabetEnum::DNA5) && (options.qryOrigAlphabet == AlphabetEnum::DNA5)) { options.blastProgram = BlastProgram::BLASTN; } else if (options.qryOrigAlphabet == AlphabetEnum::DNA5) // query will be translated { if (options.subjOrigAlphabet == options.transAlphabet) options.blastProgram = BlastProgram::BLASTX; else options.blastProgram = BlastProgram::TBLASTX; } else // query is aminoacid already { if (options.subjOrigAlphabet == options.transAlphabet) options.blastProgram = BlastProgram::BLASTP; else options.blastProgram = BlastProgram::TBLASTN; } } // some blastProgram-specific "late option modifiers" if (((options.blastProgram == BlastProgram::BLASTP) || (options.blastProgram == BlastProgram::TBLASTN)) && (!options.samBamTags[SamBamExtraTags<>::Q_AA_CIGAR])) options.samBamSeq = 0; // sizes checkRAM(options); // make sure output is writable int fd = open(toCString(options.output), O_WRONLY | O_CREAT | O_NOCTTY | O_NONBLOCK, 0600); if (fd < 0) { throw std::invalid_argument("Output file not writable. 
Check if the directory exists and you have correct " "permissions."); } else { close(fd); // will be opened again, later } // output format conversion to constexpr CharString output = options.output; if (endsWith(output, ".gz")) output = prefix(output, length(output) - 3); else if (endsWith(output, ".bz2")) output = prefix(output, length(output) - 4); if (endsWith(output, ".m0")) return argConv1(options, BlastReport(), BlastTabularSpecSelector()); else if (endsWith(output, ".m8")) return argConv1(options, BlastTabular(), BlastTabularSpecSelector()); else if (endsWith(output, ".m9")) return argConv1(options, BlastTabular(), BlastTabularSpecSelector()); else if (endsWith(output, ".sam") || endsWith(output, ".bam")) // handled elsewhere return argConv1(options, BlastTabular(), BlastTabularSpecSelector()); throw std::invalid_argument("Cannot handle output extension. THIS IS A BUG, please report it!"); } template void argConv1(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &) { switch(options.blastProgram) { #ifndef FASTBUILD case BlastProgram::BLASTN: return argConv3(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector(), Dna5()); #endif case BlastProgram::BLASTP: return argConv2(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector()); case BlastProgram::BLASTX: return argConv2(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector()); #ifndef FASTBUILD case BlastProgram::TBLASTN: return argConv2(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector()); case BlastProgram::TBLASTX: return argConv2(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector()); #endif default: break; } throw std::invalid_argument("Could not determine blast program mode, THIS IS A BUG, please report it!"); } /// Alphabet reduction template void argConv2(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &) { using Th = BlastTabularSpecSelector; using Tp = BlastProgramSelector

; switch (options.reducedAlphabet) { case AlphabetEnum::AMINO_ACID: return argConv3(options, TOutFormat(), Th(), Tp(), AminoAcid()); case AlphabetEnum::MURPHY10: return argConv3(options, TOutFormat(), Th(), Tp(), ReducedAminoAcid()); #if 0 case 10: return argConv2(options, TOutFormat(), ReducedAminoAcid>()); case 1: return argConv2(options, TOutFormat(), AminoAcid10()); case 8: return argConv2(options, TOutFormat(), ReducedAminoAcid>()); case 12: return argConv2(options, TOutFormat(), ReducedAminoAcid>()); #endif default: break; } throw std::invalid_argument("The alphabet reduction used by the index is not available. Possibly it was " "added in a later Lambda version. If your lambda version is up-to-date, please " "report this as a bug."); } // extension model template void argConv3(LambdaOptions & options, TOutFormat const &, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const &) { if (options.gapOpen == 0) #ifndef LAMBDA_LINGAPS_OPT std::cerr << "ATTENTION: You have set the additional gap open cost to 0. If you run LAMBDA " "in this configuration regularly, you might want to rebuild it with " "LAMBDA_LINGAPS_OPT=1 to profit from additional optimizations.\n"; #else return argConv4(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector

(), TRedAlph(), LinearGaps()); else #endif return argConv4(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector

(), TRedAlph(), AffineGaps()); } template void argConv4(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const & /**/, TScoreExtension const & /**/) { #ifdef LAMBDA_LEGACY_PATHS if (options.dbIndexType == DbIndexType::SUFFIX_ARRAY) return realMain>(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector

(), TRedAlph(), TScoreExtension()); else #endif // LAMBDA_LEGACY_PATHS if (options.dbIndexType == DbIndexType::BI_FM_INDEX) return realMain>>(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector

(), TRedAlph(), TScoreExtension()); else return realMain>(options, TOutFormat(), BlastTabularSpecSelector(), BlastProgramSelector

(), TRedAlph(), TScoreExtension()); } /// REAL MAIN #ifdef _OPENMP #define TID omp_get_thread_num() #else #define TID 0 #endif template void realMain(LambdaOptions & options, TOutFormat const & /**/, BlastTabularSpecSelector const &, BlastProgramSelector

const &, TRedAlph const & /**/, TScoreExtension const & /**/) { using TGlobalHolder = GlobalDataHolder; using TLocalHolder = LocalDataHolder; if (options.verbosity >= 2) printOptions(options); TGlobalHolder globalHolder; // context(globalHolder.outfile).scoringScheme._internalScheme = matr; prepareScoring(globalHolder, options); loadSubjects(globalHolder, options); loadDbIndexFromDisk(globalHolder, options); loadTaxonomy(globalHolder, options); loadQuery(globalHolder, options); // std::cout << "1st Query:\n" // << front(globalHolder.qrySeqs) << "\n" // << front(globalHolder.redQrySeqs) << "\n"; // // std::cout << "last Query:\n" // << back(globalHolder.qrySeqs) << "\n" // << back(globalHolder.redQrySeqs) << "\n"; // // std::cout << "1st Subject:\n" // << front(globalHolder.subjSeqs) << "\n" // << front(globalHolder.redSubjSeqs) << "\n"; // // std::cout << "last Subject:\n" // << back(globalHolder.subjSeqs) << "\n" // << back(globalHolder.redSubjSeqs) << "\n"; myWriteHeader(globalHolder, options); if (options.doubleIndexing) { myPrint(options, 1, "Searching ", options.queryPart, " blocks of query with ", options.threads, " threads...\n"); if ((options.isTerm) && (options.verbosity >= 1)) { for (unsigned char i=0; i< options.threads+3; ++i) std::cout << std::endl; std::cout << "\033[" << options.threads+2 << "A"; } } else { myPrint(options, 1, "Searching and extending hits on-line...progress:\n" "0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%\n|"); } double start = sysTime(); uint64_t lastPercent = 0; SEQAN_OMP_PRAGMA(parallel) { TLocalHolder localHolder(options, globalHolder); SEQAN_OMP_PRAGMA(for schedule(dynamic)) for (uint64_t t = 0; t < localHolder.nBlocks; ++t) { int res = 0; localHolder.init(t); // seed #ifdef LAMBDA_MICRO_STATS double buf = sysTime(); #endif if (options.doubleIndexing) { res = generateSeeds(localHolder); if (res) continue; res = generateTrieOverSeeds(localHolder); if (res) continue; } #ifdef LAMBDA_MICRO_STATS localHolder.stats.timeGenSeeds += 
sysTime() - buf; // search buf = sysTime(); #endif search(localHolder); //TODO seed refining if iterateMatches gives 0 results #ifdef LAMBDA_MICRO_STATS localHolder.stats.timeSearch += sysTime() - buf; #endif // // TODO DEBUG // for (auto const & m : localHolder.matches) // _printMatch(m); // sort if (options.filterPutativeAbundant || options.filterPutativeDuplicates || options.mergePutativeSiblings) { #ifdef LAMBDA_MICRO_STATS buf = sysTime(); #endif sortMatches(localHolder); #ifdef LAMBDA_MICRO_STATS localHolder.stats.timeSort += sysTime() - buf; #endif } // extend if (length(localHolder.matches) > 0) res = iterateMatches(localHolder); if (res) continue; if ((!options.doubleIndexing) && (TID == 0) && (options.verbosity >= 1)) { unsigned curPercent = ((t * 50) / localHolder.nBlocks) * 2; // round to even printProgressBar(lastPercent, curPercent); } } // implicit thread sync here if ((!options.doubleIndexing) && (TID == 0) && (options.verbosity >= 1)) printProgressBar(lastPercent, 100); SEQAN_OMP_PRAGMA(critical(statsAdd)) { globalHolder.stats += localHolder.stats; } } myPrint(options, 1, "\n"); myWriteFooter(globalHolder, options); if (!options.doubleIndexing) { myPrint(options, 2, "Runtime total: ", sysTime() - start, "s.\n\n"); } printStats(globalHolder.stats, options); } lambda-lambda-v2.0.1/src/search_algo.hpp000066400000000000000000003116011445553061700201210ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // lambda.hpp: contains the main progam pipeline // ========================================================================== #ifndef LAMBDA_SEARCH_ALGO_H_ #define LAMBDA_SEARCH_ALGO_H_ #include #include #include #include #include #include #include #include #include #include using namespace seqan; // ============================================================================ // Forwards // ============================================================================ template class GlobalDataHolder; template class LocalDataHolder; // ============================================================================ // Classes, structs, enums // ============================================================================ enum COMPUTERESULT_ { SUCCESS = 0, PREEXTEND, PERCENTIDENT, EVALUE, OTHER_FAIL }; //TODO replace with lambda // comparison operator to sort SA-Values based on the strings in the SA they refer to template struct Comp : public ::std::binary_function < TSav, TSav, bool > { TStringSet const & stringSet; Comp(TStringSet const & _stringSet) : stringSet(_stringSet) {} inline bool operator() (TSav const & i, TSav const & j) const { return (value(stringSet,getSeqNo(i)) < value(stringSet,getSeqNo(j))); } }; // ============================================================================ // Functions // ============================================================================ // 
-------------------------------------------------------------------------- // Function readIndexOption() // -------------------------------------------------------------------------- inline void readIndexOption(std::string & optionString, std::string const & optionIdentifier, LambdaOptions const & options) { std::ifstream f{(options.indexDir + "/option:" + optionIdentifier).c_str(), std::ios_base::in | std::ios_base::binary}; if (f.is_open()) { auto fit = directionIterator(f, Input()); readLine(optionString, fit); f.close(); } else { throw IndexException("Expected option specifier:\n" + options.indexDir + "/option:" + optionIdentifier); } } // -------------------------------------------------------------------------- // Function readIndexOptions() // -------------------------------------------------------------------------- void readIndexOptions(LambdaOptions & options) { // Check that directory exists and is readable struct stat path_stat; stat(toCString(options.indexDir), &path_stat); if (stat(toCString(options.indexDir), &path_stat) || !S_ISDIR(path_stat.st_mode)) throw IndexException("Index directory does not exist or is not readable.\n"); std::string buffer; uint64_t b = 0; readIndexOption(buffer, "generation", options); b = 0; if ((!lexicalCast(b, buffer)) || (b != static_cast(indexGeneration))) throw IndexException("Your index was created with an incompatible version of Lambda.\n"); buffer.clear(); readIndexOption(buffer, "alph_original", options); options.subjOrigAlphabet = _alphabetNameToEnum(buffer); buffer.clear(); readIndexOption(buffer, "alph_translated", options); options.transAlphabet = _alphabetNameToEnum(buffer); buffer.clear(); readIndexOption(buffer, "alph_reduced", options); options.reducedAlphabet = _alphabetNameToEnum(buffer); buffer.clear(); readIndexOption(buffer, "db_index_type", options); b = 0; if (!lexicalCast(b, buffer)) throw IndexException("Could not read the index type.\n\n"); options.dbIndexType = static_cast(b); if 
(options.subjOrigAlphabet != options.transAlphabet) { buffer.clear(); readIndexOption(buffer, "genetic_code", options); b = 0; if (!lexicalCast(b, buffer)) throw IndexException("Could not read the index's genetic code."); options.geneticCodeIndex = static_cast(b); } buffer.clear(); readIndexOption(buffer, "subj_seq_len_bits", options); b = 0; if ((!lexicalCast(b, buffer)) || (b != _alphabetEnumToSize(options.reducedAlphabet) * 8)) { std::string err; #ifndef LAMBDA_LONG_PROTEIN_SUBJ_SEQS err += "Your lambda executable was built with LAMBDA_LONG_PROTEIN_SUBJ_SEQS, " "but the index was created by an executable that was built without it."; #else err += "Your lambda executable was built without LAMBDA_LONG_PROTEIN_SUBJ_SEQS," "but the index was created by an executable that was built with it."; #endif err += "You need to recreate the index or rebuild Lambda."; throw IndexException(err); } } // -------------------------------------------------------------------------- // Function checkRAM() // -------------------------------------------------------------------------- void checkRAM(LambdaOptions const & options) { myPrint(options, 1, "Checking memory requirements... 
"); uint64_t ram = getTotalSystemMemory(); uint64_t sizeIndex = 0; uint64_t sizeQuery = 0; sizeIndex = dirSize(toCString(options.indexDir)); sizeQuery = fileSize(toCString(options.queryFile)); uint64_t requiredRAM = ((sizeIndex + sizeQuery) * 11) / 10; // give it +10% TODO verify if (requiredRAM >= ram) { myPrint(options, 1, "done.\n"); std::cerr << "WARNING: You need approximately " << requiredRAM / 1024 / 1024 << "MB of memory, " << "but you have only " << ram / 1024 / 1024 << " :'(\nYou should abort this run and try on a machine with more memory!"; } myPrint(options, 1, "met.\n"); myPrint(options, 2, "Detected: ", ram / 1024 / 1024, "MB, Estimated: ", requiredRAM / 1024 / 1024, "MB\n\n"); } // -------------------------------------------------------------------------- // Function prepareScoring() // -------------------------------------------------------------------------- template inline void prepareScoringMore(GlobalDataHolder & globalHolder, LambdaOptions const & options, std::true_type const & /**/) { setScoreMatch(context(globalHolder.outfile).scoringScheme, options.match); setScoreMismatch(context(globalHolder.outfile).scoringScheme, options.misMatch); } template inline void prepareScoringMore(GlobalDataHolder & globalHolder, LambdaOptions const & options, std::false_type const & /**/) { switch (options.scoringMethod) { // case 0: // return argConv3(options, TOutFormat(), Th(), Tp(), TRedAlph(), Score()); case 45: setScoreMatrixById(context(globalHolder.outfile).scoringScheme._internalScheme, AminoAcidScoreMatrixID::BLOSUM45); break; case 62: setScoreMatrixById(context(globalHolder.outfile).scoringScheme._internalScheme, AminoAcidScoreMatrixID::BLOSUM62); break; case 80: setScoreMatrixById(context(globalHolder.outfile).scoringScheme._internalScheme, AminoAcidScoreMatrixID::BLOSUM80); break; default: break; } } template void prepareScoring(GlobalDataHolder & globalHolder, LambdaOptions const & options) { using TGlobalHolder = GlobalDataHolder; 
prepareScoringMore(globalHolder, options, std::is_same>()); setScoreGapOpenBlast(context(globalHolder.outfile).scoringScheme, options.gapOpen); setScoreGapExtend(context(globalHolder.outfile).scoringScheme, options.gapExtend); if (!isValid(context(globalHolder.outfile).scoringScheme)) throw std::runtime_error{"Could not computer Karlin-Altschul-Values for Scoring Scheme.\n"}; } // -------------------------------------------------------------------------- // Function loadSubjects() // -------------------------------------------------------------------------- template void loadSubjects(GlobalDataHolder & globalHolder, LambdaOptions const & options) { using TGH = GlobalDataHolder; double start, finish; std::string strIdent; int ret; CharString _dbSeqs; if (TGH::indexIsFM || TGH::alphReduction) // otherwise sequences are loaded as part of index { start = sysTime(); strIdent = "Loading Subj Sequences..."; myPrint(options, 1, strIdent); _dbSeqs = options.indexDir; append(_dbSeqs, "/translated_seqs"); ret = open(globalHolder.subjSeqs, toCString(_dbSeqs), OPEN_RDONLY); if (ret != true) throw IndexException{"Could not open subject sequences.\n"}; if (length(globalHolder.subjSeqs) == 0) throw IndexException{"No sequences in database.\n"}; if (TGH::alphReduction) globalHolder.redSubjSeqs.limits = globalHolder.subjSeqs.limits; finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n", "Amount: ", length(globalHolder.subjSeqs), "\n\n"); } start = sysTime(); strIdent = "Loading Subj Ids..."; myPrint(options, 1, strIdent); _dbSeqs = options.indexDir; append(_dbSeqs, "/seq_ids"); ret = open(globalHolder.subjIds, toCString(_dbSeqs), OPEN_RDONLY); if (ret != true) throw IndexException{"Could not open subject IDs."}; finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n"); context(globalHolder.outfile).dbName = options.indexDir; // if subjects where translated, we don't have the 
untranslated seqs at all // but we still need the data for statistics and position un-translation if (sIsTranslated(p)) { start = sysTime(); std::string strIdent = "Loading Lengths of untranslated Subj sequences..."; myPrint(options, 1, strIdent); _dbSeqs = options.indexDir; append(_dbSeqs, "/untranslated_seq_lengths"); ret = open(globalHolder.untransSubjSeqLengths, toCString(_dbSeqs), OPEN_RDONLY); if (ret != true) throw IndexException{"Could not open unstranslated Subj sequence lengths."}; finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } } // -------------------------------------------------------------------------- // Function loadIndexFromDisk() // -------------------------------------------------------------------------- template void loadDbIndexFromDisk(TGlobalHolder & globalHolder, LambdaOptions const & options) { std::string strIdent = "Loading Database Index..."; myPrint(options, 1, strIdent); double start = sysTime(); std::string path = toCString(options.indexDir); path += "/index"; int ret = open(globalHolder.dbIndex, path.c_str(), OPEN_RDONLY); if (ret != true) throw IndexException{"Could not open the fm index / suffix array."}; // assign previously loaded sub sequences (possibly modifier-wrapped // to the text-member of our new index (unless isFM, which doesnt need text) if ((!TGlobalHolder::indexIsFM) && (TGlobalHolder::alphReduction)) indexText(globalHolder.dbIndex) = globalHolder.redSubjSeqs; double finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n"); // TODO reactivate and remove one '\n' above // myPrint(options, 2, "No of Fibres: ", length(indexSA(globalHolder.dbIndex)), "\n\n"); // this is actually part of prepareScoring(), but the values are just available now if (sIsTranslated(TGlobalHolder::blastProgram )) { // last value has sum of lengths context(globalHolder.outfile).dbTotalLength = 
back(globalHolder.untransSubjSeqLengths); context(globalHolder.outfile).dbNumberOfSeqs = length(globalHolder.untransSubjSeqLengths) - 1; } else { context(globalHolder.outfile).dbTotalLength = length(concat(globalHolder.subjSeqs)); context(globalHolder.outfile).dbNumberOfSeqs = length(globalHolder.subjSeqs); } } // -------------------------------------------------------------------------- // Function loadSTaxIds() // -------------------------------------------------------------------------- template void loadTaxonomy(TGlobalHolder & globalHolder, LambdaOptions const & options) { if (!options.hasSTaxIds) return; std::string path = toCString(options.indexDir); path += "/staxids"; std::string strIdent = "Loading Subject Taxonomy IDs..."; myPrint(options, 1, strIdent); double start = sysTime(); int ret = open(globalHolder.sTaxIds, path.c_str(), OPEN_RDONLY); if (ret != true) { throw IndexException{"Could not load taxonomy IDS (but they are required for the chosen output options). " "Did you forget to specify a map file while indexing?"}; } double finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n"); SEQAN_ASSERT_EQ(length(globalHolder.sTaxIds), length(globalHolder.redSubjSeqs)); if (!options.computeLCA) return; strIdent = "Loading Subject Taxonomic Tree..."; myPrint(options, 1, strIdent); start = sysTime(); std::string taxTreeExceptMessage = "Could not load the taxonomy tree (but it is required for the chosen output options). 
" "Did you forget to specify a taxonomy dump dir while indexing?"; path = toCString(options.indexDir); path += "/tax_parents"; ret = open(globalHolder.taxParents, path.c_str(), OPEN_RDONLY); if (ret != true) throw IndexException{taxTreeExceptMessage}; path = toCString(options.indexDir); path += "/tax_heights"; ret = open(globalHolder.taxHeights, path.c_str(), OPEN_RDONLY); if (ret != true) throw IndexException{taxTreeExceptMessage}; path = toCString(options.indexDir); path += "/tax_names"; ret = open(globalHolder.taxNames, path.c_str(), OPEN_RDONLY); if (ret != true) throw IndexException{taxTreeExceptMessage}; finish = sysTime() - start; myPrint(options, 1, " done.\n"); myPrint(options, 2, "Runtime: ", finish, "s \n\n"); } // -------------------------------------------------------------------------- // Function loadQuery() // -------------------------------------------------------------------------- // BLASTX, TBLASTX template ::value> = 0> inline void loadQueryImplTrans(TCDStringSet> & target, TCDStringSet> & source, TUntransLengths & untransQrySeqLengths, LambdaOptions const & options) { myPrint(options, 1, "translating..."); // translate translate(target, source, SIX_FRAME, options.geneticCode); // preserve lengths of untranslated sequences resize(untransQrySeqLengths, length(source.limits), Exact()); #ifdef __clang__ SEQAN_OMP_PRAGMA(parallel for) #else SEQAN_OMP_PRAGMA(parallel for simd) #endif for (uint32_t i = 0; i < (length(untransQrySeqLengths) - 1); ++i) untransQrySeqLengths[i] = source.limits[i + 1] - source.limits[i]; // save sum of lengths (both strings have n + 1 elements back(source.limits) = length(source.concat); } // BLASTN template inline void loadQueryImplTrans(TCDStringSet, TSpec1>> & target, TCDStringSet, TSpec2>> & source, TUntransLengths & /**/, LambdaOptions const & options) { using TAlph = TransAlph; // using TReverseCompl = ModifiedString, // ModView>>, ModReverse>; myPrint(options, 1, " generating reverse complements..."); // no need for 
translation, but we need reverse complements resize(target.concat, length(source.concat) * 2); resize(target.limits, length(source) * 2 + 1); target.limits[0] = 0; uint64_t const l = length(target.limits) - 1; for (uint64_t i = 1; i < l; i+=2) { target.limits[i] = target.limits[i-1] + length(source[i/2]); target.limits[i+1] = target.limits[i] + length(source[i/2]); } FunctorComplement functor; uint64_t tBeg, tBegNext, len, sBeg; SEQAN_OMP_PRAGMA(parallel for schedule(dynamic) private(tBeg, tBegNext, len, sBeg)) for (uint64_t i = 0; i < (l - 1); i+=2) { tBeg = target.limits[i]; tBegNext = target.limits[i+1]; len = tBegNext - tBeg; sBeg = source.limits[i/2]; // avoid senseless copying and iterate manually for (uint32_t j = 0; j < len; ++j) { target.concat[tBeg + j] = source.concat[sBeg + j]; target.concat[tBegNext + j] = functor(source.concat[sBeg+len-j-1]); } } } // BLASTP, TBLASTN template inline void loadQueryImplTrans(TCDStringSet, TSpec1>> & target, TCDStringSet, TSpec2>> & source, TUntransLengths & /**/, LambdaOptions const & /**/) { // no need for translation, but sequences have to be in right place swap(target, source); } template void loadQuery(GlobalDataHolder & globalHolder, LambdaOptions & options) { using TGH = GlobalDataHolder; double start = sysTime(); std::string strIdent = "Loading Query Sequences and Ids..."; myPrint(options, 1, strIdent); TCDStringSet, typename TGH::TQryTag>> origSeqs; try { SeqFileIn infile(toCString(options.queryFile)); myReadRecords(globalHolder.qryIds, origSeqs, infile); } catch(std::exception const & e) { throw QueryException{"There was an file system or format error."}; } if (length(origSeqs) == 0) { throw QueryException{"Zero sequences submitted."}; } // translate loadQueryImplTrans(globalHolder.qrySeqs, origSeqs, globalHolder.untransQrySeqLengths, options); // sam and bam need original sequences if translation happened if (qIsTranslated(TGH::blastProgram) && (options.outFileFormat > 0) && (options.samBamSeq > 0)) 
std::swap(origSeqs, globalHolder.untranslatedQrySeqs); if (TGH::alphReduction) globalHolder.redQrySeqs.limits = globalHolder.qrySeqs.limits; double finish = sysTime() - start; myPrint(options, 1, " done.\n"); unsigned long maxLen = 0ul; for (auto const & s : globalHolder.qrySeqs) if (length(s) > maxLen) maxLen = length(s); myPrint(options, 2, "Runtime: ", finish, "s \n", "Number of effective query sequences: ", length(globalHolder.qrySeqs), "\nLongest query sequence: ", maxLen, "\n\n"); if (length(globalHolder.qrySeqs) >= std::numeric_limits::max()) { throw QueryException{"Too many sequences submitted. The maximum (including frames) is " + std::to_string(std::numeric_limits::max()) + "."}; } if (maxLen >= std::numeric_limits::max()) { throw QueryException{"One or more of your query sequences are too long. The maximum length is " + std::to_string(std::numeric_limits::max()) + "."}; } // TODO: after changing this, make options const again if (options.extensionMode == LambdaOptions::ExtensionMode::AUTO) { if (maxLen <= 100) { #if defined(SEQAN_SIMD_ENABLED) options.extensionMode = LambdaOptions::ExtensionMode::FULL_SIMD; #else options.extensionMode = LambdaOptions::ExtensionMode::FULL_SERIAL; #endif options.xDropOff = -1; options.filterPutativeAbundant = false; options.filterPutativeDuplicates = false; options.mergePutativeSiblings = false; } else { options.extensionMode = LambdaOptions::ExtensionMode::XDROP; } } } /// THREAD LOCAL STUFF // -------------------------------------------------------------------------- // Function generateSeeds() // -------------------------------------------------------------------------- /* #define THREADLINE std::cout << "\0338" << std::endl << "Thread " << lH.i \ << std::endl; \ for (unsigned char i=0; i< lH.i*20; ++i) std::cout << std::endl;*/ template inline int generateSeeds(TLocalHolder & lH) { if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, "Block ", std::setw(4), lH.i, ": Generating Seeds..."); if 
(lH.options.isTerm) myPrint(lH.options, 1, lH.statusStr); } double start = sysTime(); for (unsigned long i = lH.indexBeginQry; i < lH.indexEndQry; ++i) { for (unsigned j = 0; (j* lH.options.seedOffset + lH.options.seedLength) <= length(value(lH.gH.redQrySeqs, i)); ++j) { appendValue(lH.seeds, infix(value(lH.gH.redQrySeqs, i), j* lH.options.seedOffset, j* lH.options.seedOffset + lH.options.seedLength), Generous()); appendValue(lH.seedRefs, i, Generous()); appendValue(lH.seedRanks, j, Generous()); // std::cout << "seed: " << back(lH.seeds) << "\n"; } } double finish = sysTime() - start; if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, " done. "); appendToStatus(lH.statusStr, lH.options, 2, finish, "s. ", length(lH.seeds), " seeds created."); myPrint(lH.options, 1, lH.statusStr); } return 0; } // -------------------------------------------------------------------------- // Function generateTrieOverSeeds() // -------------------------------------------------------------------------- template inline int generateTrieOverSeeds(TLocalHolder & lH) { if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, "Generating Query-Index..."); if (lH.options.isTerm) myPrint(lH.options, 1, lH.statusStr); } double start = sysTime(); // we only want full length seed sequences in index, build up manually typedef typename Fibre::Type TSa; //TODO maybe swap here instead lH.seedIndex = decltype(lH.seedIndex)(lH.seeds); TSa & sa = indexSA(lH.seedIndex); resize(sa, length(lH.seeds)); for (unsigned u = 0; u < length(lH.seeds); ++u) { assignValueI1(value(sa,u), u); assignValueI2(value(sa,u), 0); } Comp::Type, typename TLocalHolder::TSeeds const> comp(lH.seeds); std::sort(begin(sa, Standard()), end(sa, Standard()), comp); typename Iterator >::Type it(lH.seedIndex); // instantiate double finish = sysTime() - start; if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, " done. 
"); appendToStatus(lH.statusStr, lH.options, 2, finish, "s. ", length(sa), " fibres in SeedIndex. ");; myPrint(lH.options, 1, lH.statusStr); } return 0; } // perform a fast local alignment score calculation on the seed and see if we // reach above threshold // WARNING the following function only works for hammingdistanced seeds template inline bool seedLooksPromising(LocalDataHolder const & lH, typename TGlobalHolder::TMatch const & m) { int64_t effectiveQBegin = m.qryStart; int64_t effectiveSBegin = m.subjStart; uint64_t actualLength = m.qryEnd - m.qryStart; uint64_t effectiveLength = std::max(static_cast(lH.options.seedLength * lH.options.preScoring), actualLength); if (effectiveLength > actualLength) { effectiveQBegin -= (effectiveLength - actualLength) / 2; effectiveSBegin -= (effectiveLength - actualLength) / 2; int64_t min = std::min(effectiveQBegin, effectiveSBegin); if (min < 0) { effectiveQBegin -= min; effectiveSBegin -= min; effectiveLength += min; } effectiveLength = std::min({ static_cast(length(lH.gH.qrySeqs[m.qryId]) - effectiveQBegin), static_cast(length(lH.gH.subjSeqs[m.subjId]) - effectiveSBegin), effectiveLength}); } auto const & qSeq = infix(lH.gH.qrySeqs[m.qryId], effectiveQBegin, effectiveQBegin + effectiveLength); auto const & sSeq = infix(lH.gH.subjSeqs[m.subjId], effectiveSBegin, effectiveSBegin + effectiveLength); int s = 0; int maxScore = 0; int const thresh = lH.options.preScoringThresh * effectiveLength; // score the diagonal for (uint64_t i = 0; i < effectiveLength; ++i) { s += score(seqanScheme(context(lH.gH.outfile).scoringScheme), qSeq[i], sSeq[i]); if (s < 0) s = 0; else if (s > maxScore) maxScore = s; if (maxScore >= thresh) return true; } return false; } // -------------------------------------------------------------------------- // Function onFind() // -------------------------------------------------------------------------- template inline void onFind(LocalDataHolder & lH, TSeedId const & seedId, TSubjOcc subjOcc) { using 
TMatch = typename TGlobalHolder::TMatch; SEQAN_ASSERT_LEQ_MSG(getSeqOffset(subjOcc) + lH.options.seedLength, length(lH.gH.subjSeqs[getSeqNo(subjOcc)]), "Seed reaches beyond end of subject sequence! Please report a bug with your files at " "http://www.seqan.de/lambda !"); if (TGlobalHolder::indexIsFM) // positions are reversed setSeqOffset(subjOcc, length(lH.gH.subjSeqs[getSeqNo(subjOcc)]) - getSeqOffset(subjOcc) - lH.options.seedLength); TMatch m {static_cast(lH.seedRefs[seedId]), static_cast(getSeqNo(subjOcc)), static_cast(lH.seedRanks[seedId] * lH.options.seedOffset), static_cast(lH.seedRanks[seedId] * lH.options.seedOffset + lH.options.seedLength), static_cast(getSeqOffset(subjOcc)), static_cast(getSeqOffset(subjOcc) + lH.options.seedLength)}; bool discarded = false; if (!seedLooksPromising(lH, m)) { discarded = true; ++lH.stats.hitsFailedPreExtendTest; } if (!discarded) lH.matches.emplace_back(m); } template inline void onFindVariable(LocalDataHolder & lH, TSubjOcc subjOcc, typename TGlobalHolder::TMatch::TQId const seedId, typename TGlobalHolder::TMatch::TPos const seedBegin, typename TGlobalHolder::TMatch::TPos const seedLength) { using TMatch = typename TGlobalHolder::TMatch; if (TGlobalHolder::indexIsFM) // positions are reversed setSeqOffset(subjOcc, length(lH.gH.subjSeqs[getSeqNo(subjOcc)]) - getSeqOffset(subjOcc) - seedLength); TMatch m {seedId, static_cast(getSeqNo(subjOcc)), seedBegin, static_cast(seedBegin + seedLength), static_cast(getSeqOffset(subjOcc)), static_cast(getSeqOffset(subjOcc) + seedLength)}; SEQAN_ASSERT_LT(m.qryStart, m.qryEnd); SEQAN_ASSERT_LT(m.subjStart, m.subjEnd); if (!seedLooksPromising(lH, m)) ++lH.stats.hitsFailedPreExtendTest; else lH.matches.emplace_back(m); } // -------------------------------------------------------------------------- // Function search() // -------------------------------------------------------------------------- template inline void __goDownNoErrors(TIndexIt & indexIt, TGoDownTag const &, TNeedleIt 
needleIt, TNeedleIt const & needleItEnd, TLambda & continRunnable, TLambda2 & reportRunnable) { TIndexIt prevIndexIt; do prevIndexIt = indexIt; while ((needleIt != needleItEnd) && goDown(indexIt, *(needleIt++), TGoDownTag()) && continRunnable(prevIndexIt, indexIt, true)); reportRunnable(prevIndexIt, true); } //TODO make number of errors configurable template inline void __goDownErrors(TIndexIt const & indexIt, TGoDownTag const &, TNeedleIt const & needleIt, TNeedleIt const & needleItEnd, TLambda & continRunnable, TLambda2 & reportRunnable) { using TAlph = typename Value::Type; unsigned contin = 0; if (needleIt != needleItEnd) { for (unsigned i = 0; i < ValueSize::VALUE; ++i) { TIndexIt nextIndexIt(indexIt); if (goDown(nextIndexIt, static_cast(i), TGoDownTag()) && continRunnable(indexIt, nextIndexIt, ordValue(*needleIt) != i)) { ++contin; if (ordValue(*needleIt) == i) __goDownErrors(nextIndexIt, TGoDownTag(), needleIt + 1, needleItEnd, continRunnable, reportRunnable); else __goDownNoErrors(nextIndexIt, TGoDownTag(), needleIt + 1, needleItEnd, continRunnable, reportRunnable); } } } if (contin == 0) reportRunnable(indexIt, false); } template inline void _searchSingleIndex(LocalDataHolder & lH) { typedef typename Iterator >::Type TIndexIt; // TODO optionize size_t constexpr seedHeurFactor = /*TGlobalHolder::indexIsBiFM ? 
5 :*/ 10; size_t constexpr minResults = 1; size_t needlesSum = 0; size_t needlesPos = 0; size_t oldTotalMatches = 0; TIndexIt root(lH.gH.dbIndex); TIndexIt indexIt = root; std::function continRunnable; /* It is important to note some option dependencies: * lH.options.maxSeedDist == 0 -> lH.options.seedHalfExact == true * lH.options.maxSeedDist == 0 -> TGlobalHolder::indexIsBiFM == false * TGlobalHolder::indexIsBiFM -> lH.options.seedHalfExact == true * [these are enforced in options.hpp and save us some comparisons here */ SEQAN_ASSERT((lH.options.maxSeedDist != 0) || lH.options.seedHalfExact); SEQAN_ASSERT((lH.options.maxSeedDist != 0) || TGlobalHolder::indexIsBiFM); SEQAN_ASSERT((!TGlobalHolder::indexIsBiFM) || lH.options.seedHalfExact); size_t const goExactLength = lH.options.seedHalfExact ? (lH.options.seedLength / 2) : 0; for (size_t i = lH.indexBeginQry; i < lH.indexEndQry; ++i) { if (length(lH.gH.redQrySeqs[i]) < lH.options.seedLength) continue; size_t desiredOccs = 0; // the next sequences belong to a new set of query sequences if ((i % qNumFrames(TGlobalHolder::blastProgram)) == 0) { needlesSum = lH.gH.redQrySeqs.limits[i + qNumFrames(TGlobalHolder::blastProgram)] - lH.gH.redQrySeqs.limits[i]; // BROKEN:lengthSum(infix(lH.gH.redQrySeqs, lH.indexBeginQry, lH.indexEndQry)); // the above is faster anyway (but only works on concatdirect sets) needlesPos = 0; oldTotalMatches = length(lH.matches); // need to subtract matchcount from other queries } if (lH.options.adaptiveSeeding) { continRunnable = [&lH, &desiredOccs] (auto const & prevIndexIt, auto const & indexIt, bool const/*hasError*/) { // ADAPTIVE SEEDING: // always continue if minimum seed length not reached // TODO currently unclear why considering hasError provides no benefit, and why +1 does if (repLength(indexIt) <= (lH.options.seedLength + /*hasError* */ lH.options.seedDeltaIncreasesLength)) return true; else if (repLength(indexIt) > 2000) // maximum recursion depth return false; // always continue 
if it means not loosing hits if (countOccurrences(indexIt) == countOccurrences(prevIndexIt)) return true; // do vodoo heuristics to see if this hit is to frequent if (countOccurrences(indexIt) < desiredOccs) return false; return true; }; } else { continRunnable = [&lH] (auto const &, auto const & indexIt, bool const /*hasError*/) { // NON-ADAPTIVE return (repLength(indexIt) <= (lH.options.seedLength + /*hasError* */ lH.options.seedDeltaIncreasesLength)); }; } /* FORWARD SEARCH */ for (size_t seedBegin = 0; /* below */; seedBegin += lH.options.seedOffset) { // skip proteine 'X' or Dna 'N' while ((lH.gH.qrySeqs[i][seedBegin] == unknownValue>()) && (seedBegin <= length(lH.gH.redQrySeqs[i]) - lH.options.seedLength)) ++seedBegin; // termination criterium if (seedBegin > length(lH.gH.redQrySeqs[i]) - lH.options.seedLength) break; indexIt = root; if (lH.options.adaptiveSeeding) { desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches ? minResults : (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor / std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast(1)); if (desiredOccs == 0) desiredOccs = minResults; } // go down some characters without errors if bidirectional or halfExact for (size_t k = 0; k < goExactLength; ++k) if (!goDown(indexIt, lH.gH.redQrySeqs[i][seedBegin + k])) break; // if unsuccessful, move to next seed if (repLength(indexIt) != goExactLength) continue; auto reportRunnable = [&lH, &i, &seedBegin] (auto const & indexIt, bool const hasError) { if (repLength(indexIt) >= lH.options.seedLength + hasError * lH.options.seedDeltaIncreasesLength) { #ifdef LAMBDA_MICRO_STATS appendValue(lH.stats.seedLengths, repLength(indexIt)); #endif lH.stats.hitsAfterSeeding += countOccurrences(indexIt); for (auto occ : getOccurrences(indexIt)) onFindVariable(lH, occ, i, seedBegin, repLength(indexIt)); } }; if (lH.options.maxSeedDist) { __goDownErrors(indexIt, Fwd(), begin(lH.gH.redQrySeqs[i], 
Standard()) + seedBegin + goExactLength, end(lH.gH.redQrySeqs[i], Standard()), continRunnable, reportRunnable); } else __goDownNoErrors(indexIt, Fwd(), begin(lH.gH.redQrySeqs[i], Standard()) + seedBegin + goExactLength, end(lH.gH.redQrySeqs[i], Standard()), continRunnable, reportRunnable); } /* REVERSE SEARCH on BIDIRECTIONAL INDEXES */ if (TGlobalHolder::indexIsBiFM) { using TRevNeedle = ModifiedString; TRevNeedle revNeedle{lH.gH.redQrySeqs[i]}; for (size_t seedBegin = lH.options.seedLength - 1; /* below */; seedBegin += lH.options.seedOffset) { // skip proteine 'X' or Dna 'N' while ((lH.gH.qrySeqs[i][seedBegin] == unknownValue>()) && seedBegin < length(lH.gH.redQrySeqs[i])) // [different abort condition than above] ++seedBegin; // termination criterium if (seedBegin >= length(lH.gH.redQrySeqs[i])) // [different abort condition than above] break; indexIt = root; if (lH.options.adaptiveSeeding) { desiredOccs = (length(lH.matches) - oldTotalMatches) >= lH.options.maxMatches ? minResults : (lH.options.maxMatches - (length(lH.matches) - oldTotalMatches)) * seedHeurFactor / std::max((needlesSum - needlesPos - seedBegin) / lH.options.seedOffset, static_cast(1)); if (desiredOccs == 0) desiredOccs = minResults; } // go down seedOffset number of characters without errors for (size_t k = 0; k < (lH.options.seedLength - goExactLength); ++k) if (!goDown(indexIt, lH.gH.redQrySeqs[i][seedBegin - k], Rev())) // [rev and - instead of fwd] break; // if unsuccessful, move to next seed if (repLength(indexIt) != (lH.options.seedLength - goExactLength)) continue; auto reportRunnable = [&lH, &i, &seedBegin] (auto const & indexIt, bool const hasOneError) { if ((repLength(indexIt) >= lH.options.seedLength) && (hasOneError)) // [must have one error for rev] { //TODO remove debug stuff #ifdef LAMBDA_MICRO_STATS appendValue(lH.stats.seedLengths, repLength(indexIt)); #endif lH.stats.hitsAfterSeeding += countOccurrences(indexIt); for (auto occ : getOccurrences(indexIt)) // [different start 
pos] onFindVariable(lH, occ, i, seedBegin - repLength(indexIt) + 1, repLength(indexIt)); } }; // [rev and reverse needle] __goDownErrors(indexIt, Rev(), end(revNeedle, Standard()) - seedBegin + lH.options.seedLength - goExactLength - 1, end(revNeedle, Standard()), continRunnable, reportRunnable); } } needlesPos += length(lH.gH.redQrySeqs[i]); } } #ifdef LAMBDA_LEGACY_PATHS template inline void _searchDoubleIndex(TLocalHolder & lH) { appendToStatus(lH.statusStr, lH.options, 1, "Seeding..."); if (lH.options.isTerm) myPrint(lH.options, 1, lH.statusStr); double start = sysTime(); using LambdaFinder = Finder_ >; LambdaFinder finder; auto delegate = [&lH] (LambdaFinder const & finder) { auto qryOccs = getOccurrences(back(finder.patternStack)); auto subjOccs = getOccurrences(back(finder.textStack)); lH.stats.hitsAfterSeeding += length(qryOccs) * length(subjOccs); for (unsigned i = 0; i < length(qryOccs); ++i) for (unsigned j = 0; j < length(subjOccs); ++j) onFind(lH, getSeqNo(qryOccs[i]), subjOccs[j]); }; _find(finder, lH.gH.dbIndex, lH.seedIndex, lH.options.maxSeedDist, delegate); double finish = sysTime() - start; appendToStatus(lH.statusStr, lH.options, 1, " done. "); appendToStatus(lH.statusStr, lH.options, 2, finish, "s. 
#hits: ", length(lH.matches), " "); myPrint(lH.options, 1, lH.statusStr); } #endif // LAMBDA_LEGACY_PATHS template inline void search(TLocalHolder & lH) { #ifdef LAMBDA_LEGACY_PATHS if (lH.options.doubleIndexing) _searchDoubleIndex(lH); else #endif _searchSingleIndex(lH); } // -------------------------------------------------------------------------- // Function joinAndFilterMatches() // -------------------------------------------------------------------------- template inline void sortMatches(TLocalHolder & lH) { if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, "Sorting hits..."); if (lH.options.isTerm) myPrint(lH.options, 1, lH.statusStr); } double start = sysTime(); // std::sort(begin(lH.matches, Standard()), end(lH.matches, Standard())); // std::sort(lH.matches.begin(), lH.matches.end()); // if (lH.matches.size() > lH.options.maxMatches) // { // MatchSortComp comp(lH.matches); // std::sort(lH.matches.begin(), lH.matches.end(), comp); // } else if ((lH.options.filterPutativeAbundant) && (lH.matches.size() > lH.options.maxMatches)) // more expensive sort to get likely targets to front myHyperSortSingleIndex(lH.matches, lH.options.doubleIndexing, lH.gH); else std::sort(lH.matches.begin(), lH.matches.end()); double finish = sysTime() - start; if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, " done. "); appendToStatus(lH.statusStr, lH.options, 2, finish, "s. 
"); myPrint(lH.options, 1, lH.statusStr); } } // -------------------------------------------------------------------------- // Function _setFrames() // -------------------------------------------------------------------------- template inline void _setFrames(TBlastMatch & bm, typename TLocalHolder::TMatch const & m, TLocalHolder const &) { if (qIsTranslated(TLocalHolder::TGlobalHolder::blastProgram)) { bm.qFrameShift = (m.qryId % 3) + 1; if (m.qryId % 6 > 2) bm.qFrameShift = -bm.qFrameShift; } else if (qHasRevComp(TLocalHolder::TGlobalHolder::blastProgram)) { bm.qFrameShift = 1; if (m.qryId % 2) bm.qFrameShift = -bm.qFrameShift; } else { bm.qFrameShift = 0; } if (sIsTranslated(TLocalHolder::TGlobalHolder::blastProgram)) { bm.sFrameShift = (m.subjId % 3) + 1; if (m.subjId % 6 > 2) bm.sFrameShift = -bm.sFrameShift; } else if (sHasRevComp(TLocalHolder::TGlobalHolder::blastProgram)) { bm.sFrameShift = 1; if (m.subjId % 2) bm.sFrameShift = -bm.sFrameShift; } else { bm.sFrameShift = 0; } } // -------------------------------------------------------------------------- // Function _writeMatches() // -------------------------------------------------------------------------- template inline void _writeRecord(TBlastRecord & record, TLocalHolder & lH) { if (length(record.matches) > 0) { ++lH.stats.qrysWithHit; // sort and remove duplicates -> STL, yeah! 
auto const before = record.matches.size(); if (!lH.options.filterPutativeDuplicates) { record.matches.sort([] (auto const & m1, auto const & m2) { return std::tie(m1._n_sId, m1.qStart, m1.qEnd, m1.sStart, m1.sEnd, m1.qFrameShift, m1.sFrameShift) < std::tie(m2._n_sId, m2.qStart, m2.qEnd, m2.sStart, m2.sEnd, m2.qFrameShift, m2.sFrameShift); }); record.matches.unique([] (auto const & m1, auto const & m2) { return std::tie(m1._n_sId, m1.qStart, m1.qEnd, m1.sStart, m1.sEnd, m1.qFrameShift, m1.sFrameShift) == std::tie(m2._n_sId, m2.qStart, m2.qEnd, m2.sStart, m2.sEnd, m2.qFrameShift, m2.sFrameShift); }); lH.stats.hitsDuplicate += before - record.matches.size(); } // sort by evalue before writing record.matches.sort([] (auto const & m1, auto const & m2) { return m1.bitScore > m2.bitScore; }); // cutoff abundant if (record.matches.size() > lH.options.maxMatches) { lH.stats.hitsAbundant += record.matches.size() - lH.options.maxMatches; record.matches.resize(lH.options.maxMatches); } lH.stats.hitsFinal += record.matches.size(); // compute LCA if (lH.options.computeLCA) { record.lcaTaxId = 0; for (auto const & bm : record.matches) { if ((length(lH.gH.sTaxIds[bm._n_sId]) > 0) && (lH.gH.taxParents[lH.gH.sTaxIds[bm._n_sId][0]] != 0)) { record.lcaTaxId = lH.gH.sTaxIds[bm._n_sId][0]; break; } } if (record.lcaTaxId != 0) for (auto const & bm : record.matches) for (uint32_t const sTaxId : lH.gH.sTaxIds[bm._n_sId]) if (lH.gH.taxParents[sTaxId] != 0) // TODO do we want to skip unassigned subjects record.lcaTaxId = computeLCA(lH.gH.taxParents, lH.gH.taxHeights, sTaxId, record.lcaTaxId); record.lcaId = lH.gH.taxNames[record.lcaTaxId]; } myWriteRecord(lH, record); } } // -------------------------------------------------------------------------- // Function computeBlastMatch() // -------------------------------------------------------------------------- template inline int computeBlastMatch(typename TBlastRecord::TBlastMatch & bm, typename TLocalHolder::TMatch const & m, TBlastRecord 
const & record, TLocalHolder & lH) { using TMatch = typename TLocalHolder::TMatch; using TPos = typename TMatch::TPos; const unsigned long qryLength = length(value(lH.gH.qrySeqs, m.qryId)); SEQAN_ASSERT_LEQ(bm.qStart, bm.qEnd); SEQAN_ASSERT_LEQ(bm.sStart, bm.sEnd); // auto qryInfix = infix(curQry, // bm.qStart, // bm.qEnd); // auto subjInfix = infix(curSubj, // bm.sStart, // bm.sEnd); // std::cout << "Query Id: " << m.qryId // << "\t TrueQryId: " << getTrueQryId(bm.m, lH.options, TGlobalHolder::blastProgram) // << "\t length(qryIds): " << length(qryIds) // << "Subj Id: " << m.subjId // << "\t TrueSubjId: " << getTrueSubjId(bm.m, lH.options, TGlobalHolder::blastProgram) // << "\t length(subjIds): " << length(subjIds) << "\n\n"; assignSource(bm.alignRow0, infix(lH.gH.qrySeqs[m.qryId], bm.qStart, bm.qEnd)); assignSource(bm.alignRow1, infix(lH.gH.subjSeqs[m.subjId],bm.sStart, bm.sEnd)); // std::cout << "== Positions\n"; // std::cout << " " << bm.qStart << " - " << bm.qEnd << " [before ali]\n"; // std::cout << bm.align << std::endl; int scr = 0; // unsigned short seedLeng = 0; // double seedE = 0; // double seedB = 0; TPos row0len = bm.qEnd - bm.qStart; TPos row1len = bm.sEnd - bm.sStart; TPos band = (!lH.options.hammingOnly) * (lH.options.maxSeedDist); // // TODO FIGURE THIS OUT // if ((row0len > (lH.options.seedLength + band)) || // (row1len > (lH.options.seedLength + band))) // { // #pragma omp atomic // ++mergeCount; // std::cout << "qrId " << m.qryId << "\tsId: " << m.subjId << "\trow0len: " << row0len << "\trow1len: " << row1len << "\n"; // std::cout << source(row0) << "\n"; // std::cout << source(row1) << "\n"; // } auto seedsInSeed = std::max(row0len, row1len) / lH.options.seedLength; TPos maxDist = 0; if (lH.options.maxSeedDist <= 1) maxDist = std::abs(int(row1len) - int(row0len)); else maxDist = std::abs(int(row1len) - int(row0len)) + (seedsInSeed * band); // fast local alignment without DP-stuff if (maxDist == 0) { int scores[row0len+1]; // C99, C++14, 
-Wno-vla before that scores[0] = 0; unsigned newEnd = 0; unsigned newBeg = 0; // score the diagonal for (unsigned i = 0; i < row0len; ++i) { scores[i] += score(seqanScheme(context(lH.gH.outfile).scoringScheme), source(bm.alignRow0)[i], source(bm.alignRow1)[i]); if (scores[i] < 0) { scores[i] = 0; } else if (scores[i] >= scr) { scr = scores[i]; newEnd = i + 1; } // if (i 0; --i) { if (scores[i] == 0) { newBeg = i + 1; break; } } setEndPosition(bm.alignRow0, newEnd); setEndPosition(bm.alignRow1, newEnd); setBeginPosition(bm.alignRow0, newBeg); setBeginPosition(bm.alignRow1, newBeg); } else { // compute with DP-code scr = localAlignment(bm.alignRow0, bm.alignRow1, seqanScheme(context(lH.gH.outfile).scoringScheme), -maxDist, +maxDist); // scr = localAlignment2(bm.alignRow0, // bm.alignRow1, // seqanScheme(context(lH.gH.outfile).scoringScheme), // -maxDist, // +maxDist, // lH.alignContext); } // save new bounds of alignment bm.qEnd = bm.qStart + endPosition(bm.alignRow0); bm.qStart += beginPosition(bm.alignRow0); bm.sEnd = bm.sStart + endPosition(bm.alignRow1); bm.sStart += beginPosition(bm.alignRow1); // if (scr < lH.options.minSeedScore) // return PREEXTEND; #if 0 // OLD WAY extension with birte's code { // std::cout << " " << bm.qStart << " - " << bm.qEnd << " [after ali]\n"; // std::cout << bm.align << std::endl; decltype(seqanScheme(context(lH.gH.outfile).scoringScheme)) extScheme(seqanScheme(context(lH.gH.outfile).scoringScheme)); setScoreGapOpen (extScheme, -8); setScoreGapExtend(extScheme, -8); Seed seed(bm.sStart, bm.qStart, bm.sEnd, bm.qEnd); extendSeed(seed, curSubj, curQry, EXTEND_BOTH, extScheme, // seqanScheme(context(lH.gH.outfile).scoringScheme), int(lH.options.xDropOff), GappedXDrop()); bm.sStart = beginPositionH(seed); bm.qStart = beginPositionV(seed); bm.sEnd = endPositionH(seed); bm.qEnd = endPositionV(seed); assignSource(row0, infix(curQry, bm.qStart, bm.qEnd)); assignSource(row1, infix(curSubj, bm.sStart, bm.sEnd)); //DEBUG auto oldscr = scr; scr = 
localAlignment(bm.align, seqanScheme(context(lH.gH.outfile).scoringScheme), // alignConfig, lowerDiagonal(seed)-beginDiagonal(seed), upperDiagonal(seed)-beginDiagonal(seed)); // save new bounds of alignment bm.qEnd = bm.qStart + endPosition(row0); bm.qStart += beginPosition(row0); bm.sEnd = bm.sStart + endPosition(row1); bm.sStart += beginPosition(row1); if (scr < 0) // alignment got screwed up { std::cout << "SCREW UP\n"; std::cout << "beginDiag: " << beginDiagonal(seed) << "\tlowDiag: " << lowerDiagonal(seed) << "\tupDiag: " << upperDiagonal(seed) << '\n'; std::cout << "oldscore: " << oldscr << "\tseedscore: " << score(seed) << "\tscore: " << scr << '\n'; std::cout << bm.align << '\n'; } } #endif #if 0 // ungapped second prealign { Tuple positions = { { bm.qStart, bm.sStart, bm.qEnd, bm.sEnd} }; decltype(seqanScheme(context(lH.gH.outfile).scoringScheme)) extScheme(seqanScheme(context(lH.gH.outfile).scoringScheme)); setScoreGapOpen (extScheme, -100); setScoreGapExtend(extScheme, -100); scr = extendAlignment(bm.align, lH.alignContext, scr, curQry, curSubj, positions, EXTEND_BOTH, 0, // band of 0 size 0, // band of 0 size 1, // xdrop of 1 extScheme); bm.qStart = beginPosition(row0); bm.qEnd = endPosition(row0); bm.sStart = beginPosition(row1); bm.sEnd = endPosition(row1); } #endif if (((bm.qStart > 0) && (bm.sStart > 0)) || ((bm.qEnd < qryLength - 1) && (bm.sEnd < length(lH.gH.subjSeqs[m.subjId]) -1))) { maxDist = _bandSize(qryLength, lH); Tuple positions = { { bm.qStart, bm.sStart, bm.qEnd, bm.sEnd} }; if (lH.options.band != -1) { if (lH.options.xDropOff != -1) { scr = _extendAlignmentImpl(bm.alignRow0, bm.alignRow1, scr, lH.gH.qrySeqs[m.qryId], lH.gH.subjSeqs[m.subjId], positions, EXTEND_BOTH, -maxDist, +maxDist, lH.options.xDropOff, seqanScheme(context(lH.gH.outfile).scoringScheme), True(), True(), lH.alignContext); } else { scr = _extendAlignmentImpl(bm.alignRow0, bm.alignRow1, scr, lH.gH.qrySeqs[m.qryId], lH.gH.subjSeqs[m.subjId], positions, EXTEND_BOTH, 
-maxDist, +maxDist, lH.options.xDropOff, seqanScheme(context(lH.gH.outfile).scoringScheme), True(), False(), lH.alignContext); } } else { if (lH.options.xDropOff != -1) { scr = _extendAlignmentImpl(bm.alignRow0, bm.alignRow1, scr, lH.gH.qrySeqs[m.qryId], lH.gH.subjSeqs[m.subjId], positions, EXTEND_BOTH, -maxDist, +maxDist, lH.options.xDropOff, seqanScheme(context(lH.gH.outfile).scoringScheme), False(), True(), lH.alignContext); } else { scr = _extendAlignmentImpl(bm.alignRow0, bm.alignRow1, scr, lH.gH.qrySeqs[m.qryId], lH.gH.subjSeqs[m.subjId], positions, EXTEND_BOTH, -maxDist, +maxDist, lH.options.xDropOff, seqanScheme(context(lH.gH.outfile).scoringScheme), False(), False(), lH.alignContext); } } bm.sStart = beginPosition(bm.alignRow1); bm.qStart = beginPosition(bm.alignRow0); bm.sEnd = endPosition(bm.alignRow1); bm.qEnd = endPosition(bm.alignRow0); // std::cout << "AFTER:\n" << bm.align << "\n"; } // std::cerr << "AFTEREXT:\n "<< bm.align << "\n"; if (scr <= 0) { // std::cout << "## LATE FAIL\n" << bm.align << '\n'; return OTHER_FAIL; } // std::cout << "##LINE: " << __LINE__ << '\n'; // std::cout << "ALIGN BEFORE STATS:\n" << bm.align << "\n"; computeAlignmentStats(bm, context(lH.gH.outfile)); if (bm.alignStats.alignmentIdentity < lH.options.idCutOff) return PERCENTIDENT; // const unsigned long qryLength = length(row0); computeBitScore(bm, context(lH.gH.outfile)); computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); if (bm.eValue > lH.options.maxEValue) return EVALUE; _setFrames(bm, m, lH); return 0; } template inline int iterateMatchesExtend(TLocalHolder & lH) { using TGlobalHolder = typename TLocalHolder::TGlobalHolder; // using TMatch = typename TGlobalHolder::TMatch; // using TPos = typename TMatch::TPos; using TBlastPos = uint32_t; //TODO why can't this be == TPos using TBlastMatch = BlastMatch< typename TLocalHolder::TAlignRow0, typename TLocalHolder::TAlignRow1, TBlastPos, typename Value::Type,// const &, typename Value::Type// const &, >; 
using TBlastRecord = BlastRecord::Type, std::vector, typename Value::Type, uint32_t>; // constexpr TPos TPosMax = std::numeric_limits::max(); // constexpr uint8_t qFactor = qHasRevComp(TGlobalHolder::blastProgram) ? 3 : 1; // constexpr uint8_t sFactor = sHasRevComp(TGlobalHolder::blastProgram) ? 3 : 1; double start = sysTime(); if (lH.options.doubleIndexing) { appendToStatus(lH.statusStr, lH.options, 1, "Extending and writing hits..."); myPrint(lH.options, 1, lH.statusStr); } // comperator that sorts by bitScore but also compensates for rounding errors auto compe = [] (auto const & m1, auto const & m2) { return std::tie(m2.bitScore, m1._n_sId, m1.qStart, m1.qEnd, m1.sStart, m1.sEnd, m1.qLength, m1.sLength, m1.qFrameShift, m1.sFrameShift) < std::tie(m1.bitScore, m2._n_sId, m2.qStart, m2.qEnd, m2.sStart, m2.sEnd, m2.qLength, m2.sLength, m2.qFrameShift, m2.sFrameShift); }; //DEBUG // std::cout << "Length of matches: " << length(lH.matches); // for (auto const & m : lH.matches) // { // std::cout << m.qryId << "\t" << getTrueQryId(m,lH.options, TGlobalHolder::blastProgram) << "\n"; // } // double topMaxMatchesMedianBitScore = 0; // outer loop over records // (only one iteration if single indexing is used) for (auto it = lH.matches.begin(), itN = std::next(it, 1), itEnd = lH.matches.end(); it != itEnd; ++it) { itN = std::next(it,1); auto const trueQryId = it->qryId / qNumFrames(TGlobalHolder::blastProgram); TBlastRecord record(lH.gH.qryIds[trueQryId]); record.qLength = (qIsTranslated(TGlobalHolder::blastProgram) ? 
lH.gH.untransQrySeqLengths[trueQryId] : length(lH.gH.qrySeqs[it->qryId])); // topMaxMatchesMedianBitScore = 0; // inner loop over matches per record for (; it != itEnd; ++it) { auto const trueSubjId = it->subjId / sNumFrames(TGlobalHolder::blastProgram); itN = std::next(it,1); // std::cout << "FOO\n" << std::flush; // std::cout << "QryStart: " << it->qryStart << "\n" << std::flush; // std::cout << "SubjStart: " << it->subjStart << "\n" << std::flush; // std::cout << "BAR\n" << std::flush; if (!isSetToSkip(*it)) { // ABUNDANCY and PUTATIVE ABUNDANCY CHECKS if ((lH.options.filterPutativeAbundant) && (record.matches.size() % lH.options.maxMatches == 0)) { if (record.matches.size() / lH.options.maxMatches == 1) { // numMaxMatches found the first time record.matches.sort(compe); } else if (record.matches.size() / lH.options.maxMatches > 1) { double medianTopNMatchesBefore = 0.0; // if (lH.options.filterPutativeAbundant) { medianTopNMatchesBefore = (std::next(record.matches.begin(), lH.options.maxMatches / 2))->bitScore; } uint64_t before = record.matches.size(); record.matches.sort(compe); // if we filter putative duplicates we never need to check for real duplicates if (!lH.options.filterPutativeDuplicates) { record.matches.unique([] (auto const & m1, auto const & m2) { return std::tie(m1._n_sId, m1.qStart, m1.qEnd, m1.sStart, m1.sEnd, m1.sLength, m1.qFrameShift, m1.sFrameShift) == std::tie(m2._n_sId, m2.qStart, m2.qEnd, m2.sStart, m2.sEnd, m2.sLength, m2.qFrameShift, m2.sFrameShift); }); lH.stats.hitsDuplicate += before - record.matches.size(); before = record.matches.size(); } if (record.matches.size() > (lH.options.maxMatches + 1)) // +1 so as not to trigger % == 0 in the next run record.matches.resize(lH.options.maxMatches + 1); lH.stats.hitsAbundant += before - record.matches.size(); // if (lH.options.filterPutativeAbundant) { double medianTopNMatchesAfter = (std::next(record.matches.begin(), lH.options.maxMatches / 2))->bitScore; // no new matches in top n/2 if 
(int(medianTopNMatchesAfter) <= int(medianTopNMatchesBefore)) { // declare all the rest as putative abundant while ((it != itEnd) && (trueQryId == it->qryId / qNumFrames(TGlobalHolder::blastProgram))) { // not already marked as abundant, duplicate or merged if (!isSetToSkip(*it)) ++lH.stats.hitsPutativeAbundant; ++it; } // move back so if-loop's increment still valid std::advance(it, -1); break; } } } } // std::cout << "BAX\n" << std::flush; // create blastmatch in list without copy or move record.matches.emplace_back(lH.gH.subjIds[trueSubjId]); auto & bm = back(record.matches); bm.qStart = it->qryStart; bm.qEnd = it->qryEnd; // it->qryStart + lH.options.seedLength; bm.sStart = it->subjStart; bm.sEnd = it->subjEnd;//it->subjStart + lH.options.seedLength; bm.sLength = sIsTranslated(TGlobalHolder::blastProgram) ? lH.gH.untransSubjSeqLengths[trueSubjId] : length(lH.gH.subjSeqs[it->subjId]); // MERGE PUTATIVE SIBLINGS INTO THIS MATCH if (lH.options.mergePutativeSiblings) { for (auto it2 = itN; (it2 != itEnd) && (trueQryId == it2->qryId / qNumFrames(TGlobalHolder::blastProgram)) && (trueSubjId == it2->subjId / sNumFrames(TGlobalHolder::blastProgram)); ++it2) { // same frame if ((it->qryId % qNumFrames(TGlobalHolder::blastProgram) == it2->qryId % qNumFrames(TGlobalHolder::blastProgram)) && (it->subjId % sNumFrames(TGlobalHolder::blastProgram) == it2->subjId % sNumFrames(TGlobalHolder::blastProgram))) { // TPos const qDist = (it2->qryStart >= bm.qEnd) // ? 
it2->qryStart - bm.qEnd // upstream // : 0; // overlap // // TPos sDist = TPosMax; // subj match region downstream of *it // if (it2->subjStart >= bm.sEnd) // upstream // sDist = it2->subjStart - bm.sEnd; // else if (it2->subjStart >= it->subjStart) // overlap // sDist = 0; // due to sorting it2->qryStart never <= it->qStart // so subject sequences must have same order if (it2->subjStart < it->subjStart) continue; long const qDist = it2->qryStart - bm.qEnd; long const sDist = it2->subjStart - bm.sEnd; if ((qDist == sDist) && (qDist <= (long)lH.options.seedGravity)) { bm.qEnd = std::max(bm.qEnd, static_cast(it2->qryEnd)); bm.sEnd = std::max(bm.sEnd, static_cast(it2->subjEnd)); ++lH.stats.hitsMerged; setToSkip(*it2); } } } } // do the extension and statistics int lret = computeBlastMatch(bm, *it, record, lH); switch (lret) { case COMPUTERESULT_::SUCCESS: // ++lH.stats.goodMatches; bm._n_qId = it->qryId / qNumFrames(TGlobalHolder::blastProgram); bm._n_sId = it->subjId / sNumFrames(TGlobalHolder::blastProgram); if (lH.options.hasSTaxIds) bm.sTaxIds = lH.gH.sTaxIds[it->subjId / sNumFrames(TGlobalHolder::blastProgram)]; break; case EVALUE: ++lH.stats.hitsFailedExtendEValueTest; break; case PERCENTIDENT: ++lH.stats.hitsFailedExtendPercentIdentTest; break; case PREEXTEND: ++lH.stats.hitsFailedPreExtendTest; break; default: std::cerr << "Unexpected Extension Failure:\n" << "qryId: " << it->qryId << "\t" << "subjId: " << it->subjId << "\t" << "seed qry: " << infix(lH.gH.redQrySeqs, it->qryStart, it->qryEnd) // it->qryStart + lH.options.seedLength) << "\n subj: " << infix(lH.gH.redSubjSeqs, it->subjStart, it->subjEnd) // it->subjStart + lH.options.seedLength) << "\nunred qry: " << infix(lH.gH.qrySeqs, it->qryStart, it->qryEnd) // it->qryStart + lH.options.seedLength) << "\n subj: " << infix(lH.gH.subjSeqs, it->subjStart, it->subjEnd) // it->subjStart + lH.options.seedLength) << "\nmatch qry: " << infix(lH.gH.qrySeqs, bm.qStart, bm.qEnd) << "\n subj: " << infix(lH.gH.subjSeqs, 
bm.sStart, bm.sEnd) << "\nalign: " << bm.alignRow0 << "\n " << bm.alignRow1 << "\n"; return lret; break; } if (lret != 0)// discard match { record.matches.pop_back(); } else if (lH.options.filterPutativeDuplicates) { // PUTATIVE DUBLICATES CHECK for (auto it2 = itN; (it2 != itEnd) && (trueQryId == it2->qryId / qNumFrames(TGlobalHolder::blastProgram)) && (trueSubjId == it2->subjId / sNumFrames(TGlobalHolder::blastProgram)); ++it2) { // same frame and same range if ((it->qryId == it2->qryId) && (it->subjId == it2->subjId) && (intervalOverlap(it2->qryStart, it2->qryEnd, // it2->qryStart + lH.options.seedLength, bm.qStart, bm.qEnd) > 0) && (intervalOverlap(it2->subjStart, it2->subjEnd, // it2->subjStart + lH.options.seedLength, bm.sStart, bm.sEnd) > 0)) { // deactivated alignment check to get rid of // duplicates early on // auto const & row0 = row(bm.align, 0); // auto const & row1 = row(bm.align, 1); // // part of alignment // if (toSourcePosition(row0, // toViewPosition(row1, // it2->subjStart // - bm.sStart)) // == TPos(it2->qryStart - bm.qStart)) // { ++lH.stats.hitsPutativeDuplicate; setToSkip(*it2); // } } } } } // last item or new TrueQryId if ((itN == itEnd) || (trueQryId != itN->qryId / qNumFrames(TGlobalHolder::blastProgram))) break; } _writeRecord(record, lH); } #ifdef LAMBDA_MICRO_STATS lH.stats.timeExtendTrace += sysTime() - start; #endif if (lH.options.doubleIndexing) { double finish = sysTime() - start; appendToStatus(lH.statusStr, lH.options, 1, " done. "); appendToStatus(lH.statusStr, lH.options, 2, finish, "s. 
"); myPrint(lH.options, 1, lH.statusStr); } return 0; } template inline void _setupAlignInfix(TBlastMatch & bm, typename TLocalHolder::TMatch const & m, TLocalHolder & lH) { int64_t startMod = (int64_t)m.subjStart - (int64_t)m.qryStart; bm.qEnd = length(lH.gH.qrySeqs[m.qryId]); decltype(bm.qEnd) band = _bandSize(bm.qEnd , lH); if (startMod >= 0) { bm.sStart = startMod; bm.qStart = 0; } else { bm.sStart = 0; bm.qStart = -startMod; } bm.sEnd = _min(bm.sStart + bm.qEnd - bm.qStart + band, length(lH.gH.subjSeqs[m.subjId])); if (bm.sStart >= band) bm.sStart -= band; else bm.sStart = 0; assignSource(bm.alignRow0, infix(lH.gH.qrySeqs[m.qryId], bm.qStart, bm.qEnd)); assignSource(bm.alignRow1, infix(lH.gH.subjSeqs[m.subjId], bm.sStart, bm.sEnd)); } template inline auto _untrueQryId(TBlastMatch const & bm, TLocalHolder const &) { if (qIsTranslated(TLocalHolder::TGlobalHolder::blastProgram)) { if (bm.qFrameShift > 0) return bm._n_qId * 6 + bm.qFrameShift - 1; else return bm._n_qId * 6 - bm.qFrameShift + 2; } else if (qHasRevComp(TLocalHolder::TGlobalHolder::blastProgram)) { if (bm.qFrameShift > 0) return bm._n_qId * 2; else return bm._n_qId * 2 + 1; } else { return bm._n_qId; } } template inline auto _untrueSubjId(TBlastMatch const & bm, TLocalHolder const &) { if (sIsTranslated(TLocalHolder::TGlobalHolder::blastProgram)) { if (bm.sFrameShift > 0) return bm._n_sId * 6 + bm.sFrameShift - 1; else return bm._n_sId * 6 - bm.sFrameShift + 2; } else if (sHasRevComp(TLocalHolder::TGlobalHolder::blastProgram)) { if (bm.sFrameShift > 0) return bm._n_sId * 2; else return bm._n_sId * 2 + 1; } else { return bm._n_sId; } } template inline void _expandAlign(TBlastMatch & bm, TLocalHolder const & lH) { auto oldQLen = length(source(bm.alignRow0)); auto oldSLen = length(source(bm.alignRow1)); // replace source from underneath without triggereng reset value(bm.alignRow0._source) = lH.gH.qrySeqs[_untrueQryId(bm, lH)]; value(bm.alignRow1._source) = lH.gH.subjSeqs[_untrueSubjId(bm, lH)]; // 
insert fields into array gaps if (bm.alignRow0._array[0] == 0) bm.alignRow0._array[1] += bm.qStart; else insert(bm.alignRow0._array, 0, std::vector{0, bm.qStart}); if (bm.alignRow0._array[length(bm.alignRow0._array) - 1] == 0) bm.alignRow0._array[length(bm.alignRow0._array) - 2] += length(source(bm.alignRow0)) - oldQLen; else append(bm.alignRow0._array, std::vector{length(source(bm.alignRow0)) - oldQLen, 0}); if (bm.alignRow1._array[0] == 0) bm.alignRow1._array[1] += bm.sStart; else insert(bm.alignRow1._array, 0, std::vector{0, bm.sStart}); if (bm.alignRow1._array[length(bm.alignRow1._array) - 1] == 0) bm.alignRow1._array[length(bm.alignRow1._array) - 2] += length(source(bm.alignRow1)) - oldSLen; else append(bm.alignRow1._array, std::vector{length(source(bm.alignRow1)) - oldSLen, 0}); // the begin positions from the align object are relative to the infix created above bm.qEnd = bm.qStart + endPosition(bm.alignRow0); bm.qStart = bm.qStart + beginPosition(bm.alignRow0); bm.sEnd = bm.sStart + endPosition(bm.alignRow1); bm.sStart = bm.sStart + beginPosition(bm.alignRow1); // set clipping positions on new gaps objects setBeginPosition(bm.alignRow0, bm.qStart); setEndPosition(bm.alignRow0, bm.qEnd); setBeginPosition(bm.alignRow1, bm.sStart); setEndPosition(bm.alignRow1, bm.sEnd); } #ifdef SEQAN_SIMD_ENABLED template inline void _setupDepSets(TDepSetH & depSetH, TDepSetV & depSetV, TBlastMatches const & blastMatches) { using TSimdAlign = typename SimdVector::Type; unsigned constexpr sizeBatch = LENGTH::VALUE; unsigned const fullSize = sizeBatch * ((length(blastMatches) + sizeBatch - 1) / sizeBatch); clear(depSetH); clear(depSetV); reserve(depSetH, fullSize); reserve(depSetV, fullSize); for (auto const & bm : blastMatches) { appendValue(depSetH, source(bm.alignRow0)); appendValue(depSetV, source(bm.alignRow1)); } // fill up last batch for (size_t i = length(blastMatches); i < fullSize; ++i) { appendValue(depSetH, source(back(blastMatches).alignRow0)); appendValue(depSetV, 
source(back(blastMatches).alignRow1)); } } template inline void _performAlignment(TDepSetH & depSetH, TDepSetV & depSetV, TBlastMatches & blastMatches, TLocalHolder & lH, std::integral_constant const &) { using TGlobalHolder = typename TLocalHolder::TGlobalHolder; using TAlignConfig = AlignConfig2, DPBandConfig, FreeEndGaps_, std::conditional_t >, TracebackOff> >; using TSimdAlign = typename SimdVector::Type; using TSimdScore = Score >; using TSize = typename Size::Type; using TMatch = typename TGlobalHolder::TMatch; using TPos = typename TMatch::TPos; using TTraceSegment = TraceSegment_; unsigned constexpr sizeBatch = LENGTH::VALUE; unsigned const fullSize = sizeBatch * ((length(blastMatches) + sizeBatch - 1) / sizeBatch); TSimdScore simdScoringScheme(seqanScheme(context(lH.gH.outfile).scoringScheme)); StringSet > trace; // TODO when band is available, create inside block with band TAlignConfig config;//(0, 2*band) auto matchIt = blastMatches.begin(); for (auto pos = 0u; pos < fullSize; pos += sizeBatch) { auto infSetH = infixWithLength(depSetH, pos, sizeBatch); auto infSetV = infixWithLength(depSetV, pos, sizeBatch); TSimdAlign resultsBatch; clear(trace); resize(trace, sizeBatch, Exact()); // TODO pass in lH.dpSIMDContext _prepareAndRunSimdAlignment(resultsBatch, trace, infSetH, infSetV, simdScoringScheme, config, typename TLocalHolder::TScoreExtension()); for(auto x = pos; x < pos + sizeBatch && x < length(blastMatches); ++x) { //TODO if constexpr if (withTrace) _adaptTraceSegmentsTo(matchIt->alignRow0, matchIt->alignRow1, trace[x - pos]); else matchIt->alignStats.alignmentScore = resultsBatch[x - pos]; ++matchIt; } } } template inline int iterateMatchesFullSimd(TLocalHolder & lH) { using TGlobalHolder = typename TLocalHolder::TGlobalHolder; // using TMatch = typename TGlobalHolder::TMatch; // using TPos = typename TMatch::TPos; using TBlastPos = uint32_t; //TODO why can't this be == TPos using TBlastMatch = BlastMatch< typename TLocalHolder::TAlignRow0, 
typename TLocalHolder::TAlignRow1, TBlastPos, typename Value::Type,// const &, typename Value::Type// const &, >; using TBlastRecord = BlastRecord::Type, std::vector, typename Value::Type, uint32_t>; // statistics #ifdef LAMBDA_MICRO_STATS ++lH.stats.numQueryWithExt; lH.stats.numExtScore += length(lH.matches); double start = sysTime(); #endif // Prepare string sets with sequences. StringSet::Type> depSetH; StringSet::Type> depSetV; // container of blastMatches (possibly from multiple queries decltype(TBlastRecord().matches) blastMatches; // create blast matches for (auto it = lH.matches.begin(), itEnd = lH.matches.end(); it != itEnd; ++it) { // create blastmatch in list without copy or move blastMatches.emplace_back(lH.gH.qryIds [it->qryId / qNumFrames(TGlobalHolder::blastProgram)], lH.gH.subjIds[it->subjId / sNumFrames(TGlobalHolder::blastProgram)]); auto & bm = back(blastMatches); bm._n_qId = it->qryId / qNumFrames(TGlobalHolder::blastProgram); bm._n_sId = it->subjId / sNumFrames(TGlobalHolder::blastProgram); bm.sLength = sIsTranslated(TGlobalHolder::blastProgram) ? lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[_untrueSubjId(bm, lH)]); bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) ? lH.gH.untransQrySeqLengths[bm._n_qId ] : length(lH.gH.qrySeqs[it->qryId]); _setupAlignInfix(bm, *it, lH); _setFrames(bm, *it, lH); if (lH.options.hasSTaxIds) bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId]; } #ifdef LAMBDA_MICRO_STATS lH.stats.timeExtend += sysTime() - start; lH.stats.timeExtendTrace += sysTime() - start; //TODO remove this line! 
// filter out duplicates start = sysTime(); #endif auto before = length(blastMatches); blastMatches.sort([] (auto const & l, auto const & r) { return std::tie(l._n_qId, l._n_sId, l.sStart, l.sEnd, l.qStart, l.qEnd, l.qFrameShift, l.sFrameShift) < std::tie(r._n_qId, r._n_sId, r.sStart, r.sEnd, r.qStart, r.qEnd, r.qFrameShift, r.sFrameShift); }); blastMatches.unique([] (auto const & l, auto const & r) { return std::tie(l._n_qId, l._n_sId, l.sStart, l.sEnd, l.qStart, l.qEnd, l.qFrameShift, l.sFrameShift) == std::tie(r._n_qId, r._n_sId, r.sStart, r.sEnd, r.qStart, r.qEnd, r.qFrameShift, r.sFrameShift); }); lH.stats.hitsDuplicate += (before - length(blastMatches)); // sort by lengths to minimize padding in SIMD blastMatches.sort([] (auto const & l, auto const & r) { return std::make_tuple(length(source(l.alignRow0)), length(source(l.alignRow1))) < std::make_tuple(length(source(r.alignRow0)), length(source(r.alignRow1))); }); #ifdef LAMBDA_MICRO_STATS lH.stats.timeSort += sysTime() - start; start = sysTime(); #endif // fill batches _setupDepSets(depSetH, depSetV, blastMatches); // Run extensions WITHOUT ALIGNMENT _performAlignment(depSetH, depSetV, blastMatches, lH, std::false_type()); // copmute evalues and filter based on evalue for (auto it = blastMatches.begin(), itEnd = blastMatches.end(); it != itEnd; /*below*/) { TBlastMatch & bm = *it; if (lH.options.minBitScore > 0) { seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); if (bm.bitScore < lH.options.minBitScore) { ++lH.stats.hitsFailedExtendBitScoreTest; it = blastMatches.erase(it); continue; } } if (lH.options.maxEValue < 100) { computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); if (bm.eValue > lH.options.maxEValue) { ++lH.stats.hitsFailedExtendEValueTest; it = blastMatches.erase(it); continue; } } ++it; } if (length(blastMatches) == 0) return 0; // statistics #ifdef LAMBDA_MICRO_STATS lH.stats.numExtAli += length(blastMatches); lH.stats.timeExtend += sysTime() - start; start = 
sysTime(); #endif // reset and fill batches _setupDepSets(depSetH, depSetV, blastMatches); // Run extensions WITH ALIGNMENT _performAlignment(depSetH, depSetV, blastMatches, lH, std::true_type()); // sort by query blastMatches.sort([] (auto const & lhs, auto const & rhs) { return lhs._n_qId < rhs._n_qId; }); // compute the rest of the match properties for (auto it = blastMatches.begin(), itEnd = blastMatches.end(); it != itEnd; /*below*/) { TBlastMatch & bm = *it; _expandAlign(bm, lH); computeAlignmentStats(bm, context(lH.gH.outfile)); if (bm.alignStats.alignmentIdentity < lH.options.idCutOff) { ++lH.stats.hitsFailedExtendPercentIdentTest; it = blastMatches.erase(it); continue; } // not computed previously if (lH.options.minBitScore == 0) seqan::computeBitScore(bm, seqan::context(lH.gH.outfile)); if (lH.options.maxEValue == 100) computeEValueThreadSafe(bm, bm.qLength, seqan::context(lH.gH.outfile)); ++it; } #ifdef LAMBDA_MICRO_STATS lH.stats.timeExtendTrace += sysTime() - start; #endif if (length(blastMatches) == 0) return 0; // devide matches into records (per query) and write for (auto it = blastMatches.begin(), itLast = blastMatches.begin(); length(blastMatches) > 0; /*below*/) { if ((it == blastMatches.end()) || ((it != blastMatches.begin()) && (it->_n_qId != itLast->_n_qId))) { // create a record for each query TBlastRecord record(lH.gH.qryIds[itLast->_n_qId]); record.qLength = (qIsTranslated(TGlobalHolder::blastProgram) ? 
lH.gH.untransQrySeqLengths[itLast->_n_qId] : length(lH.gH.qrySeqs[_untrueQryId(*itLast, lH)])); // move the matches into the record record.matches.splice(record.matches.begin(), blastMatches, blastMatches.begin(), it); // write to file _writeRecord(record, lH); it = blastMatches.begin(); itLast = blastMatches.begin(); } else { itLast = it; ++it; } } return 0; } #endif // SEQAN_SIMD_ENABLED template inline int iterateMatchesFullSerial(TLocalHolder & lH) { using TGlobalHolder = typename TLocalHolder::TGlobalHolder; // using TMatch = typename TGlobalHolder::TMatch; // using TPos = typename TMatch::TPos; using TBlastPos = uint32_t; //TODO why can't this be == TPos using TBlastMatch = BlastMatch< typename TLocalHolder::TAlignRow0, typename TLocalHolder::TAlignRow1, TBlastPos, typename Value::Type,// const &, typename Value::Type// const &, >; using TBlastRecord = BlastRecord::Type, std::vector, typename Value::Type, uint32_t>; auto const trueQryId = lH.matches[0].qryId / qNumFrames(TGlobalHolder::blastProgram); TBlastRecord record(lH.gH.qryIds[trueQryId]); record.qLength = (qIsTranslated(TGlobalHolder::blastProgram) ? lH.gH.untransQrySeqLengths[trueQryId] : length(lH.gH.qrySeqs[lH.matches[0].qryId])); size_t band = _bandSize(length(lH.gH.qrySeqs[lH.matches[0].qryId]), lH); #ifdef LAMBDA_MICRO_STATS double start = sysTime(); #endif // create blast matches for (auto it = lH.matches.begin(), itEnd = lH.matches.end(); it != itEnd; ++it) { // create blastmatch in list without copy or move record.matches.emplace_back(lH.gH.qryIds [it->qryId / qNumFrames(TGlobalHolder::blastProgram)], lH.gH.subjIds[it->subjId / sNumFrames(TGlobalHolder::blastProgram)]); auto & bm = back(record.matches); auto & m = *it; bm._n_qId = it->qryId / qNumFrames(TGlobalHolder::blastProgram); bm._n_sId = it->subjId / sNumFrames(TGlobalHolder::blastProgram); bm.sLength = sIsTranslated(TGlobalHolder::blastProgram) ? 
lH.gH.untransSubjSeqLengths[bm._n_sId] : length(lH.gH.subjSeqs[it->subjId]); bm.qLength = qIsTranslated(TGlobalHolder::blastProgram) ? lH.gH.untransQrySeqLengths[bm._n_qId ] : length(lH.gH.qrySeqs[it->qryId]); _setupAlignInfix(bm, *it, lH); _setFrames(bm, m, lH); // Run extension WITHOUT TRACEBACK bm.alignStats.alignmentScore = localAlignmentScore(bm.alignRow0, bm.alignRow1, seqanScheme(context(lH.gH.outfile).scoringScheme), -band, +band); computeBitScore(bm, context(lH.gH.outfile)); if (bm.bitScore < lH.options.minBitScore) { ++lH.stats.hitsFailedExtendBitScoreTest; record.matches.pop_back(); continue; } computeEValueThreadSafe(bm, record.qLength, context(lH.gH.outfile)); if (bm.eValue > lH.options.maxEValue) { ++lH.stats.hitsFailedExtendEValueTest; record.matches.pop_back(); continue; } // Run extension WITH TRACEBACK localAlignment(bm.alignRow0, bm.alignRow1, seqanScheme(context(lH.gH.outfile).scoringScheme), -band, +band); _expandAlign(bm, lH); computeAlignmentStats(bm, context(lH.gH.outfile)); if (bm.alignStats.alignmentIdentity < lH.options.idCutOff) { ++lH.stats.hitsFailedExtendPercentIdentTest; record.matches.pop_back(); continue; } if (lH.options.hasSTaxIds) bm.sTaxIds = lH.gH.sTaxIds[bm._n_sId]; } #ifdef LAMBDA_MICRO_STATS lH.stats.timeExtendTrace += sysTime() - start; #endif _writeRecord(record, lH); return 0; } template inline int iterateMatches(TLocalHolder & lH) { #ifdef SEQAN_SIMD_ENABLED if (lH.options.extensionMode == LambdaOptions::ExtensionMode::FULL_SIMD) return iterateMatchesFullSimd(lH); else #endif if (lH.options.extensionMode == LambdaOptions::ExtensionMode::FULL_SERIAL) return iterateMatchesFullSerial(lH); else return iterateMatchesExtend(lH); } #endif // HEADER GUARD lambda-lambda-v2.0.1/src/search_datastructures.hpp000066400000000000000000000564101445553061700222600ustar00rootroot00000000000000// ========================================================================== // lambda // 
========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // holders.hpp: Data container structs // ========================================================================== #ifndef LAMBDA_SEARCH_DATASTRUCTURES_H_ #define LAMBDA_SEARCH_DATASTRUCTURES_H_ #include // ============================================================================ // Tags, Classes, Enums // ============================================================================ // ---------------------------------------------------------------------------- // struct Match // ---------------------------------------------------------------------------- template struct Match { typedef SizeTypeNum_ TQId; typedef SizeTypeNum_ TSId; typedef SizeTypePos_ TPos; TQId qryId; TSId subjId; TPos qryStart; TPos qryEnd; TPos subjStart; TPos subjEnd; inline bool operator== (Match const & m2) const { return std::tie(qryId, subjId, qryStart, subjStart, qryEnd, subjEnd) == std::tie(m2.qryId, m2.subjId, m2.qryStart, m2.subjStart, m2.qryEnd, m2.subjEnd); } inline bool operator< (Match const & m2) const { return std::tie(qryId, subjId, qryStart, subjStart, qryEnd, subjEnd) < std::tie(m2.qryId, m2.subjId, m2.qryStart, m2.subjStart, m2.qryEnd, m2.subjEnd); } }; template inline void setToSkip(Match & m) { using TPos = typename Match::TPos; constexpr TPos posMax = std::numeric_limits::max(); m.qryStart = posMax; m.subjStart = posMax; } template inline bool isSetToSkip(Match const & m) { using TPos = typename Match::TPos; 
constexpr TPos posMax = std::numeric_limits::max(); return (m.qryStart == posMax) && (m.subjStart == posMax); } template inline void _printMatch(Match const & m) { std::cout << "MATCH Query " << m.qryId << "(" << m.qryStart << ", " << m.qryEnd << ") on Subject "<< m.subjId << "(" << m.subjStart << ", " << m.subjEnd << ")" << std::endl << std::flush; } // ---------------------------------------------------------------------------- // struct StatsHolder // ---------------------------------------------------------------------------- struct StatsHolder { // seeding uint64_t hitsAfterSeeding; uint64_t hitsMerged; uint64_t hitsTooShort; uint64_t hitsMasked; #ifdef LAMBDA_MICRO_STATS std::vector seedLengths; #endif // pre-extension uint64_t hitsFailedPreExtendTest; uint64_t hitsPutativeDuplicate; uint64_t hitsPutativeAbundant; // post-extension uint64_t hitsFailedExtendPercentIdentTest; uint64_t hitsFailedExtendBitScoreTest; uint64_t hitsFailedExtendEValueTest; uint64_t hitsAbundant; uint64_t hitsDuplicate; // final uint64_t hitsFinal; uint64_t qrysWithHit; #ifdef LAMBDA_MICRO_STATS // times double timeGenSeeds; double timeSearch; double timeSort; double timeExtend; double timeExtendTrace; // extension counters uint64_t numQueryWithExt; uint64_t numExtScore; uint64_t numExtAli; #endif StatsHolder() { clear(); } void clear() { hitsAfterSeeding = 0; hitsMerged = 0; hitsTooShort = 0; hitsMasked = 0; hitsFailedPreExtendTest = 0; hitsPutativeDuplicate = 0; hitsPutativeAbundant = 0; hitsFailedExtendPercentIdentTest = 0; hitsFailedExtendBitScoreTest = 0; hitsFailedExtendEValueTest = 0; hitsAbundant = 0; hitsDuplicate = 0; hitsFinal = 0; qrysWithHit = 0; #ifdef LAMBDA_MICRO_STATS seedLengths.clear(); timeGenSeeds = 0; timeSearch = 0; timeSort = 0; timeExtend = 0; timeExtendTrace = 0; numQueryWithExt = 0; numExtScore = 0; numExtAli = 0; #endif } StatsHolder plus(StatsHolder const & rhs) { hitsAfterSeeding += rhs.hitsAfterSeeding; hitsMerged += rhs.hitsMerged; hitsTooShort += 
rhs.hitsTooShort; hitsMasked += rhs.hitsMasked; hitsFailedPreExtendTest += rhs.hitsFailedPreExtendTest; hitsPutativeDuplicate += rhs.hitsPutativeDuplicate; hitsPutativeAbundant += rhs.hitsPutativeAbundant; hitsFailedExtendPercentIdentTest += rhs.hitsFailedExtendPercentIdentTest; hitsFailedExtendBitScoreTest += rhs.hitsFailedExtendBitScoreTest; hitsFailedExtendEValueTest += rhs.hitsFailedExtendEValueTest; hitsAbundant += rhs.hitsAbundant; hitsDuplicate += rhs.hitsDuplicate; hitsFinal += rhs.hitsFinal; qrysWithHit += rhs.qrysWithHit; #ifdef LAMBDA_MICRO_STATS append(seedLengths, rhs.seedLengths); timeGenSeeds += rhs.timeGenSeeds; timeSearch += rhs.timeSearch; timeSort += rhs.timeSort; timeExtend += rhs.timeExtend; timeExtendTrace += rhs.timeExtendTrace; numQueryWithExt += rhs.numQueryWithExt; numExtScore += rhs.numExtScore; numExtAli += rhs.numExtAli; #endif return *this; } StatsHolder operator+(StatsHolder const& rhs) { StatsHolder tmp(*this); return tmp.plus(rhs); } StatsHolder operator+=(StatsHolder const& rhs) { this->plus(rhs); return *this; } }; void printStats(StatsHolder const & stats, LambdaOptions const & options) { if ((options.verbosity >= 1) && options.isTerm && options.doubleIndexing) for (unsigned char i=0; i < options.threads + 3; ++i) std::cout << std::endl; if (options.verbosity >= 2) { unsigned long rem = stats.hitsAfterSeeding; auto const w = _numberOfDigits(rem); // number of digits #define R " " << std::setw(w) #define RR " = " << std::setw(w) #define BLANKS for (unsigned i = 0; i< w; ++i) std::cout << " "; std::cout << "\033[1m HITS "; BLANKS; std::cout << "Remaining\033[0m" << "\n after Seeding "; BLANKS; std::cout << R << rem; if (stats.hitsMasked) std::cout << "\n - masked " << R << stats.hitsMasked << RR << (rem -= stats.hitsMasked); if (options.mergePutativeSiblings) std::cout << "\n - merged " << R << stats.hitsMerged << RR << (rem -= stats.hitsMerged); if (options.filterPutativeDuplicates) std::cout << "\n - putative duplicates " << R << 
stats.hitsPutativeDuplicate << RR << (rem -= stats.hitsPutativeDuplicate); if (options.filterPutativeAbundant) std::cout << "\n - putative abundant " << R << stats.hitsPutativeAbundant << RR << (rem -= stats.hitsPutativeAbundant); if (options.preScoring) std::cout << "\n - failed pre-extend test " << R << stats.hitsFailedPreExtendTest << RR << (rem -= stats.hitsFailedPreExtendTest); std::cout << "\n - failed e-value test " << R << stats.hitsFailedExtendEValueTest << RR << (rem -= stats.hitsFailedExtendEValueTest); std::cout << "\n - failed bitScore test " << R << stats.hitsFailedExtendBitScoreTest << RR << (rem -= stats.hitsFailedExtendBitScoreTest); std::cout << "\n - failed %-identity test " << R << stats.hitsFailedExtendPercentIdentTest << RR << (rem -= stats.hitsFailedExtendPercentIdentTest); std::cout << "\n - duplicates " << R << stats.hitsDuplicate << RR << (rem -= stats.hitsDuplicate); std::cout << "\n - abundant " << R << stats.hitsAbundant << "\033[1m" << RR << (rem -= stats.hitsAbundant) << "\033[0m\n\n"; if (rem != stats.hitsFinal) std::cout << "WARNING: hits don't add up\n"; #ifdef LAMBDA_MICRO_STATS std::cout << "Detailed Non-Wall-Clock times:\n" << " genSeeds: " << stats.timeGenSeeds << "\n" << " search: " << stats.timeSearch << "\n" << " sort: " << stats.timeSort << "\n" << " extend: " << stats.timeExtend << "\n" << " extendTrace: " << stats.timeExtendTrace << "\n\n"; if (length(stats.seedLengths)) { double _seedLengthSum = std::accumulate(stats.seedLengths.begin(), stats.seedLengths.end(), 0.0); double seedLengthMean = _seedLengthSum / stats.seedLengths.size(); double _seedLengthMeanSqSum = std::inner_product(stats.seedLengths.begin(), stats.seedLengths.end(), stats.seedLengths.begin(), 0.0); double seedLengthStdDev = std::sqrt(_seedLengthMeanSqSum / stats.seedLengths.size() - seedLengthMean * seedLengthMean); uint16_t seedLengthMax = *std::max_element(stats.seedLengths.begin(), stats.seedLengths.end()); std::cout << "SeedStats:\n" << " avgLength: 
" << seedLengthMean << "\n" << " stddev: " << seedLengthStdDev << "\n" << " max: " << seedLengthMax << "\n\n"; } #ifdef SEQAN_SIMD_ENABLED if (stats.numQueryWithExt > 0) std::cout << "Number of Extensions stats:\n" << " # queries with Extensions: " << stats.numQueryWithExt << "\n" << " avg # extensions without Ali: " << stats.numExtScore / stats.numQueryWithExt << "\n" << " avg # extensions with Ali: " << stats.numExtAli / stats.numQueryWithExt << "\n\n"; #endif #endif } if (options.verbosity >= 1) { auto const w = _numberOfDigits(stats.hitsFinal); std::cout << "Number of valid hits: " << std::setw(w) << stats.hitsFinal << "\nNumber of Queries with at least one valid hit: " << std::setw(w) << stats.qrysWithHit << "\n"; } } // ---------------------------------------------------------------------------- // struct GlobalDataHolder -- one object per program // ---------------------------------------------------------------------------- template inline T & _initHelper(T & t1, T &&) { // std::cout << "FOO\n"; return t1; } template inline T2 && _initHelper(T &, T2 && t2) { // std::cout << "BAR\n"; return std::move(t2); } template class GlobalDataHolder { public: using TRedAlph = RedAlph; // ensures == Dna5 for BlastN using TMatch = Match; static constexpr BlastProgram blastProgram = p; static constexpr bool indexIsBiFM = std::is_same>>::value; static constexpr bool indexIsFM = std::is_same>::value || indexIsBiFM; static constexpr bool alphReduction = !std::is_same, TRedAlph>::value; /* Sequence storage types */ using TStringTag = Alloc<>; #if defined(LAMBDA_MMAPPED_DB) using TDirectStringTag = MMap<>; #else using TDirectStringTag = TStringTag; #endif using TQryTag = TStringTag; using TSubjTag = TDirectStringTag; // even if subjects were translated they are now loaded from disk /* untranslated query sequences (ONLY USED FOR SAM/BAM OUTPUT) */ using TUntransQrySeqs = StringSet, TQryTag>, Owner>>; /* Possibly translated but yet unreduced sequences */ template using 
TTransSeqs = StringSet, TSpec>, Owner>>; using TTransQrySeqs = TTransSeqs; using TTransSubjSeqs = TTransSeqs; using TTransSubjReal = typename std::conditional< alphReduction || indexIsFM, TTransSubjSeqs, // real type TTransSubjSeqs &>::type; // will be initialized in constructor /* Reduced sequence objects, either as modstrings or as references to trans-strings */ template using TRedAlphModString = ModifiedString, TSpec>, ModView, TRedAlph>>>; using TRedQrySeqs = typename std::conditional< alphReduction, StringSet, Owner>>, // modview TTransQrySeqs &>::type; // reference to owner using TRedSubjSeqs = typename std::conditional< alphReduction, StringSet, Owner>>, // modview TTransSubjSeqs &>::type; // reference to owner /* sequence ID strings */ template using TIds = StringSet, Owner>>; using TQryIds = TIds; using TSubjIds = TIds; /* indeces and their type */ using TIndexSpec = TIndexSpec_; using TDbIndex = Index::type, TIndexSpec>; /* output file */ using TScoreScheme = std::conditional_t::value, Score, Score>>; // using TScoreScheme = TScoreScheme_; using TIOContext = BlastIOContext; using TFile = FormattedFile; using TBamFile = FormattedFile; /* misc types */ using TPositions = typename StringSetLimits::Type; using TMasking = StringSet, Owner>>; using TTaxIDs = StringSet, Owner>>; using TTaxParents = String; using TTaxHeights = String; using TTaxNames = StringSet>>; /* the actual members */ TDbIndex dbIndex; TUntransQrySeqs untranslatedQrySeqs; // used iff outformat is sam or bam TTransQrySeqs qrySeqs; TTransSubjReal subjSeqs; TRedQrySeqs redQrySeqs; TRedSubjSeqs redSubjSeqs; TQryIds qryIds; TSubjIds subjIds; TFile outfile; TBamFile outfileBam; TPositions untransQrySeqLengths; // used iff qIsTranslated(p) TPositions untransSubjSeqLengths; // used iff sIsTranslated(p) TTaxIDs sTaxIds; TTaxParents taxParents; TTaxHeights taxHeights; TTaxNames taxNames; StatsHolder stats; GlobalDataHolder() : subjSeqs(_initHelper(indexText(dbIndex), 
TTransSubjSeqs())),//std::integral_constant())),// : TTransSubjSeqs()), redQrySeqs(qrySeqs), redSubjSeqs(subjSeqs), stats() {} }; /* Documentation on the confusing type resolution used in the above class: * * !alphReduction && !indexIsFM e.g. BLASTN and SA-Index * * subjSeqs is & and initialized with indexText() * redSubjSeqs is & and initialized with subjSeqs * indexText(dbIndex) is non-ref owner StringSet assigned by loadDbIndexFromDisk() * * !alphReduction && indexIsFM e.g. BLASTN and FM-Index * * subjSeqs is non-ref owner StringSet and assigned in loadSubjects() * redSubjSeqs is & and initialized with subjSeqs * indexText(dbIndex) is non-ref owner StringSet, but never set (fmIndex doesnt need it) * * alphReduction && indexIsFM e.g. default * * subjSeqs is non-ref owner StringSet and assigned in loadSubjects() * redSubjSeqs is lightweight reduced StringSet and initialized with subjSeqs * indexText(dbIndex) is lightweight reduced StringSet, but never set (fmIndex doesnt need it) * * alphReduction && !indexIsFM e.g. 
default * * subjSeqs is non-ref owner StringSet and assigned in loadSubjects() * redSubjSeqs is lightweight reduced StringSet and initialized with subjSeqs * indexText(dbIndex) is lightweight reduced StringSet and assigned redSubjSeqs in loadDbIndexFromDisk */ // ---------------------------------------------------------------------------- // struct LocalDataHolder -- one object per thread // ---------------------------------------------------------------------------- template class LocalDataHolder { public: using TGlobalHolder = TGlobalHolder_; using TRedQrySeq = typename Value::type>::Type; using TSeeds = StringSet::Type>; using TSeedIndex = Index>; using TMatch = typename TGlobalHolder::TMatch; using TScoreExtension = TScoreExtension_; // references to global stuff LambdaOptions const & options; TGlobalHolder /*const*/ & gH; static constexpr BlastProgram blastProgram = TGlobalHolder::blastProgram; // this is the localHolder for the i-th part of the queries uint64_t i; uint64_t nBlocks; // regarding range of queries uint64_t indexBeginQry; uint64_t indexEndQry; // regarding seedingp TSeeds seeds; TSeedIndex seedIndex; // std::forward_list matches; std::vector matches; std::vector seedRefs; // mapping seed -> query std::vector seedRanks; // mapping seed -> relative rank // regarding extension using TAlignRow0 = Gaps::Type>::Type, ArrayGaps>; using TAlignRow1 = Gaps::Type>::Type, ArrayGaps>; #if (SEQAN_VERSION_MINOR < 4) using TDPContextNoSIMD = DPContext::Type, TScoreExtension>; #else // #if defined(SEQAN_SIMD_ENABLED) // using TCellValueSIMD = typename SimdVector::TYPE; // using TDPCellSIMD = DPCell_; // using TTraceValueSIMD = typename TraceBitMap_::Type; // using TScoreHostSIMD = String >; // using TTraceHostSIMD = String >; // using TDPContextSIMD = DPContext; // #endif using TCellValueNoSIMD = int16_t; using TDPCellNoSIMD = DPCell_; using TTraceValueNoSIMD = typename TraceBitMap_::Type; using TScoreHostNoSIMD = String >; using TTraceHostNoSIMD = String >; 
using TDPContextNoSIMD = DPContext; #endif using TAliExtContext = AliExtContext_; TAliExtContext alignContext; // #if defined(SEQAN_SIMD_ENABLED) // TDPContextSIMD alignSIMDContext; // #endif // map from sequence length to band size std::unordered_map bandTable; // regarding the gathering of stats StatsHolder stats; // progress string std::stringstream statusStr; // constructor LocalDataHolder(LambdaOptions const & _options, TGlobalHolder /*const*/ & _globalHolder) : options(_options), gH(_globalHolder), stats() { if (options.doubleIndexing) { nBlocks = options.queryPart; } else if (options.extensionMode == LambdaOptions::ExtensionMode::FULL_SIMD) { // division with rounding up nBlocks = (length(gH.redQrySeqs) + qNumFrames(blastProgram) * 10 - 1) / (qNumFrames(blastProgram) * 10); } else { nBlocks = length(gH.redQrySeqs) / qNumFrames(blastProgram); } } // copy constructor SHALLOW COPY ONLY, REQUIRED FOR firsprivate() LocalDataHolder(LocalDataHolder const & rhs) : options(rhs.options), gH(rhs.gH), stats() { } void init(uint64_t const _i) { i = _i; if (options.doubleIndexing) { indexBeginQry = (length(gH.qrySeqs) / options.queryPart) * i; indexEndQry = (i+1 == options.queryPart) // last interval ? 
length(gH.qrySeqs) // reach until end : (length(gH.qrySeqs) / options.queryPart) * (i+1); // make sure different frames of one sequence in same interval indexBeginQry -= (indexBeginQry % qNumFrames(blastProgram)); indexEndQry -= (indexEndQry % qNumFrames(blastProgram)); } else if (options.extensionMode == LambdaOptions::ExtensionMode::FULL_SIMD) { indexBeginQry = qNumFrames(blastProgram) * i * 10; indexEndQry = _min(qNumFrames(blastProgram) * (i+1) * 10, length(gH.qrySeqs)); } else { indexBeginQry = qNumFrames(blastProgram) * i; indexEndQry = qNumFrames(blastProgram) * (i+1); } clear(seeds); clear(seedIndex); matches.clear(); seedRefs.clear(); seedRanks.clear(); // stats.clear(); statusStr.clear(); statusStr.precision(2); } }; #endif // LAMBDA_SEARCH_DATASTRUCTURES_H_ lambda-lambda-v2.0.1/src/search_misc.hpp000066400000000000000000000225231445553061700201340ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // match.h: Main File for the match class // ========================================================================== #ifndef LAMBDA_SEARCH_MISC_H_ #define LAMBDA_SEARCH_MISC_H_ #include using namespace seqan; // ============================================================================ // Exceptions // ============================================================================ struct IndexException : public std::runtime_error { using std::runtime_error::runtime_error; }; struct QueryException : public std::runtime_error { using std::runtime_error::runtime_error; }; // ============================================================================ // Seeding related // ============================================================================ template inline void myHyperSortSingleIndex(std::vector> & matches, bool const doubleIndexing, TGH const &) { using TId = typename Match::TQId; // regular sort std::sort(matches.begin(), matches.end()); // trueQryId, begin, end std::vector> intervals; for (TId i = 1; i <= length(matches); ++i) { if ((i == length(matches)) || (matches[i-1].qryId != matches[i].qryId) || (matches[i-1].subjId / sNumFrames(TGH::blastProgram)) != (matches[i].subjId / sNumFrames(TGH::blastProgram))) { if (length(intervals) == 0) // first interval intervals.emplace_back(std::make_tuple(matches[i-1].qryId / qNumFrames(TGH::blastProgram), 0, i)); else 
intervals.emplace_back(std::make_tuple(matches[i-1].qryId / qNumFrames(TGH::blastProgram), std::get<2>(intervals.back()), i)); } } if (doubleIndexing) { // sort by trueQryId, then lengths of interval std::sort(intervals.begin(), intervals.end(), [] (std::tuple const & i1, std::tuple const & i2) { return (std::get<0>(i1) != std::get<0>(i2)) ? (std::get<0>(i1) < std::get<0>(i2)) : ((std::get<2>(i1) - std::get<1>(i1)) > (std::get<2>(i2) - std::get<1>(i2))); }); } else { // sort by lengths of interval, since trueQryId is the same anyway std::sort(intervals.begin(), intervals.end(), [] (std::tuple const & i1, std::tuple const & i2) { return (std::get<2>(i1) - std::get<1>(i1)) > (std::get<2>(i2) - std::get<1>(i2)); }); } std::vector> tmpVector; tmpVector.resize(matches.size()); TId newIndex = 0; for (auto const & i : intervals) { TId limit = std::get<2>(i); for (TId j = std::get<1>(i); j < limit; ++j) { tmpVector[newIndex] = matches[j]; newIndex++; } } std::swap(tmpVector, matches); } // ============================================================================ // Alignment-related // ============================================================================ template inline uint64_t quickHamming(T1 const & s1, T2 const & s2) { SEQAN_ASSERT_EQ(length(s1), length(s2)); uint64_t ret = 0; for (uint64_t i = 0; i < length(s1); ++i) if (s1[i] != s2[i]) ++ret; return ret; } template inline TScoreValue localAlignment2(Gaps & row0, Gaps & row1, Score const & scoringScheme, int const lowerDiag, int const upperDiag, TAlignContext & alignContext) { clear(alignContext.traceSegment); typedef FreeEndGaps_ TFreeEndGaps; typedef AlignConfig2, DPBand, TFreeEndGaps, TracebackOn > > TAlignConfig; TScoreValue score; DPScoutState_ scoutState; score = _setUpAndRunAlignment(alignContext.dpContext, alignContext.traceSegment, scoutState, row0, row1, scoringScheme, TAlignConfig(lowerDiag, upperDiag)); _adaptTraceSegmentsTo(row0, row1, alignContext.traceSegment); return score; } template inline 
int _bandSize(uint64_t const seqLength, TLocalHolder & lH) { switch (lH.options.band) { case -3: case -2: { int ret = 0; auto fit = lH.bandTable.find(seqLength); if (fit != lH.bandTable.end()) { ret = fit->second; } else { if (lH.options.band == -3) ret = ceil(std::log2(seqLength)); else ret = floor(sqrt(seqLength)); } lH.bandTable[seqLength] = ret; return ret; } break; case -1: return std::numeric_limits::max(); default: return lH.options.band; } } // ---------------------------------------------------------------------------- // Function computeEValueThreadSafe // ---------------------------------------------------------------------------- template inline double computeEValueThreadSafe(TBlastMatch & match, uint64_t ql, BlastIOContext & context) { #if defined(__FreeBSD__) // && version < 11 && defined(STDLIB_LLVM) because of https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=192320 // || version >= 11 && defined(STDLIB_GNU) because of https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=215709 static std::vector> _cachedLengthAdjustmentsArray(omp_get_num_threads()); std::unordered_map & _cachedLengthAdjustments = _cachedLengthAdjustmentsArray[omp_get_thread_num()]; #else static thread_local std::unordered_map _cachedLengthAdjustments; #endif // convert to 64bit and divide for translated sequences ql = ql / (qIsTranslated(context.blastProgram) ? 
3 : 1); // length adjustment not yet computed if (_cachedLengthAdjustments.find(ql) == _cachedLengthAdjustments.end()) _cachedLengthAdjustments[ql] = _lengthAdjustment(context.dbTotalLength, ql, context.scoringScheme); uint64_t adj = _cachedLengthAdjustments[ql]; match.eValue = _computeEValue(match.alignStats.alignmentScore, ql - adj, context.dbTotalLength - adj, context.scoringScheme); return match.eValue; } // ---------------------------------------------------------------------------- // compute LCA // ---------------------------------------------------------------------------- template T computeLCA(String const & taxParents, String const & taxHeights, T n1, T n2) { if (n1 == n2) return n1; // move up so that nodes are on same height for (auto i = taxHeights[n1]; i > taxHeights[n2]; --i) n1 = taxParents[n1]; for (auto i = taxHeights[n2]; i > taxHeights[n1]; --i) n2 = taxParents[n2]; while ((n1 != 0) && ( n2 != 0)) { // common ancestor if (n1 == n2) return n1; // move up in parallel n1 = taxParents[n1]; n2 = taxParents[n2]; } SEQAN_FAIL("One of the paths didn't lead to root."); return 0; // avoid warnings on clang } #endif // header guard lambda-lambda-v2.0.1/src/search_options.hpp000066400000000000000000001260371445553061700207010ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // options.h: contains the options and argument parser // ========================================================================== #ifndef LAMBDA_SEARCH_OPTIONS_H_ #define LAMBDA_SEARCH_OPTIONS_H_ #include #include #include #include #include #include #include #include using namespace seqan; // ========================================================================== // Forwards // ========================================================================== template struct SamBamExtraTags; // ========================================================================== // Classes // ========================================================================== // -------------------------------------------------------------------------- // Class LambdaOptions // -------------------------------------------------------------------------- struct LambdaOptions : public SharedOptions { std::string queryFile; AlphabetEnum qryOrigAlphabet; bool revComp = true; int outFileFormat; // 0 = BLAST, 1 = SAM, 2 = BAM std::string output; std::vector::Enum> columns; std::string outputBam; std::bitset<64> samBamTags; bool samWithRefHeader; unsigned samBamSeq; bool samBamHardClip; bool versionInformationToOutputFile; unsigned queryPart = 0; // bool semiGlobal; bool doubleIndexing = false; bool adaptiveSeeding; unsigned seedLength = 0; unsigned maxSeedDist = 1; bool hammingOnly = true; int seedGravity = 0; unsigned seedOffset = 0; unsigned 
minSeedLength = 0; bool seedDeltaIncreasesLength = true; // unsigned int minSeedEVal = 0; // double minSeedBitS = -1; // 0 = manual, positive X = blosumX, negative Y = pamY int scoringMethod = 62; // scores int gapOpen = -11; int gapExtend = -1; int match = 0; // only for manual int misMatch = 0; // only for manual int xDropOff = 0; int band = -1; double minBitScore = 0; double maxEValue = 1e-04; int idCutOff = 0; unsigned long maxMatches = 500; bool computeLCA = false; GeneticCodeSpec geneticCodeIndex; enum class ExtensionMode : uint8_t { AUTO, XDROP, FULL_SERIAL, FULL_SIMD }; ExtensionMode extensionMode; bool filterPutativeDuplicates = true; bool filterPutativeAbundant = true; bool mergePutativeSiblings = true; bool seedHalfExact = false; int preScoring = 0; // 0 = off, 1 = seed, 2 = region ( double preScoringThresh = 0.0; LambdaOptions() : SharedOptions() { } }; ArgumentParser::ParseResult parseCommandLine(LambdaOptions & options, int argc, char const ** argv) { // save commandLine for (int i = 0; i < argc; ++i) options.commandLine += std::string(argv[i]) + " "; eraseBack(options.commandLine); std::string programName = "lambda2 " + std::string(argv[0]); // this is important for option handling: if (std::string(argv[0]) == "searchn") options.blastProgram = BlastProgram::BLASTN; ArgumentParser parser(programName); // Set short description, version, and date. setShortDescription(parser, "the Local Aligner for Massive Biological DatA"); // Define usage line and long description. 
addUsageLine(parser, "[\\fIOPTIONS\\fP] \\fI-q QUERY.fasta\\fP " "\\fI-i INDEX.lambda\\fP " "[\\fI-o output.m8\\fP]"); sharedSetup(parser); #ifndef SEQAN_DISABLE_VERSION_CHECK // version checker initiated by top-level arg parser setDefaultValue(parser, "version-check", "0"); hideOption(parser, "version-check"); #endif addOption(parser, ArgParseOption("v", "verbosity", "Display more/less diagnostic output during operation: 0 [only errors]; 1 [default]; 2 " "[+run-time, options and statistics].", ArgParseArgument::INTEGER)); setDefaultValue(parser, "verbosity", "1"); setMinValue(parser, "verbosity", "0"); setMaxValue(parser, "verbosity", "2"); addSection(parser, "Input Options"); addOption(parser, ArgParseOption("q", "query", "Query sequences.", ArgParseArgument::INPUT_FILE, "IN")); setValidValues(parser, "query", getFileExtensions(SeqFileIn())); setRequired(parser, "q"); if (options.blastProgram != BlastProgram::BLASTN) { addOption(parser, ArgParseOption("a", "input-alphabet", "Alphabet of the query sequences (specify to override auto-detection). Dna sequences will be translated.", ArgParseArgument::STRING)); setValidValues(parser, "input-alphabet", "auto dna5 aminoacid"); setDefaultValue(parser, "input-alphabet", "auto"); setAdvanced(parser, "input-alphabet"); addOption(parser, ArgParseOption("g", "genetic-code", "The translation table to use if input is Dna. See " "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c" " for ids. Default is to use the same table that was used for the index or 1/CANONICAL if the index " "was not translated.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "genetic-code", "0"); setAdvanced(parser, "genetic-code"); } addOption(parser, ArgParseOption("i", "index", std::string{"The database index (created by the 'lambda "} + (options.blastProgram == BlastProgram::BLASTN ? 
"mkindexn" : "mkindexp") + "' command).", ArgParseArgument::INPUT_DIRECTORY, "IN")); setRequired(parser, "index"); setValidValues(parser, "index", ".lambda"); addSection(parser, "Output Options"); addOption(parser, ArgParseOption("o", "output", "File to hold reports on hits (.m* are blastall -m* formats; .m8 is tab-separated, .m9 is tab-separated with " "with comments, .m0 is pairwise format).", ArgParseArgument::OUTPUT_FILE, "OUT")); auto exts = getFileExtensions(BlastTabularFileOut<>()); append(exts, getFileExtensions(BlastReportFileOut<>())); append(exts, getFileExtensions(BamFileOut())); CharString extsConcat; // remove .sam.bam, .sam.vcf.gz, .sam.tbi for (auto const & ext : exts) { if ((!endsWith(ext, ".bam") || startsWith(ext, ".bam")) && (!endsWith(ext, ".vcf.gz")) && (!endsWith(ext, ".sam.tbi"))) { append(extsConcat, ext); appendValue(extsConcat, ' '); } } setValidValues(parser, "output", toCString(extsConcat)); setDefaultValue(parser, "output", "output.m8"); addOption(parser, ArgParseOption("", "output-columns", "Print specified column combination and/or order (.m8 and .m9 outputs only); call -oc help for more details.", ArgParseArgument::STRING, "STR")); setDefaultValue(parser, "output-columns", "std"); setAdvanced(parser, "output-columns"); addOption(parser, ArgParseOption("", "percent-identity", "Output only matches above this threshold (checked before e-value " "check).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "percent-identity", "0"); setMinValue(parser, "percent-identity", "0"); setMaxValue(parser, "percent-identity", "100"); addOption(parser, ArgParseOption("e", "e-value", "Output only matches that score below this threshold.", ArgParseArgument::DOUBLE)); setDefaultValue(parser, "e-value", "1e-04"); setMinValue(parser, "e-value", "0"); setMaxValue(parser, "e-value", "100"); addOption(parser, ArgParseOption("", "bit-score", "Output only matches that score above this threshold.", ArgParseArgument::DOUBLE)); setDefaultValue(parser, 
"bit-score", "0"); setMinValue(parser, "bit-score", "0"); setMaxValue(parser, "bit-score", "1000"); addOption(parser, ArgParseOption("n", "num-matches", "Print at most this number of matches per query.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "num-matches", "256"); setMinValue(parser, "num-matches", "1"); setMaxValue(parser, "num-matches", "10000"); addOption(parser, ArgParseOption("", "sam-with-refheader", "BAM files require all subject names to be written to the header. For SAM this is not required, so Lambda does " "not automatically do it to save space (especially for protein database this is a lot!). If you still want " "them with SAM, e.g. for better BAM compatibility, use this option.", ArgParseArgument::BOOL)); setDefaultValue(parser, "sam-with-refheader", "off"); setAdvanced(parser, "sam-with-refheader"); std::string samBamSeqDescr; if (options.blastProgram == BlastProgram::BLASTN) { samBamSeqDescr = "Write matching DNA subsequence into SAM/BAM file."; } else { samBamSeqDescr = "For BLASTX and TBLASTX the matching protein " "sequence is \"untranslated\" and positions retransformed to the original sequence. For BLASTP and TBLASTN " "there is no DNA sequence so a \"*\" is written to the SEQ column. The matching protein sequence can be " "written as an optional tag, see --sam-bam-tags."; } addOption(parser, ArgParseOption("", "sam-bam-seq", samBamSeqDescr + " If set to uniq than " "the sequence is omitted iff it is identical to the previous match's subsequence.", ArgParseArgument::STRING, "STR")); setValidValues(parser, "sam-bam-seq", "always uniq never"); setDefaultValue(parser, "sam-bam-seq", "uniq"); setAdvanced(parser, "sam-bam-seq"); addOption(parser, ArgParseOption("", "sam-bam-tags", "Write the specified optional columns to the SAM/BAM file. 
Call --sam-bam-tags help for more details.", ArgParseArgument::STRING, "STR")); setDefaultValue(parser, "sam-bam-tags", "AS NM ae ai qf"); setAdvanced(parser, "sam-bam-tags"); addOption(parser, ArgParseOption("", "sam-bam-clip", "Whether to hard-clip or soft-clip the regions beyond the local match. Soft-clipping retains the full sequence " "in the output file, but obviously uses more space.", ArgParseArgument::STRING, "STR")); setValidValues(parser, "sam-bam-clip", "hard soft"); setDefaultValue(parser, "sam-bam-clip", "hard"); setAdvanced(parser, "sam-bam-clip"); addOption(parser, ArgParseOption("", "version-to-outputfile", "Write the Lambda program tag and version number to the output file.", ArgParseArgument::BOOL)); setDefaultValue(parser, "version-to-outputfile", "on"); hideOption(parser, "version-to-outputfile"); addSection(parser, "General Options"); #ifdef _OPENMP addOption(parser, ArgParseOption("t", "threads", "number of threads to run concurrently.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "threads", omp_get_max_threads()); setMinValue(parser, "threads", "1"); setMaxValue(parser, "threads", std::to_string(omp_get_max_threads() * 10)); #else addOption(parser, ArgParseOption("t", "threads", "LAMBDA BUILT WITHOUT OPENMP; setting this option has no effect.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "threads", "1"); setMinValue(parser, "threads", "1"); setMaxValue(parser, "threads", "1"); #endif setAdvanced(parser, "threads"); #ifdef LAMBDA_LEGACY_PATHS addOption(parser, ArgParseOption("", "query-index-type", "controls double-indexing.", ArgParseArgument::STRING)); setValidValues(parser, "query-index-type", "radix none"); setDefaultValue(parser, "query-index-type", "none"); setAdvanced(parser, "query-index-type"); addOption(parser, ArgParseOption("", "query-partitions", "Divide the query into qp number of blocks before processing; should be" " a multiple of the number of threads, defaults to one per thread. 
" "Only used with double-indexing; strong influence on memory, see below.", ArgParseArgument::INTEGER)); #ifdef _OPENMP setDefaultValue(parser, "query-partitions", omp_get_max_threads()); #else setDefaultValue(parser, "query-partitions", "1"); #endif // _OPENMP hideOption(parser, "query-partitions"); // HIDDEN #endif // LAMBDA_LEGACY_PATHS addSection(parser, "Seeding / Filtration"); addOption(parser, ArgParseOption("", "adaptive-seeding", "Grow the seed if it has too many hits (low complexity filter).", ArgParseArgument::BOOL)); if (options.blastProgram == BlastProgram::BLASTN) setDefaultValue(parser, "adaptive-seeding", "off"); else setDefaultValue(parser, "adaptive-seeding", "on"); setAdvanced(parser, "adaptive-seeding"); unsigned defaultSeedLength = (options.blastProgram == BlastProgram::BLASTN) ? 14 : 10; addOption(parser, ArgParseOption("", "seed-length", "Length of the seeds.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "seed-length", std::to_string(defaultSeedLength)); setMinValue(parser, "seed-length", "3"); setMaxValue(parser, "seed-length", "50"); setAdvanced(parser, "seed-length"); addOption(parser, ArgParseOption("", "seed-offset", "Offset for seeding (if unset = seed-length/2).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "seed-offset", std::to_string(defaultSeedLength / 2)); setAdvanced(parser, "seed-offset"); setMinValue(parser, "seed-offset", "1"); setMaxValue(parser, "seed-offset", "50"); addOption(parser, ArgParseOption("", "seed-delta", "maximum seed distance.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "seed-delta", "1"); setAdvanced(parser, "seed-delta"); setMinValue(parser, "seed-delta", "0"); setMaxValue(parser, "seed-delta", "1"); addOption(parser, ArgParseOption("", "seed-delta-increases-length", "Seed delta increases the min. 
seed length (for affected seeds).", ArgParseArgument::BOOL)); setDefaultValue(parser, "seed-delta-increases-length", "off"); setAdvanced(parser, "seed-delta-increases-length"); addOption(parser, ArgParseOption("", "seed-half-exact", "Allow errors only in second half of seed.", ArgParseArgument::BOOL)); setDefaultValue(parser, "seed-half-exact", "on"); setAdvanced(parser, "seed-half-exact"); addOption(parser, ArgParseOption("", "seed-gravity", "Seeds closer than this are merged into region (if unset = " "seed-length).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "seed-gravity", "10"); hideOption(parser, "seed-gravity"); // HIDDEN addOption(parser, ArgParseOption("", "seed-min-length", "after postproc shorter seeds are discarded (if unset = seed-length).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "seed-min-length", "10"); hideOption(parser, "seed-min-length"); // HIDDEN addSection(parser, "Miscellaneous Heuristics"); addOption(parser, ArgParseOption("", "pre-scoring", "evaluate score of a region NUM times the size of the seed " "before extension (0 -> no pre-scoring, 1 -> evaluate seed, n-> area " "around seed, as well; default = 1 if no reduction is used).", ArgParseArgument::INTEGER)); setMinValue(parser, "pre-scoring", "1"); setMaxValue(parser, "pre-scoring", "10"); setDefaultValue(parser, "pre-scoring", "2"); setAdvanced(parser, "pre-scoring"); addOption(parser, ArgParseOption("", "pre-scoring-threshold", "minimum average score per position in pre-scoring region.", ArgParseArgument::DOUBLE)); setDefaultValue(parser, "pre-scoring-threshold", "2"); setMinValue(parser, "pre-scoring-threshold", "0"); setMaxValue(parser, "pre-scoring-threshold", "20"); setAdvanced(parser, "pre-scoring-threshold"); addOption(parser, ArgParseOption("", "filter-putative-duplicates", "filter hits that will likely duplicate a match already found.", ArgParseArgument::BOOL)); setDefaultValue(parser, "filter-putative-duplicates", "on"); setAdvanced(parser, 
"filter-putative-duplicates"); addOption(parser, ArgParseOption("", "filter-putative-abundant", "If the maximum number of matches per query are found already, " "stop searching if the remaining realm looks unfeasible.", ArgParseArgument::BOOL)); setDefaultValue(parser, "filter-putative-abundant", "on"); setAdvanced(parser, "filter-putative-abundant"); addOption(parser, ArgParseOption("", "merge-putative-siblings", "Merge seed from one region, " "stop searching if the remaining realm looks unfeasable.", ArgParseArgument::BOOL)); setDefaultValue(parser, "merge-putative-siblings", "on"); setAdvanced(parser, "merge-putative-siblings"); addSection(parser, "Scoring"); if (options.blastProgram != BlastProgram::BLASTN) { addOption(parser, ArgParseOption("s", "scoring-scheme", "use '45' for Blosum45; '62' for Blosum62 (default); '80' for Blosum80.", ArgParseArgument::INTEGER)); setDefaultValue(parser, "scoring-scheme", "62"); setAdvanced(parser, "scoring-scheme"); } addOption(parser, ArgParseOption("", "score-gap", "Score per gap character.", ArgParseArgument::INTEGER)); if (options.blastProgram == BlastProgram::BLASTN) setDefaultValue(parser, "score-gap", "-2"); else setDefaultValue(parser, "score-gap", "-1"); setMinValue(parser, "score-gap", "-1000"); setMaxValue(parser, "score-gap", "1000"); setAdvanced(parser, "score-gap"); addOption(parser, ArgParseOption("", "score-gap-open", "Additional cost for opening gap.", ArgParseArgument::INTEGER)); if (options.blastProgram == BlastProgram::BLASTN) setDefaultValue(parser, "score-gap-open", "-5"); else setDefaultValue(parser, "score-gap-open", "-11"); setMinValue(parser, "score-gap-open", "-1000"); setMaxValue(parser, "score-gap-open", "1000"); setAdvanced(parser, "score-gap-open"); if (options.blastProgram == BlastProgram::BLASTN) { addOption(parser, ArgParseOption("", "score-match", "Match score [only BLASTN])", ArgParseArgument::INTEGER)); setDefaultValue(parser, "score-match", "2"); setMinValue(parser, "score-match", 
"-1000"); setMaxValue(parser, "score-match", "1000"); setAdvanced(parser, "score-match"); addOption(parser, ArgParseOption("", "score-mismatch", "Mismatch score [only BLASTN]", ArgParseArgument::INTEGER)); setDefaultValue(parser, "score-mismatch", "-3"); setMinValue(parser, "score-mismatch", "-1000"); setMaxValue(parser, "score-mismatch", "1000"); setAdvanced(parser, "score-mismatch"); } addSection(parser, "Extension"); addOption(parser, ArgParseOption("x", "x-drop", "Stop Banded extension if score x below the maximum seen (-1 means no " "xdrop).", ArgParseArgument::INTEGER)); setDefaultValue(parser, "x-drop", "30"); setMinValue(parser, "x-drop", "-1"); setMaxValue(parser, "x-drop", "1000"); setAdvanced(parser, "x-drop"); addOption(parser, ArgParseOption("b", "band", "Size of the DP-band used in extension (-3 means log2 of query length; " "-2 means sqrt of query length; -1 means full dp; n means band of size " "2n+1)", ArgParseArgument::INTEGER)); setDefaultValue(parser, "band", "-3"); setMinValue(parser, "band", "-3"); setMaxValue(parser, "band", "1000"); setAdvanced(parser, "band"); addOption(parser, ArgParseOption("m", "extension-mode", "Choice of extension algorithms.", ArgParseArgument::STRING)); #ifdef SEQAN_SIMD_ENABLED setValidValues(parser, "extension-mode", "auto xdrop fullSerial fullSIMD"); #else setValidValues(parser, "extension-mode", "auto xdrop fullSerial"); #endif setDefaultValue(parser, "extension-mode", "auto"); setAdvanced(parser, "extension-mode"); addTextSection(parser, "Tuning"); addText(parser, "Tuning the seeding parameters and (de)activating alphabet " "reduction has a strong " "influence on both speed and sensitivity. 
We recommend the " "following alternative profiles for protein searches:"); addText(parser, "fast (high similarity): --seed-delta-increases-length on"); addText(parser, "sensitive (lower similarity): --seed-offset 3"); addText(parser, "For further information see the wiki: "); // addTextSection(parser, "Speed VS memory requirements"); // addText(parser, "Lambda requires approximately the following amount of RAM:" // " \033[1msize(queryFile) + size(dbIDs) + 2 * size(dbSeqs)\033[0m. " // "If you have more RAM, use double indexing and SA:\n" // "\033[1m-di sa -qi radix\033[0m " // "which will result in an additional speed-up of up to 30% " // "compared to the published version (you need to run the " // "indexer with \033[1m-di sa \033[0m, as well). The amount " // "of RAM required will be: " // "\033[1msize(queryFile) + size(dbIDs) + 7 * size(dbSeqs) + n\033[0m " // "where n grows slowly but linearly with input size. " // "Note that size(dbSeqs) refers to the total " // "sequence length and does not include IDs (so it is less " // "than the size of the file)."); // addText(parser, "To save more RAM, you can define " // "LAMBDA_BITCOPMRESSED_STRINGS while compiling lambda. " // "This will reduce memory usage by about:" // " \033[1m0.3 * ( size(queryFile) + size(dbSeqs) )\033[0m," // " but slow down lambda by about 10%."); // Parse command line. ArgumentParser::ParseResult res = parse(parser, argc, argv); // Only extract options if the program will continue after parseCommandLine() if (res != ArgumentParser::PARSE_OK) return res; // Options shared by lambda and its indexer res = parseCommandLineShared(options, parser); if (res != ArgumentParser::PARSE_OK) return res; std::string buffer; // Extract option values. 
getOptionValue(options.queryFile, parser, "query"); if (options.blastProgram == BlastProgram::BLASTN) { options.qryOrigAlphabet = AlphabetEnum::DNA5; } else { getOptionValue(buffer, parser, "input-alphabet"); if (buffer == "auto") options.qryOrigAlphabet = AlphabetEnum::DNA4; else if (buffer == "dna5") options.qryOrigAlphabet = AlphabetEnum::DNA5; else if (buffer == "aminoacid") options.qryOrigAlphabet = AlphabetEnum::AMINO_ACID; else throw std::invalid_argument("ERROR: Invalid argument to --input-alphabet\n"); int buf = 0; getOptionValue(buf, parser, "genetic-code"); switch (buf) { case 0: // take code from index case 1: case 2: case 3: case 4: case 5: case 6: case 9: case 10: case 11: case 12: case 13: case 14: case 15: case 16: case 21: case 22: case 23: case 24 : case 25: options.geneticCode = static_cast(buf); break; default: std::cerr << "Invalid genetic code. See trans_table vars at " << "https://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi?mode=c" << std::endl; return ArgumentParser::PARSE_ERROR; } } getOptionValue(options.indexDir, parser, "index"); getOptionValue(options.output, parser, "output"); buffer = options.output; if (endsWith(buffer, ".gz")) buffer.resize(length(buffer) - 3); else if (endsWith(buffer, ".bz2")) buffer.resize(length(buffer) - 4); if (endsWith(buffer, ".sam")) options.outFileFormat = 1; else if (endsWith(buffer, ".bam")) options.outFileFormat = 2; else options.outFileFormat = 0; getOptionValue(options.samWithRefHeader, parser, "sam-with-refheader"); clear(buffer); getOptionValue(buffer, parser, "sam-bam-seq"); if (buffer == "never") options.samBamSeq = 0; else if (buffer == "uniq") options.samBamSeq = 1; else options.samBamSeq = 2; clear(buffer); getOptionValue(buffer, parser, "sam-bam-clip"); options.samBamHardClip = (buffer == "hard"); clear(buffer); getOptionValue(buffer, parser, "output-columns"); if (buffer == "help") { std::cout << "Please specify the columns in this format -oc 'column1 column2', i.e. 
space-separated and " << "enclosed in single quotes.\nThe specifiers are the same as in NCBI Blast, currently " << "the following are supported:\n"; for (unsigned i = 0; i < length(BlastMatchField<>::implemented); ++i) { if (BlastMatchField<>::implemented[i]) { std::cout << "\t" << BlastMatchField<>::optionLabels[i] << (length(BlastMatchField<>::optionLabels[i]) >= 8 ? "\t" : "\t\t") << BlastMatchField<>::descriptions[i] << "\n"; } } return ArgumentParser::PARSE_HELP; } else { StringSet fields; strSplit(fields, buffer, IsSpace(), false); for (auto str : fields) { bool resolved = false; for (unsigned i = 0; i < length(BlastMatchField<>::optionLabels); ++i) { if (BlastMatchField<>::optionLabels[i] == str) { appendValue(options.columns, static_cast::Enum>(i)); resolved = true; if (static_cast::Enum>(i) == BlastMatchField<>::Enum::S_TAX_IDS) options.hasSTaxIds = true; else if ((static_cast::Enum>(i) == BlastMatchField<>::Enum::LCA_ID) || (static_cast::Enum>(i) == BlastMatchField<>::Enum::LCA_TAX_ID)) options.computeLCA = true; break; } } if (!resolved) { std::cerr << "Unknown column specifier \"" << str << "\". Please see -oc help for valid options.\n"; return ArgumentParser::PARSE_ERROR; } } } clear(buffer); getOptionValue(buffer, parser, "sam-bam-tags"); if (buffer == "help") { std::cout << "Please specify the tags in this format -oc 'tag1 tag2', i.e. space-separated and " << "enclosed in quotes. 
The order of tags is not preserved.\nThe following specifiers are " << "supported:\n"; for (auto const & c : SamBamExtraTags<>::keyDescPairs) std::cout << "\t" << std::get<0>(c) << "\t" << std::get<1>(c) << "\n"; return ArgumentParser::PARSE_HELP; } else { StringSet fields; strSplit(fields, buffer, IsSpace(), false); for (auto str : fields) { bool resolved = false; for (unsigned i = 0; i < length(SamBamExtraTags<>::keyDescPairs); ++i) { if (std::get<0>(SamBamExtraTags<>::keyDescPairs[i]) == str) { options.samBamTags[i] = true; resolved = true; break; } } if (!resolved) { std::cerr << "Unknown column specifier \"" << str << "\". Please see \"--sam-bam-tags help\" for valid options.\n"; return ArgumentParser::PARSE_ERROR; } } } if (options.samBamTags[SamBamExtraTags<>::S_TAX_IDS]) options.hasSTaxIds = true; if (options.samBamTags[SamBamExtraTags<>::LCA_ID] || options.samBamTags[SamBamExtraTags<>::LCA_TAX_ID]) options.computeLCA = true; // lca computation requires tax ids if (options.computeLCA) options.hasSTaxIds = true; getOptionValue(options.versionInformationToOutputFile, parser, "version-to-outputfile"); getOptionValue(options.adaptiveSeeding, parser, "adaptive-seeding"); clear(buffer); getOptionValue(options.seedLength, parser, "seed-length"); getOptionValue(options.seedOffset, parser, "seed-offset"); if (isSet(parser, "seed-gravity")) getOptionValue(options.seedGravity, parser, "seed-gravity"); else options.seedGravity = options.seedLength; if (isSet(parser, "seed-min-length")) getOptionValue(options.minSeedLength, parser, "seed-min-length"); else options.minSeedLength = options.seedLength; getOptionValue(options.maxSeedDist, parser, "seed-delta"); if (options.maxSeedDist == 0) { // the whole seed is exact, so it is also half-exact :) options.seedHalfExact = true; if (options.dbIndexType == DbIndexType::BI_FM_INDEX) { std::cerr << "WARNING: Exact seeeding doesn't benefit from bi-fm-index, so regular index is used.\n"; options.dbIndexType = 
DbIndexType::FM_INDEX; } } getOptionValue(options.seedDeltaIncreasesLength, parser, "seed-delta-increases-length"); getOptionValue(options.maxEValue, parser, "e-value"); getOptionValue(options.minBitScore, parser, "bit-score"); getOptionValue(options.idCutOff, parser, "percent-identity"); getOptionValue(options.xDropOff, parser, "x-drop"); getOptionValue(options.band, parser, "band"); #ifdef LAMBDA_LEGACY_PATHS getOptionValue(buffer, parser, "query-index-type"); options.doubleIndexing = (buffer == "radix"); if (options.doubleIndexing) { if (isSet(parser, "query-partitions")) getOptionValue(options.queryPart, parser, "query-partitions"); else options.queryPart = options.threads; if ((options.queryPart % options.threads) != 0) std::cout << "-qp not a multiple of -t; expect suboptimal performance.\n"; } else { options.queryPart = 1; } #endif if (options.blastProgram == BlastProgram::BLASTN) { options.scoringMethod = 0; getOptionValue(options.misMatch, parser, "score-mismatch"); getOptionValue(options.match, parser, "score-match"); } else { getOptionValue(options.scoringMethod, parser, "scoring-scheme"); switch (options.scoringMethod) { case 45: case 62: case 80: break; default: std::cerr << "Unsupported Scoring Scheme selected.\n"; return ArgumentParser::PARSE_ERROR; } } getOptionValue(options.gapExtend, parser, "score-gap"); getOptionValue(options.gapOpen, parser, "score-gap-open"); getOptionValue(options.filterPutativeDuplicates, parser, "filter-putative-duplicates"); getOptionValue(options.filterPutativeAbundant, parser, "filter-putative-abundant"); getOptionValue(options.mergePutativeSiblings, parser, "merge-putative-siblings"); getOptionValue(options.seedHalfExact, parser, "seed-half-exact"); if (options.dbIndexType == DbIndexType::BI_FM_INDEX) { if (options.seedHalfExact) std::cerr << "WARNING: seedHalfExact is already implied by bidirectional indexes.\n"; else options.seedHalfExact = true; } // TODO always prescore 1 getOptionValue(options.preScoring, parser, 
"pre-scoring"); if ((!isSet(parser, "pre-scoring")) && (options.reducedAlphabet == options.transAlphabet)) options.preScoring = 1; getOptionValue(options.preScoringThresh, parser, "pre-scoring-threshold"); // if (options.preScoring == 0) // options.preScoringThresh = 4; int numbuf; getOptionValue(numbuf, parser, "num-matches"); options.maxMatches = static_cast(numbuf); getOptionValue(buffer, parser, "extension-mode"); if (buffer == "fullSIMD") { options.extensionMode = LambdaOptions::ExtensionMode::FULL_SIMD; options.filterPutativeAbundant = false; options.filterPutativeDuplicates = false; options.mergePutativeSiblings = false; options.xDropOff = -1; } else if (buffer == "fullSerial") { options.extensionMode = LambdaOptions::ExtensionMode::FULL_SERIAL; options.filterPutativeAbundant = false; options.filterPutativeDuplicates = false; options.mergePutativeSiblings = false; options.xDropOff = -1; } else if (buffer == "xdrop") { options.extensionMode = LambdaOptions::ExtensionMode::XDROP; } else { options.extensionMode = LambdaOptions::ExtensionMode::AUTO; } return ArgumentParser::PARSE_OK; } // -------------------------------------------------------------------------- // Function printOptions() // -------------------------------------------------------------------------- template inline void printOptions(LambdaOptions const & options) { using TGH = typename TLH::TGlobalHolder; std::string bandStr; switch(options.band) { case -3: bandStr = "2 * log(queryLength) + 1"; break; case -2: bandStr = "2 * sqrt(queryLength) + 1"; break; case -1: bandStr = "no band"; break; default: bandStr = std::to_string(2 * options.band + 1); break; } std::cout << "OPTIONS\n" << " INPUT\n" << " query file: " << options.queryFile << "\n" << " index directory: " << options.indexDir << "\n" << " db index type: " << _indexEnumToName(options.dbIndexType) << "\n" << " OUTPUT (file)\n" << " output file: " << options.output << "\n" << " maximum e-value: " << options.maxEValue << "\n" << " minimum 
bit-score: " << options.minBitScore << "\n" << " minimum % identity: " << options.idCutOff << "\n" << " max #matches per query: " << options.maxMatches << "\n" << " include subj names in sam:" << options.samWithRefHeader << "\n" << " include seq in sam/bam: " << options.samBamSeq << "\n" << " with subject tax ids: " << options.hasSTaxIds << '\n' << " compute LCA: " << options.computeLCA << '\n' << " OUTPUT (stdout)\n" << " stdout is terminal: " << options.isTerm << "\n" << " terminal width: " << options.terminalCols << "\n" << " verbosity: " << options.verbosity << "\n" << " GENERAL\n" << " double indexing: " << options.doubleIndexing << "\n" << " threads: " << uint(options.threads) << "\n" << " query partitions: " << (options.doubleIndexing ? std::to_string(options.queryPart) : std::string("n/a")) << "\n" << " TRANSLATION AND ALPHABETS\n" << " genetic code: " << ((TGH::blastProgram != BlastProgram::BLASTN) && (TGH::blastProgram != BlastProgram::BLASTP) ? std::to_string(options.geneticCode) : std::string("n/a")) << "\n" << " blast mode: " << _programTagToString(TGH::blastProgram) << "\n" << " original alphabet (query):" << _alphTypeToName(OrigQryAlph()) << "\n" << " original alphabet (subj): " << _alphTypeToName(OrigSubjAlph()) << "\n" << " translated alphabet: " << _alphTypeToName(TransAlph()) << "\n" << " reduced alphabet: " << _alphTypeToName(typename TGH::TRedAlph()) << "\n" << " SEEDING\n" << " seed length: " << uint(options.seedLength) << "\n" << " seed offset: " << uint(options.seedOffset) << "\n" << " seed delta: " << uint(options.maxSeedDist) << "\n" << " seeds ungapped: " << uint(options.hammingOnly) << "\n" << " seed gravity: " << uint(options.seedGravity) << "\n" << " min seed length: " << uint(options.minSeedLength) << "\n" << " seed delta length inc.: " << (options.seedDeltaIncreasesLength ? std::string("on") : std::string("off")) << "\n" << " MISCELLANEOUS HEURISTICS\n" << " pre-scoring: " << (options.preScoring ? 
std::string("on") : std::string("off")) << "\n" << " pre-scoring-region: " << (options.preScoring ? std::to_string( options.preScoring * options.seedLength) : std::string("n/a")) << "\n" << " pre-scoring-threshold: " << (options.preScoring ? std::to_string( options.preScoringThresh) : std::string("n/a")) << "\n" << " putative-abundancy: " << (options.filterPutativeAbundant ? std::string("on") : std::string("off")) << "\n" << " putative-duplicates: " << (options.filterPutativeDuplicates ? std::string("on") : std::string("off")) << "\n" << " seed half exact: " << (options.seedHalfExact ? std::string("on") : std::string("off")) << "\n" << " SCORING\n" << " scoring scheme: " << options.scoringMethod << "\n" << " score-match: " << (options.scoringMethod ? std::string("n/a") : std::to_string(options.match)) << "\n" << " score-mismatch: " << (options.scoringMethod ? std::string("n/a") : std::to_string(options.misMatch)) << "\n" << " score-gap: " << options.gapExtend << "\n" << " score-gap-open: " << options.gapOpen << "\n" << " EXTENSION\n"; switch (options.extensionMode) { case LambdaOptions::ExtensionMode::AUTO: std::cout << " extensionMode: auto (depends on query length)\n" << " x-drop: " << options.xDropOff << "\n" << " band: " << bandStr << "\n" << " [depending on the automatically chosen mode x-drop or band might get disabled.\n"; break; case LambdaOptions::ExtensionMode::XDROP: std::cout << " extensionMode: individual\n" << " x-drop: " << options.xDropOff << "\n" << " band: " << bandStr << "\n"; break; case LambdaOptions::ExtensionMode::FULL_SERIAL: std::cout << " extensionMode: batch, but serialized\n" << " x-drop: not used\n" << " band: " << bandStr << "\n"; break; case LambdaOptions::ExtensionMode::FULL_SIMD: std::cout << " extensionMode: batch with SIMD\n" << " x-drop: not used\n" << " band: not used\n"; break; } std::cout << " BUILD OPTIONS:\n" << " cmake_build_type: " << std::string(CMAKE_BUILD_TYPE) << "\n" << " fastbuild: " #if defined(FASTBUILD) << "on\n" 
#else << "off\n" #endif << " native_build: " #if defined(LAMBDA_NATIVE_BUILD) << "on\n" #else << "off\n" #endif << " static_build: " #if defined(LAMBDA_STATIC_BUILD) << "on\n" #else << "off\n" #endif << " mmapped_db: " #if defined(LAMBDA_MMAPPED_DB) << "on\n" #else << "off\n" #endif << " lingaps_opt: " #if defined(LAMBDA_LINGAPS_OPT) << "on\n" #else << "off\n" #endif << " seqan_simd: " #if defined(SEQAN_SIMD_ENABLED) && defined(__AVX2__) << "avx2\n" #elif defined(SEQAN_SIMD_ENABLED) && defined(__SSE4_2__) << "sse4\n" #else << "off\n" #endif << "\n"; } #endif // header guard lambda-lambda-v2.0.1/src/search_output.hpp000066400000000000000000000676671445553061700205630ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // output.hpp: contains routines for file-writing // ========================================================================== #ifndef LAMBDA_SEARCH_OUTPUT_H_ #define LAMBDA_SEARCH_OUTPUT_H_ #include #include using namespace seqan; template struct SamBamExtraTags { enum Enum { // Q_START, // S_START, BIT_SCORE, Q_AA_CIGAR, EDIT_DISTANCE, MATCH_COUNT, SCORE, E_VALUE, P_IDENT, P_POS, Q_FRAME, Q_AA_SEQ, S_FRAME, S_TAX_IDS, LCA_ID, LCA_TAX_ID }; static constexpr const std::array, 14> keyDescPairs { { // { "ZS", "query start (in DNA if original was DNA)" }, // Q_START, // { "YS", "subject start (in DNA if original was DNA)" }, // S_START, { "AS", "bit score" }, // BIT_SCORE, { "OC", "query protein cigar (* for BLASTN)"}, // Q_AA_CIGAR, { "NM", "edit distance (in protein space unless BLASTN)"}, // EDIT_DISTANCE { "IH", "number of matches this query has"}, // MATCH_COUNT { "ar", "raw score" }, // SCORE, { "ae", "expect value" }, // E_VALUE, { "ai", "% identity (in protein space unless BLASTN) " }, // P_IDENT, { "ap", "% positive (in protein space unless BLASTN)"}, // P_POS, { "qf", "query frame" }, // Q_FRAME, { "qs", "query protein sequence (* for BLASTN)"}, // Q_AA_SEQ, { "sf", "subject frame" }, // S_FRAME, { "st", "subject taxonomy IDs (* if n/a)" }, // S_TAX_IDS, { "ls", "lowest common ancestor scientific name" }, // LCA_ID, { "lt", "lowest common ancestor taxonomy ID" }, // LCA_TAX_ID, } }; }; template constexpr 
const std::array, 14> SamBamExtraTags::keyDescPairs; // ---------------------------------------------------------------------------- // Function _untranslatedClipPositions() // ---------------------------------------------------------------------------- // similar to _untranslatePositions() from the blast module template inline void _untranslateSequence(TSequence1 & target, TSequence2 const & source, TNum const qStart, TNum const qEnd, int const qFrameShift) { if (qFrameShift >= 0) { target = infix(source, 3 * qStart + std::abs(qFrameShift) - 1, 3 * qEnd + std::abs(qFrameShift) - 1); } else { target = infix(source, length(source) - (3 * qEnd + std::abs(qFrameShift) - 1), length(source) - (3 * qStart + std::abs(qFrameShift) - 1)); reverseComplement(target); } } // ---------------------------------------------------------------------------- // Function blastMatchToCigar() convert seqan align to cigar // ---------------------------------------------------------------------------- template inline void blastMatchOneCigar(TCigar & cigar, TBlastMatch const & m, TBlastRecord const & r, TLocalHolder const & lH) { using TCElem = typename Value::Type; using TGlobalHolder = typename TLocalHolder::TGlobalHolder; SEQAN_ASSERT_EQ(length(m.alignRow0), length(m.alignRow1)); // translate positions into dna space unsigned const transFac = qIsTranslated(TGlobalHolder::blastProgram) ? 3 : 1; // clips resulting from translation / frameshift are always hard clips unsigned const leftFrameClip = std::abs(m.qFrameShift) - 1; unsigned const rightFrameClip = qIsTranslated(TGlobalHolder::blastProgram) ? 
(r.qLength - leftFrameClip) % 3 : 0; // regular clipping from local alignment (regions outside match) can be hard or soft unsigned const leftClip = m.qStart * transFac; unsigned const rightClip = (length(source(m.alignRow0)) - m.qEnd) * transFac; if (lH.options.samBamHardClip) { if (leftFrameClip + leftClip > 0) appendValue(cigar, TCElem('H', leftFrameClip + leftClip)); } else { if (leftFrameClip > 0) appendValue(cigar, TCElem('H', leftFrameClip)); if (leftClip > 0) appendValue(cigar, TCElem('S', leftClip)); } for (unsigned i = 0, count = 0; i < length(m.alignRow0); /* incremented below */) { // deletion in query count = 0; while (isGap(m.alignRow0, i) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) appendValue(cigar, TCElem('D', count * transFac)); // insertion in query count = 0; while (isGap(m.alignRow1, i) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) appendValue(cigar, TCElem('I', count * transFac)); // match or mismatch count = 0; while ((!isGap(m.alignRow0, i)) && (!isGap(m.alignRow1, i)) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) appendValue(cigar, TCElem('M', count * transFac)); } if (lH.options.samBamHardClip) { if (rightFrameClip + rightClip > 0) appendValue(cigar, TCElem('H', rightFrameClip + rightClip)); } else { if (rightClip > 0) appendValue(cigar, TCElem('S', rightClip)); if (rightFrameClip > 0) appendValue(cigar, TCElem('H', rightFrameClip)); } if (m.qFrameShift < 0) reverse(cigar); } // translation happened and we want both cigars template inline void blastMatchTwoCigar(TCigar & dnaCigar, TCigar & protCigar, TBlastMatch const & m, TBlastRecord const & r, TLocalHolder const & lH) { using TCElem = typename Value::Type; SEQAN_ASSERT_EQ(length(m.alignRow0), length(m.alignRow1)); // clips resulting from translation / frameshift are always hard clips unsigned const leftFrameClip = std::abs(m.qFrameShift) - 1; // in dna space unsigned const rightFrameClip = (r.qLength - leftFrameClip) % 3; // in dna 
space // regular clipping from local alignment (regions outside match) can be hard or soft unsigned const leftClip = m.qStart; // in protein space unsigned const rightClip = length(source(m.alignRow0)) - m.qEnd; // in protein space if (lH.options.samBamHardClip) { if (leftFrameClip + leftClip > 0) appendValue(dnaCigar, TCElem('H', leftFrameClip + 3 * leftClip)); if (leftClip > 0) appendValue(protCigar, TCElem('H', leftClip)); } else { if (leftFrameClip > 0) appendValue(dnaCigar, TCElem('H', leftFrameClip)); if (leftClip > 0) { appendValue(dnaCigar, TCElem('S', 3 * leftClip)); appendValue(protCigar, TCElem('S', leftClip)); } } for (unsigned i = 0, count = 0; i < length(m.alignRow0); /* incremented below */) { // deletion in query count = 0; while (isGap(m.alignRow0, i) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) { appendValue(dnaCigar, TCElem('D', count * 3)); appendValue(protCigar, TCElem('D', count)); } // insertion in query count = 0; while (isGap(m.alignRow1, i) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) { appendValue(dnaCigar, TCElem('I', count * 3)); appendValue(protCigar, TCElem('I', count)); } // match or mismatch count = 0; while ((!isGap(m.alignRow0, i)) && (!isGap(m.alignRow1, i)) && (i < length(m.alignRow0))) { ++count; ++i; } if (count > 0) { appendValue(dnaCigar, TCElem('M', count * 3)); appendValue(protCigar, TCElem('M', count)); } } if (lH.options.samBamHardClip) { if (rightFrameClip + rightClip > 0) appendValue(dnaCigar, TCElem('H', rightFrameClip + 3 * rightClip)); if (rightClip > 0) appendValue(protCigar, TCElem('H', rightClip)); } else { if (rightClip > 0) { appendValue(dnaCigar, TCElem('S', 3 * rightClip)); appendValue(protCigar, TCElem('S', rightClip)); } if (rightFrameClip > 0) appendValue(dnaCigar, TCElem('H', rightFrameClip)); } if (m.qFrameShift < 0) reverse(dnaCigar); // protCigar never reversed } // ---------------------------------------------------------------------------- // Function 
myWriteHeader() // ---------------------------------------------------------------------------- template inline void myWriteHeader(TGH & globalHolder, TLambdaOptions const & options) { if (options.outFileFormat == 0) // BLAST { open(globalHolder.outfile, toCString(options.output)); context(globalHolder.outfile).fields = options.columns; auto & versionString = context(globalHolder.outfile).versionString; clear(versionString); append(versionString, _programTagToString(TGH::blastProgram)); append(versionString, " 2.2.26+ [created by LAMBDA"); if (options.versionInformationToOutputFile) { append(versionString, "-"); append(versionString, SEQAN_APP_VERSION); } append(versionString, ", see http://seqan.de/lambda and please cite correctly in your academic work]"); writeHeader(globalHolder.outfile); } else // SAM or BAM { open(globalHolder.outfileBam, toCString(options.output)); auto & context = seqan::context(globalHolder.outfileBam); auto & subjSeqLengths = contigLengths(context); auto & subjIds = contigNames(context); // set sequence lengths if (sIsTranslated(TGH::blastProgram)) { //TODO can we get around a copy? 
subjSeqLengths = prefix(globalHolder.untransSubjSeqLengths, length(globalHolder.untransSubjSeqLengths) - 1); } else { // compute lengths ultra-fast resize(subjSeqLengths, length(globalHolder.subjSeqs)); #ifdef __clang__ SEQAN_OMP_PRAGMA(parallel for) #else SEQAN_OMP_PRAGMA(parallel for simd) #endif for (unsigned i = 0; i < length(subjSeqLengths); ++i) subjSeqLengths[i] = globalHolder.subjSeqs.limits[i+1] - globalHolder.subjSeqs.limits[i]; } // set namestore resize(subjIds, length(globalHolder.subjIds)); SEQAN_OMP_PRAGMA(parallel for) for (unsigned i = 0; i < length(globalHolder.subjIds); ++i) subjIds[i] = prefix(globalHolder.subjIds[i], std::find(begin(globalHolder.subjIds[i], Standard()), end(globalHolder.subjIds[i], Standard()), ' ') - begin(globalHolder.subjIds[i], Standard())); typedef BamHeaderRecord::TTag TTag; // CREATE HEADER BamHeader header; // Fill first header line. BamHeaderRecord firstRecord; firstRecord.type = BAM_HEADER_FIRST; appendValue(firstRecord.tags, TTag("VN", "1.4")); // appendValue(firstRecord.tags, TTag("SO", "unsorted")); appendValue(firstRecord.tags, TTag("GO", "query")); appendValue(header, firstRecord); // Fill program header line. if (options.versionInformationToOutputFile) { BamHeaderRecord pgRecord; pgRecord.type = BAM_HEADER_PROGRAM; appendValue(pgRecord.tags, TTag("ID", "lambda")); appendValue(pgRecord.tags, TTag("PN", "lambda")); appendValue(pgRecord.tags, TTag("VN", SEQAN_APP_VERSION)); appendValue(pgRecord.tags, TTag("CL", options.commandLine)); appendValue(header, pgRecord); } // Fill homepage header line. 
BamHeaderRecord hpRecord0; hpRecord0.type = BAM_HEADER_COMMENT; appendValue(hpRecord0.tags, TTag("CO", "Lambda is a high performance BLAST compatible local aligner, " "please see http://seqan.de/lambda for more information.")); appendValue(header, hpRecord0); BamHeaderRecord hpRecord1; hpRecord1.type = BAM_HEADER_COMMENT; appendValue(hpRecord1.tags, TTag("CO", "SAM/BAM dialect documentation is available here: " "https://github.com/seqan/lambda/wiki/Output-Formats")); appendValue(header, hpRecord1); BamHeaderRecord hpRecord2; hpRecord2.type = BAM_HEADER_COMMENT; appendValue(hpRecord2.tags, TTag("CO", "If you use any results found by Lambda, please cite " "Hauswedell et al. (2014) doi: 10.1093/bioinformatics/btu439")); appendValue(header, hpRecord2); // Fill extra tags header line. BamHeaderRecord tagRecord; tagRecord.type = BAM_HEADER_COMMENT; std::string columnHeaders = "Optional tags as follow"; for (unsigned i = 0; i < length(SamBamExtraTags<>::keyDescPairs); ++i) { if (options.samBamTags[i]) { columnHeaders += '\t'; columnHeaders += std::get<0>(SamBamExtraTags<>::keyDescPairs[i]); columnHeaders += ':'; columnHeaders += std::get<1>(SamBamExtraTags<>::keyDescPairs[i]); } } appendValue(tagRecord.tags, TTag("CO", columnHeaders)); appendValue(header, tagRecord); // sam and we don't want the headers if (!options.samWithRefHeader && (options.outFileFormat == 1)) { // we only write the header records that we actually created ourselves for (unsigned i = 0; i < length(header); ++i) write(globalHolder.outfileBam.iter, header[i], seqan::context(globalHolder.outfileBam), Sam()); } else { // ref header records are automatically added with default writeHeader() writeHeader(globalHolder.outfileBam, header); } } } // ---------------------------------------------------------------------------- // Function myWriteRecord() // ---------------------------------------------------------------------------- template inline void myWriteRecord(TLH & lH, TRecord const & record) { using TGH 
= typename TLH::TGlobalHolder; if (lH.options.outFileFormat == 0) // BLAST { SEQAN_OMP_PRAGMA(critical(filewrite)) { writeRecord(lH.gH.outfile, record); } } else // SAM or BAM { // convert multi-match blast-record to multiple SAM/BAM-Records std::vector bamRecords; bamRecords.resize(record.matches.size()); String> protCigar; std::string protCigarString = "*"; auto mIt = begin(record.matches, Standard()); for (auto & bamR : bamRecords) { // untranslate for sIsTranslated if (sIsTranslated(TGH::blastProgram)) { bamR.beginPos = mIt->sStart * 3 + std::abs(mIt->sFrameShift) - 1; if (mIt->sFrameShift < 0) bamR.beginPos = record.qLength - bamR.beginPos; } else { bamR.beginPos = mIt->sStart; } bamR.flag = BAM_FLAG_SECONDARY; // all are secondary for now if (mIt->qFrameShift < 0) bamR.flag |= BAM_FLAG_RC; // truncated query name bamR.qName = prefix(record.qId, std::find(begin(record.qId, Standard()), end(record.qId, Standard()), ' ') - begin(record.qId, Standard())); // reference ID bamR.rID = mIt->_n_sId; // compute cigar if (lH.options.samBamTags[SamBamExtraTags<>::Q_AA_CIGAR]) // amino acid cigar, too? 
{ clear(protCigar); // native protein if ((TGH::blastProgram == BlastProgram::BLASTP) || (TGH::blastProgram == BlastProgram::TBLASTN)) blastMatchOneCigar(protCigar, *mIt, record, lH); else if (qIsTranslated(TGH::blastProgram)) // translated blastMatchTwoCigar(bamR.cigar, protCigar, *mIt, record, lH); else // BLASTN can't have protein sequence blastMatchOneCigar(bamR.cigar, *mIt, record, lH); } else { if ((TGH::blastProgram != BlastProgram::BLASTP) && (TGH::blastProgram != BlastProgram::TBLASTN)) blastMatchOneCigar(bamR.cigar, *mIt, record, lH); } // we want to include the seq bool writeSeq = false; if (lH.options.samBamSeq > 1) { writeSeq = true; } else if (lH.options.samBamSeq == 1) // only uniq sequences { if (mIt == begin(record.matches, Standard())) { writeSeq = true; } else { decltype(mIt) mPrevIt = mIt - 1; writeSeq = ((mIt->qFrameShift != mPrevIt->qFrameShift) || (beginPosition(mIt->alignRow0) != beginPosition(mPrevIt->alignRow0)) || (endPosition(mIt->alignRow0) != endPosition(mPrevIt->alignRow0))); } } if (TGH::blastProgram == BlastProgram::BLASTN) { if (lH.options.samBamHardClip) { if (writeSeq) bamR.seq = infix(source(mIt->alignRow0), beginPosition(mIt->alignRow0), endPosition(mIt->alignRow0)); } else { if (writeSeq) bamR.seq = source(mIt->alignRow0); } } else if (qIsTranslated(TGH::blastProgram)) { if (lH.options.samBamHardClip) { if (writeSeq) _untranslateSequence(bamR.seq, lH.gH.untranslatedQrySeqs[mIt->_n_qId], mIt->qStart, mIt->qEnd, mIt->qFrameShift); } else { if (writeSeq) _untranslateSequence(bamR.seq, lH.gH.untranslatedQrySeqs[mIt->_n_qId], decltype(length(source(mIt->alignRow0)))(0u), length(source(mIt->alignRow0)), mIt->qFrameShift); } } // else original query is protein and cannot be printed // custom tags //TODO untranslate? 
// if (lH.options.samBamTags[SamBamExtraTags<>::Q_START]) // appendTagValue(bamR.tags, // std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_START]), // uint32_t(mIt->qStart), 'I'); // case S_START: if (lH.options.samBamTags[SamBamExtraTags<>::E_VALUE]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::E_VALUE]), float(mIt->eValue), 'f'); if (lH.options.samBamTags[SamBamExtraTags<>::BIT_SCORE]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::BIT_SCORE]), uint16_t(mIt->bitScore), 'S'); if (lH.options.samBamTags[SamBamExtraTags<>::SCORE]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::SCORE]), uint8_t(mIt->alignStats.alignmentScore), 'C'); if (lH.options.samBamTags[SamBamExtraTags<>::P_IDENT]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::P_IDENT]), uint8_t(mIt->alignStats.alignmentIdentity), 'C'); if (lH.options.samBamTags[SamBamExtraTags<>::P_POS]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::P_POS]), uint16_t(mIt->alignStats.alignmentSimilarity), 'S'); if (lH.options.samBamTags[SamBamExtraTags<>::Q_FRAME]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_FRAME]), int8_t(mIt->qFrameShift), 'c'); if (lH.options.samBamTags[SamBamExtraTags<>::S_FRAME]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::S_FRAME]), int8_t(mIt->sFrameShift), 'c'); if (lH.options.samBamTags[SamBamExtraTags<>::S_TAX_IDS]) { //TODO append integer array, instead of transforming to string CharString buf; auto it = begin(buf); if (length(mIt->sTaxIds) == 0) { buf = "*"; } else { appendNumber(it, mIt->sTaxIds[0]); for (unsigned i = 1; i < length(mIt->sTaxIds); ++i) { write(it, ";"); appendNumber(it, mIt->sTaxIds[i]); } } appendTagValue(bamR.tags, 
std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::S_TAX_IDS]), buf, 'Z'); } if (lH.options.samBamTags[SamBamExtraTags<>::LCA_ID]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::LCA_ID]), record.lcaId, 'Z'); if (lH.options.samBamTags[SamBamExtraTags<>::LCA_TAX_ID]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::LCA_TAX_ID]), uint32_t(record.lcaTaxId), 'I'); if (lH.options.samBamTags[SamBamExtraTags<>::Q_AA_SEQ]) { if ((TGH::blastProgram == BlastProgram::BLASTN) || (!writeSeq)) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_AA_SEQ]), "*", 'Z'); else if (lH.options.samBamHardClip) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_AA_SEQ]), infix(source(mIt->alignRow0), beginPosition(mIt->alignRow0), endPosition(mIt->alignRow0)), 'Z'); else // full prot sequence appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_AA_SEQ]), source(mIt->alignRow0), 'Z'); } if (lH.options.samBamTags[SamBamExtraTags<>::Q_AA_CIGAR]) { if (empty(protCigar)) { protCigarString = "*"; } else { clear(protCigarString); for (unsigned i = 0; i < length(protCigar); ++i) { appendNumber(protCigarString, protCigar[i].count); appendValue(protCigarString, protCigar[i].operation); } } appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::Q_AA_CIGAR]), protCigarString, 'Z'); } if (lH.options.samBamTags[SamBamExtraTags<>::EDIT_DISTANCE]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::EDIT_DISTANCE]), uint32_t(mIt->alignStats.alignmentLength - mIt->alignStats.numMatches), 'I'); if (lH.options.samBamTags[SamBamExtraTags<>::MATCH_COUNT]) appendTagValue(bamR.tags, std::get<0>(SamBamExtraTags<>::keyDescPairs[SamBamExtraTags<>::MATCH_COUNT]), uint32_t(length(record.matches)), 'I'); // goto next match ++mIt; } 
bamRecords.front().flag -= BAM_FLAG_SECONDARY; // remove BAM_FLAG_SECONDARY for first SEQAN_OMP_PRAGMA(critical(filewrite)) { for (auto & r : bamRecords) writeRecord(lH.gH.outfileBam, r); } } } // ---------------------------------------------------------------------------- // Function myWriteFooter() // ---------------------------------------------------------------------------- template inline void myWriteFooter(TGH & globalHolder, TLambdaOptions const & options) { if (options.outFileFormat == 0) // BLAST { writeFooter(globalHolder.outfile); } } #endif // LAMBDA_SEARCH_OUTPUT_H_ lambda-lambda-v2.0.1/src/shared_definitions.hpp000066400000000000000000000134661445553061700215230ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // shared_definitions.hpp: size types, suffix-array/index configuration and alphabet aliases // ========================================================================== #ifndef SEQAN_SHARED_DEFINITIONS_H_ #define SEQAN_SHARED_DEFINITIONS_H_ #include #include using namespace seqan; // ========================================================================== // Metafunctions // ========================================================================== // SIZE TYPES // Expected Number of Sequences template using SizeTypeNum_ = uint32_t; // Expected Lengths of Sequences template struct SizeTypePosMeta_ { #ifdef LAMBDA_LONG_PROTEIN_SUBJ_SEQS using Type = uint32_t; #else using Type = uint16_t; #endif }; template <> struct SizeTypePosMeta_ { // DNA sequences are expected to be longer using Type = uint32_t; }; template using SizeTypePos_ = typename SizeTypePosMeta_::Type; // suffix array overloads namespace seqan { template struct SAValue, TSpec2>, TSpec3> > { typedef Pair, SizeTypePos_, Pack> Type; }; template struct SAValue, TFunctor>, TSpec3> > { typedef Pair, SizeTypePos_, Pack> Type; }; template struct SAValue, TFunctor>, TFunctor2>, TSpec3> > { typedef Pair, SizeTypePos_, Pack> Type; }; template struct SAValue, TSpec3> > { typedef Pair, SizeTypePos_, Pack> Type; }; template struct DefaultIndexStringSpec> { #if !defined(LAMBDA_INDEXER) && defined(LAMBDA_MMAPPED_DB) using Type = MMap<>; #else using Type = Alloc<>; #endif }; // our custom Bam Overload
// Specialisation of FormattedFileContext (still inside namespace seqan) whose `Type` is
// a BamIOContext. Its helper typedefs take the string spec from DefaultIndexStringSpec
// (see above), declare a StringSet-based name store with InfixSegment elements and wrap
// that store in a NameStoreCache.
// NOTE(review): the `<...>` template argument lists of this and the surrounding
// declarations are missing in this copy of the file, so the exact template parameters
// cannot be confirmed from here -- restore them from upstream before editing.
template struct FormattedFileContext, TStorageSpec> { typedef typename DefaultIndexStringSpec>::Type TStringSpec; // see above typedef StringSet, InfixSegment> > TNameStore; typedef NameStoreCache TNameStoreCache; typedef BamIOContext Type; }; } // Index Specs struct LambdaFMIndexConfig { using LengthSum = size_t; #if !defined(LAMBDA_INDEXER) && defined(LAMBDA_MMAPPED_DB) using TAlloc = MMap<>; #else using TAlloc = Alloc<>; #endif using Bwt = Levels >; using Sentinels = Levels >; static const unsigned SAMPLING = 10; }; struct LambdaFMIndexConfigInBi : LambdaFMIndexConfig { using Bwt = Levels >; }; template using TFMIndex = FMIndex; template using TFMIndexInBi = FMIndex; // lazy... template using TCDStringSet = StringSet > >; template using OrigQryAlph = typename std::conditional< (p == BlastProgram::BLASTN) || (p == BlastProgram::BLASTX) || (p == BlastProgram::TBLASTX), Dna5, AminoAcid>::type; template using OrigSubjAlph = typename std::conditional< (p == BlastProgram::BLASTN) || (p == BlastProgram::TBLASTN) || (p == BlastProgram::TBLASTX), Dna5, AminoAcid>::type; template using TransAlph = typename std::conditional<(p == BlastProgram::BLASTN), Dna5, AminoAcid>::type; template using RedAlph = typename std::conditional<(p == BlastProgram::BLASTN), Dna5, TRedAlph_>::type; // ========================================================================== // Global variables // ========================================================================== // this is increased after incompatible changes to on-disk format constexpr uint64_t indexGeneration = 1; #endif // header guard lambda-lambda-v2.0.1/src/shared_misc.hpp000066400000000000000000000320631445553061700201350ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // store.h: contains types and definitions for storing sequences and indices // ========================================================================== #ifndef LAMBDA_SHARED_MISC_H_ #define LAMBDA_SHARED_MISC_H_ #include #include #include #include #if __has_include() #include #endif #include #include #include #include #include using namespace seqan; // ============================================================================ // Forwards // ============================================================================ // ============================================================================ // Metafunctions // ============================================================================ // makes partial function specialization convenient template using MyEnableIf = typename std::enable_if::type; // ============================================================================ // Functions for translation and retranslation // ============================================================================ template inline std::basic_ostream & operator<<(std::basic_ostream & out, const Iter, seqan::Packed<> >, seqan::Packed<> > it) { out << *it; return out; } template inline bool inRange(TPos const i, TPos const beg, TPos const end) { return ((i >= beg) && (i < end)); } inline int64_t intervalOverlap(uint64_t const s1, uint64_t const e1, uint64_t const s2, uint64_t const e2) { return std::min(e1, e2) - 
std::max(s1, s2); } inline void printProgressBar(uint64_t & lastPercent, uint64_t curPerc) { //round down to even curPerc = curPerc & ~1; // #pragma omp critical(stdout) if ((curPerc > lastPercent) && (curPerc <= 100)) { for (uint64_t i = lastPercent + 2; i <= curPerc; i+=2) { if (i == 100) std::cout << "|" << std::flush; else if (i % 10 == 0) std::cout << ":" << std::flush; else std::cout << "." << std::flush; } lastPercent = curPerc; } } AlphabetEnum detectSeqFileAlphabet(std::string const & path) { SeqFileIn infile(path.c_str()); CharString meta; CharString seq; readRecord(meta, seq, infile); // for the alphabet test, ignore masks for (char & c : seq) c = std::toupper(c, std::locale()); if ((CharString(String(seq)) == seq) || (CharString(String(seq)) == seq)) { return AlphabetEnum::DNA5; } else if (CharString(String(seq)) == seq) { std::cerr << "\nWARNING: You query file was detected as non-standard DNA, but it could be AminoAcid, too.\n" "To explicitly read as AminoAcid, add '--query-alphabet aminoacid'.\n" "To ignore and disable this warning, add '--query-alphabet dna5'.\n"; return AlphabetEnum::DNA5; } else if (CharString(String(seq)) == seq) { return AlphabetEnum::AMINO_ACID; } throw std::runtime_error("Your query file contains illegal characters in the first sequence."); // unreachable return AlphabetEnum::AMINO_ACID; } // ---------------------------------------------------------------------------- // Function readRecord(Fasta); an overload that truncates Ids at first Whitespace // ---------------------------------------------------------------------------- template inline void _myReadRecordsImpl(TCDStringSet> & meta, TSeqStringSet & seq, FormattedFile & file, TRunnable && runnable) { typedef typename SeqFileBuffer_::Type TSeqBuffer; TSeqBuffer seqBuffer; // reuse the memory of context(file).buffer for seqBuffer (which has a different type but same sizeof(Alphabet)) swapPtr(seqBuffer.data_begin, context(file).buffer[1].data_begin); 
swapPtr(seqBuffer.data_end, context(file).buffer[1].data_end); seqBuffer.data_capacity = context(file).buffer[1].data_capacity; for (uint64_t count = 0; !atEnd(file); ++count) // count not used for abort condition { readRecord(context(file).buffer[0], seqBuffer, file); // run whatever magic we are pushing in: runnable(context(file).buffer[0], count); appendValue(meta, context(file).buffer[0]); appendValue(seq, seqBuffer); } swapPtr(seqBuffer.data_begin, context(file).buffer[1].data_begin); swapPtr(seqBuffer.data_end, context(file).buffer[1].data_end); context(file).buffer[1].data_capacity = seqBuffer.data_capacity; seqBuffer.data_capacity = 0; } // ---------------------------------------------------------------------------- // Generic Sequence loading // ---------------------------------------------------------------------------- template void myReadRecords(TCDStringSet> & ids, TCDStringSet> & seqs, TFile & file, TRunnable && runnable) { TCDStringSet> tmpSeqs; // all IUPAC nucleic acid characters are valid input try { _myReadRecordsImpl(ids, tmpSeqs, file, std::forward(runnable)); } catch(ParseError const & e) { std::string err; err += "\nParseError thrown: "; err += e.what(); err += "\nMake sure that the file is standards compliant. If you get an unexpected character warning " "make sure you have set the right program parameter (-p), i.e. 
" "Lambda expected nucleic acid alphabet, maybe the file was protein?\n"; throw std::runtime_error(err); } seqs = tmpSeqs; // convert IUPAC alphabet to Dna5 } template void myReadRecords(TCDStringSet> & ids, TCDStringSet> & seqs, TFile & file, TRunnable && runnable) { try { _myReadRecordsImpl(ids, seqs, file, std::forward(runnable)); } catch(ParseError const & e) { std::string err; err += "\nParseError thrown: "; err += e.what(); err += "\nMake sure that the file is standards compliant.\n"; throw std::runtime_error(err); } if (length(seqs) > 0) { // warn if sequences look like DNA if (CharString(String(CharString(seqs[0]))) == CharString(seqs[0])) std::cout << "\nWarning: The first query sequence looks like nucleic acid, but amino acid is expected.\n" " Make sure you have set the right program parameter (-p).\n"; } } template void myReadRecords(TCDStringSet> & ids, TCDStringSet> & seqs, TFile & file) { myReadRecords(ids, seqs, file, [] (auto const &, uint64_t const) {}); } // ---------------------------------------------------------------------------- // print if certain verbosity is set // ---------------------------------------------------------------------------- template inline void myPrintImpl(SharedOptions const & /**/, T const & first) { std::cout << first; } inline void myPrintImpl(SharedOptions const & options, std::stringstream const & first) { std::string str = first.str(); // std::cerr << "terminal cols: " << options.terminalCols // << " str.size() " << str.size() << "\n"; if (options.isTerm && (str.size() >= (options.terminalCols -12))) std::cout << str.substr(str.size()-options.terminalCols+12, options.terminalCols); else std::cout << str; } template inline void myPrintImpl(SharedOptions const & options, T const & first, Args const & ... args) { myPrintImpl(options, first); myPrintImpl(options, args...); } template inline void myPrintImplThread(SharedOptions const & options, // T const & first, Args const & ... 
args) { SEQAN_OMP_PRAGMA(critical(stdout)) { // std::cout << "\033[" << omp_get_thread_num() << "B"; // std::cout << "\033E"; if (options.isTerm) { for (unsigned char i=0; i< omp_get_thread_num(); ++i) std::cout << std::endl; std::cout << "\033[K"; } std::cout << "Thread " << std::setw(3) << omp_get_thread_num() << "| "; myPrintImpl(options, args...); std::cout << "\n" << std::flush; if (options.isTerm) std::cout << "\033[" << omp_get_thread_num()+1 << "A"; } } template inline void myPrint(SharedOptions const & options, const int verbose, Args const &... args) { if (options.verbosity >= verbose) { #if defined(_OPENMP) if (omp_in_parallel()) myPrintImplThread(options, args...); else #endif myPrintImpl(options, args...); std::cout << std::flush; } } template inline void appendToStatusImpl(std::stringstream & status, T const & first) { status << first; } template inline void appendToStatusImpl(std::stringstream & status, T const & first, Args const & ... args) { appendToStatusImpl(status, first); appendToStatusImpl(status, args...); } template inline void appendToStatus(std::stringstream & status, SharedOptions const & options, const int verbose, Args const & ... 
args) { if (options.verbosity >= verbose) appendToStatusImpl(status, args...); } // ---------------------------------------------------------------------------- // Function fileSize() // ---------------------------------------------------------------------------- uint64_t fileSize(char const * fileName) { struct stat st; if (stat(fileName, &st) != 0) throw std::runtime_error{"Could not read File.\n"}; return st.st_size; } // ---------------------------------------------------------------------------- // Function dirSize() // ---------------------------------------------------------------------------- uint64_t dirSize(char const * dirName) { DIR *d; struct dirent *de; struct stat buf; int exists; uint64_t total_size; d = opendir(dirName); if (d == NULL) throw std::runtime_error{"Could not read index directory.\n"}; total_size = 0; for (de = readdir(d); de != NULL; de = readdir(d)) { std::string curPath = dirName + std::string{"/"} + de->d_name; exists = stat(curPath.c_str(), &buf); if (exists < 0) { closedir(d); throw std::runtime_error{"Could not read index directory.\n"}; } else { total_size += buf.st_size; } } closedir(d); return total_size; } // ---------------------------------------------------------------------------- // Function fileSize() // ---------------------------------------------------------------------------- uint64_t getTotalSystemMemory() { #if defined(__APPLE__) uint64_t mem; size_t len = sizeof(mem); sysctlbyname("hw.memsize", &mem, &len, NULL, 0); return mem; #elif defined(__unix__) long pages = sysconf(_SC_PHYS_PAGES); long page_size = sysconf(_SC_PAGE_SIZE); return pages * page_size; #else # error "no way to get phys pages" #endif } #endif // header guard lambda-lambda-v2.0.1/src/shared_options.hpp000066400000000000000000000236161445553061700207010ustar00rootroot00000000000000// ========================================================================== // lambda // ========================================================================== // 
Copyright (c) 2013-2019, Hannes Hauswedell

// Copyright (c) 2016-2019, Knut Reinert and Freie Universität Berlin // All rights reserved. // // This file is part of Lambda. // // Lambda is Free Software: you can redistribute it and/or modify it // under the terms found in the LICENSE[.md|.rst] file distributed // together with this file. // // Lambda is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // // ========================================================================== // options.h: contains the options and argument parser // ========================================================================== #ifndef LAMBDA_SHARED_OPTIONS_H_ #define LAMBDA_SHARED_OPTIONS_H_ #include #include #include #include #include #include #include #include using namespace seqan; // ========================================================================== // Classes // ========================================================================== // -------------------------------------------------------------------------- // Enum DbIndexType // -------------------------------------------------------------------------- enum class DbIndexType : uint8_t { SUFFIX_ARRAY, FM_INDEX, BI_FM_INDEX }; inline std::string _indexEnumToName(DbIndexType const t) { switch (t) { case DbIndexType::SUFFIX_ARRAY: return "suffix_array"; case DbIndexType::FM_INDEX: return "fm_index"; case DbIndexType::BI_FM_INDEX: return "bi_fm_index"; } throw std::runtime_error("Error: unknown index type"); return ""; } inline DbIndexType _indexNameToEnum(std::string const t) { if (t == "suffix_array") return DbIndexType::SUFFIX_ARRAY; else if (t == "bi_fm_index") return DbIndexType::BI_FM_INDEX; else if (t == "fm_index") return DbIndexType::FM_INDEX; throw std::runtime_error("Error: unknown index type"); return DbIndexType::FM_INDEX; } // -------------------------------------------------------------------------- // Enum AlphabetEnum // 
-------------------------------------------------------------------------- constexpr const char * _alphTypeToName(Dna const & /**/) { return "dna4"; } constexpr const char * _alphTypeToName(Dna5 const & /**/) { return "dna5"; } constexpr const char * _alphTypeToName(AminoAcid const & /**/) { return "aminoacid"; } constexpr const char * _alphTypeToName(ReducedAminoAcid const & /**/) { return "murphy10"; } enum class AlphabetEnum : uint8_t { DNA4, DNA5, AMINO_ACID, MURPHY10, }; inline std::string _alphabetEnumToName(AlphabetEnum const t) { switch (t) { case AlphabetEnum::DNA4: return _alphTypeToName(Dna{}); case AlphabetEnum::DNA5: return _alphTypeToName(Dna5{}); case AlphabetEnum::AMINO_ACID: return _alphTypeToName(AminoAcid{}); case AlphabetEnum::MURPHY10: return _alphTypeToName(ReducedAminoAcid{}); } throw std::runtime_error("Error: unknown alphabet type"); return ""; } inline AlphabetEnum _alphabetNameToEnum(std::string const t) { if (t == _alphTypeToName(Dna{})) return AlphabetEnum::DNA4; else if (t == _alphTypeToName(Dna5{})) return AlphabetEnum::DNA5; else if (t == _alphTypeToName(AminoAcid{})) return AlphabetEnum::AMINO_ACID; else if (t == _alphTypeToName(ReducedAminoAcid{})) return AlphabetEnum::MURPHY10; throw std::runtime_error("Error: unknown alphabet type"); return AlphabetEnum::DNA4; } inline uint64_t _alphabetEnumToSize(AlphabetEnum const t) { switch (t) { case AlphabetEnum::DNA4: return sizeof(SizeTypePos_); case AlphabetEnum::DNA5: return sizeof(SizeTypePos_); case AlphabetEnum::AMINO_ACID: return sizeof(SizeTypePos_); case AlphabetEnum::MURPHY10: return sizeof(SizeTypePos_>); } throw std::runtime_error("Error: unknown alphabet type"); return 0; } // -------------------------------------------------------------------------- // Class SharedOptions // -------------------------------------------------------------------------- // This struct stores the options from the command line. struct SharedOptions { // Verbosity level. 
0 -- quiet, 1 -- normal, 2 -- verbose, 3 -- very verbose. int verbosity = 1; std::string commandLine; std::string indexDir; DbIndexType dbIndexType; AlphabetEnum subjOrigAlphabet; AlphabetEnum transAlphabet; AlphabetEnum reducedAlphabet; GeneticCodeSpec geneticCode = static_cast(0);//CANONICAL; BlastProgram blastProgram = BlastProgram::UNKNOWN; bool isTerm = true; unsigned terminalCols = 80; unsigned threads = 1; bool hasSTaxIds = false; SharedOptions() { isTerm = isTerminal(); if (isTerm) { unsigned _rows; getTerminalSize(terminalCols, _rows); } } }; // ========================================================================== // Functions // ========================================================================== // -------------------------------------------------------------------------- // Function sharedSetup() // -------------------------------------------------------------------------- void sharedSetup(ArgumentParser & parser) { // Set short description, version, and date. std::string versionString = SEQAN_APP_VERSION; setVersion(parser, versionString); setDate(parser, __DATE__); setShortCopyright(parser, "2013-2019 Hannes Hauswedell, released under the GNU AGPL v3 (or later); " "2016-2019 Knut Reinert and Freie Universität Berlin, released under the 3-clause-BSDL"); setCitation(parser, "Hauswedell et al (2014); doi: 10.1093/bioinformatics/btu439"); setLongCopyright(parser, " Copyright (c) 2013-2019, Hannes Hauswedell\n" " All rights reserved.\n" "\n" " This program is free software: you can redistribute it and/or modify\n" " it under the terms of the GNU Affero General Public License as\n" " published by the Free Software Foundation, either version 3 of the\n" " License, or (at your option) any later version.\n" "\n" " Lambda is distributed in the hope that it will be useful,\n" " but WITHOUT ANY WARRANTY; without even the implied warranty of\n" " MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the\n" " GNU General Public License for more details.\n" "\n" " You should have received a copy of the GNU Affero General Public License\n" " along with this program. If not, see .\n" "\n" " Copyright (c) 2016-2019 Knut Reinert and Freie Universität Berlin\n" " All rights reserved.\n" "\n" " Redistribution and use in source and binary forms, with or without\n" " modification, are permitted provided that the following conditions are met:\n" "\n" " * Redistributions of source code must retain the above copyright\n" " notice, this list of conditions and the following disclaimer.\n" " * Redistributions in binary form must reproduce the above copyright\n" " notice, this list of conditions and the following disclaimer in the\n" " documentation and/or other materials provided with the distribution.\n" " * Neither the name of Knut Reinert or the FU Berlin nor the names of\n" " its contributors may be used to endorse or promote products derived\n" " from this software without specific prior written permission.\n" "\n" " THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n" " AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n" " IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE\n" " ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE\n" " FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n" " DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n" " SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n" " CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT\n" " LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY\n" " OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH\n" " DAMAGE.\n"); addDescription(parser, "Lambda is a local aligner optimized for many query " "sequences and searches in protein space. 
It is compatible to BLAST, but " "much faster than BLAST and many other comparable tools."); addDescription(parser, "Detailed information is available in the wiki: " ""); } ArgumentParser::ParseResult parseCommandLineShared(SharedOptions & options, ArgumentParser & parser) { int buf = 0; #ifdef _OPENMP getOptionValue(options.threads, parser, "threads"); omp_set_num_threads(options.threads); #else options.threads = 1; #endif getOptionValue(buf, parser, "verbosity"); switch(buf) { case 0: options.verbosity = 0; break; case 2: options.verbosity = 2; break; default: options.verbosity = 1; break; } return ArgumentParser::PARSE_OK; } #endif // header guard lambda-lambda-v2.0.1/tests/000077500000000000000000000000001445553061700155125ustar00rootroot00000000000000lambda-lambda-v2.0.1/tests/CMakeLists.txt000066400000000000000000000021301445553061700202460ustar00rootroot00000000000000# =========================================================================== # Lambda tests # =========================================================================== cmake_minimum_required (VERSION 3.0.0) enable_testing () include (CTest) ## only subset of tests if (LAMBDA_FASTBUILD) set (PROGS blastp blastx) else () set (PROGS blastn blastp blastx tblastn tblastx) endif () ## basic indexer tests foreach(PROG ${PROGS}) foreach(DI sa fm) add_test (NAME test_mkindex_${PROG}_${DI} COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/maintests.sh "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" ${PROG} ${DI} "MKINDEX" " ") endforeach() endforeach() ## basic search tests foreach(PROG ${PROGS}) foreach(DI sa fm) foreach(FF m0 m8 m9 sam bam m9.gz sam.bz2) add_test (NAME test_search_${PROG}_${DI}_${FF} COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/maintests.sh "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" ${PROG} ${DI} "SEARCH" ${FF}) endforeach() endforeach() endforeach() 
lambda-lambda-v2.0.1/tests/db_nucl.fasta.gz000066400000000000000000003562671445553061700206020ustar00rootroot00000000000000%_Vnucl_db.fasta[s\ɑE<<4^ꎖVI&7V$@RG_2ַ<2U*U3,] H&3%}}xzq~zqrzrzqv~qWr}ՇWڽv}zt?x~h;Flc董^=ͣww?Ow_>jym~|_1E}>ad޵S_8|Ojzi,v<'m{c~Ww[̯#A)pzoݽ0;/ռ:hݝU<˻]yy}c)unc^jN5wzDY1.bݨ>Ѧ6/E޻;u_>nڼ6/<15ۘ;֘ǫm>?!7T7u`̯90Olw.n^I;v0ټc^Txw%7k4ywA~ЦyJ b8jZt7y#ڼH|tm +沟{Bu=!8eܼshŶ>\_eނhuu݋yt;4\hy G+3]QZ\y/4}p/.*@w>t3Ӈ>E߯66,f$нotR_;W\\=LmQD7\hq}vLky3Я̫T]kƕϥ`C~:@:_'!h~!6bxf(r+" Ф;{׶xypON[ܾp~yտ{uͫW?+|}>F{0e*8k6(KE,oXe"tt6.;w^m6\ATms .sMB@x5]~mݟQ9I|ύG.7Sr'ANSރB"3uPQT:qq&6 DIR1N[gͦܦcؒ|Xj==[: a]y7C]y%Mz6WBj$z\@ѥTP#ڸ]a~^`ϝg4, .6Iԣk-E+, iNR]$`k"] u /ijk ʼnAJ8OtƸ+O8IW-}T"*Q5_zw@_,L5 S"=5N!Y̺¾ cNݢH7mIhHq#ÕL[R9Z70 i*:;STGhsEa7uɱ$|v3 ]ȉf$ae?0M'0aT&Q.%zD"Ixޜf#p+3/f3غԝ{x5 EFTk"Iq[}.D<ՄA. ܺꊪ +\n?c@ 6}S_t/B/*tf 5QL$Aq %j@g^l FW ,2٦' 04FaF[G M]/2ӼfK/Θ4BJ !k;H"j $1Ci֍)Z$Bd+08𯅖3]x|RϟYzvon{UNjsK!8:kHe6^ī<! W̙N+6 Pbsb NKwиN"gs$L5aL9Pͭ:R \"i9Jf|'I#yL]+^Gs $loOu#(K2Q9_ϦPj̀N)fw6m.ea}6[ԍgY/37sY!Hp"C']iW5FI@Z*ׁt5 ʜK4j suC$EZJ \&I 9au +OX@ a>a50(JX)',7ι<7JX]FZkEzJk]\t5E5ky/IB&PVR:o$,섅4 x+a!XDWB6+(> )؆Ɏ13L9\ɴRD"XJIM$l_;5]0Z8TvG0G;q?Ǣ'_KXgu )O* Lr=e]V{ѳ~aj;~EW;vD G"rigS{<"rq@GKwn"\ Uϖ*El-Ig<+NhmZ\ҲOgFDUQ-É\9 `$J˫+1nc%FJ<""lPLimcJ N ^rD xZGXL 3*]kNZ_LeoѠK" ZC%l1@ k!_}aˤ8?W'O**&1`o@2lӵB&Io Pz=(3r $Q;˘FV;wJ& VN+1ztIbpz~1&!~ބ}wU_H:ޕ `]qV<;{ω/}$/ver qi*V9"4/V9vh#T($QjcrDLZ#V2T  CFmh<}j#:d*G41o#@O. 
9B rD ډrDf>{ꥆؑ#ؓB .W}\qѩ{ŞZe~"pBZzD_U+j%ټ">:=`hsE(%J:jFŐ"-SupDH7yeh?аYRqCC* ŽbZs؇#*G}+oGC+ H?㺱 Np@:-CCO\ ehFa{]CC ͨ6a[Ĭo?S+ Ai[`-d{zqJ௯_ ߔsQ܊=R0#կY#(u(UX0 l*u7Bg;+9XkI8WwEעxrQ s^$V:kP%ڦtV+mQ7t?S7/O/fWjQdW[`uЫ\"B7$.&Ҷr$ J%YAW/y pݔ=-r*!0*d?4;G__[f̅jvQ%Ibvj.N9\jH ,aO~Ӹ#ls@{&DH׌(~H(" z:v4a,q9"I]$XRmt"2\Bfjk.Tb7}x=yk62iZAR$VX*Zv ҐթPe7bF(jK:01b\|U5K :MWTh[ -͙Cf,ޫZbmS`6PPX:_CA^D l^ȉ?_Uv3ՃQ]J|[/OGW[i" dƍ(d5p\֯]QdNVqڙ :WP2a33X67(8takvdrk .nc86NXnrw!Q;lL,9޴"uB;KוUqe,84zʂp3VYpPS.e7 A~`bp3zr@'/^<}rq~rYuytw�_7W|ݽ{u{}^/r_޼l:7$_^jzim@:*g;J4J"D]?j$qKڬsXF iI]n$@x !&Fu-_UeJ0Lg 'Hv&= = >;'0.5D\G/gX M!Ϲ?Kyc4 dTPZGU-@=AP,e!ydex#-:A-F$(3EmA>FΣ76s6R- [SuBG*)loZ@wR!fcdà4rnk  M7U~4꠨nza,*i#Y+۶çX[Z tH(AwP8pLUOoe//f/=4_n.r7,eov~wWSs]`Bx%PlA( M4VP++@!giWiw,]hayj,4ԁ_E_Um+cK1X֝#%6-P BT.S򏽴hf0K 4f X@@nE+,!`+,aqq{D)3KX'J)[}}uݣ^wR:IJG*J+>#C)t$zV:btXF?@HĽQXQ#Jҁk!qP <<m =K-%t̠a7,#:8Y蠱'm%tõ,NK舶 !?Gm b:*,eЁJ!t `r DxzA >m $m XpwaI :u%tmSb } tuBf$oI 3:ܙ8"D[B0+BТ *F樖h& .[%tLd :СX9;0pXBGnKp%q|BӭYFpW @7FmuH:ahK0C&(c^ X*9댵 yڃ֯BG%tD[Bl б #J;:>鳧_O^FQce~\2G2p.2J>^F(Q'(ǔQe| e2poB8/F9Qlg2Jo^FAV{%^uUWmەQO_F*2 mF**,t2J>\F9.^>?;9}ţu/.]s]"߽~}{qPzEbǩ|0sYca dCYf F~b~HD)-aJDbU6^8N> BU[\3eyW 0"kޫliVXOVϮce;0&i%*Э@ʧQ[cW 4LňViJiTTi!VsM4p%zA;)@"~,`;cx_t$V%|Ã":)\$A\]UU%Q!j$0EsPz)U#bQ da03o!5~`0r`QZdY07 8; ;DXy\$Y,}\$H.*`1_$Z/Ǡ,I˥" +l7*]rJYgg0WCL|ے/-b.SmHi-\Q_sR؄HqޱWX~ג/<]/jå ^ݶsBcTGڒ/|QqK) r!%%p"^;/@O>rr`zw/Mۛ_/w}-/w?o/_}{Ueח|{ڿ$p^Ml.$F@=ʻ/5`=#Z!: 8ވJLKH1@& kz nrS[ܲArlm$1(2p˜4 /Ȉn?M|0ВUa;Fk |ps >Hb#>4jsLn97pޫfuFs>tXQiU^|.EbTLL@եUdT;E/s ?mNv0tJ~guC&N6!٣j'ZN 2D}ڪv"FdcN(Űi:TN\:qBq tpm,*a4B\/_N|.@6N( :}-ͥxE0g(t72^;(.XC/F WOTOJ'fU0(ۤF$4??EWR22҂߰;Փm_=idIm(`w[cT jo<\=ʥoTO TBZz-nG!ӓs'|w^Qc\?0;1$ka?V(C4 $ѫ ܱ";\]Km9$YE[U\k%Z0 1hVUdl\r[֢E EZE[UK nmUE搥 af8WU$NKpC[UFުr,h0z͢?Y;KvZnjC4o-Kf\(8a%b8vm L@Q CFzɴI*ӥ]aE|s\\p߁(<=DUKu8> E1GȠ`l`K%; ;~=Al a"Y8{.tO@ֹ@[Ɍ;!S b[Gz7<ғBf:QEC|>xz!ζԋޖz57-l1Nr0zJǝjfA X,Nngg+B!s$J*E%^t#O/Ξ>9y˃psĿ~o+ B+̛WFPjldr.jdNuHm_=s!,v rA(VEbD6{'#BΰmyDJe kÔFL%V& h6hfg"[ 
PPmJPn)@ID$[c}eO_ǰ8(úB{`9gNqz5SðM#ڇKvu>%9ܳbjHZq>Dg;Վ3L#!ٰW/(F#RW(~|'\`Θ:0jЎ4@."ceY wnc#*U|d5M] ;̐=Rtß0h8n0;º8T7^gvPmf|D=&JWywcnAAUZo\hA26J}Qqn!G|eҎc$dUݹ֣ؔJo1XQ5(ztEˬ0p`[ UV)c&TjgI@%'移\\raQlI=\/i=Q E-_`}(FMǡ(*"(TfU[ĜW(s f*,c7Lr v(f۾`⒁Ut;clHɄFv ^plS)bXV?{ N|B)Rg6a:Q'ȅl6+s LmFU>.MF u:)/z0Zu9 z 8w4bzH2z(ĦIu'KDzrb`p $OÅʘ{Ő/gt3SE[, 4ჵ& n<"m,"f;ٳӋ?3& inMۧMǙOڏ37m27csLs)sSqSv`n~lnJM5M?fn"`s)sSqSM<#MۧMǙOڏ37m27~i8s6>anj?ܴ}tCM<#MۧMdn>enZRұ)8ͪ]Rz,D@\"Bjj}o!;ugg/NN_<9}7o{ìIZk_1Weoniuht_` țneU6 PC5݄ xں Yiq3Ms}ۻ!xERO7'℺ mSqtp'0Xp:.i^jbbV0')=]d*}]b;a~|%ӕMO|umzbkb[eƭ/"@q@a"Y^ELbU嚧QqHP7Ԭ=2cYYE=q¶?b[ #cc[f_(3:*[UѬb5<2iJۂAc1F A~*+f*+z+qMKT12coUbX.,=Jj>x#O,CvQFi6iޝ7c>lxW赤N[Ѝ]W߉zj|X[ ꇁ/6a֏+$|{öz?!\{~s4/ڄZ/6jQL 5֘!1kF>9}ų'OwɓG)cۻw|sr8 =W] B{ wzÛs} J0 >Qdžp5.ӛ/ JD\R|dc,hgeQ,r[$Y&^<8q[۲7F"{[xhڱ;zţv}y%;/)7't6uзaאA1lytQ*@#bAÏdF$#i[:oOY|QCanGBP;BSܗm>?/Q!Bhc^aJ.6X$?m6.ajFS44SpP"lEGK赕q7R"tǕS~c{Q[BW//?EWRSW{^%^6wp[*.QrUX[ 8ܧxȈ%;s^}bn+#:( dAQQ;x%Kk(ǫ,s_ٱ=@OHUi 2OxNX] E}{O))S:x_d}ONOOO/xz݁I<]<.~rOo_]}}w˳g[yMZj WKyӨ\Yl[E n! (pE",V6V1A hH6i}In=+ A( Z!Vi p3dZ\L{[j_s*̣~oQqQ-jhj0lt0qJg1p.rM:?LF$ULizD$uRxI}_AKC-Hͨ{ײCnȗTYQNchW'3kYW+rTRUP>e6m0& _H(2kޕѹwXS模\m! 
ю!V T,4\~0JFJ)&./xזFZݭoK#eD;I-P8 82\Wۍ58}4o(}Ϙ>Z ']7z\t[my7te $ٌ,li+{TY1׸1[dUDV s֬>s+s @^uZ))Ǟ?/](ŋ^^@>|izg>:4J~ptRV;ܰc pz}u(1  ,'*T>1y3;Ldx"s7_]X"sDFC,HdrDV\Yݱs4qVD[1[?QDK9k؋&cXayPT~f2p'pg|.H>!緆@tXfg~gGZz ^p p4e\bIxF5VW4NxêڳgulܩoG5jۃ5@>X3 5mX6$ѻƻcv80=a+"p+a+UP +hPvLlHGvP\z~LrE4ӶL<.:[K0`ځ}q%m*ɚ`*dZ<( P1`ܧ< JZEBKw/gز/ wO(g'.NΟ8lb޼Wz9o\%~{Roxwxs*^=eR$rg8B"$O?`v Y:y PYn s!: w5+IZBȔ?9j1-Pި]Rlޫ2ӑC䐁ԔݺpHSEHmA tM|T=2K~]͑\tOUN/K%Ð,IEAN?_zӾ^&Z?-)Za#@9mU^>h?P*`ߍx\@9ȦLT$4 qM'9J.«Л[ ,Ny(9ח-uUXEK*ghDz;&K,ŪDG6)2EVZ6Un zI&7Dd>F:qRA{d+^ҧ47lC# SK'ai1!iͧťkm5hem4ѱK!r^mVތFO[HQ u>AE7`BN} K:Gc Ku|ՃáVkXzv"b{ScɠIKt٘fQR|fyS9L%Fm=,UѤb8&ӓO^\<;/IC_vonB7WXVrjGJ֢.U҄ŤA0elAi_6OLZjL:Vz;>]֑DR˫X(fPR%BO-V2 v" ZAU O>u#"HU4F_7kG@ 5V7ZCIo$PmAz<{c+eZnu=љ"gzN XUEW!1Y˳*aJ/Xezk  @JO“ }Z_2\(5ۣwбUBv(݂mU%c i>{8ڦLe;9*(Dm])k~FZuKgq-(8 ,= 90Ht?ǰߐ%ڬnRpRvݏqF} :!25Z K#|%=+A!`Q\Ј4~p0%ζ,?q'vhl鳓ONNOOϞGŻW^/pa_߼WjHګ3?'XZ[n z U&ڙPH&!s%tº4 gbGv"Y4"01X;8͋iN#@ǽDh7h=H-f[ FBwe;{Q}U3?yT6:AVCTeԇF6[^;3ʏEr Ehm T|1"^^ ȨohEj[|Z&hMfIL4'ZATGtUn*2O,o/Ԍhn.F艰.eՇPY}ɢ^W3C-C-Ħ߾m&[ŜH}Wc:!՘HqWcEҔ5hY{aeHuyFuq%o=u/fPGJ[ tqH$i8ݳw&3mt%)l>jZIDŽZۓF͇RTi?zrzųVcJY)/aQfí5.6ի9qZHl>qB&^$ۺ ~l QQ6`˃,/mEޱ! SyD~e6KPYW@p͂z:GY w)jGZB3']]61dOJ" Y0QxG|z*jG(pR!vd:r!JU\9,JH!M{.JIoz,E!*O!xH9{Qz/Ĵh&Q"8:~!*t#1lxkXf ֵu mpUzt?7;XH \'9ق(jG׳mN JP0(ȈhªJz,-,Ԏ]89={ӳ1zۆxxǛɦfmEt;JX0bqwja'ZQB3ߌY@j}\NZ1;jDIJ[<&݅Uj+G=hx^A00QznTy>}>XڇjƎfcib2ڇvץ} вm,ޘV i(>p+ɯO4ew1S)hf=+z8=94Z6C VϞs.A\c]iqT8#~Tuf=h%)#vQI$$B ɠKh?SJ^h?im/}RG)m/}G1Ƕ>O)}l{Ƕ>G/gH?"}l{Ƕ>O)}l{@F[ʇ;}f٥|Cɳ''/O/~.Ewm֓"0sGdQ)ޘ@BrpQфhpͻM{W#j6#>QcyTD4[黽VN z>QiG%`X<=*J=*O{TޣRjS*ǁG٩ hDZtQ_䙏zTޣ\ۣa XJv2]4,5N$x:3D4h0hEݻ%w-X'rŇR ̫HעӑK]e?1sKޗ`=!C>{Z p@2[3! EqVb 20"+̈́VT [q .p@%4+c53Bn' )3¡͐{ jij _6YR7k(4faJ0綴 :ĒDexJ"涴 a.D  IG@^iY#t᫮e-4נ2eLffcZZFIZW> K'˻ ºwtA$wFR\'T}55%! 
}~NIGQ!:Rb]RZÓk*fʕ!}z|M:CA Wy`ґ~(˰\ M@tLk=ݝwYJ\&ɪt % mxАے5 U.YI2n[G8HhKoȸ77{/+_G8Un`Yd tVGAOzt.ac^tttb]|TM٦0-O:+c3p;V4e,K]7#V{ԿE\}铓g'O68/:p1zbՁ`R7JnOq܁Q}Quc2VC~Oqnn^P _H%^XrbՍDJ#uMN*Y9ÞaL}apL\/2RwbN#8Aqj[GbO.C C[*Y-CC4'dl3bXTvtN` d@ioPÏBPqa>f00x>A~X 7hAI ' BTQɚ0!j*>tOgXVnychw  &Ǧ8LJg*̈́VA3]Tޭ4-2d[敨Gw|:2//V@?`x2IClgcl1]Wblh+w%؝]p%Un&ZY;%PBKd3; R#)*$_NC!T4@H ,-?=w2  c;k⥋YAb/GOݴ=э)tS \wC:<%oQ2 TAʖK3_ 4>ؤEЋVĒ Pma7q%cQ jEڨQd4.fLcזr,-nkR;g+Z`34oS"R8G"^uv%>Mle]250:ȥ\:.TvK^ElV,H}X.` h (RkwU 4}u}/MCmH .X•ȡaf⠐K3Ҕ41MkfjapA6YkrhXfPWɮ[M}í~tG4Q[/͠*Md{nWR;ps3Mmh,eKB{ׯftK38;xzqr~~h6*&>+'(L5K²v8`px{(G/O<}Q3>M(ܗ(难5jBؑRga }3>+|>qt5}"F_X]N}}nc)!r-ERػ/U a:/Xބ0 aұagO[DW5:cTG}i^ '=4*䧏/+(:'|an}đrwr.+M{}D~ҝAe<ྌ}ru\<SԔCO ӯ܁xĝ aW/'֚,eDz%x8zqt34VfYg;hSvqA?V gðAe j{Soj ECdʜ҆GS]P)=z˖kF+*3 ʽi `v:Xy~zG @ =]ޖpW/qdz<׭ܫzM4ZWa}[fW]hc)܏KLM$3?Ml)l(M#/qgkr!A<"#ڛ]Õ<uvcH=x> r ҫU{ ZcdxnVȫV۫ AYJ%VA{|J km6jpZ7x5xK?ēZgRU07rK DD-'X< dQ. ~74e=V׽~?d, ؉K8TF[->m@;JА~5F7"(n0c ƹ%NR " ͖G*@tcXұіQ‡ ɴ>Jai1GZpj꣄GB֯gF1R2E#&Ϥ.RG[L&_KѭՌA!(gbV!wЕC z [&PϮE=b== N^HN7\0w7np_kǸn^KZN%)t(wPI`*;%=\Q桁xt` ;~{*`54{#1#) EKtS=]`Khc8;zv^M=/D m@7)Ry_&%zlDlDKpuXG[bRJIN\D\4豍%z\;ӗQX-vyV-ѣ= -mHÀ$~ 5$ 1ϲ mITӈ- F$۲ښ[Ж&-) lfDK_-)B,NdDҖ{ "i~۲iI[gBWía"*`P GdO!pX "i>F; !Hwr}x|6\<{E"~|?_ď/<''/^?{s?O7e{5'Rwݞ??L/?bdO>]4o?Pd[ p| jmmy>~ E`E|fiA/(ҖcqXj[Z Ζ#ejm1|s|Ě;ZV#jy,ҖC ,GTrxy>F[\#r԰Ov-Gp]E|Q-3Ir*,ҖC|G[``-χ -hMH[h!ׂu<j6ͯ-χE<st<N_ -|hH[#my>z[,my><iE`rϲt<my>z[ZǺӋOΞ}Q=_T/Ez$YU99}ۛW7_7)5|PW=u>ûß7vBW'I,σLEUlb|D0l|4QKmB -P~2nj-> au7<0^$vV)$ "0Sǯ$=I `9eZ􃁨 ܥ<3 t<:-9]ޞϚ@6_{vK*Fynr.gDZW y).鉶\RWk#J $m uCnGhRMDEſ9 ɱ\X]!Tvq#2 ;1ʮ;nAvD#΁M_)࠾X!'AEA^˓,deW v!HpFx&C`TWZPXqMd!GŞ-& aY`QPkzV~qBtge으vy8м5' B'}=2n 6`@CI)1r}\V@Vq)bRw.ľ*6mwpڰ0(sT1bjxS0LG)XNm6J:Q( =x4.=y9'^&J?=i_̌i?aXbNN(NbۗNo?9g'OON}>٣ۛWN7R7o/ۻ?]TA9U~Xa2 "x}RrRx<)l $J^qnsn3T-6nT 8Al1@"`Nvť n J%|H뼓u]a-])H #r;s ze&H̓ xw@Z:]罖#u2[wN5MV>vW$R{rHH-"&Nΐ\]Z3ҕˊ-Wn"fY@e"5B'M؂ Ƒ 3]r'yj"5<1luImQƆ+X*>[$ =C Ky .9`ϧ%T3cқB*Vhe W+T#)T +4T+͆Ywv?a+I\ I)ITAI6Jhh9mtƳ'ONahۻo"7{W}2c?]_e7PeR K$Rc(iiHB JnXT%l A(E)m(,UήU 
`pWe|vHdTq5.$H쪄ҏ3ggDF/${0HM8@n0+CoBxbTM ʫ2f+c +ҠnH\EpYd^xC?Fb@b:t%F (j%[YEtIyVj8.C1P@`K"Q˞ PS†'2֗^AKh򐧕 %n.D5 N"&͒2uy8NBqed-Xd t>N0yr9UF 6J/!FPQj9 즫 CGz>Ұ2y l0_2Jˀv xSNVh}PSClOBe[,\7 cccccccb jӋ?y8GYJ[s&+)c6[`A@Wpl>0yn G6pRi.t^Wz.\d.?I{> tG-?Q۴͡󸟞m =󸗞kAI(Q7>"?a%[ϭܪ@ʬrVw ̝rpy8?]aqyv&![u&"2 nG n{7?cwA:.Xm;:Tf@9,{ɴ*mKgS+ɷ%ҵs8G7ے|g%v |,z%߽4x8GĶ$_sے|Mb-%¼L-ܶ/εŹ~P.Ź%ss_ۛWJ]J'O^<=3wAܾ滛ov~w;yq1Hð2ܣkf'(ݱ)-'*ET"HI*86d(?)S2l\nK 烅({LP(CrS$MC$![eAh}Ct $, cзnA[fAI w>-NiC9]\\uP@jʤEl+*p# .)lT}T5gUo0R-\ՙoKc=-4W(u` "E1Z3C 2,x]w(9`b= TXJF Rh+ r uiF T$-= `3a:\f^(䠘i5^$P gݚ+n,wh_LOJjI =ݏT$KVF~-ㇷPJmY$<ǏZ ؤl]HjZ}x!QQUu9(HiB\Ehr*3V7ᚢLi^R)U%7[u/*P^ȳljj*pTNG+rhHādmMt9r]u'UF߿ϢMH ( y0 @.mD7”ɋȕUR[ZZ{UC*mǛcD9A%[xyX7JW.r.̪ ː4Mܒ&N5P؅ݢj0f`:)MyrҘ3&Xu^0)XBwsul)"dUW&QTj'3o6uay4╎_W:_W:D\{~tr|p~vuxpyruAv]\a/wwo`|w'n~o~~M7w~{}~q}=*Î`oPuM@6>MQ41ҫBnPci.k<4[ e?:#=Ra[hȹ'.R?u`|N ʺ3=/bnaC{l#K51+aՊY=(^f{ m44أB=#S.(]$1mc55>m|5mچ#Ic6k]=BM=8dUx"ڐ5 \E 2b]m2[ ~3-iH% jD[K$Q.kOFcC3:% aJ`n EЬ`bN;3S/Ъ69d\{!W1+ 1hAI'e4&XFNR"`ޒx*H"t%XY#,6/P>3S}w'EC6$*~ ")to0+ +."ѬBsVWSQd3)fuogu\u-㣃Wa+֭c֚ . 
PMZǂEYON.N./|>AȞ A^!+y M HM B љy\=,tg^wo뎶~z_7T\xĄj^Ƣ#@@\t,q1CÓE pYL;):r!3^HN:g[]0jshkI6FJW<B[<(0>Y$"hK ]J+n.Gd FQTP=#}YAUSw CT " XJE/l{"l=,{[Bi6-u,>7ѕBH^*Y> hB } _K#L5CY7eÇFb4l-(M{//|7b_AZXjocY&LR؅O[Bh #>/\2A%"^Fn1^>L^PbA»\NWM+'Kpmݙn8QKR,#uî!GAHm c[ !l}ZU]غ2eZB=dt|BzFv~CF$7/YXj3E=[V֪cM֖dzp'GW'''րWk5j xī5״]_\\Z^yW^~qu%~4 ѢlFNxqqH 8zp=EJ߳_wrE9UŒ!.vrŷ}tK]ӌg/Cz _BM'IGG?rr4՘84&G?lr=9ᓣ'YGO9>91~v8z<'߬">&1H D@dt6G?&я DLx Dxpcsg}1D0,49zJlvяP^=//E)ﶈ &cGZt3Vb倔59zM]_`dg0X1'GPh^g1OL[R%$@OaM/B$E!⨲IzYEOی;=><_#N6l=_ܾYL ,ד=r;yѿ"\_X)%N2n?M̝RU@VhԇaCi9: AU4K=ZsMb@J F"CRB= b/dn/#ޡ;@ "E Z*0JƂqcvB/ӣDPwޅhy|@Nɘ;ߟ']R?U |/̧>M;ŀAzɖEݼzAꬢQ<`)( k *ƦsCJ0&?'o΁ºj"ֱVm`L'#3)LZث6NAPwċ*ݠr:&P?%KF`Ӎb:z)PpTS'S=fVQ+B5AA PҁoI)yxМ"1pQʂ yt>v|!b鍤L~uҦ,$ 4zip3,HHAa,OZJSo_ѥ;Ei 2J ?YE]\Hy^Lktš~&ãÓx|I_0l۷oӧdo?u>|nbg&)Q@}5%u:jKy7O؇,l,t^uiF[ٌ\41$`6RK'#~cv>6" Ȭ]z i5iHvۘ ~!!E3KWzG)MU0Εpiz9fKNHL1rm$`[$!mm-⁼R nٓS l׳MRv+o} 68B"%E۳ϋHGP[heDDZ!%KGٴ2HU`tdMp+YJ: (!BɲF^%oene6YdQM7TfpJ3`*lδZ-"FsQ\I)0 0R%EݙV9ϴB'UGdx|T݄J"u,&MzG /yRHLPA5>WY )ΓΎNN_WUn_W=^W'_GWgj_CmO琢ܾZU<>ȩ[ng$)r;gr{[uDrh!}i̵S-CTp5 7._#6t9ب%=B&K#Rq20 ѐH^pkBV C ZdZ?}Ly R`\ES{d17ٯۄ-EC&~^Hluy 7fQB :5#+S ލ?`<* pr4o?{aj Oqf?{IǃYd>h8@KHdv.HҌ v"|ct/動fAQHCU=5˗@Y @ ;Vu5B.UR5NFBfWP*NЋ'@+@HH@HEp@G`υTv*!R80 ᵈqԗU 3V9.M 󏝽(edZk2gIKFA3Ceg! @N % X\( d_e8>>8>:>8)RaȬ0WB^h.4$jm&ԙ[I;S4) @T33;S RAgMUf5U&NCjo$ŋTX%Vг6:Eՙ_템_ANdN?yxn>sHD"`tG^-D*!5i"Dj5a77bVb׀T]gg. 
Ra`RA3eK0;m=/lTv|۲BqHnэtw@˕OCi5TXlX==;R;=?ۛIk\st*nc+XT1d3/bWU9(K0RSWh-Ֆ_c6B)B07$85[X~\fU æ n0=QRXYہ2 æ AU׆29;:]$Y;H#pƯyf]Hd幓,>͗ +S8.nr4Ϟ󪔿*JR*JF=*pW>vF}Q&(P>,}C֧J\.KJ\&O*ۉ oQm盔^i٦+uR>%'cU̳rǕ?]إ+x\4e8E쐡r,8 sTm އwu},Or 6Gݯ"/$ k+K+%U$IDR'oF>rBPCJ5R!d% {4uAMҼ!z7U_w!6u @k)-GJOGw8./!{|^aH8IU:ʐE%"!ʤ4 $  XkOJ/t=,WPTt2Hod2 [d UBΤTbB\ͲgP0yD<7Uy# Hd!F֬g\Ӡۭn1_5Y>aGGGWgM930'^gSS=̦H*2t(u,pENQba,i,' U9gV{ ( 8xi X^zN TRlb%|П]fRik"[w}Dd>1C:Ek6fm".B2t\=6z`hgVDba5PI".d$&j)-ڍf&PI;hqED wQ1tUx8gy.0֝b@*֝!/(DAP>-܂c}!c)b/Ss>؇iΐ؇+')5Jߕ( \9.BODo}`% *ew(FY<^\תgW'J}qCW ?)6w>޼8l#50o&ԷGSG #IAh+egˈ{Mz e\*$gD@Aa<"t S<-tHs(,)fJw `S8*˳g8SںζcD\rdb ؔL0e/t9a ZTLY|*n[]); &~l*o~x:B3 )`_Ta]kD c %Ij %B#^_Լ vٵ8"`* Ak۷^J)--=ܢ-VWnG]ߌ|/>{lxL?|%M퐉dNj6]CPHZ'ҿdVĺ(1U<*\(YpS:%~MJ4鴄0e &,T*}$--(]cjU5d-(wJC-CI?:+vtTeUTE^EwjXI.{A"36Jxw;v'}"9S{[`DNC!X&\"BҒfF̮ͨVIZ7A$IuX_lO$2BIj`kdASydj6&cؘdN _Ez`݇I Z5IS? H&ςQDI9mzB;QrIϊ؉F+JιBXN)JH$~g5MUR&Ψk3*]xY)fTy=H^6`vyl]?6m}mSfn(b IK}MjJ._m{PNY}ötg^\]iƱX3/ԌU3jj?C3!GMejvN5Pl}$3?$Kz&Pr7Q%B2cIVu&Tj1"I-qf,lҝ14͵".Epuɖ2nSci_Up"\a*H@Ժ~5akuprxvu~ώ/~ Xާ׏]_}"bz\v_qтBQ!􊰍.0V+lg'~=N%:3f%13H]+ m`v3M[N NgZDs`P9]&9k% = րO&ZBg]:heg#t[yvUCT`L7>޹9ֻjR`(tkڷӐltpuHS#[R ViqҦOɓv   %b)CI"'^J M @zA${@+K'H {ҙGZ<2VU=Bb,N=c! )(|#X;e6lЂ'ʪlԒPMGdXYF+ \ӕ!!ؚWOe@ d~ĤQ+=EtZ)i zdNF\0ߨ("-@JF2 HVX_^`d&)ɑUQWp b"#EO * JfRozN5Dëkxwx|L־Ds$n0eˢg4硲5,;6ܬWfHyDsNsFfqst3#S0'jI Hu`R19QJ-#6 kHzE櫒r|ښ :!\A-RyZ^EZ-}N_BV%Ge $\&2Ρz ]aCb>ìjg_[p) SwɌ2M$U+Z_+CQ+4 m) ϙ>):mLM}O?Kub=d}gGWG }~s훻X#ϟVcoܼ}sMx}xtͳuUSJ=xXghBA蜯1B'N2Ep<{yKݷ%f.Lz=eB и. 
lIsCWAm!_?LZʼn:8)*$0ac8 U3/2E .7QM@->59UCIA`V-B$q0hmtXt_yRX]o%~\8 4ahb,UQ(5E')D(.3<._}@Sjҹ!mLgb<@  @0]h0_UY2E-)KRk@'T%iuXLނöjYm 5fgRxT<]o*=DM!ru-\~XɱnP(*Z\g7=$UfxN/N/NO/waן?(7S'Pv?~au-^?~`77>};Oxum-z>5!gkMiIr '5}EQǕ_ %9Es`,htd|Ԕv?GU a( 5[e!/c--c.F}/NH.Um`C<kfSȋmEx<U<%+K0XVh Y$f#ˍP>yǞlzڍ01\^ d\pUM%[gAÐ`р.NQ5Ouڑ+ iSN 8ΑhQ󲔦lnҴZ_8a݆zezA8Y65H~q NdQIP-9pBdhU s0NNF8I& 'َQғnBVįѽ-S7a+)JC5B7 ͚E!lL$@8YV8V'qZc&ծG8J 'Kh2cHzӬ^brvuyq~pzvpt|y7Lؔ+U]XWJndiŠ(R nj>~TZ03.l:L*G-pj&EN@xabݰ0SQ[IwP  |-Uy>CϚ)-}ds O`@ޣX;R/3@dEjJz-uLx$X&GQ^IES"14ΐZA+i HpSCtiZSѡci创}[+9gF95%;dژ}[7bjhqoJC]EF r3c˷-H%[мӼrmު l)`각3DdNU ϔ-KOXwlO,O@csˢ-BA͒ FnjV]Qź A ðRYY=f\l!MCHEtL$CZ2u2 W}%X.%xMk\.䪤|!Bhj9ڹQOi98IѹHiyjK"DYTӃӋÃ󳋳`_}zokĿ}>/KT7k5sWo?~i jk]՛(B'KqMvaIl+Uo,>5ӗ Lz+=?1oZGKz4Ԭ}r̦F}ۍ1Ss@yKOAr܏cG o*03Vrru9m"滜a4~?ukI, .gF_nG㋶`[LW]3 hFT8L=P(!6 D9?5]\Kꌆ9ǂݍ'Qh9`!mAz*)g[6!d+9-6̀=mjTW=< Q^7C7ñu _$h| H}t&Qr76S[RiX&n-C{mՊXע0l\:eʢ>@Q e: UP`4>SI0xwlXZxZh|4iҦsoZvs5d$ C=Y8נ._*DMr) Y 8zyMhw@e&lKs׭3`GTPk=(2𔘲9svI-8lbD]'j6}m;d\ ?n p4@^A1vt%Ʀ (ym>95.hL{a|><\P* =^){;0y)BUiWW|kVjreL&-,#dB}*n24_FH8MՎɾ2 Xh:*P:,+QW\Ԝ:kW\ZdsPV!U%T*CvE6cJ%sč|E%n׶%[q"|ťE6 `1Oti`Tqc:bL <ɲ[-u.nlL=&snbpuv I5X#, t'I"UߺF"6ns{#oSˈxnQ"h(Ly7C7,Nr…9`NGx0[i"hN.@Z3h,!U耐̟*A7k%[G3 ;ӧ~Z0b@jWź\}zN_Zڲ;]Kʬ ;m#[\x-> Ek 8`S Y:.iCXrfm>82/ wWXaTv:{iJm ! 
ipAb d:@.8B7ɫ`Bcu1jܨ@a]vj}~t"eA0®i >-k01sRvLÁ .nXK!{)źW̻U`ED8a{ĊGDDi)tmEҌp ڗt+SevQ.uѳe <g}[ QT5WuКeqFL-ɸĸ@ N;?9;<;<:8>v>ٛOo7|۵!j6?w{u~q#mGԳsp8T-s(*?0}Mp̿X9c1/F??^uzpZP׶)Ӧ %u=ߪ{FU}ʣh#Vs'{@3L{ԃ=ϡzd/r^S%rzcp3u>)Iuon?|$~ۗ|#=r^Wִ!= QHk호 0=#n/.zR)穞~Q;<=;<8><=8=:`۷+Kzm Pm/; _l^]B@6͈`Gʬ޸@1FX=ۿhk: %$'sz >ozJx4CM"֒{nɮWSe YÖ+YE \u1ĨDu{ FnmpSy:=/Oƃd`ԯl}'m}rN!u=߼PS]5G!c=# a٭.65Fz@Noqʽ"tշ i0EKف HΕZC0!z.#갶Ic wIP9& ;Jp&>dk1x,+Ll=IU嬬PI ]EFZ{l].UKĤiM*MIsX~%O}ȼ@jU.ks)/*0o/ J:)m]/=.REI& XB001"STJ'~@ K[A8i 1Ӄ{Rg{벼}c,hؒ[.XoA,+CExKO45:]a21P|=ё`b#z禦O -Ʈz#_$!SKQ P,^۬O(m7-BsO7=,m7UGqtæݴ}++m7QjqtH=ƹEMS ¯6HřF>ؚ7MߺH!5a#@%@t N>0rQa: qypTQHQbHu)S_ᥝWfQN?SvzI~!F(x n@ 62]oݣtK NXϡJe@5DXWGE"ZRv !v^5 +Rt@~oUjl 2{lZ6V~yU-7k4T~#*vD6c$= S"5ěR^N:=WHu"y!78AIG' ԇk=OLtu*hȠIZ;J9n_ps'Eg '2$v'uEMP"D$ce7)>\Ɣ~sȧˣ˳\nZ/+%\)v,i{Fuα-4+T{էJ4(?hi^_V%M&R)$ea,l*$'` &)lTKmZd'J]  )0<-.O KqN/V+ǒ D=6PklK]cXc* / yڛ4clP̐ .[x=$Օ.-p"@ݠi&wT;NL'側MXkKt賤Z3Ј{qQt <41gt%'0@\GRRuK9.nn>D4S lo*1+:JIА8w;ctC\ݡ9Z_~~K:4G !](A)X8gk4ߓrbROkŴ\VPA Pd&mN,,j; t@Z`oi 0]U=>*ѕm;iˡL53OzFDqԐR\ϻ6myurprzyZIg̦sv3?6nVoUƾz nt1"wmYd3Sm1A3f`h^-,hƜD~zi>fLk6pzMrHXyvߌI26ViSEnSoz͘zSmzK+i^2͘!q3 mzKP0V?3͘>Uӳ˃|t'r}ǽ NA9Jy*gT:7#&$w1/ .cd4^!GĔ*aǫG%zLYy s:GlrچOLB5Ys\U릿ɈlOO3n(ZNiŸCьڴ8ԐIڹ)K.ä.ѩPG<Kcehֵ*\:_G. 95t7:Hދktx8'GצFmdJ\O[oe ꍾ0L, Zji&J FiKP6`5:C "J1PnkfJj z5NPKײ@p:_!z-rJA?_˹]qG[kLUjfQ(cCݶ&W}E8] VQ7_c ju6(6ɩ?םS6$ G41E\hfqMh)lHZYɈ -)=[+bS- fOm=zB*S4Cg)ÁNkp9lOE9_1kMΖ$g!JјR%D9eS,Tds]m7]JIH-"Gt@N_P[:{Q(  %m퓄 &l,%X{k3vB,l]6 wHun/Z`CPwqjf8q+baal^np^, T횰M[8LnBϻŇ^gX),TW֊Րv뢭(D$4ҧH+p0#6\|mAc[ъF5'huR,{]'ʙX q'WeœQ,J l@Z|~"iv4xݠ>LZ PpT5TTɦF,S矘G OYpVVVw̄k[lG2vu'XK+Y7P'sc@IaN9 5z$f,-Qmdt˓:ds8_,J7g>m{? O?i+QGom Ro_]J=.!%JMosj5 *b1Y rjB3s:s3R}Og6B*`ZDAiN @B6 ,&:OՔʯYvR滑t&!-l m]`@VVnzLX.0)%BiM5Qb KN6N|"JJ o >_Y"ףDX%QD˲?ONot"l.DJ>U%|LMR*ژyp[8锤_v)rVxRvg?H 1*]%9e5A^G'"Ef(3k-L;5 U ? 
&ꉎx&hOhji_љ[M0^s ڞ&Y=C <7 Oi:}ja Nl)Mp^$6m "ƒO5տXߥ& VQS7:` h0۳&X gjaCMк hutI}s4#I,Xe9-@@RÎJNSSG''K1A>&x5Ȯ"^ NUDSe&Sp7 ֥ C?f/{+?Qk7 , 2؃FTAf_gq.g.\ 3ExnUu{Dd2!rfȅk)ӟajč\(1'qcUcH%(j,385$NeLb٫(sVGpE2²6t4!gPb%,2\j}ή ]J}wx|$9nTIbӱK9cI/d2v=(t u?'wD&/^9AkT"Hl+틥M6gj,WYâI [F.[@Pg*>ŬvpaQT6\I>Sۆ;Mw-T1z|16n9唘ysE(5>r]\Ia-ɫ"&4j)mp(?XZ0c(-GORbIj~ArJ%4**ܓKISU 54F׺KŦ)ߦߐSI2鐭=[TTo=&QgVҺQqzM%mTҪ@@I71X.J<SIc`vĶĮ0XbywlTFN%;nP.%<$ JbSICk[V1c fsT)ç6l*iCW0SJ$2J6§ܲhIQ(;⤤TG0I ,94INjB?F!e4#ޗk)AuKy)yKiأj 3N5[J&53 . QWH_[:ZJt/ 1xKi1 *?zPS8Y孺SNkWg$)RJK֬HbD4\ؠ&u tO)F2:氮!Nn}rV8: }V Qx'LZd> \`*WPBtFfcǸ 0O g 9!2t ,kpm賅CmrsV^Yb7eFSUJN˰" bseˡ1T\i)"I}n6-DHM>=Ai>`)H Z .v`4׻;]\4#`vMMb'b wagWW҆`2Tt=|b0=+ؓ A! nϐ;U9'9*MisgCDic6{TX80Mt~ x9]Oݕ\➈NUAXf`"|7'&üDtl+Ne6t ErKMT Mzm֋CvxNHØipYHg4f`woO!u++ZEtl( %/+>L=D$;)t6ЎuQ(tN&KV.stс"h0G1}0E Ĕ55"HL砛+)rHjǛ䯪ERwӛ 911Ci}n]7Hg$yqur|qp||y_In_ @Y*M/lރCHN)A0DADͯ Iˤ׭e2zk&LW9,xS^h\a*WneJ,ɘ QlKg)/J_K(79P3za#l$عK eZTg(AWLMI.Nڍ^v|KS>4_(z IN~bFWZO.OF1lBn.<kwu{ާwkR'^ZOop{?}s_8?kjBv\Pa:ćVؤv[ rx& Z{D<ō<56m.D{DTтPm-WQmF@Ҵ3ǢkgGSчaC!:0C4hvǴ"ĉѴfQk2pۢ7tißr !lw bRASP7!A/GZ >$zlO6 4خ٬Nvߕ@`dXoᴎ' >DP2AT9pGMh re8s5 jcUZ=bD&Ch<_a P\GhY)PvܟJ+q)Z:E7G &F$'h =u >1n`Jv)z- AxV˚)cԂ8žsDFk?֥'#Q[hѢHZ&X9#D?КdL#( Y6Ajj4XTi Txン˃ӯ(~o++}ͻ4quO/ 5 %_8kAs滼dTADդ?k((mKPюGZ*1a>4 JkZ{[ޖD(LoĦ'`I(EȫؘD)RJ8T}c'EzqhuY.,$S^7SvSfgEѿ8X>Slr8MҔr2Uє3a-6ܥAzdOKYp8~r?uUĎˡmAYQ~TyY[L[/J8?(8Z: R՘j7~ AvTC&SPd W~3'QRS%=)4&i Tj?J Z}Hy)1T EÐ~B\NNϿZֳOp66u}/>޼xo]?㧛OʃM̞6r<%)4!ĝ+-vjm8l.AЀ;z`/piGB-p" n+l"dN 0UK#֮qLt9X˅FU!IVwC"4>I&C5xؘJke+B*Րbz}#·A旕SF5ΩX)-RT7lP/SX ԳRԮB.X$36!ԜPyv¢Fa *{+\AF?U}/k"M7jf@{Z ]ay臌㵇qE,ỦK:ӇNy ^5hvȮ$ J1Y0Rg $鴄}%TXѝ 1 f鏀3ܭk<=;<:8><wo>3,WlOD8xgT4˄J`ɞɇhEN*Ddw",.hK5wgvi ?]u&)o#th'> >쉩3ygEO3G,5k|ϓ@mQTzg&Na%mɱVh`o45'B7bs !dc E^'n#)jx[fjVVF iL≮2C2cʟXf 0g[bV<3:L=MB }L_2;ˌ_/jŒHAٓ'k@kh<ć?asN82l3KD B B:-(4%..S}`*jpA4@iSYz 38_|Ѝ @&@ fv5hzGUz=dn#$Sõ/-&r š &"*JC425(11FdW҈6r  TzZE9^SheY)PgǤ٫xL&\I0Vr}qփ>0Qyncb"V6@eQ Fwv I01W)( Uyxk؝Uttyyqx_ʟ^wwf#k?o޾{־<o]T&T54-k 2^M Rݳ!IJMްrBEcM󪛔 qatYѕ%uO8PMP 
s32aA9OQ:1欦b2a=v~T5#fko}LպEg0SiXv]]{ACt"OS)ڗaUrLug9pp #z6MgH$0,*a6%ݨ=.!˨Od2yEɖ -Vkف=kqAQK\[HmxP3dC%`ydU#'a@_ RkN'cDBڄg"(C1CW ?Z 5P/Inc]̫\}64;dFڛ(xHaXSyV&eIcZ7?%rw7d|T.-J-ljN6Qh9NGCGqU{7J9Qh9H(yO-&J-ifzS9s=^-185185o`.z\Nwq^cqَ_֙8*Qz쬿&Tf*RvL=.4ؽ*dˀGڋ@)Gsfcqj3WKq N< +RsCG,XB/ V`Rt]dEoؐs/gμekspps:Ǔ<S]]x4f˯m]=dF2M)2Z@`ƵsNbI\bŨ-FDIipJypo!܏Z'4[$lcttI"u xl/6< VrbyKZ( WGD~&FRYsy| M :L؞FG K H 8B54dbcw=E?Ϗl,BRW(N{`%bmՊ_̘4xen~{Q (cx{kA(gbxOϦ\O p25R?$Cy?u\x24 bx9ZAo`n={<F,pδ 2lyS@"-.QVl k6q VS+=d^m,M56]2y=d6gi_[c0o,h1ijf:` z'SW#'Ɋ)RV"QL(4Y< +`O((005Y1 'W۳+&W1)8rKzMIosvÐ NFx ,\H`LA$pK@~akژxg!Q$ZcMv'qѳ/엟wi{uyOg; KOreiDt#T,X&ҧdD&Jd2S6 wD/}ͻ__6ځgNz#ڹDfSTB53&Ű~]F qXzS)IRdI#Mb=bb0;UD4gVɮ[NKo0DU O}.89Hb"b2US1uy¥V9_~!.1,)Ls:S*PĞR`ݼ!f+(VQ-haz&>ؖ4(uG+ĥ.axMk0;jPNVl2Q@Q(W^:um%CJ^ L}LSK\|janLZA͞Z",H`St5%P G*YG$ XTGg q!Z3 U A^ƪ?yIpk宺]b"gԮwЛ9~-#NsV1i;KLy)?Tqk1f=m EdI6t`2O(FQCT]*n.ݝA;rE{0MgP$u{½H0IB8!Ta}K rySvXCFsrφ"[b3iLV]"m<3 t,rUKyU7D[+SxB';LjU-TT0腂\LYR%95 dw1;ߍ!i:$%CSYIu'vRz7Yj@O{Y!gC2`(bTQj y6JMo9JD"+pdPfsrdQ A"{cM^2;Cč4m"ƥK-2ElRʍ"e}:=$K Gx|x=eQ|*bt_gt`+^Z2HUf6,|ASy&yJ>z¡Oh擜%޿s£:/ۯ~ݷoaތ޼b|q}~wV[}VQt*̖x.YjIHTwv+(*/JY\[ u$ʃWAH6:H'WSMHp љoW9~nijBٍ7:ł8.ɺAamT: wnsR3HHff_C94˘=D&o(15-wo\I~QGugm>L SaԄUM]C z"DMUZ}^>S듩0U N}!/U0ÚqRc%fz`f3eƖ-j]$|o6Mՙ-T{g ŜKʿ\z e;,Tm2}̪V]ߜ]ܿ S7*߼2]t2] < KL˩RJ@*΂%iZXxWI#‹I_|tUQcZ! 
=hWIơQnig%}1eV“M 4UxOjea= А:V,9&+*H^kԟ0v,ri,S̭<=)yZ *ORZO=څd,NHm-OrG<ªb˩V Bw@PV˓)_tRJ$!uD˓G^!O*O"Xk?$6cI)-W8X$Fc(I\6ITTk;TT`iNY(d/HcOYݫmKFy o`jm$ΙFy)ᄔ مX H.Y0:aA!UW2< q%R:R|5I]ly85yAy0X0@̓saF#Daɗz3 o]mQC~YS..w.oϯN/7~[f7ş}MSY?޼e1Nmxjs6I{bڥPNns6DM#\5:g淪9cOELMj@DE7 Y᝙ޡ-VjqQ1(Cv`W~$ZY-}SlL(ǔ;el|;;N99ZTf"99A|{N: C>a[ܳ (eCÜѡ5LRZjEpJyzjU??JuX}ʄ}C1Pg2ޞѨx-6|Ġ#%b7AM TcS㠀9{LNK*&^|*WJ]%a iJ380(hWtv"ԌrIj5RCk Sq<%w$xam=PLZ G{CJIdWS$9: u[RR۝҇XGꑔ`f3y`4{rxSXJuɬbGAx^ $bZ}`iIj4=y'r AR Qޗ,ܒPImŔ>`iT0yJ/>ZRfd6W1UĀuK23Lt;UZة|wu)_]^\_/~v?ZN*^hd Z}psVKNxk AƐjW,~Q p]%4`IL"V/dӺޖM5Ÿt7Z65Y+Ѻ ;زiqf6F˦pJw#k4Z5TIwB`{gkD{B$2,Q .֍"xn5ִ<9Ҧ:hTU]t ɅԆuFcjwvAhJHQQF݉乞.GBNiy4A=fkމ]?Pl2nrgb3XL,&?˟OJ,_>Un._m ʳ7߿?yGn~;D.1WfBDٓ1VBbך/˟`huxHiA0 rXS r&c `H2QUnp? <ͤق3[SOt u N}z #a] BvOKFFvՔ!vqԉ94Ch֨'/Bv8Ih॓+KhCKԲږX2T4Dtm9;"fGx43rT[Z 2uNQ^3FyYFY &:۪24_l&ę6cɂYi:Ojr57$ֱ T*Ny3~`zo }ђӾC/{Fq4e^@eWz q54WO#>^ ;Li0x[44RmѺH# W$TpУ2[QKURDgf= ?6Ct1J%F VȎ!I'Ւ7*4R͓> ߮b#*C岗F ZS5e]V:OBzw|Qa[bxJ׀Pk  Lwb_r""E!0UϬ\Riw@G+ޚ'Rzv>>dKۀ|Xdu~k늷Mf(ӈZ Oa888xu7ң*w?~׷5K鯈#q Bۅv$uKB4WH<,^X,֞[h? ȵGYt!%>:#GGN}VpL}tG7-6S2z6@dªZ d^b"*1vwy61Rv U׬4_&E&{v@ #DNwu뀉ib۠uӥSgVN!:ϳ@T73j``[*n7*:ieہhe)V*-IOhf \3ri*4@?d;ϩΑc KFZm Dn>&r+4ƚ1[(Dn0޵꽩1&Sf ՘5s3d'vFn-eX X@SK90)^ȡa(W(f [@=XJX⃇L/3i8B[zywno?|V@?+ g㫋'zbp凧]_oz:囇g_BNlo΃o"UWnҼ@ed̅ 6 Oj/une*ዒm2X>LgWL24Md1qQ:\°rVv붊_ Ӈg18 kIdqaOn.Z\ f:#tHa\leǵ̹+/OjbkrقnjPbo1(cZu.R.aej.D/rL?'D~:_ c:)@UDK05䉞JD8]J64rNBem))B]#!'jZNUNd;TD W<_ 9f-Ihh0 9XٜJYuԡRP"4EhD Q`6! @`Zj&+kqP3R œII0E0lZWriئzSZ uKfM/$;u:ZXXc~p *wHSaD{sCו *w` __\]ߞ]ܟ~R+ʯ۽/7n|AVFFP4'jbT{C A=IbgS@i#KT W# X @]QTXmf}X5"NS].0J(= >TDiEnTDۨѣ5@Px}"6 *Xj·w.Ni>0?^E-Aa?|lb! 
LCݹ > V9׊Lǫ@Z\Y3IZUɴP0o|J_4ķzIUd#"SEeJG𔟛~ ZTyy zPŴ׷]1(}  Xҭ9 @+ 5@&6p⎄3!lfLr/z7/dU JFȯ< :K$-a{G{ ( Qbh/Y60rϷY~[FB50κcKavUHp:  zN)fxE%TxM@W#)m&~ۣL S+3i,37?J2X0>YG']&'Hp~`z=F+f2#宽3M@dK@`lӯb{`ҏz{`6n ]q6]`&uI=SHccB\U: ,abĪ;zz\'UQ+ºAJT֩ro0笊%v5CvGei:4Ikob+|Uwt{c*2ߚ|vmr :LʋO(IPaYDP83':XY\&L|Z)ٙeRᰔNoŶ‚lhg%eA}MRb(aMcjI>d-;J`ʐ~V?x?ۍt꛲3k@`Gt|AAu-%0n:bm7`RȀGY/Pe^zuSvRU/X Rݪu-qo>ފdUuu!`E|̸  &+eB<Ѩ+ڨ[>1xYV岘#FZ#CXT$*ITPJ0JuCiV!_SC'A1*.U)ׁ tgbH!b(U^Ԋ[WB6G6k,UsO^\^]_EntY<.jF!R 8HhFa ƪmEͽ"Ղ[05c~ZglDvEyւa[dj 'jTfqi[0cv'j҂K=5m`@>٭v s^VEm{Wf{-DM55ק~Ђтk56%r3Qӏ>YۇOG.l-=Lm#;QҙjVZVVhZвLlv̊>W/a* Dp2DX)1DX) O \M(HT虅c)C؈:D kpO7_`0A{h=P)6[VmЦ7llٻ/k7[ۥ(P\ "C9KUS˅]vrw̸.d{c=1?dζC_ߨw7gwgzfZߟ`=?A\8X8`%/8Ķaz|&=czs_A_ǡTZZKuOՎ գZLPZ>Dyt' (Dr-#xŘ1'ft'S.Ȓ)W L'XRȉX(ըͱT#뭭wWWgwWg.6n~o޿$5[1}DT 5Qe;Vwl )ŻOe/4UV4$!TP,D]~Z]+c䈽 }:_X=qV*`t}VN2&>rVX]OW߭|cD;?(%<JҴL@! NlzwpW2-@lP)Ks>tq3y-l+DG"*H koYFO2KY)d)T)P'ag0V:bj3y a܁8a[M{ec[JarJQI\s)+&IGGm-|(H$m^aNW7#2#+D n;fEI+<7tR/Kh֋!⌿62'2``(NxSUREv~CҶʽv̙^*^d{u ¿V~=n*ښD@áC^TPA# bLUEWQf9Ta?IAVV1*"%l" ";GD* b.- bET+O-r*kI GAU{l+q qCZK>T[HBA,^Rvr Jk3BDUA vXO a O^Tc݅VW0aZSnrVzOA,(^ (06ɶHoŶh{ b~8BEEa"XO b bi| vٛas6 iϣ b|-RCrUXy-Rr$K?a !n!n&pTA wJoq#Cl J+*H]iiuXl;S bl-EʁHޭUA<ةg7WہiK6|**"|xW_=/ǿ|O߾x՛o߼Gw_~S/iUn:>jt,DT:ZSl6lթ@ bV.;FgYȤ` 'zc-:}6R7g4#MLT'90i}BP=6P5)!OAY"lV̅$#dI*i$E-@K xZ_z7rl61P(0$[L(|s)|phHKH :'0p fղ9[Cw20õ`DcOLs i2.!k2(#bIw*rUYdKShK52ҖҜuiB||f#}:WG^%MC8jMmRI{5*@wRܯ[EӒ>P "cNXH9F! 
&*ԥ0Juڭ}5'礉[(Yu[9ZD躐zP~bB[LK W3$""rC ֦q({ױálT }Y7,Xҝ")?4,ʶqiEBnA 7|590*i#!l jKql~Λ:ιM GSkjՔ)!6O-UþU O{w}z|Q>A'Cڎ\,T+3B̢25W]`Ғ ~ PYx/-*5 tL\1w5c"UsJUxUM$_u}ݱMoADWk,оkF|=YT09eC2vL,5$Z]\𮀈F*"YR5:S#SiCR \RєMA0ׁ|\<hiЊx5ڊx)ՓQcb\";*:E<"MṠxMv%0V)O ʝJS `B|m)@҆bzUL[Oh%mD{!!T' 7H3{DY`1f*Hi ѩ.EA I2+%gi 4BʑҒV)\7a:]6FE<"I|2͞KIÏL|٢GSLq&A+ Xe7 `X/V%KP|ϡnF BC)\mr.(G헿\P=nDSl 1Pms.(eQqq\{otP@neLM.(#?\O4wah򘆶wZU1~An*aX0E9Ғҍ8[i~2hӰ{ջ [QM!=*2\B޽ ;_zvLêw/bbL0~MhTaz 渚P+ibL4'26V3 aQ(bwM_ȞNmXdpT[rMi{)mLJ EvZZ~B(py[Ql:ZY}&[e*X_M4eNiЂ;Us d%ȇUJ1(04vr 3ʶom>)c*k~>@o e[<](KA9N%oT769TwVO׺\`3egU(ۗg7g^ *l*ہ-3恲vl?; byU٤~rolnƆ֮XhAݗlKP0V^1@C=V[8c#{c4J&yIَ-Fք&lxlozcSnL~B6lS (ۘdzc2'ӥmdUֆ/*۶lCq)lS ظ S#I+UՀ^Fu_ܓyFrށzdK5YjV-6xW i/ ~fEI J we;ǚVDcڴ[&;Oeێ6ի q*1mNxI׃U3 u/Oe޸9M, o+kLJenIdmXYۏY#d,k?EY#kP־9{uvqsi/߼?_7_WU߶=xGGz6ejQ=!j+W*2oX;/c3:# ~mVd-UM@)?*.*4|FlL6 nuam[{k"-էcZ+~7XN_ "{q@˄&sKZDvH.d!Pm[- smpmwD!M5O% 0YL-5R!Ra-8\hQ>0kmL85J#G+I֑UU X)ElQ;Pޢv0_E%Qej-xʈP31Ciybp}?8xtxӴ~m;l?|tdSqraі}_Nxk޿g͎6?o?yoyhsWxhڶ~8LgAGh8޿~h_hk`wx∎m1N|y}ķn:r{ Ʌ [wG2aG[lyJִxowHѦgF`h y%"o}wtR9+b#gb 3Z". N7}3y6;b?N͕U+ipIb3ei_y,+ys}qvq3/tc !36xEzo"1dw*}BPÄflf fHCO@V-yfWnh>ע5˫h~ ƴ@CrDTs֙b) ɛ/k}4{=)oP Nd3T#z|ge:Ys3Ň ],b33u[+[\Z>M_F4 DU4J{l%G92/4;+%yFf&Ϧ"'!-4;uR-a 0&2=3{ԧϩ]UHUݢMkIMfWy--)R1֝Z=pdOqh:nRA6'3͠wxA,m=_VZ!la8亂.A S ީrə_ƢU uҢ%^ZfGE IG&b6$d'I#uzV4ȪG]] c1&ѓ2LDBLhv,\F&I0JXMD_A-hWחwgWgw@ض\UN*ɝC}<eO!886%lPrlLc5Dx%j7"9K9!FD U6IN- iАaֻgѻBF SনCgSs >({=VKRB7D4Ḑ^L׾M6݇, LW Tכ0Է%١\,.OAg$zԡ'XZMZwN ܒKESKڎ)BO},RTfjdkdXUgZ͏[/ʡk '~@AW|>Tnb+X H?8Qە 0}V1kwOndA`QdNAcaŒ 6Of.M(u@rGCk#`౪~-7k>.B6q]=0 C7\5&%do ՜-?NTӇM_Cٱd~Sf+u!mHoϮonnoF1 CF#nao&Ā۞]_c9Egm2qoZw[軵%x΀=jnM1J; 2Q{%=gw]+/9ߡcu7}OgEFRHw3ZKI LT/29cp| nwO;M[G&mc٫rX5o7mf]U!e\v\7-[ae0ƧrMm|On>;䳕r|HԪe5e|\ٟL>;Z$)ob1l"Q7צ\&cٰu.bHۼ흷e1{*o91lϞy[ /Fil)UmgӱjHn>9E$Usŭ l`||-1l ؜Ep| %m=Tl>[LYՙrVnxmg g9eφu)JmL>[8Ϧdnzٌ6ާXg|D@j>lc>&=,kh:[5t|vgWXm|L>{uВ#2&}w\N>w@h/@hO ?2۳E|.O`b ]tw(")?n|w7>../x]j$CŎ} =EP6m^R8gvGDnן=Q6/Hm""4yXX{›τyim^ڼ|M.hf+l݁ՆiW'' QljsF6Cv"G3*k6ʢ=)Kf% EP)+dI&~;.5 иf, ҄<'c./yd3: fW,*duXꡣb=qK NQ. 
6#UȻMkDuMq.(ȼxˋo|ab uws0jX%r=%ȡ*Re隊ބB֢-ljOH5t/YFt!R&.@աD/r C;=^rK{2yj8 zI7)?}_~7o}x!g ɶdZdҭorܖfU(ݤ1KgrӘHor301 JX=~q5S"nQs`VB+hKoD;jLJS>ΗzDXN t҈E]vj#X#AV R : 4K+-6a&ÓZ2Dq_{F'0f4<9дڄ:C IZQL4焬]rXd<0Jo%*|TɈ`1zFSv NUl+l\=:t^}2`EsU!.eBsԺNhvXA5WS(?to)R|A(ܰɈ|96R[jw"VJgR MKj6/WOn{__?<.oo.k#&`q6-*\`R#h$T'{Eb JTJ:'T!KA/#n8vGPg c2VubB .\ 1 :OH)w45IsrN&1uOܦ)wQ hgpzdA{RIefaM֫:|rHjAF`fom__$ 熂ETf`I1f&kl='=*Um)̜&ka1KAU #9Zji:yJyt0flaHz(msEPPNo~%#{jubn⒄bXOm}Db= 5\Pd:XlɃ ~VɼCucNmbMd`-Q6X\[F"J'C[ԺS\,?]aB:r~m?DTnWtc, @-<'G+~eTvO)_#x˧=!/l SI?޼p~9Ư!`Eb=P}ō4Ʀʜ%@D>l[jPkZ9MXf]15Bj]$Fx]Џ卑ݥ#A#c6U],]rAlwq"MQKvvVK6խ6v]lPIJ 5_1U\Ft\P\8b<NJu!2j~+]~rX1^vƊ 682%K>+җ%JkMȦ*ȠUll*[b~8A݌*2QP܄ѢT F\P^lPQfp`wS"ϔxF$zghY)ƾM5Z=;\Z=Uxjl9_)V\)gDHQ6UNTC6UȩH12o$ j(i4R ls{EH*R# hvy"]Vz\5lkHqߦZHꓦ#wO믪TXUBO#(Tcm{<'pv .$$J>(s+u1 *xiA5SOI*V,,b־W,7'U1{_c{9t/W/yTJy 9K}ww=^ޟ_ޟ;qC~A;˚_~suqwdYnKZCCHzg}0;U Ac:cL2GhA*= &ӱc:a ݎaY EUt,#Lǎ8'&;\zSO z@&Y{c!x'zcm3F6ct,"/8+LV0eC{ ,`0Du΂ m֑CkG@:e d:/mJ*TQFsY8Z{xxue7S/x45 ' #9Kqx>vaSixjz|ٍ_FuH;%o] QƬ L(-!%AوShrHNe`Ȕ4Mƴ)X`z닻x~w?S_{O0߿{N%?߽Mq+6"*cXl ybVoOњo=jvGl{ēK*Z{\N8Txt#oy>qJU[OEy%9›7:RA C: P|uV:LzшmiY)%pLփ,¿Vn#8 BHegJJ iZ%SvT,;cγC_3,(ՂI2ѕ;md町m:~1T>?fxpǔ+> .r){AK"0+# 9I7w39-cgNm)7e)ߦX @6+P⚋)<3˭Bq,3:gB[attѣ\1`܊h-~ḭ̂ZQT Ue$`!Iσ(3JR|E4z5KD*R LI=+ɷ td,Y(H~0 Y95мwG͸G% sb$Z/W(tp Uҹ|{YzfʨZg5)lzMgِ,SKfTL ᚁZlޏ,h"bk|l+ۋ˳ v}CrhG"k`.'T.E\M= , Th人M}r@3PM^UģAs:jupKfM*]5T$r*ԯ 3ӕ`&z,ώ ̌ʾ$bajfNA V3I:fVDZsmu'CYu1A%pkAl"}Y;U*tHڊXJgj/@f*RV4%"e# R'3#^G`t ~u ,Rnfꟿ9WWOPoP9 5=]!\rJӣs}< bh\Athm'ߝZhRl_aZ[q2YmLG SsEs# 2BeĜeBO3̹ o /VG|r B3, _oאNx͜GN2>86gx?8+59'<$Lƾs= CGRK#$1C|[VT,&B1X]Nb.#z5` NDZ!}ҫtuf=ӫeVsZށ{ucohmS9e謦݋MUΓzo^}0`PԫsW Uӫ^]j^ի5' >Cڛ3 \Y{;k+I{_Ǽ!&aa'LZaoSZOhSzSs6`C6@j}'UGddb$ڧ]N+u9 1k oDkPY:壼BT z.8DgMSGZCZQs?$?~wM[>DKs؝jO k.ojgk]IԂ򵵡$Q,y6'//QvXyh!05MHjokX C^3[Isv&"8*"1d:,%m d)=kh&ܨNep6jn.(zwPPeSTi(YpYc8rҽ*Փ _&18D`N!ک &0KxB)d恑] 2I.ꇐNI.i<+5U"lyGo@$;dhTE]1}UU1. 
52Mn obIP,Z"&Њ$7X3`2ڢYau2;-qޞ$qkADM-C;IE,1w7QTäs%0Mь֐qzz\-To4 ڥ]-8Ϙ3&[brO`W吉ꢚ|̂)PC[HNPr.AZ[&(9Ab3D0]M'LP.ޅMP>rr <(6A{8hINPZkGmP&(AA i&(_|r !eOP>rc ʛ>}z<5PP>ba7 ,=Рc2\&(]PsP@ {>&/P PNcI\ʇOP.X8(V.Os4b-1CZᗪ۳Wgח6~}hC IBfk].BVFZڵ6.D Ƒr|V*ӸBzщp){#ap͔D&GZ-$R0h濌A9jA4Mw"GFl A? 8KWJW KATcBEI3Py2PCJ zJ7_!J3K{vyUCu=lIBHv@1zPD4*E": f2R,y'"_Y}ֹg3 _ThsO0H=z:V)ܦ:T`k,źłu&cf6s`!_xjr"belJ/m"|%wa?t٣5Mj 5[5T+қwVo|xVT=ǓAH?Z?$ٝpԢDon&c/hVI^rϵ,*wG8}mNsX0LSDjMx&ݿ::vjIoDIǤg3o@|&=HRHf<ѪE\4ըnmyv$oliYÔ4WPHHT‹-״W?h,=a#1?Rh#h&@#d7xL[㽋`Ihc69%4ȧu->m6*hˏVQ؎3Ҿ$)us,\$=aPVA9j8$ kWGZ[A'qRA[[A'qRA[[A-)hw+h㤂|6 k} 8-_= 8-WymTЖ }֊,|u[y)XZc:>}WߎsdFqND^SYq2OӰ_%cU?ϞݷlO¿O?ͨnQ!&oWei- 03&SXϲ(_N&ʅNoԀ0Buz۵ E|j if?pi"z4zRP$hkYM4svA~LJe}]l}.tg$lm2eQbMa.Zޤlue-% 'E"/$:K㊤LT(ณ2tXU-5NoC4"䎐pa.t[tGhC1ѥV)rJy>qc U6#š@v3u两tDFP*04j$Pg ڏ GClw4 AA2 Q 5|Јd.N!(97$ +tYI65$Xѷ /ѐYOY֙J3< ELWvI)tt2O݌*Q:IRbȖ:˅⤏Ŋ鞐æg:-BzhH&ZriHƔ!n[[#LS|zITI.tH`{QxC} Ɍʯ1nv(Nd"A.m01N Sr gb !#d]\?=ot3ׯ++_]6.Ͽ|R483Gmz9fPj ( $wJ:" J m &@c$"y*FmL RA--EX=i}l{qBoBQ`lvvcKabpX'qÆ8il `H5BTӫ,=mj412yR*jVR>rSI=:RMбއ BgKIil HZ8?).2K#c %#c*jMZTZUPӶI&d MGZ-=FwP:U dB6&2A2:d)H r]sRy*~dg@g#- aA8A8&@Bdݘ*N#A`[-1ղr{UTIlcKm-1V5aEx3k2v/Ɩ Ł\ΰlWKƖVay-寣?Aݶž6qʓtLy^^^ߜfɋ_8>Hw8wf,Lvn@٪ȼ]n(2ॡf͘ 6AFPDBPu6( ᣥ!<4Ȉ%,७udg4ǐ=΄ )TuOl+B-ƒܥV؀Xf-="H H!B6&wUz?)vn H۝EMUéM 6Rr;%ڍz)$Ҿ s(omfgJHa=L0au ܨ R<( r7YBȗ-#P=Nr O7i#:jȜٶ|z*zs&ol, IK6z(B[~ڝ*\gا,>X!8NnRCށ/݌է?m02v;qHc1c>=U-O4p ZucRhJM+|FբPHVRU++J>j`0 N"|j!fL!5a q},5O"5mQXg84zu9w+dzrss4'{50Ͷ *V9+=c"~UC)cVh^5D 5pfR)Ղj𕭊٫e F\o "`Q݄eaQeW%UH%nVKgf\z|'@)!n$5gU,HulaxҋP!RSU+ EMAIU\$50>H"+XoBy]EQ!ҞAPɶq\w P$0뚠R0̛Z?¥,zp>5o<-u2,B7ji3?D}1O(BVWBV*}wIM뷯˟?޿{WE__~mnN[Ai[hlu]5bicJ+jF ~liEН!.H…nޱfXSBkJlCRApZTћS)q@HY _ºȅ;FZvѠWhoQ(BQeu!$3_)HV4izoiGjEc8d$,0h.)wHE0fW!$Hӧe Y ^X0"wyy f-*;cV!r-\sTWq[c4(`[fZtު(i{Kޒd{콥僼5{oE9mC"D0_.y;oY{Iu5JG xMJg@-gehr#PLߐ&b +Il΄1Rd9E2TfdV.0G>{cB#8y>F#SNU;NImJvtw+{kӏcudTo(E\u"BkH9ЙH*CѾ:ט9<?Y/U콅J.7?>M >; //// {zv͖~ Ko/W46),8xFnis6PLF[ =Fi!w )I($ao #Tٛ M*$ Jk0*&N]r:ѼݘLg+An@ԝpy;e_-eA&z0oM6ؐ5X -zml=D x.0gwiCYq ]u˷qR{>MM獩4Y=0>!EٮWktƜ],E2d@ !9ȕ@w|QI} 
Fq+[ӯ45?]Mi u=|6,DIdvIOoû7k^zwH?߿Ux~zӷ)V7Έ7 LءS(ɳ)nPJG Y,JEMf&]EՇ?M 8,1?k7w9Uz}? O@{]<'!4+mbxB|*Tm{2j@k ;f-"9q0 G'DL"أ?o(^NϺ HV tJnES=ǬeK{t5$`LY}Zڳ%&/A2!5Ba:Ww9Nr;$Wivr^IntuZR: ʫ2\}gʈ%uF)L4AX;m epwgmHmfjMp.$1e=Rǧ'1ϏMi_pg4UUcvSڗI*3dLnJ20olcI cƤ sT6[QS>9tS:.Iȵ2oL0I~>#i% mTw黼zyf닛Ï/noW_0P5|;#Я<0Z_#xbY1~K=%tMB7/"fd#"Xpq|4v', KRT˶VrIE;{U!9>8 >I7d7>?{@8H ϪJw"`?P:gh،_md^Ѽ -mJrxUMGNEtL⎠vkP_D哨fJ9&UPK@2Rn-R;kGrά3WaԠS=fN)%;YᵥЦek~AJۅn2ŭɺ< |rM)`. "twk7O}%l~1MfCR A5<:T`x% @Զ|E׷O w;0۷xjoou0ƫw޾iğ??¬DƋ|h11@AS}ySٸA-cn /=%RĪpQFcVVz.B(hGB*ƌQn0` 'm0,6PL0ez! 2r,.(i7M F֑ek]q `qGO}D]Ae~iFRT)2E2.SD0,d;GfO*u*ògaw!WZ2M?\pQr0u#rp9%i>-.)XKʅ$P1 = WMCnP o:m|m%\o.;j*xl3 8 HZBZ(J+SBc Y0=NO6#IJ!+AQ8:':h'!z JobYu1W `߲4T}kQ̷DqZ&Nq>Hs\?)0,x'CMڭ\`Ts\UO"@e[Ƃ !pJ)@[`S sX&&@$֩,^\^oU&Hm/ ~ufR -;>O7Ϧr}H׏\="S/&v4uCUfGY(d3Kg8K++{R~ O)g_Kx.T  /DQ AK"豿ggf 9u<+;2ܾɋe%>og/D ;nEWWa D\;>P$xCoTeXcmF(D橢 ZT a{\-[O, WԆ\8jOt M"kQ534Hu⡑P1$e3R %ᡚx]~WPK]7֢¹{lCk"4$)Qd,$sXҡIhl+Z᝸zWOXZW4/'2BNII#PANz:Pj 4nCd@X.>R Ƅ,Q"VǻHM"Ʈd8  ,q._6F#Xf5.v=K١'*/@һ6U;dK8zΡ=U;;^vjPFD4{/JH3 qSgGJ@(ʥY[;P.CY/..^\j_ů~?~}qrJo]` ǯ 2NicT \(arV54&τjhL2B>d",rբ9 =i-fī`qaVֆ7et1!BJa IW*bV5LBEj#>h:olK]ANq HٞPIo#f15KO($;vDh >Cp /!NPG񌮉aGDn:4J;E9.r%(ds6TCwhsTj4/6GthwtMs K/_(+ ߘEM45k5h:$ܧZ1OBT| ${s` ŋDIPb́Թv4DX8F#gun-L @V# ԡGa=NvάsJXOw/ ՗i);>Gue %C 28x;m ڵ:<)ĦYIή>TuPK/RRٮRm$eyMH_춛ͨ ,FNq`^M1C-jo셤魛 FOxl._`J'A,^%*)vLh=.N lR]kXEv4D5!sY(g*liZFXy p <5vC+]J- UP}e%)Ȃ LC=Smh[hm%F9hgnB182#q>UK hw3c+VȊ P>E3=g*5:"Ɔk5bC9:f#P PY$uQp̠Ey؇dx6?TOx*Gz 8 JAzWZL>rƼ-cw6:uyzAp~u駿Ӌۯ8AU8A6'(RBSBTĊNQ RgngSyR>g)Ӆ 0(uv6 LR` #2 3eBNe-}A [s*¤b8"WUNe79p*ީђjIf7Dԣ7^;e<&7k$GZ\SyMQTyRBpig,ZΩ,l-/wNe&' O;cs*wX 'Ckjz#j}h~ M͉é9 j?5'f)z9*/q6xWE@WjG,` ń`1Ky(D F2 j*IXH޶3ƣKägÎR.8aiUq(ӆDb\dLaC_Lx$:ťRw[&ń^v@t4,}yqvy :W?oz~}92ea0v(v6d[_ڜ]9I vFgehId֒MAT\$9.,E$IEd PE  dp$WN?\⺕TF_6;`ԝrOiQh[ =h w2eR(Mj2%eQp_|6Be8 V69qs2O ˬIȩYgQ-fCD$wU mo(Y[;CіT !??$G_[QeMGyt+OK^ *ήh*f{TC5\[8W/^<1ot?Www1nИ=nB tmj``OjT[9:HuA"8a[f&Qj]d4JqEqm5%gPj-"uxH8l`<ltV3ѓ*l\O`#,FBl څ9Quv'L+KUsH:m*D}+F}1+BxO"'H6gIljB Pakɝ6y] 
4JCԡb&ڽ&dkwcR7jxjDEQmS1nLƒݍ)Ib+LM@jbEe oE5RIܧ<"FIX[o!ZWp96Im@E]5fP$=L`Al_cz۳c`>el#;ZR/*ې*d3|"Xb7U2˾~pf!V<Ԏvh2ȝyYx<"r sy}qvy{IfԉD?Wo4$mp{ǧ c *txڑ`h#4U8uJBA+-ȸU9)&pbOsw4{N=bz;hg!YST&+pc.F 6hA+$چguP8M+%`61$7KX&[/ip;MVIwlsJYTdOERQ*me>TUtl%&dXR=pݞ9{ްyfC*н"nD4JPA'Րjt"S6ND[EH.`6`kM63&Ĵ_ y :IN*A\ŝTHwrl/pl`,#*g;ظS[B53ѝYhIh()HE"(C͸)x 1<<&Daad[f&F MM d\ʹy_wvɧӟNT`Hvʥ:})&" %5r)!0JM!\kGlh^#~;UEGQVSTHvcX>'#$#S_S>S0YNP@>ѩ[jARզxe,?fZo|\R?k/Ig  \7t5kJJ4En2CZ S1~\Y mx m@^ .doĨ97A۔?yHxUt˹I`e!,l~ ߋv$wsi""s%:Q1JU_R ܦ+}m-RߛӃ5J @:ǛCt*,2yDfhۍ1m6|2`@MZG*cg!¿8Eׯ?W$//+㧷nZZpjhy li\"bjr h ƘzM@,bZ4.C.&.l=CgLhzpv*SOzQH)!Ak@= ?~ͲC{䕠Hր(!"t֨Ft]ⰱ+AQӟ E><` ".T9qQ~(sF{ˋ]VKmnRB5( bD*mUGmTSՓϜވN!pDE$[npD z V UbgѿXg}Ӿ&N69pLG7 ll)z-)\=˄4i;k*rwӀ)FxĔ)"154NtY;m a7 n 7z Dֺ7LB Ѣ H{8,wbE4+}F@:o99^`2A-qA=%WILc)q Xׇ+ϒl^F |:U7-u \lBҒam)1%u.-u%Zd:mj{>BV{f8nq[o1tGj7wO>{.KUd$YӖf$(R:f5VYW >+(͑-Dcf}FckTՌy*ϓ )PЗ[bwΜzf{= Z{wUτDrByxڴpNk3;ڮCj:L(o @Bgv7X}B!.bq㧘cxq9=*¼jIn2*ՂK&\\W?;:nn..׮/>ܿy^uw,w\?~x~m&^xc&$R6l ᐥPU R$`V{w&V_#!LPtgv\ABI'fBpS٧oY"Sn eNi/;1ٴg[krs>e 5lPA5x7kI;L"ph,}LB Zci4֒mOҭ65V`8йkbsaV},[k,mj,.!c\ yԚ4k,zRwKbHn|۪S!d{POAO1Kafԥlͼ@_d3wOJ4Q4R~!r920Dŗ2q Tt0V!G +J +rh]VxB贰bV|'PJ>V1BPP\j^X5Ny1S1L[0xK(d+@ y', +gu_x&0ñ`փ8XT]Lo.nϮ/nn.w}iMv?|xN_?{Mru}3bf`1fUzčYJNʱ1MixC/\}+"zV;:Ug imO%u#rV8i=EKvj=6zm%@VnXFGLզimbkS U1I:1ފѹT rfk$ek/%%B㲅c]vncAhvz.vkN)Ө`"8?.,& M@">p7$!@C\A qВ+/6<y~De tRFDfUSr!/SrIJ6 +vg>|ԄUI]Ƞ]c(O5|v أZJOkRݴ&G]S$E+ 3]lmH0ZfR+nNOug끓Z= hl**bXڤPLٮ^e+c.4y/#B|PENm#Z񙟺\FT3-Ԏ3 1'[ D%1\0/am#8.1yήϯ\.j[vWS[]hSOug iȥ6[Wtz "_Pe#ωFNT:IC^PjWSǺ8Wy`j42tYM$D!D|Î!R%Zu"A ꣘s0FJpcSzriHxM&od4W<#S'7Azv6;;mLDi>Dئ 5ڥFئܑIlFU%h!=LށCz2y6lF&ˡBu4dd9|bNLDj&CcuBCjxENo&x<6mU`3hPkoAt3,2ȡD:X)=z.G5$OS*;;kt^﵇(|qIʙt\vm[x@ \:Ia`n j5n$EKp v{_ֻFք&En6M *&Œ M]F`7V$`= R{-qaP@:YpOER Kߠ$=F~NS5@zٵOS&oV]}b׿lij4-4UgYXոFꆨAUx,UL kυrζbi$ 1ISbai ̈́ IݗB~-j1-qSN"%C1(V3ij 3-ygj">]]oCePYOFPELpu@OIz+&TI" j *jtرcB5ؽJ/Zn&zU2`}SPP]3j*PM,sBPm;AQR3uВ(ʶZgUB E[c<sJThT{y!Û_7{S0mlf93׮p[ٰDal˦X'U{0`YA-P\Y J "*5Y{4N7mojO7 7-WЄ 1mS'dl@m?hZM1XmM{:?)/+ZA$jiE 
-tfrA9R{wV?Q9$-7l7#Ԍ1*?lOhZ17-ƛzh)#hW}j)^ N PdSwQ 㮬!F1@˱Cnx%+]CB:&ؗϡa@MnBbTK Ԧe.%޸ot4Sԩx@ xL}9sI=OƓM*P1ڱ1&[ou`BLKDe"OB0u+TpN@m$/.^^o7/>ՒtrYNȥdO'\J*5B\T"3MҀp#-Ti2!NP3Re)' []MBPle".FV&ſ99*ER_"5d݋ gcWc"6H|5k\n*UQ<^B <{]*GR%ŧ@hY1PoLұuV:7Rڴ-,kyi~4 bM:&# ̺D'ur][16b'nU[}aM_&M:nVaXO9 (fjnPZK{MYt١/bw"%\Y(3Qx(5J}f՚!BI:L˔%h>|Z9$s!P W6 qJ암l ql}Dj0$T 9zU1k Z$7ҙ(Acj疧S}^rX\ԉnR-w]&--~|Փ?x᧏,y}q>ߨ`SxT :7Fh!҈* !QwK&. ^ GH _6I/0Uh0*o>ywJfXP ANг&x^Bia- TN1=2NdmG`텩IE 1j< sQ=eQb_*z(!LV3L`tM~TYS)dR TkjsT| gRS}[Fp!) D.$!jv|95O悐6K۠j+ASy:.OoZtMte0D%n!!Nȁgņxbݔ8;KnCJ 8{(!FTm=AdGw>gwOLjinX}H/#Mؐ,dS ѐ9iO ) c^@!zє)0Bu!SWCrA"3_EC2l>[t*TO`9xJx4RosxΑY;wqw{}vt4)J# FkKr]ZR\`aÒʄOS^IMqR)Ê!dLo~6)ξ@8:X>|`<嚄)F\O_NIq9)KqtR\<䚜RE8;! $UR\~g;)NɊ2e :)Ο5Rg'ZG'8^)r5L^\=/Wa <9O0Gh<' ,W@(A9PT!ҹ̡DGMJeUbG1mooMIIs)D- GnJۻ)nJ{\rlE;PxM91zJYoܚa+,&z|"JF 0-= b V! PX+Y7% ;b$+pC _Yv<&қI(GYM&.N44Gt7erS>UmNu48[9uaO(R#9BڜvǜF"y R.u1g^;5Uv4%&Jƭf昍dL)[B/Vg(EBg7g;oǏ$y>D?Cs9U+T(:⨼1f< 6DCQբM \p/AلBIi*(M8S;f4YwL /ӂA SjmZ7؊.7]AFKt{hL$P[f3&o^s=-!AYoٲqPNȰP,LHsgt`+S$ˌ=do7@օA%AǏG RZ3:jz ol QrXtx?Nldj3tY,cӎaO11Ƒ $ $iڱ 6lb(A:bwFqe$ε:?(N$au74>/Y;b,]wQ``'aT~|(S3!b)8L=Ҋ?ɣYLJgI}u?ۛ7Ow_ S)8]%/`0u-RhI癃AFJӹuHd^o2oP泵A(*qIrcʬ'g==XEbM_|Sݶ LVJhqo`A~`ACZa)wɥF Ӂ0m c!3`%RWkvaHirBL5bUĨ6 #PXTj+5knH_]\ݫao_>wXO_zӧoߜ99w5/C(q]) `s"eRZAo\bFѪקG2(Q\J[.yz͞9QC*AbB8ה16'92ڞuQ&?J]";C3*PbՂc@c1MXi lhSfoHKن} AMĽ&M1 xSd#ePThbkPlniَ9uwn*17*T4eS,,: Hl<=8P)*D$`~&z"d עZ.by^LdZ<kuBiZA!p3Kʿ]=˜#|s~sp΢{;|c'T:i5P-m+B 1x&PGikB}υ6T&Fp@=HĒyuH]PO&3׉Sȟ'"H>et&nY69٧'b]Lf4k2b'\Ysf>'IފT[r16`7 Lxu'LKN&WII<#f_ 3YFzr-^;פg&3MQOj`3h@hd';@w2B^R iL1RAk)Fl=H=i7:")qS9Ty&3 ߜﳉzm[ Ȩ>%&}2o95 |Ŗ6bb#\[QEJڭ3cer']ał)]Iϡ%GFhLȵ]e'u EB94iIYV=`wknuָw jL~Bx;:ВhڣR,RF4nPT"$Rj@C*ie=,-WVypsI-WZt[|S)^S>JsG(pM(+MVm|M=POQ+g5b/KU9)τ PWip7jK .#BkP8Q{:塲W)BKrݾQ`u)_k*v"3 `RN˅T`+a4 H !JxB M8ST;%@Q̴%`Tq@nGc-]>jD)[: CTtk(dt3!Jӕ+-Lr&LWu^GwNW)Jh7 X?|2'dT+$,iQU۲WD$ d^35H^])ϮoÛցߚZl#iNw"9NRpyw)7&l բ 8}6nvQOXC8aZ}peoz Ƴ.v l.(N\Sf_,jqP`OiF5ɍX mt{dV9YP3{%l2d#i$m~V_9X2Mm!8-0A$H5/&d&*BLTr yD)%T2`\ 7^!r^ %UI5؁Fs Dώ&Kt524U?6~znxғ 
/C|"Sz"A:1HH;&{4ǖt/U$Y$F4'#Tgx!cHh={y{bPb Tb]nrX\JP?y9ʽ3{g( e4͎2c rwr"b4 öf(>] 6||Ye3y/܊ok2kZM =U5I e}jF<0ҿ?Ks(9]tTz+P*`9o_UM6gc p];Qr?W1d9zlY9VpV:pe38yA=ٜ[ta YOB*$DԮ'/#'~.mhg=wxuqywOW?oz~}r?@RyD5Pyg-/L[Rhߒ-ä}+zǕkQ@a,!%U`eNh&}+Æ|0W$-ѧ}\Hcؕ>6ؠI:EbE[ۥl#ͥ &ALhE!ոnַW:8Tj ,fO;mvs:X{[^Y~-0trXp]hq"NgB"\Y[Z뻳ۗN ݿԄA͚hk4$ !agq 2[j:BA9MRI::Z7^[6jYbo~k%`X܂"Hh/=2G֫kofXiXCú(4Uj4p'Q'SQ($ֽE"ea]ɟQIqKm!IZ|3_'m5;+=;Yj] DQժ O0XelLxϔS6%NSiDr cC~L cS6`"T%N beV1YGE`总M\48@د%PaEL ӥib\?sˎaP.q :{.$Tb380CFx$FAG X!5 svIpnzl@G͇^OA6ÎoƉR2$ɹXEOI")!NአedH5 W|bel;(x){dáz35Ӊt6l3l3ވ zQI Ntq]KyoperL.@%5/_o38(یxObU,9Y,DYLfͮAjɬ] ej"2U9T怇iWw#80[WUSI.XJ%INM*@Jg$"f D/q Q@ΫW/.W遏p\y\)@37\uC燙Oe>yX6>뒾pb|˖Wfy*-[(L&g'3!Y *#-˖pP|I#Cg>>1<̷>iC|'H2_]Zje>?K2߲e13ox xj4%>_8tM5&i.'IMvkJdM]?:C!#)aϰxD a1-60@:}c8ԑE+ޠ1M@N1]WJP띅OEj0)}6@7NVkE1oEs1jJS}c(aLFmgѹO͏**U%qSki!®M>n'_\]]\]]ܼxY6іuBa;;P <󼇩 iGrJ.viXS^c2 {jI;*h-otF*ShןF3&c[8][T>2%٧z1RP-0cvߥA7*7r4+%} ڠR{&8lҎM_LATaej&8l47] i~MZ ?tYLo3YB#a~iu6-2 egrRNW־Mamr ˩l鄇<3Ya˫뻇t#1Ǐ'ٿIOd<<@ɱN=PmnFN *tNU`R^N]6Jugr!W j{hQM+1WWrCA3fc/v}!]ⶭa6em3-UPr$6X=(Lf֧qA,鸅KS 0h>,DŽ؉ѝLS#Sz˘GS9XYg5j% KZXp gV`Vz,+J-@CYk z𤦶46Nij~zTS_jҗ7@ɟԖu#z.l,VDӁb>XC*X8RAuȔѽ1*zzW:tcZNײIYb4;n0 ɪED4TrRwMItw*7X⬎ß>>{!wJ~sBb$]]|h΂{Ӈt7lgZ?7k+NT# N]AdVP|Ӫ"L芰5c`D<,=gBIE?P\\a?tdzRUw]}RHȘ|d[%Gu_JUϪ\05VaS5* .߆!#$ '95K$<.+](26(*h*SSJ_ٔTʹ|y}vywy~v}uwU4\~Paw?LtO.r!!n]M.G\=h0(p-dwsKeWK:!eek.ZOxZcq{&jNi: a9`AdD+:ɉccCMBCXFDql l@Ix[C%Y:;?0Ns4Vx@VXYO3XI`J >aF@)Tt 䟝 䏦IaTh,FM2u%zb L w5h~a#8 yރB=B_0o#%m~JE9X?*4XD0Fh 2&] j2ۗ/~Ww&.{n:yso+_7翺BDʪ2-}"3ܻ,JZ a6KD$P03y{roqT.Eؼk>&g.rfIRZ+D=9*f.X,O$\Q- 7fj:XX^C[54ELn2h'96;ɹjQi !zdcc9z6mvS ;{+,do_b_be'9zESI婸b >&dIhB 8\68ɥ'Zq;[Px3$WU* Wa'nb=Ā[H(s)[)o±-s(4"96(\l' W"K%V*tբt#14u/J>`TΓ* Qԥ.Xj3}z VlC+g¹J9?4n͗8?7 ڱҏLYEyO;=a=u%u:C6"s-_ \z#7wn^_<]zӇ?}~֬~.[>~b~x7˟>\.gsM R<3:RʧFHeJE\TN)ۏyySvv]ܬ뚅&`V!M0ӮMk=j3# lF(4qn"v.`h]P( ]P8&0R )'YK hJ@l׷(]?=@cdY⥏A w h3$xL +ՇOe]t(GMz!JIV`?|v&";X*ASZ% sƎ -JLjg? 
\a#`ߢ eN𞊞ʮњ>;^opj"[Gi6у ,A++"I t0vezY|5ρŋQ LI%bRֺAƟk[|32FkBً2M+R@.-P_^`K-hťa4u p2 uEWdM,!p5,4SŭYFlhъ%G`|ps}usw}wuwν.Bc>15"Vd,Zd♂s{lgEfU7RD!gm Q8´Ի{~*W%;8ad狌l{"^2߆ws?"%=_Qr(Uab]'w`D1v9 Pla+N:^F*3WŸ7wnw~x^2ew2 _?;* E'f7G'܀E!a]3 ;jb!PW<BЎ-tK ?Mcn\>5'z}0xL8·5rbhS0\l5 M|øU:2N/ ttXppYS4y[W#t ѵӵWO`)kPB PbX}:4NjqZ䑄˚B]*FiZ@P_bnQYkzuSpYcwY o;A;sRspQ]A٨|S Hޖ.UTڱTut~X{Upa.wj?װ*sͤW;r? e]g*W;>TT!d\$c 9eGXl8l<"čXF9uR IhAɵ vDLy>a.1-rКE^^- N$p4JJtn; Ɛrs)ƿUxrǮut1K%+pc* P|J1etx<X 4b X gX./CŰü\ P JMN(3n9ĭ_pUV6+jsU __  ]b8ǒ0c, 3|4H\ڏ[fGDdUEgzт G[8r|/<c3?< Bx xqb~Jd VL4UYAU"凘5LIȘt2yߡehXXy EwZ]U)Q30*diA?q6bʉaiŨ_sjۦ*ZLx>j_`뛫;sϯc9-HOhK@۰7dDr]_h Os ʜQ !S_!:5b 8qcPE@r)GaGo,U*b5Kgv 8ywVBW ~p @?ہщ?e>V ;drdefJUFJQfC<·!w#v)~x)IW1__vurѹ(vb2Ϯ|rR:pzu,%^SSe'S%ORdBo /4rxpLmx^T5Do}uY E$RRPJďti@J7/hև)]m„y:Yo;%UBh%d=]nG}d! MbF`K xBGBh[⒬eD˗CGCGB(dK!b.c 7!qX'h@K)/oz킷i{-O|4HtT6X vd@J3CÅnπNxhՒ^ bR X* |X% Ű1%6H4M߅EtZDqn^H槷3V f>6B,eW!jBbVbV-Va:P_l0<[M2іNPr,IfQx&@j]?3J(}4< |RpFjΕQmͺ`e n B-S?w?cׇ] /r~?}۷~~8/@~znH_Uwn*]Gt?))4FUķYyaȭ |IBQcY(ѝdžϲ3XFՃP I6Jv*cT&ȶN\ٰ~L lx)l"ȰPu2[E2ƃVH1atBACqNO]m9?+cN؞O/yaYWz#?Vs˱2?L)?@|]lڹ%6%f(-QrKDK2elݹ%[b{ꖈh?ɖ=B(G1226PJDeQ 3'B|F=[Yk|aBh: Zj 30cc8Ó&6YW csc?`~,1ԠmcLul [gǐ(Mcz3s!ٱ}Hy㛫7g ,/;(@8 )*P'Gh}r}P7Oytv $1S.3bQk1a-}gB A!LL ܋3^+}EkSuC, MpPJ} V oU j~ 85>M_K}9^ó=BT"oF 8$"jG!+>[˦%o>U#fi6}-]Φ߀n7C-#۴Rk¿YBf BhUъ*>}4" )p;Qso~ٞğt.3@ms!e~ϝfsjCf~)+chRb8a܃s7~;6.v6wmIg}hI?WkA>9 ds7kU6w%+M<Z}y샾Ql޴Vnh8Ϙ.ȱ tvB2]'P%1Pc^J'rVIB@ 8E!WL[NQay8[fdH4vZiefhZ",%rj8ѦB^ɉ(7_HcǼ" }Afhw/MRv;Y?pA+eiOe1®(>noW_~x3zU+?c;F2*ل,)2.U>K>d`b7zؗGlq*L_Exf)K2Il{zlw,I-pܶĶxz(V|-ķr Kw?5V(/[b'tގe;7Uf)Ӻ7fJ@RUK&vHH[)l|Nim,ygA7 f(0K*7Rr-tB YʎstBkTYJRpԔnקpӡ[^EoGs/*1K bf{=%tH6I;6Sh)[^gg,6=3KJq.lb)$Ӫ? ]vS]c"NX%ZAB,~kuIYX[x-kj`q<>:^=zߟno>{dEM/SJ LҟS4~ꆧmiCxȖ9͛4Ğ;R[a1v%:=Ktq:z'l@(t)u'P@йNՙ'/R|! 
L{a=._oޝCCM6iM+6 mwx<#>~(sԑ^ Mqgr{ԨF8&2-p:>]ژ`$%`h ~"D+E`fǫ"X"(ܽ'v4X26' `$Z B"E꛴a/IgDspr_q:-^& o@Y#O<^aIn:*MOvWYs]cTuVvȅe{̢YmAݑv C3Z=v̌,Q1'S@ѯScPvIIKtU#ߴlambda-lambda-v2.0.1/tests/db_nucl_sa.md5sums.gz000066400000000000000000000004451445553061700215440ustar00rootroot00000000000000$aVdb_nucl_sa.md5sumsN1<%4k{tyC'A( W$iffL&KmOj zr8]YϛgLkooޠ!@P5 |5΃m̛h*jk,4"ej mt/푌FaG(r-.hԴ7v)Rs">~Zv#z-,i !Ih!o;X:T:B6f,+mglambda-lambda-v2.0.1/tests/db_prot.fasta.gz000066400000000000000000001763711445553061700206210ustar00rootroot00000000000000B`Vprot_db.fastaks7-~>aHQሉ@W(\j~c˚6cp;_B)Js'3mU\k%2*]]__]?O?~>{ᵻ7ov}sx׻?<|g]IRr9KiK>ɧc$k_R0sE^diKSϝY4I ^샮!*S!eaz6퓶of1yYuMa9ʗRq!83KY ՕESɏ uRvՇ})XU\Ę3~)%;\b d2l;sɵ!)38oa2!J)N`L-rO/TT fC1b* IeMTd0Ke4i!$&|ڲXXw5eCs2_ #[5x#]*ˌ3͌V=,vFl"2`ߛU)F飩 &K0pJ# (RS}:#T8H%d}Ƶ%3fcw JOn`(:4ƦqߎW%%JƆL?B Ξ1 88Ơܾ}{tyݿ}O>~}ݫ'?`!_i՚:JgyĢcW'vLz/B*JnFO -LX5ğ~CS}&#R({Dt/=<ӅQ+~sVZ~H6;t=\U4&|*%j2> Z%VS!(s|ŵ\f]rtX5xD}VNcxD2``yg?wR' 4%  |~BezEҌXfo9I"966IGmBLCE8tX[Svv%ϳώ6%_}{w?[F@toǢ/p9CT8le*χ\`3aLڄX CUGga"F| Q$ 1vX~x0Ӱ*%Wqȵ:Ī}Vy ڜ@_kg}yժ0ďp }^cq`wEF,7^Wه5DrX],těCr[n `Fեǭnśp`uVE8b7k3{U­JwL5yW0v s]_0O+n;%o.XaUS]^bI'-)8C|C i'$NxFfjbXae22)>؀(igZ+p %AEԪM [\INnsIFvb^`5 ,Qe%HZqe& r9:u:="XظJs7F&>ك~nVb fa}R^ Ns߫ P ƫT_eGa*Gxr AK3lT5VfČOL!%ED@ .,}e4Z*xŋ翿- {ia @8k[bFh\zFp,vLc_@)!@ţ^~GUvg8'*~KNLbɰ]K!z98/|y6N (`[MXB_U!qB́ HT* rP* ꬺHz>&V@Zrl+ =¾K9؛`ptH-PSAW]j6Bq!K=܁֪&aK;Asz)$!'BBfUb {lSl"K)06qlf6fǛ@Z 肅jy@G3Od։TO03FMw+H+9FB tnBӚQ'Chs B51=$nH=BB1Lɀ gc<9hbcTǍ|q~ ]:x/$7۷~Ww߿!lأŕ9&|c Q(M(  i/& PUOQ*XN?1P|IBRϣqH M=߽/uP} t". 
So!{ Ʃ(a{ʶe+z)P6@hũo85>Zb%]_s|83N-85,>q*0N$f*ttJO^UH ;@"<.eaouX6 yyOonΞ=XPmUOQE@;DUGc'%RSa42'l4 BC!\"2,C끸#0n5 5J w\IƄYje>7[U(2+*``:lm1bĠ=" : XX'70!*S4o-̟d|1Q祤 JVV2ꌈx/.Ϯ@MήoΟ= y?9[PI[_m(?8C??oZ[Y6.Lcӑw=-:, 0aG KL‚H7Tm(BoHĨJK}575 aeb\f;oT;8*p"+qAz|♈iutwIn^%xT{`p)+&Pz:ax XhHpwojBH==_O0;!hDqpND>WPvFX kX# 6{p+20JD(f_`Z @ 0ίYTA( 4IR{."TGSp\T^ z]2݁2rh<`׈ qRvY3<ֹ&0UpgmN%WO?ݽݻ?{yf~A?e F; =Z= FXl%KbRD*26\MT/'Op} { .Hq̓$.bؗN{ քTܩ8bR{KxN H7Tq߭z2+-UNT+cB*XNx@^t.#0.Eoc\0PΛ#EZ=)RgOΟ_^xöۏ ?AD?3yhRo?ˇW?֏?tww#?aq[Y焸P Q%٠"~ 3TMjN! X"s`q32 Q=Z:pg.0$H .$#3%L vF`)1q;zDfmIc*j fOldVp S׃9$[U/<c8ZKr=s0"ͪbߩBImČ} G<3#7swy]2oXV$Bb6,qWSpOz]4X9:swk&Xf/^ TY?0>°@)PAk1YeEV~җ~]r%1 n`A?~ƒWS2?%C@k]]0XT8`JA{[U Aj>E2(2BY"_vV)%BNr*;Ljh'7]7'Z+?Tw<'ٱCujx=F4j]5R &m | f`% &vvxu/ǻǛg#%ld,Bn S=ܠ] (5$fZ1 ę>țOϮo`7W[׿U#E2踲:EA5IRC#l[/SgG<=$ׂdeX L5wAؙP *U/*ч ٷ @%$'\[HiAk d+AʡK"CJO L`D-/%U[b31dLSE)MPaVtv݋9xgO$x$ǻ{~f|OxӻW~Yv=bW1AΊkrSc5~4;?{ўXO=u)WZJVZ3F~/p,t8Qb1ŮQ;Y? eW ћ!a_pq66hG=q#j2@,LbT65У@VEߚGd')kq !Ϯ#>j~2ݻ 5{7f,cI h`s3}?pwwU{aӼ reg 9dB8Fm.!; jWFpwe@Ǥd3AJ3C '0jdrHŽ Yň5 q *IK;e< $Cb- Cuǝ|ۊyQ sy IHͯWɫ?>f_ŒX>!QC9벴ÜrruYUAЬ7척3;P?42T2!D:@bMx̶MZlV쪩w(5"t+US+cf%Ӏ[̐Fހ q? `k.]|";YsJfҺ,1ݶCٺ,W_]6\+`X;WIuvOzPx>٬4Q{S/@4ļv<ӷA-W%iFoW A>wOM6_Duzx +8|*g=F LR֢#l@q VB[td%z %^q+N`h[V*Ttuj + JNCW筸2 B5x6ܬ`] "_GNn S M L|^wU֪ٖʬ9dp[5<Ȅ(|y!n0#5t3 nK/ٵ #N-JR-Y`l#KDX p>(ĜVXZ,JCL(UZC|E_? 
iEPo @ Fe=ALc%swPl7 SAder#9C1@#mDw:^B/` XXXOb"O5ZݾH&;"L- yz`D(זPr!6i ie )2U8@ FA.0[-҈ /k 3wS*&;F8Ӯkh8L9_@M/˻vpr4 q[FV#LsPAWgG(+|EX*ȠHZ2bv$A߁."겭س8U-fðn?m(kmڐGݚ 5%f䖽>@}ߓL0.diPGg&[::V=GQvZI._g: kb10~Je}9ݾ,ldiLTJFcsXΟ]<8{vqI1} *RL],sv8ȥ0Oʆ},Fd\z [ :eFi z٧l.-b-Tbdž}}d" yRL!4l) `~񳺥9 /kߥRLvK1bʏmcA4n*RLbJ!sd_L!Ib.sbJ$ѵS٧bʬ{^K1}zRfMOK-;t4PPBTZ)sS,Xn\ʵk{K1]+Wٳ)y 0?pon߽O?<_ݧ'鱟ܔ87%q9n=ǔAŏnmDZ|;qn]C:0 a.OSm10sR]%lmw~ KvnMlgs+y(v#󔮫k92p)VAWF <$ %&*{*G*2u6tvUqi#+5pM] :ۧ$JVq!:@L81u6VoW+[#)iOŁ7{WL+~q}vً?Lpǽ\ td=eH`U0ƅN3͎ <$(йgrY4Vuz!"Dnmgm6gP#^FpDLh[+Db7m4"t;ށ# )( {L-7ΪhSӄQL-J3m t$u X/N鰖RLWg}KD)j^LtuCXz-}dn| 8|ؖ%nXKǛvb~tCȏu2+K-5ڊ7 B@J@~֒y,J]ÛrWϮ.//C(ϸV=#,xXf5Vΰne[2P`~KM($SEKT#fDz'$K{L] &Qo K1 `T? jԅdodd̥@%S9*o&nDv`'L5d*`KC/mTjST0@5#VHTV2 \5<0`v[2GpO[2e9o %SO![抛۞А63df]R3ZCKvr,x %Sa2P L_\.\_8;߶[o_b;N^تoOoq+[-^yǏ>|?|'뿜_\_w ,.TȬ9[k%ighϙd*KNAsnegC:0qۍf,' u =˯;klPO#$p<)L25F]V&'GEڦȍj6E_{EZu;׻p/pGG[#̶_t-_po$KFKTUG-d=?ݲ|lOmA0RIJpcbMn\`I-:!lSV*]ix8"{W3 * 7xtOIE,*fn ܷW?7<R1~+ <xvq\QbOW7O&Vb}\ݠ]@ q |󺤕?1lyP4hs7p]iȋ!Gǒf$"_Ӥ1sG+O܄zƳᦗuzQBm¤,mCE!*:SB25Ys\,TB|T.ܜa9 ܘ@vp&Ӛ؍&l˶Qؐ+~Xt8 ki2 ǎudBph(UPrϟm /$9}/||򟟿pw__tw{r~)rxAp fCKF8 1[S쪎UٖɃ9kv sn7;tLR"t䫳S٩bv٩t㸟gw! X7-q:B A@F+P꾸mb3k\NMQoRt8n?4BbOl+6*.񺟓vsƘrc`b1H ҂mV S~ʱhii`O[nQr ױ9=ϝKb'Ќ x <d{Qg1r^+8{41knWqm1-mqn+knni's8k@ۼXsXY"=9_, @8眰#m=Tk|HOiT+jR-C"#ߖ kYm\ [m/L!d ld4`ᙘȭ]4Vswi⭙͋˳?̷߾_~{7߰,|!VpkݵdM!GM$0N |̱fwfn=?g{B70ǢԇǢ.@A0f*tF* y {:@9\lžu,JڸزbVp1ێ1.IM'{* w 󳋋뱉((Dr~}Iuvtfٯ?7cRmVJqYgO ۍSI;]. 
k{:W؜> Q2Ќ}Wew^։(7\˃C hwCynRF)ڡ10u='a 7|/JeSjҌj}h꿂wl*̬eg]x` p c@5Cs1nֺBlk _s9.Dq ќ+Y }ZSXlNXb X?8ٳ󋋯~$?)ʧ;lSwqNOwyA2%l<;,;q9<h.pmㆬ'cYKNř* r{?nh;gSGDj\8ɳUgD{=M?ȳONt:擩L=ƃ0q=?ز_{%../_\]?]k~*SQlq*B㧢r=dzBLJ*7d7ݦQ{q[{჌P2/e!\yު9%F5KXJr[ +@ C^H .3l/h22y(kG,xZ{e(v13.œ-eqoZnzXBy% 7OZ)7(B6$Y@Wi\f;=:Wbj9oh0s\w,V\M!ĝmMYZȎ6.vr,F18%g(Pሎs3yV%=Zh-gJfͭxl$|!Bƺ p<_ʶF"& ߋc}P^d1 ٝjwy<9ӛgtG_{{j$$Y*y  bx,n O'-nձEmI).A;;c&F-AS/=W Mjnݺն !{n1Yv7!é@SBuvs~"ilX༬} S)flCyhNzh(?cZ:E" cjgmLtm〳Fh{" sBf8ؠGm54Ώn̝__e\][W~ CǘR`XI5-| _ҥa6ڑl&h4 D 7]?ݶΏl[jpl2{EH6ɃH"}H$t=f6EI,PmGCm0`865vm Pgmӣ f`M&@~dzKަġ#[ q=^{2`@WIj8>4Rt*d愅g0J$NHp#eg=Y+|ĎTlF;/2&6E$1I$+]_:I:J R><á㮲=_MGXUf@'ApY8w]8be!=} $&o {xxzeHeGpjm`z&V28G+0kӕ}r~|<7Wg/nή1aicMé& yhh$#&9H#DiGT w ϡL3aHiG:#)lfagyӏ;|76 P؏>Mk=DYʣd"xNjխ 2g;&7ݬ4;Mԥ <."&yceMOX!!SUydRqnv(w0/<:A8,)8D*mNԮTVQ L@kD~[GK6}1!j+w+?rzǿ&':~ROwߟS7o9>ν_S@RIUn' eO89Ԙ;4c}c 0O+ /ߜ]^>,"=K6"u"syv?WLu-ubei5싑tR)vy'3򜸗1VN,q|pE¬5XX8SX*b֯y.RbƽwM@MVEAZFsqoF;<ZbV~~qqvqA^m텆?w>Emcwm,z27X36FZȹL_w}%X_ƢOX*L-o]a#m,߬j 41yċ,ukdQ Dm-m%^Z%^ Xxmmç;G3r//._y|0mA!6F!5s5͍I`ZA\@YB6z<<7J)f%]Qɻo'qWM-&z),/N HxchXT:RpCC]io·^ ͼFi*"XLJpNЀq aOhF` PCUlAUckl+h;+~>rXǔtd{ZqΊޙ7| %Hv$aSnFFΟj' X\jqj:_iN00 >1''/GDkƯ:6&pm Tjj%Ni?pif-:C3UҋXs?6wK@ؿ{=]]Yreom3=6H /}}c74o~ |L?x%"u}8g0 l:]gڬGGIVDmzz咳VԜ2,-}x:t>! GDQ\3A قG˳^7\b9žYn-aT|sSKE..f2 ;w ?NnW4RC3îIeسGxն6{E#i썬޳o3p2Nܷ&G2FӌCuzl̹P R% HJnMH)~r|d! ?xl^_S%+Ƿ_qO'9"iKs8&| y|e9zzPhsp MC*_: H%@@(%7fJٶ(a&gY 煹Ixn&D'5zI h|CNw"] Rsc ۞Z)|bFkV8OЙA׌ 9ZC3?"D@E ,ڍ麆 ٌiq aˬ (6tssʘ3-NO3\!wxG:g h/⋕vz|󻯿ݱZǔ'4Qs {d0xY4É06ۖ(kUxk|B|>Ix8 A. ԣ`7Ɍv%3̜9pu,\]湙f kMuEOD1$[Z`Qi7%WPv-KDm" אwd]'EubGIQgZ}NH 5pl EOtHVxU]|\^>>^^_>,!}*:zDo|7_3}=ݧۋ7{߿rӼa^T!aŧ@\z34Mdl9YxfdF3 4`%lu"wd)^I%%f34O]&c"B 0Oe&jQ`F4ѽ )#C'.!,,Hi(YXd%4o'TD|ܼD8+PγI!uV!+w r@+.@)ۤjDXdL~`PY@i̤/<فm(f#'Dztty&"v`c9K3YPy`B3H52|jqB0X+R8M[m"#8}ap)uƖwVVWoyW}[鈀iWߐaϪoaR8qp" >HWo*Ro8Q -NTZ}K?^}yw-6$oYOV}ۃG#]{+QPg{寧YnZo=0E.j}YnL&I,gBN$ irs} eYBÒ} |8$.:Dۈ@6Z2q"e&hsi Ad: qD+(X򜂥2?k =ݒE,? 
Mū7_|xwo}о~| 0z^LJ3aڒaW[?ӎ,q+rT0^9F4Ǽg7@L `r9gdֹHa7-y2IRG}D#i$< HS[rm4Aޔ+Z 4z_kHb'bl iid}83,sMK̪-ܷLiD020, oY&_` %#Ф\pmvd/Y{{qtyqJ2nă)m1H#􍕔3l|Pg[ :wzR7Rj6K÷:JcGV:XEDJ,Epj-aPY}f DUD~4baBuHRDE*,# t$.ƴr$ 5r$;j89KOQ\x7XbI-GX G"#̶VTc:Q~@ t3u^?%ؼVy$9,la_YrjT[/K^HJL}o& (dlkꗳЂ/ڔ*r~W_QjGQD{7S0R[H6~dž8C 86_!ΐl q~d,gql3$?cc7ww4(Ms3M+%^"u6xrڻ.0*# iLOɀ& vW,FNqI˧GOӟ|*~t /Sx'pZa0=ts<}yfC霁04vD϶ *djq]lRo򙨗aZD=f}-.3I82wB6h\;E*Xo[װnVÖJ8ad5̟6S}+2"qpG$ݍ2OKF[<5ܴѯeۜ-7 SdF;T LU%=da%etyH( RZk$d*X ݐS<9/vz(IHg蚙2˞LX# /P [I[{יWGXk4rEĵ E< .^fz HN).φ} )~n 2[Emꌋx I[8p2/p'8a}lz.6u%5vf_q{mm w6.*`zSr&YA=+.ᵐrQsRo聡, #'P=s7GN@oBieA\"u=ٽfVH7,#Qn_8_4 a8,tl17\vkYlΘ7#Pf6 W>rW D`#~OKYI顥uhء"t\\k5&: :ͩm6 SuTel݄Dx2gT>O!D.r ~0vNԸԖ Vs uᎭ'!5^&Y=]p3Ӫ<ﴚ{o>c{BE]FϢw5.X$=zUY*exfHP v2sOdpByY(e8&*0mMX3sԝWگJJy@ FJK86,G=#cJO[] ۛr7P޵g`=ັɮ.NPO^zؖ/]W$W1g]"օE3*בMK<x<x-RFrFFcUtt;eKEXRt470 av[)ùeZ[[Z=mpoӔ>p$!bRlL93:Ĩ@`#ĊSS ])tmÎR.n1z᳂+a(׸_T; RHQ O#,'uS]-k3tCU:-M%YS-iM b GQ\k撴 W~}Nm Ήov{[k$w_M@Nu@SKͻp<=W.7a:N-o|ç'#ySpu}GJ.54z)ME8c%"vNi*U6Tui7i4D%mݔDmZzsl*mL?X&՝QX6!3Xmj O H\WMT*F=,7$2x)`l!s1]H1k:x1ł2!heOl'9ŏ<^7rh<#dW{8J!YVƮ`p.NÅ " Ox @vȳ+UXR_C lOOy_'8[}ww@`?ˍȡ/iUzo?wD"$#dRq {H].FEKZg.jkSvgԵg?ɰg?ΝY7PϪdžM=oDag߯={~O(MI* .tgd㣔6=x96\l-5giGp)n ciʳ+` $n#vΕ9:#kh`X)ٳ]r)27wLky GNs- S,.!E?n`sr| TZ-HB|(kwH]{22l{ ?\^# A ,|ݻ/Koo:? 3Rwǟ>~z?}?Ea"4_l@Ml"8I tG^">; J?t)M%ZeJ3Nޙ,gL:#Hu@KRH@{ڣ j$m ̑?"Nǃ&7.rϬ@*2 &9M D$Ֆ[MmQ|-+xCuk7)K^i1sVmW.뫻+n?o_ o{_}: Uy܈|_nA*5+W)نgqLVJJX2s)NM{8U.nM{b~ &RݽWliYT /qiLy&w[ܰR!X0ql񑚵]..BHW8w ƥX?솋7բkJ!P!,rɕCAQ<(~x}t{qǰsDGW}UbOtX28zO5#E/ɤ7kه[;UΫI' ]U"r^ =;ś@jʼn{4yj8JjjRe8OP!F_$|& . JedjNg% bp[- @24'a ":䋔O*>U%b|qZ8Ԙ,=}0=<=5DU ]K}EKcuUGs6Oo>|lW|Qk oD/;6VMS96: d@gnKC.!(9Le%6tÍp ]%~Ƒ9]Jd5p'+W89 k0,)G&.k]>,'$A)5,ƐD$Ud1$աye*GJa;bƐTL *qw 5Y@T>>9Y3 VNdO% *iD5Rsː278@݈Fzdʶ,(ns8Ɲ/LFe6G0J9C4Vh4 {_0m833!Sbe)v?6Wp66G%Z{'gn'#pN8žGD2<؀Dܣ}=vtf4?qg IWx֗,4c7 򁫞 Jk%x<~\AпxCWuX3Fz(W88ꀙDl:`,ǟfRݐ؀~<ݐV"DtC%b#{SƩ=Qa]p5p_}ۿ6_y߽?G(6ؔC&dQ`ōqGNm)DrZ$4HCcAlI'NzZڡ28NYg8c4񄥡2e2&nfR:3$8t=bO]=2B 3ZSMI>c*.$egdr˲ʶck^ Ž0N0Oi'ބ#W I$vuuE:Еn`Y a:œ#N|@nUդ@hYN@]$SÑ]? 
Q 7[7ߝF0.ϱ8HN oRgFq6w`\wxA;e2jsIͱ1bH39KMNQ?) H‰{_$W`sdlEO*sln4(67r;6fs2jsqgs@%zaVq(h$ "?ܐHWpZ|tGVk\ "]|*%aF,Ǟ *Ƚ2' *{'Q,O1p Unn8lz^0/O;'ߨ6޾ӇO{բVUB2R%@ ?!Z@b oWX$thVBRD*$WHFU6cl:|Teo:+BB3nA3:,x3{> g[wg<+" yIܻo'B4Fb< =G\LG,TfK3~g$ ؚv@4)Ͱ|`'ҘF]G맟G}y?EEɝ&i -B(Ŭ9 "n7 bW%g$ԪAX Ȝ.6}?;x[WDT [n(DE>{d=\ Y^R[.]bM]7P:n4P1$͜}"v bb=CŠp{2>h|/,V2F~!r1] 6pm8l /Hp,Vm#1 ֍aOI͖eԧ i4N ̘ʊE+w+@^̜8oe9[8ޤ4& dt%L"BuJ/94p8i1b2T6-N۩a`Xh}X:ܸGҬ=q)oj~S} i NTViӒI$& ۛ U#r6ebqQP_MsTu-qt=z113Dֶ}^FndN +Kp= n V1F]]l5b8ו˻+w]dzn{T@} /$*=EtV;*R,dXjPȤJD l"&RGK<+1Iirj񢍴JmjRrp &e`RuRUh &gDzE_@jR\{nR,N)pyWs(ò!BI39x8:oP56P=G> 4>QpEQzaF(|MhӬMi!3TItq+L^]^XRu\y6y_}w7;|Uˣ?#{N9H,xUF>#lgˬs z`TEz%nMhذ_`p,yTWx|N%̮q#{ᬝg`#rM¯ *gQirAwgEV3Sgi>c FL\|:6'H-C cW2K ';F]VAI/I:>̯iTs.2an./neo~}1sG(׭GϭG=ñLڣ#E"]30O,lb-yXOr2~Qp~^ZCA]]t_=."V)JfmW(k uqb⛎;7t̼sZ.YXf;W e! T(JL,ui_ nLXT繠rg5+QO\>>]]_rzw7GpФR.^7L+\\).^1H%YpZqj%{ K +%w[\yU3&MjtQHi#) 1na~_Z֊*B*"TN/YHA VdV&NE*B!j)TA4nOacggKdB6Z Le4:ë*qDP͑&na2y";CxRtR3$ɤ1<#[`$C.|rġpJt"Qn$+%7*P9z)ɓEr@ɫMNQ8ӓ\ɀk׀K.UQ 8+S4/ )pI^.Dpcm_ x%;i)BxZQbm 7[jx!hʣ^ "!F•rK}-TnU,nq[\/-}UʰqspM TgҀ)AN ej"ɮoo?'e"_<"t1+< g^I6uu8U:ȍ{788>zyOOp7OoїoxOД?}ŧspuoJă;wr?F`ox V4S?4 (:}/t̀$WÏFgtu AN{!!;;}<> 0@eR&T3?1怿sU&N lQνsuS9w4)8,a43ǘCX}zY*e`k4\XUf?$iR!P ap8%Gҳ-aUvO rS~>) 닫뫟N?V}O> ܞXծO,bŠ|Gm+qjU~CYiG im@!Mvʒƶt+ cͷ[ܢZ.jA*ouQZxйșI[kA.Rh+H* hqmm8T? 
1ыj4w[ef(d/%J)f&/|l'ڬJ d-Wjjhc)sgr+j ymk(&G+)g"3N#Yҙ XX>fv,y&J,XH{0'i{ av%Pg| WQ08Ul988G*'jf1fQ3qLIAJ[ZOy"JCĕr0Jgj-u3:O=AIqo޾3ٰ$`U'[AQX{ٓHbw՜j6a5'SřP|zYXͱyB Z9JDz1/rR;9 )DW"gN K|]̚#pi^9k59qS^l_϶zL-rzUsx"fɴ9 Scs*3'ˡ)^d"zzN-iz918s=Mz0+>'X00w<_Uml5RYRׇq>1(2ec"@)YԱŪBM}SUV'D\"C%*vТx@y;$L# f4:㎘ $ D* _rarKeR%8G3Y-ޖNTla2qqrZ Ki3Arn̺GFGFGܒfBZ'Q*u}X ~0C 46 eB5QObԩ,N+8 S[H +Fx 뇛ǫ }w?_!_zҷ[S?ƣ17?m(U 2X=+VY4(V韉U.ft/*VJI9=Ŋ,*;jbHr*3J*"0BN@x5US2HFobm7FiKwbMMe N-Ur^YS@LiV_\_\^\<+?5A3$]WVK61#U Kd{Bĥ+W{Kgqq7@?O%f*T"])[8M+hG\[q8)ȧ}~w{>Բ!1c;"D;Bi ٸOgRGl\G 6.$F XK:.Q2.w3ȘΤKY6Yd4rZ6BYz맧 ߇_qNL"K9לax?Yi m-rj78K?%2% Pː1CfX&ʚ ̓竁!W`jZڡY\W}EMÔ$;^1U Q~DԆ oן ?mZ1‹}oӎ}M1HYK/P'Pٱ3V/_QaM#Y7ŋAocafF=3ܴ&cɀԦ|9W8mbݟ뻻ۧ'Ho/o)O`o+N^6@/ +ي9g2CPt_=▃Q 3HɽV7Bkï]EJ]:j?#c;S'rJ.<ŗ*~ZWaj&.zZ 8F0E:H{jنymnnݚ,śY(+6>Y,k {u7ewr"ԙ$%3B2a,kM0X4ڛRbo;wҩ_>_?>ˌx:]QI@j6bNb6xRDΖD_pt70'ܺQ)aZ0S"J$ TuuWqͰC]NLNl̼36f1'1l2ec&g.Lڶ~;^cc&׶L"v|;!O>#SRju;ފ9!@&Hv~>G<=ۯwk9uNL`qY>診{i7\|RRME ʊR N X.7 >AkW-(ػ$feGsUT=KaRVi\Xօ%\{0qv0\Vʙ̈5LҎaJa0Aib+ 5|7qİ0zK<v&1;[;qlz7tds`|9FO$0+ p*r n(s &-8@œND-v_p 0n8vffguY~*JGڒap] R Ovx"ڶ0zL,Y~?,׵ju_,r|j$#*W)TI:."፩;G۾+M,=:Y"|DU:Yq$$um>,~Cgqc«"- Г-4\rk{@[\\iÉEOr@}+( ;pҀ~'z˿cHdjn3Jny U5"vh3 V؋XE ;"C%Ç׭;(L$ڲWZ'2bfcƭ]702 lr+U,x$ ShZ^eh]g*er a3zdqV!MI|g "YyU]AuY`;.}^R>]f&6ԭ@bKghDŽ̦ G-'Df Ql!U$* fljDU{)"D렚i`?2`8Ԟ?"JaxFOq4dϐ݌==CB ʼnz{3T?g(g(g(KԞמ8Sŏ *(6^{ /E/&[ls:rו%O%A֨6Y«a(Ʋ{ֵp,rf$V٪vuGSȀLc"mL;f16iZ:E)#5/~Hq&rJ+2>dOMp,ƛfps3B;fނE4w|GuϗruLwk6QnCbnwk qvBcVR`OW%D2:%"FGk ( nt] G+QE")I&&q'¿HW! aMT~Ie6 r#AJK.Y\ ϩN60y2Ѻ f`K ᓴI4=>) A$cHv3Qy'aUR+>=UnH(ϋoA<C@Z^yKa6ؘsH5˛y la}v/JEltHK r+sI"]EN~]Edq%+l JVźJV#07En%+T,,yzIaeCUxĮN"oz\]T{|}MBqFrL=ŜϨV6}k}?iQrʕPn4^6BIjzdkWo<([p \T+k2^\=_kxsĬeYa ",ȳBrclԊT8,t^Ϻ]bKîamZz./ܖ\kS}p5"KwzX/1H9z]h.qw[K]zaV| fg0kwYq6l۬NxUjc(9(5z3%Ww=:"eQzT} kv2/H#wn.o~!h(UPB;+{D$#xAG\JVW"Aip^rXe+݁ȣ<ܐ~޾Hg<ȣ \_F*iu:0$7?XLC+#,hZgN'MS"2ZiuN`8פVUiu ᄳxE+ xgU+HR1Ypb*tZ~~x|uߤljy_&5w7W$ g5.!9nmv;vM!ԯ#38sR~qHб J! 
jx2_:x3Ok7q"dMp7onxT5Lyg3޻;W\qWun};bM8UwfQbM׈5qLT»}q>bb1Ki`̚Q D;̯oKۿ;̝ou˷ylƿzBWgth36X(wd^n4q>qbt$Kd$H9ggF=vn@Ic nJmImZQd5:G2W+Hz\ KuZGX;$YyXlP,>mv,RKsՒνc-Ws 4~Vfh`'#F:y} ;DVy+J.kq')ڱZ$,7y7y#‘ ^F6}#?Fz۫NjWaυ0gV)'#LK{9͜хy*IXޮInK}篨ǹ0/'4 XlvJ6RꪱȈT=vzJeFsE蜧)?xz#G?P`sQv(N6E4ZK63QՒclFpL4WF]+.-s }cɆb DS(EBT0QmH!|ԡAGM>O~F>u.^@mꍋKI@ce^m e5 L&e"R]r a10[- *D#GK`Bu AR]=)1qw'޿988gsyҸ~ޫla"#:r^)l-W̛Ɋx6c^j!8ܦGE(%u,q07 s.ʾ^[B"mټ0'u{̞R#:j7sls?5mi& U7fIe GXg#$ ^dі2RqkϬbBXճ+(ӳV!QbeguxYY'V{V V qqg!n Ԗ[ڳ#W㶼\lMeߕs ۷C)Q*.kGߒ؜^LQ簷h {rCg2߼rg|s"'tGiG?}Hp,Sfۉle@iV>.v4 ef)GSCE {b(PeCbu*2'՜5C?ǣ𔴧LqPa5Z'Csa 6AܡLA(sn(s\8ص"G.词%j@Q}TGǩU%<2+'^a=!Lh(xAl t=*ea:t68տcLJjm;՝. * ΄ KkF:gH!}Y*˧Z*ldp'CѶE6d˞SQ!+*>7h<_o$Nxa|Z6{F^8p;!w(%"ƹkY{U箙̎L8BteP}mJGYb[W䭗 oFALUVȊd@,cs7%9qxv["vA~p$MTmY g(B#!?ge((G#2*w7C?!?jZfދX Ko*"BȦ:VqKty(%6/r3-BWbMdGST ܳ4p: rRsR,t$cD1@xud`/ڝIД }眔4ztAHEt9;lsmm dNme⋑\> f.|IȬˆ+~^v1"_x8 rTa#P⧰H+uhaC&9!S_DTfrmoUty#l4]ZJAKN*f)k-6qLm45"u1=-RfG:)4)64 .l6vӔ*9zɝleOe;Pz83fB_gqp*w[Vp?ҤN+-Mem|#J%S6yd bй[-UÊ'u^{'̸"4w\^۵:kiZr@i: G===x7u|"ퟯQ˹jyoNΰg|P-t:/h3ǃgaaFZL(y@h'쩠gؙe@4B(\)+$eR(K6eu%PRA逰$JLjvDٙM7;Ɂ.,u%@E8SՏ\ q pAbfXhmZQ ^M9C-Bx@r1kF']#ךNOz?<=Pcfc\B[IC>ٟ/΅ s!"1me8 Uo̅Ɓ\]΅l0A`KwW#<)M8doCY gPqz65XeOI؊Mc"Pf"cUu6949wh6A$[CoI* ,Ԗ$Z' OXC'ܦu礑}\)Vza,j҆>&#XTq8ii8a6;.7l{A:w@ "=w>Qþ7zM^ Q+Kl+[vd[+΋ũ@VFyŞq[50"_ CZ)V-PQ4ʓ1ti-$`HG >> h|DAJjYDF8ĶE :_eX'\ISWyTE۾'^/XHA.,Xd=uv 2yN-C/ /ź~hqOtg%^N3*}9 N" ~z,mhHQ@NyL2?O/צuEx_)9O'cȶ UZB^oWO;zn )>7owHRR=lQoGxegnֶܔM1~);;%{hJ]LN {lfhÈ+ٓ$kǾ$@d[ flL.=>˅Cr%; 05_ƐW"i# 2ky*dqtC?G6T.EB'u{ V:Qv$@FI]֙]mq*$8Ñ^ &JҤ\$wll eU"~B'$keAi~A:1=w}Lj{f)EN?.sõVVC<Ff;rdr lr<7%7VqD?{O?|i)F\^z3(\ոs;n?Ml2UcrzB<}^U+qmM/6" MoirFצ 9Q RSqf : l s#$FME lȠ \ Ps],Ջl5lՋFQ(vlPf^$}rYzI0޶mq$ #Y}[HL8fY(E9/ލx߮Nh eC}UȲshi9aT'THx $MO9$Y̐ BSS_Gf\tVgE1a2w6~>v}.}~OUTE7kD:d|lvԩllZvf=*.=# S%738wϐ;&dd-HZM9-j2yG}Y#w%Y*>t`@A b~5/{UiP&q 8brMcG0,r'U$qMJQk8  BI]yxoȜ*o+?YOu t*?GH+vGCV'rHL,1!pn&=?qd x"7An ja44)y\nfp['*1dy|ٕy4U_lڶJRjSML<5$z#% S *pbbjKWP$7PlPj(& qKϝ)6&1 b^nֽ!hL:FbXe/Ơ|A;kRKoRv!dCp;HBtKh%65H*tZl6KCx 
ͻG9V9Dڍh9CYǞLRDB9NX)(W*rùcE:utcEşc#BlYq#mٱ¾v'E C(;VpL.49VcE1o'=VQl4%%gBN2 IA ?qQy6`,io;"Ǝh카fp]D3\(Ķ,F-;$fR߀Mr(l^f.QtE K Tߢc 5`j5ٷ[e83?6S8"Z7ܺpgk_qݽ1~56gűurt]:uO1;W,nCxT`{x6ΩB)ip|8_/ nPw?|\]j 6xf$BQ85TEr 2¤M,M;OWqI62M 'Q+yX{1㔃jD3^LXdV.=xGYHZQ, >⡍5*/\MDH4(C<YOϢ^AY>UdZnd̋}^wPYie.nD9|ƋV4y懈^r{91]O GԔh PhN._ L |?dz?Qݾ<~$^B JsuIuP[ |%;<9Rwt RXx1*R< r">pQKW6. %WǡzzLsJp&6%FG=` )d*UP4_=.iX#Z/ǒ\+p|hDJ`EfB?z1A6<4&>>T2LKqԖvwdbA6WA-o.1NpDDhe' O։@6UUqRBz'<%,.EbnZQh᢫u{cUYf7s,X. T9FIg;oŃ:`<bb]dO(p! T=cy?_8|@cnpZn,29woǯ}<,cVr.{'K3ymp-G_OJك2Br6*n׉8.PBl&7rcPI1'j{x;>=y Gm9іKVAAʉ@ёGjs5_#TrEbn. \aF5gd5jz--gwguӂrJ$՜jӟQf*o%*uҌ*;nr,j!pzt&ow Q R_˿tWGv(kh@x4Mրz"Xq{3h|+q)o+ȼTA|RܣvI' b+x.]>R5ZXk Ms8lRH+t^xiHh>Hh#Q+'S -c*|dD"S!dt=Qƻ~y"=v/S ϟ>ůM(Ѿݯ :S6j.(iԯYPƕvo YeT3"ܾрY]${hPvkhJO6S%Zi,EݭUѪv'{D dn"1p{HDpIݒhDnQ+ 5-B iRjlEdoD5{"a=<6l'u?}ZSW~m()or0zT\ DC>1]/_暮Hn5U@U_ uVTa*<@ [L F6PC!C?< %/+*&`+csSNʕ${g~J3h{B8R@ϑsL7RnbXYinCIBMom78ٵ$ӏ[ No`kA¶^ռ̣;'!ŷ!F cekU! lOYG~&o?tDSS^rӠtkz9͈ÐeSdE1 U Cx:5lue_5m|e|: juVs38rIE0!\qxkNqwrZ-O;'/2k_Wy.d{z]CEYrݛ|A2PW~3y` 8.*C(k*T L++-ߜ'~̗ R*XЌlTC3 a~0_ 7WaH[5MfV?Qx}^3+Wtʘրe0liff0f٘9l1C٫ X,̾YUo 6o13^N&6FsM^JxSh"t@.ݍ>{4I=jJFSPk wQuq|ۨ ۸Z^}2wgsNTɩ[.bhc+umyF8Jimm1b8fi1 t>KՋXj?hmius{nªZ7NSWx鍝bҀA, WKdim@qYpi@DWSomRjDSyTs r!(]BŎ7o H̞Wy@NQG #Vp^QNȜܝ+yb dFaЪ[&'v<@ϥn&-zT zolc=x՞ ˃W@;.Α9y'qLҒTx\W0yt.GкbDaX*D=s=Ѣ8DX?1tAaNEeD|2qu:6Ϥ9Ԙ~ʠMNq:znB8Z+CNv:0]:m2ERX5g [sMvR[_-{qe!NHDS™=GWzC.6 *=ULn\,<5;VWPk?%RmWWs"vK]]꣫KOlkJ  Y pz@)%n"~m-hLlہ ɻ{Q e^7$W3ډ>X٧-K,mgQ!}o)4JH Di;:dsj#mZ|b vmW/vmn҈z_pEj6_] 9weXlM]y{ѧLycyJMo*#൬W} egV s Y8K8,@ ?P%i_ +jz4 +ܼOCaBVjtk !|uW U6mn)#+ۧOKӗ.GqHX+yȞQUVKd]%eS6a{xAN@ǖ^(o5ymSxPK})/,Ky$*Cѹh6Xes`98dn95t4r՘L©)$U zp$ ಮ6 nߐ{Vyζg =pe+2T}~yMoyoN?ut>r(g.p>K-^* @(0f'xHʩܸ2sE] ' ([x>Lw'5Ae-^ bf&lKNg^MM&)^%R](CgRх)P 9n/-RiZ"M-MYl'|BhA8ni\v7UN>]'azVډha/8khJą:.Pr{DZX[ pK64!EjM^t_jIeVEx[8alH*d"dB"'`YV ߉jtv\{8Av/9֌0nw1]oܴ=JxȱZG0ȢbC}?FR|3P\#g( s&+6Qr{ԍ{]ߜ>~¡t_-sBvn%?忽(ٌ9L7[Y!lsL3uU_vr`D{|85]BY˜!q{Er.#g\3B#螙'Hϛ08D,~Jj5 TpHU՜x3rIA,a23\ 
ldlX,b4HkX!=7Rh<8f𶑒`4>XbkFJyBUF[!)YJƵFJ%B8ʅ+Ȃ)VU=_#^*j<1R Ds}(Y dYKNB{Rw{n#px>/C߽?Ɲ|w pwNs./6}qQ:?#li$jzh.:>O{|NWfkO9ƘIԙۑm҂ﴔőKg"8aiC 0)[}\G[\ę=e%j(z6\PǦuNTX#Ƚv s!C$9ϑ~ pWޏ]x?~ N)SW fks=DŽx !V&K9djd_HBǗYyCȚ~S7sۇC*[l xx]jE󵲲pâf+bI'KJ^s7SkS)GTE%7cé[TR"/7ϻEC>bcVK(j$ek5$g|U'u] ڣ-[5([39TfV,[~l U;F:ekf՚b#[A&5jM([dٚZrp5c||#i=wiۯ\l1.$ؒ$ouQԉ$Sl+OH1M\$X|V5i@f$JԓH \]q()E^03! @8i M+{9 } Twyg'th+׍]gڋɄ=i<^UIDawPbuyhep薞n%铇/H}On|tgʼ>Xv! =킸1 K3Ւϴ&ZS-r˷JuRg?)v%8˷}{˩m\ z$DO-2 Xr8|& U& MWj^8|9:J7%@|l/u}8$'Ta@Սne>|\:>Ç"A_Ĺx(D$>1lT>WyKSC"z(Q><]6M LZԱ <T&KwVIyq;/d^e`5eZ"e 6[5fV9wT]mӦO/Z]jWXgg; ^,e{mf5d~n+7lVENM]H/q,#Ѯ2pV><7olӗ?/~Yw?'/*p9ѫ bLf  H_B?9=/w_ qgk`Y"1y>A)(΢G֟+yK]ىP2z7u?wS$vQj.0}pyEo)}`PF) x¦F QϫGmȫGĉҐe' #PoۂdeBu';~yse!mHHkDʊ'3Pab].$'.L/ew{FJVaI ҭ\B\ 5kX(~"TL!j#mo*aYuvEdczҒ"{!Xt+WQzQfu=!%D})TZ+cD(̈́SjU%/(U #Ĕf])^6q1Iڪc"Fl8qXBT,{kDQs|3^Ip{G%NI]n& v&a3pӨ'Y j"-j NwK|aU^=(X QWUrGlbF|2Ul8` M@qqHT#]VrO5+TZEJRӄ`mvˑbzsūV׀*#Q2XJ s+{+j;P_|+CRo` g'-~Js=)>l| ~h\ {Z7{CB?c`{v7$nKʿ#D߈^%V+ŷqS}|hƵ5m>Hw܋=;_}r{zy{v漈EHk%3{F@ȇ-.qٚ{FVuE<%x4ٞ ռg=m){F#y3*Þy}w3j0߸gTf7_quqψQ|g=#+ۂ&+W?gy~x=Έ$zQNl?!}~@ӧ?;B"(^vAYdL8ES7mP4ClF4$ V.rO `GTkn*ZEw Fնad|G%zψ t${čq$+T(P 1˘jLV-j u(*KdFiᤵ8}j]J=p7BS1ut`•k*b7 Cf v̶ A ݙlP/[1q\hKӗcKQ:QʆV([A=aU[jmƾj8eh3u|BOQPlMn D5]]ؿ<~㶲 vH3o3yP'3y9'<'Ŗ V3vO[K:b qIMtz% ܘ{I"p=M5 4拏/jK\]MF:HzQATC i?&ADEG !PZ*:L  e^(*bIT}U^u0>N(;Mt /:/}yQ)MAet-ڡ' m,pSHDft(9O 6j@ `ʓp4Ў5hDaBŝRov(vxw˰~?t6[Ld:5-VXxx{yQhfn/SZK»O B$,saQH;%o 5/^iCK'd/$4vbbq^>ɍS-s,*~ 19w""ͫޝUB&h:pD4sCA֩BXk&ug,-As7X2t^oǦw@uǗ_> ?|;cw~݇‡>uv߿~S! ̒CdP?PHNt[ "D:V\g+6UHAŷSb(gfIc8eK^SBRS1vN(׮q="殱秃R J>t.*1r*:j\ E!pRҢJI?Wa}ތ؞yv*f&;Xr6cɎ}}Jv |Ǎdv6}z-k! 
!3ԙSlc?P ay|zݽQKE϶(%PlQlo-I);i·^QuD5*2oQ:*M֡ŽEuNZq^̞E@},Rks7BDAޯ=P Vd"EeeߛV\,Y̝3Yi#., ⲳ}V;y!${zyErgĄ7FvڑdYnAUQ{qyptS-ʴ <4d2qm=d76OZQjssnlD!S8V.%7$nlqº5ey A]CDo;8T9Nڦo&8i}ݿ<=r~s*^.s{JWũ̼3v~3;ſ4C(Qllambda-lambda-v2.0.1/tests/db_prot_fm.md5sums.gz000066400000000000000000000012751445553061700215700ustar00rootroot00000000000000F`Vprot_fm_db.md5sumsˎ\E y dӪ]"_d4 RH3i,ʟo?aM􃵮 bNL@wx9NVFHI2 5Huk| %NaLu\"_xݿ|Ѓ=5 pk"=W? s,[Q459HݷR&;Pi~9+LU,0kfꦋ[UҜrzSœGm-Ž- БPlambda-lambda-v2.0.1/tests/db_prot_sa.md5sums.gz000066400000000000000000000004541445553061700215670ustar00rootroot00000000000000DG`Vprot_sa_db.md5sums;NCA E{VA Mz\f _xI/잩B"HK{}lw&aPk-zܘog-b=앏3`){i3K\ x]vq!cUh2pN*-:QY??wɆ i 5_O~3yh]zbjz+xʯ/vNNbi~I {=Yk2WafPDwo/ 6+ϝ8% %uA%|Lglambda-lambda-v2.0.1/tests/db_trans_fm.md5sums.gz000066400000000000000000000013311445553061700217240ustar00rootroot00000000000000&_Vtrans_fm_db.md5sums\7 {?EʤPDQ~x}x{=H[\}<:v=\o۟trҢ抍TġGsڶy'y<_>d/&sfWR 5kM{v-6:L@^ 9#9l5QZ7'" 7%\G߱`Q- Ac9E鍘,:ؐRtj] zпqsU޾F U1܎ZM_?=?})OyC3 3; ٍm5ġb1drHҶ ` 5x9~ZrshH\]V MBbQAt B'tFZk.VqW>E@4VG]e2UGh9kԖޢTW[s?ƴƽ 4 d ~v(C/ď7&Aݦ #o":(,rhee l&aXdLt *&[Ǯac3xx+s8\yRFܹڝK^3Bmؐh+xMmGoMVr\'P: ХG~3|cM6u!!WtMW/Pu(P}U!!i33GCǯ.g\ۇroꖺHz},g ~'ˣ7 lambda-lambda-v2.0.1/tests/db_trans_sa.md5sums.gz000066400000000000000000000005201445553061700217240ustar00rootroot00000000000000&_Vtrans_sa_db.md5sums;N1 E{VA ͓$NRѲ_`$< ''h@rsϕrzryu 8@oYͧ9.IlV},QJބSCZf~ xX˾@̉@":Ԫ"z˺m|<6 Go2GQ2D0@9C_b ,Kq!Zt. /dev/stderr [ "$MYTMP" = "" ] || rm -r "${MYTMP}" exit 1 } [ $# -ne 6 ] && exit 1 SRCDIR=$1 BINDIR=$2 PROG=$3 DI=$4 MODE=$5 EXTENSION=$6 # check existence of commands which openssl gunzip mktemp diff cat zcat zgrep > /dev/null [ $? -eq 0 ] || errorout "Not all required programs found. 
Needs: openssl gunzip mktemp diff cat zcat zgrep" SALPH=prot # actual subject alph QALPHIN=prot # query input file alph SALPHIN=prot # subject input file alph case "$PROG" in "blastn") QALPHIN=nucl SALPH=nucl SALPHIN=nucl ;; "blastp") ;; "blastx") QALPHIN=nucl ;; "tblastn") SALPH=trans SALPHIN=nucl ;; "tblastx") SALPH=trans QALPHIN=nucl SALPHIN=nucl ;; esac MYTMP="$(mktemp -q -d -t "$(basename "$0").XXXXXX" 2>/dev/null || mktemp -q -d)" [ $? -eq 0 ] || errorout "Could not create tmp" cd "$MYTMP" [ $? -eq 0 ] || errorout "Could not cd to tmp" gunzip < "${SRCDIR}/tests/db_${SALPHIN}.fasta.gz" > db.fasta [ $? -eq 0 ] || errorout "Could not unzip database file" ${BINDIR}/bin/lambda_indexer -d db.fasta -di ${DI} -p ${PROG} [ $? -eq 0 ] || errorout "Could not run the indexer" openssl md5 * > md5sums [ $? -eq 0 ] || errorout "Could not run md5 or md5sums" gunzip < "${SRCDIR}/tests/db_${SALPH}_${DI}.md5sums.gz" > md5sums.orig [ $? -eq 0 ] || errorout "Could not unzip md5sums.orig" [ "$(cat md5sums)" = "$(cat md5sums.orig)" ] || errorout "$(diff -u md5sums md5sums.orig)" ## INDEXER tests end here if [ "$MODE" = "MKINDEX" ]; then rm -r "${MYTMP}" exit 0 fi gunzip < "${SRCDIR}/tests/queries_${QALPHIN}.fasta.gz" > queries.fasta [ $? -eq 0 ] || errorout "Could not unzip queries.fasta" ${BINDIR}/bin/lambda -d db.fasta -di ${DI} -p ${PROG} -q queries.fasta -t 1 --version-to-outputfile off \ -o output_${PROG}_${DI}.${EXTENSION} [ $? -eq 0 ] || errorout "Search failed." 
[ "$(openssl md5 output_${PROG}_${DI}.${EXTENSION})" = \ "$(zgrep "(output_${PROG}_${DI}.${EXTENSION})" "${SRCDIR}/tests/search_test_outfile.md5sums.gz")" ] || errorout "MD5 mismatch of output file" rm -r "${MYTMP}" lambda-lambda-v2.0.1/tests/queries_nucl.fasta.gz000066400000000000000000001015451445553061700216550ustar00rootroot00000000000000|D`Vnucl_queries.fastaɎ$ɲEc6#&@f> 7x9" zdKSUxi#;7?#FfVFdE?Z9翣2Ͽy_U~qxW5|q7>=9&~~g ~q0?/87sȟa~./g>ۑ\CÞ".6ο8_t=WߏW^c_߾͟U_V ~.uV?>sF"8} m=GٞܞG?ߣ=wLvaսσs}s[Fua:eEϖBO>˟㜔M~8C~^Q\wO}<]Cp6>n/CUs9IiNJxnsbc|h>jː?e']rnTgtN9VGyG9*mdGOu2k0m}S =onr!>ap}^ų``(jy}yM|^Vtawtuȏc={A'j\( Gt^2IԳ휛'PY=ugUh2_{Pmz(! xK}:حcpzS|/C~'aPMeL6IrIIUpZs/h94I_>.EGCf <Pg/d/ṯσrlQpJ>_ rTJ6z#jsOCC# 6zG1TP&OdCezs|pϕT񠍪G3{gPc]z:K8L0W]T`=+n7\d%vWdt3MÔ!jsql[b̷ Ua0./y 86dݾ $(V{rt~e@ݯ~uz@Otﯮ3U>UI u:FX>9vqͻ8P'g.C~<Cu9D.BQkt.9 f0n.V ^Z%0 ;4,U Ԡssݕ5wI6C%V@DHDiA =!!a@ìAg\'AVyu!*>*4 Oz/$ tlKNs P?г[nS7h%|6V SZ ݙ+~T,& A]p( ?w>bTwJö5wFob?=I bs{=õq^ bym&LMj}U%obf5z;v/8I46}XT?GCӌoKap(J`^؜y2ɼF.<{KZ0䒌 󘔠OU0CY,C`_cCbC E2S}>Î]o#sfﱂ@;ݿcxt8,-CZ6d@/Ah$}DuR1z S(&߽G@,{_)TBt(8>T"16&@qQ>jrܙJ$-8(Fծː// :ߠR.#UC!_7m7@/& ЁXQ!CLҩ/CTȔMʛ5U,ⲊUU@"!x>8)*5pYHQtv}bDɊ/C~56)vH>MKU-K9Y<$@1tdR^)3C|)8/~G|\-;%?v~Uv{ ٢]FhAϚS;Dː߇5 \'*c $4% <]ڵ3ϗvBKT+ {²B;5[weHP<*U @zyPflCBS AOfRoCDŽgPr_=oyq9 [7jiOHBvaː}RpLFΑ0+;sR- 79uE? N Q$(݅i-yː߇fJL'[ e(2 |dï?}by|pdsĹewJk%a%OY}pxJZ!,c6p*{X%]D%As}yVBs l V:HC]hvC+ v4J>LR!}n^r 7ւu2n#Jh9 nf@[ی|}*j[6T!4ו*:% ]<Q唽1ֳLuNA.X<=/5. 
Xf)f)R%a uJd.3I-*+,CէBң8l\S め"[U6 Aioy^j:C?.TBK;CMh!.luCpo{o5zO.du%ۘ^Pƺh%ː>^738jl=53OY*rVQZ%Z" c]M~ﲎ%t^r:|k8`X+"ˢb^!>U17p6Y#ŶT)DȞ0 ^]2qgQ]rJK@SCOxײ'lYxIT(z˒i]S9eȯC X-VrJiψvqXW e2$>b5 :# &n^9+JfURvf9%4!IL|Ԫ S6G!EmSqd[Kآ&ǩfcKUJiZr`1HYW[4H]/6ہpfFeDFqJHctlC~)[aΒL'&Pp]8(>]O\1BP(dex z;at T1?B,ЗVJ*8׭0kX{DOo !~9nr=l* R‡#n)vsx`!ՙOB$qri%pKtC}BuVWTcJ;2#*3SrJn.jچb< Ԝe܈*@( 䌬c0-=ZjaB d(Xc^MZR֖jZlu>WO?l"[QG3@-ACs@ib|!U>iL ˠ."lNq?Y"v80VV$0)uZl#8kؑ>Eɼ0iр=1ya ס:uTm耦2%Cả01%&!U>3M>z CZbs gYϐio*ĔYlÊʱrؓAW<21B*6tmHqc# UB 1閬2&l^I_lvX;Px/c-y6uCmAX~ؐSN CRV7O-t6F]h$QtXCDpܖq;~=+GYq)u*1޷AF:扇B3a'T@+ j%{P-\YMR|ymQ(ej=Zpy9b7( *\߉H r^m\"c!T/o17 j'K2|X>S7lJѰF8hQV](Lyeog"(d+7b{-Tt6Hcb/o?xTݖYV8S.z &yLcuH%b rFJ 6iGT1hTuC9hpg(y9 JA*yMD S c=ޅ.r{J$`z~x[)-:扇nt᝹jc4[n( @^-u;}d} MN(F14=ط6= i=lC(8s>\R=3AԞm3m#_']#YY ꉁk] 4mn_'s¢bp516a|7x76x/ )i*[wnSk83@שp9Bp2?b"ԦA ;2]Iw0TqWbbf&1leQT v,Oyg68.+%/PS诧=!9 5(.c?W(Ь]a"i]})6/øbMJy˪tVˑ9^1?əl`RKܯMݷ~/ c~>PϡK6ݞhyJ6*Ɛ߷=!5@ XjABڶ3Z:{ב3>޳)(hX5JĹ1VBK1.$ 5UaMafה 42矣\^CA449m5%最%ITjjgZ^>_[x;S5,Մl`ݴ9{_B`1ϸlXZǗ' Cб))m1|6|Xh ѼUcNd!ER]Mg[ϳ ``X6٫u,Ϗ&~[-Fb]0!ú:5P n\?)@C=ȋ,E<_#Ħ}IDlogD(έ2r>S;4=Z 5,)0ajZ=ed6d߇lcUg8ڤB}E('@)܉9gl(l 4PW>nq\."L9X 8峄 cdn-!נZ(PC4z+"Rncoxb)(%P:v$wf4:5Y9SӗS/ge%_m>uU PTǷ1:dd4;BJ$MS|T pBY!I)IqRpbmj~}{c~^B e$0((Z=Vބ]u!v ,s$,mٞz߯s3e ǰni@l|Iˆ3x|ZPLg~hi23qX%"#rYOO[KRB !n`Aa:KWNOtDI%P)}жNZ9RI}}"(Bp$GKV 슱/Y|;h|{j$Z#)7<*Y81O,'[0YZ-Ju ˉ[8d pEpv~2f_Yj38]<4cN!(hbyK<tHZ4J,c~BqhrnLi6DXu̗Cy fv=ٜ֪3&!å uAuKǑJfYi\eQ ԧh}gk!$uQĐY8v bBCQP2(yYe׳q=xAÆ1?1pJNX& yM#Y /R@ ƴoIK֓6~ƩĵkOW:#tKg@z}~xyBnlu԰ |T>?U= @>JpBcyEz:(V޳`ۂ F/dX̕⢼v?x3[T46H5^'pXIPU0~H<3!BQC)9P<ևD8 M)ٙ%Т"&'CycX)ӔK ^%u%ȴjFŮc~]JPŞ25Q8J/3-#1BH18"B"R3 ([EV5hb_X\rJ6foc~!VG3|t\|jQ1WLK PgKR TYNYź# i/(֜Tx퐩V>%}S8nJ7l^:Q_u̷c)sMMZN}ꐤ帎RP/pˉȹmiEW0@ȕcؗWy!-K{nՠt)V:RAl(y`O . 
k*'S<ӊ\+.ʃS*$_&:ro3Ur$H\b9!:mc~vVԏpsI)8K!KD^<-TW"@38a?á ?M5ֳ( #G!Cu6YWe&DTaH/onXIoy`[ gz''JƜ,"#HTYP,,KEZ_eܧϗf]Cc7)Ԅ ]- 9z3XqM3~}!ۨqm;k[J \mĴjLg~n]YETUtˌ@_yظR+b fA%)"v JuXr`uK?ߵRb<+A1g7 ke㏃V>+:ZQVMs72fK} AL96놜 p{lipy[иJډ`G-Mї} ocv :-Z]EihB+XҍA zvZa U\ 9SsNTX<,C؆q^S;.bYs}}֠Iԉk ^"O]}av',ӢE%0bf H Qi @&6|v^?9-Y t*f% 2dXQr4Soϧ~%P\eM› ^bL ;סVo HC1yck 0j0n*iE?wa<}Ȯ4/v BMI4A.癧r,L+ߩjɘR1;?IR IgQMrNc`#N9p똯b(~-h+); LM8oǮX>T3DE rKS잽AP ѩ<= QnNc l|40*tCK)Aw9Ih>GeO*,<8mBT8B " hLMYcv, am rb{cs?=jHhX@YDOö߇b}q$ey3 MCGSrI-QrٟׅBdnK zsLOx`VnR@ˠ℥K Ik5z~t}O4PT{ cz*lIϟwyS+1yژ wqNkP4Q P ,ry#v3|?2/%J8p c/ >!)4zq_Y|ZP.%.t׎Դ#|(i"we{:'MQ>@KVC4=* ²Vo~"[()$橙e '! y׿E'_IQv:yIMIŹ`k{uåDx !*6eў.?NcCd >Hc?AKZy\ne_,9zAz7 +0 ))%V;ϛ_<Hog#جc~ Yd9%7>  ( Ma .lc:ub8ҽ~N[-M % 1R' 2H^st`N!:I2w_Apv8_qq?TGbiRxC9n-Ipd~sg1Zғ6$s utp:w:^|=rO+  Ka>5<"-Fz AiOocr AQ́8"YA8K:̄¤8`JqAYsVݝ l?TD7bk*N $HԔyi#Q1l667uLyB6\UzЦtV3/!S*K䱎vLvk.<#>*R#L׃к;ce-pR@|NJ.6e|s>5YҤ3ڗj~[w ؠUP"1X<|Ʉ=.<% kiL7?qbS޽xo>OjSmSNd 5SRK.2~NU|6Rr\ps"C;N/>%u̗?n7KM;!#2t, Tf1;pr؆W<@^E<ԡAEM ɬc!Ztv2d# 2NVGyxiFdHH }Az@ Ɩ#JmR4K?'? XFΌ^nc@&>B#%`>C̛ܩM+>uU\M |‰% o-.ƾ Cj-6f$c!:Ctqx>RcRS*[ 6˩=1E2 g! ;dIv*n<5D& m>Uy IF]) 1ߏY&e%# 5]"p@ްF3AUuL [wL"OGD(,sńIzHCk 1xσKT5wbg218F }s?@WFQP¾sBάc~'5ү7!Wkry~G ><:fZ,%CVb!} Yѡ?2&fըOa홧.F*̽Ȯ$6Wyŭw{4kEW+4R"A͒=:G \ KԈ$@G2EuyjIeaBh5$ٔeq&g1ԿԴ7+_J4شoҡRBNwGiTr=+_oǕ*LP_-2){ {occG](VP:U"PҨ#sD{1m=砥H-9:a'c5fK2Є'}똟oYH!N N/X\&9D<3!"c\#otz{ 9pou/=ސ!vzvH9b2`|pvn)穜M$9(QHڝ=ŽOM! IKKW6 GQ"Y9ԐSpRNR3.8p\q~xO?z؍r7Ծߺ/.aps%@b]l. W^uchSQUԔlBn8p<fܚNu:vI3R :/2,=ޕ/~v7Աd(}=C CjZĆ\[鑑 <B{myt`a37Qj8.$)sr뽎y ϒZH۠ÉS_t͐žX7y)"-]p>4yEj DyvA{ѳ1_)a%03^.ÅmCV`|+?HAdŕU6NwOu~NCMkZ*[58}(ŒT rH$qϧ.e(&က;^Z޾(,ϧFL;rCay5{@_czشYs_/ hᬃjٔ0c丝\.'ƫ>_k3 C\}kt!6!αg_Z.:%2' Ag +=#2h9%Օ% Aȕ~ L(EQP wvhL`=Ckr]{p|Do2Ki(V Qobz̴yHKWSE>UWF6Ƥ=DϹM (RWrsBтc! վu3OmndL@FIb:~e<-ϧs8~@-7RKz9`\2c~ JwF=u(gBRjKlR [P/G6ˈհ:Vkҹ(}(uS.@ehJ NW>XH粕98۾˦9bhS~IƆKyOv1vLּLqJ,. 7t\YAIo XfQ{~pz'm\p3ĸW&:}mBF'5OՖ1\@o п-?Del+?Es%=d3OMm1 ib,LʰPr3-&OPSZ|;pCu>r?DLEEnqZ]L۳Z b-.jrAŧ| Zmn:$i;)Z;#. 
a3Pd󩻔 Ѡ BORpaGD!!1CR%jԊUPa܏͚(FF!ExCI"neeSK +^2@sa' !#wȅsR ƌܡH5350y`#֐ d_]fg– s 3pzu xkJdtn̘:[:f `cC<uL FO %ٕMZF Qp&+}0N{X /~)K^֛(œGc, CUYm A|X,ߜWr)¿y~cw NMk³ D흶RqK.IAPy}-ABK4a8ؚ\/8+bL'jܲ?ȰTFa5KZ㡩R=N Pj;+/l 4&yr  04~$zz8bJ3'Go* N&kXSMU I NIhKj>ȵ޳?O<ĉJ!,yC;'P̲`B7q\:.48 .ZsTMKj8hg?Mo xEj_t:Uj(͔,c=r_` #W4p&`g$?I֦|Gp ,K|㍌y鼄|=6`1kPn58/r\JRa3oq1aXdacX\5hNB"ɿO`)z+m ܡt&%==3QVMPRd$8\.#5ב\܂uO #e@ˁ!aĐ I_%62r!h.Q к,0C9:fFHtI60#b<5zt[ʰL>kerAđ?1a`@R\<$@)Jb޶<~K.s.伧K!U(lV=ۿ/ڶe'lMOd=K5 S˒`movK?g:H9(쥡gZ`ܰMh> _ϟ>Tnhz}O9^I߮-WSPWal_i^|h_#Cեq{()kUeC Sl u7S(Yr`l1C53)E5Y1sB<4/HuM,]9" rwR?ٔR ɀQTy$.HPr(u[y يM ڈ)A]f=M* oc=~`B5r9AddV@"AB@W<Zx=%'UxWp 0:7{!}u]]vgДt>}x02Wg:ՁGhLҺ/ԥFfw}]2P늘 7T䒿0<6`d$޶Mӈ|6 T(1~fDCg"fLi CM|%"$"HRTE]"5r{ߌȕT89gq̐ĚxZ*0 :#TӯnHCDMq!\O݀ 9¶jP5@IiM_t~(Q#_RQ8C_whB _XYTnXS;.>0U/>)C ^`B8@8 Iop7h>C2$I1)a"8ʒ4^X/|pb8*gm-.(t΄zHn2*i-[Iv =աW>o5*0' pyc6FyccY#2B("WdCZK5㰊/b5~iRF$rVqK/q1\3)$+AZs~ռ %S)pf\Xk #Y*3L q{1!1(%t .,c X'5w~` 9@Z [b A`U $GeLǨ8:e9/=7B)s(,z{;XYO?Bk"wyo:;s{TM '%q:K^l}HZ"j8~{=|u@=g~[C`njtEw[ ҏvWԯ^g~/]` j*ߣ`P qХ#$w[s:)Rê< yXDZ߽ȏ3O /PoHǻm+PBy0貺fW@nS_v:U^kbb7NSA/gmWc8DnHI)zCS2lZﷷw!W0t.~jERCKv _-]RcՎߏmDKk-RKS9JinI m&ϧ*jms7Q"kJMq[,V똟R1%k:÷0_ o="~ >7j>HI]ʺӠ6`od kvpّ2j}ab!f.43dMZ`PbU'2׹1?=nSWrFڴu7?䧭cpe%z@& ZtSzʄ!" g7:Zb}g< 2 r;9NR63SN* "gc3q x}fp@f1;oZ[=|;Tziuo5uoIN4 :=w!`Lg` ̍s#~t$tPnC9\mGf=v4#_BZEtOD专}1۷C-<&9ӏ{]Mb̲a9矮csie@G\⴨#D&d_s*,ɮܴ5f>/{_y?Ĉ5-c~E9k^ S u0^UJݼ|\n̏_8c\=Ԫ;֘w_ϯ#BD&RR 0ϋ*/Ը7ydK2f߷_TKў8 }EBpz&_| %tT$< l8U2Kp} le_/ݥ<Kn#t,a4$M'`5:晧"Zq,lƆ t6ve\T?u5|L*QAm6Byj{z?dBXҋ Vcڈ!/C]գ6'qEyֳ>Wi?9Z2ZNlȚTO9dNuLԥEԽb޽߁'\CX]:LZ$/QTW'ToO#mvݟsEWƾST|̾DD_d1UTѻ>1a$6fN'(';Ȱ1ӭ33w[/#+#;K;$*5EJxM,mnfK˸;`:ݱ303Cy/:}rkJy K*ŏp [bή2Y;$cT9rxT8PJ[c~bq2e:LU.Sl qf}ϧF[%Ƃ?] 
pNS9t^Tu̯뙁,EKT0=8cu?d9:^xf{3Y8 eK/%F=JUIw ۪}ϧeLj).Rٽ/$\4pFvM`;h9v3Is̠1ߎ[ @ UHuE$7fHdCjKR3Sd{NŲX6;qLbR\R7nuqKbV(_R˕|J3yJى5kd@(O[HpD :&xhBȔ 횙nXP`f_//T8r" g"@#>uwEMS&GW@׭޾vA I j7i8L'oR) )}1Sxfq<a% ^hp2~8j:QBAF%Z]cߣֆ{whH#BCM pTv} x}#`'*e綇RBZƉrZE 0:O(ٶh_m0]N9U1>?R9ɤ 1Of([Pkw Y[_nuٯ9edPixDS U:&`?l3Q86T*e̷?Zc n&A r <>o-y2 mXqZ8gwWbX&gVhAaqȁ]fggȿ\ܼϷ)Da,!PQ(PS}p% CM]LoRlC=lucj~%[}3O}yCNh3(IIIHq\bn<<-WT]T( 2]^49/ze4/tb9S4CBL=z:ס^`JȽr3O٠X򗣴2*ĐR]o[zICY m_Lr∧y&eO?w^®"{;9R>cY׳T|p5{~,&l 99kHK]:''fBX}p(Rݨ;JQʝ*"r+NW-Ugag$n?1㮝-2bcj%JnZy'+қ7.y@J>,k㘴}J ȵ*cc4VXsj B,1֐(,Z(JSR}:f!݂W&I3]fdl8"t[ϯC D$V A ,Pzs݉B"trnIprZѥZt0:__Soȅs*V Yîi4V u8gt3O͟j-䲰Y nWZj! *]y7R$\g,ݥl}os""q ,F~ C5!Xu#댞<=t"Z;hei+eȔ$d:fc(H\u.LZ KoE{ko@li>nH#2yxڱ5/yQ~̇t%y[1`paz`o( sz j󙩚 }@t,a4 WP,<3;\ ,c~a鳆iħ܉ĠIķKIU #=D$=HNޤӐ{RN9;V LxK'fJiQQc4} }#M4auw={& f-0wONr^z\޳???˻ZXQOhdj#w*oө?s@UoEեHͶ DC)uEpIcleJEx:s6reV\W-bgoU>J|"(n(z~~Ð9FB/S(^g*H0ޘV Cm^W'Z$ֹ۟͠CLYQ{;d"P Ù3O B&~4}cHOZӲwc1ϹT 삓\$@4j"\{q:;i8&nn]\n]HĸWVj?%<{*JYzJk< CH/ra~q 6x)R^ujyPcSv[d</(c >$:&בBySY-B.jKzmx/^|/rCo22ן\/q>1䡷"۱B`hυU^eЛ$E @(:=ҟ][sN'6MeE ]=?i5gu@(*M߇bC_"l8N8XKRDӮK CA[8RSL71Yy'u#0"梈,07o0wHHSIh5[޿ē,];AtnO T;v أ`i*3PyWM "TeȆsmW7c $}/1eOOʕu"Qƕҷaq%?9 Du̗Y#dاZPRh]VJ mv߷/>|NJgQ"gЅkA1C~N )6!ƅH7; jD^0c~oވu̙+Yq8w40U~߰YRI6L}SaTo QWjzF1X(aJIm[z6L/+ujouCܳ_.8O:~>/fB2ŠH6 ->W*zJR0DAIR+Q:jYckJ@( Oƒ;4&F^<}TyW*K~&ZGU=C@{1[ӢƂkRnq/rg)Ҿ؄:3Oj Il?CV @zʑ1ɛϷcjrtC9:A;˛q~{jqtu1~S,KW_oIXbSXGFuqJ "H?u3O]jj W] V:ڨ O֩vϗIZu/YiP;`ܾ_H>׎jC&&两Tlu% T&-aDk"R1ǡ LӤ &BNdX'\<1tڍBy+C%0,  󭄜ckQ C9J gtPz() -\sٷD4=ier! pgAo>݊Fi'b'X* cA6EXhD\t/Pn~j*؋JC9؈҇xYή835;,0L%kek_8?q89Iym%ه'Pʀ#ROeX:+ĝ߃6Cm=S(:04fy!-ɒ-#q~=ֳ<]OʻCK1YN8B%\~D>'!C.x)dž@6:}seh ˖ g"!N`s !$>>? 
$n ;H>?TS}N2*bU{M8:o$;yzi}U7 pyYK ev/XXpnu̯CZ./jp_b0!amr}'QʹZXm#UCDJB+B,suj_&e ; "{SzM0/{BRa)uYIJ†D51JqVLi {ڻ]Bd@[Bzl!.ɳwهOlf5Lg̴@ >sӷZ5CvԙΜHǑ!jXXqE9\XIݥSRZI`A> Dpz+wnl{GV(CX.NOl=$|Ƕs~AJe"&S7ED1s= :| eb18nKEPd鼝\yFjH1R!+GW_rz-Uq@,)w8OY SnhA5,O1_˔g19x){濵' 㻿~H6eKVM:38{[3ʀ>9N*(SCGήc~ yL!.Ɉ5x\˳S'c:CwaDbͧ46E8w\u̿R* Qruɡ5S%8_x>.gvZNv#2%U0t$)xf}b: 8:Lb RtUM)_qן?/ j'1ua h`u~{ebԬlb6M胉fGM.)u?)D'a [Z ;}2T xo[PpRۘ8'"+5[[t帷p :)\:7`E7˳^0/%*PR*1?9=ѽpQJv)}|j8qbH :RX*^r%)z9K:m}9?$,װbRDƿyjzsY.Lj.Uxk\.{A~ʳ獘ot782h:ˁOӒ8hd5]+:f_Se=Y+ N@s͵cp-_/p}_cWe^ &sEuLcsHifviw&_XY WBjyC.n̥ºVC*^2cD$M~.) __L1s_tm qӞu̳NK";/rȡњf&a6~nc~x{ZD_gÍ6W:Og^S!_ p~״ǎeBj: E:0|t@"RN{cT zP%Mn:vNƩ6cS$ZV䅑*S;,Ľ{fPR(p+|*Ce( Dr 1? [S~05DVPq_CԩD2.ѥxUmByF<\֮E+D1\hq' S18z}(ߓ!B5eS4YLX"Xy`t1(faBt )VSʋ)!QEIEui_o|pkFK s z M5E>3$JٴL@ݵևJ%4PzLc)z\`ЁGOW^[?cEL7)" Mئ"08V^|?-WF= o8*]Ἣ'8t,x·o!ɐ6 $^qiCBe8W@/׮c~<)Y5!ҷ$:N99pyU,g%íM :*&32j1:1c}g:ھTy /%~rTzH3Y[W_cj 0QCQ%qZ[,@Z|UB߮s46ݎ #QoC%6qhvmA25pr0[O[T;wQ}!{^^+A*r0RFp peR]=[^TocYM\椭hevMe5ߎiHbvoSm~++vW ,s)9B,>B6K[}gPm?LאƋ(&%l!zj~J;m;NI m U'4~(Cohѣ7PSq;+:~jW\U`W~#}>gy?Mv 3dX5?miЊ]0:.I ]=v>dc[^TmUu*qL5P of#kMc麒idӌ+2^|~p nOS^fބodY)Z ;0Q~g-T"ߘ`:$glj"=lЖG?ݡw/(ek$Je m[gpx8e75(mvhKDg&DCZ_?Apԝ~ d ǐEʩ^6p0`bk~އjby?G3jl7jk5Cb C(챁.=6oѯe"pEFwx=G7Т:\|3{+ؖaQdѢoĥ-&$=n5?ѩ1V4f8AA.Ē/ 6L*=WZh2;M<9RRrW{͙E`?eyM7<#6w6ﮭ5*'P1K[rbz OkhwoGm>*e3`\?mll rvS-yha46 4S|Bҧȕ`TaQV(xk[?TMlSsb"X1#:rӗC4"GB"J}P-] xö [t(8GO:}{3q8#f\9j]ϣ hPnf2A .TKwm }4^avP֬,+Y1"˥s:6UmbeBwd _ h)%8q| EB}[yx.uD?V O 1Uŷ$Xa.v.4R迨Y ~=&0ql.T&Yu9=.5+7aÉEb5)j:x|!k+>dWZEpHqk7t5RMWOPZQҦ\܈veEKc&VtUp@yٟyvO\@53}m~$aPVj4}Өa)tN#kpOFW)`_ۿgr?6}FŸ.J Ð|94yl+﫢ժ|\]\rxO3OaELp6-K)a^QG&Ģt@tT|;ĠCnS?c~$!Һ݇mY*4fTDV4G6RN( -˜?[x6hVly\v_=A]0 H>k~bgHol'X([zZ^ݣG(E]hOeHpL@?Y" BWdb5gLyH{]9\'ᒎ3a펇o|6*꒜ApW!#J7^常}6 7JY1.Pr0Tu F誸d1c_CirKQV qk/۴!ZvHtEEW-3G5s?:)%tRZoPJ>ABBn9zK"Or|?"J*$W/%iH>+U)^B~D!֒H75Qoy{,zl?JI΍4r7Pl@<|lz~{SV>[0<Wƛ: i@no=bW\ڞϧ̌OP3p]`% מeIդeoT3ImGUn҅(Qdه}&Y*a!՚PbHYnf6sۀedv}xQfJw^/kHȶr~ y8ұΣʀcϗ% oF06#ucR~Ig\׭C@Iu`>kYA+&4^s>Fb| lAC[?Jb񔶚?A}*&Ig3~91̳9ے 
bN5n:URrT͕W{|>5D},I[=$G.;g#XAmV#&!":ߏ/P[RRKs8$3mIy xU rFGEP|Jz)4EչX>??|Y*wW!d]qUvʪ#>?_0nAP:`UiVs>zwXƄPxnKI~ ;%Ѝ< t BKxeTGZۯTwXŽ1 (2Nk=R`NڑRByseZV|y h_]/X$UAn  um ɂE#/p;M܆&8&*#H] ].9@GEj=ā= ArxlD5&&7ZjY6O͖VUƮ_KGv^=,!C*4R1n!󩵨 #5k,toiz j5p5SJcdC0*1 %FdbXS_y|=:ޟ`uݱYSf|KoD4|{ͿDxK)z]ʌ.5zw%&ko#hA2^ 5cxܬɒ鷷~,( ~դޕܻGx`?g%[@n~)=iS>r$ϼ:)ޥ폺3'?Um UR Mvyrok{2alD_%[ #+{˟x(fRS6U~V6T$Ψ[2 /V*e՝OZ&fSJ[T][Qy[7[5Sp\*dk8_uv2ۤ6;%B>}.q*mʐ?"9U__^t,Ue0~[fTtZԻWsѽ}c_oWj04R 6iIrBsMf;0 5${fIp䔶ַao:9MZ2FWvctضa]dlÿK):)jl\zIvyϛ˺p*v-FaS4p,e)f{W[Cقie=l/94-Y κ5Vy\a D$( 6HKhdв-u_vgXq1تgȝRN5w8mB<ܦMfUPvLM QbIuIeRD) ĦN~1)$Y∄k[5)İ]H\L]m/YŨZMol]궪RnRb[^lK%]Vuvw:i¢bnIG"MmCR/QChiKTͤV0H[@Pr XWlzBzcFM E5ALZUC9zz c Y$4:7QV+%;{TlRé|M30.|n&o|o-!A9p%ׇHPPz;y;NK~ջ,T)–暶Mjs*iQbnļuit^ZjM>lj0EjG#v/e6,h-f}HyzLJ+ `Vs>-so"B[nɥFS !֨"jEm|hfXmR׺R @0XaT?Ns֩lf)3]ھmjR {ֲ6+9Æy 7aoO]Z`TRǿĆ7,;䬡4:@F^TҀvrqޮg8/̜ G04ؓaضe{J,QR5 c:'u9hWXrh/\ &q0@sv5m>u|+lۄ79uܠ4awA(U&`6` QAŭY rMK-N0)`FVwBzo XD;hƭ xzq>6ioWZ%c,Y'6~cNJ;3ecXހ-_5 @+Z+*a-Z@)A?ðŻa&{\"N0*@[blq1bǏAyuЛ@ðA_/}^ Mhq% %(_ o pAu~`m{\3 iug-k@7oZVD0- 0.a}mU+w[֓9[FVNVɗfLz9h͟4u[ gsZ sCO! Mmyq^e$ @!e-d[AeXi:J+Mʯ nHE+]Ʌ U550C4 18|_bȝ"V(cR RG-P:EܺmM?؄ݷ˜ _ pvfgK@E< jqJc**N](KC~+nQf@8!:R+=|(9l1@ wѕlM%F <"ӛ'6Nx >`lQF 0hR;% 0h`1Ni͸|ڃ/aݲas-rZy1 ?&|e{cde`%L8":`Pz!h~[}܌&ªG ys/\T0yzܥ!\72, ؿ7Jl.7aL]? q@CפGw GȆ}\$z, a]a_߿#n0nwt&;3 Ŭ=D۳^7@' azhqS)N3m * 2VV+`ס3V *0Ǿ>E~mj0GDJ ^ZN@~9Vg loxmBpnǣRKQ'UqNOܚ_L':'Qƣii +x8᮰mJ q@7P=ㄧHF=~ 4y8VVe%| IEX4/Ti%,\"j'0h2sE( f3Lx} a-/a!ŀvR[eQ TҪmxӾ_ $c \ܹ 8p2J*rq&}`2TBXW  eup @"lB' ؙ-CfpbfޥcS{`K"Ѵlqޑ! 
ch@Ll*.AJRbD0S.R:!jW~ĺe^|F٢b`//JB^ \_%[̊?ybR@]f궺q-$to͗4p4hfh-ҕ-:#T Gp W ^ӚL(~Mw's dBhl ܸ׻͌l4N{at懘$ 6OMe?ӄVa<}'Xb0.,_]wNΔEyC~@&$DF0צqK|iCέ,1cMnٷS#  3h*%yr\xQƓz0Duq.֍vF K;; AtX쪻$ TKPB38k8M , Iն Wʝh1CM -=ڡ%>@ I=0 if5ҳ)r{uqT'PHŁk}ؔx#pTSP}X\x$`J@RG|KӡxOg;͇Eƹ&f'*t n`'O!+@i':|𶸸H e ]7.:Ƶ: ez5f>']&^@?C>nֱX,`qC*vb @Xڎi <P5&}ܡ%% ҄% uL \w {9շ[B_>e`Xp>+- ($2dw cŘ1sxJށ*|\DYX\$ cp6aou<қFq,^40g[yfl$ys͐-PȢ<>> GjB)%msZ\F|Q<}`AjSot;Sqfrq kdMc/ w(9, ?bVx'TU K`ӖÂLw6-;\^ߢ P E )yPq…Z>nǾ }^E^k>sViI}d Ch:کvKٿ:h`>tשFs2o3^WOzn εź"i=,'ܙإ`YBޏD,4y@(Q b@A'F -655ģFy C4pi{TpzE>js:P.k0$tnLHxMkfǚ;+ힷ_&l/02 |L3J G:b٨2`8*:cHހ2Xc M]HWmP>XC:۰{̶qb nZ 70''',RNk|Ev.ʏ+C[ѓZ^+\> /U-5&f#v(4:m3~~ 'lVvy ;> ɾ+6 [Ͼ-cEiig$>u%IWkEHq<iMoޭU!9Y-H0Oy[w1O8o?SsC^)x^),fwNtI%9Pb!i`XwrzY,:*3 #5UOĺ < 8|BY#x{izSr.C21T'gLPFr|Y!S(e8@Uf9^^/@0@[=DUq4<J}ЀΚ19r;3]@ R[Tև5/R%0x'L'ry-yZ ^>ZFc@S{3^_/"NjiѠI*@14U0xζN ݵϼP, 5(ڍx#@0:䅰pbLatZN41=h0&uO'-jZ67 'OBNoiyi 0rm#W, aoq·NGP02:1E7e ƬQBxĐtTGwB9a v׋$E0f7b%oO1O|Hnr!LO܂zx ?`LL7*HO<590'VMPc<:j`}|ȢfK=zZ@۠WM%~NZfvqH#>WAA%G.8+=&mXaGJOqeg^6;½Ǝq tXCsgLr=8*aA5,JT6(|"HCDp3OK[V-rSy1wπL;2hۈ52qd5*(7 i=et޽2_q/DoJM0<:lf'ݶT:8~/ϣkgRTAxfWXk+& 2[!A=2p\$d}iA*8 ^u4$L{D]3LC%*=DLaeFC|ށBf 1n@HOq <,'Eՠa0ǭY1Jx0.E'YlLl Q/yỾ23 xAs]ut4Ƽ iޖZ'jL4uy\nU5'զ ϗEUΟ B5`UI' Wx98b⑃brхtue{T./y@o`(+CZp,(?HW9{_\k+IҮڌ+AL1vܮߠt7s'Ykc<W-ˑ!xңω@cu+c `ߧ<mC?9$Y he$0;S؍^00a\=gkHdطbM;*3ŝݮP %V4T`/_C;nW cTN@­vy7ە(Y+u3`NSlc%M_pp^x.nـ3rOL6,s_tڷ!<ۚ&VMJGRn"/` K `` G"'O B% J >4ܢRN 8ʐh)oi<#c?ƽo0޼n/}+dm[6\an^j?ma@CG%|MS8@ -~RM LzsaQmxW{p5" "i o,ohu~HxN^Y#t# 'Oa"'8:N ug{bկm 5?N t{&ƙ]qc?_ޡѾJ*O8"=,n^Uq.Mձ.&z'\D"%@xǪ?up4i4$&KvE)JP9to<bCn`WO'n|Yl?-҃܁*,7_-,HnC;N` Cŭ^6gd%R>*fD.A6 !@%y#* ^;R#Jv09s=!'83xlEiwn$;O^ED7ɝ!6lJ=/j:~Sv]K88tŶ.:EW P, '(㠏pC,AV_JaL'<ğ'` RW?7It Zcdߑ!P3ecjb}˭Q Y< O'#>҉`kH~c{}9<( S!A0k4 sfXxOa-;o -< 0hH! 
0zmoڶN юSɚP]8jm5P[ /n[#wE~r˪_z+vi$!ㄛC.]mT׬?ua aЍ/tY@y0ϻsׯ>~n)XvwXx]?mQ筟B 8..sXi<Ơ#wP'3UȢ]| +&ܗ X4̦[ ]-y[<_.j[ۍtLMCYqmUm}6s02jY0z23w?VgE;G wQXu}Y,!K!/Χ5q24W-d i _?fH0gcㄗ+DX#4ko;kG8`)g6Ca6l6" /ro A%8[ O,#Swִwybqo1N (DYJsT1uDV>[+޶1@#g-Xl)gP[,ְ.Ďa.cal^QLzPP'y^ugBk DxJzzK7mf68,GڛLR؝nϫ׷7ցō K0qAm#. i?YLya?;ɲ{+|FAx 4qi'q[В=7Q roYɰ Cl+G 7#a!uY8Nlkp7`>mfW:K{=T>\+Yr9[![%P4j=~\.77)i8 i%0jp8 GDKXUd#~4 ㄻ_pf@}}nGѹLy`R*m57ox/(Q]`IjoGR/ s)阴ԚdvIbࣘZ}j eB6N`7&zKR@ۡf#thn_lS eϪv`BVm-tݬtWח8J Wuo+$3q ,p<\sh{O&x'{M*ՒVh?Z `>$#AeMNF?^ZC٘Gj˛ИLb{a=O]s[VPc@L|^쌁M`h_{0N.z6.Q݌(vn/R/.gЊ+6ˈlO=N`he 7м-lf<7~qDn%$¾f3O#>1(zp7t'qi×҃)oypdO[ᘃ>*Ԧq͍/Snè22҇:,~>LXr?3[AfwJ+{ECTۧeɃٌNKs!]?"fm9!#=G#o"J{'<>wiRbdN;R+e؀a4=`$)-ĜV:' LYNz Wj;;8F_b"Ѐg1vu[7:A{HocUEҘzaJD{4vdG8<@>38>X+NAJc`~VzL|{w".0}Q{bvG|e,7yY=cV׃ n3y8"JڠyY'IlؓX@->aaG~dqgUX %1 tJ`Oq$kЮӁFǿ_XzZPhlX96gF9%Uhΐܐ]/,G|~E礉;KNlA^/19`y# Y]݉CƜހc{oњ>>Mu?3|/cO]Iv(AZ c;BmY AHa?jmX 2$<>M w}͖yX0=j&G8!7@F֔\Ruc{h1 Y+?\#h~9b'@giR~[;i%(j~I98yR;֗a}YO` ^Ű۸G6u :̲ F֓u>näzuGy6]acmxÇudò8Mkis iڎecJ]O&uƽ&zsA'fOaOߝOad]0G`GXkDW*yCJj \Z)qmuQ?0SO@3,6ɇSwp%qVHb<0iHBfE0׳h |CNg~0rAB|T y^odM!ia3֎(&01yePth(E׀UoZ/OyA01+'  Hk_Fײ8}k<d ǹo0%6y~l<ۤX'|  :z;DOeq…~8 wCtU⨝!'zUzcu\M(ۃ'CoEa٬Z( VxGmƤۧp#e?Ojl?2G'S]gb,ΑhF5̤PFv;zi#gw43kK # dUOS iYpx޿1`eX<_MJ>> KƱQ\~ m]3aMђn\$V v 76qM2 UFyg^CT` g*0 BUdiէ}y@d^O =Ox &#Zj ̅1mk6q9>p%Y͛6u' 浳8ZѬwVLuW85])h0mX_ݎǷxS};\6{prB[ӬS0lL^dlĮ2 c8Ra 9O&DB̄ w1g?9S}> ψظmkyTxu`x#.kq9pzKпfA#lxITkDve lw1ÄۜRK:P!#m΄2@nie1 8~aC;ٍ(yಜJQF?kjuG?); ,0lE#[Tx +F%L6TN/kmsG`(ۿ3bd#+R*R'9OCHdJX}p@cɋl>V|2#3"奀W@˭0=dz^o XF)\fq_+߾#@G8*`u1 4S)ґGYQ'C%]@z` {;츫B` /Kܼ'g6aɺho1&0f\>_`8:u暾|1z YyBa ?'̃D ~Dذĥb|6#dڎ0GԮ={K.Gj0Jz D$Wh'htlg%dO9G).VI&AU+T~3$kp4Z`<0Πe`ݎ}ic$hȧ/x .Ib+d6d/G 3:)6.Yb>OeEypF^鹔iO@`{S,y7k졻\9(,Z GA [szrde%iE27N&ͬ|)e=e8'c@?X<"NЯ̳нY'?ݦ@^ְbٛ 3N CbWӍw;4hlFU zK/``,!C=6/UxPe':zUGY:+iho#p8hQۖ Ḭmmu?^_şbDJ aoQd h7]fXb4DדbO NFi PZyZH(WY4w7Nx %IO皡`=ɭG tK1ZX|ƾP4s>}GhgF bH20lOW\+V 8n"Ie߱a>-[4l&O:U3Wbj~z ލrlbLIoLt'>:]l]|5*~4^́8l軣7MwbojS(mJ̶fMGivt:N8| 8- ŕDf64whP0$ܽ>mhR HV./Ƃ)$+bh3#>N2e˧QFݕG١ [ؾѶM/ 
H1EؔʓcM%L c).RND}.xπz{Ad8Pi0rs`;٦ T± vGq-636eb&aTB06{T,;귩0_ jel4F/[.M8:Y" Dw\hw9*2b;SqfRٵv&oUed43ޱ(,}z耂FK{T"!vm9~g-*a,ǭtcU!аrQQ@i do=A OQXxh`tNo_Gg( t4 ޝ>H{nkI7d7BcTv\>Ǘii9F Kg34S^oE_cB&Les`q17DH`ç73J;P/ӌ6GBDLd6=lĎi ʄL%RAdzy4O<,}2>{OgJgO!`S1NP2ෞ͛C)ݕ~= o)Ҟ:t/uICs458Lx `ص]6b;zɝY`DbǗ ?j_JjwNZnCm1=G?zg[lJ s#1c/y2g=:~_CJ"m"8֨S5Q g}.>2Q? +w4"7f{Ό{k rhpDԎP Ë-EUrاAU^?ծw}/[qAR&,'FC0|!?R[dث 36il9?4 X"/LrJ#f3WS0Ѥui" 0\*}30Wh֭iz5!~}xo DnJ 1ѩWIZ9Z`}u>(}|d͖w,Xf$3iL9 5So5/%. G|T*5I"&+Hc0KSLpՍqMHͪʟ2#bh *HC O'`퐇6^kؘS_"oMZ"J_)owR |Ƌ`o`͢6ٔ a+[$Hɚ)BE2"-~Ǝ3RbYizZiPL{D'=nˤ7[CVF~>(X[E) LИ;Kց o򲭧GQv .+M9g,u?MUmkƙ#E5lǒX=CʇWӬQ~}iD%YH8U6%n UPXJ?gRvFЛNI~= 'EmYl&gKX.X@6U~zy9X3Œ̗P7s? SF춌?#m~*\h0fqs=]_YӔA""K4os&6bJ}!vA$͂AF%ކPj<:0o&B?^h,3:: G.i>NԻ^) t{QZm?lJ@ncIu ̶Uvn҃{q›> {z@KQ% -6 G&i~Cv2%|#L0*HB 3ױ:/bږS]ƑTfO7y&7{gڋ̺%68Ǵ,#˩rb 4<Ss ƓTkt-{n9$bH؄g}Һy6!#vR0m+offܼDOc|rd,(A{IZJdTW1!7{޶UuyP,b4h4N{:&qy i`33Bl$:G4&: x)2yF>C5xiM16w闿|ٵAPX|®.3 Oo!\MNek_A/Rynڕve_ vY҄hWVB%R!wţ9SjbLvo -%o!JBPNͥThP8b{yR0ur/׸WMWeH}w 8g),T/V fG(עՖʼn4:a] *jU6oz}4ӧ vފ+ziSs fZSȬ:i JDhyJ >@V9> ^om=o}_ݴ LW7m`iM^i74Ǜ&!LSS4JɱDz`b.ͧ 0îc;xsS/Qt&^?ieܑ4L1feT'uW&2-VZO5y|ZTAE35AzܭɁ_0 kp䌜[ ύGMntu60ܴ镛60q`zOիd"1kpD-g"=˙jS6D#TkYXsE@٭ZY1\3D(T~v -\F\*V'< V"iJ= v+:1ϴInsnz;5iL:ˋCZWozj[;^yko/]c0